def main(experiments_dir: str, output_json_path: str, keys_of_interest,
         random_subset_of_size: int) -> None:
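    """Summarize a directory of experiment runs into a single JSON file.

    Finds every `run_info.txt` under `experiments_dir` (optionally keeping only a
    random subset of size `random_subset_of_size`; -1 means "use all"), splits the
    run parameters into those shared by every run and those that differ between
    runs, and writes one entry per experiment (relative directory, a short name
    built from `keys_of_interest`, and the full non-shared description) to
    `output_json_path`.
    """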
    runs_infos_paths = tuple(path for path in traverse_files(experiments_dir)
                             if path.endswith("run_info.txt"))
    if random_subset_of_size != -1:
        runs_infos_paths = tuple(
            shuffled(runs_infos_paths)[:random_subset_of_size])
    experiments_dirs_relpaths = tuple(
        os.path.relpath(os.path.dirname(path), experiments_dir)
        for path in runs_infos_paths
    )  # contains relative paths to each dir containing an experiment in `experiments_dir`
    runs_infos: Tuple[Dict[str, Any], ...] = tuple(
        load_json(path) for path in runs_infos_paths)
    union_of_keys = reduce(lambda x, y: x | y, (frozenset(run_info.keys())
                                                for run_info in runs_infos))
    assert union_of_keys.issuperset(keys_of_interest)
    shared_items = {}
    for key in union_of_keys:
        if key not in runs_infos[0]:
            continue
        value = runs_infos[0][key]
        if all(key in run_info and run_info[key] == value
               for run_info in runs_infos):
            shared_items[key] = value
    non_shared_items = tuple(
        {k: v
         for k, v in run_info.items() if k not in shared_items}
        for run_info in runs_infos)
    names = (tuple(str(i) for i in range(len(runs_infos)))
             if not keys_of_interest else
             tuple(str({k: v for k, v in d.items() if k in keys_of_interest})
                   for d in non_shared_items))
    descriptions = tuple(str(d) for d in non_shared_items)
    json_struct = {
        "common_description": pformat(shared_items, indent=0),
        "experiments": [
            {"rel_dir": rel_dir, "name": name, "description": description}
            for rel_dir, name, description in zip(
                experiments_dirs_relpaths, names, descriptions)
        ],
    }
    save_json(json_struct, output_json_path)
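
# The snippet above relies on a few small helpers that are not shown here.
# A minimal sketch of what they presumably look like, given how they are used
# (these exact implementations are an assumption, not the project's code):

import json
import os
import random
from typing import Any, Iterable, Iterator, List


def traverse_files(root_dir: str) -> Iterator[str]:
    # Yield the path of every file under `root_dir`, recursively.
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            yield os.path.join(dirpath, filename)


def shuffled(items: Iterable) -> List:
    # Return a shuffled copy, leaving the input untouched.
    result = list(items)
    random.shuffle(result)
    return result


def load_json(path: str) -> Any:
    with open(path) as f:
        return json.load(f)


def save_json(obj: Any, path: str) -> None:
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)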
def load_cifar10_as_colored_tensors(ds_path: str,
                                    colors: str) -> DatasetAsTensors:
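    """Load CIFAR-10 as full-color tensors, optionally converted to YCbCr.

    The 50000 training images are deterministically shuffled (seed 0) and split
    into 45000 training and 5000 validation samples; pixel values are stacked
    into tensors of shape (1, num_samples, 32, 32, 3).
    """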
    assert colors in ("rgb", "YCbCr")
    ds_train_and_val = torchvision.datasets.CIFAR10(ds_path, train=True)
    ds_test = torchvision.datasets.CIFAR10(ds_path, train=False)
    images_train: Tuple[Image, ...] = tuple(
        ds_train_and_val[i][0] for i in range(len(ds_train_and_val)))
    images_test: Tuple[Image, ...] = tuple(
        ds_test[i][0] for i in range(len(ds_test)))
    if colors == "YCbCr":
        images_train = tuple(image.convert("YCbCr") for image in images_train)
        images_test = tuple(image.convert("YCbCr") for image in images_test)

    x: torch.FloatTensor = torch.stack([
        rearrange(to_tensor(image), "c h w -> h w c") for image in images_train
    ]).unsqueeze(0)  # 1 × 50000 × 32 × 32 × 3
    y: List[int] = ds_train_and_val.targets

    x_test: torch.FloatTensor = torch.stack([
        rearrange(to_tensor(image), "c h w -> h w c") for image in images_test
    ]).unsqueeze(0)  # 1 × 10000 × 32 × 32 × 3

    # shuffle the training dataset
    seed(0)
    shuffled_indices: List[int] = shuffled(range(len(y)))
    getLogger(
        f"{__name__}.{load_cifar10_as_colored_tensors.__qualname__}").info(
            f"{hash(tuple(shuffled_indices))=}, {shuffled_indices[:10]=}")
    # 6271394816323448769 and (25247, 49673, 27562, 2653, 16968, 33506, 31845, 26537, 19877, 31234)

    x_shuffled: torch.Tensor = x[:, shuffled_indices]
    y_shuffled: List[int] = np.array(y)[shuffled_indices].tolist()

    return DatasetAsTensors(
        x_train=x_shuffled[:, :CIFAR10_NUM_TRAIN_SAMPLES],  # (1, 45000, height, width, channels)
        y_train=torch.tensor(y_shuffled[:CIFAR10_NUM_TRAIN_SAMPLES]),
        indices_train=torch.tensor(
            shuffled_indices[:CIFAR10_NUM_TRAIN_SAMPLES]),
        x_val=x_shuffled[:, CIFAR10_NUM_TRAIN_SAMPLES:],
        y_val=torch.tensor(y_shuffled[CIFAR10_NUM_TRAIN_SAMPLES:]),
        indices_val=torch.tensor(shuffled_indices[CIFAR10_NUM_TRAIN_SAMPLES:]),
        x_test=x_test,
        y_test=torch.tensor(ds_test.targets),
        indices_test=torch.tensor(range(len(ds_test))),
    )
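
# `DatasetAsTensors` and `CIFAR10_NUM_TRAIN_SAMPLES` are defined elsewhere in the
# project. Judging from how the fields are filled in above, they presumably look
# roughly like this (a sketch under that assumption, not the actual definition):

from typing import NamedTuple

import torch

CIFAR10_NUM_TRAIN_SAMPLES = 45000  # the remaining 5000 training images go to validation


class DatasetAsTensors(NamedTuple):
    x_train: torch.Tensor
    y_train: torch.Tensor
    indices_train: torch.Tensor
    x_val: torch.Tensor
    y_val: torch.Tensor
    indices_val: torch.Tensor
    x_test: torch.Tensor
    y_test: torch.Tensor
    indices_test: torch.Tensor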
def load_cifar10_as_grayscale_tensors(ds_path: str,
                                      image_size: int) -> DatasetAsTensors:
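    """Load CIFAR-10 as grayscale tensors of size 28x28 or 32x32.

    Uses the same deterministic shuffle (seed 0) and 45000/5000 train/validation
    split as the color loader above.
    """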
    assert image_size in (28, 32)
    ds_train_and_val = torchvision.datasets.CIFAR10(ds_path, train=True)
    x: np.ndarray = ds_train_and_val.data  # (50000, 32, 32, 3)
    y: List[int] = ds_train_and_val.targets
    x_tensor = (_to_28x28_grayscale_tensor(ds_train_and_val)
                if image_size == 28 else
                _to_32x32_grayscale_tensor(ds_train_and_val))

    # shuffle the training dataset
    seed(0)
    shuffled_indices: List[int] = shuffled(range(len(x)))
    getLogger(
        f"{__name__}.{load_cifar10_as_grayscale_tensors.__qualname__}").info(
            f"{hash(tuple(shuffled_indices))=}, {shuffled_indices[:10]=}")
    # 6271394816323448769 and (25247, 49673, 27562, 2653, 16968, 33506, 31845, 26537, 19877, 31234)

    x_tensor_shuffled: torch.Tensor = x_tensor[shuffled_indices]
    y_shuffled: List[int] = np.array(y)[shuffled_indices].tolist()

    ds_test = torchvision.datasets.CIFAR10(ds_path, train=False)
    x_test_tensor = (_to_28x28_grayscale_tensor(ds_test)
                     if image_size == 28 else
                     _to_32x32_grayscale_tensor(ds_test))

    return DatasetAsTensors(
        x_train=x_tensor_shuffled[:CIFAR10_NUM_TRAIN_SAMPLES],  # (45000, height, width)
        y_train=torch.tensor(y_shuffled[:CIFAR10_NUM_TRAIN_SAMPLES]),
        indices_train=torch.tensor(
            shuffled_indices[:CIFAR10_NUM_TRAIN_SAMPLES]),
        x_val=x_tensor_shuffled[CIFAR10_NUM_TRAIN_SAMPLES:],
        y_val=torch.tensor(y_shuffled[CIFAR10_NUM_TRAIN_SAMPLES:]),
        indices_val=torch.tensor(shuffled_indices[CIFAR10_NUM_TRAIN_SAMPLES:]),
        x_test=x_test_tensor,
        y_test=torch.tensor(ds_test.targets),
        indices_test=torch.tensor(range(len(ds_test))),
    )
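
# `_to_28x28_grayscale_tensor` and `_to_32x32_grayscale_tensor` are project helpers
# that are not shown here. Given how they are used (a CIFAR10 dataset in, an
# indexable tensor with one grayscale image per sample out), a minimal sketch might
# look like the following; the exact resizing/normalization is an assumption:

import torch
import torchvision
from torchvision.transforms.functional import resize, rgb_to_grayscale


def _to_32x32_grayscale_tensor(ds: torchvision.datasets.CIFAR10) -> torch.Tensor:
    # (N, 32, 32, 3) uint8 -> (N, 3, 32, 32) float in [0, 1] -> (N, 32, 32) grayscale
    x = torch.from_numpy(ds.data).permute(0, 3, 1, 2).float() / 255.0
    return rgb_to_grayscale(x).squeeze(1)


def _to_28x28_grayscale_tensor(ds: torchvision.datasets.CIFAR10) -> torch.Tensor:
    # Same as above, but resized to 28x28 (e.g. to match MNIST-sized models).
    x = torch.from_numpy(ds.data).permute(0, 3, 1, 2).float() / 255.0
    return rgb_to_grayscale(resize(x, [28, 28])).squeeze(1)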
def main(dataset_root, train_dataset_size, tb_log_dir, models_dir,
         learning_rate, batch_size, device, seed, shuffle_pixels,
         load_model: Optional[str], train: bool, test: bool):
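    """Train and/or evaluate a TTMnistModel on MNIST.

    Optionally shuffles the pixels of every image with a fixed random permutation
    and optionally loads initial weights from `load_model`. When `train` is set,
    fits the model with SGD and a StepLR schedule, with checkpointing and early
    stopping on the validation cross-entropy, then restores the best checkpoint.
    When `test` is set, reports cross-entropy and accuracy on the test set.
    """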
    if not shuffle_pixels:
        transform = MNIST_TRANSFORM
    else:
        print("Pixel shuffling is enabled")
        pixel_shuffle_transform = transforms.Lambda(
            partial(permute_pixels, shuffled(range(h * w))))
        transform = transforms.Compose(
            (MNIST_TRANSFORM, pixel_shuffle_transform))
    model = TTMnistModel((r1, r2, r3, r4))
    if load_model is not None:
        model.load_state_dict(torch.load(load_model, "cpu"))
        logger.debug(f"Loaded model from {load_model}")
    metrics = {
        "cross_entropy_loss": Loss(tnnf.cross_entropy),
        "accuracy": Accuracy()
    }
    if train:
        dataset = MNIST(dataset_root,
                        train=True,
                        download=True,
                        transform=transform)
        assert len(dataset) == MNIST_DATASET_SIZE
        train_dataset, val_dataset = random_split(
            dataset,
            (train_dataset_size, MNIST_DATASET_SIZE - train_dataset_size))
        train_loader, val_loader = (DataLoader(
            dataset_,
            batch_size=batch_size,
            shuffle=True,
            pin_memory=(device.type == "cuda")) for dataset_ in (train_dataset,
                                                                 val_dataset))
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=learning_rate,
                                    momentum=0.95,
                                    weight_decay=0.0005)

        prepare_batch_for_trainer = make_standard_prepare_batch_with_events(
            device)
        trainer = setup_trainer(model,
                                optimizer,
                                tnnf.cross_entropy,
                                device=device,
                                prepare_batch=prepare_batch_for_trainer)
        scheduler = LRScheduler(
            torch.optim.lr_scheduler.StepLR(optimizer,
                                            step_size=2,
                                            gamma=0.8547))
        trainer.add_event_handler(Events.EPOCH_STARTED, scheduler)
        prepare_batch_for_val_evaluator = make_standard_prepare_batch_with_events(
            device)
        val_evaluator = setup_evaluator(
            model,
            trainer,
            val_loader,
            metrics,
            device=device,
            prepare_batch=prepare_batch_for_val_evaluator)
        checkpointer = add_checkpointing(models_dir,
                                         "cross_entropy_loss",
                                         val_evaluator,
                                         objects_to_save={"model": model},
                                         model=model)
        add_early_stopping(trainer,
                           val_evaluator,
                           "cross_entropy_loss",
                           patience_num_evaluations=25)
        with setup_tensorboard_logger(tb_log_dir,
                                      trainer,
                                      metrics.keys(), {"val": val_evaluator},
                                      model=model) as tb_logger:
            add_weights_and_grads_logging(trainer, tb_logger, model)
            add_logging_input_images(tb_logger, trainer, "train",
                                     prepare_batch_for_trainer)
            add_logging_input_images(tb_logger,
                                     val_evaluator,
                                     "val",
                                     prepare_batch_for_val_evaluator,
                                     another_engine=trainer)
            trainer.run(train_loader, max_epochs=100)
        if len(checkpointer._saved) > 0:
            best_model_path = checkpointer._saved[0][1][0]
            logger.info(f"The best model is saved at '{best_model_path}'")
            model.load_state_dict(torch.load(best_model_path))
    if test:
        test_dataset = MNIST(dataset_root,
                             train=False,
                             download=True,
                             transform=transform)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 pin_memory=(device.type == "cuda"))
        test_evaluator = create_supervised_evaluator(model, metrics, device)
        test_evaluator.run(test_loader)
        print(
            f"On test dataset the best model got: {test_evaluator.state.metrics}"
        )
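
# `permute_pixels`, `MNIST_TRANSFORM`, `MNIST_DATASET_SIZE`, `h`, `w`, and the ranks
# `r1`..`r4` are module-level definitions that are not shown. A rough sketch of the
# pixel-permutation pieces, under the assumption that images arrive as (1, h, w)
# tensors after `MNIST_TRANSFORM` (the exact transform is a guess):

from typing import Sequence

import torch
from torchvision import transforms

h, w = 28, 28  # MNIST image size
MNIST_DATASET_SIZE = 60000
MNIST_TRANSFORM = transforms.ToTensor()


def permute_pixels(permutation: Sequence[int], image: torch.Tensor) -> torch.Tensor:
    # Apply one fixed permutation to the flattened pixels and restore the shape.
    return image.reshape(-1)[list(permutation)].reshape(image.shape)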
def main(
    dataset_root,
    init_load_file,
    train_dataset_size,
    num_sbs_layers,
    bond_dim_size,
    tb_log_dir,
    models_dir,
    learning_rate,
    momentum,
    batch_size,
    initialization,
    initialization_std,
    scale_layers_using_batch,
    epochs,
    device,
    seed,
    early_stopping_patience_num_epochs,
    warmup_num_epochs,
    warmup_initial_multiplier,
    cos_sin_squared,
    make_input_window_std_one,
    input_multiplier,
    optimizer_type,
    rmsprop_alpha,
    weight_decay,
    shuffle_pixels,
):
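    """Train a DCTNMnistModel (a model built from ConvSBS layers) on MNIST.

    Sets up the (optionally pixel-shuffled) data pipeline, picks one of several
    weight initialization schemes, optionally rescales the input so window
    coordinates have unit std or rescales the layers using a data batch, trains
    with SGD or RMSprop under a warmup learning-rate schedule with checkpointing
    and optional early stopping, and logs weights, gradients, and
    intermediate-output statistics to TensorBoard.
    """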
    if not shuffle_pixels:
        transform = MNIST_TRANSFORM
    else:
        print("Pixel shuffling is enabled")
        shuffled_pixels_indices = tuple(shuffled(range(h * w)))
        logger.info(f"{hash(shuffled_pixels_indices)=}")
        pixel_shuffle_transform = transforms.Lambda(
            partial(permute_pixels, shuffled_pixels_indices))
        transform = transforms.Compose(
            (MNIST_TRANSFORM, pixel_shuffle_transform))
    dataset = MNIST(dataset_root,
                    train=True,
                    download=True,
                    transform=transform)
    assert len(dataset) == MNIST_DATASET_SIZE
    train_dataset, val_dataset = random_split(
        dataset, (train_dataset_size, MNIST_DATASET_SIZE - train_dataset_size))
    logger.info(f"{hash(tuple(val_dataset.indices))=}")
    train_loader, val_loader = (DataLoader(
        dataset_,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=(device.type == "cuda"),
    ) for dataset_ in (train_dataset, val_dataset))
    if initialization == "dumb-normal":
        assert initialization_std is not None
        init = DumbNormalInitialization(initialization_std)
    elif initialization == "khrulkov-normal":
        init = KhrulkovNormalInitialization(initialization_std)
    elif initialization == "normal-preserving-output-std":
        assert initialization_std is None
        init = NormalPreservingOutputStdInitialization()
    elif initialization == "min-random-eye":
        assert initialization_std is not None
        init = MinRandomEyeInitialization(initialization_std)
    else:
        raise ValueError(f"Invalid value: {initialization=}")
    assert not make_input_window_std_one or input_multiplier is None
    if make_input_window_std_one:
        kernel_size = 3
        window_std = calc_std_of_coordinates_of_windows(
            next(
                iter(
                    DataLoader(dataset,
                               batch_size=MNIST_DATASET_SIZE,
                               shuffle=False)))[0],
            kernel_size=kernel_size,
            cos_sin_squared=cos_sin_squared,
        ).item()
        logger.info(f"{window_std=}")
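        # Each window contributes kernel_size**2 coordinates, each multiplied by
        # input_multiplier; presumably the model combines them multiplicatively, so
        # (1 / window_std) ** (1 / kernel_size**2) scales the product over one
        # window by 1 / window_std overall.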
        input_multiplier = (1.0 / window_std)**(1 / kernel_size**2)
    elif input_multiplier is None:
        input_multiplier = 1.0
    logger.info(f"{input_multiplier=}")
    model = DCTNMnistModel(
        num_sbs_layers,
        bond_dim_size,
        False,
        init,
        cos_sin_squared,
        input_multiplier,
    )
    # with torch.autograd.detect_anomaly():
    #   X, y = next(iter(train_loader))
    #   logits = model(X)
    #   loss = tnnf.cross_entropy(logits, y)
    #   print(loss.item())
    #   loss.backward()
    if init_load_file:
        model.load_state_dict(torch.load(init_load_file, map_location=device))
    elif scale_layers_using_batch is not None:
        model.scale_layers_using_batch(
            next(
                iter(
                    DataLoader(dataset,
                               batch_size=scale_layers_using_batch,
                               shuffle=True)))[0])
        logger.info("Done model.scale_layers_using_batch")
    assert rmsprop_alpha is None or optimizer_type == "rmsprop"
    if optimizer_type == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=learning_rate,
            momentum=momentum,
            weight_decay=weight_decay,
        )
    elif optimizer_type == "rmsprop":
        optimizer = torch.optim.RMSprop(
            model.parameters(),
            lr=learning_rate,
            momentum=momentum,
            alpha=rmsprop_alpha,
            weight_decay=weight_decay,
        )
    else:
        raise ValueError(f"Invalid optimizer_type: {optimizer_type}")

    prepare_batch_for_trainer = make_standard_prepare_batch_with_events(device)
    trainer = setup_trainer(
        model,
        optimizer,
        tnnf.cross_entropy,
        device=device,
        prepare_batch=prepare_batch_for_trainer,
    )

    scheduler = LRScheduler(
        torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lambda epoch: (warmup_initial_multiplier**(
                (warmup_num_epochs - epoch) / warmup_num_epochs)
                           if epoch < warmup_num_epochs else 1.0),
        ))
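    # The LambdaLR multiplier ramps geometrically from warmup_initial_multiplier up
    # to 1 over warmup_num_epochs. For example, with warmup_initial_multiplier=0.01
    # and warmup_num_epochs=4 the multipliers are 0.01, 0.01**0.75 ≈ 0.032,
    # 0.01**0.5 = 0.1, 0.01**0.25 ≈ 0.32 for epochs 0-3, and 1.0 afterwards
    # (hypothetical values, just to illustrate the schedule).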
    trainer.add_event_handler(Events.EPOCH_STARTED, scheduler)
    metrics = {
        "cross_entropy_loss": Loss(tnnf.cross_entropy),
        "accuracy": Accuracy()
    }
    prepare_batch_for_val_evaluator = make_standard_prepare_batch_with_events(
        device)
    val_evaluator = setup_evaluator(
        model,
        trainer,
        val_loader,
        metrics,
        device=device,
        prepare_batch=prepare_batch_for_val_evaluator,
    )
    add_checkpointing(
        models_dir,
        "cross_entropy_loss",
        val_evaluator,
        objects_to_save={"model": model},
        model=model,
    )
    add_checkpointing_of_last_models(
        models_dir,
        val_evaluator,
        {"model": model},
        model,
        num_checkpoints=10,
        save_interval=20,
    )
    if early_stopping_patience_num_epochs is not None:
        add_early_stopping(
            trainer,
            val_evaluator,
            "cross_entropy_loss",
            patience_num_evaluations=early_stopping_patience_num_epochs,
        )
    with setup_tensorboard_logger(tb_log_dir,
                                  trainer,
                                  metrics.keys(), {"val": val_evaluator},
                                  model=model) as tb_logger:
        add_weights_and_grads_logging(trainer, tb_logger, model)
        add_optimizer_params_logging(optimizer, tb_logger, trainer)
        is_conv_sbs = lambda _, module: isinstance(module, ConvSBS)
        create_every_n_iters_intermediate_outputs_logger(
            model,
            tb_logger.writer,
            is_conv_sbs,
            trainer,
            "train",
            every_n_iters=20,
            loggers=(
                log_dumb_mean_of_abs,
                log_dumb_min_of_abs,
                log_dumb_max_of_abs,
                log_dumb_mean,
                log_dumb_std,
                log_dumb_histogram,  # maybe remove this later for performance's sake
            ),
        )
        add_conv_sbs_tt_tensor_statistics_logging(model, tb_logger.writer,
                                                  trainer, 20)
        create_every_n_iters_intermediate_outputs_logger(
            model,
            tb_logger.writer,
            lambda _, module: module is model,
            trainer,
            "train_outputs_of_the_whole_model",
            every_n_iters=20,
            loggers=(
                log_logits_as_probabilities,
                log_dumb_min,
                log_dumb_max,
                log_dumb_mean,
                log_dumb_std,
            ),
        )
        add_quantum_inputs_statistics_logging(model, trainer, tb_logger.writer,
                                              20)
        create_every_n_iters_intermediate_outputs_logger(
            model,
            tb_logger.writer,
            lambda _, module: module is model,
            trainer,
            "train_input",
            20,
            loggers=((
                "std_of_coordinates_of_windows",
                RecordType.SCALAR,
                partial(
                    calc_std_of_coordinates_of_windows,
                    kernel_size=3,
                    cos_sin_squared=cos_sin_squared,
                    multiplier=input_multiplier,
                ),
            ), ),
            use_input=True,
        )
        trainer.run(train_loader, max_epochs=epochs)