def main(experiments_dir: str, output_json_path: str, keys_of_interest,
         random_subset_of_size: int) -> None:
    runs_infos_paths = tuple(path for path in traverse_files(experiments_dir)
                             if path.endswith("run_info.txt"))
    if random_subset_of_size != -1:
        runs_infos_paths = tuple(
            shuffled(runs_infos_paths)[:random_subset_of_size])
    # Relative paths to each directory containing an experiment in `experiments_dir`.
    experiments_dirs_relpaths = tuple(
        os.path.relpath(os.path.dirname(path), experiments_dir)
        for path in runs_infos_paths)
    runs_infos: Tuple[Dict[str, Any], ...] = tuple(
        load_json(path) for path in runs_infos_paths)
    union_of_keys = reduce(lambda x, y: x | y,
                           (frozenset(run_info.keys())
                            for run_info in runs_infos))
    assert union_of_keys.issuperset(keys_of_interest)
    # Key-value pairs that are identical across all runs.
    shared_items = {}
    for key in union_of_keys:
        if key not in runs_infos[0]:
            continue
        value = runs_infos[0][key]
        if all(key in run_info and run_info[key] == value
               for run_info in runs_infos):
            shared_items[key] = value
    non_shared_items = tuple(
        {k: v for k, v in run_info.items() if k not in shared_items}
        for run_info in runs_infos)
    names = (tuple(str(i) for i in range(len(runs_infos)))
             if not keys_of_interest else tuple(
                 str({k: v for k, v in d.items() if k in keys_of_interest})
                 for d in non_shared_items))
    descriptions = tuple(str(d) for d in non_shared_items)
    json_struct = {
        "common_description": pformat(shared_items, indent=0),
        "experiments": [{
            "rel_dir": rel_dir,
            "name": name,
            "description": description
        } for (rel_dir, name, description
               ) in zip(experiments_dirs_relpaths, names, descriptions)],
    }
    save_json(json_struct, output_json_path)
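# The helpers below are assumed sketches, not the project's actual
# implementations, illustrating the behavior `main` relies on: `traverse_files`
# yields every file path under a directory tree, `shuffled` returns a shuffled
# copy of a sequence, and `load_json` / `save_json` wrap the json module.
import json
import os
import random
from typing import Any, Iterable, Iterator, List


def traverse_files(root_dir: str) -> Iterator[str]:
    """Yield the path of every file under `root_dir`, recursively."""
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            yield os.path.join(dirpath, filename)


def shuffled(items: Iterable[Any]) -> List[Any]:
    """Return a new list with the items in random order; the input is untouched."""
    result = list(items)
    random.shuffle(result)
    return result


def load_json(path: str) -> Any:
    with open(path) as f:
        return json.load(f)


def save_json(obj: Any, path: str) -> None:
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)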
def load_cifar10_as_colored_tensors(ds_path: str, colors: str) -> DatasetAsTensors:
    assert colors in ("rgb", "YCbCr")
    ds_train_and_val = torchvision.datasets.CIFAR10(ds_path, train=True)
    ds_test = torchvision.datasets.CIFAR10(ds_path, train=False)
    images_train: Tuple[Image, ...] = tuple(
        ds_train_and_val[i][0] for i in range(len(ds_train_and_val)))
    images_test: Tuple[Image, ...] = tuple(
        ds_test[i][0] for i in range(len(ds_test)))
    if colors == "YCbCr":
        images_train = tuple(image.convert("YCbCr") for image in images_train)
        images_test = tuple(image.convert("YCbCr") for image in images_test)
    x: torch.FloatTensor = torch.stack([
        rearrange(to_tensor(image), "c h w -> h w c") for image in images_train
    ]).unsqueeze(0)  # 1 × 50000 × 32 × 32 × 3
    y: List[int] = ds_train_and_val.targets
    x_test: torch.FloatTensor = torch.stack([
        rearrange(to_tensor(image), "c h w -> h w c") for image in images_test
    ]).unsqueeze(0)  # 1 × 10000 × 32 × 32 × 3

    # Shuffle the training dataset deterministically.
    seed(0)
    shuffled_indices: List[int] = shuffled(range(len(y)))
    getLogger(
        f"{__name__}.{load_cifar10_as_colored_tensors.__qualname__}").info(
            f"{hash(tuple(shuffled_indices))=}, {shuffled_indices[:10]=}")
    # 6271394816323448769 and (25247, 49673, 27562, 2653, 16968, 33506, 31845, 26537, 19877, 31234)
    x_shuffled: torch.Tensor = x[:, shuffled_indices]
    y_shuffled: List[int] = np.array(y)[shuffled_indices].tolist()
    return DatasetAsTensors(
        x_train=x_shuffled[:, :CIFAR10_NUM_TRAIN_SAMPLES],  # (1, 45000, height, width, channels)
        y_train=torch.tensor(y_shuffled[:CIFAR10_NUM_TRAIN_SAMPLES]),
        indices_train=torch.tensor(
            shuffled_indices[:CIFAR10_NUM_TRAIN_SAMPLES]),
        x_val=x_shuffled[:, CIFAR10_NUM_TRAIN_SAMPLES:],
        y_val=torch.tensor(y_shuffled[CIFAR10_NUM_TRAIN_SAMPLES:]),
        indices_val=torch.tensor(shuffled_indices[CIFAR10_NUM_TRAIN_SAMPLES:]),
        x_test=x_test,
        y_test=torch.tensor(ds_test.targets),
        indices_test=torch.tensor(range(len(ds_test))),
    )
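# A minimal sketch, assuming the shape of the `DatasetAsTensors` container and
# the CIFAR10_NUM_TRAIN_SAMPLES constant that the CIFAR-10 loaders in this
# module rely on. The field names are taken from the constructor calls above;
# the project's real definitions may differ.
from typing import NamedTuple

import torch

CIFAR10_NUM_TRAIN_SAMPLES = 45_000  # the remaining 5 000 training images go to validation


class DatasetAsTensors(NamedTuple):
    x_train: torch.Tensor
    y_train: torch.Tensor
    indices_train: torch.Tensor
    x_val: torch.Tensor
    y_val: torch.Tensor
    indices_val: torch.Tensor
    x_test: torch.Tensor
    y_test: torch.Tensor
    indices_test: torch.Tensor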
def load_cifar10_as_grayscale_tensors(ds_path: str, image_size: int) -> DatasetAsTensors:
    assert image_size in (28, 32)
    ds_train_and_val = torchvision.datasets.CIFAR10(ds_path, train=True)
    x: np.ndarray = ds_train_and_val.data  # (50000, 32, 32, 3)
    y: List[int] = ds_train_and_val.targets
    x_tensor = (_to_28x28_grayscale_tensor(ds_train_and_val)
                if image_size == 28 else
                _to_32x32_grayscale_tensor(ds_train_and_val))

    # Shuffle the training dataset deterministically.
    seed(0)
    shuffled_indices: List[int] = shuffled(range(len(x)))
    getLogger(
        f"{__name__}.{load_cifar10_as_grayscale_tensors.__qualname__}").info(
            f"{hash(tuple(shuffled_indices))=}, {shuffled_indices[:10]=}")
    # 6271394816323448769 and (25247, 49673, 27562, 2653, 16968, 33506, 31845, 26537, 19877, 31234)
    x_tensor_shuffled: torch.Tensor = x_tensor[shuffled_indices]
    y_shuffled: List[int] = np.array(y)[shuffled_indices].tolist()
    return DatasetAsTensors(
        x_train=x_tensor_shuffled[:CIFAR10_NUM_TRAIN_SAMPLES],  # (45000, height, width)
        y_train=torch.tensor(y_shuffled[:CIFAR10_NUM_TRAIN_SAMPLES]),
        indices_train=torch.tensor(
            shuffled_indices[:CIFAR10_NUM_TRAIN_SAMPLES]),
        x_val=x_tensor_shuffled[CIFAR10_NUM_TRAIN_SAMPLES:],
        y_val=torch.tensor(y_shuffled[CIFAR10_NUM_TRAIN_SAMPLES:]),
        indices_val=torch.tensor(shuffled_indices[CIFAR10_NUM_TRAIN_SAMPLES:]),
        x_test=(_to_28x28_grayscale_tensor if image_size == 28 else
                _to_32x32_grayscale_tensor)(
                    ds_test := torchvision.datasets.CIFAR10(ds_path, train=False)),
        y_test=torch.tensor(ds_test.targets),
        indices_test=torch.tensor(range(len(ds_test))),
    )
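# Hypothetical sketches of the grayscale converters used above; the project's
# actual _to_28x28_grayscale_tensor / _to_32x32_grayscale_tensor may differ
# (e.g. in the resize filter or luma weights). They are assumed to take a
# torchvision CIFAR10 dataset and return a float tensor of shape
# (num_samples, height, width).
import torch
import torchvision
import torchvision.transforms.functional as tvf


def _to_32x32_grayscale_tensor(ds: torchvision.datasets.CIFAR10) -> torch.Tensor:
    # ds.data is a uint8 ndarray of shape (N, 32, 32, 3); convert it to (N, 3, 32, 32)
    # floats in [0, 1], then collapse the channel dimension to luma.
    x = torch.from_numpy(ds.data).permute(0, 3, 1, 2).float() / 255.0
    return tvf.rgb_to_grayscale(x, num_output_channels=1).squeeze(1)  # (N, 32, 32)


def _to_28x28_grayscale_tensor(ds: torchvision.datasets.CIFAR10) -> torch.Tensor:
    # Same as above, but first resize each image to 28×28 (MNIST-like resolution).
    x = torch.from_numpy(ds.data).permute(0, 3, 1, 2).float() / 255.0
    x = tvf.resize(x, [28, 28])
    return tvf.rgb_to_grayscale(x, num_output_channels=1).squeeze(1)  # (N, 28, 28)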
def main(dataset_root, train_dataset_size, tb_log_dir, models_dir,
         learning_rate, batch_size, device, seed, shuffle_pixels,
         load_model: Optional[str], train: bool, test: bool):
    if not shuffle_pixels:
        transform = MNIST_TRANSFORM
    else:
        print("Pixel shuffling is enabled")
        pixel_shuffle_transform = transforms.Lambda(
            partial(permute_pixels, shuffled(range(h * w))))
        transform = transforms.Compose(
            (MNIST_TRANSFORM, pixel_shuffle_transform))
    model = TTMnistModel((r1, r2, r3, r4))
    if load_model is not None:
        model.load_state_dict(torch.load(load_model, "cpu"))
        logger.debug(f"Loaded model from {load_model}")
    metrics = {
        "cross_entropy_loss": Loss(tnnf.cross_entropy),
        "accuracy": Accuracy()
    }
    if train:
        dataset = MNIST(dataset_root, train=True, download=True,
                        transform=transform)
        assert len(dataset) == MNIST_DATASET_SIZE
        train_dataset, val_dataset = random_split(
            dataset,
            (train_dataset_size, MNIST_DATASET_SIZE - train_dataset_size))
        train_loader, val_loader = (DataLoader(
            dataset_,
            batch_size=batch_size,
            shuffle=True,
            pin_memory=(device.type == "cuda"))
                                    for dataset_ in (train_dataset, val_dataset))
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=learning_rate,
                                    momentum=0.95,
                                    weight_decay=0.0005)
        prepare_batch_for_trainer = make_standard_prepare_batch_with_events(
            device)
        trainer = setup_trainer(model,
                                optimizer,
                                tnnf.cross_entropy,
                                device=device,
                                prepare_batch=prepare_batch_for_trainer)
        scheduler = LRScheduler(
            torch.optim.lr_scheduler.StepLR(optimizer, step_size=2,
                                            gamma=0.8547))
        trainer.add_event_handler(Events.EPOCH_STARTED, scheduler)
        prepare_batch_for_val_evaluator = make_standard_prepare_batch_with_events(
            device)
        val_evaluator = setup_evaluator(
            model,
            trainer,
            val_loader,
            metrics,
            device=device,
            prepare_batch=prepare_batch_for_val_evaluator)
        checkpointer = add_checkpointing(models_dir,
                                         "cross_entropy_loss",
                                         val_evaluator,
                                         objects_to_save={"model": model},
                                         model=model)
        add_early_stopping(trainer,
                           val_evaluator,
                           "cross_entropy_loss",
                           patience_num_evaluations=25)
        with setup_tensorboard_logger(tb_log_dir, trainer, metrics.keys(),
                                      {"val": val_evaluator},
                                      model=model) as tb_logger:
            add_weights_and_grads_logging(trainer, tb_logger, model)
            add_logging_input_images(tb_logger, trainer, "train",
                                     prepare_batch_for_trainer)
            add_logging_input_images(tb_logger, val_evaluator, "val",
                                     prepare_batch_for_val_evaluator,
                                     another_engine=trainer)
            trainer.run(train_loader, max_epochs=100)
        if len(checkpointer._saved) > 0:
            best_model_path = checkpointer._saved[0][1][0]
            logger.info(f"The best model is saved at '{best_model_path}'")
            model.load_state_dict(torch.load(best_model_path))
    if test:
        test_dataset = MNIST(dataset_root, train=False, download=True,
                             transform=transform)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 pin_memory=(device.type == "cuda"))
        test_evaluator = create_supervised_evaluator(model, metrics, device)
        test_evaluator.run(test_loader)
        print(
            f"On test dataset the best model got: {test_evaluator.state.metrics}")
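# A plausible sketch of `permute_pixels`, which both MNIST entry points bind
# with `partial(permute_pixels, shuffled(range(h * w)))` inside a
# transforms.Lambda. The exact tensor layout the real helper expects is an
# assumption: this version permutes the flattened spatial positions of a
# (channels, h, w) image tensor and restores the original shape.
from typing import Sequence

import torch


def permute_pixels(permutation: Sequence[int], image: torch.Tensor) -> torch.Tensor:
    channels, height, width = image.shape
    flat = image.reshape(channels, height * width)
    return flat[:, list(permutation)].reshape(channels, height, width)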
def main(
    dataset_root,
    init_load_file,
    train_dataset_size,
    num_sbs_layers,
    bond_dim_size,
    tb_log_dir,
    models_dir,
    learning_rate,
    momentum,
    batch_size,
    initialization,
    initialization_std,
    scale_layers_using_batch,
    epochs,
    device,
    seed,
    early_stopping_patience_num_epochs,
    warmup_num_epochs,
    warmup_initial_multiplier,
    cos_sin_squared,
    make_input_window_std_one,
    input_multiplier,
    optimizer_type,
    rmsprop_alpha,
    weight_decay,
    shuffle_pixels,
):
    if not shuffle_pixels:
        transform = MNIST_TRANSFORM
    else:
        print("Pixel shuffling is enabled")
        shuffled_pixels_indices = tuple(shuffled(range(h * w)))
        logger.info(f"{hash(shuffled_pixels_indices)=}")
        pixel_shuffle_transform = transforms.Lambda(
            partial(permute_pixels, shuffled_pixels_indices))
        transform = transforms.Compose(
            (MNIST_TRANSFORM, pixel_shuffle_transform))
    dataset = MNIST(dataset_root, train=True, download=True,
                    transform=transform)
    assert len(dataset) == MNIST_DATASET_SIZE
    train_dataset, val_dataset = random_split(
        dataset,
        (train_dataset_size, MNIST_DATASET_SIZE - train_dataset_size))
    logger.info(f"{hash(tuple(val_dataset.indices))=}")
    train_loader, val_loader = (DataLoader(
        dataset_,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=(device.type == "cuda"),
    ) for dataset_ in (train_dataset, val_dataset))
    if initialization == "dumb-normal":
        assert initialization_std is not None
        init = DumbNormalInitialization(initialization_std)
    elif initialization == "khrulkov-normal":
        init = KhrulkovNormalInitialization(initialization_std)
    elif initialization == "normal-preserving-output-std":
        assert initialization_std is None
        init = NormalPreservingOutputStdInitialization()
    elif initialization == "min-random-eye":
        assert initialization_std is not None
        init = MinRandomEyeInitialization(initialization_std)
    else:
        raise ValueError(f"Invalid value: {initialization=}")
    assert not make_input_window_std_one or input_multiplier is None
    if make_input_window_std_one:
        kernel_size = 3
        window_std = calc_std_of_coordinates_of_windows(
            next(
                iter(
                    DataLoader(dataset,
                               batch_size=MNIST_DATASET_SIZE,
                               shuffle=False)))[0],
            kernel_size=kernel_size,
            cos_sin_squared=cos_sin_squared,
        ).item()
        logger.info(f"{window_std=}")
        input_multiplier = (1.0 / window_std)**(1 / kernel_size**2)
    elif input_multiplier is None:
        input_multiplier = 1.0
    logger.info(f"{input_multiplier=}")
    model = DCTNMnistModel(
        num_sbs_layers,
        bond_dim_size,
        False,
        init,
        cos_sin_squared,
        input_multiplier,
    )
    # with torch.autograd.detect_anomaly():
    #     X, y = next(iter(train_loader))
    #     logits = model(X)
    #     loss = tnnf.cross_entropy(logits, y)
    #     print(loss.item())
    #     loss.backward()
    if init_load_file:
        model.load_state_dict(torch.load(init_load_file, map_location=device))
    elif scale_layers_using_batch is not None:
        model.scale_layers_using_batch(
            next(
                iter(
                    DataLoader(dataset,
                               batch_size=scale_layers_using_batch,
                               shuffle=True)))[0])
        logger.info("Done model.scale_layers_using_batch")
    assert rmsprop_alpha is None or optimizer_type == "rmsprop"
    if optimizer_type == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=learning_rate,
            momentum=momentum,
            weight_decay=weight_decay,
        )
    elif optimizer_type == "rmsprop":
        optimizer = torch.optim.RMSprop(
            model.parameters(),
            lr=learning_rate,
            momentum=momentum,
            alpha=rmsprop_alpha,
            weight_decay=weight_decay,
        )
    else:
        raise ValueError(f"Invalid optimizer_type: {optimizer_type}")
    prepare_batch_for_trainer = make_standard_prepare_batch_with_events(device)
    trainer = setup_trainer(
        model,
        optimizer,
        tnnf.cross_entropy,
        device=device,
        prepare_batch=prepare_batch_for_trainer,
    )
    # Learning-rate warmup: the multiplier starts at warmup_initial_multiplier
    # at epoch 0 and reaches 1.0 after warmup_num_epochs epochs (geometric ramp).
    scheduler = LRScheduler(
        torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lambda epoch: (warmup_initial_multiplier**(
                (warmup_num_epochs - epoch) / warmup_num_epochs)
                           if epoch < warmup_num_epochs else 1.0),
        ))
    trainer.add_event_handler(Events.EPOCH_STARTED, scheduler)
    metrics = {
        "cross_entropy_loss": Loss(tnnf.cross_entropy),
        "accuracy": Accuracy()
    }
    prepare_batch_for_val_evaluator = make_standard_prepare_batch_with_events(
        device)
    val_evaluator = setup_evaluator(
        model,
        trainer,
        val_loader,
        metrics,
        device=device,
        prepare_batch=prepare_batch_for_val_evaluator,
    )
    add_checkpointing(
        models_dir,
        "cross_entropy_loss",
        val_evaluator,
        objects_to_save={"model": model},
        model=model,
    )
    add_checkpointing_of_last_models(
        models_dir,
        val_evaluator,
        {"model": model},
        model,
        num_checkpoints=10,
        save_interval=20,
    )
    if early_stopping_patience_num_epochs is not None:
        add_early_stopping(
            trainer,
            val_evaluator,
            "cross_entropy_loss",
            patience_num_evaluations=early_stopping_patience_num_epochs,
        )
    with setup_tensorboard_logger(tb_log_dir, trainer, metrics.keys(),
                                  {"val": val_evaluator},
                                  model=model) as tb_logger:
        add_weights_and_grads_logging(trainer, tb_logger, model)
        add_optimizer_params_logging(optimizer, tb_logger, trainer)
        is_conv_sbs = lambda _, module: isinstance(module, ConvSBS)
        create_every_n_iters_intermediate_outputs_logger(
            model,
            tb_logger.writer,
            is_conv_sbs,
            trainer,
            "train",
            every_n_iters=20,
            loggers=(
                log_dumb_mean_of_abs,
                log_dumb_min_of_abs,
                log_dumb_max_of_abs,
                log_dumb_mean,
                log_dumb_std,
                log_dumb_histogram,  # maybe remove this later for performance's sake
            ),
        )
        add_conv_sbs_tt_tensor_statistics_logging(model, tb_logger.writer,
                                                  trainer, 20)
        create_every_n_iters_intermediate_outputs_logger(
            model,
            tb_logger.writer,
            lambda _, module: module is model,
            trainer,
            "train_outputs_of_the_whole_model",
            every_n_iters=20,
            loggers=(
                log_logits_as_probabilities,
                log_dumb_min,
                log_dumb_max,
                log_dumb_mean,
                log_dumb_std,
            ),
        )
        add_quantum_inputs_statistics_logging(model, trainer, tb_logger.writer,
                                              20)
        create_every_n_iters_intermediate_outputs_logger(
            model,
            tb_logger.writer,
            lambda _, module: module is model,
            trainer,
            "train_input",
            20,
            loggers=((
                "std_of_coordinates_of_windows",
                RecordType.SCALAR,
                partial(
                    calc_std_of_coordinates_of_windows,
                    kernel_size=3,
                    cos_sin_squared=cos_sin_squared,
                    multiplier=input_multiplier,
                ),
            ), ),
            use_input=True,
        )
        trainer.run(train_loader, max_epochs=epochs)
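# Illustration (not part of the training code) of the warmup multiplier used by
# the LambdaLR above. With hypothetical values warmup_initial_multiplier=0.1 and
# warmup_num_epochs=5, the learning-rate multiplier climbs geometrically from
# 0.1 at epoch 0 to 1.0 at epoch 5 and stays at 1.0 afterwards.
def _demo_warmup_multipliers(warmup_initial_multiplier: float = 0.1,
                             warmup_num_epochs: int = 5,
                             num_epochs: int = 8) -> list:
    return [(warmup_initial_multiplier**(
        (warmup_num_epochs - epoch) / warmup_num_epochs)
             if epoch < warmup_num_epochs else 1.0)
            for epoch in range(num_epochs)]


# _demo_warmup_multipliers() ≈ [0.1, 0.158, 0.251, 0.398, 0.631, 1.0, 1.0, 1.0]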