def set_meta_record_keeper(self):
    """Set up the record keeper and accuracy container for meta logging.

    ``self.beginning_of_training()`` is always evaluated. Everything else
    only happens when the split manager lists more than one split scheme;
    with a single scheme (or none) no attribute is set.
    """
    fresh_start = self.beginning_of_training()
    if len(self.split_manager.split_scheme_names) <= 1:
        return

    # Every template in sub_experiment_dirs is filled with
    # (experiment folder, sub-folder name).
    meta_dirs = {}
    for kind, template in self.sub_experiment_dirs.items():
        meta_dirs[kind] = template % (self.experiment_folder, "meta_logs")
    csv_dir = meta_dirs["csvs"]
    tensorboard_dir = meta_dirs["tensorboard"]

    self.meta_record_keeper, _, _ = logging_presets.get_record_keeper(
        csv_dir,
        tensorboard_dir,
        self.global_db_path,
        self.args.experiment_name,
        fresh_start,
    )
    # Three levels of auto-created dicts; the innermost default is a plain dict.
    self.meta_accuracies = defaultdict(
        lambda: defaultdict(lambda: defaultdict(dict))
    )
def run(self):
    """Run the Bayesian optimization loop, then post-process the best result.

    Iterations whose index is already present in the Ax experiment's trials
    with status ``TrialStatus.COMPLETED`` are skipped. For every other
    iteration the config files are saved, parameters are obtained, the
    returned experiment function is called, and its result is reported to
    Ax; afterwards the log is saved, records are updated, and progress is
    plotted. When the loop ends the best parameters are saved and the best
    sub-experiment is tested, reproduced, and summarized.
    """
    client = self.get_ax_client()
    known_trials = client.experiment.trials
    keeper, _, _ = logging_presets.get_record_keeper(
        self.csv_folder, self.tensorboard_folder
    )
    config_diff_reader = self.read_yaml_and_find_bayes(find_bayes_params=False)

    for iteration in range(self.bayes_opt_iters):
        if iteration in known_trials:
            if known_trials[iteration].status == TrialStatus.COMPLETED:
                continue

        logging.info("Optimization iteration %d" % iteration)
        # save config diffs, if any
        c_f.save_config_files(
            self.YR.args.place_to_save_configs,
            config_diff_reader.args.dict_of_yamls,
            True,
            [iteration],
        )
        name = self.get_sub_experiment_name(iteration)
        params, trial_idx, experiment_fn = self.get_parameters_and_trial_index(
            client, name, iteration
        )
        outcome = experiment_fn(params, name)
        client.complete_trial(trial_index=trial_idx, raw_data=outcome)

        self.save_new_log(client)
        self.update_records(keeper, client, iteration)
        self.plot_progress(client)

    logging.info("DONE BAYESIAN OPTIMIZATION")
    self.plot_progress(client)
    best_name = self.save_best_parameters(keeper, client)
    self.test_model(best_name)
    self.reproduce_results(best_name)
    self.create_accuracy_report(best_name)
    logging.info("##### FINISHED #####")
def run(self):
    """Run the remaining Bayesian optimization iterations and report results.

    The number of trials already stored in the Ax experiment is used as the
    starting iteration, so a restarted run continues where it stopped. Each
    iteration obtains parameters, calls the returned experiment function,
    reports its result to Ax, saves the log, updates the records, and plots
    progress. After the loop the best parameters are saved and the best
    sub-experiment is tested, reproduced, and summarized.
    """
    ax_client = self.get_ax_client()
    # An empty or missing trials container counts as zero explored points.
    num_explored_points = len(ax_client.experiment.trials or ())
    record_keeper, _, _ = logging_presets.get_record_keeper(
        self.csv_folder, self.tensorboard_folder
    )

    for i in range(num_explored_points, self.bayes_opt_iters):
        logging.info("Optimization iteration %d" % i)
        sub_experiment_name = self.get_sub_experiment_name(i)
        parameters, trial_index, experiment_func = (
            self.get_parameters_and_trial_index(ax_client, sub_experiment_name)
        )
        ax_client.complete_trial(
            trial_index=trial_index,
            raw_data=experiment_func(parameters, sub_experiment_name),
        )
        self.save_new_log(ax_client)
        self.update_records(record_keeper, ax_client, i)
        self.plot_progress(ax_client)

    logging.info("DONE BAYESIAN OPTIMIZATION")
    self.plot_progress(ax_client)
    best_sub_experiment_name = self.save_best_parameters(record_keeper, ax_client)
    self.test_model(best_sub_experiment_name)
    self.reproduce_results(best_sub_experiment_name)
    self.create_accuracy_report(best_sub_experiment_name)
    logging.info("##### FINISHED #####")
def _train(self) -> Optional[float]:
    """Train with ``trainers.MetricLossOnly`` and save a UMAP plot per test.

    Logs go to ``example_logs`` / ``example_tensorboard`` and models to
    ``example_saved_models``. The validation set is evaluated every epoch
    (``test_interval=1``) with ``patience=200``.

    Returns:
        Nothing is returned explicitly, so the result is always ``None``.
    """
    import os  # function-scope import; only needed to create the plot folder

    record_keeper, _, _ = logging_presets.get_record_keeper(
        "example_logs", "example_tensorboard")
    hooks = logging_presets.get_hook_container(record_keeper)
    dataset_dict = {"val": self.val_dataset}
    model_folder = "example_saved_models"

    def visualizer_hook(umapper, umap_embeddings, labels, split_name,
                        keyname, *args):
        """Scatter-plot the 2D embeddings, one color per label, and save it."""
        logging.info("UMAP plot for the {} split and label set {}".format(
            split_name, keyname))
        label_set = np.unique(labels)
        num_classes = len(label_set)
        fig = plt.figure(figsize=(20, 15))
        try:
            plt.gca().set_prop_cycle(
                cycler("color", [
                    plt.cm.nipy_spectral(i)
                    for i in np.linspace(0, 0.9, num_classes)
                ]))
            for i in range(num_classes):
                idx = labels == label_set[i]
                plt.plot(umap_embeddings[idx, 0],
                         umap_embeddings[idx, 1],
                         ".",
                         markersize=1)
            # savefig does not create missing directories.
            os.makedirs('./plots', exist_ok=True)
            # NOTE(review): args[0] is presumably the epoch passed by the
            # tester -- confirm against the tester's visualizer_hook call.
            file_name = './plots/metric_{0}.png'.format(args[0])
            plt.savefig(file_name, dpi=300)
        finally:
            # The hook runs on every test; without closing, figures pile up
            # in pyplot's registry for the whole training run.
            plt.close(fig)

    # Create the tester
    tester = testers.GlobalEmbeddingSpaceTester(
        end_of_testing_hook=hooks.end_of_testing_hook,
        visualizer=umap.UMAP(),
        visualizer_hook=visualizer_hook,
        dataloader_num_workers=32)

    end_of_epoch_hook = hooks.end_of_epoch_hook(tester,
                                                dataset_dict,
                                                model_folder,
                                                test_interval=1,
                                                patience=200)

    trainer = trainers.MetricLossOnly(
        self.models_dict,
        self.optimizers,
        self._train_cfg.batch_per_gpu,
        self.loss_funcs,
        self.mining_funcs,
        self.train_set,
        sampler=self.sampler,
        dataloader_num_workers=self._train_cfg.workers - 1,
        end_of_iteration_hook=hooks.end_of_iteration_hook,
        end_of_epoch_hook=end_of_epoch_hook)

    # NOTE(review): the epoch count is hard-coded rather than read from
    # self._train_cfg.epochs -- confirm this is intentional.
    trainer.train(num_epochs=500)
def set_record_keeper(self):
    """Create ``self.record_keeper`` from the experiment's folders and args.

    The experiment is flagged as new only when ``beginning_of_training()``
    is truthy and ``curr_split_count`` equals 0.
    """
    is_new_experiment = self.beginning_of_training() and self.curr_split_count == 0
    keeper_options = {
        "csv_folder": self.csv_folder,
        "tensorboard_folder": self.tensorboard_folder,
        "global_db_path": self.global_db_path,
        "experiment_name": self.args.experiment_name,
        "is_new_experiment": is_new_experiment,
        "save_figures": self.args.save_figures_on_tensorboard,
        "save_lists": self.args.save_lists_in_db,
    }
    # get_record_keeper returns three values; only the first is kept.
    self.record_keeper, _, _ = logging_presets.get_record_keeper(**keeper_options)
def delete_sub_experiment_folder(self, sub_experiment_name):
    """Remove a sub-experiment's folder and its entry in the global database.

    Args:
        sub_experiment_name: name of the sub-experiment to wipe.
    """
    logging.warning("Deleting and starting fresh for %s" % sub_experiment_name)

    # Delete everything stored on disk for this sub-experiment.
    folder = self.get_sub_experiment_path(sub_experiment_name)
    shutil.rmtree(folder)

    # A record keeper is built only to reach the global database handle.
    keeper, _, _ = logging_presets.get_record_keeper(
        self.csv_folder,
        self.tensorboard_folder,
        self.global_db_path,
        sub_experiment_name,
        False,
    )
    global_db = keeper.record_writer.global_db
    global_db.delete_experiment(sub_experiment_name)
def set_meta_record_keeper(self):
    """Set up the meta record keeper when several split schemes exist.

    ``self.beginning_of_training()`` is always evaluated; the record keeper
    and ``meta_accuracies`` are only created when the split manager lists
    more than one split scheme.
    """
    fresh_start = self.beginning_of_training()
    if len(self.split_manager.split_scheme_names) <= 1:
        return

    # sub_experiment_dirs must yield exactly three templates; the first
    # filled-in path is not needed here.
    meta_dirs = [
        template % (self.experiment_folder, "meta_logs")
        for template in self.sub_experiment_dirs
    ]
    _, csv_dir, tensorboard_dir = meta_dirs

    self.meta_record_keeper, _, _ = logging_presets.get_record_keeper(
        csv_dir,
        tensorboard_dir,
        self.db_path,
        self.args.experiment_name,
        fresh_start,
    )
    self.meta_accuracies = defaultdict(lambda: defaultdict(dict))
def set_meta_record_keeper(self):
    """Create the meta record keeper and the nested ``meta_accuracies`` dict.

    Folder templates from ``self.sub_experiment_dirs`` are filled with the
    experiment folder and the ``"meta_logs"`` sub-folder name; the ``"csvs"``
    and ``"tensorboard"`` entries are handed to the record keeper.
    """
    fresh_start = self.beginning_of_training()

    meta_dirs = {}
    for kind, template in self.sub_experiment_dirs.items():
        meta_dirs[kind] = template % (self.experiment_folder, "meta_logs")
    csv_dir = meta_dirs["csvs"]
    tensorboard_dir = meta_dirs["tensorboard"]

    keeper_options = {
        "csv_folder": csv_dir,
        "tensorboard_folder": tensorboard_dir,
        "global_db_path": self.global_db_path,
        "experiment_name": self.args.experiment_name,
        "is_new_experiment": fresh_start,
        "save_figures": self.args.save_figures_on_tensorboard,
        "save_lists": self.args.save_lists_in_db,
    }
    self.meta_record_keeper, _, _ = logging_presets.get_record_keeper(**keeper_options)
    # Three levels of auto-created dicts; the innermost default is a plain dict.
    self.meta_accuracies = defaultdict(
        lambda: defaultdict(lambda: defaultdict(dict))
    )
def get_testing_hooks(experiment_id, val_dataset, test_interval, patience):
    """Build the end-of-epoch and end-of-iteration hooks for one experiment.

    Logs and saved models go under ``experiment_logs/<experiment_id>``;
    tensorboard data goes under ``experiment_logs/tensorboard/<experiment_id>``.

    Args:
        experiment_id: name used for the log and tensorboard sub-folders.
        val_dataset: dataset registered under the ``"val"`` split.
        test_interval: forwarded to ``hooks.end_of_epoch_hook``.
        patience: forwarded to ``hooks.end_of_epoch_hook``.

    Returns:
        Tuple ``(end_of_epoch_hook, end_of_iteration_hook)``.
    """
    log_root = os.path.join('experiment_logs', experiment_id)
    keeper, _, _ = logging_presets.get_record_keeper(
        log_root,
        os.path.join('experiment_logs', 'tensorboard', experiment_id),
    )
    hook_container = logging_presets.get_hook_container(keeper)
    splits = {"val": val_dataset}
    # Models are saved next to the logs.
    model_folder = log_root

    def visualizer_hook(umapper, umap_embeddings, labels, split_name,
                        keyname, *args):
        """Show a per-label scatter plot and write the embeddings to tensorboard."""
        logging.info("UMAP plot for the {} split and label set {}".format(
            split_name, keyname))
        unique_labels = np.unique(labels)
        palette = [
            plt.cm.nipy_spectral(position)
            for position in np.linspace(0, 0.9, len(unique_labels))
        ]
        plt.figure(figsize=(20, 15))
        plt.gca().set_prop_cycle(cycler("color", palette))
        for label in unique_labels:
            members = labels == label
            plt.plot(umap_embeddings[members, 0],
                     umap_embeddings[members, 1],
                     ".",
                     markersize=1)
        plt.show()

        board_dir = os.path.join('experiment_logs', 'tensorboard', experiment_id)
        writer = SummaryWriter(log_dir=board_dir)
        writer.add_embedding(umap_embeddings, metadata=labels)
        writer.close()

    # Create the tester
    tester = testers.GlobalEmbeddingSpaceTester(
        end_of_testing_hook=hook_container.end_of_testing_hook,
        visualizer=umap.UMAP(),
        visualizer_hook=visualizer_hook,
        dataloader_num_workers=6)

    epoch_hook = hook_container.end_of_epoch_hook(tester,
                                                  splits,
                                                  model_folder,
                                                  test_interval=test_interval,
                                                  patience=patience)
    return epoch_hook, hook_container.end_of_iteration_hook
def create_accuracy_report(self, best_sub_experiment_name):
    """Write detailed and summary accuracy reports for the best sub-experiment.

    Every folder under the root experiment folder whose name starts with
    ``best_sub_experiment_name`` is treated as one experiment. For each
    evaluation type and experiment, the row with the highest ``id`` is read
    from the global database, once for ``is_trained=1`` and once for
    ``is_trained=0``. Accuracy columns are then aggregated across experiments
    into a mean and a 95% t-interval, and both reports are written as YAML.

    Args:
        best_sub_experiment_name: name prefix of the experiment folders.
    """
    global_record_keeper, _, _ = logging_presets.get_record_keeper(self.csv_folder, self.tensorboard_folder, self.global_db_path, "", False)
    folder_pattern = os.path.join(self.bayes_opt_root_experiment_folder, "%s*"%best_sub_experiment_name)
    exp_names = [os.path.basename(path) for path in glob.glob(folder_pattern)]

    # Columns starting with one of these prefixes are bookkeeping, not accuracies.
    non_accuracy_prefixes = ("is_trained", "best_epoch", "best_accuracy", "SEM", "id", "experiment_id", "timestamp")
    trained_flags = [("trained", 1), ("untrained", 0)]

    results = {}
    summary = {}
    for eval_type in ["meta", "meta_ConcatenateEmbeddings"]:
        results[eval_type] = {}
        summary[eval_type] = collections.defaultdict(lambda: collections.defaultdict(list))
        table_name = self.eval_record_group_dicts[eval_type]["test"]
        # Both queries depend only on the table, so build them once per eval type.
        base_query = "SELECT * FROM %s WHERE experiment_id=? AND id=? AND is_trained=?"%table_name
        max_id_query = "SELECT max(id) FROM %s WHERE experiment_id=? AND is_trained=?"%table_name

        for exp in exp_names:
            results[eval_type][exp] = {}
            exp_id = global_record_keeper.record_writer.global_db.get_experiment_id(exp)

            latest_rows = {}
            for label, flag in trained_flags:
                max_id_rows = global_record_keeper.query(max_id_query, values=(exp_id, flag), use_global_db=True)
                max_id = max_id_rows[0]["max(id)"]
                rows = global_record_keeper.query(base_query, values=(exp_id, max_id, flag), use_global_db=True)
                if len(rows) > 0:
                    latest_rows[label] = rows[0]

            for label, row in latest_rows.items():
                row_as_dict = dict(row)
                results[eval_type][exp][label] = row_as_dict
                for column, value in row_as_dict.items():
                    if column.startswith(non_accuracy_prefixes):
                        continue
                    summary[eval_type][label][column].append(value)

        # Replace each list of collected values with its summary statistics.
        for label, per_metric in summary[eval_type].items():
            for column in per_metric.keys():
                samples = per_metric[column]
                mean = np.mean(samples)
                # https://stackoverflow.com/a/34474255
                cf_low, cf_high = scipy_stats.t.interval(0.95, len(samples)-1, loc=mean, scale=scipy_stats.sem(samples))
                cf_width = mean-cf_low
                per_metric[column] = {
                    "mean": float(mean),
                    "95%_confidence_interval": (float(cf_low), float(cf_high)),
                    "95%_confidence_interval_width": float(cf_width),
                }

    c_f.write_yaml(self.accuracy_report_detailed_filename, results, open_as="w")
    # The JSON round trip turns the defaultdicts and tuples into plain dicts and lists.
    c_f.write_yaml(self.accuracy_report_filename, json.loads(json.dumps(summary)), open_as="w")
print("Use following best parameter:") print(best_params) param_gen = ParameterGenerator(best_trial, CONF["_fix_params"], logger=logging.getLogger()) constructors = MODEL_DEF.get(CONF, best_trial, param_gen) train_dataset, dev_dataset, train_sampler, batch_size = \ next(constructors["fold_generator"]()) trainer_kwargs = constructors["modules"]() # logging record_keeper, _, _ = logging_presets.get_record_keeper( csv_folder=os.path.join(args.log_dir, f"csv"), tensorboard_folder=os.path.join(args.log_dir, f"tensorboard")) hooks = logging_presets.get_hook_container(record_keeper) # tester tester = testers.GlobalEmbeddingSpaceTester( end_of_testing_hook=hooks.end_of_testing_hook, dataloader_num_workers=32) end_of_epoch_hook = hooks.end_of_epoch_hook(tester, {"val": dev_dataset}, os.path.join( args.log_dir, f"model"), test_interval=1, patience=args.patience) # train if args.trainer == "MetricLossOnly": trainer = trainers.MetricLossOnly(
def test_metric_loss_only(self):
    """End-to-end check of ``MetricLossOnly`` with the logging-preset hooks.

    Clones a CIFAR-10 resnet repository, trains on a 10000-sample subset of
    CIFAR100 for every dtype in ``TEST_DTYPES`` and for two ``splits_to_eval``
    settings, and asserts that the hook container's query helpers agree with
    each other. All temporary folders are removed at the end.
    """
    cifar_resnet_folder = "temp_cifar_resnet_for_pytorch_metric_learning_test"
    dataset_folder = "temp_dataset_for_pytorch_metric_learning_test"
    model_folder = "temp_saved_models_for_pytorch_metric_learning_test"
    logs_folder = "temp_logs_for_pytorch_metric_learning_test"
    tensorboard_folder = "temp_tensorboard_for_pytorch_metric_learning_test"

    # Fetch the resnet definition and pretrained weights (needs git + network).
    os.system(
        "git clone https://github.com/akamaster/pytorch_resnet_cifar10.git {}"
        .format(cifar_resnet_folder))

    loss_fn = NTXentLoss()

    normalize_transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                               std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize_transform,
    ])

    eval_transform = transforms.Compose(
        [transforms.ToTensor(), normalize_transform])

    # The test must start from a clean slate: none of the folders may exist.
    assert not os.path.isdir(dataset_folder)
    assert not os.path.isdir(model_folder)
    assert not os.path.isdir(logs_folder)
    assert not os.path.isdir(tensorboard_folder)

    # Only the first 10000 samples of each dataset are used.
    subset_idx = np.arange(10000)

    train_dataset = datasets.CIFAR100(dataset_folder,
                                      train=True,
                                      download=True,
                                      transform=train_transform)

    # Same training images, but with the evaluation transform.
    train_dataset_for_eval = datasets.CIFAR100(dataset_folder,
                                               train=True,
                                               download=True,
                                               transform=eval_transform)

    val_dataset = datasets.CIFAR100(dataset_folder,
                                    train=False,
                                    download=True,
                                    transform=eval_transform)

    train_dataset = torch.utils.data.Subset(train_dataset, subset_idx)
    train_dataset_for_eval = torch.utils.data.Subset(
        train_dataset_for_eval, subset_idx)
    val_dataset = torch.utils.data.Subset(val_dataset, subset_idx)

    for dtype in TEST_DTYPES:
        for splits_to_eval in [
                None,
                [("train", ["train", "val"]), ("val", ["train", "val"])],
        ]:
            # Imported here because the package only exists after the clone.
            from temp_cifar_resnet_for_pytorch_metric_learning_test import resnet

            # The model is wrapped in DataParallel before loading the checkpoint.
            model = torch.nn.DataParallel(resnet.resnet20())
            checkpoint = torch.load(
                "{}/pretrained_models/resnet20-12fca82f.th".format(
                    cifar_resnet_folder),
                map_location=TEST_DEVICE,
            )
            model.load_state_dict(checkpoint["state_dict"])
            # Replace the final linear layer so the model outputs embeddings.
            model.module.linear = c_f.Identity()
            if TEST_DEVICE == torch.device("cpu"):
                model = model.module
            model = model.to(TEST_DEVICE).type(dtype)

            optimizer = torch.optim.Adam(
                model.parameters(),
                lr=0.0002,
                weight_decay=0.0001,
                eps=1e-04,
            )

            batch_size = 32
            # With explicit splits_to_eval each epoch is a single iteration.
            iterations_per_epoch = None if splits_to_eval is None else 1
            model_dict = {"trunk": model}
            optimizer_dict = {"trunk_optimizer": optimizer}
            loss_fn_dict = {"metric_loss": loss_fn}
            sampler = MPerClassSampler(
                np.array(train_dataset.dataset.targets)[subset_idx],
                m=4,
                batch_size=32,
                length_before_new_iter=len(train_dataset),
            )

            record_keeper, _, _ = logging_presets.get_record_keeper(
                logs_folder, tensorboard_folder)
            hooks = logging_presets.get_hook_container(
                record_keeper, primary_metric="precision_at_1")
            dataset_dict = {
                "train": train_dataset_for_eval,
                "val": val_dataset
            }

            tester = GlobalEmbeddingSpaceTester(
                end_of_testing_hook=hooks.end_of_testing_hook,
                accuracy_calculator=accuracy_calculator.AccuracyCalculator(
                    include=("precision_at_1", "AMI"), k=1),
                data_device=TEST_DEVICE,
                dtype=dtype,
                dataloader_num_workers=32,
            )

            end_of_epoch_hook = hooks.end_of_epoch_hook(
                tester,
                dataset_dict,
                model_folder,
                test_interval=1,
                patience=1,
                splits_to_eval=splits_to_eval,
            )

            trainer = MetricLossOnly(
                models=model_dict,
                optimizers=optimizer_dict,
                batch_size=batch_size,
                loss_funcs=loss_fn_dict,
                mining_funcs={},
                dataset=train_dataset,
                sampler=sampler,
                data_device=TEST_DEVICE,
                dtype=dtype,
                dataloader_num_workers=32,
                iterations_per_epoch=iterations_per_epoch,
                freeze_trunk_batchnorm=True,
                end_of_iteration_hook=hooks.end_of_iteration_hook,
                end_of_epoch_hook=end_of_epoch_hook,
            )

            num_epochs = 3
            trainer.train(num_epochs=num_epochs)

            # The exact best epoch / accuracy is only checked for full epochs.
            best_epoch, best_accuracy = hooks.get_best_epoch_and_accuracy(
                tester, "val")
            if splits_to_eval is None:
                self.assertTrue(best_epoch == 3)
                self.assertTrue(best_accuracy > 0.2)

            # Every way of querying the best accuracy must give the same value.
            accuracies, primary_metric_key = hooks.get_accuracies_of_best_epoch(
                tester, "val")
            accuracies = c_f.sqliteObjToDict(accuracies)
            self.assertTrue(
                accuracies[primary_metric_key][0] == best_accuracy)
            self.assertTrue(primary_metric_key == "precision_at_1_level0")

            best_epoch_accuracies = hooks.get_accuracies_of_epoch(
                tester, "val", best_epoch)
            best_epoch_accuracies = c_f.sqliteObjToDict(
                best_epoch_accuracies)
            self.assertTrue(best_epoch_accuracies[primary_metric_key][0] ==
                            best_accuracy)

            accuracy_history = hooks.get_accuracy_history(tester, "val")
            self.assertTrue(accuracy_history[primary_metric_key][
                accuracy_history["epoch"].index(best_epoch)] ==
                            best_accuracy)

            # One loss value is expected per iteration when epochs are full length.
            loss_history = hooks.get_loss_history()
            if splits_to_eval is None:
                self.assertTrue(
                    len(loss_history["metric_loss"]) == (len(sampler) /
                                                         batch_size) *
                    num_epochs)

            curr_primary_metric = hooks.get_curr_primary_metric(
                tester, "val")
            self.assertTrue(curr_primary_metric ==
                            accuracy_history[primary_metric_key][-1])

            # Record group names encode the tester and the evaluated splits.
            base_record_group_name = hooks.base_record_group_name(tester)
            self.assertTrue(
                base_record_group_name ==
                "accuracies_normalized_GlobalEmbeddingSpaceTester_level_0")

            record_group_name = hooks.record_group_name(tester, "val")

            if splits_to_eval is None:
                self.assertTrue(
                    record_group_name ==
                    "accuracies_normalized_GlobalEmbeddingSpaceTester_level_0_VAL_vs_self"
                )
            else:
                self.assertTrue(
                    record_group_name ==
                    "accuracies_normalized_GlobalEmbeddingSpaceTester_level_0_VAL_vs_TRAIN_and_VAL"
                )

            # Per-run outputs are removed so the next combination starts fresh.
            shutil.rmtree(model_folder)
            shutil.rmtree(logs_folder)
            shutil.rmtree(tensorboard_folder)

    # The cloned repository and the dataset are shared by all combinations.
    shutil.rmtree(cifar_resnet_folder)
    shutil.rmtree(dataset_folder)
def set_meta_record_keeper(self):
    """Create the meta record keeper, pickler/csver, and tensorboard writer.

    Nothing happens unless the split manager lists more than one split
    scheme. Otherwise the three objects returned by ``get_record_keeper``
    are stored on ``self``, ``meta_accuracies`` is initialised, and the
    pickle and tensorboard folders are created if missing.
    """
    if len(self.split_manager.split_scheme_names) <= 1:
        return

    # sub_experiment_dirs must yield exactly three templates; the first
    # filled-in path is not needed here.
    meta_dirs = [
        template % (self.experiment_folder, "meta_logs")
        for template in self.sub_experiment_dirs
    ]
    _, pkl_dir, tensorboard_dir = meta_dirs

    keeper, pickler_and_csver, writer = logging_presets.get_record_keeper(
        pkl_dir, tensorboard_dir
    )
    self.meta_record_keeper = keeper
    self.meta_pickler_and_csver = pickler_and_csver
    self.meta_tensorboard_writer = writer
    self.meta_accuracies = defaultdict(lambda: defaultdict(dict))

    for directory in (pkl_dir, tensorboard_dir):
        c_f.makedir_if_not_there(directory)
def set_record_keeper(self):
    """Store the record keeper, pickler/csver, and tensorboard writer on ``self``.

    All three come from one ``get_record_keeper`` call on the pickle and
    tensorboard folders.
    """
    keeper, pickler_and_csver, writer = logging_presets.get_record_keeper(
        self.pkl_folder, self.tensorboard_folder
    )
    self.record_keeper = keeper
    self.pickler_and_csver = pickler_and_csver
    self.tensorboard_writer = writer
def train_app(cfg):
    """Train a trunk + embedder + classifier model described by ``cfg``.

    The trunk is a torchvision model whose ``fc`` layer is replaced by an
    identity; an MLP embedder and an MLP classifier sit on top of it. Each of
    the three modules gets its own optimizer and its own ``StepLR`` scheduler.
    Training uses ``trainers.TrainWithClassifier``; testing uses
    ``OneShotTester`` on the ``"samples"`` and ``"val"`` datasets.

    Args:
        cfg: configuration object. The code reads the ``model``, ``embedder``,
            ``optimizer``, ``dataset``, ``mode``, ``embedder_loss``, ``miner``,
            ``trainer``, ``loss``, ``scheduler`` and ``transform`` sections.

    Raises:
        ValueError: if the configured optimizer, embedder loss, or miner name
            is not one of the supported values.
        KeyError: if the ``DATASET_FOLDER`` environment variable is not set.
    """
    print(cfg.pretty())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set trunk model and replace its final fc layer with an identity function
    trunk = torchvision.models.__dict__[cfg.model.model_name](pretrained=cfg.model.pretrained)
    trunk_output_size = trunk.fc.in_features
    trunk.fc = Identity()
    trunk = torch.nn.DataParallel(trunk.to(device))

    embedder = torch.nn.DataParallel(MLP([trunk_output_size, cfg.embedder.size]).to(device))
    classifier = torch.nn.DataParallel(MLP([cfg.embedder.size, cfg.embedder.class_out_size])).to(device)

    # Set optimizers. "sdg" is the spelling this function has always accepted;
    # "sgd" is accepted as well so the correct spelling also works.
    optimizer_classes = {
        "sdg": torch.optim.SGD,
        "sgd": torch.optim.SGD,
        "rmsprop": torch.optim.RMSprop,
    }
    try:
        optimizer_cls = optimizer_classes[cfg.optimizer.name]
    except KeyError:
        raise ValueError("Unsupported optimizer name: %r" % (cfg.optimizer.name,)) from None

    def make_optimizer(module):
        """Build the configured optimizer for one module's parameters."""
        return optimizer_cls(module.parameters(),
                             lr=cfg.optimizer.lr,
                             momentum=cfg.optimizer.momentum,
                             weight_decay=cfg.optimizer.weight_decay)

    trunk_optimizer = make_optimizer(trunk)
    embedder_optimizer = make_optimizer(embedder)
    classifier_optimizer = make_optimizer(classifier)

    # Set the datasets
    data_dir = os.environ["DATASET_FOLDER"]+"/"+cfg.dataset.data_dir
    print("Data dir: "+data_dir)
    train_dataset, val_dataset, val_samples_dataset = get_datasets(data_dir, cfg, mode=cfg.mode.type)
    print("Trainset: ",len(train_dataset), "Testset: ",len(val_dataset), "Samplesset: ",len(val_samples_dataset))

    # Set the loss function
    loss_name = cfg.embedder_loss.name
    if loss_name == "margin_loss":
        loss = losses.MarginLoss(margin=cfg.embedder_loss.margin,nu=cfg.embedder_loss.nu,beta=cfg.embedder_loss.beta)
    elif loss_name == "triplet_margin":
        loss = losses.TripletMarginLoss(margin=cfg.embedder_loss.margin)
    elif loss_name == "multi_similarity":
        loss = losses.MultiSimilarityLoss(alpha=cfg.embedder_loss.alpha, beta=cfg.embedder_loss.beta, base=cfg.embedder_loss.base)
    else:
        raise ValueError("Unsupported embedder loss name: %r" % (loss_name,))

    # Set the classification loss:
    classification_loss = torch.nn.CrossEntropyLoss()

    # Set the mining function
    miner_name = cfg.miner.name
    if miner_name == "triplet_margin":
        miner = miners.TripletMarginMiner(margin=cfg.miner.margin)
    elif miner_name == "multi_similarity":
        miner = miners.MultiSimilarityMiner(epsilon=cfg.miner.epsilon)
    else:
        raise ValueError("Unsupported miner name: %r" % (miner_name,))

    batch_size = cfg.trainer.batch_size
    num_epochs = cfg.trainer.num_epochs

    # Set the dataloader sampler
    sampler = samplers.MPerClassSampler(train_dataset.targets, m=4, length_before_new_iter=len(train_dataset))

    # Package the above stuff into dictionaries.
    models = {"trunk": trunk, "embedder": embedder, "classifier": classifier}
    optimizers = {"trunk_optimizer": trunk_optimizer, "embedder_optimizer": embedder_optimizer, "classifier_optimizer": classifier_optimizer}
    loss_funcs = {"metric_loss": loss, "classifier_loss": classification_loss}
    mining_funcs = {"tuple_miner": miner}

    # We can specify loss weights if we want to. This is optional
    loss_weights = {"metric_loss": cfg.loss.metric_loss, "classifier_loss": cfg.loss.classifier_loss}

    def make_scheduler(optimizer):
        """Build a StepLR scheduler with the configured step size and gamma."""
        return torch.optim.lr_scheduler.StepLR(optimizer, cfg.scheduler.step_size, gamma=cfg.scheduler.gamma)

    # Each scheduler must wrap the optimizer of the module it is named after.
    # The trunk scheduler used to wrap the embedder optimizer, so the trunk
    # learning rate never decayed and the embedder's decayed twice per step.
    schedulers = {
        "embedder_scheduler_by_epoch": make_scheduler(embedder_optimizer),
        "classifier_scheduler_by_epoch": make_scheduler(classifier_optimizer),
        "trunk_scheduler_by_epoch": make_scheduler(trunk_optimizer),
    }

    experiment_name = "%s_model_%s_cl_%s_ml_%s_miner_%s_mix_ml_%02.2f_mix_cl_%02.2f_resize_%d_emb_size_%d_class_size_%d_opt_%s_lr_%02.2f_m_%02.2f_wd_%02.2f"%(cfg.dataset.name,
        cfg.model.model_name,
        "cross_entropy",
        cfg.embedder_loss.name,
        cfg.miner.name,
        cfg.loss.metric_loss,
        cfg.loss.classifier_loss,
        cfg.transform.transform_resize,
        cfg.embedder.size,
        cfg.embedder.class_out_size,
        cfg.optimizer.name,
        cfg.optimizer.lr,
        cfg.optimizer.momentum,
        cfg.optimizer.weight_decay)
    record_keeper, _, _ = logging_presets.get_record_keeper("logs/%s"%(experiment_name), "tensorboard/%s"%(experiment_name))
    hooks = logging_presets.get_hook_container(record_keeper)
    dataset_dict = {"samples": val_samples_dataset, "val": val_dataset}
    model_folder = "example_saved_models/%s/"%(experiment_name)

    # Create the tester
    tester = OneShotTester(
        end_of_testing_hook=hooks.end_of_testing_hook,
    )
    tester.embedding_filename=data_dir+"/"+experiment_name+".pkl"

    end_of_epoch_hook = hooks.end_of_epoch_hook(tester, dataset_dict, model_folder)
    # NOTE(review): dataloader_num_workers is set to the batch size; this
    # looks unintended but no worker-count setting is visible here -- confirm.
    trainer = trainers.TrainWithClassifier(models,
                                           optimizers,
                                           batch_size,
                                           loss_funcs,
                                           mining_funcs,
                                           train_dataset,
                                           sampler=sampler,
                                           lr_schedulers=schedulers,
                                           dataloader_num_workers = cfg.trainer.batch_size,
                                           loss_weights=loss_weights,
                                           end_of_iteration_hook=hooks.end_of_iteration_hook,
                                           end_of_epoch_hook=end_of_epoch_hook
                                           )

    trainer.train(num_epochs=num_epochs)

    # NOTE(review): this fresh tester is never used afterwards; kept in case
    # construction has side effects -- confirm and remove if not.
    tester = OneShotTester()
def objective(trial):
    """Train one model per fold for ``trial`` and return the mean best score.

    For every fold produced by the ``"fold_generator"`` constructor, a trainer
    selected by ``args.trainer`` is built, a checkpoint is written after each
    epoch, and training resumes from that checkpoint when the file already
    exists. The fold's score is the maximum of
    ``mean_average_precision_at_r_level0`` over the "val" accuracy history.

    Args:
        trial: object passed to ``ParameterGenerator`` and ``MODEL_DEF.get``;
            its ``number`` attribute is used in file and folder names.

    Returns:
        ``np.mean`` of the per-fold scores.
    """
    param_gen = ParameterGenerator(trial, CONF["_fix_params"], logger=logger)

    # Average results of multiple folds.
    print("New parameter.")
    metrics = []
    constructors = MODEL_DEF.get(CONF, trial, param_gen)
    for i_fold, (train_dataset, dev_dataset, train_sampler,
                 batch_size) in enumerate(constructors["fold_generator"]()):
        print(f"Fold {i_fold}")
        trainer_kwargs = constructors["modules"]()

        # logging: each (trial, fold) pair gets its own csv/tensorboard folders
        record_keeper, _, _ = logging_presets.get_record_keeper(
            csv_folder=os.path.join(args.log_dir,
                                    f"trial_{trial.number}_{i_fold}_csv"),
            tensorboard_folder=os.path.join(
                args.log_dir, f"trial_{trial.number}_{i_fold}_tensorboard"))
        hooks = logging_presets.get_hook_container(record_keeper)

        # tester
        tester = testers.GlobalEmbeddingSpaceTester(
            end_of_testing_hook=hooks.end_of_testing_hook,
            dataloader_num_workers=args.n_test_loader)
        end_of_epoch_hook = hooks.end_of_epoch_hook(
            tester, {"val": dev_dataset},
            os.path.join(args.log_dir,
                         f"trial_{trial.number}_{i_fold}_model"),
            test_interval=1,
            patience=args.patience)

        CHECKPOINT_FN = os.path.join(
            args.log_dir, f"trial_{trial.number}_{i_fold}_last.pth")

        def actual_end_of_epoch_hook(trainer):
            # Run the regular hook first, then checkpoint the state dicts of
            # all models, optimizers and loss functions plus the epoch number.
            continue_training = end_of_epoch_hook(trainer)

            torch.save(
                ({k: m.state_dict() for k, m in trainer.models.items()},
                 {k: m.state_dict() for k, m in trainer.optimizers.items()},
                 {k: m.state_dict() for k, m in trainer.loss_funcs.items()},
                 trainer.epoch), CHECKPOINT_FN)
            return continue_training

        # train
        # NOTE(review): any other value of args.trainer leaves `trainer`
        # unassigned for this fold.
        if args.trainer == "MetricLossOnly":
            trainer = trainers.MetricLossOnly(
                batch_size=batch_size,
                mining_funcs={},
                dataset=train_dataset,
                sampler=train_sampler,
                dataloader_num_workers=args.n_train_loader,
                end_of_iteration_hook=hooks.end_of_iteration_hook,
                end_of_epoch_hook=actual_end_of_epoch_hook,
                **trainer_kwargs)
        elif args.trainer == "TrainWithClassifier":
            trainer = trainers.TrainWithClassifier(
                batch_size=batch_size,
                mining_funcs={},
                dataset=train_dataset,
                sampler=train_sampler,
                dataloader_num_workers=args.n_train_loader,
                end_of_iteration_hook=hooks.end_of_iteration_hook,
                end_of_epoch_hook=actual_end_of_epoch_hook,
                **trainer_kwargs)

        while True:
            start_epoch = 1
            # Resume from the last checkpoint of this (trial, fold) if present.
            if os.path.exists(CHECKPOINT_FN):
                model_dicts, optimizer_dicts, loss_dicts, last_epoch = \
                    torch.load(CHECKPOINT_FN)
                for k, d in model_dicts.items():
                    trainer.models[k].load_state_dict(d)
                for k, d in optimizer_dicts.items():
                    trainer.optimizers[k].load_state_dict(d)
                for k, d in loss_dicts.items():
                    trainer.loss_funcs[k].load_state_dict(d)
                start_epoch = last_epoch + 1

                logger.critical(f"Start from old epoch: {last_epoch + 1}")

            try:
                trainer.train(num_epochs=args.max_epoch,
                              start_epoch=start_epoch)
            except Exception as err:
                logger.critical(f"Error: {err}")
                # NOTE(review): as written, the error is re-raised when
                # args.ignore_error is truthy and swallowed (loop exit)
                # when it is falsy, which looks inverted relative to the flag
                # name. Every path leaves the loop, so the `while True` never
                # retries -- confirm the intended behavior.
                if not args.ignore_error:
                    break
                else:
                    raise err
            else:
                break

        # The fold's score is the best value seen on the "val" split.
        rslt = hooks.get_accuracy_history(
            tester, "val", metrics=["mean_average_precision_at_r"])
        metrics.append(max(rslt["mean_average_precision_at_r_level0"]))
    return np.mean(metrics)
def _create_general(self, record_keeper_type):
    """Build a record keeper from a dict of keyword arguments.

    Args:
        record_keeper_type: mapping expanded as keyword arguments into
            ``logging_presets.get_record_keeper``.

    Returns:
        The first of the three values returned by ``get_record_keeper``.
    """
    created = logging_presets.get_record_keeper(**record_keeper_type)
    # Exactly three values are expected; the last two are not needed.
    keeper, _unused_second, _unused_third = created
    return keeper
def train(train_data, test_data, save_model, num_epochs, lr, embedding_size,
          batch_size):
    """Train a resnet18 trunk plus MLP embedder with a triplet margin loss.

    Logs are written below a run folder named after ``lr``, ``batch_size``
    and ``embedding_size``. When ``save_model`` is given, that run folder
    lives inside it and the trained trunk and embedder weights are saved
    there.

    Args:
        train_data: training dataset; must expose ``targets``.
        test_data: dataset evaluated under the ``"val"`` split.
        save_model: base directory for the run folder, or ``None`` to put the
            run folder in the current directory and skip saving the weights.
        num_epochs: number of epochs passed to ``trainer.train``.
        lr: embedder learning rate; the trunk uses ``lr / 10``.
        embedding_size: output size of the embedder MLP.
        batch_size: batch size passed to the trainer.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set trunk model and replace its final fc layer with an identity function
    trunk = torchvision.models.resnet18(pretrained=True)
    trunk_output_size = trunk.fc.in_features
    trunk.fc = common_functions.Identity()
    trunk = torch.nn.DataParallel(trunk.to(device))

    # Set embedder model. This takes in the output of the trunk and outputs
    # embeddings of size ``embedding_size``.
    embedder = torch.nn.DataParallel(
        MLP([trunk_output_size, embedding_size]).to(device))

    # Set optimizers
    trunk_optimizer = torch.optim.Adam(trunk.parameters(),
                                       lr=lr / 10,
                                       weight_decay=0.0001)
    embedder_optimizer = torch.optim.Adam(embedder.parameters(),
                                          lr=lr,
                                          weight_decay=0.0001)

    # Set the loss function
    loss = losses.TripletMarginLoss(margin=0.1)

    # Set the mining function
    miner = miners.MultiSimilarityMiner(epsilon=0.1)

    # Set the dataloader sampler
    sampler = samplers.MPerClassSampler(train_data.targets,
                                        m=4,
                                        length_before_new_iter=len(train_data))

    # save_model may be None (it is checked before saving below), so it must
    # not be handed to os.path.join directly.
    base_dir = "" if save_model is None else save_model
    run_name = (''.join(str(lr).split('.')) + '_' + str(batch_size) + '_' +
                str(embedding_size))
    save_dir = os.path.join(base_dir, run_name)
    os.makedirs(save_dir, exist_ok=True)

    # Package the above stuff into dictionaries.
    models = {"trunk": trunk, "embedder": embedder}
    optimizers = {
        "trunk_optimizer": trunk_optimizer,
        "embedder_optimizer": embedder_optimizer
    }
    loss_funcs = {"metric_loss": loss}
    mining_funcs = {"tuple_miner": miner}

    record_keeper, _, _ = logging_presets.get_record_keeper(
        os.path.join(save_dir, "example_logs"),
        os.path.join(save_dir, "example_tensorboard"))
    hooks = logging_presets.get_hook_container(record_keeper)
    dataset_dict = {"val": test_data, "train": train_data}
    model_folder = "example_saved_models"

    def visualizer_hook(umapper, umap_embeddings, labels, split_name,
                        keyname, *args):
        """Show a scatter plot of the 2D embeddings, one color per label."""
        logging.info("UMAP plot for the {} split and label set {}".format(
            split_name, keyname))
        label_set = np.unique(labels)
        num_classes = len(label_set)
        plt.figure(figsize=(20, 15))
        # NOTE(review): num_embeddings is not defined in this function; it
        # must come from module scope or this line raises NameError. Confirm
        # whether embedding_size or len(umap_embeddings) was intended.
        plt.title(str(split_name) + '_' + str(num_embeddings))
        plt.gca().set_prop_cycle(
            cycler("color", [
                plt.cm.nipy_spectral(i)
                for i in np.linspace(0, 0.9, num_classes)
            ]))
        for i in range(num_classes):
            idx = labels == label_set[i]
            plt.plot(umap_embeddings[idx, 0],
                     umap_embeddings[idx, 1],
                     ".",
                     markersize=1)
        plt.show()

    # Create the tester
    tester = testers.GlobalEmbeddingSpaceTester(
        end_of_testing_hook=hooks.end_of_testing_hook,
        visualizer=umap.UMAP(),
        visualizer_hook=visualizer_hook,
        dataloader_num_workers=32,
        accuracy_calculator=AccuracyCalculator(k="max_bin_count"))

    end_of_epoch_hook = hooks.end_of_epoch_hook(tester,
                                                dataset_dict,
                                                model_folder,
                                                test_interval=1,
                                                patience=1)

    trainer = trainers.MetricLossOnly(
        models,
        optimizers,
        batch_size,
        loss_funcs,
        mining_funcs,
        train_data,
        sampler=sampler,
        dataloader_num_workers=32,
        end_of_iteration_hook=hooks.end_of_iteration_hook,
        end_of_epoch_hook=end_of_epoch_hook)

    trainer.train(num_epochs=num_epochs)

    if save_model is not None:
        torch.save(models["trunk"].state_dict(),
                   os.path.join(save_dir, 'trunk.pth'))
        torch.save(models["embedder"].state_dict(),
                   os.path.join(save_dir, 'embedder.pth'))
        print('Model saved in ', save_dir)
def create_accuracy_report(self, best_sub_experiment_name):
    """Write detailed and summary accuracy reports for the best sub-experiment.

    Every folder under the root experiment folder whose name starts with
    ``best_sub_experiment_name`` is treated as one experiment. For each
    meta-testing method and experiment, the row with the highest ``id`` is
    read from the global database for each trained status. Accuracy columns
    are aggregated across experiments into a mean and a 95% t-interval, and
    both reports are written as YAML files in the root experiment folder.

    Args:
        best_sub_experiment_name: name prefix of the experiment folders.
    """
    # A second YAML reader / API parser is built only to look up the record
    # group names and the configured meta-testing methods.
    dummy_YR = self.read_yaml_and_find_bayes(find_bayes_params=False,
                                             merge_argparse=True)
    dummy_api_parser = self.get_api_parser(dummy_YR.args)
    eval_record_group_dicts = dummy_api_parser.get_eval_record_name_dict(
        return_all=True)
    global_record_keeper, _, _ = logging_presets.get_record_keeper(
        self.csv_folder, self.tensorboard_folder, self.global_db_path, "",
        False)
    exp_names = glob.glob(
        os.path.join(self.bayes_opt_root_experiment_folder,
                     "%s*" % best_sub_experiment_name))
    exp_names = [os.path.basename(e) for e in exp_names]
    results, summary = {}, {}

    for eval_type in c_f.if_str_convert_to_singleton_list(
            dummy_YR.args.meta_testing_method):
        results[eval_type] = {}
        # summary[eval_type][trained_status][acc_key] -> list of values
        summary[eval_type] = collections.defaultdict(
            lambda: collections.defaultdict(list))
        table_name = eval_record_group_dicts[eval_type]["test"]

        for exp in exp_names:
            results[eval_type][exp] = {}
            exp_id = global_record_keeper.record_writer.global_db.get_experiment_id(
                exp)
            # Table and column names are formatted into the SQL text; the
            # actual values are bound through "?" placeholders.
            base_query = "SELECT * FROM {} WHERE experiment_id=? AND id=? AND {}=?".format(
                table_name, const.TRAINED_STATUS_COL_NAME)
            max_id_query = "SELECT max(id) FROM {} WHERE experiment_id=? AND {}=?".format(
                table_name, const.TRAINED_STATUS_COL_NAME)
            qs = {}

            # Fetch the row with the highest id for each trained status.
            for trained_status in [
                    const.UNTRAINED_TRUNK,
                    const.UNTRAINED_TRUNK_AND_EMBEDDER, const.TRAINED
            ]:
                max_id = global_record_keeper.query(
                    max_id_query,
                    values=(exp_id, trained_status),
                    use_global_db=True)[0]["max(id)"]
                q = global_record_keeper.query(base_query,
                                               values=(exp_id, max_id,
                                                       trained_status),
                                               use_global_db=True)
                if len(q) > 0:
                    qs[trained_status] = q[0]

            for trained_status, v1 in qs.items():
                q_as_dict = dict(v1)
                results[eval_type][exp][trained_status] = q_as_dict
                for acc_key, v2 in q_as_dict.items():
                    # Columns starting with one of these prefixes are not
                    # collected into the summary.
                    if all(not acc_key.startswith(x) for x in [
                            const.TRAINED_STATUS_COL_NAME, "epoch", "SEM",
                            "id", "experiment_id", "timestamp"
                    ]):
                        summary[eval_type][trained_status][acc_key].append(
                            v2)

        # Replace each list of collected values with its summary statistics.
        for trained_status, v1 in summary[eval_type].items():
            for acc_key in v1.keys():
                v2 = v1[acc_key]
                mean = np.mean(v2)
                cf_low, cf_high = scipy_stats.t.interval(
                    0.95,
                    len(v2) - 1,
                    loc=np.mean(v2),
                    scale=scipy_stats.sem(
                        v2))  #https://stackoverflow.com/a/34474255
                cf_width = mean - cf_low
                summary[eval_type][trained_status][acc_key] = {
                    "mean": float(mean),
                    "95%_confidence_interval":
                    (float(cf_low), float(cf_high)),
                    "95%_confidence_interval_width": float(cf_width)
                }

    # The report file names are derived from the non-meta base record group name.
    eval_name = c_f.first_val_of_dict(
        dummy_api_parser.get_eval_record_name_dict(
            eval_type=const.NON_META, return_base_record_group_name=True))
    detailed_report_filename = os.path.join(
        self.bayes_opt_root_experiment_folder,
        "detailed_report_{}.yaml".format(eval_name))
    report_filename = os.path.join(self.bayes_opt_root_experiment_folder,
                                   "report_{}.yaml".format(eval_name))
    c_f.write_yaml(detailed_report_filename, results, open_as="w")
    # The JSON round trip turns the defaultdicts and tuples into plain dicts and lists.
    c_f.write_yaml(report_filename,
                   json.loads(json.dumps(summary)),
                   open_as="w")
def set_record_keeper(self):
    """Create the record keeper for this run and store it on the instance.

    The keeper is requested as a new experiment only when
    ``self.beginning_of_training()`` is truthy and
    ``self.curr_split_count`` is 0. The result is stored in
    ``self.record_keeper``; the other two values returned by
    ``logging_presets.get_record_keeper`` are discarded.
    """
    # Short-circuits: curr_split_count is only read at the start of training.
    starting_fresh = (self.beginning_of_training()
                      and self.curr_split_count == 0)
    keeper, _unused_a, _unused_b = logging_presets.get_record_keeper(
        self.csv_folder,
        self.tensorboard_folder,
        self.global_db_path,
        self.args.experiment_name,
        starting_fresh,
    )
    self.record_keeper = keeper
"synth_loss": synth_loss, "g_adv_loss": g_adv_loss } # Package the above stuff into dictionaries. mining_funcs = {"tuple_miner": miner} loss_weights = { "metric_loss": 1, "synth_loss": 0.1, "g_adv_loss": 0.1, "g_hard_loss": 0.1, "g_reg_loss": 0.1 } record_keeper, _, _ = logging_presets.get_record_keeper( "example_logs", "example_tensorboard") hooks = logging_presets.get_hook_container(record_keeper) dataset_dict = {"val": val_dataset} model_folder = "example_saved_models" # Create the tester tester = testers.GlobalEmbeddingSpaceTester( end_of_testing_hook=hooks.end_of_testing_hook) end_of_epoch_hook = hooks.end_of_epoch_hook(tester, dataset_dict, model_folder) trainer = trainers.DeepAdversarialMetricLearning( models=models, optimizers=optimizers, batch_size=batch_size, loss_funcs=loss_funcs, mining_funcs=mining_funcs, iterations_per_epoch=iterations_per_epoch,
def main():
    """Fine-tune a pretrained InceptionResnetV1 trunk with ArcFace loss.

    Images are read from the ``train_aligned`` and ``val_aligned`` folders
    under the module-level ``data_dir``. Training runs through
    ``trainers.MetricLossOnly`` for ``num_epochs`` epochs, using the
    module-level ``LR`` and ``batch_size``. After each epoch the hook
    container runs the tester with ``splits_to_eval=[('val', ['train'])]``
    and saves models to ``training_saved_models``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    # Data transformations
    trans_train = transforms.Compose([
        transforms.RandomApply(transforms=[
            transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
            # transforms.RandomPerspective(distortion_scale=0.6, p=1.0),
            transforms.RandomRotation(degrees=(0, 180)),
            transforms.RandomHorizontalFlip(),
        ]),
        np.float32,
        transforms.ToTensor(),
        fixed_image_standardization,
    ])
    trans_val = transforms.Compose([
        # transforms.CenterCrop(120),
        np.float32,
        transforms.ToTensor(),
        fixed_image_standardization,
    ])

    train_dataset = datasets.ImageFolder(
        os.path.join(data_dir, "train_aligned"), transform=trans_train)
    val_dataset = datasets.ImageFolder(
        os.path.join(data_dir, "val_aligned"), transform=trans_val)

    # Prepare the model
    model = InceptionResnetV1(classify=False,
                              pretrained="vggface2",
                              dropout_prob=0.5).to(device)
    # for param in list(model.parameters())[:-8]:
    #     param.requires_grad = False
    trunk_optimizer = torch.optim.SGD(model.parameters(), lr=LR)

    # Set the loss function. ArcFaceLoss holds learnable class weights, so it
    # is kept on the same device as the trunk and gets its own optimizer;
    # without one those weights would stay at their random initialisation.
    # NOTE(review): 512 is presumably the trunk's embedding width - confirm.
    loss = losses.ArcFaceLoss(len(train_dataset.classes), 512).to(device)
    loss_optimizer = torch.optim.SGD(loss.parameters(), lr=LR)

    # Package the above stuff into dictionaries.
    models = {"trunk": model}
    optimizers = {
        "trunk_optimizer": trunk_optimizer,
        "metric_loss_optimizer": loss_optimizer,
    }
    loss_funcs = {"metric_loss": loss}
    mining_funcs = {}
    # Plateau schedulers are stepped with the validation accuracy, which
    # should be maximised; the default mode="min" would cut the learning rate
    # while accuracy is still improving.
    lr_scheduler = {
        "trunk_scheduler_by_plateau":
        torch.optim.lr_scheduler.ReduceLROnPlateau(trunk_optimizer,
                                                   mode="max")
    }

    # Create the tester
    record_keeper, _, _ = logging_presets.get_record_keeper(
        "logs", "tensorboard")
    hooks = logging_presets.get_hook_container(record_keeper)
    dataset_dict = {"val": val_dataset, "train": train_dataset}
    model_folder = "training_saved_models"

    # NOTE(review): this hook is defined but never passed to the tester
    # below, so it is currently not invoked.
    def visualizer_hook(umapper, umap_embeddings, labels, split_name,
                        keyname, *args):
        """Scatter-plot 2-D embeddings with one colour per distinct label."""
        logging.info("UMAP plot for the {} split and label set {}".format(
            split_name, keyname))
        label_set = np.unique(labels)
        num_classes = len(label_set)
        plt.figure(figsize=(8, 7))
        plt.gca().set_prop_cycle(
            cycler("color", [
                plt.cm.nipy_spectral(i)
                for i in np.linspace(0, 0.9, num_classes)
            ]))
        for label in label_set:
            idx = labels == label
            plt.plot(umap_embeddings[idx, 0],
                     umap_embeddings[idx, 1],
                     ".",
                     markersize=1)
        plt.show()

    tester = testers.GlobalEmbeddingSpaceTester(
        end_of_testing_hook=hooks.end_of_testing_hook,
        dataloader_num_workers=4,
        accuracy_calculator=AccuracyCalculator(
            include=['mean_average_precision_at_r'], k="max_bin_count"))
    end_of_epoch_hook = hooks.end_of_epoch_hook(
        tester,
        dataset_dict,
        model_folder,
        splits_to_eval=[('val', ['train'])])

    # Create the trainer. Keyword arguments are used because the positional
    # order of ``mining_funcs`` and ``dataset`` differs between releases of
    # the library.
    trainer = trainers.MetricLossOnly(
        models=models,
        optimizers=optimizers,
        batch_size=batch_size,
        loss_funcs=loss_funcs,
        mining_funcs=mining_funcs,
        dataset=train_dataset,
        lr_schedulers=lr_scheduler,
        dataloader_num_workers=8,
        end_of_iteration_hook=hooks.end_of_iteration_hook,
        end_of_epoch_hook=end_of_epoch_hook)
    trainer.train(num_epochs=num_epochs)