def test_if_ModelTrainer_method___train____is_running_properly():
    """Check that ``ModelTrainer.train`` produces both model files.

    The fixture directory ``data/model_trainer/train`` (relative to this
    file) is used as both the input and the prepped-data folder, and its
    ``models`` sub-folder is used as the output folder. The test passes when
    ``classifier.hdf5`` and ``generator.hdf5`` exist there after training.
    """
    input_data = prepped_data = os.path.join(
        os.path.dirname(__file__), "data/model_trainer/train")
    model_candidate_folder = os.path.join(prepped_data, "models")

    # Reset a pre-existing output folder before training
    # (clean_dir is a project helper defined elsewhere).
    if os.path.isdir(model_candidate_folder):
        clean_dir(model_candidate_folder)

    run = AzureMLRunMoq(None)
    trainer = ModelTrainer(run)
    trainer.train(input_data, prepped_data, model_candidate_folder)

    # Same order as before: classifier first, then generator.
    for artifact_name in ("classifier.hdf5", "generator.hdf5"):
        artifact_path = os.path.join(model_candidate_folder, artifact_name)
        assert os.path.isfile(artifact_path), \
            "missing model artifact: " + artifact_path
def _record_training_run(writer, trainer, vis, model, train_set, test_set, opts):
    """Train once through the visualizer and append one result row to the CSV."""
    start_time = time.time()
    vis.show(lambda: trainer.trainModel(model, train_set, test_set, opts))
    duration = time.time() - start_time

    # Copy so the mapping returned by opts.values() is not mutated.
    csv_dict = dict(opts.values())
    csv_dict.update({
        "final_test_loss": trainer.final_test_loss,
        "final_test_acc": trainer.final_test_acc,
        "duration": duration,
    })
    writer.writerow(csv_dict)


def train(model, train_set, test_set, opts):
    """
    Train model, create visualization & document results in CSV file

    A header plus one row per training run is written to ``results.csv`` at
    the location given by ``opts.output_path``. Each row holds the values
    from ``opts.values()`` together with the trainer's final test loss,
    final test accuracy and the wall-clock duration of the run.

    When ``opts.gridSearch`` is truthy, one run is performed for every
    configuration yielded by ``opts.iter()``; otherwise a single run is
    performed.
    """
    print("Training model...")
    trainer = ModelTrainer()
    vis = Visualizer(opts)

    # newline="" is what the csv module expects of the file object; without
    # it, extra blank lines can appear on platforms that translate "\n".
    with open(opts.output_path("results.csv"), "w", newline="") as csv_file:
        # Build a new list instead of extending the one owned by opts.
        fieldnames = list(opts.var_names()) + [
            "final_test_loss", "final_test_acc", "duration"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        if opts.gridSearch:
            for config in opts.iter():
                print("Testing {}".format(config))
                _record_training_run(
                    writer, trainer, vis, model, train_set, test_set, opts)
                # Flush per configuration so finished rows reach the file
                # before the next run starts.
                csv_file.flush()
        else:
            _record_training_run(
                writer, trainer, vis, model, train_set, test_set, opts)
def test(model, data_filepath, opts):
    """
    Test a given pytorch model by calculating the loss and accuracy

    Builds a ``SignMNISTDataset`` from ``data_filepath``, wraps it in a
    ``DataLoader`` configured from ``opts`` and hands everything to
    ``ModelTrainer.test`` with a cross-entropy criterion. The resulting loss
    and accuracy are printed; nothing is returned.
    """
    print("Evaluating model on {}...".format(data_filepath))

    dataset = SignMNISTDataset(opts, data_filepath)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opts.batch_size,
        shuffle=opts.shuffleTestData,
        num_workers=1,
    )
    criterion = torch.nn.CrossEntropyLoss()

    # ModelTrainer.test returns four values; only the middle two are used.
    _, loss, acc, _ = ModelTrainer.test(
        opts, loader, dataset, model, criterion, 0)

    print("Loss: {}".format(loss))
    print("Accuracy: {}".format(acc))
arc_labels = datamanager_train_file.arc_labels

# If provided, load test data
if options.test_file is not None:
    datamanager_test_file = ConllLoader(input_file=options.test_file,
                                        oracle=False)
    datamanager_test_file.load_file()
else:
    datamanager_test_file = None

# Build the parser from the vocabulary / tag sets and the CLI options.
model = BiLSTMParser(
    name=options.model_name,
    vocab=vocab,
    pos_tags=pos_tags,
    word_dim=options.word_dim,
    pos_dim=options.pos_dim,
    num_layers_lstm=options.num_layers_lstm,
    hidden_units_lstm=options.hidden_units_lstm,
    hidden_units_mlp=options.hidden_units_mlp,
    arc_labels=arc_labels,
    features=options.features,
)

# datamanager_test_file is None when no test file was given.
trainer = ModelTrainer(
    model=model,
    datamanager_train_file=datamanager_train_file,
    datamanager_test_file=datamanager_test_file,
    epochs=options.num_epochs,
    criterion=options.criterion,
    optimizer=options.optimizer,
    run=options.run,
    l2_penalty=options.l2_penalty,
)
trainer.train(test_each_epoch=True)
def processer(self):
    """Run the enabled preprocessing stages, then train one model per fold.

    Every stage is gated by a boolean attribute on ``self`` and the stages
    run in this order: imputation (``impute``), shuffling (``shuffle``),
    fold assignment (``cross_validation``), encoding (``encoding``) and
    per-fold training (``train_model``). Inside the training loop, feature
    scaling and feature extraction are applied when ``feature_scaling`` /
    ``feature_extractor`` are set.

    Raises:
        ValueError: if encoding is enabled and ``self.data_type`` is neither
            ``'numerical'`` nor ``'categorical'``.
    """
    if self.impute:
        self.train_dataframe, self.test_dataframe = (
            self.data_preprocess_instance.data_imputer(
                train_dataframe=self.train_dataframe,
                test_dataframe=self.test_dataframe))

    if self.shuffle:
        print('Shuffling train and test dataframe')
        self.train_dataframe, self.test_dataframe = (
            self.data_preprocess_instance.shuffle_data(
                train_dataframe=self.train_dataframe,
                test_dataframe=self.test_dataframe))

    if self.cross_validation:
        print(f'cross validating the dataset using {self.problem_type} method')
        cross_instance = CrossValidation(
            df=self.train_dataframe,
            target_cols=self.target_column,
            multilabel_delimiter=self.multilabel_delimiter,
            problem_type=self.problem_type,
            num_folds=self.num_folds,
            random_state=self.random_state)
        self.train_dataframe = cross_instance.split()

    if self.encoding:
        if self.data_type == 'numerical':
            # Message fixed: this branch runs the numerical encoder.
            print(f'Performing numerical encoding using {self.encoding_type}')
            encoder = self.data_preprocess_instance.numerical_encoder
        elif self.data_type == 'categorical':
            print(f'Performing categorical encoding using {self.encoding_type}')
            encoder = self.data_preprocess_instance.categorical_encoder
        else:
            raise ValueError(f"{self.data_type} not available")
        self.train_dataframe, self.test_dataframe = encoder(
            train_dataframe=self.train_dataframe,
            test_dataframe=self.test_dataframe)

    if self.train_model:
        # NOTE(review): the fold count is hard-coded to 5 although num_folds
        # is configurable above - confirm FOLD_MAPPING before changing it.
        for fold in range(5):
            print(f"selecting fold {fold}")
            fold_ids = self.train_dataframe.kfold
            main_train = self.train_dataframe[
                fold_ids.isin(self.FOLD_MAPPING.get(fold))]
            main_validate = self.train_dataframe[fold_ids == fold]

            # Split the fold into X_train / X_validate / y_train / y_validate.
            self.y_train = main_train[self.target_column].values
            self.y_validate = main_validate[self.target_column].values
            # NOTE(review): "target" is hard-coded here while y_* use
            # self.target_column - verify they always agree.
            self.X_train = main_train.drop(["id", "target", "kfold"], axis=1)
            self.X_validate = main_validate.drop(
                ["id", "target", "kfold"], axis=1)

            if self.feature_scaling:
                print(f'feature scaling the dataset of fold {fold}')
                self.X_train, self.X_validate = (
                    self.data_preprocess_instance.feature_scalar(
                        train_dataframe=self.X_train,
                        test_dataframe=self.X_validate))

            if self.feature_extractor:
                print(
                    f"extracting features from the dataset of fold {fold} "
                    f"using {self.feature_extractor_type}")
                feat_ext = FeatureExtractor(
                    X_train=self.X_train,
                    X_validate=self.X_validate,
                    feature_extractor_type=self.feature_extractor_type,
                    n_components=self.n_components,
                    y_train=self.y_train)
                # "extact" is the method name exposed by FeatureExtractor.
                self.X_train, self.X_validate, self.n_components = (
                    feat_ext.extact())

            train_instance = ModelTrainer(
                X_train=self.X_train,
                X_validate=self.X_validate,
                y_train=self.y_train,
                y_validate=self.y_validate,
                model_name=self.model_name)
            train_instance.train()
dropout=tt.arg.dropout) if tt.arg.dataset == 'mini': test_loader = MiniImagenetLoader(root=tt.arg.dataset_root, partition='test') elif tt.arg.dataset == 'tiered': test_loader = TieredImagenetLoader(root=tt.arg.dataset_root, partition='test') else: print('Unknown dataset!') data_loader = {'test': test_loader} # create trainer tester = ModelTrainer(enc_module=enc_module, gnn_module=gnn_module, data_loader=data_loader) #checkpoint = torch.load('asset/checkpoints/{}/'.format(exp_name) + 'model_best.pth.tar') checkpoint = torch.load('./trained_models/{}/'.format(exp_name) + 'model_best.pth.tar') tester.enc_module.load_state_dict(checkpoint['enc_module_state_dict']) print("load pre-trained enc_nn done!") # initialize gnn pre-trained tester.gnn_module.load_state_dict(checkpoint['gnn_module_state_dict']) print("load pre-trained egnn done!") tester.val_acc = checkpoint['val_acc'] tester.global_step = checkpoint['iteration']
elif tt.arg.dataset == 'tiered': test_loader = TieredImagenetLoader(root=tt.arg.dataset_root, partition='test') elif tt.arg.dataset == 'cub': test_loader = Cub200Loader(root=tt.arg.dataset_root, partition='test') elif tt.arg.dataset == 'imnet': test_loader = ImNetLoader(root=tt.arg.dataset_root, partition='test') else: print('Unknown dataset!') raise NameError('Unknown dataset!!!') data_loader = {'test': test_loader} # create trainer tester = ModelTrainer(enc_module=enc_module, unet_module=unet_module, dcompression=dcompression, data_loader=data_loader) checkpoint = torch.load('asset/checkpoints/{}/'.format(tt.arg.exp_name) + 'model_best.pth.tar', map_location=tt.arg.device) # checkpoint = torch.load('HGNN_trained_models/{}/'.format(tt.arg.exp_name) + 'model_best.pth.tar',map_location=tt.arg.device) tester.enc_module.load_state_dict(checkpoint['enc_module_state_dict']) print("load pre-trained enc_nn done!") # initialize dcompression pre-trained tester.unet_module.load_state_dict(checkpoint['unet_module_state_dict']) print("load pre-trained unet done!") # initialize gnn pre-trained
def _match_module_prefix(source_state_dict, gpu_count):
    """Return a copy of a state dict with the 'module.' key prefix added or removed to fit ``gpu_count``."""
    prefix = 'module.'
    remapped = OrderedDict()
    for key in source_state_dict:
        name = key
        has_prefix = key.startswith(prefix)
        # loaded model is single GPU but we will train it in multiple GPUS!
        if not has_prefix and gpu_count > 1:
            name = prefix + key  # add 'module'
        # loaded model is multiple GPUs but we will train it in single GPU!
        elif has_prefix and gpu_count == 1:
            name = key[len(prefix):]  # remove `module.`
        remapped[name] = source_state_dict[key]
    return remapped


def test():
    """Evaluate a pre-trained model on the test partition.

    Fills in defaults for any ``tt.arg`` option that is still ``None``, sets
    the fixed model/training hyper-parameters, seeds all random number
    generators, builds the encoder / CCMNet / DifNet modules and the test
    data loader, restores the encoder weights from ``pretrained_wrn.pth.tar``
    and the CCMNet weights from ``model_best.pth.tar`` (both under
    ``tt.arg.test_model``), and finally runs ``tester.eval`` on the test
    partition with gradients disabled.

    Raises:
        ValueError: if ``tt.arg.dataset`` is neither ``'mini'`` nor
            ``'tiered'``.
    """
    tt.arg.test_model = 'asset/checkpoints/WRN_mini_5_5' if tt.arg.test_model is None else tt.arg.test_model
    tt.arg.device = 'cuda:0' if tt.arg.device is None else tt.arg.device
    # replace dataset_root with your own
    tt.arg.dataset_root = '/root/IPN/' if tt.arg.dataset_root is None else tt.arg.dataset_root
    tt.arg.dataset = 'mini' if tt.arg.dataset is None else tt.arg.dataset
    tt.arg.num_ways = 5 if tt.arg.num_ways is None else tt.arg.num_ways
    tt.arg.num_shots = 5 if tt.arg.num_shots is None else tt.arg.num_shots
    tt.arg.num_unlabeled = 0 if tt.arg.num_unlabeled is None else tt.arg.num_unlabeled
    tt.arg.meta_batch_size = 20 if tt.arg.meta_batch_size is None else tt.arg.meta_batch_size
    tt.arg.seed = 222 if tt.arg.seed is None else tt.arg.seed
    tt.arg.num_gpus = 1 if tt.arg.num_gpus is None else tt.arg.num_gpus
    tt.arg.features = False

    tt.arg.num_ways_train = tt.arg.num_ways
    tt.arg.num_ways_test = tt.arg.num_ways
    tt.arg.num_shots_train = tt.arg.num_shots
    tt.arg.num_shots_test = tt.arg.num_shots

    # model parameter related
    tt.arg.emb_size = 640

    # train, test parameters
    tt.arg.train_iteration = 100000 if tt.arg.dataset == 'mini' else 200000
    tt.arg.test_iteration = 10000
    tt.arg.test_interval = 5000
    tt.arg.test_batch_size = 10
    tt.arg.log_step = 100
    tt.arg.lr = 1e-3
    tt.arg.grad_clip = 5
    tt.arg.weight_decay = 1e-6
    tt.arg.dec_lr = 15000 if tt.arg.dataset == 'mini' else 30000
    tt.arg.dropout = 0.1 if tt.arg.dataset == 'mini' else 0.0

    # set random seed
    np.random.seed(tt.arg.seed)
    torch.manual_seed(tt.arg.seed)
    torch.cuda.manual_seed_all(tt.arg.seed)
    random.seed(tt.arg.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    enc_module = wide_res(num_classes=64, remove_linear=True)
    ccmnet_module = CCMNet(in_features=tt.arg.emb_size,
                           hidden_features=tt.arg.emb_size)
    dif_module = DifNet()

    if tt.arg.dataset == 'mini':
        test_loader = MiniImagenetLoader(root=tt.arg.dataset_root, partition='test')
    elif tt.arg.dataset == 'tiered':
        test_loader = TieredImagenetLoader(root=tt.arg.dataset_root, partition='test')
    else:
        # Previously this only printed and then failed below on the unbound
        # test_loader; fail here with a clear message instead.
        print('Unknown dataset!')
        raise ValueError('Unknown dataset: {!r}'.format(tt.arg.dataset))

    data_loader = {'test': test_loader}

    # create trainer
    tester = ModelTrainer(enc_module=enc_module,
                          ccmnet_module=ccmnet_module,
                          dif_module=dif_module,
                          data_loader=data_loader)

    # map_location lets checkpoints saved on another device be restored onto
    # the configured one.
    wrn_checkpoint = torch.load(tt.arg.test_model + '/pretrained_wrn.pth.tar',
                                map_location=tt.arg.device)
    checkpoint = torch.load(tt.arg.test_model + '/model_best.pth.tar',
                            map_location=tt.arg.device)

    gpu_count = torch.cuda.device_count()

    tester.enc_module.load_state_dict(
        _match_module_prefix(wrn_checkpoint['enc_module_state_dict'], gpu_count))
    print("load pre-trained enc_module done!")

    tester.ccmnet_module.load_state_dict(
        _match_module_prefix(checkpoint['ccmnet_module_state_dict'], gpu_count))
    print("load pre-trained ccmnet_module done!")

    tester.val_acc = checkpoint['val_acc']
    tester.global_step = checkpoint['iteration']

    print(tester.val_acc)
    print(tester.global_step)

    with torch.no_grad():
        tester.eval(partition='test')