def test_determinism(self):
    """Test whether training and scoring is deterministic given seed"""
    em = EndModel(
        seed=123,
        batchnorm=True,
        dropout=0.1,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model(
        (Xs[0], Ys[0]), valid_data=(Xs[1], Ys[1]), n_epochs=1, checkpoint=False
    )
    score_1 = em.score((Xs[2], Ys[2]), verbose=False)

    # Test scoring determinism
    score_2 = em.score((Xs[2], Ys[2]), verbose=False)
    self.assertEqual(score_1, score_2)

    # Test training determinism
    em_2 = EndModel(
        seed=123,
        batchnorm=True,
        dropout=0.1,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    em_2.train_model(
        (Xs[0], Ys[0]), valid_data=(Xs[1], Ys[1]), n_epochs=1, checkpoint=False
    )
    score_3 = em_2.score((Xs[2], Ys[2]), verbose=False)
    self.assertEqual(score_1, score_3)

def test_save_and_load(self):
    """Test basic saving and loading"""
    em = EndModel(
        seed=1337,
        input_batchnorm=False,
        middle_batchnorm=False,
        input_dropout=0.0,
        middle_dropout=0.0,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model(
        (Xs[0], Ys[0]), valid_data=(Xs[1], Ys[1]), n_epochs=3, checkpoint=False
    )
    score = em.score((Xs[2], Ys[2]), verbose=False)

    # Save model
    SAVE_PATH = "test_save_model.pkl"
    em.save(SAVE_PATH)

    # Reload and make sure (a) score and (b) non-buffer, non-Parameter
    # attributes are the same
    em_2 = EndModel.load(SAVE_PATH)
    self.assertEqual(em.seed, em_2.seed)
    score_2 = em_2.score((Xs[2], Ys[2]), verbose=False)
    self.assertEqual(score, score_2)

    # Clean up
    os.remove(SAVE_PATH)

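# NOTE (editorial sketch): the tests above read data from a `self.single_problem`
# fixture that is not shown in this excerpt. A minimal, hypothetical version of
# such a fixture is sketched below; the synthetic generator and 80/10/10 split
# sizes are assumptions, not the repository's actual setUp.
def setUp(self):
    n = 1000
    X = torch.randn(n, 2)
    Y = (X[:, 0] > X[:, 1]).long() + 1  # labels are 1-indexed (0 reserved for abstain)

    splits = [slice(0, 800), slice(800, 900), slice(900, 1000)]
    Xs = [X[s] for s in splits]
    Ys = [Y[s] for s in splits]
    self.single_problem = (Xs, Ys)
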
def test_lstm_memorize_first(self):
    """Confirm that lstm can memorize the first token in a long sequence"""
    X = torch.randint(1, MAX_INT + 1, (n, SEQ_LEN)).long()
    Y = X[:, 0]

    Xs = self._split_dataset(X)
    Ys = self._split_dataset(Y)

    embed_size = 4
    hidden_size = 10
    vocab_size = MAX_INT + 1

    lstm_module = LSTMModule(
        embed_size,
        hidden_size,
        vocab_size=vocab_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
    )
    em = EndModel(
        k=MAX_INT,
        input_module=lstm_module,
        layer_out_dims=[hidden_size, MAX_INT],
        optimizer="adam",
        batchnorm=True,
        seed=1,
        verbose=False,
    )
    em.train_model((Xs[0], Ys[0]), dev_data=(Xs[1], Ys[1]), n_epochs=10)
    score = em.score((Xs[2], Ys[2]), verbose=False)
    self.assertGreater(score, 0.95)

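# NOTE (editorial sketch): the LSTM tests in this file rely on a
# `self._split_dataset` helper that is not included in this excerpt. A plausible
# sketch is given below, assuming it simply carves a tensor into contiguous
# train/dev/test chunks; the real helper and its split ratios may differ.
def _split_dataset(self, data, splits=(0.8, 0.1, 0.1)):
    n_total = data.shape[0]
    # Cumulative cut points, e.g. (0.8, 0.1, 0.1) over 1000 rows -> [800, 900, 1000]
    cuts = [int(sum(splits[: i + 1]) * n_total) for i in range(len(splits))]
    starts = [0] + cuts[:-1]
    return [data[s:e] for s, e in zip(starts, cuts)]
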
def test_lstm_memorize_first(self):
    X = torch.randint(1, MAX_INT + 1, (n, SEQ_LEN)).long()
    Y = X[:, 0]

    Xs = self._split_dataset(X)
    Ys = self._split_dataset(Y)

    embed_size = 4
    hidden_size = 10
    vocab_size = MAX_INT + 1

    lstm_module = LSTMModule(
        embed_size,
        hidden_size,
        vocab_size,
        bidirectional=False,
        verbose=False,
    )
    em = EndModel(
        k=MAX_INT,
        input_module=lstm_module,
        layer_out_dims=[hidden_size, MAX_INT],
        optimizer="adam",
        batchnorm=True,
        seed=1,
        verbose=False,
    )
    em.train_model(Xs[0], Ys[0], Xs[1], Ys[1], n_epochs=5, verbose=True)
    score = em.score(Xs[2], Ys[2], verbose=False)
    self.assertGreater(score, 0.95)

def test_logwriter(self):
    """Test the basic LogWriter class"""
    writer_kwargs = {
        "log_dir": self.log_dir,
        "run_dir": "test_dir",
        "run_name": "test",
    }
    em = EndModel(
        seed=1,
        input_batchnorm=False,
        middle_batchnorm=False,
        input_dropout=0.0,
        middle_dropout=0.0,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model(
        (Xs[0], Ys[0]),
        valid_data=(Xs[1], Ys[1]),
        n_epochs=7,
        checkpoint=False,
        writer="json",
        **writer_kwargs,
    )

    # Load the log
    with open(em.writer.log_path, "r") as f:
        log_dict = json.load(f)
    self.assertEqual(log_dict["config"]["train_config"]["n_epochs"], 7)
    self.assertEqual(len(log_dict["run_log"]["train/loss"]), 7)

def test_logging(self):
    """Test the basic LogWriter class"""
    log_writer = LogWriter(run_dir="test_dir", run_name="test")
    em = EndModel(
        seed=1,
        input_batchnorm=False,
        middle_batchnorm=False,
        input_dropout=0.0,
        middle_dropout=0.0,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model(
        (Xs[0], Ys[0]),
        dev_data=(Xs[1], Ys[1]),
        n_epochs=7,
        log_writer=log_writer,
    )

    # Load the log
    with open(log_writer.log_path, "r") as f:
        run_log = json.load(f)
    self.assertEqual(run_log["config"]["train_config"]["n_epochs"], 7)
    self.assertEqual(len(run_log["run-log"]["train-loss"]), 7)

    # Clean up
    rmtree(log_writer.log_subdir)

def test_checkpointing(self):
    """Confirm that different checkpoints are being saved with checkpoint_every on"""
    em = EndModel(
        seed=1,
        batchnorm=False,
        dropout=0.0,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model(
        (Xs[0], Ys[0]),
        valid_data=(Xs[1], Ys[1]),
        n_epochs=5,
        checkpoint=True,
        checkpoint_every=1,
    )
    test_model = copy.deepcopy(em.state_dict())

    new_model = torch.load("checkpoints/model_checkpoint_4.pth")
    self.assertFalse(
        torch.all(
            torch.eq(
                test_model["network.1.0.weight"],
                new_model["model"]["network.1.0.weight"],
            )
        )
    )
    new_model = torch.load("checkpoints/model_checkpoint_5.pth")
    self.assertTrue(
        torch.all(
            torch.eq(
                test_model["network.1.0.weight"],
                new_model["model"]["network.1.0.weight"],
            )
        )
    )

def test_custom_modules(self):
    """Test custom input/head modules"""
    input_module = nn.Sequential(IdentityModule(), nn.Linear(2, 10))
    middle_modules = [nn.Linear(10, 8), IdentityModule()]
    head_module = nn.Sequential(nn.Linear(8, 2), IdentityModule())
    em = EndModel(
        seed=1,
        input_module=input_module,
        middle_modules=middle_modules,
        head_module=head_module,
        layer_out_dims=[10, 8, 8],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train(
        Xs[0],
        Ys[0],
        Xs[1],
        Ys[1],
        n_epochs=5,
        verbose=False,
        show_plots=False,
    )
    score = em.score(Xs[2], Ys[2], verbose=False)
    self.assertGreater(score, 0.95)

def test_lstm_memorize_marker(self):
    X = torch.randint(1, MAX_INT + 1, (n, SEQ_LEN)).long()
    Y = torch.zeros(n).long()
    needles = np.random.randint(1, SEQ_LEN - 1, n)
    for i in range(n):
        X[i, needles[i]] = MAX_INT + 1
        Y[i] = X[i, needles[i] + 1]

    Xs = self._split_dataset(X)
    Ys = self._split_dataset(Y)

    embed_size = 4
    hidden_size = 10
    vocab_size = MAX_INT + 2

    lstm_module = LSTMModule(
        embed_size,
        hidden_size,
        vocab_size,
        bidirectional=True,
        verbose=False,
    )
    em = EndModel(
        k=MAX_INT,
        input_module=lstm_module,
        layer_out_dims=[hidden_size * 2, MAX_INT],
        batchnorm=True,
        seed=1,
        verbose=False,
    )
    em.train_model(Xs[0], Ys[0], Xs[1], Ys[1], n_epochs=10, verbose=False)
    score = em.score(Xs[2], Ys[2], verbose=False)
    self.assertGreater(score, 0.95)

def test_singletask_extras(self):
    em = EndModel(
        seed=1,
        verbose=False,
        batchnorm=True,
        dropout=0.01,
        layer_output_dims=[2, 8, 4],
    )
    Xs, Ys = self.single_problem
    em.train(Xs[0], Ys[0], Xs[1], Ys[1], verbose=False, n_epochs=5)
    score = em.score(Xs[2], Ys[2], verbose=False)
    self.assertGreater(score, 0.95)

def test_singletask(self):
    """Test basic single-task end model"""
    em = EndModel(
        seed=1,
        batchnorm=False,
        dropout=0.0,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train(Xs[0], Ys[0], Xs[1], Ys[1], n_epochs=5)
    score = em.score(Xs[2], Ys[2], verbose=False)
    self.assertGreater(score, 0.95)

def test_scoring(self):
    """Test the metrics the whole way through"""
    em = EndModel(
        seed=1,
        batchnorm=False,
        dropout=0.0,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model((Xs[0], Ys[0]), dev_data=(Xs[1], Ys[1]), n_epochs=5)
    metrics = list(METRICS.keys())
    scores = em.score((Xs[2], Ys[2]), metric=metrics, verbose=True)
    for i, metric in enumerate(metrics):
        self.assertGreater(scores[i], 0.95)

def test_singletask_extras(self):
    """Test batchnorm and dropout"""
    em = EndModel(
        seed=1,
        input_batchnorm=True,
        middle_batchnorm=True,
        input_dropout=0.01,
        middle_dropout=0.01,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model((Xs[0], Ys[0]), dev_data=(Xs[1], Ys[1]), n_epochs=5)
    score = em.score((Xs[2], Ys[2]), verbose=False)
    self.assertGreater(score, 0.95)

def test_lstm_embeddings_freeze(self):
    """Confirm that if embeddings are frozen, they do not change during training"""
    X = torch.randint(1, MAX_INT + 1, (n, SEQ_LEN)).long()
    Y = torch.zeros(n).long()
    needles = np.random.randint(1, SEQ_LEN - 1, n)
    for i in range(n):
        X[i, needles[i]] = MAX_INT + 1
        Y[i] = X[i, needles[i] + 1]

    Xs = self._split_dataset(X)
    Ys = self._split_dataset(Y)

    embed_size = 4
    hidden_size = 10
    vocab_size = MAX_INT + 2

    for freeze_embs in [True, False]:
        lstm_module = LSTMModule(
            embed_size,
            hidden_size,
            vocab_size=vocab_size,
            freeze=freeze_embs,
            verbose=False,
        )
        em = EndModel(
            k=MAX_INT,
            input_module=lstm_module,
            layer_out_dims=[hidden_size * 2, MAX_INT],
            verbose=False,
        )
        before = lstm_module.embeddings.weight.clone()
        em.train_model(
            (Xs[0], Ys[0]),
            dev_data=(Xs[1], Ys[1]),
            n_epochs=15,
            verbose=False,
        )
        after = lstm_module.embeddings.weight.clone()
        if freeze_embs:
            self.assertEqual(torch.abs(before - after).sum().item(), 0.0)
        else:
            self.assertNotEqual(torch.abs(before - after).sum().item(), 0.0)

def test_singletask(self):
    """Test basic single-task end model"""
    em = EndModel(
        seed=1,
        input_batchnorm=False,
        middle_batchnorm=False,
        input_dropout=0.0,
        middle_dropout=0.0,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model(
        (Xs[0], Ys[0]), valid_data=(Xs[1], Ys[1]), n_epochs=5, checkpoint=False
    )
    score = em.score((Xs[2], Ys[2]), verbose=False)
    self.assertGreater(score, 0.95)

def test_gpustorage(self):
    # Running basics tutorial problem
    with open("tutorials/data/basics_tutorial.pkl", "rb") as f:
        X, Y, L, D = pickle.load(f)

    Xs, Ys, Ls, Ds = split_data(
        X, Y, L, D, splits=[0.8, 0.1, 0.1], stratify_by=Y, seed=123
    )

    label_model = LabelModel(k=2, seed=123)
    label_model.train_model(Ls[0], Y_dev=Ys[1], n_epochs=500, log_train_every=25)
    Y_train_ps = label_model.predict_proba(Ls[0])

    # Creating a really large end model to use lots of memory
    end_model = EndModel([1000, 100000, 2], seed=123, device="cuda")

    # Getting initial GPU storage use
    initial_gpu_mem = GPUtil.getGPUs()[0].memoryUsed

    # Training model
    end_model.train_model(
        (Xs[0], Y_train_ps),
        valid_data=(Xs[1], Ys[1]),
        l2=0.1,
        batch_size=256,
        n_epochs=3,
        log_train_every=1,
        validation_metric="f1",
    )

    # Final GPU storage use
    final_gpu_mem = GPUtil.getGPUs()[0].memoryUsed

    # On a Titan X, this model uses ~ 3 GB of memory
    gpu_mem_difference = final_gpu_mem - initial_gpu_mem
    self.assertGreater(gpu_mem_difference, 1000)

def test_lstm_direct_features(self):
    """Confirm that lstm can work over features passed in directly
    (rather than embedded)."""
    X = torch.randint(1, MAX_INT + 1, (n, SEQ_LEN)).long()
    Y = X[:, 0]

    # Convert X to one-hot features
    Xf = torch.zeros((n, SEQ_LEN, MAX_INT)).long()
    for i in range(n):
        for j in range(SEQ_LEN):
            Xf[i, j, X[i, j] - 1] = 1
    X = Xf

    Xs = self._split_dataset(X)
    Ys = self._split_dataset(Y)

    embed_size = MAX_INT
    hidden_size = 10

    lstm_module = LSTMModule(
        embed_size,
        hidden_size,
        skip_embeddings=True,  # This is where we configure for this setting
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
    )
    em = EndModel(
        k=MAX_INT,
        input_module=lstm_module,
        layer_out_dims=[hidden_size, MAX_INT],
        optimizer="adam",
        batchnorm=True,
        seed=1,
        verbose=False,
    )
    em.train_model((Xs[0], Ys[0]), dev_data=(Xs[1], Ys[1]), n_epochs=15)
    score = em.score((Xs[2], Ys[2]), verbose=False)
    self.assertGreater(score, 0.95)

def test_resume_training(self):
    """Confirm that a checkpoint can be saved and reloaded without throwing error"""
    em = EndModel(
        seed=1,
        batchnorm=False,
        dropout=0.0,
        layer_out_dims=[2, 10, 2],
        verbose=False,
    )
    Xs, Ys = self.single_problem
    em.train_model(
        (Xs[0], Ys[0]),
        valid_data=(Xs[1], Ys[1]),
        n_epochs=5,
        checkpoint=True,
        checkpoint_every=1,
    )
    em.resume_training(
        (Xs[0], Ys[0]),
        valid_data=(Xs[1], Ys[1]),
        model_path="checkpoints/model_checkpoint_2.pth",
    )

def test_lstm_memorize_marker(self):
    """Confirm that lstm can return the token that comes after a special marker"""
    X = torch.randint(1, MAX_INT + 1, (n, SEQ_LEN)).long()
    Y = torch.zeros(n).long()
    needles = np.random.randint(1, SEQ_LEN - 1, n)
    for i in range(n):
        X[i, needles[i]] = MAX_INT + 1
        Y[i] = X[i, needles[i] + 1]

    Xs = self._split_dataset(X)
    Ys = self._split_dataset(Y)

    embed_size = 4
    hidden_size = 10
    vocab_size = MAX_INT + 2

    lstm_module = LSTMModule(
        embed_size,
        hidden_size,
        vocab_size=vocab_size,
        bidirectional=True,
        verbose=False,
        lstm_reduction="attention",
    )
    em = EndModel(
        k=MAX_INT,
        input_module=lstm_module,
        layer_out_dims=[hidden_size * 2, MAX_INT],
        batchnorm=True,
        seed=1,
        verbose=False,
    )
    em.train_model(
        (Xs[0], Ys[0]), dev_data=(Xs[1], Ys[1]), n_epochs=15, verbose=False
    )
    score = em.score((Xs[2], Ys[2]), verbose=False)
    self.assertGreater(score, 0.95)

def load_model_snapshot(args, inputdir):
    """Load a saved EndModel snapshot (init kwargs + best model weights) from inputdir."""
    map_location = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    init_kwargs = pickle.load(open(f'{inputdir}/init_kwargs.pickle', "rb"))
    init_kwargs["seed"] = args.seed
    init_kwargs["device"] = map_location
    #init_kwargs["input_batchnorm"] = False
    model = EndModel(**init_kwargs)
    #import ipdb; ipdb.set_trace()

    #model_state = torch.load(open(f"{inputdir}/best_model.pth",'rb'))
    model_state = torch.load(
        open(f"{inputdir}/best_model.pth", 'rb'), map_location=map_location
    )
    model.load_state_dict(model_state["model"])  #.to(map_location)
    model.to(map_location)
    #model.optimizer.load_state_dict(model_state["optimizer"])
    #model.lr_scheduler.load_state_dict(model_state["lr_scheduler"])
    return model

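# NOTE (editorial sketch): hypothetical usage of load_model_snapshot. The snapshot
# directory and the data_loader used for scoring are illustrative placeholders
# (not objects defined at this point in the script), and the argparse parser is
# assumed to be the one defined elsewhere in this file.
if __name__ == "__main__":
    args = parser.parse_args()
    model = load_model_snapshot(args, inputdir=args.checkpoint_dir)
    model.eval()
    model.score(
        data_loader["test"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1'],
    )
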
def train_model(args):
    #global args
    #args = parser.parse_args()

    # Create datasets and dataloaders
    train, dev, test, classes = load_dataset(args)
    #print('train size:', len(train))  # 106
    #print('dev size:', len(dev))  # 216
    #print('test size:', len(test))  # 90
    # data in tuple of the form (series, label)
    # series shape [30, 3, 32, 32]
    #import pdb; pdb.set_trace()

    train_loader, dev_loader, test_loader = data_loader(
        train, dev, test, args.batch_size
    )

    hidden_size = 128
    num_classes = 2
    encode_dim = 132  # using get_frm_output_size()

    # Define input encoder
    cnn_encoder = FrameEncoderBAV

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
        encoder_class=cnn_encoder,
    )

    # Define end model
    end_model = EndModel(
        input_module=lstm_module,
        layer_out_dims=[hidden_size, num_classes],
        optimizer="adam",
        use_cuda=cuda,
        batchnorm=False,
        seed=args.seed,
        verbose=False,
    )
    #end_model.config['train_config']['validation_metric'] = 'f1'

    # Train end model
    end_model.train_model(
        train_data=train_loader,
        valid_data=dev_loader,
        l2=args.weight_decay,
        lr=args.lr,
        n_epochs=args.n_epochs,
        log_train_every=1,
        verbose=True,
        #loss_weights=[0.96, 0.04],
        batchnorm=False,
        checkpoint_metric='f1',
        log_valid_metrics=['accuracy', 'f1'],
        input_dropout=0.1,
        middle_dropout=0.25,
        validation_metric='f1',
    )

    end_model.score(
        dev_loader,
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc'],
    )

    # Test end model
    '''
def train_model(args):
    # Create datasets and dataloaders
    train, dev, test = load_dataset(args)
    data_loader = get_data_loader(train, dev, test, args.batch_size, args.num_workers)

    hidden_size = 128
    num_classes = 2
    encode_dim = 1000  # using get_frm_output_size()

    # Define input encoder
    cnn_encoder = FrameEncoderOC

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    #import ipdb; ipdb.set_trace()

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction=args.lstm_reduction,
        encoder_class=cnn_encoder,
    )

    init_kwargs = {
        "layer_out_dims": [hidden_size, num_classes],
        "input_module": lstm_module,
        "optimizer": "adam",
        "verbose": False,
        "input_batchnorm": False,
        "use_cuda": cuda,
        'seed': args.seed,
        'device': device,
    }
    end_model = EndModel(**init_kwargs)

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    with open(args.checkpoint_dir + '/init_kwargs.pickle', "wb") as f:
        pickle.dump(init_kwargs, f, protocol=pickle.HIGHEST_PROTOCOL)

    dropout = 0.4

    # Train end model
    end_model.train_model(
        train_data=data_loader["train"],
        valid_data=data_loader["dev"],
        l2=args.weight_decay,
        lr=args.lr,
        n_epochs=args.n_epochs,
        log_train_every=1,
        verbose=True,
        progress_bar=True,
        #loss_weights=[0.55, 0.45],
        batchnorm=False,
        input_dropout=0.1,
        middle_dropout=dropout,
        checkpoint_dir=args.checkpoint_dir,
        log_valid_metrics=['accuracy', 'f1'],
        checkpoint_metric='f1',
    )

    end_model.score(
        data_loader["dev"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc'],
    )

def train_model(args):
    #global args
    #args = parser.parse_args()

    hidden_size = 128
    num_classes = 2
    encode_dim = 1000  # using get_frm_output_size()

    L, Y = load_labels(args)

    # Label Model
    # labelling functions analysis
    print(lf_summary(L["dev"], Y=Y["dev"]))

    # training label model
    label_model = LabelModel(k=num_classes, seed=123)
    label_model.train_model(L["train"], Y["dev"], n_epochs=500, log_train_every=50)

    # evaluating label model
    print('Trained Label Model Metrics:')
    label_model.score(
        (L["dev"], Y["dev"]), metric=['accuracy', 'precision', 'recall', 'f1']
    )

    # comparison with majority vote of LFs
    mv = MajorityLabelVoter(seed=123)
    print('Majority Label Voter Metrics:')
    mv.score((L["dev"], Y["dev"]), metric=['accuracy', 'precision', 'recall', 'f1'])

    Ytrain_p = label_model.predict_proba(L["train"])
    #print(Ytrain_ps.shape)  # (377*50, 2)
    #Ydev_p = label_model.predict_proba(L["dev"])

    # test models
    #label_model.score((Ltest, Ytest), metric=['accuracy', 'precision', 'recall', 'f1'])

    # End Model
    # Create datasets and dataloaders
    train, dev, test = load_dataset(args, Ytrain_p, Y["dev"], Y["test"])
    data_loader = get_data_loader(train, dev, test, args.batch_size, args.num_workers)
    #print(len(data_loader["train"]))  # 18850 / batch_size
    #print(len(data_loader["dev"]))  # 1500 / batch_size
    #print(len(data_loader["test"]))  # 1000 / batch_size
    #import ipdb; ipdb.set_trace()

    # Define input encoder
    cnn_encoder = FrameEncoderOC

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    #import ipdb; ipdb.set_trace()

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
        encoder_class=cnn_encoder,
    )

    # Define end model
    end_model = EndModel(
        input_module=lstm_module,
        layer_out_dims=[hidden_size, num_classes],
        optimizer="adam",
        #use_cuda=cuda,
        batchnorm=True,
        seed=123,
        verbose=False,
        device=device,
    )

    #print('Training model')
    #tic = time.time()

    dropout = 0.4

    # Train end model
    end_model.train_model(
        train_data=data_loader["train"],
        valid_data=data_loader["dev"],
        l2=args.weight_decay,
        lr=args.lr,
        n_epochs=args.n_epochs,
        log_train_every=1,
        verbose=True,
        progress_bar=True,
        loss_weights=[0.45, 0.55],
        batchnorm=True,
        input_dropout=dropout,
        middle_dropout=dropout,
        #validation_metric='f1',
    )
    #print('Time taken for training:')
    #print(time.time() - tic)

    # evaluate end model
    end_model.score(
        data_loader["dev"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1'],
    )

def train_model(args):
    # Create datasets and dataloaders
    train, dev, test = load_dataset(args)
    print('train size:', len(train))
    print('dev size:', len(dev))
    print('test size:', len(test))
    # data in tuple of the form (frame, label)
    # frame shape (3, 224, 224)
    #import pdb; pdb.set_trace()

    data_loader = get_data_loader(train, dev, test, args.batch_size)

    num_classes = 2
    encode_dim = 1000

    # Define input encoder - can use the same
    #cnn_encoder = FrameEncoderOC

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    model = torch_models.resnet34(pretrained=True)
    model = model.double()
    #model = model.float()

    # Define end model
    end_model = EndModel(
        input_module=model,
        layer_out_dims=[encode_dim, num_classes],
        optimizer="adam",
        use_cuda=cuda,
        input_batchnorm=True,
        seed=args.seed,
        verbose=False,
    )

    dropout = 0.4

    # Train end model
    end_model.train_model(
        train_data=data_loader["train"],
        valid_data=data_loader["dev"],
        l2=args.weight_decay,
        lr=args.lr,
        n_epochs=args.n_epochs,
        log_train_every=1,
        verbose=True,
        progress_bar=True,
        #loss_weights=[0.9, 0.1],
        batchnorm=False,
        log_valid_metrics=['accuracy', 'f1'],
        checkpoint_metric='f1',
        checkpoint_dir=args.checkpoint_dir,
        #validation_metric='accuracy',
        #input_dropout=0.1,
        middle_dropout=dropout,
    )

    end_model.score(
        data_loader["dev"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc'],
    )
    #import ipdb; ipdb.set_trace()

    # saving dev set performance
    Y_p, Y, Y_s = end_model._get_predictions(
        data_loader["dev"], break_ties='random', return_probs=True
    )
    dev_labels = dev.labels
    Y_s_0 = list(Y_s[:, 0])
    Y_s_1 = list(Y_s[:, 1])
    dev_ID = list(dev_labels["ID"])
    dev_LABEL = list(dev_labels["LABEL"])
    Y_p = list(Y_p)
    Y = list(Y)

    Y_p.insert(0, "Y_p")
    Y.insert(0, "Y")
    Y_s_0.insert(0, "Y_s_0")
    Y_s_1.insert(0, "Y_s_1")
    dev_ID.insert(0, "ID")
    dev_LABEL.insert(0, "LABEL")

    np.save(
        args.mr_result_filename + "_dev",
        np.column_stack((dev_ID, dev_LABEL, Y_p, Y, Y_s_0, Y_s_1)),
    )

    # saving test set performance
    Y_p, Y, Y_s = end_model._get_predictions(
        data_loader["test"], break_ties='random', return_probs=True
    )
    test_labels = test.labels
    Y_s_0 = list(Y_s[:, 0])
    Y_s_1 = list(Y_s[:, 1])
    test_ID = list(test_labels["ID"])
    test_LABEL = list(test_labels["LABEL"])
    Y_p = list(Y_p)
    Y = list(Y)

    Y_p.insert(0, "Y_p")
    Y.insert(0, "Y")
    Y_s_0.insert(0, "Y_s_0")
    Y_s_1.insert(0, "Y_s_1")
    test_ID.insert(0, "ID")
    test_LABEL.insert(0, "LABEL")

    np.save(
        args.mr_result_filename + "_test",
        np.column_stack((test_ID, test_LABEL, Y_p, Y, Y_s_0, Y_s_1)),
    )

def train_model(args):
    #global args
    #args = parser.parse_args()

    hidden_size = 128
    num_classes = 2
    encode_dim = 1000  # using get_frm_output_size()

    L, Y = load_labels(args)

    data_list = {}
    data_list["dev"] = glob(args.dev + '/la_4ch/*.npy')
    data_list["test"] = glob(args.test + '/la_4ch/*.npy')

    # End Model
    # Create datasets and dataloaders
    dev, test = load_dataset(data_list, Y)
    data_loader = get_data_loader(dev, test, args.batch_size, args.num_workers)
    #print(len(data_loader["dev"]))  # 1500 / batch_size
    #print(len(data_loader["test"]))  # 1000 / batch_size
    #import ipdb; ipdb.set_trace()

    # Define input encoder
    cnn_encoder = FrameEncoderOC

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
        encoder_class=cnn_encoder,
    )

    init_kwargs = {
        "layer_out_dims": [hidden_size, num_classes],
        "input_module": lstm_module,
        "optimizer": "adam",
        "verbose": False,
        "input_batchnorm": False,
        "use_cuda": cuda,
        'seed': args.seed,
        'device': device,
    }
    end_model = EndModel(**init_kwargs)

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    with open(args.checkpoint_dir + '/init_kwargs.pickle', "wb") as f:
        pickle.dump(init_kwargs, f, protocol=pickle.HIGHEST_PROTOCOL)

    # Train end model
    end_model.train_model(
        train_data=data_loader["dev"],
        valid_data=data_loader["test"],
        l2=args.weight_decay,
        lr=args.lr,
        n_epochs=args.n_epochs,
        log_train_every=1,
        verbose=True,
        progress_bar=True,
        loss_weights=[0.55, 0.45],
        batchnorm=args.batchnorm,
        middle_dropout=args.dropout,
        checkpoint=False,
        #checkpoint_every=args.n_epochs,
        #checkpoint_best=False,
        #checkpoint_dir=args.checkpoint_dir,
        #validation_metric='f1',
    )

    # evaluate end model
    end_model.score(
        data_loader["test"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc', 'ndcg'],
    )
    #end_model.score((Xtest, Ytest), verbose=True, metric=['accuracy', 'precision', 'recall', 'f1'])

    # saving model
    state = {
        "model": end_model.state_dict(),
        # "optimizer": optimizer.state_dict(),
        # "lr_scheduler": lr_scheduler.state_dict() if lr_scheduler else None,
        "score": end_model.score(
            data_loader["test"],
            verbose=False,
            metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc', 'ndcg'],
        ),
    }
    checkpoint_path = f"{args.checkpoint_dir}/best_model.pth"
    torch.save(state, checkpoint_path)

def test_lstm_determinism(self):
    """Test whether training and scoring is deterministic given seed"""
    X = torch.randint(1, MAX_INT + 1, (n, SEQ_LEN)).long()
    Y = torch.zeros(n).long()
    needles = np.random.randint(1, SEQ_LEN - 1, n)
    for i in range(n):
        X[i, needles[i]] = MAX_INT + 1
        Y[i] = X[i, needles[i] + 1]

    Xs = self._split_dataset(X)
    Ys = self._split_dataset(Y)

    embed_size = 4
    hidden_size = 10
    vocab_size = MAX_INT + 2

    lstm_module = LSTMModule(
        embed_size,
        hidden_size,
        vocab_size=vocab_size,
        seed=123,
        bidirectional=True,
        verbose=False,
        lstm_reduction="attention",
    )
    em = EndModel(
        k=MAX_INT,
        input_module=lstm_module,
        layer_out_dims=[hidden_size * 2, MAX_INT],
        batchnorm=True,
        seed=123,
        verbose=False,
    )
    em.train_model(
        (Xs[0], Ys[0]), dev_data=(Xs[1], Ys[1]), n_epochs=2, verbose=False
    )
    score_1 = em.score((Xs[2], Ys[2]), verbose=False)

    # Test scoring determinism
    score_2 = em.score((Xs[2], Ys[2]), verbose=False)
    self.assertEqual(score_1, score_2)

    # Test training determinism
    lstm_module_2 = LSTMModule(
        embed_size,
        hidden_size,
        vocab_size=vocab_size,
        seed=123,
        bidirectional=True,
        verbose=False,
        lstm_reduction="attention",
    )
    em_2 = EndModel(
        k=MAX_INT,
        input_module=lstm_module_2,
        layer_out_dims=[hidden_size * 2, MAX_INT],
        batchnorm=True,
        seed=123,
        verbose=False,
    )
    em_2.train_model(
        (Xs[0], Ys[0]), dev_data=(Xs[1], Ys[1]), n_epochs=2, verbose=False
    )
    score_3 = em_2.score((Xs[2], Ys[2]), verbose=False)
    self.assertEqual(score_1, score_3)

def train_model(args):
    #global args
    #args = parser.parse_args()

    hidden_size = 128
    num_classes = 2
    encode_dim = 1000  # using get_frm_output_size()

    L, Y = load_labels(args)

    '''
    # Label Model
    # labelling functions analysis
    print(lf_summary(L["dev"], Y=Y["dev"]))

    # training label model
    label_model = LabelModel(k=num_classes, seed=123)
    label_model.train_model(L["train"], Y["dev"], n_epochs=500, log_train_every=50)

    # evaluating label model
    print('Trained Label Model Metrics:')
    label_model.score((L["dev"], Y["dev"]), metric=['accuracy', 'precision', 'recall', 'f1'])

    # comparison with majority vote of LFs
    mv = MajorityLabelVoter(seed=123)
    print('Majority Label Voter Metrics:')
    mv.score((L["dev"], Y["dev"]), metric=['accuracy', 'precision', 'recall', 'f1'])

    Ytrain_p = label_model.predict_proba(L["train"])
    #print(Ytrain_ps.shape)  # (377*50, 2)
    #Ydev_p = label_model.predict_proba(L["dev"])

    # test models
    #label_model.score((Ltest, Ytest), metric=['accuracy', 'precision', 'recall', 'f1'])
    '''

    # loading train labels
    #Ytrain = np.load(args.train_labels)
    #Ytrain = 2 - Ytrain  # only for mv_400
    #Ytrain_p = np.zeros((len(Ytrain), 2))
    #Ytrain_p[:, 1] = Ytrain
    #Ytrain_p[:, 0] = 1 - Ytrain
    Ytrain_p = np.load(args.train_labels)

    # End Model
    # Create datasets and dataloaders
    train, dev, test = load_dataset(args, Ytrain_p, Y["dev"], Y["test"])
    data_loader = get_data_loader(train, dev, test, args.batch_size, args.num_workers)
    #print(len(data_loader["train"]))  # 18850 / batch_size
    #print(len(data_loader["dev"]))  # 1500 / batch_size
    #print(len(data_loader["test"]))  # 1000 / batch_size

    # Define input encoder
    cnn_encoder = FrameEncoderOC

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction=args.lstm_reduction,
        encoder_class=cnn_encoder,
        encoder_kwargs={"requires_grad": args.requires_grad},
    )

    '''
    # Define end model
    end_model = EndModel(
        input_module=lstm_module,
        layer_out_dims=[hidden_size, num_classes],
        optimizer="adam",
        #use_cuda=cuda,
        batchnorm=False,
        seed=args.seed,
        verbose=False,
        device=device,
    )
    '''

    init_kwargs = {
        "layer_out_dims": [hidden_size, num_classes],
        "input_module": lstm_module,
        "optimizer": "adam",
        "verbose": False,
        "input_batchnorm": False,
        "use_cuda": cuda,
        'seed': args.seed,
        'device': device,
    }
    end_model = EndModel(**init_kwargs)

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    with open(args.checkpoint_dir + '/init_kwargs.pickle', "wb") as f:
        pickle.dump(init_kwargs, f, protocol=pickle.HIGHEST_PROTOCOL)

    dropout = 0.4

    # Train end model
    end_model.train_model(
        train_data=data_loader["train"],
        valid_data=data_loader["dev"],
        l2=args.weight_decay,
        lr=args.lr,
        n_epochs=args.n_epochs,
        log_train_every=1,
        verbose=True,
        progress_bar=True,
        loss_weights=[0.55, 0.45],
        batchnorm=True,
        input_dropout=0.1,
        middle_dropout=dropout,
        checkpoint_dir=args.checkpoint_dir,
        #validation_metric='f1',
    )

    # evaluate end model
    print('Dev Set Performance')
    end_model.score(
        data_loader["dev"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc', 'ndcg'],
    )
    print('Test Set Performance')
    end_model.score(
        data_loader["test"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc', 'ndcg'],
    )

def train_model(args):
    # Create datasets and dataloaders
    train, dev, test = load_dataset(args)
    #print('train size:', len(train))  # 250
    #print('dev size:', len(dev))  # 250
    #print('test size:', len(test))  # 250
    # data in tuple of the form (series, label)
    # series shape (50, 3, 224, 224)
    #import pdb; pdb.set_trace()

    data_loader = get_data_loader(train, dev, test, args.batch_size)

    hidden_size = 128
    num_classes = 2
    encode_dim = 1000

    # Define input encoder
    cnn_encoder = FrameEncoderOC

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
        encoder_class=cnn_encoder,
    )

    # Define end model
    end_model = EndModel(
        input_module=lstm_module,
        layer_out_dims=[hidden_size, num_classes],
        optimizer="adam",
        use_cuda=cuda,
        batchnorm=True,
        seed=args.seed,
        verbose=False,
    )

    dropout = 0.4

    # Train end model
    end_model.train_model(
        train_data=data_loader["train"],
        valid_data=data_loader["dev"],
        l2=args.weight_decay,
        lr=args.lr,
        n_epochs=args.n_epochs,
        log_train_every=1,
        verbose=True,
        progress_bar=True,
        #loss_weights=[0.9, 0.1],
        batchnorm=False,
        log_valid_metrics=['accuracy', 'f1'],
        checkpoint_metric='f1',
        checkpoint_dir=args.checkpoint_dir,
        #validation_metric='accuracy',
        #input_dropout=0.1,
        middle_dropout=dropout,
    )

    print('Dev Set Performance')
    end_model.score(
        data_loader["dev"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc'],
    )
    print('Test Set Performance')
    end_model.score(
        data_loader["test"],
        verbose=True,
        metric=['accuracy', 'precision', 'recall', 'f1', 'roc-auc'],
    )

    Y_dev_ps = label_model.predict_proba(Ls[2])
    plot_probabilities_histogram(
        Y_dev_ps[:, 0], title="Probabilistic Label Distribution"
    )
except ModuleNotFoundError:
    print(
        "The tools in contrib/visualization/ require matplotlib. "
        "Try `conda/pip install matplotlib`."
    )

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

end_model = EndModel([1000, 10, 2], seed=123, device=device)
#print(type(Xs[0]))
#print(Xs[0])
#print(type(Y_train_ps))
#end_model.train_model((Xs[0], Y_train_ps), valid_data=(Xs[1], Ys[1]), lr=0.01, l2=0.01, batch_size=256, n_epochs=5, checkpoint_metric='accuracy', checkpoint_metric_mode='max')

#Xs = tf.convert_to_tensor(Xs)
#Y_train_ps = tf.convert_to_tensor(Y_train_ps)
#Ys = tf.convert_to_tensor(Ys)
#Ls = tf.convert_to_tensor(Ls)

end_model.train_model(
    (Xs[0], Y_train_ps),
    valid_data=(Xs[1], Ys[1]),
    lr=0.01,
    l2=0.01,
    batch_size=256,
    n_epochs=5,
    checkpoint_metric='accuracy',