def main(config):
    """Train or run inference for a sentence-classification model.

    When ``config.mode == 'train'`` this loads train/valid splits, builds
    iterators, and trains; any other mode loads the test split and runs
    inference instead.
    """
    is_training = config.mode == 'train'
    if is_training:
        train_data, valid_data = load_dataset(config.mode, config.random_seed)

        # A CNN needs every input at least as long as its widest filter,
        # so pad each sentence up to filter_sizes[-1].
        if config.model == 'cnn':
            min_len = config.filter_sizes[-1]
            train_data = pad_sentence(train_data, min_len)
            valid_data = pad_sentence(valid_data, min_len)

        train_iter, valid_iter, pad_idx = make_iter(
            config.batch_size, config.mode,
            train_data=train_data, valid_data=valid_data)

        trainer = Trainer(config, pad_idx,
                          train_iter=train_iter, valid_iter=valid_iter)
        trainer.train()
    else:
        test_data = load_dataset(config.mode, config.random_seed)

        if config.model == 'cnn':
            test_data = pad_sentence(test_data, config.filter_sizes[-1])

        test_iter, pad_idx = make_iter(config.batch_size, config.mode,
                                       test_data=test_data)

        trainer = Trainer(config, pad_idx, test_iter=test_iter)
        trainer.inference()
def main(config):
    """Select the seq2seq variant's hyper-parameters, then train or infer.

    Fix: the original built a ``Params`` object for *every* model up front,
    reading all three JSON config files even though only one is used — a
    missing or malformed config for an unrelated model crashed every run.
    We now map model names to config paths and construct only the selected
    model's ``Params``. An unknown ``config.model`` still raises ``KeyError``,
    exactly as before.
    """
    config_paths = {
        'seq2seq': 'configs/params.json',
        'seq2seq_gru': 'configs/params_gru.json',
        'seq2seq_attention': 'configs/params_attention.json',
    }
    # Lazy: only the chosen model's JSON file is read.
    params = Params(config_paths[config.model])

    if config.mode == 'train':
        train_data, valid_data = load_dataset(config.mode)
        train_iter, valid_iter = make_iter(params.batch_size, config.mode,
                                           train_data=train_data,
                                           valid_data=valid_data)

        trainer = Trainer(params, config.mode,
                          train_iter=train_iter, valid_iter=valid_iter)
        trainer.train()
    else:
        test_data = load_dataset(config.mode)
        test_iter = make_iter(params.batch_size, config.mode,
                              test_data=test_data)

        trainer = Trainer(params, config.mode, test_iter=test_iter)
        trainer.inference()
def run_pytorch_cnn(train, test, layers, epochs, verbose=True, trials=1, deterministic=True):
    """Train/evaluate a CNN over several trials and return a PR curve.

    train, test   : ((X, y), (X, y)) pairs of inputs and labels
    layers        : list of PyTorch layers making up the model
    epochs        : epochs per trial
    trials        : number of trials; weights are reset before each one
    deterministic : seed RNGs via make_deterministic() when True

    Returns sklearn.metrics.precision_recall_curve computed from the last
    trial's model on the validation inputs.
    """
    if deterministic:
        make_deterministic()

    (X_train, y1), (X_val, y2) = train, test

    # Reshape flat samples into single-channel square images: (N, 1, side, side).
    # NOTE(review): assumes each sample is side x side with side == shape[1].
    side = X_train.shape[1]
    X_train = X_train.reshape((X_train.shape[0], 1, side, side))
    X_val = X_val.reshape((X_val.shape[0], 1, side, side))

    val_acc, test_acc = 0, 0
    for _ in range(trials):
        # Fresh weights each trial so trials are independent.
        for layer in layers:
            weight_reset(layer)

        # Batched iterables over the two splits.
        train_iter = make_iter(X_train, y1, batch_size=32)
        val_iter = make_iter(X_val, y2, batch_size=32)

        # Equal class weights ([1, 1]) for the CNN.
        model, vacc, tacc = run_pytorch(train_iter, val_iter, None, layers,
                                        epochs, torch.tensor([1, 1]),
                                        verbose=verbose)
        if vacc:
            val_acc += vacc
        if tacc:
            test_acc += tacc

    if val_acc:
        print("\nAvg. validation accuracy:" + str(val_acc / trials))
    if test_acc:
        print("\nAvg. test accuracy:" + str(test_acc / trials))

    # PR curve from the final trial's model on the validation set.
    predict = model(torch.FloatTensor(X_val)).data.numpy()
    return sklearn.metrics.precision_recall_curve(y2, np.amax(predict, axis=1), pos_label=1)
def run_pytorch_fc(train, test, layers, epochs, verbose=True, trials=1, deterministic=True):
    """Train/evaluate a fully-connected model over several trials and
    return a precision-recall curve.

    train, test   : ((X, y), (X, y)) pairs of inputs and labels
    layers        : list of PyTorch layers, e.g. [Linear(in_features=784, out_features=10)]
    epochs        : number of epochs to run the model for each training trial
    trials        : number of evaluation trials, resetting weights before each trial
    deterministic : seed RNGs via make_deterministic() when True

    Fix: removed the dead `weights` local — it was computed with an
    expensive sklearn.utils.class_weight.compute_class_weight call (using
    positional arguments that sklearn >= 1.2 rejects outright) but never
    used; the hard-coded torch.tensor([50, .5]) below is what is actually
    passed to run_pytorch, exactly as before.
    """
    if deterministic:
        make_deterministic()

    (X_train, y1), (X_val, y2) = train, test

    val_acc, test_acc = 0, 0
    for trial in range(trials):
        # Reset the weights so each trial starts fresh.
        for l in layers:
            weight_reset(l)

        # Make dataset iterables for the two splits.
        train_iter, val_iter = (make_iter(X_train, y1, batch_size=32),
                                make_iter(X_val, y2, batch_size=32))

        # Run the model with fixed class weights [50, .5].
        model, vacc, tacc = \
            run_pytorch(train_iter, val_iter, None, layers, epochs,
                        torch.tensor([50, .5]), verbose=verbose)
        val_acc += vacc if vacc else 0
        test_acc += tacc if tacc else 0

    if val_acc:
        print("\nAvg. validation accuracy:" + str(val_acc / trials))
    if test_acc:
        print("\nAvg. test accuracy:" + str(test_acc / trials))

    # WARNING: must cast input to FloatTensor not Double Tensor
    predict = model(torch.FloatTensor(X_val)).data.numpy()
    return sklearn.metrics.precision_recall_curve(y2, np.amax(predict, axis=1), pos_label=1)
def main(config):
    """Load hyper-parameters from config/params.json, then train or infer
    depending on config.mode."""
    params = Params('config/params.json')

    if config.mode != 'train':
        # Inference path: single test split, single iterator.
        test_data = load_dataset(config.mode)
        test_iter = make_iter(params.batch_size, config.mode,
                              test_data=test_data)
        Trainer(params, config.mode, test_iter=test_iter).inference()
        return

    # Training path: train/valid splits and their iterators.
    train_data, valid_data = load_dataset(config.mode)
    train_iter, valid_iter = make_iter(params.batch_size, config.mode,
                                       train_data=train_data,
                                       valid_data=valid_data)
    Trainer(params, config.mode,
            train_iter=train_iter, valid_iter=valid_iter).train()