def main(config):
    """Entry point: train or evaluate a text classifier based on config.mode."""
    if config.mode == 'train':
        train_data, valid_data = load_dataset(config.mode, config.random_seed)

        # CNN models require every input to be at least as long as the widest
        # convolution filter, so pad all sentences up to filter_sizes[-1].
        if config.model == 'cnn':
            min_len = config.filter_sizes[-1]
            train_data = pad_sentence(train_data, min_len)
            valid_data = pad_sentence(valid_data, min_len)

        train_iter, valid_iter, pad_idx = make_iter(
            config.batch_size,
            config.mode,
            train_data=train_data,
            valid_data=valid_data)

        trainer = Trainer(
            config,
            pad_idx,
            train_iter=train_iter,
            valid_iter=valid_iter)
        trainer.train()

    else:
        test_data = load_dataset(config.mode, config.random_seed)

        # Same minimum-length padding as in training (CNN only).
        if config.model == 'cnn':
            test_data = pad_sentence(test_data, config.filter_sizes[-1])

        test_iter, pad_idx = make_iter(
            config.batch_size,
            config.mode,
            test_data=test_data)

        Trainer(config, pad_idx, test_iter=test_iter).inference()
# Example #2
def main(config):
    """Train or run inference with the seq2seq variant selected by config.model."""
    # Map each supported model name to its hyper-parameter file and load only
    # the requested one — the original parsed all three JSON files up front,
    # which does needless work and fails if any unselected config is missing.
    config_paths = {
        'seq2seq': 'configs/params.json',
        'seq2seq_gru': 'configs/params_gru.json',
        'seq2seq_attention': 'configs/params_attention.json'
    }

    # An unknown model still raises KeyError, as before.
    params = Params(config_paths[config.model])

    if config.mode == 'train':
        train_data, valid_data = load_dataset(config.mode)
        train_iter, valid_iter = make_iter(params.batch_size,
                                           config.mode,
                                           train_data=train_data,
                                           valid_data=valid_data)

        trainer = Trainer(params,
                          config.mode,
                          train_iter=train_iter,
                          valid_iter=valid_iter)
        trainer.train()

    else:
        test_data = load_dataset(config.mode)
        test_iter = make_iter(params.batch_size,
                              config.mode,
                              test_data=test_data)

        trainer = Trainer(params, config.mode, test_iter=test_iter)
        trainer.inference()
# Example #3
def run_pytorch_cnn(train, test, layers, epochs, verbose=True, trials=1, deterministic=True):
    '''
    train, test = input data as ((X, y), (X, y)) pairs
    layers = list of PyTorch layers making up the CNN
    epochs = number of epochs to run the model for each training trial
    trials = number of evaluation trials, resetting weights before each trial
    deterministic = seed RNGs for reproducible runs

    Returns the sklearn precision/recall curve computed on the validation set.
    '''
    # `model` below comes from the last loop iteration; zero trials would
    # leave it unbound and raise a confusing NameError instead.
    if trials < 1:
        raise ValueError("trials must be >= 1")

    if deterministic:
        make_deterministic()

    # Load the dataset.
    (X_train, y1), (X_val, y2) = train, test

    # Add a channel dimension (single channel) as Conv2d expects: (N, 1, m, m).
    m = X_train.shape[1]
    X_train = X_train.reshape((X_train.shape[0], 1, m, m))
    X_val = X_val.reshape((X_val.shape[0], 1, m, m))

    val_acc, test_acc = 0, 0
    for trial in range(trials):
        # Reset the weights so every trial starts from a fresh initialization.
        for l in layers:
            weight_reset(l)
        # Make dataset iterables.
        train_iter, val_iter = make_iter(X_train, y1, batch_size=32), make_iter(X_val, y2, batch_size=32)
        # Run the model.
        model, vacc, tacc = \
            run_pytorch(train_iter, val_iter, None, layers, epochs, torch.tensor([1, 1]), verbose=verbose)
        val_acc += vacc if vacc else 0
        test_acc += tacc if tacc else 0
    if val_acc:
        print("\nAvg. validation accuracy:" + str(val_acc / trials))
    if test_acc:
        print("\nAvg. test accuracy:" + str(test_acc / trials))

    # WARNING: must cast input to FloatTensor, not DoubleTensor.
    predict = model(torch.FloatTensor(X_val)).data.numpy()

    # NOTE(review): np.amax takes the maximum score across classes, not the
    # positive-class score — confirm this is the intended PR-curve input.
    return sklearn.metrics.precision_recall_curve(y2, np.amax(predict, axis=1), pos_label=1)
# Example #4
def run_pytorch_fc(train, test, layers, epochs, verbose=True, trials=1, deterministic=True):
    '''
    train, test = input data as ((X, y), (X, y)) pairs
    layers = list of PyTorch layers, e.g. [Linear(in_features=784, out_features=10)]
    epochs = number of epochs to run the model for each training trial
    trials = number of evaluation trials, resetting weights before each trial
    deterministic = seed RNGs for reproducible runs

    Returns the sklearn precision/recall curve computed on the validation set.
    '''
    # `model` below comes from the last loop iteration; zero trials would
    # leave it unbound and raise a confusing NameError instead.
    if trials < 1:
        raise ValueError("trials must be >= 1")

    if deterministic:
        make_deterministic()
    (X_train, y1), (X_val, y2) = train, test

    # NOTE: the original computed balanced class weights here via
    # sklearn.utils.class_weight.compute_class_weight but never used the
    # result (a hard-coded torch.tensor([50, .5]) is passed instead), and the
    # positional call raises TypeError on sklearn >= 0.24 (keyword-only args).
    # The dead line has been removed.

    val_acc, test_acc = 0, 0
    for trial in range(trials):
        # Reset the weights so every trial starts from a fresh initialization.
        for l in layers:
            weight_reset(l)
        # Make dataset iterables.
        train_iter, val_iter = make_iter(X_train, y1, batch_size=32), make_iter(X_val, y2, batch_size=32)
        # Run the model.
        model, vacc, tacc = \
            run_pytorch(train_iter, val_iter, None, layers, epochs, torch.tensor([50, .5]), verbose=verbose)
        val_acc += vacc if vacc else 0
        test_acc += tacc if tacc else 0
    if val_acc:
        print("\nAvg. validation accuracy:" + str(val_acc / trials))
    if test_acc:
        print("\nAvg. test accuracy:" + str(test_acc / trials))

    # WARNING: must cast input to FloatTensor, not DoubleTensor.
    predict = model(torch.FloatTensor(X_val)).data.numpy()

    # NOTE(review): np.amax takes the maximum score across classes, not the
    # positive-class score — confirm this is the intended PR-curve input.
    return sklearn.metrics.precision_recall_curve(y2, np.amax(predict, axis=1), pos_label=1)
# Example #5
def main(config):
    """Train or run inference using hyper-parameters from config/params.json."""
    params = Params('config/params.json')

    # Inference path first (guard clause); the rest of the body is training.
    if config.mode != 'train':
        test_data = load_dataset(config.mode)
        test_iter = make_iter(params.batch_size, config.mode, test_data=test_data)
        Trainer(params, config.mode, test_iter=test_iter).inference()
        return

    train_data, valid_data = load_dataset(config.mode)
    train_iter, valid_iter = make_iter(
        params.batch_size, config.mode,
        train_data=train_data, valid_data=valid_data)

    trainer = Trainer(
        params, config.mode,
        train_iter=train_iter, valid_iter=valid_iter)
    trainer.train()