Example #1
def main(args):

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    train, val, test = get_clf(args.train_data, args.val_data, args.test_data,
                               args.max_train_sents, args.max_val_sents,
                               args.max_test_sents)

    net = Classifier_Net()
    if args.load_saved:
        print('Loading from saved model ...')
        net = torch.load(os.path.join(args.outputdir, args.outputmodelname))

    # loss
    # weight = torch.FloatTensor(args.n_classes).fill_(1)
    loss_fn = nn.CrossEntropyLoss()  #weight=weight)
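    # size_average=False makes the loss a sum over the batch rather than a mean
    # (legacy attribute; newer PyTorch uses reduction='sum')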
    loss_fn.size_average = False

    # optimizer
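    # get_optimizer parses an optimizer spec string such as "sgd,lr=0.1" into the
    # optimizer constructor and its keyword arguments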
    optim_fn, optim_params = get_optimizer(args.optimizer)
    optimizer = optim_fn(net.parameters(), **optim_params)

    if args.cuda:
        net.cuda()
        loss_fn.cuda()

    global val_acc_best, lr, stop_training, adam_stop
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in args.optimizer else 0.005

    epoch = 1

    while not stop_training and epoch <= args.n_epochs:
        train_acc, net = trainepoch(epoch, train, optimizer, args, net,
                                    loss_fn)
        eval_acc = evaluate(epoch, val, optimizer, args, net, 'valid')
        epoch += 1

    # net = torch.load(os.path.join(args.outputdir, args.outputmodelname))

    print("The Tests Accuracy is ",
          evaluate("NO", test, optimizer, args, net, "test"))
Example #2
def main(args):

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.gpu_id > -1:
        torch.cuda.manual_seed(args.seed)

    train, val, test = get_nli_hypoth(args.train_data, args.val_data,
                                      args.test_data, args.max_train_sents,
                                      args.max_val_sents, args.max_test_sents)

    nli_net = NLI_HYPOTHS_Net()

    # loss
    loss_fn = nn.CrossEntropyLoss()
    loss_mse = nn.MSELoss()

    # optimizer
    optim_fn, optim_params = get_optimizer(args.optimizer)
    optimizer = optim_fn(nli_net.parameters(), **optim_params)

    if args.gpu_id > -1:
        nli_net.cuda()
        loss_fn.cuda()
        loss_mse.cuda()

    global val_acc_best, lr, stop_training, adam_stop
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in args.optimizer else None

    epoch = 1

    while not stop_training and epoch <= args.n_epochs:
        train_acc, nli_net = trainepoch(epoch, train, optimizer, args, nli_net,
                                        loss_fn, loss_mse)
        eval_acc = evaluate(epoch, val, optimizer, args, nli_net, 'valid')
        epoch += 1

    nli_net = torch.load(os.path.join(args.outputdir, args.outputmodelname))

    print("The Tests Accuracy is ",
          evaluate("NO", test, optimizer, args, nli_net, "test"))
Example #3
# model
encoder_types = ['BLSTMEncoder', 'BLSTMprojEncoder', 'BGRUlastEncoder', 'InnerAttentionMILAEncoder',\
                 'InnerAttentionYANGEncoder', 'InnerAttentionNAACLEncoder', 'ConvNetEncoder', 'LSTMEncoder']
assert params.encoder_type in encoder_types, "encoder_type must be in " + str(
    encoder_types)
nli_net = NLINet(config_nli_model)
print(nli_net)

# loss
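# uniform (all-ones) class weights; size_average=False below sums the loss over the batch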
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizer
optim_fn, optim_params = get_optimizer(params.optimizer)
optimizer = optim_fn(nli_net.parameters(), **optim_params)

# cuda by default
nli_net.cuda()
loss_fn.cuda()
#src_embeddings.cuda()
"""
TRAIN
"""
#src_embeddings.volatile = True
val_acc_best = -1e10
adam_stop = False
stop_training = False
lr = optim_params['lr'] if 'sgd' in params.optimizer else None
#index_pad = word2id['<p>']
Example #4
for name, x in nli_net.named_parameters():
    print(name)

for name, x in actorModel.named_parameters():
    print(name)

#print(nli_net.target_pred.enc_lstm.weight_ih_l0)
#print(nli_net.target_classifier[4].bias)

# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizer
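# one optimizer per parameter group: target critic, active critic, target policy, active policy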
optim_fn, optim_params = get_optimizer(params.optimizer)
critic_target_optimizer = optim_fn(
    list(nli_net.target_pred.parameters()) +
    list(nli_net.target_classifier.parameters()), **optim_params)

optim_fn2, optim_params2 = get_optimizer(params.optimizer)
critic_active_optimizer = optim_fn2(
    list(nli_net.active_pred.parameters()) +
    list(nli_net.active_classifier.parameters()), **optim_params2)

optim_fn3, optim_params3 = get_optimizer("adam,lr=0.1")
actor_target_optimizer = optim_fn3(actorModel.target_policy.parameters(),
                                   **optim_params3)

optim_fn4, optim_params4 = get_optimizer("adam,lr=0.1")
actor_active_optimizer = optim_fn4(actorModel.active_policy.parameters(),
                                   **optim_params4)
Example #5
def run_experiment(params):

    # print parameters passed, and all parameters
    print('\ntogrep : {0}\n'.format(sys.argv[1:]))
    print(params)
    os.makedirs(params.outputdir, exist_ok=True)
    """
    SEED
    """
    np.random.seed(params.seed)
    torch.manual_seed(params.seed)
    torch.cuda.manual_seed(params.seed)
    """
    DATA
    """
    dataset_path = params.dataset_path

    # build training and test corpus
    filename_list = recursive_file_list(dataset_path)
    print('Use the following files for training: ', filename_list)
    corpus = CBOWDataset(dataset_path, params.num_docs, params.context_size,
                         params.num_samples_per_item, params.mode,
                         params.precomputed_word_vocab, params.max_words, None,
                         1000, params.precomputed_chunks_dir, params.temp_path)
    corpus_len = len(corpus)

    ## split train and test
    inds = list(range(corpus_len))
    shuffle(inds)

    num_val_samples = int(corpus_len * params.validation_fraction)
    train_indices = inds[:-num_val_samples] if num_val_samples > 0 else inds
    test_indices = inds[-num_val_samples:] if num_val_samples > 0 else []
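    # hold out the last num_val_samples shuffled indices for validation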

    cbow_train_loader = DataLoader(corpus,
                                   sampler=SubsetRandomSampler(train_indices),
                                   batch_size=params.batch_size,
                                   shuffle=False,
                                   num_workers=params.num_workers,
                                   pin_memory=True,
                                   collate_fn=corpus.collate_fn)
    cbow_test_loader = DataLoader(corpus,
                                  sampler=SubsetRandomSampler(test_indices),
                                  batch_size=params.batch_size,
                                  shuffle=False,
                                  num_workers=params.num_workers,
                                  pin_memory=True,
                                  collate_fn=corpus.collate_fn)

    ## extract some variables needed for training
    num_training_samples = corpus.num_training_samples
    word_vec = corpus.word_vec
    unigram_dist = corpus.unigram_dist
    word_vec_copy = corpus._word_vec_count_tuple

    print("Number of sentences used for training:", str(num_training_samples))
    """
    MODEL
    """

    # build path where to store the encoder
    outputmodelname = construct_model_name(params.outputmodelname, params)

    # build encoder
    n_words = len(word_vec)
    if params.w2m_type == "cmow":
        encoder = get_cmow_encoder(
            n_words,
            padding_idx=0,
            word_emb_dim=params.word_emb_dim,
            initialization_strategy=params.initialization)
        output_embedding_size = params.word_emb_dim
    elif params.w2m_type == "cbow":
        encoder = get_cbow_encoder(n_words,
                                   padding_idx=0,
                                   word_emb_dim=params.word_emb_dim)
        output_embedding_size = params.word_emb_dim
    elif params.w2m_type == "hybrid":
        encoder = get_cbow_cmow_hybrid_encoder(
            n_words,
            padding_idx=0,
            word_emb_dim=params.word_emb_dim,
            initialization_strategy=params.initialization,
            w2m_type=params.hybrid_cmow,
            _lambda=params._lambda,
            cnmow_version=params.cnmow_version)
        output_embedding_size = 2 * params.word_emb_dim
    elif params.w2m_type == "cnmow":
        encoder = get_cnmow_encoder(
            n_words,
            padding_idx=0,
            word_emb_dim=params.word_emb_dim,
            initialization_strategy=params.initialization,
            _lambda=params._lambda,
            cnmow_version=params.cnmow_version)
        output_embedding_size = params.word_emb_dim

    # build cbow model
    cbow_net = CBOWNet(encoder,
                       output_embedding_size,
                       n_words,
                       weights=unigram_dist,
                       n_negs=params.n_negs,
                       padding_idx=0)
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs for training!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        cbow_net = nn.DataParallel(cbow_net)
        use_multiple_gpus = True
    else:
        use_multiple_gpus = False

    # optimizer
    print([x.size() for x in cbow_net.parameters()])
    optim_fn, optim_params = get_optimizer(params.optimizer)
    optimizer = optim_fn(cbow_net.parameters(), **optim_params)

    # cuda by default
    cbow_net.to(device)  #.cuda()
    """
    TRAIN
    """
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in params.optimizer else None

    # compute learning rate schedule
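    # per-batch decrement that takes the learning rate linearly from lr down to
    # params.minlr over all batches of all epochs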
    if params.linear_decay:
        lr_shrinkage = (lr - params.minlr) / (
            (float(num_training_samples) / params.batch_size) *
            params.n_epochs)

    def forward_pass(X_batch, tgt_batch, params, check_size=False):

        X_batch = Variable(X_batch).to(device)  #.cuda()
        tgt_batch = Variable(torch.LongTensor(tgt_batch)).to(device)  #.cuda()
        k = X_batch.size(0)  # actual batch size

        # average the returned loss (with nn.DataParallel this also merges the per-GPU losses)
        loss = cbow_net(X_batch, tgt_batch).mean()
        return loss, k

    def validate(data_loader):
        cbow_net.eval()

        with torch.no_grad():
            all_costs = []
            for X_batch, tgt_batch in data_loader:
                loss, k = forward_pass(X_batch, tgt_batch, params)
                all_costs.append(loss.item())

        cbow_net.train()
        return np.mean(all_costs)

    def trainepoch(epoch):
        print('\nTRAINING : Epoch ' + str(epoch))
        cbow_net.train()
        all_costs = []
        logs = []
        words_count = 0

        last_time = time.time()
        correct = 0.

        if not params.linear_decay:
            optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * params.decay if epoch>1\
                and 'sgd' in params.optimizer else optimizer.param_groups[0]['lr']
            print('Learning rate : {0}'.format(
                optimizer.param_groups[0]['lr']))

        processed_training_samples = 0
        start_time = time.time()
        total_time = 0
        total_batch_generation_time = 0
        total_forward_time = 0
        total_backward_time = 0
        total_step_time = 0
        last_processed_training_samples = 0

        nonlocal processed_batches, stop_training, no_improvement, min_val_loss, losses, min_loss_criterion
        for i, (X_batch, tgt_batch) in enumerate(cbow_train_loader):

            # for the hybrid model, only enable gradients on the CMOW lookup table
            # every explore_par-th batch
            if params.w2m_type == "hybrid":
                enabled = (i % params.explore_par) == 0
                cbow_net.encoder.cmow_encoder.lookup_table.weight.requires_grad = enabled

            batch_generation_time = (time.time() - start_time) * 1000000

            # forward pass
            forward_start = time.time()
            loss, k = forward_pass(X_batch, tgt_batch, params)
            all_costs.append(loss.item())
            forward_total = (time.time() - forward_start) * 1000000

            # backward
            backward_start = time.time()
            optimizer.zero_grad()
            loss.backward()

            backward_total = (time.time() - backward_start) * 1000000

            # linear learning rate decay
            if params.linear_decay:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] - lr_shrinkage if \
                    'sgd' in params.optimizer else optimizer.param_groups[0]['lr']

            # optimizer step
            step_time = time.time()
            optimizer.step()
            total_step_time += (time.time() - step_time) * 1000000

            # log progress
            processed_training_samples += params.batch_size
            percentage_done = float(
                processed_training_samples) / num_training_samples
            processed_batches += 1
            if processed_batches == params.validation_frequency:

                # compute validation loss and train loss
                val_loss = round(validate(cbow_test_loader),
                                 5) if num_val_samples > 0 else float('inf')
                train_loss = round(np.mean(all_costs), 5)

                # print current loss and processing speed
                logs.append(
                    'Epoch {3} - {4:.4} ; lr {2:.4} ; train-loss {0} ; val-loss {5} ; sentence/s {1}'
                    .format(
                        train_loss,
                        int((processed_training_samples -
                             last_processed_training_samples) /
                            (time.time() - last_time)),
                        optimizer.param_groups[0]['lr'], epoch,
                        percentage_done, val_loss))
                if params.VERBOSE:
                    print('\n\n\n')
                print(logs[-1])
                last_time = time.time()
                words_count = 0
                all_costs = []
                last_processed_training_samples = processed_training_samples

                if params.VERBOSE:
                    print("{} batches took {} microseconds".format(
                        params.validation_frequency, total_time))
                    print(
                        "get_batch: {} \nforward: {} \nbackward: {} \nstep: {}"
                        .format(total_batch_generation_time / total_time,
                                total_forward_time / total_time,
                                total_backward_time / total_time,
                                total_step_time / total_time))
                total_time = 0
                total_batch_generation_time = 0
                total_forward_time = 0
                total_backward_time = 0
                total_step_time = 0
                processed_batches = 0

                # save losses for logging later
                losses.append((train_loss, val_loss))

                # early stopping?
                if val_loss < min_val_loss:
                    min_val_loss = val_loss

                    # save best model
                    torch.save(
                        cbow_net,
                        os.path.join(params.outputdir,
                                     outputmodelname + '.cbow_net'))

                if params.stop_criterion is not None:
                    # stop_criterion is a Python expression (e.g. 'val_loss' or
                    # 'train_loss') evaluated in this scope
                    stop_crit_loss = eval(params.stop_criterion)
                    if stop_crit_loss < min_loss_criterion:
                        no_improvement = 0
                        min_loss_criterion = stop_crit_loss
                    else:
                        no_improvement += 1
                        if no_improvement > params.patience:
                            stop_training = True
                            print("No improvement in loss criterion",
                                  str(params.stop_criterion), "for",
                                  str(no_improvement),
                                  "steps. Terminate training.")
                            break

            now = time.time()
            batch_time_micro = (now - start_time) * 1000000

            total_time = total_time + batch_time_micro
            total_batch_generation_time += batch_generation_time
            total_forward_time += forward_total
            total_backward_time += backward_total

            start_time = now

    """
    Train model on CBOW objective
    """
    epoch = 1

    processed_batches = 0
    min_val_loss = float('inf')
    min_loss_criterion = float('inf')
    no_improvement = 0
    losses = []
    while not stop_training and epoch <= params.n_epochs:
        trainepoch(epoch)
        epoch += 1

    # load the best model
    if min_val_loss < float('inf'):
        cbow_net = torch.load(
            os.path.join(params.outputdir, outputmodelname + '.cbow_net'))
        print("Loading model with best validation loss.")
    else:
        # we use the current model;
        print("No model with better validation loss has been saved.")

    # save word vocabulary and counts
    pickle.dump(
        word_vec_copy,
        open(os.path.join(params.outputdir, outputmodelname + '.vocab'), "wb"))

    if use_multiple_gpus:
        cbow_net = cbow_net.module
    return cbow_net.encoder, losses
Example #6
def main(args):

    GLOVE_PATH = "dataset/GloVe/glove.840B.300d.txt"

    parser = argparse.ArgumentParser(description='NLI training')
    # paths
    parser.add_argument("--nlipath",
                        type=str,
                        default='dataset/SNLI/',
                        help="NLI data path (SNLI or MultiNLI)")
    parser.add_argument("--outputdir",
                        type=str,
                        default='savedir/',
                        help="Output directory")
    parser.add_argument("--outputmodelname", type=str, default='model.pickle')

    # dataset, dimensions, transfer learning
    parser.add_argument("--dataset",
                        type=str,
                        required=True,
                        help="Semantic similarity dataset")
    parser.add_argument('--dimension',
                        nargs='+',
                        required=True,
                        help='Dimension(s) on the dataset')
    parser.add_argument('--transfer',
                        default='DNT',
                        help='Transfer learning approach')
    parser.add_argument('--save', default='no', help='Save trained model')
    parser.add_argument(
        '--load_model',
        default='no',
        help='If a model is loaded, skip training and just evaluate')

    # training
    parser.add_argument("--n_epochs", type=int, default=10)
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--dpout_model",
                        type=float,
                        default=0.,
                        help="encoder dropout")
    parser.add_argument("--dpout_fc",
                        type=float,
                        default=0.,
                        help="classifier dropout")
    parser.add_argument("--nonlinear_fc",
                        type=float,
                        default=0,
                        help="use nonlinearity in fc")
    parser.add_argument("--optimizer",
                        type=str,
                        default="sgd,lr=5",
                        help="adam or sgd,lr=0.1")
    parser.add_argument("--lrshrink",
                        type=float,
                        default=5,
                        help="shrink factor for sgd")
    parser.add_argument("--decay", type=float, default=1., help="lr decay")
    parser.add_argument("--minlr", type=float, default=1e-5, help="minimum lr")
    parser.add_argument("--max_norm",
                        type=float,
                        default=5.,
                        help="max norm (grad clipping)")

    # model
    parser.add_argument("--encoder_type",
                        type=str,
                        default='BLSTMEncoder',
                        help="see list of encoders")
    parser.add_argument("--enc_lstm_dim",
                        type=int,
                        default=2048,
                        help="encoder nhid dimension")
    parser.add_argument("--n_enc_layers",
                        type=int,
                        default=1,
                        help="encoder num layers")
    parser.add_argument("--fc_dim",
                        type=int,
                        default=512,
                        help="nhid of fc layers")
    parser.add_argument("--n_classes",
                        type=int,
                        default=3,
                        help="entailment/neutral/contradiction")
    parser.add_argument("--pool_type",
                        type=str,
                        default='max',
                        help="max or mean")

    # gpu
    parser.add_argument("--gpu_id", type=int, default=0, help="GPU ID")
    parser.add_argument("--seed", type=int, default=1236, help="seed")

    params, _ = parser.parse_known_args(args)

    # set gpu device
    torch.cuda.set_device(params.gpu_id)

    # print parameters passed, and all parameters
    #print('\ntogrep : {0}\n'.format(sys.argv[1:]))
    #print(params)

    def trainepoch(epoch):
        print('TRAINING : Epoch ' + str(epoch))
        nli_net.train()
        logs = []

        last_time = time.time()
        #correct = 0.
        # shuffle the data
        permutation = np.random.permutation(len(train['s1']))

        s1 = train['s1'][permutation]
        s2 = train['s2'][permutation]

        targets = [x[permutation] for x in train['labels']]

        optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * params.decay if epoch>1\
            and 'sgd' in params.optimizer else optimizer.param_groups[0]['lr']
        #print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))

        for stidx in range(0, len(s1), params.batch_size):
            tgt_batches = []
            # prepare batch
            s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                         word_vec)
            s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                         word_vec)
            s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
                s2_batch.cuda())
            for i, _ in enumerate(MTL_index):
                tgt_batches.append(
                    Variable(
                        torch.FloatTensor(
                            targets[i][stidx:stidx +
                                       params.batch_size])).cuda())

            #for dim in [1,2,3,4]:
            # model forward
            outputs = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

            # loss
            if params.transfer == 'DNT':
                #print(outputs[0])
                #print((tgt_batches[0] - 1)/(params.n_classes-1))
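                # DNT rescales gold scores from [1, n_classes] to [0, 1] before the loss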
                losses = [
                    nli_net.loss_fn(outputs[i], (tgt_batches[i] - 1) /
                                    (params.n_classes - 1))
                    for i, _ in enumerate(MTL_index)
                ]
            elif params.transfer == 'NT':
                losses = [
                    nli_net.loss_fn(outputs[i], tgt_batches[i])
                    for i, _ in enumerate(MTL_index)
                ]
            #if 'kl' in MTL_index:
            #    output1 = torch.log(output1)

            loss = sum(losses)  # sum the per-task losses

            #loss = loss1 + loss2 + loss3 + loss4# + loss5 + loss6 + loss7 + loss8
            #ADDED
            #optimizer.zero_grad()
            #loss1.backward(retain_graph=True)
            #loss2.backward(retain_graph=True)
            #loss3.backward(retain_graph=True)
            #loss4.backward(retain_graph=True)
            #optimizer.step()
            #END ADDED
            """
            if dim == 1:
                loss = nli_net.loss_fn(output1, tgt_batch1)
            elif dim == 2:
                loss = nli_net.loss_fn(output2, tgt_batch2)
            elif dim == 3:
                loss = nli_net.loss_fn(output3, tgt_batch3)
            elif dim == 4:
                loss = nli_net.loss_fn(output4, tgt_batch4)
            """
            # backward
            optimizer.zero_grad()
            loss.backward()

            # optimizer step
            optimizer.step()

    def evaluate(epoch,
                 eval_type='valid',
                 flag='',
                 correlation=spearmanr,
                 transfer='NT'):
        nli_net.eval()
        #correct = 0.
        preds = []
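        # class values 1..n_classes; with NT transfer, np.dot(probas, r) turns
        # predicted class probabilities into an expected score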
        r = np.arange(1, 1 + nli_net.n_classes)
        global val_acc_best, lr, stop_training, adam_stop

        if eval_type == 'valid':
            print('VALIDATION : Epoch {0}'.format(epoch))
            s1 = valid['s1']
            s2 = valid['s2']
            targets = valid['scores']
        elif eval_type == 'test':
            print('TEST : Epoch {0}'.format(epoch))
            s1 = test['s1']
            s2 = test['s2']
            targets = test['scores']
        elif eval_type == 'train':
            print('EVAL ON TRAIN : Epoch {0}'.format(epoch))
            s1 = train['s1']
            s2 = train['s2']
            targets = train['scores']
        else:
            raise ValueError('Wrong eval_type.')

        probas = [[] for _ in MTL_index]
        correct = 0.

        for i in range(0, len(s1), params.batch_size):
            # prepare batch
            s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
            s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
            s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(
                s2_batch.cuda())

            # model forward
            outputs = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

            for j, _ in enumerate(MTL_index):
                if len(probas[j]) == 0:
                    probas[j] = outputs[j].data.cpu().numpy()
                else:
                    probas[j] = np.concatenate(
                        (probas[j], outputs[j].data.cpu().numpy()), axis=0)
            """
            if 2 in MTL_index:
                if 'e' in MTL_index:
                    tgt_batch2 = Variable(torch.LongTensor(target2[i:i + params.batch_size])).cuda()
                    pred2 = output2.data.max(1)[1]
                    correct += pred2.long().eq(tgt_batch2.data.long()).cpu().sum()
                else:
                    if len(probas2) == 0:
                        probas2 = output2.data.cpu().numpy()
                    else:
                        probas2 = np.concatenate((probas2, output2.data.cpu().numpy()), axis=0)
           """

        if transfer == 'NT':
            ret = [
                correlation(np.dot(x, r), y)[0]
                for x, y in zip(probas, targets)
            ]
        elif transfer == 'DNT':
            ret = [correlation(x, y)[0] for x, y in zip(probas, targets)]
        else:
            raise ValueError('Wrong transfer.')
        """
        if 2 in MTL_index:      
            if 'e' in MTL_index:
                ret.append(round(100 * correct/len(s1), 2))
            else:
                yhat2 = np.dot(probas2, r)
                p2 = spearmanr(yhat2, target2)[0]
                ret.append(p2)
        else:
            ret.append(0)
        """

        return ret

    """
    SEED
    """
    np.random.seed(params.seed)
    torch.manual_seed(params.seed)
    torch.cuda.manual_seed(params.seed)
    """
    DATA
    """
    #for i in range(1,9):
    #    print(i)
    #    print('----------')
    dataset_path = {
        'stsbenchmark': '../stsbenchmark/',
        'sts12': '../SemEval12/',
        'sick': '../SICK/',
        'activities': '../human_activity_phrase_data/',
        'sag': '../ShortAnswerGrading_v2.0/data/processed/',
        'typed': '../SemEval13/typed/'
    }
    #MTL_index = [1,2,3,4, 'mse'] #'e'
    MTL_index = [int(x) for x in params.dimension]
    train, valid, test = get_sts(dataset_path[params.dataset], MTL_index,
                                 params.transfer, params.n_classes)

    word_vec = build_vocab(
        train['s1'] + train['s2'] + valid['s1'] + valid['s2'] + test['s1'] +
        test['s2'], GLOVE_PATH)

    for split in ['s1', 's2']:
        for data_type in ['train', 'valid', 'test']:
            eval(data_type)[split] = np.array(
                [[word for word in sent.split() if word in word_vec]
                 for sent in eval(data_type)[split]])
            #eval(data_type)[split] = np.array([['<s>'] +
            #    [word for word in sent.split() if word in word_vec or word[:2] == 'dc'] +
            #    ['</s>'] for sent in eval(data_type)[split]])

    params.word_emb_dim = 300
    """
    MODEL
    """
    # model config
    config_nli_model = {
        'n_words': len(word_vec),
        'word_emb_dim': params.word_emb_dim,
        'enc_lstm_dim': params.enc_lstm_dim,
        'n_enc_layers': params.n_enc_layers,
        'dpout_model': params.dpout_model,
        'dpout_fc': params.dpout_fc,
        'fc_dim': params.fc_dim,
        'bsize': params.batch_size,
        'n_classes': params.n_classes,
        'pool_type': params.pool_type,
        'nonlinear_fc': params.nonlinear_fc,
        'encoder_type': params.encoder_type,
        'use_cuda': True,
        'MTL_index': MTL_index,
        'transfer': params.transfer
    }

    # model
    encoder_types = [
        'BLSTMEncoder', 'BLSTMprojEncoder', 'BGRUlastEncoder',
        'InnerAttentionMILAEncoder', 'InnerAttentionYANGEncoder',
        'InnerAttentionNAACLEncoder', 'ConvNetEncoder', 'LSTMEncoder'
    ]
    assert params.encoder_type in encoder_types, "encoder_type must be in " + \
                                                    str(encoder_types)
    perfs_all = []
    for rd in range(1):
        print("Round", rd)
        if params.load_model == 'no':
            nli_net = NLINet(config_nli_model)
            nli_net.encoder = torch.load('encoder/infersent.allnli.pickle',
                                         map_location={
                                             'cuda:1': 'cuda:0',
                                             'cuda:2': 'cuda:0'
                                         })
        else:
            nli_net = torch.load(params.load_model)
        print(nli_net)

        # optimizer
        optim_fn, optim_params = get_optimizer(params.optimizer)
        optimizer = optim_fn(nli_net.parameters(), **optim_params)

        # cuda by default
        nli_net.cuda()
        """
        TRAIN
        """
        val_acc_best = -1e10
        adam_stop = False
        stop_training = False
        lr = optim_params['lr'] if 'sgd' in params.optimizer else None

        last_result = 0
        last_test_result = 0
        drop_count = 0
        """
        Train model on Natural Language Inference task
        """
        correlation = spearmanr if params.dataset == 'activities' else pearsonr
        epoch = 0
        perfs_valid = evaluate(epoch, 'valid', 'begin', correlation,
                               params.transfer)
        perfs_test = evaluate(epoch, 'test', 'begin', correlation,
                              params.transfer)
        print(perfs_valid, perfs_test)
        epoch += 1

        if params.load_model == 'no':
            while not stop_training and epoch <= params.n_epochs:
                trainepoch(epoch)
                perfs_valid = evaluate(epoch, 'valid', '', correlation,
                                       params.transfer)
                perfs_test = evaluate(epoch, 'test', '', correlation,
                                      params.transfer)
                print(perfs_valid, perfs_test)

                epoch += 1
            #perfs_all.append(perfs)
        if params.save != 'no':
            torch.save(nli_net, params.save)
Example #7
def main(args):

  """
  SEED
  """
  np.random.seed(args.seed)
  torch.manual_seed(args.seed)
  if args.gpu_id > -1:
    torch.cuda.manual_seed(args.seed)

  """
  DATA
  """
  train, valid, test = get_nli(args.nlipath, args.n_classes)
  word_vecs = build_vocab(train['s1'] + train['s2'] +
                       valid['s1'] + valid['s2'] +
                       test['s1'] + test['s2'], args.embdfile)

  for split in ['s1', 's2']:
    for data_type in ['train', 'valid', 'test']:
        eval(data_type)[split] = np.array([['<s>'] +
            [word for word in sent.split() if word in word_vecs] +
            ['</s>'] for sent in eval(data_type)[split]])


  args.word_emb_dim = len(word_vecs[list(word_vecs.keys())[0]])

  nli_model_configs = get_model_configs(args, len(word_vecs))


  nli_model_configs["n_classes"] = args.n_classes

  # define premise and hypoth encoders
  premise_encoder = eval(nli_model_configs['encoder_type'])(nli_model_configs)
  hypoth_encoder = eval(nli_model_configs['encoder_type'])(nli_model_configs)
  shared_nli_net = SharedNLINet(nli_model_configs, premise_encoder, hypoth_encoder)
  shared_hypoth_net = SharedHypothNet(nli_model_configs, hypoth_encoder)
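  # the hypothesis encoder is shared between the NLI model and the adversarial hypothesis-only model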
  print(shared_nli_net)
  print(shared_hypoth_net)

  if args.pre_trained_model:
    print( "Pre_trained_model: " + args.pre_trained_model)
    pre_trained_model = torch.load(args.pre_trained_model)
  
    shared_nli_net_params = shared_nli_net.state_dict()
    pre_trained_params = pre_trained_model.state_dict()
    assert shared_nli_net_params.keys() == pre_trained_params.keys(), "loaded model has different parameter state names than NLI_HYPOTHS_NET"
    for key, parameters in shared_nli_net_params.items():
      if parameters.size() == pre_trained_params[key].size():
        shared_nli_net_params[key] = pre_trained_params[key]
    shared_nli_net.load_state_dict(shared_nli_net_params)

  print(shared_nli_net)

  if args.pre_trained_adv_model:
    print( "Pre_trained_adv_model: " + args.pre_trained_adv_model)
    pre_trained_model = torch.load(args.pre_trained_adv_model)
  
    shared_hypoth_net_params = shared_hypoth_net.state_dict()
    pre_trained_params = pre_trained_model.state_dict()
    assert shared_hypoth_net_params.keys() == pre_trained_params.keys(), "loaded model has different parameter state names than NLI_HYPOTHS_NET"
    for key, parameters in shared_hypoth_net_params.items():
      if parameters.size() == pre_trained_params[key].size():
        shared_hypoth_net_params[key] = pre_trained_params[key]
    shared_hypoth_net.load_state_dict(shared_hypoth_net_params)

  print(shared_hypoth_net)


  # nli loss
  weight = torch.FloatTensor(args.n_classes).fill_(1)
  loss_fn_nli = nn.CrossEntropyLoss(weight=weight)
  loss_fn_nli.size_average = False

  # hypoth (adversarial) loss
  weight = torch.FloatTensor(args.n_classes).fill_(1)
  loss_fn_hypoth = nn.CrossEntropyLoss(weight=weight)
  loss_fn_hypoth.size_average = False

  # optimizer
  optim_fn, optim_params = get_optimizer(args.optimizer)
  optimizer_nli = optim_fn(shared_nli_net.parameters(), **optim_params)
  #optimizer_hypoth = optim_fn(shared_hypoth_net.parameters(), **optim_params)
  # only pass hypoth classifier params to avoid updating shared encoder params twice 
  optimizer_hypoth = optim_fn(shared_hypoth_net.classifier.parameters(), **optim_params)

  if args.gpu_id > -1:
    shared_nli_net.cuda()
    shared_hypoth_net.cuda()
    loss_fn_nli.cuda()
    loss_fn_hypoth.cuda()

  """
  TRAIN
  """
  global val_acc_best, lr, stop_training, adam_stop
  val_acc_best = -1e10
  adam_stop = False
  stop_training = False
  lr = optim_params['lr'] if 'sgd' in args.optimizer else None

  """
  Train model on Natural Language Inference task
  """
  epoch = 1

  while not stop_training and epoch <= args.n_epochs:
    train_acc_nli, train_acc_hypoth, shared_nli_net, shared_hypoth_net = trainepoch(
        epoch, train, optimizer_nli, optimizer_hypoth, args, word_vecs,
        shared_nli_net, shared_hypoth_net, loss_fn_nli, loss_fn_hypoth,
        args.adv_lambda, args.adv_hyp_encoder_lambda)
    eval_acc_nli, eval_acc_hypoth = evaluate(
        epoch, valid, optimizer_nli, optimizer_hypoth, args, word_vecs,
        shared_nli_net, shared_hypoth_net, 'valid', adv_lambda=args.adv_lambda)
    epoch += 1
Example #8
def main(args):
    print "main"
    """
  SEED
  """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu_id > -1:
        torch.cuda.manual_seed(args.seed)
    """
  DATA
  """
    train, val, test = get_nli_hypoth(args.train_lbls_file, args.train_src_file, args.val_lbls_file, \
                                      args.val_src_file, args.test_lbls_file, args.test_src_file, \
                                      args.max_train_sents, args.max_val_sents, args.max_test_sents, args.remove_dup)

    word_vecs = build_vocab(
        train['hypoths'] + val['hypoths'] + test['hypoths'], args.embdfile,
        args.lorelei_embds)
    args.word_emb_dim = len(word_vecs[list(word_vecs.keys())[0]])

    nli_model_configs = get_model_configs(args, len(word_vecs))

    lbls_file = args.train_lbls_file
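    # infer the number of output classes from the dataset named in the labels-file path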
    if "mpe" in lbls_file or "snli" in lbls_file or "multinli" in lbls_file or "sick" in lbls_file or "joci" in lbls_file:
        nli_model_configs["n_classes"] = 3
    elif "spr" in lbls_file or "dpr" in lbls_file or "fnplus" in lbls_file or "add_one" in lbls_file or "scitail" in lbls_file:
        nli_model_configs["n_classes"] = 2

    nli_net = NLI_HYPOTHS_Net(nli_model_configs)
    print(nli_net)

    # loss
    weight = torch.FloatTensor(args.n_classes).fill_(1)
    loss_fn = nn.CrossEntropyLoss(weight=weight)
    loss_fn.size_average = False

    # optimizer
    optim_fn, optim_params = get_optimizer(args.optimizer)
    optimizer = optim_fn(nli_net.parameters(), **optim_params)

    if args.gpu_id > -1:
        nli_net.cuda()
        loss_fn.cuda()
    """
  TRAIN
  """
    global val_acc_best, lr, stop_training, adam_stop
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in args.optimizer else None
    """
  Train model on Natural Language Inference task
  """
    epoch = 1

    while not stop_training and epoch <= args.n_epochs:
        train_acc, nli_net = trainepoch(epoch, train, optimizer, args,
                                        word_vecs, nli_net, loss_fn)
        eval_acc = evaluate(epoch, val, optimizer, args, word_vecs, nli_net,
                            'valid')
        epoch += 1
Example #9
# model
encoder_types = ['InferSent', 'BLSTMprojEncoder', 'BGRUlastEncoder',
                 'InnerAttentionMILAEncoder', 'InnerAttentionYANGEncoder',
                 'InnerAttentionNAACLEncoder', 'ConvNetEncoder', 'LSTMEncoder']
assert params.encoder_type in encoder_types, "encoder_type must be in " + \
                                             str(encoder_types)
nli_net = NLINet(config_nli_model)
print(nli_net)

# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizer
optim_fn, optim_params = get_optimizer(params.optimizer)
optimizer = optim_fn(nli_net.parameters(), **optim_params)

# cuda by default
nli_net.cuda()
loss_fn.cuda()


"""
TRAIN
"""
val_acc_best = -1e10
adam_stop = False
stop_training = False
lr = optim_params['lr'] if 'sgd' in params.optimizer else None
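# lr is tracked explicitly only when SGD is used; for other optimizers it stays None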
Example #10
def main(args):
    print "main"
    """
  SEED
  """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu_id > -1:
        torch.cuda.manual_seed(args.seed)
    """
  DATA
  """
    train, val, test = get_nli_text(args.train_lbls_file, args.train_src_file, args.val_lbls_file, \
                                      args.val_src_file, args.test_lbls_file, args.test_src_file, \
                                      args.max_train_sents, args.max_val_sents, args.max_test_sents, args.remove_dup)

    word_vecs = build_vocab(
        train['hypoths'] + val['hypoths'] + test['hypoths'] +
        train['premises'] + val['premises'] + test['premises'], args.embdfile,
        args.lorelei_embds)
    args.word_emb_dim = len(word_vecs[list(word_vecs.keys())[0]])

    nli_model_configs = get_model_configs(args, len(word_vecs))

    lbls_file = args.train_lbls_file
    if "mpe" in lbls_file or "snli" in lbls_file or "multinli" in lbls_file or "sick" in lbls_file or "joci" in lbls_file or "glue" in lbls_file:
        nli_model_configs["n_classes"] = 3
    elif "spr" in lbls_file or "dpr" in lbls_file or "fnplus" in lbls_file or "add_one" in lbls_file or "scitail" in lbls_file:
        nli_model_configs["n_classes"] = 2

    # define premise and hypoth encoders
    premise_encoder = eval(
        nli_model_configs['encoder_type'])(nli_model_configs)
    hypoth_encoder = eval(nli_model_configs['encoder_type'])(nli_model_configs)
    shared_nli_net = SharedNLINet(nli_model_configs, premise_encoder,
                                  hypoth_encoder)
    shared_hypoth_net = SharedHypothNet(nli_model_configs, hypoth_encoder)
    print(shared_nli_net)
    print(shared_hypoth_net)

    if args.pre_trained_model:
        print "Pre_trained_model: " + args.pre_trained_model
        pre_trained_model = torch.load(args.pre_trained_model)

        shared_nli_net_params = shared_nli_net.state_dict()
        pre_trained_params = pre_trained_model.state_dict()
        assert shared_nli_net_params.keys() == pre_trained_params.keys(
        ), "loaded model has different parameter state names than NLI_HYPOTHS_NET"
        for key, parameters in shared_nli_net_params.items():
            if parameters.size() == pre_trained_params[key].size():
                shared_nli_net_params[key] = pre_trained_params[key]
        shared_nli_net.load_state_dict(shared_nli_net_params)

    print(shared_nli_net)

    if args.pre_trained_adv_model:
        print "Pre_trained_adv_model: " + args.pre_trained_adv_model
        pre_trained_model = torch.load(args.pre_trained_adv_model)

        shared_hypoth_net_params = shared_hypoth_net.state_dict()
        pre_trained_params = pre_trained_model.state_dict()
        assert shared_hypoth_net_params.keys() == pre_trained_params.keys(
        ), "loaded model has different parameter state names than NLI_HYPOTHS_NET"
        for key, parameters in shared_hypoth_net_params.items():
            if parameters.size() == pre_trained_params[key].size():
                shared_hypoth_net_params[key] = pre_trained_params[key]
        shared_hypoth_net.load_state_dict(shared_hypoth_net_params)

    print(shared_hypoth_net)

    # nli loss
    weight = torch.FloatTensor(args.n_classes).fill_(1)
    loss_fn_nli = nn.CrossEntropyLoss(weight=weight)
    loss_fn_nli.size_average = False

    # hypoth (adversarial) loss
    weight = torch.FloatTensor(args.n_classes).fill_(1)
    loss_fn_hypoth = nn.CrossEntropyLoss(weight=weight)
    loss_fn_hypoth.size_average = False

    # optimizer
    optim_fn, optim_params = get_optimizer(args.optimizer)
    optimizer_nli = optim_fn(shared_nli_net.parameters(), **optim_params)
    #optimizer_hypoth = optim_fn(shared_hypoth_net.parameters(), **optim_params)
    # only pass hypoth classifier params to avoid updating shared encoder params twice
    optimizer_hypoth = optim_fn(shared_hypoth_net.classifier.parameters(),
                                **optim_params)

    if args.gpu_id > -1:
        shared_nli_net.cuda()
        shared_hypoth_net.cuda()
        loss_fn_nli.cuda()
        loss_fn_hypoth.cuda()
    """
  TRAIN
  """
    global val_acc_best, lr, stop_training, adam_stop
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in args.optimizer else None
    """
  Train model on Natural Language Inference task
  """
    epoch = 1

    while not stop_training and epoch <= args.n_epochs:
        train_acc_nli, train_acc_hypoth, shared_nli_net, shared_hypoth_net = trainepoch(
            epoch, train, optimizer_nli, optimizer_hypoth, args, word_vecs,
            shared_nli_net, shared_hypoth_net, loss_fn_nli, loss_fn_hypoth,
            args.adv_lambda, args.adv_hyp_encoder_lambda)
        eval_acc_nli, eval_acc_hypoth = evaluate(epoch,
                                                 val,
                                                 optimizer_nli,
                                                 optimizer_hypoth,
                                                 args,
                                                 word_vecs,
                                                 shared_nli_net,
                                                 shared_hypoth_net,
                                                 'valid',
                                                 adv_lambda=args.adv_lambda)
        epoch += 1
Example #11
for name, x in nli_net.named_parameters():
    print(name)

for name, x in actorModel.named_parameters():
    print(name)

#print(nli_net.target_pred.enc_lstm.weight_ih_l0)
#print(nli_net.target_classifier[4].bias)

# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizer
optim_fn, optim_params = get_optimizer(params.optimizer)
critic_target_optimizer = optim_fn(
    list(nli_net.target_pred.parameters()) +
    list(nli_net.target_classifier.parameters()), **optim_params)

optim_fn2, optim_params2 = get_optimizer(params.optimizer)
critic_active_optimizer = optim_fn2(
    list(nli_net.active_pred.parameters()) +
    list(nli_net.active_classifier.parameters()), **optim_params2)

optim_fn3, optim_params3 = get_optimizer(params.actor_optimizer)
actor_target_optimizer = optim_fn3(actorModel.target_policy.parameters(),
                                   **optim_params3)

optim_fn4, optim_params4 = get_optimizer(params.actor_optimizer)
actor_active_optimizer = optim_fn4(actorModel.active_policy.parameters(),
                                   **optim_params4)
Example #12
def main(args):
    print "main"
    """
  SEED
  """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu_id > -1:
        torch.cuda.manual_seed(args.seed)
    """
  DATA
  """
    train, val, test = get_nli_hypoth(args.train_lbls_file, args.train_src_file, args.val_lbls_file, \
                                      args.val_src_file, args.test_lbls_file, args.test_src_file, \
                                      args.max_train_sents, args.max_val_sents, args.max_test_sents, args.remove_dup)

    word_vecs = build_vocab(
        train['hypoths'] + val['hypoths'] + test['hypoths'], args.embdfile,
        args.lorelei_embds)
    args.word_emb_dim = len(word_vecs[list(word_vecs.keys())[0]])

    nli_model_configs = get_model_configs(args, len(word_vecs))

    lbls_file = args.train_lbls_file
    if "mpe" in lbls_file or "snli" in lbls_file or "multinli" in lbls_file or "sick" in lbls_file or "joci" in lbls_file:
        nli_model_configs["n_classes"] = 3
    elif "spr" in lbls_file or "dpr" in lbls_file or "fnplus" in lbls_file or "add_one" in lbls_file or "scitail" in lbls_file:
        nli_model_configs["n_classes"] = 2

    nli_net = NLI_HYPOTHS_Net(nli_model_configs)
    print(nli_net)

    if args.pre_trained_nli_model:
        print("Pre_trained_model: " + args.pre_trained_nli_model)
        from models import SharedNLINet
        pre_trained_model = torch.load(args.pre_trained_nli_model)

        nli_net_params = nli_net.state_dict()
        pre_trained_params = pre_trained_model.state_dict()
        # this assert will fail because the pre-trained model has both premise and hypothesis encoders
        #assert nli_net_params.keys() == pre_trained_params.keys(), "load model has different parameter state names that NLI_HYPOTHS_NET"
        # instead, we will only copy the hypothesis encoder
        for key, parameters in nli_net_params.items():
            if key.startswith('encoder'):
                pre_trained_key = key.replace('encoder', 'encoder_hypoth')
                if parameters.size(
                ) == pre_trained_params[pre_trained_key].size():
                    nli_net_params[key] = pre_trained_params[pre_trained_key]
        nli_net.load_state_dict(nli_net_params)

    print(nli_net)

    # loss
    weight = torch.FloatTensor(args.n_classes).fill_(1)
    loss_fn = nn.CrossEntropyLoss(weight=weight)
    loss_fn.size_average = False

    # optimizer
    optim_fn, optim_params = get_optimizer(args.optimizer)
    optimizer = optim_fn(nli_net.parameters(), **optim_params)

    if args.freeze_encoder:
        print("Freezing encoder parameters")
        for p in nli_net.encoder.parameters():
            p.requires_grad = False
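        # note: freezing happens after the optimizer was built; the frozen parameters
        # keep grad=None, so optimizer.step() leaves them unchanged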

    if args.gpu_id > -1:
        nli_net.cuda()
        loss_fn.cuda()
    """
  TRAIN
  """
    global val_acc_best, lr, stop_training, adam_stop
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in args.optimizer else None
    """
  Train model on Natural Language Inference task
  """
    epoch = 1

    while not stop_training and epoch <= args.n_epochs:
        train_acc, nli_net = trainepoch(epoch, train, optimizer, args,
                                        word_vecs, nli_net, loss_fn)
        eval_acc = evaluate(epoch, val, optimizer, args, word_vecs, nli_net,
                            'valid')
        epoch += 1