import torch
import torch.nn as nn
import torch.optim as optim

# Dataset, ChainEncoder, Predictor, encode, and the `gpu` flag are
# project-local names assumed to be defined elsewhere in the module.


def train(features,
          fea_len,
          split_frac,
          out_file,
          save=False,
          save_folder=None):
    '''Train the chain encoder and predictor.

    Hyperparameters: the input features, the train/test split fraction
    (how much data is used for training), and the encoding feature
    length (fea_len).
    '''
    if isinstance(out_file, str):
        out_file = open(out_file, 'w')
    d = Dataset(features, split_frac, 1, gpu)
    print('defining architecture')
    enc = ChainEncoder(d.get_v_fea_len(), d.get_e_fea_len(), fea_len, 'last')
    predictor = Predictor(fea_len)
    loss = nn.NLLLoss()
    if gpu:
        enc.cuda()
        predictor.cuda()
        loss.cuda()

    optimizer = optim.Adam(
        list(enc.parameters()) + list(predictor.parameters()))

    print('training')
    test_v_features, test_e_features, test_A_pls, test_B_pls, test_y = d.get_test_pairs()
    test_y = test_y.data.cpu().numpy()
    for train_iter in range(12000):
        v_features, e_features, A_pls, B_pls, y = d.get_train_pairs(100)
        enc.zero_grad()
        predictor.zero_grad()
        A_code, B_code = encode(enc, fea_len, v_features, e_features, A_pls,
                                B_pls)
        softmax_output = predictor(A_code, B_code)
        loss_val = loss(softmax_output, y)
        loss_val.backward()
        optimizer.step()

        # Evaluate on the held-out pairs without tracking gradients.
        with torch.no_grad():
            test_A_code, test_B_code = encode(enc, fea_len, test_v_features,
                                              test_e_features, test_A_pls,
                                              test_B_pls)
            softmax_output = predictor(test_A_code,
                                       test_B_code).data.cpu().numpy()
        test_y_pred = softmax_output.argmax(axis=1)
        cur_acc = (test_y_pred == test_y).mean()  # fraction predicted correctly
        out_file.write('%f\n' % cur_acc)
        out_file.flush()
        if save and train_iter % 50 == 0:
            save_folder = save_folder.rstrip('/')
            torch.save(enc.state_dict(),
                       '%s/%i_enc.model' % (save_folder, train_iter))
            torch.save(predictor.state_dict(),
                       '%s/%i_pred.model' % (save_folder, train_iter))
    out_file.close()
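
A minimal usage sketch; the feature loader, file names, and hyperparameter
values below are hypothetical placeholders, not taken from the example itself.

# Hypothetical driver: 64-dim encoding, 80/20 split, accuracy logged to
# acc.log, checkpoints written into ckpt/ every 50 iterations.
if __name__ == '__main__':
    features = load_features('features.pkl')  # placeholder loader
    train(features,
          fea_len=64,
          split_frac=0.8,
          out_file='acc.log',
          save=True,
          save_folder='ckpt')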
Example 2
def train(features, fea_len, split_frac, out_file):
    if isinstance(out_file, str):
        out_file = open(out_file, 'w')
    d = Dataset(features, split_frac, gpu)
    print('defining architecture')
    enc = ChainEncoder(d.get_v_fea_len(), d.get_e_fea_len(), fea_len, 'last')
    predictor = Predictor(fea_len)
    loss = nn.NLLLoss()
    if gpu:
        enc.cuda()
        predictor.cuda()
        loss.cuda()

    optimizer = optim.Adam(
        list(enc.parameters()) + list(predictor.parameters()))

    print('training')
    test_chain_A, test_chain_B, test_y = d.get_test_pairs()
    test_y = test_y.data.cpu().numpy()
    for train_iter in range(4000):
        chains_A, chains_B, y = d.get_train_pairs(1000)
        enc.zero_grad()
        predictor.zero_grad()
        output_A = enc(chains_A)
        output_B = enc(chains_B)
        softmax_output = predictor(output_A, output_B)
        loss_val = loss(softmax_output, y)
        loss_val.backward()
        optimizer.step()

        # Evaluate on the held-out chains without tracking gradients.
        with torch.no_grad():
            output_test_A = enc(test_chain_A)
            output_test_B = enc(test_chain_B)
            softmax_output = predictor(output_test_A,
                                       output_test_B).data.cpu().numpy()
        test_y_pred = softmax_output.argmax(axis=1)
        cur_acc = (test_y_pred == test_y).mean()  # fraction predicted correctly
        print('test acc:', cur_acc)
        out_file.write('%f\n' % cur_acc)
        if train_iter % 50 == 0:  # checkpoint every 50 iterations (assumes ckpt/ exists)
            torch.save(enc.state_dict(), 'ckpt/%i_encoder.model' % train_iter)
            torch.save(predictor.state_dict(),
                       'ckpt/%i_predictor.model' % train_iter)
    out_file.close()
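
The loop above writes `state_dict` checkpoints every 50 iterations; here is a
sketch of restoring one pair with the standard torch.load / load_state_dict
calls (the iteration number and the rebuilt Dataset are illustrative
assumptions):

# Rebuild the models, then load a saved checkpoint pair for evaluation.
d = Dataset(features, split_frac, gpu)
enc = ChainEncoder(d.get_v_fea_len(), d.get_e_fea_len(), fea_len, 'last')
predictor = Predictor(fea_len)
enc.load_state_dict(torch.load('ckpt/3950_encoder.model'))
predictor.load_state_dict(torch.load('ckpt/3950_predictor.model'))
enc.eval()
predictor.eval()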
Example 3
best_test3_acc = 0.0
best_epoch_num = 0

total_epoch_num = 0
all_losses = []
all_acc_1 = []
all_acc_2 = []
all_acc_3 = []

for epoch in range(1, num_epochs + 1):  # 1-indexed epochs, num_epochs passes total
    total_epoch_num += 1
    shuffled_id_blocks = get_shuffled_ids(_data['tr'], batch_size)
    running_loss = 0.0
    predictor.train()
    for id_block in shuffled_id_blocks:
        predictor.zero_grad()

        # Fresh zero-initialized hidden and cell states for the LSTM
        h0 = torch.zeros(num_of_layers * num_of_directions, id_block.shape[0],
                         lstm_dim)
        c0 = torch.zeros(num_of_layers * num_of_directions, id_block.shape[0],
                         lstm_dim)

        batch_input, batch_len, batch_label = make_batch(
            _data['tr'], _label['tr'], id_block)
        output = predictor(batch_input, batch_len, h0, c0)
        loss = criterion(output, batch_label)
        running_loss += loss.item() * batch_input.size(0)
        loss.backward()
        _ = torch.nn.utils.clip_grad_norm_(predictor.parameters(), clip)
        optimizer.step()
    running_loss = running_loss / _data['tr'].shape[0]  # mean loss per training example
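
This excerpt only tracks the running training loss; below is a hedged sketch
of a matching evaluation pass, assuming the held-out split is stored under
the hypothetical keys _data['te'] / _label['te']:

# Hypothetical evaluation pass mirroring the training loop above.
predictor.eval()
correct, total = 0, 0
with torch.no_grad():
    for id_block in get_shuffled_ids(_data['te'], batch_size):
        h0 = torch.zeros(num_of_layers * num_of_directions,
                         id_block.shape[0], lstm_dim)
        c0 = torch.zeros(num_of_layers * num_of_directions,
                         id_block.shape[0], lstm_dim)
        batch_input, batch_len, batch_label = make_batch(
            _data['te'], _label['te'], id_block)
        output = predictor(batch_input, batch_len, h0, c0)
        correct += (output.argmax(dim=1) == batch_label).sum().item()
        total += batch_label.size(0)
print('test acc:', correct / total)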