def dev(use_gpu, batch_size, model_path):
    """Visualize per-layer cosine-similarity maps for a single dev batch.

    Loads the dev split, restores a MatchModel checkpoint, runs one forward
    pass on batch #5 only, and pushes each layer's cosine matrix to a visdom
    dashboard (env 'test1').

    Args:
        use_gpu (bool): move model and batch tensors to CUDA first.
        batch_size (int): dataloader batch size.
        model_path (str): path to a saved state_dict (.pth file).
    """
    dataset = build_dataset_pytorch.QuoraDataset_train(
        'data/dev_data_pad.pkl', 'data/dev_data_char_pad.pkl',
        'data/dev_data_flags_pad.pkl')
    dataloader = DataLoader(dataset, batch_size, shuffle=False, num_workers=0)
    mm1 = mp.MatchModel(vocab_size=100000,
                        tr_w_embed_dim=300,
                        tr_w_emb_dim_flag=100,
                        char_size=50,
                        char_embed_dim=20,
                        output_dim=100,
                        hidden_dim=200,
                        use_gpu=use_gpu,
                        yt_layer_num=3,
                        mode='test')
    # map_location='cpu' lets a GPU-saved checkpoint load on any machine;
    # parameters are moved back to CUDA below when use_gpu is set.
    mm1.load_state_dict(t.load(model_path, map_location='cpu'))
    mm1.eval()
    with open('data/vector_n.pkl', 'rb') as handle:
        data = pickle.load(
            handle
        )  # Warning: If adding something here, also modifying saveDataset
        word_vec = t.from_numpy(np.array(data['word_vec']))
    if use_gpu:
        mm1.cuda()

    def data_convert(*tensors):
        # Move every tensor to the current CUDA device.
        return [x.cuda() for x in tensors]

    # Inference only: disable autograd bookkeeping for the forward pass.
    with t.no_grad():
        for i, data in enumerate(dataloader):
            if i == 5:  # visualize one fixed batch, then stop
                p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags = data
                print(t.nonzero(p_inputs).shape[0], t.nonzero(q_inputs).shape[0])
                if use_gpu:
                    [p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags, word_vec] = \
                        data_convert(p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags, word_vec)
                p_input = [p_inputs, p_inputs_char, word_vec, p_flags]
                q_input = [q_inputs, q_inputs_char, q_flags]
                # NOTE(review): first positional arg (100) is presumably a
                # max sequence / projection size — confirm against MatchModel.
                out_p, out_q, cos = mm1(100, p_input, q_input)
                vis = visdom.Visdom(env=u'test1')
                for x in range(len(out_p)):
                    vis.image(cos[x].detach().numpy(),
                              win='q_layer_%d_%d' % (x, 0),
                              opts={'title': 'cos_layer%d' % (x)})
                break
# Ejemplo n.º 2
def dev(use_gpu, batch_size, model_path):
    """Evaluate a MatchModel checkpoint on the dev split.

    Args:
        use_gpu (bool): run the model and batches on CUDA.
        batch_size (int): dataloader batch size.
        model_path (str): path to a saved state_dict (.pth file).

    Returns:
        float: accuracy over the whole dev set (correct / total examples).
    """
    # NOTE(review): this shadows the visualization `dev` defined earlier in
    # the file; kept as-is because train() calls it by this name.
    dataset = build_dataset_pytorch.QuoraDataset_train(
        'data/dev_data_pad.pkl', 'data/dev_data_char_pad.pkl',
        'data/dev_data_flags_pad.pkl')
    # Evaluation order is irrelevant to accuracy, so no shuffling.
    dataloader = DataLoader(dataset, batch_size, shuffle=False, num_workers=1)
    mm1 = mp.MatchModel(vocab_size=100000,
                        tr_w_embed_dim=300,
                        tr_w_emb_dim_flag=0,
                        char_size=50,
                        char_embed_dim=20,
                        output_dim=100,
                        hidden_dim=200,
                        use_gpu=use_gpu,
                        yt_layer_num=3,
                        mode='test')
    # map_location='cpu' so a GPU-saved checkpoint also loads when
    # use_gpu=False; parameters are moved to CUDA below when needed.
    mm1.load_state_dict(t.load(model_path, map_location='cpu'))
    mm1.eval()
    with open('data/vector_n.pkl', 'rb') as handle:
        data = pickle.load(
            handle
        )  # Warning: If adding something here, also modifying saveDataset
        word_vec = t.from_numpy(np.array(data['word_vec']))
    if use_gpu:
        mm1.cuda()

    def data_convert(*tensors):
        # Move every tensor to the current CUDA device.
        return [x.cuda() for x in tensors]

    data_num = 0
    correct_all = 0
    # Inference only: no_grad avoids building the autograd graph per batch.
    with t.no_grad():
        for i, data in enumerate(dataloader):
            p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags = data
            if use_gpu:
                [p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags, word_vec] = \
                    data_convert(p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags, word_vec)
            p_input = [p_inputs, p_inputs_char, word_vec, p_flags]
            q_input = [q_inputs, q_inputs_char, q_flags]
            output = mm1(100, p_input, q_input)
            if use_gpu:
                output = output.cpu()
                targets = targets.cpu()
            # Targets are one-hot, so compare argmax of prediction vs. target.
            correct_num = (t.argmax(output, 1) == t.argmax(targets, 1)).numpy()
            data_num += len(correct_num)
            correct_all += correct_num.sum()
    print("-- Dev Acc %.3f" % (correct_all / data_num))
    return correct_all / data_num
# Ejemplo n.º 3
def train(use_gpu, model_path, lr, epochs, model_prefix, batch_size):
    """Train a MatchModel on the Quora train split, checkpointing per epoch.

    After each epoch the state_dict is saved to '<model_prefix>_<epoch>.pth'
    and evaluated with dev() and test(); the best accuracies are printed at
    the end.

    Args:
        use_gpu (bool): train on CUDA (also used for the per-epoch dev/test
            evaluation — previously hard-coded to True, which crashed on
            CPU-only machines).
        model_path (str or None): optional checkpoint to resume from.
        lr (float): Adam learning rate.
        epochs (int): number of epochs to run.
        model_prefix (str): filename prefix for per-epoch checkpoints.
        batch_size (int): dataloader batch size.
    """
    dataset = build_dataset_pytorch.QuoraDataset_train(
        'data/train_data_pad.pkl', 'data/train_data_char_pad.pkl',
        'data/train_data_flags_pad.pkl')
    dataloader = DataLoader(dataset, batch_size, shuffle=True, num_workers=1)
    mm = mp.MatchModel(vocab_size=100000,
                       tr_w_embed_dim=300,
                       tr_w_emb_dim_flag=0,
                       char_size=50,
                       char_embed_dim=20,
                       output_dim=100,
                       hidden_dim=200,
                       use_gpu=use_gpu,
                       yt_layer_num=3)
    optimizer = t.optim.Adam(mm.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    # NOTE(review): seeding happens after model construction, so parameter
    # init is not reproducible — kept as-is to preserve behavior.
    t.manual_seed(2)
    if model_path:
        mm.load_state_dict(t.load(model_path))
    if use_gpu:
        mm.cuda()
        criterion.cuda()
        t.cuda.manual_seed_all(2)

    with open('data/vector_n.pkl', 'rb') as handle:
        data = pickle.load(
            handle
        )  # Warning: If adding something here, also modifying saveDataset
        word_vec = t.from_numpy(np.array(data['word_vec']))

    def data_convert(*tensors):
        # Move every tensor to the current CUDA device (no-op if already there).
        return [x.cuda() for x in tensors]

    max_dev = 0
    max_test = 0
    for e in range(0, epochs):
        correct_all = 0
        data_num = 0
        print("----- Epoch {}/{} -----".format(e + 1, epochs))
        for i, data_ in tqdm(enumerate(dataloader), desc="Training"):
            p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags = data_
            if use_gpu:
                [p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags, word_vec] = \
                    data_convert(p_inputs, q_inputs, targets, p_inputs_char, q_inputs_char, p_flags, q_flags, word_vec)
            optimizer.zero_grad()
            p_input = [p_inputs, p_inputs_char, word_vec, p_flags]
            q_input = [q_inputs, q_inputs_char, q_flags]
            output = mm(100, p_input, q_input)
            # Targets are one-hot; CrossEntropyLoss wants class indices.
            loss = criterion(output, t.argmax(targets, 1))
            if use_gpu:
                output = output.cpu()
                targets = targets.cpu()
            correct_num = (t.argmax(output, 1) == t.argmax(targets, 1)).numpy()
            correct_sum = correct_num.sum()
            accuracy = correct_sum / len(correct_num)
            data_num += len(correct_num)
            correct_all += correct_sum
            loss.backward()
            optimizer.step()
            if (i + 1) % 600 == 0:
                tqdm.write("----- Epoch %d Batch %d -- Loss %.2f -- Acc %.3f" %
                           (e + 1, i + 1, loss, accuracy))
        # Note: the reported loss is the last batch's, not an epoch average.
        tqdm.write("----- epoch %d -- Loss %.2f -- Acc %.3f" %
                   (e + 1, loss, correct_all / data_num))
        t.save(mm.state_dict(), '%s_%s.pth' % (model_prefix, e))
        # Propagate use_gpu instead of hard-coding True so CPU-only runs
        # can still evaluate their per-epoch checkpoints.
        dev_acc = dev(use_gpu, 50, '%s_%s.pth' % (model_prefix, e))
        test_acc = test(use_gpu, 50, '%s_%s.pth' % (model_prefix, e))
        max_dev = max(max_dev, dev_acc)
        max_test = max(max_test, test_acc)
    print('max test acc:', max_test, '\nmax dev acc:', max_dev)