Example #1
def load_model(args, data):
    model = BIMPM(args, data)
    model.load_state_dict(torch.load(args.model_path))

    if args.gpu > -1:
        model.cuda(args.gpu)

    return model
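
For reference, a hypothetical usage sketch (not taken from the original repository): it assumes the same args/data objects and the two-value test() helper that most of the training examples below call.

# Hypothetical usage sketch: evaluate a restored checkpoint on the dev split.
# Relies on args, data, load_model and the test() helper from the surrounding
# repository; none of these are redefined here.
model = load_model(args, data)
model.eval()
dev_loss, dev_acc = test(model, args, data, mode='dev')
print('dev loss: %.3f / dev acc: %.3f' % (dev_loss, dev_acc))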
Example #2
def load_model(args, data):
    if args.use_my_model:
        model = CBIMPM(args, data)
    elif args.use_only_conv:
        model = CONV(args, data)
    else:
        model = BIMPM(args, data)

    model.load_state_dict(torch.load(args.model_path))

    if args.gpu > -1:
        model.cuda(args.gpu)

    return model
Example #3
def train(args, data):
    model = BIMPM(args, data)
    viz.line(X=np.array([0]),
             Y=np.array([0]),
             win=args.loss_curve,
             name='train-%s' % args.line_suffix,
             opts={'title': args.title})
    viz.line(X=np.array([0]),
             Y=np.array([0]),
             win=args.loss_curve,
             name='dev-%s' % args.line_suffix,
             update='append')
    viz.line(X=np.array([0]),
             Y=np.array([0]),
             win=args.loss_curve,
             name='test-%s' % args.line_suffix,
             update='append')
    viz.line(X=np.array([0]),
             Y=np.array([0]),
             win=args.acc_curve,
             name='test-%s' % args.line_suffix,
             opts={'title': args.title})
    viz.line(X=np.array([0]),
             Y=np.array([0]),
             win=args.acc_curve,
             name='dev-%s' % args.line_suffix,
             update='append')
    viz.line(X=np.array([0]),
             Y=np.array([0]),
             win=args.auc_curve,
             name='auc-test-%s' % args.line_suffix,
             opts={'title': args.title})
    viz.line(X=np.array([0]),
             Y=np.array([0]),
             win=args.auc_curve,
             name='auc-dev-%s' % args.line_suffix,
             update='append')

    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    model.train()
    loss, last_epoch = 0, -1
    max_dev_auc, max_test_auc = 0, 0

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            logger.info('epoch: %s' % (present_epoch + 1))
        last_epoch = present_epoch

        if args.data_type == 'SNLI':
            s1, s2 = 'premise', 'hypothesis'
        else:
            s1, s2 = 'q1', 'q2'

        s1, s2 = getattr(batch, s1), getattr(batch, s2)

        # limit the lengths of input sentences up to max_sent_len
        if args.max_sent_len >= 0:
            if s1.size()[1] > args.max_sent_len:
                s1 = s1[:, :args.max_sent_len]
            if s2.size()[1] > args.max_sent_len:
                s2 = s2[:, :args.max_sent_len]

        kwargs = {'p': s1, 'h': s2}

        if args.use_char_emb:
            char_p = Variable(torch.LongTensor(data.characterize(s1)))
            char_h = Variable(torch.LongTensor(data.characterize(s2)))

            if args.gpu > -1:
                char_p = char_p.cuda(args.gpu)
                char_h = char_h.cuda(args.gpu)

            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h
        pred = model(**kwargs)
        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.data[0]
        batch_loss.backward()
        optimizer.step()
        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_acc, dev_auc_pr = test(model, args, data, mode='dev')
            test_loss, test_acc, test_auc_pr = test(model, args, data)
            c = (i + 1) // args.print_freq
            viz.line(X=np.array([c]),
                     Y=np.array([loss]),
                     win=args.loss_curve,
                     name='train-%s' % args.line_suffix,
                     update='append')
            viz.line(X=np.array([c]),
                     Y=np.array([dev_loss]),
                     win=args.loss_curve,
                     name='dev-%s' % args.line_suffix,
                     update='append')
            viz.line(X=np.array([c]),
                     Y=np.array([test_loss]),
                     win=args.loss_curve,
                     name='test-%s' % args.line_suffix,
                     update='append')
            viz.line(X=np.array([c]),
                     Y=np.array([dev_acc]),
                     win=args.acc_curve,
                     name='dev-%s' % args.line_suffix,
                     update='append')
            viz.line(X=np.array([c]),
                     Y=np.array([test_acc]),
                     win=args.acc_curve,
                     name='test-%s' % args.line_suffix,
                     update='append')
            viz.line(X=np.array([c]),
                     Y=np.array([dev_auc_pr]),
                     win=args.auc_curve,
                     name='auc-dev-%s' % args.line_suffix,
                     update='append')
            viz.line(X=np.array([c]),
                     Y=np.array([test_auc_pr]),
                     win=args.auc_curve,
                     name='auc-test-%s' % args.line_suffix,
                     update='append')
            logger.info('train loss: %.3f / dev loss: %.3f / test loss: %.3f' %
                        (loss, dev_loss, test_loss))
            logger.info('dev acc: %.3f / test acc: %.3f' % (dev_acc, test_acc))
            logger.info('dev auc-pr: %.3f / test auc-pr: %.3f' %
                        (dev_auc_pr, test_auc_pr))
            if dev_auc_pr > max_dev_auc:
                max_dev_auc = dev_auc_pr
                max_test_auc = test_auc_pr
                best_model = copy.deepcopy(model)
                torch.save(
                    best_model.state_dict(), 'saved_models/BIBPM_%s_%s.pt' %
                    (args.data_type, args.model_time))
            loss = 0
            model.train()

    logger.info('max dev auc: %.3f / max test auc: %.3f' %
                (max_dev_auc, max_test_auc))
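
The viz handle used throughout Example #3 is not defined in the snippet; it is presumably a module-level Visdom client. A minimal setup sketch, assuming a locally running Visdom server (the setup below is standard Visdom usage, not taken from this code):

import visdom

# Assumed module-level setup for the viz handle used above.
viz = visdom.Visdom()  # defaults to http://localhost:8097
assert viz.check_connection(), 'start a server first: python -m visdom.server'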
Example #4
    class_size = len(data.LABEL.vocab)

    print(f"Creating model with class_size: {class_size}, word_vocab_size: {word_vocab_size}, char_vocab_size: {char_vocab_size}")
    print(f"pretrained_word_embedding: {pretrained_word_embedding}")
    print(f"pretrained_char_embedding: {pretrained_char_embedding}")

    model = BIMPM(class_size,
                  word_vocab_size,
                  char_vocab_size, 
                  pretrained_word_embedding=pretrained_word_embedding,
                  pretrained_char_embedding=pretrained_char_embedding,
                  word_dim=args.word_dim, 
                  char_dim=args.char_dim, 
                  num_perspective=args.num_perspective, 
                  use_char_emb=(not args.wo_char), 
                  context_lstm_dim=args.context_lstm_dim, 
                  context_layer_num=args.context_layer_num, 
                  aggregation_lstm_dim=args.aggregation_lstm_dim, 
                  aggregation_layer_num=args.aggregation_layer_num, 
                  char_lstm_dim=args.char_lstm_dim, 
                  dropout=args.dropout,
                  wo_full_match=args.wo_full_match,
                  wo_maxpool_match=args.wo_maxpool_match,
                  wo_attentive_match=args.wo_attentive_match,
                  wo_max_attentive_match=args.wo_max_attentive_match,
                  )
                  
    if args.gpu >= 0:
        model.cuda(args.gpu)
    print(model)
    
    print('Training start!')
Example #5
def train(args, data):
    if args.use_my_model:
        model = CBIMPM(args, data)
    elif args.use_only_conv:
        model = CONV(args, data)
    else:
        model = BIMPM(args, data)
    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    iterator = data.train_iter
    savenow = False
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            savenow = True
            with codecs.open('saved_models/' + args.model_time + "/acc.txt",
                             "a+", "utf-8") as output:
                output.write('\nEpoch: ' + str(present_epoch + 1))
            print('Epoch: ' + str(present_epoch + 1))
        last_epoch = present_epoch

        if args.data_type == 'SNLI':
            s1, s2 = 'premise', 'hypothesis'
        else:
            s1, s2 = 'q1', 'q2'

        s1, s2 = getattr(batch, s1), getattr(batch, s2)

        # limit the lengths of input sentences up to max_sent_len
        if args.max_sent_len >= 0:
            if s1.size()[1] > args.max_sent_len:
                s1 = s1[:, :args.max_sent_len]
            if s2.size()[1] > args.max_sent_len:
                s2 = s2[:, :args.max_sent_len]

        kwargs = {'p': s1, 'h': s2}

        if args.use_char_emb:
            char_p = Variable(torch.LongTensor(data.characterize(s1)))
            char_h = Variable(torch.LongTensor(data.characterize(s2)))

            if args.gpu > -1:
                char_p = char_p.cuda(args.gpu)
                char_h = char_h.cuda(args.gpu)

            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h

        pred = model(**kwargs)

        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.data[0]
        batch_loss.backward()
        optimizer.step()

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_acc = test(model, args, data, mode='dev')
            test_loss, test_acc = test(model, args, data)
            c = (i + 1) // args.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('acc/dev', dev_acc, c)
            writer.add_scalar('loss/test', test_loss, c)
            writer.add_scalar('acc/test', test_acc, c)

            print("[" + str(i) + "][loss] train: " + "{:.3f}".format(loss) +
                  " dev: " + "{:.3f}".format(dev_loss) + " test: " +
                  "{:.3f}".format(test_loss) + "\n[" + str(i) +
                  "][acc]  dev: " + "{:.3f}".format(dev_acc) + " test: " +
                  "{:.3f}".format(test_acc))

            with codecs.open('saved_models/' + args.model_time + "/acc.txt",
                             "a+", "utf-8") as output:
                output.write("\n[" + str(i) + "][loss] train: " +
                             "{:.3f}".format(loss) + " dev: " +
                             "{:.3f}".format(dev_loss) + " test: " +
                             "{:.3f}".format(test_loss) + "\n[" + str(i) +
                             "][acc]  dev: " + "{:.3f}".format(dev_acc) +
                             " test: " + "{:.3f}".format(test_acc))

            if test_acc > max_test_acc:
                max_test_acc = test_acc
                best_model = copy.deepcopy(model)
                # with codecs.open('saved_models/' + args.model_time + "/best.json", "w+", "utf-8") as out:
                #     out.write(json.dumps(test_res))

            if savenow:
                print('Saving model...', present_epoch)
                torch.save(
                    best_model.state_dict(), "saved_models/" +
                    args.model_time + "/Epoch_" + str(present_epoch) + "_" +
                    "{:.5f}".format(max_test_acc) + "_" + str(args.model_time))
                savenow = False
            loss = 0
            model.train()

    writer.close()
    print("max dev acc: " + str(max_dev_acc) + " max test acc: " +
          str(max_test_acc))

    return best_model
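
Examples #5 through #8 log their metrics with SummaryWriter, so the event files end up under runs/<args.model_time> and can be browsed by pointing TensorBoard at that directory (tensorboard --logdir runs).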
Example #6
def train(args, data):
    model = BIMPM(args, data)
    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, 0
    # loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    iterator = data.train_iter
    while last_epoch < args.epoch:
        for i, batch in enumerate(iterator):
            # present_epoch = int(iterator.epoch)
            # if present_epoch == args.epoch:
            #     break
            # if present_epoch > last_epoch:
            #     print('epoch:', present_epoch + 1)
            # last_epoch = present_epoch

            if args.data_type == 'SNLI':
                s1, s2 = 'premise', 'hypothesis'
            else:
                s1, s2 = 'q1', 'q2'

            s1, s2 = getattr(batch, s1), getattr(batch, s2)

            # limit the lengths of input sentences up to max_sent_len
            if args.max_sent_len >= 0:
                if s1.size()[1] > args.max_sent_len:
                    s1 = s1[:, :args.max_sent_len]
                if s2.size()[1] > args.max_sent_len:
                    s2 = s2[:, :args.max_sent_len]

            kwargs = {'p': s1, 'h': s2}

            if args.use_char_emb:
                char_p = torch.LongTensor(data.characterize(s1))
                char_h = torch.LongTensor(data.characterize(s2))

                if args.gpu > -1:
                    char_p = char_p.cuda(args.gpu)
                    char_h = char_h.cuda(args.gpu)

                kwargs['char_p'] = char_p
                kwargs['char_h'] = char_h

            pred = model(**kwargs)

            optimizer.zero_grad()
            batch_loss = criterion(pred, batch.label)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            if (i + 1) % args.print_freq == 0:
                dev_loss, dev_acc = test(model, args, data, mode='dev')
                test_loss, test_acc = test(model, args, data)
                c = (i + 1) // args.print_freq

                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('acc/dev', dev_acc, c)
                writer.add_scalar('loss/test', test_loss, c)
                writer.add_scalar('acc/test', test_acc, c)

                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f} / test loss: {test_loss:.3f}'
                    f' / dev acc: {dev_acc:.3f} / test acc: {test_acc:.3f}')

                if dev_acc > max_dev_acc:
                    max_dev_acc = dev_acc
                    max_test_acc = test_acc
                    best_model = copy.deepcopy(model)

                    to_save = {"model": model.state_dict(), "args": args}
                    torch.save(to_save, 'saved_models/BIMPM_best.pt')

                loss = 0
                model.train()
        
        iterator.init_epoch()
        last_epoch += 1

    writer.close()
    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')

    return best_model
Example #7
def train(args, data):
    model = BIMPM(args, data)
    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', str(present_epoch + 1))
        last_epoch = present_epoch

        if args.data_type == 'SNLI':
            s1, s2 = 'premise', 'hypothesis'
        else:
            s1, s2 = 'q1', 'q2'

        s1, s2 = getattr(batch, s1), getattr(batch, s2)

        # limit the lengths of input sentences up to max_sent_len
        if args.max_sent_len >= 0:
            if s1.size()[1] > args.max_sent_len:
                s1 = s1[:, :args.max_sent_len]
            if s2.size()[1] > args.max_sent_len:
                s2 = s2[:, :args.max_sent_len]

        kwargs = {'p': s1, 'h': s2}

        if args.use_char_emb:
            char_p = Variable(torch.LongTensor(data.characterize(s1)))
            char_h = Variable(torch.LongTensor(data.characterize(s2)))

            if args.gpu > -1:
                char_p = char_p.cuda(args.gpu)
                char_h = char_h.cuda(args.gpu)

            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h

        pred = model(**kwargs)
        
        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.data[0]
        batch_loss.backward()
        optimizer.step()
        del pred
        del batch_loss
        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_acc = test(model, args, data, mode='dev')
            test_loss, test_acc = test(model, args, data)
            c = (i + 1) // args.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('acc/dev', dev_acc, c)
            writer.add_scalar('loss/test', test_loss, c)
            writer.add_scalar('acc/test', test_acc, c)

            print('train loss: ' + str(loss) + ' / dev loss: ' + str(dev_loss) +
                  ' / test loss: ' + str(test_loss) + ' / dev acc: ' + str(dev_acc) +
                  ' / test acc: ' + str(test_acc))

            if dev_acc > max_dev_acc:
                max_dev_acc = dev_acc
                max_test_acc = test_acc
                best_model = copy.deepcopy(model)

            loss = 0
            model.train()

    writer.close()
    print('max dev acc: ' + str(max_dev_acc) + ' / max test acc: ' + str(max_test_acc))

    return best_model
Example #8
def train(args, data):
    model = BIMPM(args, data)
    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.BCEWithLogitsLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0
    print(args.epoch)
    iterator = data.train_iter
    for j in range(args.epoch):
        for i, batch in enumerate(iterator):
            present_epoch = int(iterator.epoch)
            if present_epoch > last_epoch:
                print('epoch:', present_epoch + 1)
                last_epoch = present_epoch
            

            if args.data_type == 'SNLI':
                s1, s2 = 'premise', 'hypothesis'
            else:
                s1, s2 = 'q1', 'q2'

            s1, s2 = getattr(batch, s1), getattr(batch, s2)

            # limit the lengths of input sentences up to max_sent_len
            if args.max_sent_len >= 0:
                if s1.shape[1] > args.max_sent_len:
                    s1 = s1[:, :args.max_sent_len]
                if s2.shape[1] > args.max_sent_len:
                    s2 = s2[:, :args.max_sent_len]

            kwargs = {'p': s1, 'h': s2}

            if args.use_char_emb:
                char_p = torch.LongTensor(data.characterize(s1))
                char_h = torch.LongTensor(data.characterize(s2))

                if args.gpu > -1:
                    char_p = char_p.cuda(args.gpu)
                    char_h = char_h.cuda(args.gpu)

                kwargs['char_p'] = char_p
                kwargs['char_h'] = char_h

            pred = model(**kwargs)

            # zero_grad() and step() are gated on the same condition, so the
            # gradients accumulated on the two intervening batches are cleared
            # before they are ever applied (see the note after this example).
            if i % 3 == 0:
                optimizer.zero_grad()
            batch_loss = criterion(pred, batch.label.view(pred.shape[0], -1).float())
            loss += batch_loss.item()
            batch_loss.backward()
            if i % 3 == 0:
                optimizer.step()

            if i % args.print_freq == 0:
                dev_loss, dev_acc = test(model, args, data, mode='dev')
                test_loss, test_acc = test(model, args, data)
                c = (i + 1) // args.print_freq

                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('acc/dev', dev_acc, c)
                writer.add_scalar('loss/test', test_loss, c)
                writer.add_scalar('acc/test', test_acc, c)

                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f} / test loss: {test_loss:.3f}'
                      f' / dev acc: {dev_acc:.3f} / test acc: {test_acc:.3f}')

                max_dev_acc = dev_acc
                max_test_acc = test_acc
                best_model = copy.deepcopy(model)
                if not os.path.exists('saved_models'):
                    os.makedirs('saved_models')
                torch.save(best_model.state_dict(), f'saved_models/BIBPM_{args.data_type}_{args.model_time}.pt')

                loss = 0
                model.train()

    writer.close()
    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')

    return best_model
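
The i % 3 guards in Example #8 look like an attempt at gradient accumulation, but because optimizer.zero_grad() and optimizer.step() fire on the same iterations, the gradients collected from the two batches in between are cleared before they are applied. Below is a self-contained sketch of the usual accumulation pattern, with toy stand-ins for the model and data; accum_steps and every other name here is illustrative, not part of the original args.

import torch
import torch.nn as nn
import torch.optim as optim

# Toy stand-ins so the sketch runs on its own; in Example #8 these would be
# the BIMPM model, nn.BCEWithLogitsLoss and data.train_iter.
model = nn.Linear(10, 1)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
batches = [(torch.randn(4, 10), torch.randint(0, 2, (4, 1)).float())
           for _ in range(6)]

accum_steps = 3  # assumed accumulation window
optimizer.zero_grad()
for i, (x, y) in enumerate(batches):
    batch_loss = criterion(model(x), y)
    (batch_loss / accum_steps).backward()  # gradients accumulate across batches
    if (i + 1) % accum_steps == 0:
        optimizer.step()       # apply gradients from the last accum_steps batches
        optimizer.zero_grad()  # then clear them for the next window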
Example #9
def train(args, data):
    model = BIMPM(args, data)
    if args.cuda:
        model = model.cuda()

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    for epoch in range(args.epoch):
        print("当前为训练第{%s}轮" % str(epoch + 1))
        iterator = data.train_iter
        for i, batch in enumerate(iterator):
            present_epoch = int(iterator.epoch)
            if present_epoch == args.epoch:
                break
            if present_epoch > last_epoch:
                print('epoch:', present_epoch + 1)
            last_epoch = present_epoch

            s1, s2, label = 'q1', 'q2', 'label'

            s1, s2, label = (getattr(batch, s1), getattr(batch, s2),
                             getattr(batch, label))

            # limit the lengths of input sentences up to max_sent_len
            if args.max_sent_len >= 0:
                if s1.size()[1] > args.max_sent_len:
                    s1 = s1[:, :args.max_sent_len]
                if s2.size()[1] > args.max_sent_len:
                    s2 = s2[:, :args.max_sent_len]

            if args.cuda:
                s1, s2, label = s1.cuda(), s2.cuda(), label.cuda()
            kwargs = {'p': s1, 'h': s2}

            if args.use_char_emb:
                char_p = Variable(torch.LongTensor(data.characterize(s1)))
                char_h = Variable(torch.LongTensor(data.characterize(s2)))

                if args.cuda:
                    char_p = char_p.cuda()
                    char_h = char_h.cuda()

                kwargs['char_p'] = char_p
                kwargs['char_h'] = char_h

            pred = model(**kwargs)

            optimizer.zero_grad()

            # keep a per-batch loss so the running total is not overwritten
            batch_loss = F.cross_entropy(pred, label)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            if (i + 1) % args.print_freq == 0:
                dev_loss, dev_acc = test(model, args, data, mode='dev')
                test_loss, test_acc = test(model, args, data)
                c = (i + 1) // args.print_freq

                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('acc/dev', dev_acc, c)
                writer.add_scalar('loss/test', test_loss, c)
                writer.add_scalar('acc/test', test_acc, c)

                print(
                    f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f} / test loss: {test_loss:.3f}'
                    f' / dev acc: {dev_acc:.3f} / test acc: {test_acc:.3f}')

                if dev_acc > max_dev_acc:
                    max_dev_acc = dev_acc
                    max_test_acc = test_acc
                    best_model = copy.deepcopy(model)

                loss = 0
                model.train()

    writer.close()
    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')

    return best_model