Example #1
0
def main():
    """Train TextCNN on the Sina data and dump per-epoch stats to JSON.

    Reads its configuration from module-level names (`args`, `input_dim`,
    `device`).  Trains until `args.max_epoch` epochs have completed; if
    `args.max_epoch` is falsy it trains (and checkpoints) indefinitely,
    which matches the original behaviour.
    """
    train_set = SinaDataset(path.join(args.source, 'train.json'), input_dim)
    test_set = SinaDataset(path.join(args.source, 'test.json'), input_dim)
    train_loader = DataLoader(train_set,
                              batch_size=args.bs,
                              shuffle=True,
                              drop_last=True)
    # BUG FIX: the test loader used shuffle=True, so combined with
    # drop_last=True a *different* subset of test samples was silently
    # discarded every epoch, making validation metrics non-deterministic.
    # Evaluation order is irrelevant, so keep it fixed.  drop_last is kept
    # because validate() is also handed args.bs and presumably assumes
    # full-size batches -- TODO confirm against validate()'s implementation.
    test_loader = DataLoader(test_set,
                             batch_size=args.bs,
                             shuffle=False,
                             drop_last=True)

    model = TextCNN(input_dim, 200)
    # model = MyLSTM(input_dim, hidden_dim=8)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), args.lr, weight_decay=args.wd)

    epoch = 0
    train_loss = []
    train_accu = []
    valid_loss = []
    valid_accu = []
    while True:
        epoch += 1
        epoch_loss, epoch_accu = train_one_epoch(epoch, model, optimizer,
                                                 train_loader, device, args.bs)
        val_loss, val_accu = validate(model, test_loader, device, args.bs)
        train_loss += epoch_loss
        train_accu += epoch_accu
        valid_loss += val_loss
        valid_accu += val_accu

        print('saving...')
        # Local import: this snippet otherwise only uses `path`
        # (os.path) -- the full `os` module may not be imported at
        # module level.
        import os
        # Ensure the checkpoint directory exists; torch.save does not
        # create missing parent directories.
        os.makedirs('./saved_models', exist_ok=True)
        torch.save(model.state_dict(),
                   './saved_models/epoch' + str(epoch) + '.pkl')
        print()

        if args.max_epoch and epoch >= args.max_epoch:
            train_result = {
                'batch-size': args.bs,
                'train-loss': train_loss,
                'train-accu': train_accu,
                'valid-loss': valid_loss,
                'valid-accu': valid_accu
            }
            with open('train-result.json', 'w', encoding='utf-8') as f:
                json.dump(train_result, f)

            break
def train(name, dataset, epochs, batch_size, learning_rate, regularization,
          embedding_dims, embedding_type):
    """Train a TextCNN on *dataset*, logging per-epoch stats and
    checkpointing the model whenever validation accuracy improves.

    Args:
        name: run name; used for the stats log and the checkpoint file.
        dataset: key used to load ``<dataset>_train/_valid/_vocab``.
        epochs: number of full passes over the training data.
        batch_size: minibatch size.
        learning_rate: Adam learning rate.
        regularization: Adam weight decay (L2 penalty).
        embedding_dims: embedding dimensionality passed to TextCNN.
        embedding_type: embedding variant passed to TextCNN.
    """

    dirname, _ = os.path.split(os.path.abspath(__file__))
    run_uid = datetime.datetime.today().strftime('%Y-%m-%dT%H:%M:%S')
    logger = StatsLogger(dirname, 'stats', name, run_uid)

    print('Loading data')
    X_train, y_train = load('{}_train'.format(dataset))
    X_valid, y_valid = load('{}_valid'.format(dataset))
    vocab = load('{}_vocab'.format(dataset)).vocab

    X_train = torch.as_tensor(X_train, dtype=torch.long)
    y_train = torch.as_tensor(y_train, dtype=torch.float)
    X_valid = torch.as_tensor(X_valid, dtype=torch.long)
    y_valid = torch.as_tensor(y_valid, dtype=torch.float)

    prev_acc = 0

    model = TextCNN(dataset=dataset,
                    input_size=X_train.size()[1],
                    vocab_size=len(vocab) + 1,
                    embedding_dims=embedding_dims,
                    embedding_type=embedding_type)
    print(model)
    print('Parameters: {}'.format(sum(p.numel() for p in model.parameters()
                                      if p.requires_grad)))
    print('Training samples: {}'.format(len(X_train)))

    if torch.cuda.is_available():
        X_train = X_train.cuda()
        y_train = y_train.cuda()
        X_valid = X_valid.cuda()
        y_valid = y_valid.cuda()
        model = model.cuda()

    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=regularization)
    criterion = nn.BCEWithLogitsLoss()

    print('Starting training')
    for epoch in range(epochs):
        epoch_loss = []
        epoch_acc = []

        # Switch to train mode once per epoch; model.eval() below flips it
        # back for validation.  (The original called model.train() on every
        # batch, and also kept dead `iters`/`total_iters` counters that
        # were never used -- both removed.)
        model.train()

        for batch in minibatch_iter(len(X_train), batch_size):
            # X_train/y_train were already moved to the GPU above, so the
            # original per-batch .cuda() calls were redundant no-ops and
            # have been dropped.
            X_train_batch = X_train[batch]
            y_train_batch = y_train[batch]

            optimizer.zero_grad()

            output = model(X_train_batch)
            train_loss = criterion(output, y_train_batch)
            train_acc = accuracy(output, y_train_batch)

            epoch_loss.append(train_loss.item())
            epoch_acc.append(train_acc.item())

            train_loss.backward()
            optimizer.step()

        model.eval()
        train_loss, train_acc = np.mean(epoch_loss), np.mean(epoch_acc)
        valid_loss, valid_acc, _ = compute_dataset_stats(
            X_valid, y_valid, model, nn.BCEWithLogitsLoss(), 256)

        stats = [epoch + 1, train_loss, train_acc, valid_loss, valid_acc]
        epoch_string = '* Epoch {}: t_loss={:.3f}, t_acc={:.3f}, ' + \
                       'v_loss={:.3f}, v_acc={:.3f}'
        print(epoch_string.format(*stats))
        logger.write(stats)

        # Checkpoint only when validation accuracy improves.
        if prev_acc < valid_acc:
            prev_acc = valid_acc
            model_path = os.path.join(dirname, 'checkpoints', name)
            # torch.save does not create missing parent directories.
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            torch.save(model.state_dict(), model_path)

    logger.close()
def cv_score(dataset,
             embedding_type,
             epochs,
             batch_size=32,
             learning_rate=1e-4,
             regularization=0):
    """10-fold cross-validated test accuracy of a TextCNN on *dataset*.

    Trains a fresh model per fold (CUDA is required -- every tensor and
    model is moved to the GPU unconditionally) and prints per-fold and
    aggregate accuracy / std statistics.
    """
    kf = KFold(10)
    X, y = load('{}_train'.format(dataset))
    vocab = load('{}_vocab'.format(dataset)).vocab

    cv_acc = []
    cv_std = []

    for ci, (train_index, test_index) in enumerate(kf.split(X)):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        X_train = torch.as_tensor(X_train, dtype=torch.long).cuda()
        y_train = torch.as_tensor(y_train, dtype=torch.float).cuda()
        X_test = torch.as_tensor(X_test, dtype=torch.long).cuda()
        y_test = torch.as_tensor(y_test, dtype=torch.float).cuda()

        model = TextCNN(dataset=dataset,
                        input_size=X_train.shape[1],
                        vocab_size=len(vocab) + 1,
                        embedding_dims=300,
                        embedding_type=embedding_type).cuda()

        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=regularization)
        criterion = nn.BCEWithLogitsLoss()

        model.train()

        for _ in range(epochs):
            # Fold tensors already live on the GPU (moved above), so the
            # original per-batch .cuda() calls were redundant no-ops and
            # have been dropped, along with the unused enumerate index.
            for batch in minibatch_iter(len(X_train), batch_size):
                X_train_batch = X_train[batch]
                y_train_batch = y_train[batch]

                optimizer.zero_grad()

                output = model(X_train_batch)
                train_loss = criterion(output, y_train_batch)

                train_loss.backward()
                optimizer.step()

        model.eval()
        _, test_acc, test_std = compute_dataset_stats(X_test, y_test, model,
                                                      nn.BCEWithLogitsLoss(),
                                                      256)

        cv_acc.append(test_acc)
        cv_std.append(test_std)
        print('  [{}] acc={}, std={}'.format(ci + 1, test_acc, test_std))

    print('{} - {}'.format(dataset, embedding_type))
    print('Mean acc - {}'.format(np.mean(cv_acc)))
    print('Min acc - {}'.format(np.min(cv_acc)))
    print('Max acc - {}'.format(np.max(cv_acc)))
    print('Mean std - {}'.format(np.mean(cv_std)))
        label_model.load_state_dict(label_model_pretrained)

    if args.flat or args.cascaded_step2:
        for param in label_model.parameters():
            param.require_grad = False

    doc_model = nn.DataParallel(doc_model)
    label_model = nn.DataParallel(label_model)

    doc_model = doc_model.cuda()
    label_model = label_model.cuda()

    # Loss and optimizer
    criterion = Loss(use_geodesic=args.joint,
                     _lambda=args.geodesic_lambda,
                     only_label=args.cascaded_step1)

    optimizer = torch.optim.Adam([{
        'params': doc_model.parameters(),
        'lr': doc_lr
    }, {
        'params': label_model.parameters(),
        'lr': 0.001
    }])

    logging.info('Starting Training')
    # Train and evaluate
    Y = torch.arange(trainvalset.n_labels).cuda()
    train(doc_model, label_model, trainloader, valloader, testloader,
          criterion, optimizer, Y, args.num_epochs, args.exp_name)