Example #1
        for i, source in enumerate(batchify_data(test_source)):
            output = model.decode(source)
            for words in output:
                print(' '.join(words))
        sys.exit(0)

    if args.model == 'baseline':
        model = BaselineModel(vocab).to(device)
    elif args.model == 'transformer':
        model = TransformerModel(vocab).to(device)
    else:
        print('error: invalid model or model not specified (--model)',
              file=sys.stderr)
        sys.exit(1)

    for p in model.parameters():
        if p.dim() > 1:
            torch.nn.init.xavier_uniform_(p)

    criterion = torch.nn.CrossEntropyLoss(ignore_index=pad_id)
    lr = 5  # learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

    bos_token = vocab.numberize('<BOS>')
    eos_token = vocab.numberize('<EOS>')
    cpy_token = vocab.numberize('<CPY>')

    def train():
        model.train()  # Turn on the train mode
        total_loss = 0.
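The snippet above is cut off part-way through train(). A minimal sketch of how such a loop is commonly completed is shown below; it is an assumption, not part of the original example, and the batchify_data call on training data, the (source, target) batch layout, and the model's forward signature are all hypothetical.

    def train():
        model.train()  # Turn on the train mode
        total_loss = 0.
        for i, (source, target) in enumerate(batchify_data(train_source)):
            optimizer.zero_grad()
            output = model(source, target)              # assumed logits shape: (batch, seq, vocab)
            loss = criterion(output.reshape(-1, output.size(-1)),
                             target.reshape(-1))        # pad positions ignored via ignore_index
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()
            total_loss += loss.item()
        scheduler.step()                                # decay the learning rate once per epoch
        return total_loss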
Example #2
def train(args):

    label_name = ['0', '1']

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")

    start_time = time.time()
    print('Initializing Glove vocab and embeddings...', file=sys.stderr)
    glove_word2id = pickle.load(open(args['--vocab'], 'rb'))
    glove_word2id.update({'<unk>': len(glove_word2id)})
    glove_word2id.update({'<pad>': len(glove_word2id)})
    vocab = VocabEntry(glove_word2id)

    embedding_matrix = np.load(open(args['--embeddings'], 'rb'))
    # Append two randomly initialised rows for the <unk> and <pad> tokens added above.
    embedding_matrix = np.vstack(
        (embedding_matrix,
         np.random.uniform(embedding_matrix.min(), embedding_matrix.max(),
                           (2, embedding_matrix.shape[1]))))
    glove_embeddings = torch.tensor(embedding_matrix,
                                    dtype=torch.float,
                                    device=device)
    print('Done! time elapsed %.2f sec' % (time.time() - start_time),
          file=sys.stderr)
    print('-' * 80, file=sys.stderr)

    start_time = time.time()
    print('Importing data...', file=sys.stderr)
    df_train = pd.read_csv(args['--train'], index_col=0)
    df_val = pd.read_csv(args['--dev'], index_col=0)
    train_label = dict(df_train.InformationType_label.value_counts())
    label_max = float(max(train_label.values()))
    train_label_weight = torch.tensor(
        [label_max / train_label[i] for i in range(len(train_label))],
        device=device)
    print('Done! time elapsed %.2f sec' % (time.time() - start_time),
          file=sys.stderr)
    print('-' * 80, file=sys.stderr)

    start_time = time.time()
    print('Set up model...', file=sys.stderr)

    model = BaselineModel(hidden_size=int(args['--hidden-size']),
                          embedding=glove_embeddings,
                          vocab=vocab,
                          n_class=len(label_name),
                          dropout_rate=float(args['--dropout']))
    model = model.to(device)
    print('Use device: %s' % device, file=sys.stderr)
    print('Done! time elapsed %.2f sec' % (time.time() - start_time),
          file=sys.stderr)
    print('-' * 80, file=sys.stderr)

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))
    cn_loss = torch.nn.CrossEntropyLoss(weight=train_label_weight.float(),
                                        reduction='mean')
    torch.save(cn_loss, 'loss_func')  # for later testing

    train_batch_size = int(args['--batch-size'])
    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = 0
    cum_examples = report_examples = epoch = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('Begin Maximum Likelihood training...')

    while True:
        epoch += 1

        for sents, targets in batch_iter(df_train,
                                         batch_size=train_batch_size,
                                         shuffle=True):  # for each epoch
            train_iter += 1

            optimizer.zero_grad()

            batch_size = len(sents)

            pre_softmax = model(sents)
            print(type(targets[0]))  # leftover debug print: shows the raw target dtype
            loss = cn_loss(
                pre_softmax,
                torch.tensor(targets, dtype=torch.long, device=device))

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       clip_grad)

            optimizer.step()

            batch_losses_val = loss.item() * batch_size
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, '
                      'cum. examples %d, speed %.2f examples/sec, '
                      'time elapsed %.2f sec' %
                      (epoch, train_iter, report_loss / report_examples,
                       cum_examples, report_examples /
                       (time.time() - train_time), time.time() - begin_time),
                      file=sys.stderr)

                train_time = time.time()
                report_loss = report_examples = 0.

            # perform validation
            if train_iter % valid_niter == 0:
                print(
                    'epoch %d, iter %d, cum. loss %.2f, cum. examples %d' %
                    (epoch, train_iter, cum_loss / cum_examples, cum_examples),
                    file=sys.stderr)

                cum_loss = cum_examples = 0.

                print('begin validation ...', file=sys.stderr)

                validation_loss = validation(
                    model, df_val, cn_loss,
                    device)  # dev batch size can be a bit larger

                print('validation: iter %d, loss %f' %
                      (train_iter, validation_loss),
                      file=sys.stderr)

                is_better = (len(hist_valid_scores) == 0
                             or validation_loss < min(hist_valid_scores))
                hist_valid_scores.append(validation_loss)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' %
                          model_save_path,
                          file=sys.stderr)
                    model.save(model_save_path)

                    # also save the optimizers' state
                    torch.save(optimizer.state_dict(),
                               model_save_path + '.optim')
                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            sys.exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * float(
                            args['--lr-decay'])
                        print(
                            'load previously best model and decay learning rate to %f'
                            % lr,
                            file=sys.stderr)

                        # load model
                        params = torch.load(
                            model_save_path,
                            map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers',
                              file=sys.stderr)
                        optimizer.load_state_dict(
                            torch.load(model_save_path + '.optim'))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

                if epoch == int(args['--max-epoch']):
                    print('reached maximum number of epochs!', file=sys.stderr)
                    sys.exit(0)
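This example calls a validation(...) helper that is not shown. A plausible sketch, assuming batch_iter works on the dev DataFrame the same way it does on df_train, could look like the following; the default batch size and the column layout are assumptions, not part of the original.

def validation(model, df_val, loss_func, device, batch_size=32):
    was_training = model.training
    model.eval()
    total_loss, total_examples = 0.0, 0
    with torch.no_grad():
        for sents, targets in batch_iter(df_val, batch_size=batch_size, shuffle=False):
            pre_softmax = model(sents)
            loss = loss_func(
                pre_softmax,
                torch.tensor(targets, dtype=torch.long, device=device))
            total_loss += loss.item() * len(sents)
            total_examples += len(sents)
    if was_training:
        model.train()
    return total_loss / total_examples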
Example #3
def best_cell_and_baseline_hyperparameters(train_dataloader, validate_dataloader, test_dataloader, embedding):
    configs = []

    RNN_config = {}
    RNN_config["model"] = "LSTM"
    RNN_config["hidden_size"] = 30
    RNN_config["num_layers"] = 3
    RNN_config["dropout"] = 0.9
    RNN_config["bidirectional"] = True
    RNN_config["fc1_width"] = "//"
    RNN_config["fc2_width"] = "//"
    RNN_config["vocab_size"] = -1
    RNN_config["lr"] = 0.0001
    RNN_config["optimizer"] = torch.optim.Adam

    baseline_config = {}
    baseline_config["model"] = "Baseline"
    baseline_config["hidden_size"] = "//"
    baseline_config["num_layers"] = "//"
    baseline_config["dropout"] = "//"
    baseline_config["bidirectional"] = "//"
    baseline_config["fc1_width"] = 150
    baseline_config["fc2_width"] = 150
    baseline_config["vocab_size"] = -1
    baseline_config["lr"] = 0.0001
    baseline_config["optimizer"] = torch.optim.Adam

    hyperparameters = {}
    hyperparameters["vocab_size"] = [50, 1000, 10000]
    hyperparameters["lr"] = [0.0001, 0.001, 0.01, 0.1]
    hyperparameters["dropout"] = [0, 0.2, 0.4, 0.6, 0.8, 1]
    hyperparameters["num_layers"] = [1, 3, 6]
    hyperparameters["hidden_size"] = [30, 100, 150, 200]
    hyperparameters["optimizer"] = [torch.optim.Adam, torch.optim.SGD, torch.optim.RMSprop]

    supports = {}
    supports["vocab_size"] = [BaselineModel, RNN.RecurrentModel]
    supports["lr"] = [BaselineModel, RNN.RecurrentModel]
    supports["dropout"] = [RNN.RecurrentModel]
    supports["num_layers"] = [RNN.RecurrentModel]
    supports["hidden_size"] = [RNN.RecurrentModel]
    supports["optimizer"] = [BaselineModel, RNN.RecurrentModel]

    initial_config = {}
    initial_config["clip"] = args.clip
    initial_config["epochs"] = args.epochs
    initial_config["input_width"] = 300
    initial_config["output_width"] = 1

    models = [BaselineModel, RNN.RecurrentModel]

    criterion = nn.BCEWithLogitsLoss()

    for model_type in models:
        for (key, values) in hyperparameters.items():
            # Skip this hyperparameter testing if the model does not support it
            if model_type not in supports[key]:
                continue

            for value in values:
                start = time.time()
                config = {}

                # Build the full config (model defaults + shared settings + the
                # hyperparameter value under test) *before* constructing the model,
                # so that architectural settings such as hidden_size, num_layers
                # and dropout actually take effect.
                if model_type == RNN.RecurrentModel:
                    config.update(RNN_config)
                    train = RNN.train
                    evaluate = RNN.evaluate
                else:
                    config.update(baseline_config)
                    train = baseline.train
                    evaluate = baseline.evaluate
                config.update(initial_config)
                config[key] = value

                if model_type == RNN.RecurrentModel:
                    model = RNN.RecurrentModel(config["model"], config["input_width"],
                                               config["hidden_size"],
                                               config["output_width"], config["num_layers"],
                                               config["bidirectional"], config["dropout"])
                else:
                    model = BaselineModel(config["input_width"], config["fc1_width"],
                                          config["fc2_width"], config["output_width"])

                print(config)

                optimizer = config["optimizer"](model.parameters(), lr=config["lr"])

                for epoch in range(args.epochs):
                    print(f'\nEpoch: {epoch}')
                    train(model, train_dataloader, optimizer, criterion, embedding, args.clip)
                    evaluate(model, validate_dataloader, criterion, embedding)
                accuracy, f1, confusion_matrix = evaluate(model, test_dataloader, criterion, embedding)
                config["accuracy"] = accuracy.item()
                config["f1"] = f1.item()
                config["TP"] = confusion_matrix[0, 0].item()
                config["FP"] = confusion_matrix[0, 1].item()
                config["FN"] = confusion_matrix[1, 0].item()
                config["TN"] = confusion_matrix[1, 1].item()

                end = time.time()
                config["time"] = end - start
                config["optimizer"] = method_to_string(config["optimizer"])
                configs.append(config)

    print_to_file("5_final.xls", "RNN baseline hyperparameters", configs)
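The helpers method_to_string and print_to_file used above are not shown in this example. Hypothetical stand-ins, given only as assumptions for illustration, might look like this:

def method_to_string(optimizer_cls):
    # e.g. torch.optim.Adam -> "Adam"
    return optimizer_cls.__name__

def print_to_file(filename, sheet_name, configs):
    # One row per config dict; writing Excel files requires an engine such as
    # openpyxl, and the legacy .xls extension may need to be changed to .xlsx.
    import pandas as pd
    pd.DataFrame(configs).to_excel(filename, sheet_name=sheet_name, index=False)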