예제 #1
0
def _read_lines_stripped(path, encoding=None):
    """Return the lines of the text file at *path*.

    The whole file is read, surrounding whitespace is stripped and the
    remainder is split on newlines. The file handle is closed before
    returning.
    """
    with open(path, mode='r', encoding=encoding) as handle:
        return handle.read().strip().split('\n')


def train(train_feats,
          train_caps,
          val_feats,
          val_caps,
          train_prefix="",
          val_prefix="",
          epochs=EPOCHS,
          batch_size=BATCH_SIZE,
          max_seq_len=MAX_LEN,
          hidden_dim=HIDDEN_DIM,
          emb_dim=EMB_DIM,
          enc_seq_len=ENC_SEQ_LEN,
          enc_dim=ENC_DIM,
          clip_val=CLIP_VAL,
          teacher_force=TEACHER_FORCE_RAT,
          dropout_p=0.1,
          attn_activation="relu",
          epsilon=0.0005,
          weight_decay=WEIGHT_DECAY,
          lr=LEARNING_RATE,
          early_stopping=True,
          scheduler="step",
          attention=None,
          deep_out=False,
          checkpoint="",
          out_dir="Pytorch_Exp_Out",
          decoder=None):
    """Train a captioning ``Network`` and write logs/weights to *out_dir*.

    Args:
        train_feats: Path to a text file listing one feature path per line.
        train_caps: Path to a UTF-8 text file with one caption per line;
            must have as many lines as *train_feats*.
        val_feats: Like *train_feats*, for validation. Only read when
            *val_caps* is truthy.
        val_caps: Like *train_caps*, for validation. When falsy, validation,
            checkpointing and early stopping are all skipped.
        train_prefix: Directory joined in front of every training feature
            path.
        val_prefix: Directory joined in front of every validation feature
            path.
        epochs: Maximum number of epochs to run.
        batch_size: Forwarded to the data loaders.
        max_seq_len: Forwarded to the data loaders and to the
            train/evaluate/sample helpers.
        hidden_dim, emb_dim, enc_seq_len, enc_dim, dropout_p, deep_out,
        attention, decoder: Forwarded unchanged to ``Network``.
        clip_val: Forwarded to ``train_epoch``.
        teacher_force: Forwarded to ``Network`` and to ``_teacher_force``,
            which derives the per-epoch teacher forcing ratio.
        attn_activation: Accepted for interface compatibility; not used.
        epsilon: Forwarded to ``train_epoch`` and ``evaluate``.
        weight_decay: Weight decay for the Adam optimizer.
        lr: Learning rate for the Adam optimizer.
        early_stopping: When true (and validation data is given), stop as
            soon as validation BLEU drops below the previous epoch's value.
        scheduler: Scheduler selector handed to ``set_scheduler``.
        checkpoint: Optional path to a state dict to initialise the network
            from.
        out_dir: Directory receiving ``vocab.txt``, ``net.pt`` and the loss
            logs; created if missing.

    Raises:
        AssertionError: If a caption file and its feature list differ in
            length.
    """
    print("EXPERIMENT START ", time.asctime())

    # makedirs also handles nested paths and an already existing directory.
    os.makedirs(out_dir, exist_ok=True)

    # 1. Load the data

    train_captions = _read_lines_stripped(train_caps, encoding='utf-8')
    train_features = [os.path.join(train_prefix, z)
                      for z in _read_lines_stripped(train_feats)]

    assert len(train_captions) == len(train_features)

    if val_caps:
        val_captions = _read_lines_stripped(val_caps, encoding='utf-8')
        val_features = [os.path.join(val_prefix, z)
                        for z in _read_lines_stripped(val_feats)]

        assert len(val_captions) == len(val_features)

    # 2. Preprocess the data

    train_captions = normalize_strings(train_captions)
    train_data = list(zip(train_captions, train_features))
    train_data = filter_inputs(train_data)
    print("Total training instances: ", len(train_data))

    if val_caps:
        val_captions = normalize_strings(val_captions)
        val_data = list(zip(val_captions, val_features))
        val_data = filter_inputs(val_data)
        print("Total validation instances: ", len(val_data))

    vocab = Vocab()
    vocab.build_vocab((caption for caption, _ in train_data), max_size=10000)
    vocab.save(path=os.path.join(out_dir, 'vocab.txt'))
    print("Vocabulary size: ", vocab.n_words)

    # 3. Initialize the network, optimizer & loss function

    net = Network(hid_dim=hidden_dim,
                  out_dim=vocab.n_words,
                  sos_token=0,
                  eos_token=1,
                  pad_token=2,
                  teacher_forcing_rat=teacher_force,
                  emb_dim=emb_dim,
                  enc_seq_len=enc_seq_len,
                  enc_dim=enc_dim,
                  dropout_p=dropout_p,
                  deep_out=deep_out,
                  decoder=decoder,
                  attention=attention)
    net.to(DEVICE)

    if checkpoint:
        # map_location lets weights saved on another device (e.g. a GPU)
        # be restored onto the device used for this run.
        net.load_state_dict(torch.load(checkpoint, map_location=DEVICE))

    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    loss_function = nn.NLLLoss()

    scheduler = set_scheduler(scheduler, optimizer)

    # 4. Train

    total_instances = 0
    total_steps = 0
    train_loss_log = []
    train_loss_log_batches = []
    train_penalty_log = []
    val_loss_log = []
    val_loss_log_batches = []
    val_bleu_log = []
    # BLEU is "higher is better", so both trackers start at -inf: the first
    # epoch always counts as an improvement and can never trigger early
    # stopping on its own.
    best_bleu = float('-inf')
    prev_bleu = float('-inf')

    train_data = DataLoader(captions=(pair[0] for pair in train_data),
                            sources=(pair[1] for pair in train_data),
                            batch_size=batch_size,
                            vocab=vocab,
                            max_seq_len=max_seq_len)

    if val_caps:
        val_data = DataLoader(captions=(pair[0] for pair in val_data),
                              sources=(pair[1] for pair in val_data),
                              batch_size=batch_size,
                              vocab=vocab,
                              max_seq_len=max_seq_len,
                              val_multiref=True)

    training_start_time = time.time()

    for e in range(1, epochs + 1):
        print("Epoch ", e)

        tfr = _teacher_force(epochs, e, teacher_force)

        # train one epoch
        train_l, inst, steps, t, l_log, pen = train_epoch(
            model=net,
            loss_function=loss_function,
            optimizer=optimizer,
            data_iter=train_data,
            max_len=max_seq_len,
            clip_val=clip_val,
            epsilon=epsilon,
            teacher_forcing_rat=tfr)

        if scheduler is not None:
            scheduler.step()

        # epoch logs
        print("Training loss:\t", train_l)
        print("Instances:\t", inst)
        print("Steps:\t", steps)
        hours = t // 3600
        mins = (t % 3600) // 60
        secs = (t % 60)
        print("Time:\t{0}:{1}:{2}".format(hours, mins, secs))
        total_instances += inst
        total_steps += steps
        train_loss_log.append(train_l)
        train_loss_log_batches += l_log
        train_penalty_log.append(pen)
        print()

        # evaluate
        if val_caps:
            val_l, l_log, bleu = evaluate(model=net,
                                          loss_function=loss_function,
                                          data_iter=val_data,
                                          max_len=max_seq_len,
                                          epsilon=epsilon)

            # validation logs
            print("Validation loss: ", val_l)
            print("Validation BLEU-4: ", bleu)
            # Keep only the best-scoring weights on disk so that a later,
            # worse epoch cannot overwrite them.
            if bleu > best_bleu:
                best_bleu = bleu
                torch.save(net.state_dict(), os.path.join(out_dir, 'net.pt'))
            val_loss_log.append(val_l)
            val_bleu_log.append(bleu)
            val_loss_log_batches += l_log

        # sample model
        print("Sampling training data...")
        print()
        samples = sample(net,
                         train_data,
                         vocab,
                         samples=3,
                         max_len=max_seq_len)
        for target, predicted in samples:
            print("Target:\t", target)
            print("Predicted:\t", predicted)
            print()

        if val_caps:
            # If validation BLEU dropped compared to the previous epoch,
            # wrap up training. `e` is already 1-based, i.e. it equals the
            # number of completed epochs.
            if early_stopping and bleu < prev_bleu:
                print("\nWrapping training after {0} epochs.\n".format(e))
                break

            prev_bleu = bleu

    # Experiment summary logs.
    tot_time = time.time() - training_start_time
    hours = tot_time // 3600
    mins = (tot_time % 3600) // 60
    secs = (tot_time % 60)
    print("Total training time:\t{0}:{1}:{2}".format(hours, mins, secs))
    print("Total training instances:\t", total_instances)
    print("Total training steps:\t", total_steps)
    print()

    _write_loss_log("train_loss_log.txt", out_dir, train_loss_log)
    _write_loss_log("train_loss_log_batches.txt", out_dir,
                    train_loss_log_batches)
    _write_loss_log("train_penalty.txt", out_dir, train_penalty_log)

    if val_caps:
        _write_loss_log("val_loss_log.txt", out_dir, val_loss_log)
        _write_loss_log("val_loss_log_batches.txt", out_dir,
                        val_loss_log_batches)
        _write_loss_log("val_bleu4_log.txt", out_dir, val_bleu_log)

    print("EXPERIMENT END ", time.asctime())
    model_weight_paths = get_model_weight_paths(ckpt_dir, args.num)

    for epoch_index, (epoch_number,
                      weights_path) in enumerate(model_weight_paths):

        logger.info('Starting epoch: {}'.format(epoch_number))

        assert osp.exists(
            weights_path), 'path to weights: {} was not found'.format(
                weights_path)
        state_dict = torch.load(weights_path,
                                map_location=lambda storage, loc: storage)
        if 'model' in state_dict.keys():
            state_dict = state_dict['model']
        model.load_state_dict(state_dict, strict=True)
        model = model.to(device)

        model = model.eval()
        logger.info('weights loaded from path: {}'.format(weights_path))
        logger.info('for epoch: {}'.format(epoch_number))

        Hess = FullHessian(crit='CrossEntropyLoss',
                           loader=loader,
                           device=device,
                           model=model,
                           num_classes=C,
                           hessian_type='Hessian',
                           init_poly_deg=64,
                           poly_deg=128,
                           spectrum_margin=0.05,
                           poly_points=1024,
예제 #3
0
def run(test_dir,
        test_srcs,
        checkpoint,
        vocab,
        out="captions.out.txt",
        batch_size=16,
        max_seq_len=MAX_LEN,
        hidden_dim=HIDDEN_DIM,
        emb_dim=EMB_DIM,
        enc_seq_len=ENC_SEQ_LEN,
        enc_dim=ENC_DIM,
        attn_activation="relu",
        deep_out=False,
        decoder=4,
        attention=3):
    """Caption every test instance with a trained network and save the text.

    Args:
        test_dir: Directory joined in front of every path listed in
            *test_srcs*.
        test_srcs: Path to a text file listing one source path per line.
        checkpoint: Path to the state dict loaded into the network.
        vocab: Path handed to ``Vocab.load``.
        out: Output file; one generated caption per line, in input order.
        batch_size: Number of instances decoded per forward pass.
        max_seq_len: Maximum length passed to the network when decoding.
        hidden_dim, emb_dim, enc_seq_len, enc_dim, deep_out: Forwarded
            unchanged to ``Network``.
        attn_activation: Accepted for interface compatibility; not used.
        decoder: 1-4 selects ``models.AttentionDecoder_<n>``; any other
            value is forwarded to ``Network`` as given.
        attention: 1-3 selects additive, general or scaled general
            attention; any other value is forwarded to ``Network`` as given.
    """
    if decoder == 1:
        decoder = models.AttentionDecoder_1
    elif decoder == 2:
        decoder = models.AttentionDecoder_2
    elif decoder == 3:
        decoder = models.AttentionDecoder_3
    elif decoder == 4:
        decoder = models.AttentionDecoder_4

    if attention == 1:
        attention = attentions.AdditiveAttention
    elif attention == 2:
        attention = attentions.GeneralAttention
    elif attention == 3:
        attention = attentions.ScaledGeneralAttention

    # load vocabulary
    vocabulary = Vocab()
    vocabulary.load(vocab)

    # load test instances file paths
    with open(test_srcs) as srcs_file:
        srcs = srcs_file.read().strip().split('\n')
    srcs = [os.path.join(test_dir, s) for s in srcs]

    # load model
    net = Network(hid_dim=hidden_dim,
                  out_dim=vocabulary.n_words,
                  sos_token=0,
                  eos_token=1,
                  pad_token=2,
                  emb_dim=emb_dim,
                  enc_seq_len=enc_seq_len,
                  enc_dim=enc_dim,
                  deep_out=deep_out,
                  attention=attention,
                  decoder=decoder)
    net.to(DEVICE)

    # map_location lets weights saved on another device (e.g. a GPU) be
    # restored onto the device used for inference.
    net.load_state_dict(torch.load(checkpoint, map_location=DEVICE))

    net.eval()

    captions = []
    with torch.no_grad():

        # run inference, one slice of at most `batch_size` sources at a time
        for start in range(0, len(srcs), batch_size):
            srcs_batch = srcs[start:start + batch_size]
            batch = _load_batch(srcs_batch)
            batch = batch.to(DEVICE)

            tokens, _ = net(batch, targets=None, max_len=max_seq_len)
            # Swap the first two axes so the leading axis is indexed per
            # instance, then keep the top-scoring entry along the last axis.
            tokens = tokens.permute(1, 0, 2).detach()
            _, topi = tokens.topk(1, dim=2)
            topi = topi.squeeze(2)

            # decode token output from the model
            for j in range(len(srcs_batch)):
                words = vocabulary.tensor_to_sentence(topi[j])
                captions.append(' '.join(words))

    # Captions are read as UTF-8 during training; write them the same way
    # and make sure the file is flushed and closed.
    with open(out, mode='w', encoding='utf-8') as out_f:
        out_f.writelines(c + '\n' for c in captions)

    return