Example #1
def evaluate(model, data, indices, parallel=False):
    start_time = time.time()
    eval_loss = 0.
    eval_num_words = 0
    model.eval()
    with torch.no_grad():
        # fetch the first batch
        batch = [dh.make_batch(data, indices[0],
                               separate_caption=args.separate_caption,
                               pretrained_elmo=args.pretrained_elmo,
                               pretrained_bert=args.pretrained_bert,
                               bert_tokenizer=bert_tokenizer,
                               pretrained_all=args.pretrained_all,
                               bert_model=args.bert_model,
                               concat_his=args.concat_his)]
        # evaluation loop
        it = tqdm(six.moves.range(len(indices)), desc="evaluation", ncols=0)
        for j in it:
            b = batch.pop()
            # fetch the next batch in parallel
            if j < len(indices)-1:
                prefetch = threading.Thread(target=fetch_batch,
                                            args=(dh, data, indices[j+1],
                                                  args.separate_caption, batch))
                prefetch.start()
            # forward propagation for evaluation
            x = [torch.from_numpy(x) for x in b[0]]
            if args.concat_his:
                h = [torch.from_numpy(h_i) for h_i in b[1]]
            else:
                h = [[torch.from_numpy(h) for h in hb] for hb in b[1]]
            q = [torch.from_numpy(q) for q in b[2]]
            ai = [torch.from_numpy(ai) for ai in b[3]]
            ao = [torch.from_numpy(ao) for ao in b[4]]
            if args.separate_caption:
                c = [torch.from_numpy(c) for c in b[5]]
            else:
                c = None 
            if args.pretrained_elmo or args.pretrained_bert:
                if args.pretrained_all:
                    context_q, context_h, context_ai = b[-3:]
                else:
                    context_q = b[-1]
                    context_h = None
                    context_ai = None 
            else:
                context_q = None
                context_h = None 
                context_ai = None 
            if args.exclude_video:
                x = None

            if parallel:
                _, _, loss = model.module.loss(x, h, q, ai, ao, c, context_q, context_h, context_ai)
            else:
                _, _, loss = model.loss(x, h, q, ai, ao, c, context_q, context_h, context_ai)

            num_words = sum([len(s) for s in ao])
            eval_loss += loss.cpu().data.numpy() * num_words
            eval_num_words += num_words
            # wait for prefetch completion (no prefetch on the last iteration)
            if j < len(indices)-1:
                prefetch.join()
    model.train()

    wall_time = time.time() - start_time
    return math.exp(eval_loss/eval_num_words), wall_time
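Both evaluate variants in this listing return corpus-level perplexity: the loss comes back as a per-token average for each batch, so it is re-weighted by that batch's token count before the exponential. A minimal, self-contained sketch of that reduction (the numbers are made up purely for illustration):

import math

# (mean_loss, num_tokens) per batch; values are illustrative only
batch_stats = [(2.1, 50), (1.8, 64), (2.4, 37)]

total_loss = sum(loss * n for loss, n in batch_stats)  # undo the per-batch mean
total_tokens = sum(n for _, n in batch_stats)
print(math.exp(total_loss / total_tokens))  # exp of the token-weighted mean loss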
Example #2
def run_epoch(data, indices, vocab, epoch, model, loss_compute, eval=False):
    "Standard Training and Logging Function"
    start = time.time()
    total_tokens = 0 
    total_loss = 0 
    tokens = 0 
    it = tqdm(range(len(indices)), desc="epoch {}/{}".format(epoch+1, args.num_epochs), ncols=0)
    for j in it:
        b = dh.make_batch(data, indices[j], vocab,
                          separate_caption=args.separate_caption, cut_a=args.cut_a)
        out, ae_out = model.forward(b)
        if args.auto_encoder_ft in ('caption', 'summary'):
            ntokens_cap = (b.cap != vocab['<blank>']).data.sum()
            loss = loss_compute(out, b.trg_y, b.ntokens, ae_out, b.cap, ntokens_cap)
        elif args.auto_encoder_ft == 'query':
            ntokens_query = (b.query != vocab['<blank>']).data.sum()
            loss = loss_compute(out, b.trg_y, b.ntokens, ae_out, b.query, ntokens_query)
        total_loss += loss
        total_tokens += b.ntokens
        tokens += b.ntokens
        if (j+1) % args.report_interval == 0 and not eval:
            elapsed = time.time() - start
            print("Epoch: %d Step: %d Loss: %f Tokens per Sec: %f" %
                    (epoch+1,j+1, loss / b.ntokens.float(), float(tokens) / elapsed))
            with open(train_log_path, "a") as f:
                f.write("{},{},{:e},{}\n".format(epoch+1,j+1,loss/b.ntokens.float(),float(tokens)/elapsed))
            start = time.time()
            tokens = 0
    return total_loss / total_tokens.float()
Example #3
def generate_caption(model,
                     data,
                     batch_indices,
                     vocab,
                     dim,
                     stride=1,
                     maxlen=20,
                     beam=5,
                     penalty=2.0,
                     nbest=1):
    vocablist = sorted(vocab.keys(), key=lambda s: vocab[s])
    result = []
    c = 0
    for j in six.moves.range(len(batch_indices[0])):
        start_time = time.time()
        x_batch = [None] * len(data)
        for n in six.moves.range(len(data)):
            x_batch[n], Q_batch, A_batch = dh.make_batch(data[n],
                                                         batch_indices[n][j],
                                                         dim=dim[n],
                                                         stride=stride)

        pred_out, logp = model.generate(x_batch,
                                        Q_batch,
                                        A_batch,
                                        maxlen=maxlen,
                                        beam=beam,
                                        penalty=penalty)

        for i in six.moves.range(Q_batch.shape[1]):
            c = c + 1
            print(c, batch_indices[0][j][0][0] + '_' + str(i + 1))
            print('REF:', end=' ')
            for n in six.moves.range(A_batch.shape[0]):
                number = A_batch[n][i][0]
                if number == 3:
                    continue
                print(vocablist[number], end=' ')
            print()
            for n in six.moves.range(min(nbest, len(pred_out[i]))):
                print('HYP[%d]:' % (n + 1), end=' ')
                pred = pred_out[i][n]
                if isinstance(pred, list):
                    # The format used in run_youtube_mm.sh.
                    for w in pred:
                        print(vocablist[w], end=' ')
                    print('( {:f} )'.format(logp[i][n]))
                else:
                    # The format used in run_youtube.sh.
                    print(vocablist[pred])
                    print('( {:f} )'.format(logp[i]))
            print('ElapsedTime:', time.time() - start_time)
            print('-----------------------')
        result.append(pred_out)
    return result
Example #4
def generate_response(model,
                      data,
                      batch_indices,
                      vocab,
                      maxlen=20,
                      beam=5,
                      penalty=2.0,
                      nbest=1):
    vocablist = sorted(vocab.keys(), key=lambda s: vocab[s])
    result_dialogs = []
    model.eval()
    with torch.no_grad():
        qa_id = 0
        for dialog in data['original']['dialogs']:
            vid = dialog['image_id']
            pred_dialog = {
                'image_id': vid,
                'dialog': copy.deepcopy(dialog['dialog'])
            }
            result_dialogs.append(pred_dialog)
            for t, qa in enumerate(dialog['dialog']):
                logging.info('%d %s_%d' % (qa_id, vid, t))
                logging.info('QS: ' + qa['question'])
                logging.info('REF: ' + qa['answer'])
                # prepare input data
                start_time = time.time()
                x_batch, h_batch, q_batch, a_batch_in, a_batch_out, s_batch = \
                    dh.make_batch(data, batch_indices[qa_id])
                qa_id += 1
                x = [torch.from_numpy(x) for x in x_batch]
                h = [[torch.from_numpy(h) for h in hb] for hb in h_batch]
                q = [torch.from_numpy(q) for q in q_batch]
                s = torch.from_numpy(s_batch).cuda().float()

                # generate sequences
                pred_out, _ = model.generate(x,
                                             h,
                                             q,
                                             s,
                                             maxlen=maxlen,
                                             beam=beam,
                                             penalty=penalty,
                                             nbest=nbest)
                for n in six.moves.range(min(nbest, len(pred_out))):
                    pred = pred_out[n]
                    hypstr = ' '.join([vocablist[w] for w in pred[0]])
                    logging.info('HYP[%d]: %s  ( %f )' %
                                 (n + 1, hypstr, pred[1]))
                    if n == 0:
                        pred_dialog['dialog'][t]['answer'] = hypstr
                logging.info('ElapsedTime: %f' % (time.time() - start_time))
                logging.info('-----------------------')

    return {'dialogs': result_dialogs}
Example #5
def evaluate(model, data, indices):
    start_time = time.time()
    eval_loss = 0.
    eval_num_words = 0
    model.eval()
    with torch.no_grad():
        # fetch the first batch
        batch = [dh.make_batch(data, indices[0])]
        # evaluation loop
        for j in six.moves.range(len(indices)):
            # get a fetched batch
            x_batch, h_batch, q_batch, a_batch_in, a_batch_out, s_batch = batch.pop()
            # fetch the next batch in parallel
            if j < len(indices) - 1:
                prefetch = threading.Thread(
                    target=fetch_batch,
                    args=(dh, data, indices[j + 1], batch))
                prefetch.start()
            # forward propagation for evaluation
            x = [torch.from_numpy(x) for x in x_batch]
            h = [[torch.from_numpy(h) for h in hb] for hb in h_batch]
            q = [torch.from_numpy(q) for q in q_batch]
            ai = [torch.from_numpy(ai) for ai in a_batch_in]
            ao = [torch.from_numpy(ao) for ao in a_batch_out]
            s = torch.from_numpy(s_batch).cuda().float()

            _, _, loss = model.loss(x, h, q, ai, ao, s)

            num_words = sum([len(s) for s in ao])
            eval_loss += loss.cpu().data.numpy() * num_words
            eval_num_words += num_words
            # wait for prefetch completion (no prefetch on the last iteration)
            if j < len(indices) - 1:
                prefetch.join()
    model.train()

    wall_time = time.time() - start_time
    return math.exp(eval_loss / eval_num_words), wall_time
Example #6
def evaluate(model, data, batch_indices, dim, stride=1):
    start_time = time.time()
    eval_loss = 0.
    eval_hit = 0
    num_tokens = 0
    for j in six.moves.range(len(batch_indices)):
        x_batch = [None] * len(data)
        for m in six.moves.range(len(data)):
            x_batch[m], Q_batch, A_batch = dh.make_batch(data[m],
                                                         batch_indices[m][j],
                                                         dim=dim[m],
                                                         stride=stride)
        loss, hit, num = model(x_batch,
                               Q_batch,
                               A_batch,
                               predicted_context=False,
                               istraining=False)
        eval_loss += loss
        eval_hit += hit
        num_tokens += num

    wall_time = time.time() - start_time
    return math.exp(eval_loss /
                    num_tokens), float(eval_hit) / num_tokens, wall_time
Example #7
    # do training iterations
    for i in six.moves.range(args.num_epochs):
        if is_sgd:
            print('Epoch %d : SGD learning rate = %g' % (i + 1, optimizer.lr))
        else:
            print('Epoch %d : %s' % (i + 1, args.optimizer))

        train_loss = 0.
        for j in six.moves.range(len(ids)):
            # prepare input data
            k = ids[j]
            x_batch = [None] * len(data)
            for m in six.moves.range(len(data)):
                x_batch[m], Q_batch, A_batch = dh.make_batch(
                    data[m],
                    train_indices[m][k],
                    dim=args.in_size[m],
                    stride=args.frame_stride)
            # propagate for training
            loss = model(x_batch,
                         Q_batch,
                         A_batch,
                         predicted_context=False,
                         istraining=True)
            if multiGPU:
                loss = loss.sum()
            wj = loss.cpu()
            cur_log_perp += wj.data.numpy()
            num_words += A_batch.shape[0] * A_batch.shape[1]
            if (n + 1) % report_interval == 0:
                now = time.time()
Example #8
def fetch_batch(dh, data, index, result):
    result.append(dh.make_batch(data, index))
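This helper is the target of the thread-based prefetch used throughout this listing: the shared result list acts as a one-slot buffer, so batch j+1 is built in the background while batch j is consumed. A self-contained sketch of the pattern, with a stub standing in for dh (the stub is illustrative, not part of the repository):

import threading

class _StubHandler:
    # illustrative stand-in for the dh data handler used above
    def make_batch(self, data, index):
        return data[index]

def fetch_batch(dh, data, index, result):
    result.append(dh.make_batch(data, index))

dh, data, indices = _StubHandler(), list(range(10)), list(range(10))
buffer = [dh.make_batch(data, indices[0])]  # fetch the first batch
for j in range(len(indices)):
    b = buffer.pop()  # get the prepared batch
    if j < len(indices) - 1:
        prefetch = threading.Thread(target=fetch_batch,
                                    args=(dh, data, indices[j + 1], buffer))
        prefetch.start()  # build the next batch in the background
    # ... forward pass / loss on b would happen here ...
    if j < len(indices) - 1:
        prefetch.join()  # wait before popping the buffer again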
Example #9
    min_valid_ppl = 1.0e+10
    n = 0
    report_interval = 1000 / args.batch_size
    bestmodel_num = 0

    random.shuffle(train_indices)
    # do training iterations
    for i in six.moves.range(args.num_epochs):
        logging.info('Epoch %d : %s' % (i + 1, args.optimizer))
        train_loss = 0.
        train_num_words = 0
        batch_time = AverageMeter()
        data_time = AverageMeter()
        end = time.time()
        # fetch the first batch
        batch = [dh.make_batch(train_data, train_indices[0])]
        # train iterations
        for j in six.moves.range(len(train_indices)):
            data_time.update(time.time() - end)
            # get fetched batch
            x_batch, h_batch, q_batch, a_batch_in, a_batch_out, s_batch = batch.pop()
            # fetch the next batch in parallel
            if j < len(train_indices) - 1:
                prefetch = threading.Thread(
                    target=fetch_batch,
                    args=(dh, train_data, train_indices[j + 1], batch))
                prefetch.start()

            # propagate for training
            x = [torch.from_numpy(x) for x in x_batch]
Example #10
def generate_response(model,
                      data,
                      batch_indices,
                      vocab,
                      maxlen=20,
                      beam=5,
                      penalty=2.0,
                      nbest=1,
                      ref_data=None):
    vocablist = sorted(vocab.keys(), key=lambda s: vocab[s])
    result_dialogs = []
    model.eval()
    with torch.no_grad():
        qa_id = 0
        for idx, dialog in enumerate(data['original']['dialogs']):
            vid = dialog['image_id']
            if args.undisclosed_only:
                out_dialog = dialog['dialog'][-1:]
                if ref_data is not None:
                    ref_dialog = ref_data['dialogs'][idx]
                    assert ref_dialog['image_id'] == vid
                    ref_dialog = ref_dialog['dialog'][-1:]
            else:
                out_dialog = dialog['dialog']
            pred_dialog = {
                'image_id': vid,
                'dialog': copy.deepcopy(out_dialog)
            }
            result_dialogs.append(pred_dialog)
            for t, qa in enumerate(out_dialog):
                if args.undisclosed_only:
                    assert qa['answer'] == '__UNDISCLOSED__'
                logging.info('%d %s_%d' % (qa_id, vid, t))
                logging.info('QS: ' + qa['question'])
                if args.undisclosed_only and ref_data is not None:
                    logging.info('REF: ' + ref_dialog[t]['answer'])
                else:
                    logging.info('REF: ' + qa['answer'])
                # prepare input data
                start_time = time.time()
                batch = dh.make_batch(
                    data,
                    batch_indices[qa_id],
                    vocab,
                    separate_caption=train_args.separate_caption)
                qa_id += 1
                if args.decode_style == 'beam_search':
                    pred_out, _ = beam_search_decode(
                        model,
                        batch,
                        maxlen,
                        start_symbol=vocab['<sos>'],
                        unk_symbol=vocab['<unk>'],
                        end_symbol=vocab['<eos>'],
                        pad_symbol=vocab['<blank>'])
                    for n in range(min(nbest, len(pred_out))):
                        pred = pred_out[n]
                        hypstr = []
                        for w in pred[0]:
                            if w == vocab['<eos>']:
                                break
                            hypstr.append(vocablist[w])
                        hypstr = " ".join(hypstr)
                        logging.info('HYP[%d]: %s  ( %f )' %
                                     (n + 1, hypstr, pred[1]))
                        if n == 0:
                            pred_dialog['dialog'][t]['answer'] = hypstr
                elif args.decode_style == 'greedy':
                    output = greedy_decode(model,
                                           batch,
                                           maxlen,
                                           start_symbol=vocab['<sos>'],
                                           pad_symbol=vocab['<blank>'])
                    output = output[0].cpu().numpy().tolist()
                    hypstr = []
                    for i in output[1:]:
                        if i == vocab['<eos>']:
                            break
                        hypstr.append(vocablist[i])
                    hypstr = ' '.join(hypstr)
                    logging.info('HYP: {}'.format(hypstr))
                    pred_dialog['dialog'][t]['answer'] = hypstr
                logging.info('ElapsedTime: %f' % (time.time() - start_time))
                logging.info('-----------------------')

    return {'dialogs': result_dialogs}
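greedy_decode and beam_search_decode are defined elsewhere in this repository; the greedy case follows the standard loop of feeding the partial output back into the model and taking the argmax until <eos> or maxlen. A generic sketch of that loop, with the model call abstracted into a hypothetical step_logits_fn (the names here are assumptions, not the repository's API):

import torch

def greedy_decode_sketch(step_logits_fn, maxlen, start_symbol, end_symbol):
    # step_logits_fn: partial output ids -> logits over the next token
    ys = [start_symbol]
    for _ in range(maxlen):
        logits = step_logits_fn(torch.tensor(ys))
        next_word = int(logits.argmax())
        ys.append(next_word)
        if next_word == end_symbol:
            break
    return ys

# e.g. with a dummy distribution that always emits token 2 (the end symbol):
# greedy_decode_sketch(lambda ys: torch.tensor([0., 0., 1.]), 20, 0, 2) -> [0, 2]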
Example #11
def generate_response(model,
                      data,
                      batch_indices,
                      vocab,
                      maxlen=20,
                      beam=5,
                      penalty=2.0,
                      nbest=1):
    vocablist = sorted(vocab.keys(), key=lambda s: vocab[s])
    result_dialogs = []
    model.eval()
    with torch.no_grad():
        qa_id = 0
        for dialog in data['original']['dialogs']:
            vid = dialog['image_id']
            pred_dialog = {
                'image_id': vid,
                'dialog': copy.deepcopy(dialog['dialog'])
            }
            result_dialogs.append(pred_dialog)
            for t, qa in enumerate(dialog['dialog']):
                logging.info('%d %s_%d' % (qa_id, vid, t))
                logging.info('QS: ' + qa['question'])
                logging.info('REF: ' + qa['answer'])
                # prepare input data
                start_time = time.time()
                b = dh.make_batch(data,
                                  batch_indices[qa_id],
                                  separate_caption=train_args.separate_caption,
                                  pretrained_elmo=train_args.pretrained_elmo,
                                  pretrained_bert=train_args.pretrained_bert,
                                  bert_tokenizer=bert_tokenizer,
                                  pretrained_all=train_args.pretrained_all,
                                  bert_model=train_args.bert_model,
                                  concat_his=train_args.concat_his)
                qa_id += 1
                x = [torch.from_numpy(x) for x in b[0]]
                if train_args.concat_his:
                    h = [torch.from_numpy(h_i) for h_i in b[1]]
                else:
                    h = [[torch.from_numpy(h) for h in hb] for hb in b[1]]
                q = [torch.from_numpy(q) for q in b[2]]
                # generate sequences
                if train_args.separate_caption:
                    c = [torch.from_numpy(c) for c in b[5]]
                else:
                    c = None
                if train_args.pretrained_elmo or train_args.pretrained_bert:
                    if train_args.pretrained_all:
                        context_q, context_h, context_ai = b[-3:]
                    else:
                        context_q = b[-1]
                        context_h = None
                        context_ai = None
                else:
                    context_q = None
                    context_h = None
                    context_ai = None
                if train_args.exclude_video:
                    x = None
                if hasattr(train_args,
                           "i3d_two_stream") and train_args.i3d_two_stream:
                    # truncate both streams to the same number of frames,
                    # then concatenate them along the feature dimension
                    min_num_frames = min(x[0].shape[0], x[1].shape[0])
                    x = [torch.cat([x[0][:min_num_frames, :, :],
                                    x[1][:min_num_frames, :, :]], dim=2),
                         x[2]]
                if hasattr(model, 'generate'):
                    pred_out, _ = model.generate(x,
                                                 h,
                                                 q,
                                                 context_q,
                                                 context_h,
                                                 context_ai,
                                                 maxlen=maxlen,
                                                 beam=beam,
                                                 penalty=penalty,
                                                 nbest=nbest,
                                                 c=c)
                else:
                    pred_out, _ = model.module.generate(x,
                                                        h,
                                                        q,
                                                        context_q,
                                                        context_h,
                                                        context_ai,
                                                        maxlen=maxlen,
                                                        beam=beam,
                                                        penalty=penalty,
                                                        nbest=nbest,
                                                        c=c)
                for n in six.moves.range(min(nbest, len(pred_out))):
                    pred = pred_out[n]
                    if 'openai-gpt' in train_args.bert_model:
                        hypstr = ' '.join([
                            vocablist[w].replace('</w>', '') for w in pred[0]
                        ])
                    elif 'gpt2' in train_args.bert_model:
                        # GPT-2 BPE prefixes word-initial pieces with 'Ġ'
                        # (an uppercase letter), which isupper() detects here
                        out_tokens = []
                        for w in pred[0]:
                            token = vocablist[w]
                            if token[0].isupper():
                                out_tokens.append(token[1:])
                            else:
                                out_tokens.append(token)
                        hypstr = ' '.join(out_tokens)
                    else:
                        hypstr = ' '.join([vocablist[w] for w in pred[0]])
                    logging.info('HYP[%d]: %s  ( %f )' %
                                 (n + 1, hypstr, pred[1]))
                    if n == 0:
                        pred_dialog['dialog'][t]['answer'] = hypstr
                logging.info('ElapsedTime: %f' % (time.time() - start_time))
                logging.info('-----------------------')

    return {'dialogs': result_dialogs}
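The bert_model branches above undo subword markers by hand: OpenAI GPT appends '</w>' to word-final pieces, while GPT-2 prefixes word-initial pieces with 'Ġ' (U+0120, an uppercase letter, which is why the isupper() heuristic works). A minimal sketch of the GPT-2 case using the marker directly (illustrative, not the repository's code):

def detok_gpt2(tokens):
    # 'Ġ' (U+0120) marks a word-initial BPE piece in GPT-2 vocabularies
    text = ''
    for t in tokens:
        text += (' ' + t[1:]) if t.startswith('\u0120') else t
    return text.strip()

print(detok_gpt2(['Hello', '\u0120world', '!']))  # -> Hello world!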
Example #12
def fetch_batch(dh, data, index, separate_caption, result):
    result.append(dh.make_batch(data, index,
                                separate_caption=separate_caption,
                                pretrained_elmo=args.pretrained_elmo,
                                pretrained_bert=args.pretrained_bert,
                                bert_tokenizer=bert_tokenizer,
                                pretrained_all=args.pretrained_all,
                                bert_model=args.bert_model,
                                concat_his=args.concat_his))
Example #13
    train_log_path = args.model+'_train.csv'
    with open(train_log_path, "w") as f:  
        f.write('epoch,step,perplexity\n') 
    print("Saving training results to {}".format(train_log_path))
    print("Saving val results to {}".format(trace_log_path))

    # do training iterations
    for i in six.moves.range(args.num_epochs):
        if args.lr_scheduler:
            scheduler.step()
        logging.info('-------------------------Epoch %d : %s-----------------------' % (i+1, args.optimizer))
        train_loss = 0.
        train_num_words = 0
        # fetch the first batch
        batch = [dh.make_batch(train_data, train_indices[0], separate_caption=args.separate_caption, pretrained_elmo=args.pretrained_elmo, pretrained_bert=args.pretrained_bert, bert_tokenizer=bert_tokenizer, 
                pretrained_all=args.pretrained_all, bert_model=args.bert_model,
                concat_his=args.concat_his)]
        # train iterations
        if args.n_batches > 0: 
            n_batches = args.n_batches
        else:
            n_batches = len(train_indices)
        it = tqdm(six.moves.range(n_batches), desc="epoch {}/{}".format(i+1, args.num_epochs), ncols=0)
        for j in it:
            b = batch.pop()
            # fetch the next batch in parallel
            if j < len(train_indices)-1:
                prefetch = threading.Thread(target=fetch_batch,
                                            args=(dh, train_data, train_indices[j+1],
                                                  args.separate_caption, batch))
                prefetch.start()