def evaluate(model, data, indices, parallel=False):
    start_time = time.time()
    eval_loss = 0.
    eval_num_words = 0
    model.eval()
    with torch.no_grad():
        # fetch the first batch
        batch = [dh.make_batch(data, indices[0], separate_caption=args.separate_caption,
                               pretrained_elmo=args.pretrained_elmo,
                               pretrained_bert=args.pretrained_bert,
                               bert_tokenizer=bert_tokenizer,
                               pretrained_all=args.pretrained_all,
                               bert_model=args.bert_model,
                               concat_his=args.concat_his)]
        # evaluation loop
        it = tqdm(six.moves.range(len(indices)), desc="evaluation", ncols=0)
        for j in it:
            b = batch.pop()
            # fetch the next batch in parallel
            if j < len(indices) - 1:
                prefetch = threading.Thread(target=fetch_batch,
                                            args=(dh, data, indices[j + 1],
                                                  args.separate_caption, batch))
                prefetch.start()
            # propagate for evaluation
            x = [torch.from_numpy(x) for x in b[0]]
            if args.concat_his:
                h = [torch.from_numpy(h_i) for h_i in b[1]]
            else:
                h = [[torch.from_numpy(h) for h in hb] for hb in b[1]]
            q = [torch.from_numpy(q) for q in b[2]]
            ai = [torch.from_numpy(ai) for ai in b[3]]
            ao = [torch.from_numpy(ao) for ao in b[4]]
            c = [torch.from_numpy(c) for c in b[5]] if args.separate_caption else None
            if args.pretrained_elmo or args.pretrained_bert:
                if args.pretrained_all:
                    context_q, context_h, context_ai = b[-3:]
                else:
                    context_q, context_h, context_ai = b[-1], None, None
            else:
                context_q = context_h = context_ai = None
            if args.exclude_video:
                x = None
            if parallel:
                _, _, loss = model.module.loss(x, h, q, ai, ao, c,
                                               context_q, context_h, context_ai)
            else:
                _, _, loss = model.loss(x, h, q, ai, ao, c,
                                        context_q, context_h, context_ai)
            num_words = sum(len(a) for a in ao)
            eval_loss += loss.cpu().data.numpy() * num_words
            eval_num_words += num_words
            # wait for prefetch completion (guarded: no thread on the last batch)
            if j < len(indices) - 1:
                prefetch.join()
    model.train()
    wall_time = time.time() - start_time
    return math.exp(eval_loss / eval_num_words), wall_time

def run_epoch(data, indices, vocab, epoch, model, loss_compute, eval=False):
    "Standard Training and Logging Function"
    start = time.time()
    total_tokens = 0
    total_loss = 0
    tokens = 0
    it = tqdm(range(len(indices)),
              desc="epoch {}/{}".format(epoch + 1, args.num_epochs), ncols=0)
    for j in it:
        b = dh.make_batch(data, indices[j], vocab,
                          separate_caption=args.separate_caption, cut_a=args.cut_a)
        out, ae_out = model.forward(b)
        if args.auto_encoder_ft in ('caption', 'summary'):
            ntokens_cap = (b.cap != vocab['<blank>']).data.sum()
            loss = loss_compute(out, b.trg_y, b.ntokens, ae_out, b.cap, ntokens_cap)
        elif args.auto_encoder_ft == 'query':
            ntokens_query = (b.query != vocab['<blank>']).data.sum()
            loss = loss_compute(out, b.trg_y, b.ntokens, ae_out, b.query, ntokens_query)
        total_loss += loss
        total_tokens += b.ntokens
        tokens += b.ntokens
        if (j + 1) % args.report_interval == 0 and not eval:
            elapsed = time.time() - start
            print("Epoch: %d Step: %d Loss: %f Tokens per Sec: %f" %
                  (epoch + 1, j + 1, loss / b.ntokens.float(), float(tokens) / elapsed))
            with open(train_log_path, "a") as f:
                f.write("{},{},{:e},{}\n".format(epoch + 1, j + 1,
                                                 loss / b.ntokens.float(),
                                                 float(tokens) / elapsed))
            start = time.time()
            tokens = 0
    return total_loss / total_tokens.float()

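# run_epoch expects loss_compute to be a callable that blends the decoder loss
# with the auto-encoder loss and, at train time, applies the optimizer step.
# That callable is not shown in this section; the following is a minimal sketch
# in the style of the Annotated Transformer's SimpleLossCompute. The
# generator/criterion/opt interfaces and the ae_weight factor are assumptions,
# not the codebase's actual implementation.
class SimpleLossCompute:
    def __init__(self, generator, criterion, opt=None, ae_weight=1.0):
        self.generator = generator    # projects decoder states to vocab logits
        self.criterion = criterion    # e.g. a label-smoothed KL-divergence loss
        self.opt = opt                # torch optimizer; None at eval time
        self.ae_weight = ae_weight    # weight of the auto-encoder term

    def __call__(self, out, targets, ntokens, ae_out, ae_targets, ae_ntokens):
        # decoder (response) loss, normalized by target token count
        logits = self.generator(out)
        loss = self.criterion(logits.contiguous().view(-1, logits.size(-1)),
                              targets.contiguous().view(-1)) / ntokens.float()
        # auto-encoder loss on the caption/summary/query tokens
        ae_logits = self.generator(ae_out)
        ae_loss = self.criterion(ae_logits.contiguous().view(-1, ae_logits.size(-1)),
                                 ae_targets.contiguous().view(-1)) / ae_ntokens.float()
        total = loss + self.ae_weight * ae_loss
        if self.opt is not None:
            total.backward()
            self.opt.step()
            self.opt.zero_grad()
        # return the un-normalized sum, matching how run_epoch divides by ntokens
        return total.item() * ntokens.float()
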
def generate_caption(model, data, batch_indices, vocab, dim, stride=1,
                     maxlen=20, beam=5, penalty=2.0, nbest=1):
    vocablist = sorted(vocab.keys(), key=lambda s: vocab[s])
    result = []
    c = 0
    for j in six.moves.range(len(batch_indices[0])):
        start_time = time.time()
        x_batch = [None] * len(data)
        for n in six.moves.range(len(data)):
            x_batch[n], Q_batch, A_batch = dh.make_batch(data[n], batch_indices[n][j],
                                                         dim=dim[n], stride=stride)
        pred_out, logp = model.generate(x_batch, Q_batch, A_batch,
                                        maxlen=maxlen, beam=beam, penalty=penalty)
        for i in six.moves.range(Q_batch.shape[1]):
            c += 1
            print(c, batch_indices[0][j][0][0] + '_' + str(i + 1))
            print('REF:', end=' ')
            for n in six.moves.range(A_batch.shape[0]):
                if A_batch[n][i][0] == 3:  # skip special token id 3
                    continue
                print(vocablist[A_batch[n][i][0]], end=' ')
            print()
            for n in six.moves.range(min(nbest, len(pred_out[i]))):
                print('HYP[%d]:' % (n + 1), end=' ')
                pred = pred_out[i][n]
                if isinstance(pred, list):
                    # the format used in run_youtube_mm.sh
                    for w in pred:
                        print(vocablist[w], end=' ')
                    print('( {:f} )'.format(logp[i][n]))
                else:
                    # the format used in run_youtube.sh
                    print(vocablist[pred_out[i][n]])
                    print('( {:f} )'.format(logp[i]))
        print('ElapsedTime:', time.time() - start_time)
        print('-----------------------')
        result.append(pred_out)
    return result

def generate_response(model, data, batch_indices, vocab, maxlen=20, beam=5,
                      penalty=2.0, nbest=1):
    vocablist = sorted(vocab.keys(), key=lambda s: vocab[s])
    result_dialogs = []
    model.eval()
    with torch.no_grad():
        qa_id = 0
        for dialog in data['original']['dialogs']:
            vid = dialog['image_id']
            pred_dialog = {'image_id': vid,
                           'dialog': copy.deepcopy(dialog['dialog'])}
            result_dialogs.append(pred_dialog)
            for t, qa in enumerate(dialog['dialog']):
                logging.info('%d %s_%d' % (qa_id, vid, t))
                logging.info('QS: ' + qa['question'])
                logging.info('REF: ' + qa['answer'])
                # prepare input data
                start_time = time.time()
                x_batch, h_batch, q_batch, a_batch_in, a_batch_out, s_batch = \
                    dh.make_batch(data, batch_indices[qa_id])
                qa_id += 1
                x = [torch.from_numpy(x) for x in x_batch]
                h = [[torch.from_numpy(h) for h in hb] for hb in h_batch]
                q = [torch.from_numpy(q) for q in q_batch]
                s = torch.from_numpy(s_batch).cuda().float()
                # generate sequences
                pred_out, _ = model.generate(x, h, q, s, maxlen=maxlen,
                                             beam=beam, penalty=penalty, nbest=nbest)
                for n in six.moves.range(min(nbest, len(pred_out))):
                    pred = pred_out[n]
                    hypstr = ' '.join([vocablist[w] for w in pred[0]])
                    logging.info('HYP[%d]: %s ( %f )' % (n + 1, hypstr, pred[1]))
                    if n == 0:
                        pred_dialog['dialog'][t]['answer'] = hypstr
                logging.info('ElapsedTime: %f' % (time.time() - start_time))
                logging.info('-----------------------')
    return {'dialogs': result_dialogs}

def evaluate(model, data, indices):
    start_time = time.time()
    eval_loss = 0.
    eval_num_words = 0
    model.eval()
    with torch.no_grad():
        # fetch the first batch
        batch = [dh.make_batch(data, indices[0])]
        # evaluation loop
        for j in six.moves.range(len(indices)):
            # get a fetched batch
            x_batch, h_batch, q_batch, a_batch_in, a_batch_out, s_batch = batch.pop()
            # fetch the next batch in parallel
            if j < len(indices) - 1:
                prefetch = threading.Thread(target=fetch_batch,
                                            args=(dh, data, indices[j + 1], batch))
                prefetch.start()
            # propagate for evaluation
            x = [torch.from_numpy(x) for x in x_batch]
            h = [[torch.from_numpy(h) for h in hb] for hb in h_batch]
            q = [torch.from_numpy(q) for q in q_batch]
            ai = [torch.from_numpy(ai) for ai in a_batch_in]
            ao = [torch.from_numpy(ao) for ao in a_batch_out]
            s = torch.from_numpy(s_batch).cuda().float()
            _, _, loss = model.loss(x, h, q, ai, ao, s)
            num_words = sum(len(a) for a in ao)
            eval_loss += loss.cpu().data.numpy() * num_words
            eval_num_words += num_words
            # wait for prefetch completion (guarded: no thread on the last batch)
            if j < len(indices) - 1:
                prefetch.join()
    model.train()
    wall_time = time.time() - start_time
    return math.exp(eval_loss / eval_num_words), wall_time

def evaluate(model, data, batch_indices, dim, stride=1):
    start_time = time.time()
    eval_loss = 0.
    eval_hit = 0
    num_tokens = 0
    for j in six.moves.range(len(batch_indices)):
        x_batch = [None] * len(data)
        for m in six.moves.range(len(data)):
            x_batch[m], Q_batch, A_batch = dh.make_batch(data[m], batch_indices[m][j],
                                                         dim=dim[m], stride=stride)
        loss, hit, num = model(x_batch, Q_batch, A_batch,
                               predicted_context=False, istraining=False)
        eval_loss += loss
        eval_hit += hit
        num_tokens += num
    wall_time = time.time() - start_time
    return math.exp(eval_loss / num_tokens), float(eval_hit) / num_tokens, wall_time

# do training iterations
for i in six.moves.range(args.num_epochs):
    if is_sgd:
        print('Epoch %d : SGD learning rate = %g' % (i + 1, optimizer.lr))
    else:
        print('Epoch %d : %s' % (i + 1, args.optimizer))
    train_loss = 0.
    for j in six.moves.range(len(ids)):
        # prepare input data
        k = ids[j]
        x_batch = [None] * len(data)
        for m in six.moves.range(len(data)):
            x_batch[m], Q_batch, A_batch = dh.make_batch(data[m], train_indices[m][k],
                                                         dim=args.in_size[m],
                                                         stride=args.frame_stride)
        # propagate for training
        loss = model(x_batch, Q_batch, A_batch,
                     predicted_context=False, istraining=True)
        if multiGPU:
            loss = loss.sum()
        wj = loss.cpu()
        cur_log_perp += wj.data.numpy()
        num_words += A_batch.shape[0] * A_batch.shape[1]
        if (n + 1) % report_interval == 0:
            now = time.time()

def fetch_batch(dh, data, index, result):
    # worker target: build one batch and append it to the shared result list
    result.append(dh.make_batch(data, index))

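# fetch_batch is used as a Thread target throughout these scripts: the caller
# pops the current batch from a shared one-element list, starts a worker that
# appends the next batch, and joins it before popping again, overlapping data
# preparation with the forward/backward pass. A condensed, self-contained
# sketch of that pattern (make_batch here is a stand-in for dh.make_batch):
import threading
import time

def _make_batch(data, index):
    time.sleep(0.01)  # simulate I/O / preprocessing cost
    return data[index]

def _fetch(data, index, result):
    result.append(_make_batch(data, index))

def _prefetch_demo():
    data = list(range(8))
    indices = list(range(len(data)))
    batch = [_make_batch(data, indices[0])]  # fetch the first batch synchronously
    for j in range(len(indices)):
        b = batch.pop()                      # batch prepared by the previous iteration
        prefetch = None
        if j < len(indices) - 1:             # overlap the next fetch with compute
            prefetch = threading.Thread(target=_fetch,
                                        args=(data, indices[j + 1], batch))
            prefetch.start()
        _ = b * 2                            # stand-in for the model step
        if prefetch is not None:
            prefetch.join()                  # ensure `batch` holds the next item
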
min_valid_ppl = 1.0e+10
n = 0
report_interval = 1000 // args.batch_size  # integer division (Python 3)
bestmodel_num = 0
random.shuffle(train_indices)
# do training iterations
for i in six.moves.range(args.num_epochs):
    logging.info('Epoch %d : %s' % (i + 1, args.optimizer))
    train_loss = 0.
    train_num_words = 0
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    # fetch the first batch
    batch = [dh.make_batch(train_data, train_indices[0])]
    # train iterations
    for j in six.moves.range(len(train_indices)):
        data_time.update(time.time() - end)
        # get the fetched batch
        x_batch, h_batch, q_batch, a_batch_in, a_batch_out, s_batch = batch.pop()
        # fetch the next batch in parallel
        if j < len(train_indices) - 1:
            prefetch = threading.Thread(target=fetch_batch,
                                        args=(dh, train_data, train_indices[j + 1], batch))
            prefetch.start()
        # propagate for training
        x = [torch.from_numpy(x) for x in x_batch]

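# AverageMeter is referenced above but not defined in this section. This sketch
# follows the widely used implementation from the PyTorch ImageNet example;
# the actual class in this codebase may differ slightly.
class AverageMeter(object):
    """Tracks the current value, running sum, count, and average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
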
def generate_response(model, data, batch_indices, vocab, maxlen=20, beam=5,
                      penalty=2.0, nbest=1, ref_data=None):
    vocablist = sorted(vocab.keys(), key=lambda s: vocab[s])
    result_dialogs = []
    model.eval()
    with torch.no_grad():
        qa_id = 0
        for idx, dialog in enumerate(data['original']['dialogs']):
            vid = dialog['image_id']
            if args.undisclosed_only:
                out_dialog = dialog['dialog'][-1:]
                if ref_data is not None:
                    ref_dialog = ref_data['dialogs'][idx]
                    assert ref_dialog['image_id'] == vid
                    ref_dialog = ref_dialog['dialog'][-1:]
            else:
                out_dialog = dialog['dialog']
            pred_dialog = {'image_id': vid,
                           'dialog': copy.deepcopy(out_dialog)}
            result_dialogs.append(pred_dialog)
            for t, qa in enumerate(out_dialog):
                if args.undisclosed_only:
                    assert qa['answer'] == '__UNDISCLOSED__'
                logging.info('%d %s_%d' % (qa_id, vid, t))
                logging.info('QS: ' + qa['question'])
                if args.undisclosed_only and ref_data is not None:
                    logging.info('REF: ' + ref_dialog[t]['answer'])
                else:
                    logging.info('REF: ' + qa['answer'])
                # prepare input data
                start_time = time.time()
                batch = dh.make_batch(data, batch_indices[qa_id], vocab,
                                      separate_caption=train_args.separate_caption)
                qa_id += 1
                if args.decode_style == 'beam_search':
                    pred_out, _ = beam_search_decode(model, batch, maxlen,
                                                     start_symbol=vocab['<sos>'],
                                                     unk_symbol=vocab['<unk>'],
                                                     end_symbol=vocab['<eos>'],
                                                     pad_symbol=vocab['<blank>'])
                    for n in range(min(nbest, len(pred_out))):
                        pred = pred_out[n]
                        hypstr = []
                        for w in pred[0]:
                            if w == vocab['<eos>']:
                                break
                            hypstr.append(vocablist[w])
                        hypstr = ' '.join(hypstr)
                        logging.info('HYP[%d]: %s ( %f )' % (n + 1, hypstr, pred[1]))
                        if n == 0:
                            pred_dialog['dialog'][t]['answer'] = hypstr
                elif args.decode_style == 'greedy':
                    output = greedy_decode(model, batch, maxlen,
                                           start_symbol=vocab['<sos>'],
                                           pad_symbol=vocab['<blank>'])
                    output = [i for i in output[0].cpu().numpy()]
                    hypstr = []
                    for i in output[1:]:
                        if i == vocab['<eos>']:
                            break
                        hypstr.append(vocablist[i])
                    hypstr = ' '.join(hypstr)
                    logging.info('HYP: {}'.format(hypstr))
                    pred_dialog['dialog'][t]['answer'] = hypstr
                logging.info('ElapsedTime: %f' % (time.time() - start_time))
                logging.info('-----------------------')
    return {'dialogs': result_dialogs}

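# greedy_decode is called above but not shown in this section. For a
# transformer-style encoder-decoder, greedy decoding feeds the argmax token
# back into the decoder at each step. The sketch below assumes an
# Annotated-Transformer-like interface (model.encode / model.decode /
# model.generator and a subsequent_mask helper); these names are assumptions
# and may not match the repository's actual signatures.
import torch

def _subsequent_mask(size):
    # lower-triangular mask that blocks attention to future positions
    return torch.tril(torch.ones(1, size, size, dtype=torch.bool))

def greedy_decode_sketch(model, src, src_mask, maxlen, start_symbol, end_symbol):
    memory = model.encode(src, src_mask)
    ys = torch.full((1, 1), start_symbol, dtype=torch.long)
    for _ in range(maxlen - 1):
        out = model.decode(memory, src_mask, ys, _subsequent_mask(ys.size(1)))
        log_prob = model.generator(out[:, -1])       # distribution over vocab
        next_word = log_prob.argmax(dim=-1).item()   # greedy choice
        ys = torch.cat([ys, torch.tensor([[next_word]])], dim=1)
        if next_word == end_symbol:
            break
    return ys
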
def generate_response(model, data, batch_indices, vocab, maxlen=20, beam=5,
                      penalty=2.0, nbest=1):
    vocablist = sorted(vocab.keys(), key=lambda s: vocab[s])
    result_dialogs = []
    model.eval()
    with torch.no_grad():
        qa_id = 0
        for dialog in data['original']['dialogs']:
            vid = dialog['image_id']
            pred_dialog = {'image_id': vid,
                           'dialog': copy.deepcopy(dialog['dialog'])}
            result_dialogs.append(pred_dialog)
            for t, qa in enumerate(dialog['dialog']):
                logging.info('%d %s_%d' % (qa_id, vid, t))
                logging.info('QS: ' + qa['question'])
                logging.info('REF: ' + qa['answer'])
                # prepare input data
                start_time = time.time()
                b = dh.make_batch(data, batch_indices[qa_id],
                                  separate_caption=train_args.separate_caption,
                                  pretrained_elmo=train_args.pretrained_elmo,
                                  pretrained_bert=train_args.pretrained_bert,
                                  bert_tokenizer=bert_tokenizer,
                                  pretrained_all=train_args.pretrained_all,
                                  bert_model=train_args.bert_model,
                                  concat_his=train_args.concat_his)
                qa_id += 1
                x = [torch.from_numpy(x) for x in b[0]]
                if train_args.concat_his:
                    h = [torch.from_numpy(h_i) for h_i in b[1]]
                else:
                    h = [[torch.from_numpy(h) for h in hb] for hb in b[1]]
                q = [torch.from_numpy(q) for q in b[2]]
                c = [torch.from_numpy(c) for c in b[5]] if train_args.separate_caption else None
                if train_args.pretrained_elmo or train_args.pretrained_bert:
                    if train_args.pretrained_all:
                        context_q, context_h, context_ai = b[-3:]
                    else:
                        context_q, context_h, context_ai = b[-1], None, None
                else:
                    context_q = context_h = context_ai = None
                if train_args.exclude_video:
                    x = None
                if hasattr(train_args, "i3d_two_stream") and train_args.i3d_two_stream:
                    # align the two I3D streams on their shared frames, then concatenate
                    min_num_frames = min(x[0].shape[0], x[1].shape[0])
                    x = [torch.cat([x[0][:min_num_frames, :, :],
                                    x[1][:min_num_frames, :, :]], dim=2),
                         x[2]]
                # generate sequences
                if hasattr(model, 'generate'):
                    pred_out, _ = model.generate(x, h, q, context_q, context_h,
                                                 context_ai, maxlen=maxlen, beam=beam,
                                                 penalty=penalty, nbest=nbest, c=c)
                else:
                    pred_out, _ = model.module.generate(x, h, q, context_q, context_h,
                                                        context_ai, maxlen=maxlen,
                                                        beam=beam, penalty=penalty,
                                                        nbest=nbest, c=c)
                for n in six.moves.range(min(nbest, len(pred_out))):
                    pred = pred_out[n]
                    if 'openai-gpt' in train_args.bert_model:
                        hypstr = ' '.join([vocablist[w].replace('</w>', '')
                                           for w in pred[0]])
                    elif 'gpt2' in train_args.bert_model:
                        # GPT-2 marks word-initial subwords with a leading 'Ġ'; drop it
                        out_tokens = []
                        for w in pred[0]:
                            token = vocablist[w]
                            if token[0].isupper():
                                out_tokens.append(token[1:])
                            else:
                                out_tokens.append(token)
                        hypstr = ' '.join(out_tokens)
                    else:
                        hypstr = ' '.join([vocablist[w] for w in pred[0]])
                    logging.info('HYP[%d]: %s ( %f )' % (n + 1, hypstr, pred[1]))
                    if n == 0:
                        pred_dialog['dialog'][t]['answer'] = hypstr
                logging.info('ElapsedTime: %f' % (time.time() - start_time))
                logging.info('-----------------------')
    return {'dialogs': result_dialogs}

def fetch_batch(dh, data, index, separate_caption, result):
    # worker target: build one batch (with the configured pretrained features)
    # and append it to the shared result list
    result.append(dh.make_batch(data, index, separate_caption=separate_caption,
                                pretrained_elmo=args.pretrained_elmo,
                                pretrained_bert=args.pretrained_bert,
                                bert_tokenizer=bert_tokenizer,
                                pretrained_all=args.pretrained_all,
                                bert_model=args.bert_model,
                                concat_his=args.concat_his))

train_log_path = args.model + '_train.csv'
with open(train_log_path, "w") as f:
    f.write('epoch,step,perplexity\n')
print("Saving training results to {}".format(train_log_path))
print("Saving val results to {}".format(trace_log_path))

# do training iterations
for i in six.moves.range(args.num_epochs):
    if args.lr_scheduler:
        scheduler.step()
    logging.info('-------------------------Epoch %d : %s-----------------------'
                 % (i + 1, args.optimizer))
    train_loss = 0.
    train_num_words = 0
    # fetch the first batch
    batch = [dh.make_batch(train_data, train_indices[0],
                           separate_caption=args.separate_caption,
                           pretrained_elmo=args.pretrained_elmo,
                           pretrained_bert=args.pretrained_bert,
                           bert_tokenizer=bert_tokenizer,
                           pretrained_all=args.pretrained_all,
                           bert_model=args.bert_model,
                           concat_his=args.concat_his)]
    # train iterations
    n_batches = args.n_batches if args.n_batches > 0 else len(train_indices)
    it = tqdm(six.moves.range(n_batches),
              desc="epoch {}/{}".format(i + 1, args.num_epochs), ncols=0)
    for j in it:
        b = batch.pop()
        # fetch the next batch in parallel
        if j < len(train_indices) - 1:
            prefetch = threading.Thread(target=fetch_batch,
                                        args=(dh, train_data, train_indices[j + 1],
                                              args.separate_caption, batch))
            prefetch.start()