def test(self, save_sampled_captions=True, evaluate_score=True, generate_demo_sample=False): self.atten_model.eval() self.atten_model.load_state_dict(torch.load(self.args.save)) self.atten_model.cuda() if save_sampled_captions: features = self.test_data['features'] n_examples = features.shape[0] all_sam_cap = np.ndarray((n_examples, 20)) test_times = int(np.ceil(float(n_examples) / self.args.batch_size)) for t in range(test_times): features_batch = Variable( torch.from_numpy( features[t * self.args.batch_size:(t + 1) * self.args.batch_size])).cuda() _, _, sampled_captions = self.atten_model.build_sample( features_batch) all_sam_cap[t * self.args.batch_size:(t + 1) * self.args.batch_size] = np.array( sampled_captions.data) decoded = decode_captions(all_sam_cap, self.idx_to_word) save_pickle(decoded, self.args.test_samples) print 'test all sccessful' if evaluate_score: ref = load_pickle('./data/test/test.references.pkl') try: evaluate(ref, decoded) except KeyboardInterrupt: decoded = load_pickle(self.args.test_samples) evaluate(ref, decoded) if generate_demo_sample: features = self.args.demo_feat features_batch = Variable(torch.from_numpy(features)).cuda() _, _, sampled_captions = self.atten_model.build_sample( features_batch) decoded = decode_captions(sampled_captions, self.idx_to_word) print decoded
def evaluate_on_split(self, sess, generated_captions, summary_writer, epoch, tags, split='train'):
    """Generate a caption for every unique video id in *split*, score the
    candidates against the ground-truth references, and log each metric in
    *tags* to TensorBoard.

    Args:
        sess: active tf.Session.
        generated_captions: sampler output tensor to run.
        summary_writer: tf.summary.FileWriter for scalar logging.
        epoch: global step used for the summaries and result file names.
        tags: metric names (keys of the dict returned by ``evaluate``).
        split: which data split to evaluate on.

    Returns:
        dict of metric name -> score, as produced by ``evaluate``.
    """
    caps = self.data.captions[split]
    ids = self.data.video_ids[split]
    unique_ids = list(set(ids))
    num_iter = int(ceil(len(unique_ids) / float(self.batch_size)))
    # Pad the id list to a whole number of batches by repeating it, then
    # truncate to exactly num_iter * batch_size entries.
    while len(unique_ids) < num_iter * self.batch_size:
        unique_ids += unique_ids
    unique_ids = unique_ids[:num_iter * self.batch_size]
    # BUGFIX: dtype was np.int, a deprecated alias removed in NumPy 1.24;
    # the builtin int yields the same platform integer dtype.
    all_gen_cap = np.ndarray((len(unique_ids), self.max_words), dtype=int)
    for i in range(num_iter):
        features_batch = [
            self.data.feature(vid)
            for vid in unique_ids[i * self.batch_size:(i + 1) * self.batch_size]
        ]
        features_batch = np.asarray(features_batch)
        feed_dict = {self.features: features_batch}
        gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
        all_gen_cap[i * self.batch_size:(i + 1) * self.batch_size] = gen_cap
    all_decoded = decode_captions(all_gen_cap, self.data.vocab.idx2word)
    # candidate dict: one generated sentence per video id
    cand = {}
    for vid, sentence in zip(unique_ids, all_decoded):
        cand[vid] = [sentence]
    # reference dict: all ground-truth captions per video id; [:, 1:] drops
    # the first token of each caption (presumably a <start> marker -- TODO confirm)
    ref = {}
    for vid in unique_ids:
        ref[vid] = decode_captions(caps[ids == vid][:, 1:], self.data.vocab.idx2word)
    # dump raw cand/ref dicts for offline inspection
    # (renamed handle: 'file' shadowed the builtin)
    with open('result/cand_%s_%d.txt' % (split, epoch), 'w') as out:
        out.write(str(cand))
    with open('result/ref_%s_%d.txt' % (split, epoch), 'w') as out:
        out.write(str(ref))
    # evaluate and push one scalar summary per metric tag
    scores = evaluate(ref=ref, cand=cand, get_scores=True)
    for tag in tags:
        summary = tf.Summary()
        summary.value.add(tag=split + tag, simple_value=scores[tag])
        summary_writer.add_summary(summary, epoch)
    return scores
def run_eval(self, candidate):
    """Score the captions in the *candidate* JSON file with BLEU.

    Filters the stored reference dict down to the keys present in the
    candidate file, writes it to ``self.reference_dict_path``, and delegates
    scoring to ``bleu.evaluate``.

    Args:
        candidate: path to a JSON file mapping keys -> candidate captions.

    Returns:
        whatever ``bleu.evaluate`` returns for the two files.
    """
    with open(candidate, "r") as f:
        cand = json.load(f)
    # correct format
    # Keys without image features have to be filtered out - this is the
    # easiest way without checking image vectors.
    # BUGFIX: iterate over a snapshot of the keys; deleting from a dict
    # while iterating its live key view raises RuntimeError on Python 3.
    for key in list(self.refdict4eval.keys()):
        if key not in cand:
            del self.refdict4eval[key]
    assert len(self.refdict4eval) == len(cand)
    with open(self.reference_dict_path, 'w') as f:
        json.dump(self.refdict4eval, f)
    return bleu.evaluate(self.reference_dict_path, candidate, True)
def validation(vocab, val_loader, encoder, decoder, beam_width):
    """Run one pass over the validation loader and report BLEU.

    For every batch the encoder/decoder pair samples a caption (beam search
    of width *beam_width*), the sampled token ids are turned into a sentence,
    and the references are lower-cased; progress is printed every
    ``log_step`` batches and the final BLEU score is printed at the end.
    """

    def _tokens_to_sentence(sampled_ids):
        # Stop at ids 2 / 19 (end-of-sequence markers -- TODO confirm),
        # skip the remaining special ids (<= 3), map the rest to words.
        words = []
        for token_id in sampled_ids:
            if token_id == 2 or token_id == 19:
                break
            if token_id <= 3:
                continue
            words.append(vocab.vec2word(token_id))
        return " ".join(words)

    encoder.eval()
    decoder.eval()
    output_captions = {}  # Map(ID -> List(sentences))
    ref_captions = {}     # Map(ID -> List(sentences))
    with torch.no_grad():
        for step, (image, captions) in enumerate(val_loader):
            image = image.to(device)
            feature = encoder(image)
            sampled = decoder.sample(feature, beam_width=beam_width)
            output_captions[step] = [_tokens_to_sentence(sampled)]
            ref_captions[step] = [ref[0].lower() for ref in captions]
            if step % log_step == 0:
                print('Validation Step [{}/{}]'.format(step, len(val_loader)))
                print(output_captions[step])
                print(ref_captions[step])
    bleu_score = evaluate(ref_captions, output_captions)
    print(bleu_score)
def train(self):
    """Train the captioning model and evaluate BLEU on val after each epoch.

    Builds the TF1 training graph plus a sampling graph sharing the same
    variables, runs ``self.n_epochs`` epochs of minibatch SGD, and after
    every epoch decodes the validation set twice (once per sentiment-flag
    pattern written into the feature tensor) and records the scores via
    ``write_bleu``.
    """
    data_save_path = self.data_save_path
    n_examples = self.data['captions'].shape[0]
    # floor: any trailing partial batch is dropped during training
    n_iters_per_epoch = int(np.floor(float(n_examples) / self.batch_size))
    features = self.data['features']
    # captions truncated to 21 tokens -- presumably the model's max
    # sequence length; TODO confirm against the model definition
    captions = self.data['captions'][:,:21]
    image_idxs = self.data['image_idxs']
    val_features = self.val_data['features']
    n_iters_val = int(np.ceil(float(val_features.shape[0]) / self.batch_size))

    # Build the training loss first, then reuse the same variables for the
    # caption sampler (TF1 variable sharing).
    with tf.variable_scope(tf.get_variable_scope()):
        loss = self.model.build_model()
        tf.get_variable_scope().reuse_variables()
        _, _, sampled_captions = self.model.build_sampler(max_len=16) if False else (None, None, None)
def train(self, data, support_data, val_data, epochs, pretrain_epochs, batch_size=100, print_bleu=True):
    """Pretrain and then train the model, optionally scoring BLEU on val.

    Two phases: ``pretrain_epochs`` epochs minimizing
    ``self.model.pretrain_loss`` followed by ``epochs`` epochs minimizing
    ``self.model.loss``.  After every training epoch a checkpoint is saved
    and (if *print_bleu*) the validation set is decoded and scored.

    Args:
        data: dict with 'captions', 'features', 'image_idxs' arrays.
        support_data: per-caption support-context array, indexed like captions.
        val_data: dict with validation 'features'.
        epochs: number of main training epochs.
        pretrain_epochs: number of pretraining epochs.
        batch_size: minibatch size (default 100).
        print_bleu: if True, evaluate BLEU on val after each epoch.
    """
    n_example = data['captions'].shape[0]
    # ceil division, written out explicitly (last batch may be smaller)
    if n_example%batch_size == 0:
        n_batch = int(n_example/batch_size)
    else:
        n_batch = int(n_example/batch_size)+1
    features = data['features']
    captions = data['captions']
    image_idx = data['image_idxs']
    val_features = val_data['features']
    n_val_example = val_features.shape[0]
    if n_val_example%batch_size==0:
        n_val_batch = int(n_val_example/batch_size)
    else:
        n_val_batch = int(n_val_example/batch_size)+1

    # optimizers for both phases plus the sampler graph
    pretrain_opt = self.optimizer.minimize(self.model.pretrain_loss)
    train_opt = self.optimizer.minimize(self.model.loss)
    loss = self.model.loss
    pretrain_loss = self.model.pretrain_loss
    alphs, sample_caption = self.model.build_sampler()
    init = tf.global_variables_initializer()

    print('='*80)
    print('The number of epoch: %d'%epochs)
    print('Iteration per epoch: %d'%n_batch)
    print('The batch size: %d'%batch_size)
    print('The number of training example: %d'%n_example)
    print('The number of validation example: %d'%n_val_example)
    print('='*80)

    # config = tf.ConfigProto(allow_soft_placement = True)
    # config.gpu_options.allow_growth = True
    with tf.Session() as sess:
        print('model is initialized.')
        sess.run(init)
        saver = tf.train.Saver(max_to_keep=40)

        # ---------------- pretraining phase ----------------
        print('Start to pretrain...')
        start_time = time.time()
        for pre_ep in xrange(pretrain_epochs):
            pre_ep_start = time.time()
            # NOTE(review): only the index permutation is drawn; the arrays
            # themselves are indexed per-batch via rand_idxs_batch below.
            rand_idxs = np.random.permutation(n_example)
            #captions = captions[rand_idxs]
            #support_data = support_data[rand_idxs]
            #image_idx = image_idx[rand_idxs]
            pretrain_cost = 0.
            for itr in tqdm(xrange(n_batch),desc='Pretrain Epoch:%d'%(pre_ep+1)):
                start = itr*batch_size
                # clamp the last batch to the dataset end
                if (itr+1)*batch_size>n_example:
                    end = n_example
                else:
                    end = (itr+1)*batch_size
                rand_idxs_batch = rand_idxs[start:end]
                caption_batch = captions[rand_idxs_batch]
                support_data_batch = support_data[rand_idxs_batch]
                image_idx_batch = image_idx[rand_idxs_batch]
                features_batch = features[image_idx_batch]
                feed_dict={
                    self.model.img_feature:features_batch,
                    self.model.support_context:support_data_batch,
                    self.model.captions:caption_batch
                }
                _,pre_loss_batch = sess.run([pretrain_opt,pretrain_loss],feed_dict=feed_dict)
                # running mean of the batch losses over the epoch
                pretrain_cost += pre_loss_batch/n_batch
            pre_ep_end = time.time()
            pre_ep_sec = pre_ep_end-pre_ep_start
            pre_ep_min = int(pre_ep_sec/60)
            pre_ep_sec = pre_ep_sec%60
            print('='*80)
            print('Pretrain Epoch: %d'%(pre_ep+1))
            print('Pretrain loss: %.4f'%pretrain_cost)
            print('Cost time %d:%d'%(pre_ep_min,pre_ep_sec))
            print('='*80)

        # ---------------- main training phase ----------------
        print('Start to train...')
        for ep in xrange(epochs):
            train_cost=0.
            ep_start = time.time()
            rand_idxs = np.random.permutation(n_example)
            #captions = captions[rand_idxs]
            #support_data = support_data[rand_idxs]
            #image_idx = image_idx[rand_idxs]
            for itr in tqdm(xrange(n_batch),desc='Epoch:%d'%(ep+1)):
                start = itr*batch_size
                if (itr+1)*batch_size>n_example:
                    end = n_example
                else:
                    end = (itr+1)*batch_size
                rand_idxs_batch = rand_idxs[start:end]
                caption_batch = captions[rand_idxs_batch]
                image_idx_batch = image_idx[rand_idxs_batch]
                features_batch = features[image_idx_batch]
                support_data_batch = support_data[rand_idxs_batch]
                feed_dict = {
                    self.model.img_feature:features_batch,
                    self.model.support_context:support_data_batch,
                    self.model.captions:caption_batch
                }
                _,loss_batch = sess.run([train_opt,loss],feed_dict=feed_dict)
                train_cost += loss_batch/n_batch
            ep_end = time.time()
            ep_sec = ep_end-ep_start
            ep_min = int(ep_sec/60)
            ep_sec = ep_sec%60
            # checkpoint after every epoch
            saver.save(sess,'./model_ckpt/model',global_step=ep+1)
            print('='*80)
            print('Epoch: %d'%(ep+1))
            print('Training loss: %.4f'%train_cost)
            print('Cost time %d:%d'%(ep_min,ep_sec))
            print('model-%d is saved'%(ep+1))
            print('='*80)

            if print_bleu:
                # decode the whole validation set (max length 16) and score it
                all_gen_cap = np.ndarray((val_features.shape[0], 16))
                for i in xrange(n_val_batch):
                    start = i*batch_size
                    if (i+1)*batch_size>n_val_example:
                        end = n_val_example
                    else:
                        end = (i+1)*batch_size
                    features_batch = val_features[start:end]
                    feed_dict = {self.model.img_feature: features_batch}
                    gen_cap = sess.run(sample_caption, feed_dict=feed_dict)
                    all_gen_cap[start:end] = gen_cap
                all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
                save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val', get_scores=True)
                write_bleu(scores=scores, path='./model_ckpt', epoch=ep)

        # total wall-clock time for both phases
        end_time = time.time()
        total_sec = end_time-start_time
        total_hr = int(total_sec/3600)
        total_min = int((total_sec%3600)/60)
        total_sec = total_sec%60
        print('\n')
        print('Total cost time %d:%d:%d'%(total_hr,total_min,total_sec))
def train(self):
    """Train the captioning model with TF1 minibatch SGD.

    Builds the loss, the optimizer, and (reusing the same variables) a
    caption sampler; logs TensorBoard summaries every 10 iterations,
    prints ground-truth vs. generated captions every ``print_every``
    iterations, optionally writes BLEU scores on val after each epoch, and
    checkpoints every ``save_every`` epochs.
    """
    # train/val dataset
    n_examples = self.data['captions'].shape[0]
    n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
    features = self.data['features']
    captions = self.data['captions']
    image_idxs = self.data['image_idxs']
    val_features = self.val_data['features']
    n_iters_val = int(
        np.ceil(float(val_features.shape[0]) / self.batch_size))

    # build graphs for training model and sampling captions
    loss = self.model.build_model()
    # tf.get_variable_scope().reuse_variables()
    # _, _, generated_captions = self.model.build_sampler(max_len=20)

    # train op
    #with tf.name_scope('optimizer'):
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        with tf.name_scope('optimizer'):
            optimizer = self.optimizer(learning_rate=self.learning_rate)
            grads = tf.gradients(loss, tf.trainable_variables())
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
            train_op = optimizer.apply_gradients(
                grads_and_vars=grads_and_vars)
        # reuse the variables created by build_model for the sampler graph
        tf.get_variable_scope().reuse_variables()
        _, _, generated_captions = self.model.build_sampler(
            max_len=20)

    # summary op
    # tf.scalar_summary('batch_loss', loss)
    tf.summary.scalar('batch_loss', loss)
    for var in tf.trainable_variables():
        #tf.histogram_summary(var.op.name, var)
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_and_vars:
        #tf.histogram_summary(var.op.name+'/gradient', grad)
        tf.summary.histogram(var.op.name + '/gradient', grad)
    #summary_op = tf.merge_all_summaries()
    summary_op = tf.summary.merge_all()

    print "The number of epoch: %d" % self.n_epochs
    print "Data size: %d" % n_examples
    print "Batch size: %d" % self.batch_size
    print "Iterations per epoch: %d" % n_iters_per_epoch

    config = tf.ConfigProto(allow_soft_placement=True)
    #config.gpu_options.per_process_gpu_memory_fraction=0.9
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        #summary_writer = tf.train.SummaryWriter(self.log_path, graph=tf.get_default_graph())
        summary_writer = tf.summary.FileWriter(
            self.log_path, graph=tf.get_default_graph())
        saver = tf.train.Saver(max_to_keep=40)
        if self.pretrained_model is not None:
            print "Start training with pretrained Model.."
            saver.restore(sess, self.pretrained_model)

        prev_loss = -1
        curr_loss = 0
        start_t = time.time()
        for e in range(self.n_epochs):
            # reshuffle caption/image pairs every epoch
            rand_idxs = np.random.permutation(n_examples)
            captions = captions[rand_idxs]
            image_idxs = image_idxs[rand_idxs]
            for i in range(n_iters_per_epoch):
                captions_batch = captions[i * self.batch_size:(i + 1) * self.batch_size]
                image_idxs_batch = image_idxs[i * self.batch_size:(i + 1) * self.batch_size]
                features_batch = features[image_idxs_batch]
                feed_dict = {
                    self.model.features: features_batch,
                    self.model.captions: captions_batch
                }
                _, l = sess.run([train_op, loss], feed_dict)
                curr_loss += l

                # write summary for tensorboard visualization
                if i % 10 == 0:
                    summary = sess.run(summary_op, feed_dict)
                    summary_writer.add_summary(summary, e * n_iters_per_epoch + i)

                if (i + 1) % self.print_every == 0:
                    print "\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f" % (
                        e + 1, i + 1, l)
                    # every ground-truth caption of the first image in the batch
                    ground_truths = captions[image_idxs == image_idxs_batch[0]]
                    decoded = decode_captions(ground_truths, self.model.idx_to_word)
                    for j, gt in enumerate(decoded):
                        print "Ground truth %d: %s" % (j + 1, gt)
                    gen_caps = sess.run(generated_captions, feed_dict)
                    decoded = decode_captions(gen_caps, self.model.idx_to_word)
                    print "Generated caption: %s\n" % decoded[0]

            print "Previous epoch loss: ", prev_loss
            print "Current epoch loss: ", curr_loss
            print "Elapsed time: ", time.time() - start_t
            prev_loss = curr_loss
            curr_loss = 0

            # print out BLEU scores and file write
            if self.print_bleu:
                all_gen_cap = np.ndarray((val_features.shape[0], 20))
                for i in range(n_iters_val):
                    features_batch = val_features[i * self.batch_size:(i + 1) * self.batch_size]
                    feed_dict = {self.model.features: features_batch}
                    gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
                    all_gen_cap[i * self.batch_size:(i + 1) * self.batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
                save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val', get_scores=True)
                write_bleu(scores=scores, path=self.model_path, epoch=e)

            # save model's parameters
            if (e + 1) % self.save_every == 0:
                saver.save(sess, os.path.join(self.model_path, 'model'), global_step=e + 1)
                print "model-%s saved." % (e + 1)
def train(self):
    """Train the captioning model (TF1).

    Variant that keeps fewer features than captions (see prepro), so
    ``n_examples`` counts features rather than captions.  Logs summaries,
    prints sample captions, optionally writes BLEU on val per epoch, and
    checkpoints every ``save_every`` epochs.
    """
    # train/val dataset
    # Changed this because I keep less features than captions, see prepro
    # n_examples = self.data['captions'].shape[0]
    n_examples = self.data['features'].shape[0]
    n_iters_per_epoch = int(np.ceil(float(n_examples)/self.batch_size))
    features = self.data['features']
    captions = self.data['captions']
    image_idxs = self.data['image_idxs']
    val_features = self.val_data['features']
    n_iters_val = int(np.ceil(float(val_features.shape[0])/self.batch_size))

    # build graphs for training model and sampling captions
    # This scope fixed things!!
    with tf.variable_scope(tf.get_variable_scope()):
        loss = self.model.build_model()
        tf.get_variable_scope().reuse_variables()
        _, _, generated_captions = self.model.build_sampler(max_len=20)

    # train op
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        optimizer = self.optimizer(learning_rate=self.learning_rate)
        grads = tf.gradients(loss, tf.trainable_variables())
        grads_and_vars = list(zip(grads, tf.trainable_variables()))
        train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

    # summary op
    # tf.scalar_summary('batch_loss', loss)
    tf.summary.scalar('batch_loss', loss)
    for var in tf.trainable_variables():
        #tf.histogram_summary(var.op.name, var)
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_and_vars:
        #tf.histogram_summary(var.op.name+'/gradient', grad)
        tf.summary.histogram(var.op.name+'/gradient', grad)
    #summary_op = tf.merge_all_summaries()
    summary_op = tf.summary.merge_all()

    print "The number of epoch: %d" %self.n_epochs
    print "Data size: %d" %n_examples
    print "Batch size: %d" %self.batch_size
    print "Iterations per epoch: %d" %n_iters_per_epoch

    config = tf.ConfigProto(allow_soft_placement = True)
    #config.gpu_options.per_process_gpu_memory_fraction=0.9
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        #summary_writer = tf.train.SummaryWriter(self.log_path, graph=tf.get_default_graph())
        summary_writer = tf.summary.FileWriter(self.log_path, graph=tf.get_default_graph())
        saver = tf.train.Saver(max_to_keep=40)
        if self.pretrained_model is not None:
            print "Start training with pretrained Model.."
            saver.restore(sess, self.pretrained_model)

        prev_loss = -1
        curr_loss = 0
        start_t = time.time()
        for e in range(self.n_epochs):
            # NOTE(review): rand_idxs has length n_examples (= #features),
            # so only the first n_examples captions are shuffled/used here;
            # presumably intentional given the prepro comment -- confirm.
            rand_idxs = np.random.permutation(n_examples)
            captions = captions[rand_idxs]
            image_idxs = image_idxs[rand_idxs]
            for i in range(n_iters_per_epoch):
                captions_batch = captions[i*self.batch_size:(i+1)*self.batch_size]
                image_idxs_batch = image_idxs[i*self.batch_size:(i+1)*self.batch_size]
                features_batch = features[image_idxs_batch]
                feed_dict = {self.model.features: features_batch, self.model.captions: captions_batch}
                _, l = sess.run([train_op, loss], feed_dict)
                curr_loss += l

                # write summary for tensorboard visualization
                if i % 10 == 0:
                    summary = sess.run(summary_op, feed_dict)
                    summary_writer.add_summary(summary, e*n_iters_per_epoch + i)

                if (i+1) % self.print_every == 0:
                    print "\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f" %(e+1, i+1, l)
                    # every ground-truth caption of the first image in the batch
                    ground_truths = captions[image_idxs == image_idxs_batch[0]]
                    decoded = decode_captions(ground_truths, self.model.idx_to_word)
                    for j, gt in enumerate(decoded):
                        print "Ground truth %d: %s" %(j+1, gt)
                    gen_caps = sess.run(generated_captions, feed_dict)
                    decoded = decode_captions(gen_caps, self.model.idx_to_word)
                    print "Generated caption: %s\n" %decoded[0]

            print "Previous epoch loss: ", prev_loss
            print "Current epoch loss: ", curr_loss
            print "Elapsed time: ", time.time() - start_t
            prev_loss = curr_loss
            curr_loss = 0

            # print out BLEU scores and file write
            if self.print_bleu:
                all_gen_cap = np.ndarray((val_features.shape[0], 20))
                for i in range(n_iters_val):
                    features_batch = val_features[i*self.batch_size:(i+1)*self.batch_size]
                    feed_dict = {self.model.features: features_batch}
                    gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
                    all_gen_cap[i*self.batch_size:(i+1)*self.batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
                save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val', get_scores=True)
                write_bleu(scores=scores, path=self.model_path, epoch=e)

            # save model's parameters
            if (e+1) % self.save_every == 0:
                saver.save(sess, os.path.join(self.model_path, 'model'), global_step=e+1)
                print "model-%s saved." %(e+1)
# NOTE(review): fragment -- the statements below are the tail of a training
# loop whose enclosing ``train`` definition is not visible in this chunk;
# they reference names (self, val_features, sess, e, start_t, ...) bound in
# that missing context.  Indentation has been reconstructed.
print "Elapsed time: ", time.time() - start_t
prev_loss = curr_loss
curr_loss = 0
# print out BLEU scores and file write
if self.print_bleu:
    all_gen_cap = np.ndarray((val_features.shape[0], 20))
    for i in range(n_iters_val):
        features_batch = val_features[i*self.batch_size:(i+1)*self.batch_size]
        feed_dict = {self.model.features: features_batch}
        gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
        all_gen_cap[i*self.batch_size:(i+1)*self.batch_size] = gen_cap
    all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
    save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
    scores = evaluate(data_path='./data', split='val', get_scores=True)
    write_bleu(scores=scores, path=self.model_path, epoch=e)
# save model's parameters
if (e+1) % self.save_every == 0:
    saver.save(sess, os.path.join(self.model_path, 'model'), global_step=e+1)
    print "model-%s saved." %(e+1)

def test(self, data, split='train', attention_visualization=True, save_sampled_captions=True):
    '''
    Args:
        - data: dictionary with the following keys:
            - features: Feature vectors of shape (5000, 196, 512)
            - file_names: Image file names of shape (5000, )
            - captions: Captions of shape (24210, 17)
    '''
    # NOTE(review): this definition is truncated at the chunk boundary --
    # the docstring was cut off mid-list (closed here for syntactic
    # validity) and the method body is missing from this view.
def test(self, data, split='train', attention_visualization=False, save_sampled_captions=False): ''' Args: - data: dictionary with the following keys: - features: Feature vectors of shape (5000, 196, 512) - file_names: Image file names of shape (5000, ) - captions: Captions of shape (24210, 17) - image_idxs: Indices for mapping caption to image of shape (24210, ) - features_to_captions: Mapping feature to captions (5000, 4~5) - split: 'train', 'val' or 'test' - attention_visualization: If True, visualize attention weights with images for each sampled word. (ipthon notebook) - save_sampled_captions: If True, save sampled captions to pkl file for computing BLEU scores. ''' features = data['features'] n_examples = self.data['captions'].shape[0] n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size)) # build a graph to sample captions alphas, betas, sampled_captions = self.model.build_sampler(max_len=20) # (N, max_len, L), (N, max_len) config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: saver = tf.train.Saver() saver.restore(sess, self.test_model) features_batch, image_files = sample_coco_minibatch_inference(data, self.batch_size) feed_dict = {self.model.features: features_batch} alps, bts, sam_cap = sess.run([alphas, betas, sampled_captions], feed_dict) # (N, max_len, L), (N, max_len) decoded = decode_captions(sam_cap, self.model.idx_to_word) if self.print_bleu: all_gen_cap = np.ndarray((features.shape[0], 20)) for i in range(n_iters_per_epoch): features_batch = features[i * self.batch_size:(i + 1) * self.batch_size] feed_dict = {self.model.features: features_batch} gen_cap = sess.run(sampled_captions, feed_dict=feed_dict) all_gen_cap[i * self.batch_size:(i + 1) * self.batch_size] = gen_cap all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word) save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl") scores = evaluate(data_path='./data', split='val', 
get_scores=True) if attention_visualization: for n in range(10): print "Sampled Caption: %s" % decoded[n] # Plot original image img = ndimage.imread(image_files[n]) plt.clf() plt.subplot(4, 5, 1) plt.imshow(img) plt.axis('off') # Plot images with attention weights words = decoded[n].split(" ") for t in range(len(words)): if t > 18: break plt.subplot(4, 5, t + 2) plt.text(0, 1, '%s(%.2f)' % (words[t], bts[n, t]), color='black', backgroundcolor='white', fontsize=8) plt.imshow(img) alp_curr = alps[n, t, :].reshape(14, 14) alp_img = skimage.transform.pyramid_expand(alp_curr, upscale=16, sigma=20) plt.imshow(alp_img, alpha=0.85) plt.axis('off') plt.savefig(str(n) + 'test.pdf') if save_sampled_captions: all_sam_cap = np.ndarray((features.shape[0], 20)) num_iter = int(np.ceil(float(features.shape[0]) / self.batch_size)) for i in range(num_iter): features_batch = features[i * self.batch_size:(i + 1) * self.batch_size] feed_dict = {self.model.features: features_batch} all_sam_cap[i * self.batch_size:(i + 1) * self.batch_size] = sess.run(sampled_captions, feed_dict) all_decoded = decode_captions(all_sam_cap, self.model.idx_to_word) save_pickle(all_decoded, "./data/%s/%s.candidate.captions.pkl" % (split, split))
def train(self):
    """Train the captioning model while streaming the feature set from disk.

    The training features are too large to hold in memory, so each epoch
    loads 9 hickle chunks of ~10000 images in turn, reassembles the
    caption/image pairs for that chunk, shuffles, and trains on it.
    """
    ######################################################
    # move to each epoch to solve huge data load problem #
    ######################################################
    # train/val dataset
    n_examples = self.data['captions'].shape[0]
    n_iters_per_epoch = int(np.ceil(float(n_examples)/self.batch_size))
    # features = self.data['features']
    captions = self.data['captions']
    image_idxs = self.data['image_idxs']
    # invert image_idxs: image index -> list of caption row indices
    caption_idxs = {}
    for i in range(len(image_idxs)):
        if image_idxs[i] not in caption_idxs:
            caption_idxs[image_idxs[i]] = [i]
        else:
            caption_idxs[image_idxs[i]].append(i)
    # val_features = self.val_data['features']
    val_features = load_val_data()
    n_iters_val = int(np.ceil(float(val_features.shape[0])/self.batch_size))

    # build graphs for training model and sampling captions
    loss = self.model.build_model()
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        _, _, generated_captions = self.model.build_sampler(max_len=20)

    # train op
    with tf.name_scope('optimizer'):
        optimizer = self.optimizer(learning_rate=self.learning_rate)
        grads = tf.gradients(loss, tf.trainable_variables())
        grads_and_vars = list(zip(grads, tf.trainable_variables()))
        train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

    # summary op
    tf.summary.scalar('batch_loss', loss)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    # for grad, var in grads_and_vars:
    #     tf.summary.histogram(var.op.name+'/gradient', grad)
    summary_op = tf.summary.merge_all()

    config = tf.ConfigProto(allow_soft_placement = True)
    #config.gpu_options.per_process_gpu_memory_fraction=0.9
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        summary_writer = tf.summary.FileWriter(self.log_path, graph=tf.get_default_graph())
        saver = tf.train.Saver(max_to_keep=40)
        if self.pretrained_model is not None:
            print "Start training with pretrained Model.."
            saver.restore(sess, self.pretrained_model)

        prev_loss = -1
        curr_loss = 0
        start_t = time.time()
        print "The number of epoch: %d" %self.n_epochs
        print "Batch size: %d" %self.batch_size
        for e in range(self.n_epochs):
            # load data 9 times to solve huge data load problem #
            cur_iteration = 0
            for data_cnt in range(9):
                print "----------------------------------------------------"
                print "Loading data (part %d / 9) " %(int(data_cnt)+1)
                features = hickle.load(os.path.join('./data/train', 'train.features%d.hkl' % data_cnt))
                total_num = features.shape[0]
                print "Load success (data size: %d) " %total_num
                print "Iterations: %d" %n_iters_per_epoch
                print "----------------------------------------------------"
                # global image-index range covered by this chunk
                # (chunks are 10000 images each -- presumably fixed by prepro)
                index_st = data_cnt * 10000
                index_ed = index_st + total_num
                part_features = features
                # gather the captions belonging to this chunk's images and
                # remap their image indices to chunk-local positions
                part_captions = []
                part_image_idxs = []
                for idx in range(total_num):
                    for caption_idx in caption_idxs[index_st + idx]:
                        part_captions.append(captions[caption_idx])
                        part_image_idxs.append(idx)
                part_captions = np.asarray(part_captions)
                part_image_idxs = np.asarray(part_image_idxs)
                part_iters = int(np.ceil(float(part_captions.shape[0])/self.batch_size))
                # shuffle caption/image pairs within the chunk
                rand_idxs = np.random.permutation(part_captions.shape[0])
                part_captions = part_captions[rand_idxs]
                part_image_idxs = part_image_idxs[rand_idxs]
                for i in range(part_iters):
                    captions_batch = part_captions[i*self.batch_size:(i+1)*self.batch_size]
                    part_image_idxs_batch = part_image_idxs[i*self.batch_size:(i+1)*self.batch_size]
                    features_batch = part_features[part_image_idxs_batch]
                    feed_dict = {self.model.features: features_batch, self.model.captions: captions_batch}
                    _, l = sess.run([train_op, loss], feed_dict)
                    curr_loss += l

                    # write summary for tensorboard visualization
                    if cur_iteration % 10 == 0:
                        summary = sess.run(summary_op, feed_dict)
                        summary_writer.add_summary(summary, e*n_iters_per_epoch + cur_iteration)

                    if (cur_iteration+1) % self.print_every == 0:
                        print "\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f" %(e+1, cur_iteration+1, l)
                        # every ground-truth caption of the batch's first image
                        ground_truths = part_captions[part_image_idxs == part_image_idxs_batch[0]]
                        decoded = decode_captions(ground_truths, self.model.idx_to_word)
                        for j, gt in enumerate(decoded):
                            print "Ground truth %d: %s" %(j+1, gt)
                        gen_caps = sess.run(generated_captions, feed_dict)
                        decoded = decode_captions(gen_caps, self.model.idx_to_word)
                        print "Generated caption: %s\n" %decoded[0]
                    cur_iteration = cur_iteration + 1
                print "Current( epoch %d / part %d ) loss: %f" %(e+1, data_cnt+1, curr_loss)

            print "----------------------------------------------------"
            print "Previous epoch loss: ", prev_loss
            print "Current epoch loss: ", curr_loss
            print "Elapsed time: ", time.time() - start_t
            print "----------------------------------------------------"
            prev_loss = curr_loss
            curr_loss = 0

            # print out BLEU scores and file write
            if self.print_bleu:
                all_gen_cap = np.ndarray((val_features.shape[0], 20))
                for i in range(n_iters_val):
                    features_batch = val_features[i*self.batch_size:(i+1)*self.batch_size]
                    feed_dict = {self.model.features: features_batch}
                    gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
                    all_gen_cap[i*self.batch_size:(i+1)*self.batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
                save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val', get_scores=True)
                write_bleu(scores=scores, path=self.model_path, epoch=e)

            # save model's parameters
            if (e+1) % self.save_every == 0:
                saver.save(sess, os.path.join(self.model_path, 'model'), global_step=e+1)
                print "model-%s saved." %(e+1)
def test(self, split='train', save_sampled_captions=True):
    '''Sample one caption per unique video of `split` and print metric scores.

    Restores the model from self.test_model, samples a caption for every
    unique video id in the split, prints each video's ground-truth reference
    sentences next to the generated one, then prints Bleu_1-4 / METEOR /
    CIDEr / ROUGE_L as returned by evaluate().

    Args:
        - split: 'train', 'val' or 'test'; selects which captions and video
          ids of self.data are used.
        - save_sampled_captions: not referenced in this body; kept for
          interface compatibility with callers.
    '''
    caps = self.data.captions[split]
    ids = self.data.video_ids[split]
    # de-duplicate: one caption is sampled per video, not per reference
    unique_ids = list(set(ids))
    n_examples = len(unique_ids)
    n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))

    # build a graph to sample captions
    alphas, betas, sampled_captions = self.model.build_sampler(
        max_len=self.max_words)  # (N, max_len, L), (N, max_len)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    all_decoded = []
    with tf.Session(config=config) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, self.test_model)
        for i in range(n_iters_per_epoch):
            # the last batch may be shorter than batch_size
            ids_batch = unique_ids[i * self.batch_size:(i + 1) *
                                   self.batch_size]
            features_batch = [self.data.feature(vid) for vid in ids_batch]
            features_batch = np.asarray(features_batch)
            feed_dict = {self.model.features: features_batch}
            alps, bts, sam_cap = sess.run(
                [alphas, betas, sampled_captions],
                feed_dict)  # (N, max_len, L), (N, max_len)
            decoded = decode_captions(sam_cap, self.data.vocab.idx2word)
            all_decoded.extend(decoded)

    # generate ref and cand
    ref = {}   # video id -> list of ground-truth sentences
    cand = {}  # video id -> single-element list with the generated sentence
    for vid, dec in zip(unique_ids, all_decoded):
        # caps[ids == vid][:, 1:] drops the first token of every reference
        # caption (presumably a start marker -- confirm with the data builder)
        gts = decode_captions(caps[ids == vid][:, 1:],
                              self.data.vocab.idx2word)
        ref[vid] = gts
        cand[vid] = [dec]

    # print ground truths and generated sentences
    for vid in unique_ids:
        print '---' * 10
        for i, gt in enumerate(ref[vid]):
            print i + 1, ':', gt
        print 'generated :', cand[vid][0]

    scores = evaluate(ref, cand, get_scores=True)
    tags = [
        'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'METEOR', 'CIDEr',
        'ROUGE_L'
    ]
    for tag in tags:
        print tag, ':', scores[tag]
    print split, len(unique_ids), len(all_decoded)
def test(self,
         data,
         split='train',
         attention_visualization=False,
         save_sampled_captions=False,
         senti=None):
    """Sample captions for `data` with a restored model, score and/or save them.

    Args:
        - data: dict with at least 'features' (reshapeable to (-1, 49, 2048))
          and 'captions' (token-id matrix; columns 0:4 presumably hold
          sentiment/control codes -- confirm with the data builder).
        - split: split name passed to evaluate() and used in the pickle path.
        - attention_visualization: unused in this body; kept for interface
          compatibility.
        - save_sampled_captions: if True, additionally sample captions for
          every full batch and pickle the decoded sentences under ./data.
        - senti: [1] selects the "positive" data directory, anything else the
          "negative" one. Defaults to [0] (negative), as before.

    Returns:
        The metric-score dict from evaluate() when self.print_bleu is set,
        otherwise None (previously the scores were computed and discarded).
    """
    # BUG FIX: mutable default argument ([0]) replaced by a None sentinel;
    # behavior for callers that omit `senti` is unchanged.
    if senti is None:
        senti = [0]
    max_len_captions = 20
    features = data['features'].reshape(-1, 49, 2048)
    captions = data['captions']
    if senti == [1]:
        data_save_path = "../data/positive"
    else:
        data_save_path = "../data/negative"
    # BUG FIX: iterate over the captions of the *passed-in* data, not
    # self.data -- otherwise the batch count can disagree with `features`.
    n_examples = data['captions'].shape[0]
    n_iters_per_epoch = int(np.floor(float(n_examples) / self.batch_size))

    # graph for caption sampling (greedy/sampled ids of width max_len_captions)
    alphas, betas, sampled_captions = self.model.build_sampler(
        max_len=max_len_captions)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    scores = None
    with tf.Session(config=config) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, self.test_model)
        if self.print_bleu:
            all_gen_cap = np.ndarray((features.shape[0], max_len_captions))
            for i in range(n_iters_per_epoch):
                features_batch = features[i * self.batch_size:(i + 1) *
                                          self.batch_size]
                captions_batch = captions[i * self.batch_size:(i + 1) *
                                          self.batch_size]
                feed_dict = {
                    self.model.features: features_batch,
                    # columns 4:T are the actual caption words
                    self.model.whole_samples:
                        captions_batch[:, 4:self.model.T],
                    self.model.nsample: 0,
                    self.model.mode_sampling: 1,
                    self.model.captions: captions_batch
                }
                gen_cap = sess.run(sampled_captions, feed_dict=feed_dict)
                all_gen_cap[i * self.batch_size:(i + 1) *
                            self.batch_size] = gen_cap
            all_decoded = decode_captions(all_gen_cap,
                                          self.model.idx_to_word)
            # BUG FIX: pass directory and file as two os.path.join arguments;
            # the old `data_save_path + 'test/...'` concatenation produced
            # e.g. "../data/positivetest/..." (missing path separator).
            save_pickle(
                all_decoded,
                os.path.join(data_save_path,
                             'test/test.candidate.captions.pkl'))
            scores = evaluate(data_path=data_save_path,
                              split=split,
                              get_scores=True)
        if save_sampled_captions:
            all_sam_cap = np.ndarray((features.shape[0], max_len_captions))
            num_iter = int(
                np.floor(float(features.shape[0]) / self.batch_size))
            for i in range(num_iter):
                features_batch = features[i * self.batch_size:(i + 1) *
                                          self.batch_size]
                feed_dict = {self.model.features: features_batch}
                all_sam_cap[i * self.batch_size:(i + 1) *
                            self.batch_size] = sess.run(
                                sampled_captions, feed_dict)
            all_decoded = decode_captions(all_sam_cap,
                                          self.model.idx_to_word)
            save_pickle(
                all_decoded,
                "./data/%s/%s.candidate.captions.pkl" % (split, split))
    return scores
def train(self):
    """SeqGAN-style training of the caption generator with a CNN discriminator.

    Three phases over the sentiment-filtered training captions:
      1. generator pre-training (MLE)      -- loop is `range(0)`: disabled,
      2. discriminator pre-training        -- loop is `range(0)`: disabled,
      3. adversarial training: policy-gradient updates with per-token rewards
         from ROLLOUT + Discriminator, interleaved with discriminator steps.
    When self.print_bleu, validation captions are generated twice per epoch
    (with "positive" and "negative" sentiment codes written into the feature
    tensor) and scored via evaluate()/write_bleu().
    """
    data_save_path = self.data_path
    # keep only examples whose caption column 3 is non-zero (presumably a
    # sentiment flag -- confirm with the data builder)
    sentiment_i = np.where(self.data['captions'][:, 3] != 0)[0]
    captions = self.data['captions'][sentiment_i, :21]
    n_examples = captions.shape[0]
    n_iters_per_epoch = int(np.floor(float(n_examples) / self.batch_size))
    image_idxs = self.data['image_idxs'][sentiment_i]
    features = self.data['features'].reshape(-1, 49, 2048)
    val_features = self.val_data['features'].reshape(-1, 49, 2048)
    n_iters_val = int(
        np.ceil(float(val_features.shape[0]) / self.batch_size))

    # build training loss and (with reused variables) the caption sampler;
    # the first 4 caption columns are control codes, so sample length is T-4
    with tf.variable_scope(tf.get_variable_scope()):
        loss = self.model.build_model()
        tf.get_variable_scope().reuse_variables()
        _, _, generated_captions = self.model.build_sampler(
            max_len=self.model.T - 4)
    with tf.variable_scope(tf.get_variable_scope()):
        optimizer = self.optimizer(learning_rate=self.learning_rate)
        # generator update must not touch the discriminator's variables
        params = [
            param for param in tf.trainable_variables()
            if not ('discriminator' in param.name)
        ]
        grads = tf.gradients(loss, params)
        grads_and_vars = list(zip(grads, params))  # tf.trainable_variables()))
        train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

    # summaries for tensorboard
    tf.summary.scalar('batch_loss', loss)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_and_vars:
        print var.op.name, 'ooooo'
        tf.summary.histogram(var.op.name + '/gradient', grad)
    summary_op = tf.summary.merge_all()

    print "The number of epoch: %d" % self.n_epochs
    print "Data size: %d" % n_examples
    print "Batch size: %d" % self.batch_size
    print "Iterations per epoch: %d" % n_iters_per_epoch

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    # discriminator hyper-parameters (filter widths up to full sequence)
    dis_embedding_dim = 256
    dis_filter_sizes = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, self.model.T - 4
    ]
    dis_num_filters = [
        100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160
    ]
    dis_l2_reg_lambda = 0.2
    discriminator = Discriminator(sequence_length=self.model.T - 4,
                                  num_classes=2,
                                  vocab_size=self.model.V,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda)
    # rollout policy follows the generator with update rate 0.8
    rollout = ROLLOUT(self.model, 0.8)
    dis_data_loader = Dis_dataloader(self.dis_batch_size)
    # zero rewards are fed during plain MLE pre-training
    rewards = np.zeros((self.batch_size, self.model.T - 4),
                       dtype=np.float32)
    dis_results_file = open(
        os.path.join(self.model_path, 'dis_results_file_4.txt'), 'w')

    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        summary_writer = tf.summary.FileWriter(
            self.log_path, graph=tf.get_default_graph())
        saver = tf.train.Saver(max_to_keep=40)
        if self.pretrained_model is not None:
            print "Start training with pretrained Model.."
            saver.restore(sess, self.pretrained_model)
        prev_loss = -1
        curr_loss = 0
        start_t = time.time()

        # ---- phase 1: generator MLE pre-training ----
        print 'Start pre-training...'
        for e in range(0):  # NOTE(review): disabled; was range(self.n_epochs)
            rand_idxs = np.random.permutation(n_examples)
            captions = captions[rand_idxs]
            image_idxs = image_idxs[rand_idxs]
            for i in range(n_iters_per_epoch):
                captions_batch = captions[i * self.batch_size:(i + 1) *
                                          self.batch_size]
                image_idxs_batch = image_idxs[i * self.batch_size:(i + 1) *
                                              self.batch_size]
                features_batch = features[image_idxs_batch]
                feed_dict = {
                    self.model.whole_samples:
                        captions_batch[:, 4:self.model.T],
                    self.model.rewards: rewards,
                    self.model.features: features_batch,
                    self.model.captions: captions_batch,
                    self.model.mode_learning: 1
                }
                _, l = sess.run([train_op, loss], feed_dict)
                curr_loss += l
                if (i + 1) % self.print_every == 0:
                    # show references of the batch's first image vs. a sample
                    ground_truths = captions[
                        image_idxs == image_idxs_batch[0], 4:]
                    decoded = decode_captions(ground_truths,
                                              self.model.idx_to_word)
                    for j, gt in enumerate(decoded):
                        print "Ground truth %d: %s" % (j + 1, gt)
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.whole_samples:
                            captions_batch[:, 4:self.model.T],
                        self.model.nsample: 0,
                        self.model.mode_sampling: 1,
                        self.model.captions: captions_batch
                    }
                    gen_caps = sess.run(generated_captions, feed_dict)
                    decoded = decode_captions(gen_caps,
                                              self.model.idx_to_word)
                    print "Generated caption: %s\n" % decoded[0]
            print "Previous epoch loss: ", prev_loss
            print "Current epoch loss: ", curr_loss
            print "Elapsed time: ", time.time() - start_t
            prev_loss = curr_loss
            curr_loss = 0
            # reuse the first caption batch as a dummy feed during validation
            captions_batch = captions[0 * self.batch_size:(0 + 1) *
                                      self.batch_size]
            if self.print_bleu:
                all_gen_cap = np.ndarray(
                    (val_features.shape[0], self.model.T - 4))
                pos = [1]
                neg = [-1]
                # NOTE(review): val_features was reshaped to a last dim of
                # 2048, so slice 2048:2052 is out of range as written --
                # confirm the true feature channel count
                val_features[:, :, 2048:2052] = [0, 1, 0, 1]
                for i in range(n_iters_val):
                    features_batch = val_features[i * self.batch_size:
                                                  (i + 1) * self.batch_size]
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.whole_samples:
                            captions_batch[:, 4:self.model.T],
                        self.model.nsample: 0,
                        self.model.mode_sampling: 1,
                        self.model.captions: captions_batch
                    }
                    gen_cap = sess.run(generated_captions,
                                       feed_dict=feed_dict)
                    all_gen_cap[i * self.batch_size:(i + 1) *
                                self.batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap,
                                              self.model.idx_to_word)
                save_pickle(
                    all_decoded,
                    os.path.join(data_save_path,
                                 "val/val.candidate.captions.pkl"))
                scores = evaluate(data_path=data_save_path,
                                  split='val',
                                  get_scores=True)
                print "scores_pos==================", scores
                write_bleu(scores=scores,
                           path=self.model_path,
                           epoch=e,
                           senti=pos)
                # repeat with the "negative" sentiment code
                val_features[:, :, 2048:2052] = [0, 0, 1, 2]
                for i in range(n_iters_val):
                    features_batch = val_features[i * self.batch_size:
                                                  (i + 1) * self.batch_size]
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.whole_samples:
                            captions_batch[:, 4:self.model.T],
                        self.model.nsample: 0,
                        self.model.mode_sampling: 1,
                        self.model.captions: captions_batch
                    }
                    gen_cap = sess.run(generated_captions,
                                       feed_dict=feed_dict)
                    all_gen_cap[i * self.batch_size:(i + 1) *
                                self.batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap,
                                              self.model.idx_to_word)
                save_pickle(
                    all_decoded,
                    os.path.join(data_save_path,
                                 "val/val.candidate.captions.pkl"))
                scores = evaluate(data_path=data_save_path,
                                  split='val',
                                  get_scores=True)
                print "scores_neg==================", scores
                write_bleu(scores=scores,
                           path=self.model_path,
                           epoch=e,
                           senti=neg)
            if (e + 1) % self.save_every == 0:
                saver.save(sess,
                           os.path.join(self.model_path, 'model'),
                           global_step=e + 1)
                print "model-%s saved." % (e + 1)

        # ---- phase 2: discriminator pre-training ----
        print 'Start pre-training discriminator...'
        for e in range(0):  # NOTE(review): disabled; was range(self.n_epochs)
            rand_idxs = np.random.permutation(n_examples)
            captions = captions[rand_idxs]
            image_idxs = image_idxs[rand_idxs]
            dis_loss = 0
            for i in range(n_iters_per_epoch):
                captions_batch = captions[i * self.batch_size:(i + 1) *
                                          self.batch_size]
                image_idxs_batch = image_idxs[i * self.batch_size:(i + 1) *
                                              self.batch_size]
                features_batch = features[image_idxs_batch]
                feed_dict = {
                    self.model.features: features_batch,
                    self.model.whole_samples:
                        captions_batch[:, 4:self.model.T],
                    self.model.nsample: 0,
                    self.model.mode_sampling: 1,
                    self.model.captions: captions_batch
                }
                # 3 discriminator steps per generator batch: generated
                # captions are negatives, ground-truth words positives
                for d_step in range(3):
                    negative_file = sess.run(generated_captions,
                                             feed_dict=feed_dict)
                    positive_file = captions_batch[:, 4:self.model.T]
                    dis_data_loader.load_train_data(
                        positive_file, negative_file)
                    for it in xrange(dis_data_loader.num_batch):
                        x_batch, y_batch = dis_data_loader.next_batch()
                        feed = {
                            discriminator.input_x: x_batch,
                            discriminator.input_y: y_batch,
                            discriminator.dropout_keep_prob:
                                self.dis_dropout_keep_prob
                        }
                        dis_l = sess.run(discriminator.loss, feed)
                        dis_loss = dis_loss + dis_l
                        _ = sess.run(discriminator.train_op, feed)
                        _ = sess.run(discriminator.params_clip, feed)
            dis_results_file.write('The loss in epoch %i is %f \n' %
                                   (e + 1, dis_loss))
            dis_results_file.flush()
            saver.save(sess,
                       os.path.join(self.model_path, 'model_and_dis'),
                       global_step=e + 1)

        # ---- phase 3: adversarial training ----
        print '#########################################################################'
        print 'Start Adversarial Training...'
        for e in range(self.n_epochs):
            rand_idxs = np.random.permutation(n_examples)
            captions = captions[rand_idxs]
            image_idxs = image_idxs[rand_idxs]
            for i in range(n_iters_per_epoch):
                captions_batch = captions[i * self.batch_size:(i + 1) *
                                          self.batch_size]
                image_idxs_batch = image_idxs[i * self.batch_size:(i + 1) *
                                              self.batch_size]
                features_batch = features[image_idxs_batch]
                feed_dict = {
                    self.model.features: features_batch,
                    self.model.whole_samples:
                        captions_batch[:, 4:self.model.T],
                    self.model.nsample: 0,
                    self.model.mode_sampling: 1,
                    self.model.captions: captions_batch
                }
                # sample a whole caption, score it with rollout+discriminator,
                # then run a policy-gradient generator update (mode 2)
                samples_whole = sess.run(generated_captions,
                                         feed_dict=feed_dict)
                rewards = rollout.get_reward(sess, samples_whole,
                                             generated_captions,
                                             self.rollout_num,
                                             discriminator, features_batch,
                                             captions_batch)
                feed_dict = {
                    self.model.whole_samples: samples_whole,
                    self.model.rewards: rewards,
                    self.model.features: features_batch,
                    self.model.captions: captions_batch,
                    self.model.mode_learning: 2
                }
                _, l_reward = sess.run([train_op, loss],
                                       feed_dict=feed_dict)
                curr_loss += l_reward
                feed_dict = {
                    self.model.features: features_batch,
                    self.model.whole_samples:
                        captions_batch[:, 4:self.model.T],
                    self.model.nsample: 0,
                    self.model.mode_sampling: 1,
                    self.model.captions: captions_batch
                }
                # keep the discriminator up to date: 3 steps per batch
                for d_step in range(3):
                    negative_file = sess.run(generated_captions,
                                             feed_dict=feed_dict)
                    positive_file = captions_batch[:, 4:self.model.T]
                    dis_data_loader.load_train_data(
                        positive_file, negative_file)
                    for it in xrange(dis_data_loader.num_batch):
                        x_batch, y_batch = dis_data_loader.next_batch()
                        feed = {
                            discriminator.input_x: x_batch,
                            discriminator.input_y: y_batch,
                            discriminator.dropout_keep_prob:
                                self.dis_dropout_keep_prob
                        }
                        _ = sess.run(discriminator.train_op, feed)
                        _ = sess.run(discriminator.params_clip, feed)
                if (i + 1) % self.print_every == 0:
                    ground_truths = captions[
                        image_idxs == image_idxs_batch[0], 4:]
                    decoded = decode_captions(ground_truths,
                                              self.model.idx_to_word)
                    for j, gt in enumerate(decoded):
                        print "Ground truth %d: %s" % (j + 1, gt)
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.whole_samples:
                            captions_batch[:, 4:self.model.T],
                        self.model.nsample: 0,
                        self.model.mode_sampling: 1,
                        self.model.captions: captions_batch
                    }
                    gen_caps = sess.run(generated_captions, feed_dict)
                    decoded = decode_captions(gen_caps,
                                              self.model.idx_to_word)
                    print "Generated caption: %s\n" % decoded[0]
            print "Previous epoch loss: ", prev_loss
            print "Current epoch loss: ", curr_loss
            print "Elapsed time: ", time.time() - start_t
            prev_loss = curr_loss
            curr_loss = 0
            # reuse the first caption batch as a dummy feed during validation
            captions_batch = captions[0 * self.batch_size:(0 + 1) *
                                      self.batch_size]
            if self.print_bleu:
                all_gen_cap = np.ndarray(
                    (val_features.shape[0], self.model.T - 4))
                pos = [1]
                neg = [-1]
                # NOTE(review): same out-of-range channel slice as above --
                # confirm the intended feature width
                val_features[:, :, 2048:2052] = [0, 1, 0, 1]
                for i in range(n_iters_val):
                    features_batch = val_features[i * self.batch_size:
                                                  (i + 1) * self.batch_size]
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.whole_samples:
                            captions_batch[:, 4:self.model.T],
                        self.model.nsample: 0,
                        self.model.mode_sampling: 1,
                        self.model.captions: captions_batch
                    }
                    gen_cap = sess.run(generated_captions,
                                       feed_dict=feed_dict)
                    all_gen_cap[i * self.batch_size:(i + 1) *
                                self.batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap,
                                              self.model.idx_to_word)
                save_pickle(
                    all_decoded,
                    os.path.join(data_save_path,
                                 "val/val.candidate.captions.pkl"))
                scores = evaluate(data_path=data_save_path,
                                  split='val',
                                  get_scores=True)
                print "scores_pos==================", scores
                write_bleu(scores=scores,
                           path=self.model_path,
                           epoch=e,
                           senti=pos)
                val_features[:, :, 2048:2052] = [0, 0, 1, 2]
                for i in range(n_iters_val):
                    features_batch = val_features[i * self.batch_size:
                                                  (i + 1) * self.batch_size]
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.whole_samples:
                            captions_batch[:, 4:self.model.T],
                        self.model.nsample: 0,
                        self.model.mode_sampling: 1,
                        self.model.captions: captions_batch
                    }
                    gen_cap = sess.run(generated_captions,
                                       feed_dict=feed_dict)
                    all_gen_cap[i * self.batch_size:(i + 1) *
                                self.batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap,
                                              self.model.idx_to_word)
                save_pickle(
                    all_decoded,
                    os.path.join(data_save_path,
                                 "val/val.candidate.captions.pkl"))
                scores = evaluate(data_path=data_save_path,
                                  split='val',
                                  get_scores=True)
                print "scores_neg==================", scores
                write_bleu(scores=scores,
                           path=self.model_path,
                           epoch=e,
                           senti=neg)
            if (e + 1) % self.save_every == 0:
                saver.save(sess,
                           os.path.join(self.model_path, 'model_adv'),
                           global_step=e + 1)
                print "model-%s saved." % (e + 1)
def train(self):
    """Self-critical policy-gradient fine-tuning of the caption model.

    Each batch draws multinomial samples and a greedy caption; per-example
    metric scores of the samples act as rewards and the greedy scores as the
    baseline (REINFORCE with a greedy baseline). The masked, reward-weighted
    loss is minimized. Per-epoch, validation captions are decoded, pickled
    and scored when self.print_bleu.
    """
    # train/val dataset
    n_examples = self.data['captions'].shape[0]
    n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
    features = self.data['features']
    captions = self.data['captions']
    image_idxs = self.data['image_idxs']
    val_features = self.val_data['features']
    val_captions = self.val_data['captions']
    n_iters_val = int(
        np.ceil(float(val_features.shape[0]) / self.batch_size))

    # build graphs for training model and sampling captions
    loss = self.model.build_model()
    tf.get_variable_scope().reuse_variables()
    # multinomial sampler re-binds `loss` to a per-token loss tensor
    alphas, betas, sampled_captions, loss = \
        self.model.build_multinomial_sampler()
    _, _, greedy_caption = self.model.build_sampler(max_len=20)
    rewards = tf.placeholder(tf.float32, [None])    # per-example reward
    base_line = tf.placeholder(tf.float32, [None])  # greedy baseline
    grad_mask = tf.placeholder(tf.int32, [None, 16])  # valid-token mask
    t1 = tf.expand_dims(grad_mask, 1)
    t1_mul = tf.to_float(tf.transpose(t1, [0, 2, 1]))

    # train op
    with tf.name_scope('optimizer'):
        optimizer = self.optimizer(learning_rate=self.learning_rate)
        norm = tf.reduce_sum(t1_mul)
        # mask out padding tokens, weight by the advantage (reward - baseline)
        mask_loss = loss * t1_mul
        sum_loss = tf.reduce_sum(
            tf.transpose(
                tf.mul(tf.transpose(mask_loss, [2, 1, 0]),
                       (rewards - base_line)), [2, 1, 0])) / norm
        # sum_loss = tf.reduce_sum(
        #     tf.transpose(tf.mul(tf.transpose(mask_loss, [2, 1, 0]), rewards - base_line), [2, 1, 0]), 1)
        grads_rl = tf.gradients(
            sum_loss,
            tf.trainable_variables(),
            aggregation_method=tf.AggregationMethod.
            EXPERIMENTAL_ACCUMULATE_N)
        grads_and_vars = list(zip(grads_rl, tf.trainable_variables()))
        # grads = tf.gradients(loss, tf.trainable_variables())
        # grads_and_vars = list(zip(grads, tf.trainable_variables()))
        train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

    # summary op
    print "The number of epoch: %d" % self.n_epochs
    print "Data size: %d" % n_examples
    print "Batch size: %d" % self.batch_size
    print "Iterations per epoch: %d" % n_iters_per_epoch

    config = tf.ConfigProto(allow_soft_placement=True)
    # config.gpu_options.per_process_gpu_memory_fraction=0.9
    config.gpu_options.allow_growth = True
    config.gpu_options.allocator_type = 'BFC'
    with tf.Session(config=config) as sess:
        saver = tf.train.Saver()
        # RL fine-tuning starts from a previously trained checkpoint
        saver.restore(sess, self.test_model)
        start_t = time.time()
        for e in range(self.n_epochs):
            rand_idxs = np.random.permutation(n_examples)
            captions = np.array(captions[rand_idxs])
            image_idxs = np.array(image_idxs[rand_idxs])
            b_for_eval = []  # greedy-baseline scores over the epoch
            for i in range(n_iters_per_epoch):
                captions_batch = np.array(
                    captions[i * self.batch_size:(i + 1) * self.batch_size])
                image_idxs_batch = np.array(
                    image_idxs[i * self.batch_size:(i + 1) *
                               self.batch_size])
                features_batch = np.array(features[image_idxs_batch])
                # ground_truths = []
                # for j in range(len(image_idxs_batch)):
                #     print j
                #     print image_idxs_batch[j]
                #     print captions[image_idxs == image_idxs_batch[j]]
                #     # ground_truths.append(captions[image_idxs_batch[j]])
                # ground_truths = [captions[image_idxs == image_idxs_batch[j]] for j in range(64)]
                # all reference captions of each image in the batch
                ground_truths = [
                    captions[image_idxs == image_idxs_batch[j]]
                    for j in range(len(image_idxs_batch))
                ]
                ref_decoded = [
                    decode_captions(ground_truths[j],
                                    self.model.idx_to_word)
                    for j in range(len(ground_truths))
                ]
                feed_dict = {
                    self.model.features: features_batch,
                    self.model.captions: captions_batch
                }
                samples, greedy_words = sess.run(
                    [sampled_captions, greedy_caption], feed_dict)
                masks, all_decoded = decode_captions_for_blue(
                    samples, self.model.idx_to_word)
                _, greedy_decoded = decode_captions_for_blue(
                    greedy_words, self.model.idx_to_word)
                # per-example metric scores: r = reward of the sampled
                # caption, b = score of the greedy caption (baseline)
                r = [
                    evaluate_captions([k], [v])
                    for k, v in zip(ref_decoded, all_decoded)
                ]
                b = [
                    evaluate_captions([k], [v])
                    for k, v in zip(ref_decoded, greedy_decoded)
                ]
                b_for_eval.extend(b)
                feed_dict = {
                    grad_mask: masks,
                    rewards: r,
                    base_line: b,
                    self.model.features: features_batch,
                    self.model.captions: captions_batch
                }
                # write summary for tensorboard visualization
                _ = sess.run([train_op], feed_dict)
            # mean greedy score over the epoch
            print str(np.mean(np.array(b_for_eval)))
            # print out BLEU scores and file write
            print "Elapsed time: ", time.time() - start_t
            if self.print_bleu:
                # b/r here are from the last mini-batch only
                print "b" + str(np.mean(np.array(b)))
                print "r" + str(np.mean(np.array(r)))
                # NOTE(review): width 128 vs. greedy max_len=20 -- confirm
                # the generated caption width matches this buffer
                all_gen_cap = np.ndarray((val_features.shape[0], 128))
                for k in range(n_iters_val):
                    features_batch = val_features[k * self.batch_size:
                                                  (k + 1) * self.batch_size]
                    captions_words_batch = np.array(
                        val_captions[k * self.batch_size:(k + 1) *
                                     self.batch_size])
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.captions: captions_words_batch
                    }
                    gen_cap = sess.run(greedy_caption, feed_dict=feed_dict)
                    all_gen_cap[k * self.batch_size:(k + 1) *
                                self.batch_size] = gen_cap
                masks, all_decoded = decode_captions_for_blue(
                    all_gen_cap, self.model.idx_to_word)
                # peek at the last few decoded validation captions
                for s in range(5):
                    print all_decoded[-s - 1]
                save_pickle(all_decoded,
                            "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data',
                                  split='val',
                                  get_scores=True)
                write_bleu(scores=scores, path=self.model_path, epoch=e)
            # save model's parameters
            if (e + 1) % self.save_every == 0:
                saver.save(sess,
                           os.path.join(self.model_path, 'model'),
                           global_step=e + 1)
                print "model-%s saved." % (e + 1)