def test(data, model, sess, test_model, batch_size, idx_to_word, split='test',
         attention_visualization=True, save_sampled_captions=True):
    '''Restore a checkpoint, caption every feature in `data`, save and score.

    Args:
        - data: dictionary; only data['features'] is read by this function.
          The original author documented features as shape (5000, 196, 512).
        - model: object exposing build() and
          generate(features_batch, detect_batch); the fifth value returned by
          generate() is used as the generated captions.
        - sess: TensorFlow session the checkpoint is restored into.
        - test_model: checkpoint path handed to saver.restore().
        - batch_size: number of examples captioned per generate() call.
        - idx_to_word: vocabulary mapping passed to decode_captions().
        - split: 'train', 'val' or 'test'. Selects the detection-feature
          directory, the candidate-caption pickle and the evaluated split.
          With the default 'test' the paths are the ones that used to be
          hard-coded.
        - attention_visualization: accepted for interface compatibility;
          not used by this function.
        - save_sampled_captions: accepted for interface compatibility;
          captions are always saved.

    Returns:
        Whatever evaluate(..., get_scores=True) returns (previously the
        scores were computed and dropped).
    '''
    model.build()
    saver = tf.train.Saver(max_to_keep=40)
    saver.restore(sess, test_model)

    val_features = data['features']
    n_examples = val_features.shape[0]
    n_iters_val = int(np.ceil(float(n_examples) / batch_size))
    # Every row is overwritten below, so the uninitialised buffer is safe.
    # assumes generate() yields 30 tokens per caption -- TODO confirm
    all_gen_cap = np.ndarray((n_examples, 30))
    detect_pattern = './data_residue_detect/%s/%s_%d.features.hkl'

    for i in range(n_iters_val):
        start = i * batch_size
        # Clamp the last batch: the previous loop always ran a full
        # batch_size and indexed past the end of val_detect_batch (and asked
        # for feature files that do not exist) when n_examples was not a
        # multiple of batch_size.
        stop = min(start + batch_size, n_examples)
        features_batch = val_features[start:stop]
        val_detect_batch = np.empty((len(features_batch), 10, 4096))
        for m, j in enumerate(range(start, stop)):
            val_detect_single = hickle.load(detect_pattern % (split, split, j))
            # Only the last 10 rows of each detection file are used.
            val_detect_batch[m, :] = val_detect_single[-10:, :]
        _, _, _, _, gen_cap = model.generate(features_batch, val_detect_batch)
        all_gen_cap[start:stop] = gen_cap

    all_decoded = decode_captions(all_gen_cap, idx_to_word)
    save_pickle(all_decoded,
                "./data/%s/%s.candidate.captions.pkl" % (split, split))
    scores = evaluate(data_path='./data', split=split, get_scores=True)
    return scores
def _validation_score(self, e, sess, image_val, generated_captions, lr):
    """Caption the validation images, save the captions and log the scores.

    Args:
        e: epoch number, forwarded to write_bleu().
        sess: TensorFlow session used for preprocessing and sampling.
        image_val: fetch evaluated once per image with ``self.image`` fed the
            RGB array; its result is what is fed to ``self.images``.
        generated_captions: fetch producing the sampled captions of a batch.
        lr: learning rate, forwarded to write_bleu().

    Raises:
        IOError: if a validation image cannot be read from disk.
    """
    file_names = load_pickle('/data1/junjiaot/data/val/val.file.names.pkl')
    print('number of validation features:', len(file_names))
    # The '..._abs' attribute pickle used to be loaded here as well but was
    # never read, so that load has been dropped.
    coco_attributes_onehot = load_pickle(
        '/data1/junjiaot/data/val/val.multi_class_labels.pkl')

    image_dir = '/home/junjiaot/data_local/val2014'
    # The third '/'-separated component of each stored name is the file name.
    image_files = np.array(
        [os.path.join(image_dir, name.split('/')[2]) for name in file_names])

    n_examples = len(file_names)
    n_iters_val = int(np.ceil(float(n_examples) / self.batch_size))
    # assumes the sampler emits 20 tokens per caption -- TODO confirm
    all_gen_cap = np.ndarray((n_examples, 20))

    for i in range(n_iters_val):
        start = i * self.batch_size
        end = start + self.batch_size  # slicing clamps the last batch

        image_batch = []
        for image_file in image_files[start:end]:
            image = cv2.imread(image_file)
            if image is None:
                # cv2.imread signals failure by returning None; fail with the
                # offending path rather than an opaque OpenCV error.
                raise IOError(
                    'could not read validation image: %s' % image_file)
            # OpenCV loads BGR; the graph is fed RGB.
            rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_batch.append(
                sess.run(image_val, feed_dict={self.image: rgb_img}))

        feed_dict = {
            self.images: image_batch,
            self.model.attributes_onehot: coco_attributes_onehot[start:end],
            self.model.keep_prob: 1.0,
        }
        gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
        all_gen_cap[start:end] = gen_cap[:]

    all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
    save_pickle(all_decoded,
                "/data1/junjiaot/data/val/val.candidate.captions.pkl")
    print('Calculating scores...')
    scores = evaluate(data_path='/data1/junjiaot/data', split='val',
                      get_scores=True)
    write_bleu(scores=scores, path=os.path.join(self.model_path, 'Score'),
               epoch=e, lr=lr)
learning_rate=0.0025, print_every=2000, save_every=1, image_path='./image/val2014_resized', pretrained_model=None, model_path='./model/lstm', test_model='./model/lstm3/model-18', print_bleu=False, log_path='./log/') # In[7]: solver.test(data, split='val') # In[8]: test = load_coco_data(data_path='./data', split='test') # In[13]: tf.get_variable_scope().reuse_variables() solver.test(test, split='test') # In[14]: evaluate(data_path='./data', split='val') # In[15]: evaluate(data_path='./data', split='test')
def train(self):
    """
    Build the training and sampling graphs, then train from the batches
    yielded by ``self.pre_mgr.fetch_batch``.

    Periodically prints a sample caption, runs the validation batches through
    the sampler, writes the scores via ``utils.write_bleu`` and saves a
    checkpoint.

    NOTE(review): block nesting below was reconstructed from a
    whitespace-collapsed source; confirm the scope of the optimizer
    statements and of the validation/save section against the original file.

    :return: None
    """
    loss = self.model.build_model()
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        with tf.name_scope('optimizer'):
            # Share the variables created by build_model() with the sampler.
            tf.get_variable_scope().reuse_variables()
            _, _, generated_captions = self.model.build_sampler(max_len=self.max_words_len)
            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            # Learning rate is multiplied by 0.96 every lr_decay_steps steps.
            lr = tf.train.exponential_decay(learning_rate=self.learning_rate,
                                            global_step=self.global_step,
                                            decay_steps=TrainingArg.lr_decay_steps,
                                            decay_rate=0.96,
                                            staircase=True,
                                            name='learn_rate')
            optimizer = self.optimizer(learning_rate=lr)
            grads = tf.gradients(loss, tf.trainable_variables())
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
    # presumably applied outside the reuse scope so the optimizer can create
    # its own variables -- TODO confirm placement
    train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars, global_step=self.global_step)

    # summary op
    tf.summary.scalar('batch_loss', loss)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_and_vars:
        # Variables that do not influence the loss have no gradient.
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradient', grad)
    summary_op = tf.summary.merge_all()

    config = tf.ConfigProto(allow_soft_placement=True)
    # config.gpu_options.per_process_gpu_memory_fraction=0.9
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        self.pre_mgr.set_tf_sess(sess)
        tf.initialize_all_variables().run()
        summary_writer = tf.summary.FileWriter(self.log_path, graph=tf.get_default_graph())
        saver = tf.train.Saver(max_to_keep=10)

        if self.pretrained_model is not None:
            print("Start training with pretrained Model..")
            saver.restore(sess, self.pretrained_model)

        curr_epoch = 0
        batchs = self.pre_mgr.fetch_batch(Const.caption_train_vector_path, self.data_dir, self.batch_size, self.n_epochs)
        for batch in batchs:
            # Each yielded batch carries its own epoch counter.
            caption_batch, image_batch, epoch = batch
            feed_dict = {self.model.features: image_batch, self.model.captions: caption_batch}
            _, l, step = sess.run([train_op, loss, self.global_step], feed_dict)

            if step % self.print_every == 0 or step == 1:
                summary = sess.run(summary_op, feed_dict)
                summary_writer.add_summary(summary, step)
                print("\nTrain loss at epoch %d & step %d (mini-batch): %.5f" % (epoch + 1, step, l))
                # Only the first caption of the batch is shown as ground truth.
                ground_truths = np.array([caption_batch[0]])
                decoded = self.pre_mgr.decode_captions(ground_truths, self.model.idx_to_word)
                for j, gt in enumerate(decoded):
                    print("Ground truth %d: %s" % (j + 1, gt))
                gen_caps = sess.run(generated_captions, feed_dict)
                decoded = self.pre_mgr.decode_captions(gen_caps, self.model.idx_to_word)
                print("Generated caption: %s\n" % decoded[0])
                # NOTE(review): the value printed as "epoch loss" is the
                # loss of the current mini-batch.
                print('{}, epoch:{} step: {},Current epoch loss: {}'.format(datetime.datetime.now().isoformat(), epoch + 1, step, l))

            # print out BLEU scores and file write
            # Runs on the first step, on every print_every-th step and
            # whenever the epoch counter changes.
            if curr_epoch != epoch or step == 1 or step % self.print_every == 0:
                curr_epoch = epoch
                val_data_batchs = self.pre_mgr.fetch_val_batch(Const.val_vector_out_path, self.data_dir, self.batch_size)
                gen_caps = []
                i = 0
                for val_batch in val_data_batchs:
                    val_caption, val_image = val_batch
                    feed_dict = {self.model.features: val_image}
                    gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
                    gen_caps.extend(gen_cap)
                    # Reference captions are decoded only until
                    # val_data_flag is set, then reused from self.org_decoded.
                    if not self.val_data_flag:
                        print('val batch loop {}'.format(i))
                        for item in val_caption:
                            self.org_decoded[i] = self.pre_mgr.decode_captions(np.array(item), self.model.idx_to_word, ignore_start=True)
                            i += 1
                # NOTE(review): assumed to run after the whole validation
                # loop so that every batch is decoded once -- TODO confirm
                self.val_data_flag = True
                gen_decoded = self.pre_mgr.decode_captions(np.array(gen_caps), self.model.idx_to_word)
                for j in range(5):
                    print('val org sents: {}'.format(self.org_decoded[j]))
                    print('val gen sents: {}\n'.format(gen_decoded[j]))
                scores = evaluate(gen_decoded, self.org_decoded, get_scores=True)
                utils.write_bleu(scores=scores, path=self.model_path, epoch=epoch)

                # save model's parameters
                # NOTE(review): the checkpoint is tagged with `step`, while
                # the message below prints the epoch number.
                saver.save(sess, os.path.join(self.model_path, 'model'), global_step=step)
                print("model-%s saved." % (epoch + 1))
def main(unused_argv):
    """Restore a ShowAndTellModel checkpoint and evaluate it on 'val'.

    Plots attention maps for up to ten images of a sampled minibatch, then
    captions every validation feature, saves the candidate captions and
    writes the scores returned by evaluate().

    Args:
        unused_argv: ignored; present so the function can be used as an
            app-style entry point.
    """
    plt.rcParams['figure.figsize'] = (8.0, 6.0)  # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    # Candidate captions used to be written under 'train' while the data
    # loaded and the split scored were both 'val', so evaluate() never saw
    # the captions produced by this run. One name now drives all three.
    split = 'val'
    batch_size = 128

    data_path = '/home/yifan/PythonProjects/im2txt-att/data'
    data = load_coco_data(data_path=data_path, split=split)
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    # basic settings
    test_path = '/home/yifan/PythonProjects/im2txt-att/model/lstm/model-test'
    model_path = '/home/yifan/PythonProjects/im2txt-att/model/lstm/model.ckpt'
    model = ShowAndTellModel(word_to_idx,
                             mode='eval',
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             alpha_c=1.0)

    features = data['features']

    # build a graph to sample captions
    alphas, betas, sampled_captions = model.build_model(max_len=20)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, model_path)

        features_batch, image_files = sample_coco_minibatch(data, batch_size)
        feed_dict = {model.features: features_batch}
        alps, bts, sam_cap = sess.run([alphas, betas, sampled_captions],
                                      feed_dict)
        decoded = decode_captions(sam_cap, model.idx_to_word)

        # attention visualization (never index past the sampled batch)
        for n in range(min(10, len(decoded))):
            print("Sampled Caption: %s" % decoded[n])

            # Plot original image
            img = ndimage.imread(image_files[n])
            plt.subplot(4, 5, 1)
            plt.imshow(img)
            plt.axis('off')

            # Plot images with attention weights; the 4x5 grid has room for
            # the image plus 19 words.
            words = decoded[n].split(" ")
            for t, word in enumerate(words[:19]):
                plt.subplot(4, 5, t + 2)
                plt.text(0, 1, '%s(%.2f)' % (word, bts[n, t]),
                         color='black', backgroundcolor='white', fontsize=8)
                plt.imshow(img)
                alp_curr = alps[n, t, :].reshape(14, 14)
                alp_img = skimage.transform.pyramid_expand(alp_curr,
                                                           upscale=16,
                                                           sigma=20)
                plt.imshow(alp_img, alpha=0.85)
                plt.axis('off')
            plt.show()

        # print out BLEU scores and file write
        n_examples = features.shape[0]
        all_sam_cap = np.ndarray((n_examples, 20))
        num_iter = int(np.ceil(float(n_examples) / batch_size))
        for i in range(num_iter):
            lo, hi = i * batch_size, (i + 1) * batch_size
            feed_dict = {model.features: features[lo:hi]}
            all_sam_cap[lo:hi] = sess.run(sampled_captions, feed_dict)
        all_decoded = decode_captions(all_sam_cap, model.idx_to_word)
        save_pickle(
            all_decoded,
            "/home/yifan/PythonProjects/im2txt-att/data/%s/%s.candidate.captions.pkl"
            % (split, split))
        scores = evaluate(data_path=data_path, split=split, get_scores=True)
        write_bleu(scores=scores, path=test_path, epoch=0)
def train(self):
    """Train the captioning model for ``self.n_epochs`` epochs.

    Each epoch shuffles the caption/image-index pairs, runs one optimizer
    step per mini-batch, writes a TensorBoard summary every 10 iterations and
    prints a sample caption every ``self.print_every`` iterations. When
    ``self.print_bleu`` is set, the validation features are captioned and
    scored after each epoch. A checkpoint is saved every ``self.save_every``
    epochs.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm the scope of the optimizer statements.
    """
    # train/val dataset
    n_examples = self.data['captions'].shape[0]
    n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
    features = self.data['features']
    captions = self.data['captions']
    image_idxs = self.data['image_idxs']
    val_features = self.val_data['features']
    n_iters_val = int(np.ceil(float(val_features.shape[0]) / self.batch_size))

    # build graphs for training model and sampling captions
    loss = self.model.build_model()
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        with tf.name_scope('optimizer'):
            # The sampler shares the variables created by build_model().
            tf.get_variable_scope().reuse_variables()
            _, _, generated_captions = self.model.build_sampler(max_len=20)
            optimizer = self.optimizer(learning_rate=self.learning_rate)
            trainable = tf.trainable_variables()
            grads = tf.gradients(loss, trainable)
            grads_and_vars = list(zip(grads, trainable))
    train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

    # summary op
    tf.summary.scalar('batch_loss', loss)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_and_vars:
        # tf.gradients yields None for variables that do not affect the
        # loss; a histogram of None would raise.
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradient', grad)
    summary_op = tf.summary.merge_all()

    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print("The number of epoch: %d" % self.n_epochs)
    print("Data size: %d" % n_examples)
    print("Batch size: %d" % self.batch_size)
    print("Iterations per epoch: %d" % n_iters_per_epoch)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # tf.initialize_all_variables is deprecated in favour of this call.
        tf.global_variables_initializer().run()
        summary_writer = tf.summary.FileWriter(self.log_path,
                                               graph=tf.get_default_graph())
        saver = tf.train.Saver(max_to_keep=20)

        if self.pretrained_model is not None:
            print("Start training with pretrained Model.")
            saver.restore(sess, self.pretrained_model)

        prev_loss = -1
        curr_loss = 0
        start_t = time.time()

        for e in range(self.n_epochs):
            # Shuffle captions and their image indices with one permutation
            # so the pairs stay aligned.
            rand_idxs = np.random.permutation(n_examples)
            captions = captions[rand_idxs]
            image_idxs = image_idxs[rand_idxs]

            for i in range(n_iters_per_epoch):
                lo, hi = i * self.batch_size, (i + 1) * self.batch_size
                captions_batch = captions[lo:hi]
                image_idxs_batch = image_idxs[lo:hi]
                print(image_idxs_batch)
                features_batch = features[image_idxs_batch]
                feed_dict = {
                    self.model.features: features_batch,
                    self.model.captions: captions_batch,
                }
                _, l = sess.run([train_op, loss], feed_dict)
                curr_loss += l

                # write summary for tensorboard visualization
                if i % 10 == 0:
                    summary = sess.run(summary_op, feed_dict)
                    summary_writer.add_summary(summary,
                                               e * n_iters_per_epoch + i)

                if (i + 1) % self.print_every == 0:
                    print("\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f"
                          % (e + 1, i + 1, l))
                    # All captions that belong to the first image of the batch.
                    ground_truths = captions[image_idxs == image_idxs_batch[0]]
                    decoded = decode_captions(ground_truths,
                                              self.model.idx_to_word)
                    for j, gt in enumerate(decoded):
                        print("Ground truth %d: %s" % (j + 1, gt))
                    gen_caps = sess.run(generated_captions, feed_dict)
                    decoded = decode_captions(gen_caps, self.model.idx_to_word)
                    print("Generated caption: %s\n" % decoded[0])

            print("Previous epoch loss: %s" % prev_loss)
            print("Current epoch loss: %s" % curr_loss)
            print("Elapsed time: %s" % (time.time() - start_t))
            prev_loss = curr_loss
            curr_loss = 0

            # print out BLEU scores and file write
            if self.print_bleu:
                all_gen_cap = np.ndarray((val_features.shape[0], 20))
                for i in range(n_iters_val):
                    lo, hi = i * self.batch_size, (i + 1) * self.batch_size
                    feed_dict = {self.model.features: val_features[lo:hi]}
                    gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
                    all_gen_cap[lo:hi] = gen_cap

                all_decoded = decode_captions(all_gen_cap,
                                              self.model.idx_to_word)
                save_pickle(all_decoded,
                            "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val',
                                  get_scores=True)
                write_bleu(scores=scores, path=self.model_path, epoch=e)

            # save model's parameters
            if (e + 1) % self.save_every == 0:
                saver.save(sess, os.path.join(self.model_path, 'model'),
                           global_step=e + 1)
                print("model-%s saved." % (e + 1))
def train(self):
    """Train the model, switching to a 10x smaller learning rate after epoch 30.

    Builds the training loss, the sampler and a validation loss, creates
    three optimizers (learning_rate, /10 and /100; the /100 train op is built
    but never run), then trains for ``self.n_epochs`` epochs. When
    ``self.print_bleu`` is set, each epoch also captions the validation
    features, logs the validation loss, saves the candidate captions and
    writes the scores.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the original file.
    """
    # train/val dataset
    # Changed this because I keep less features than captions, see prepro
    n_examples = self.data['captions'].shape[0]
    #n_examples = self.data['features'].shape[0]
    n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
    features = self.data['features']
    captions = self.data['captions']
    image_idxs = self.data['image_idxs']
    val_features = self.val_data['features']
    val_captions = self.val_data['captions']
    n_iters_val = int(
        np.ceil(float(val_features.shape[0]) / self.batch_size))

    # build graphs for training model and sampling captions
    # This scope fixed things!!
    with tf.variable_scope(tf.get_variable_scope()):
        loss = self.model.build_model()
        # Everything built after this call reuses build_model()'s variables.
        tf.get_variable_scope().reuse_variables()
        alph, bts, generated_captions = self.model.build_sampler(
            split='val', max_len=101)
        val_loss = self.model.valid_loss()

    # train op
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        early_optimizer = self.optimizer(learning_rate=self.learning_rate)
        mid_optimizer = self.optimizer(learning_rate=self.learning_rate / 10)
        late_optimizer = self.optimizer(learning_rate=self.learning_rate / 100)
        grads = tf.gradients(loss, tf.trainable_variables())
        grads_and_vars = list(zip(grads, tf.trainable_variables()))
        train_op = early_optimizer.apply_gradients(
            grads_and_vars=grads_and_vars)
        train_op0 = mid_optimizer.apply_gradients(
            grads_and_vars=grads_and_vars)
        # NOTE(review): train_op1 is never run below.
        train_op1 = late_optimizer.apply_gradients(
            grads_and_vars=grads_and_vars)

    # summary op
    # Two separate scalar summaries (no merge_all) so the train and val
    # writers each receive only their own loss.
    val_loss_sum = tf.summary.scalar('val_batch_loss', val_loss)  #### Added by Zaheer
    train_loss_sum = tf.summary.scalar('batch_loss', loss)

    print("The number of epoch: %d" % self.n_epochs)
    print("Data size: %d" % n_examples)
    print("Batch size: %d" % self.batch_size)
    print("Iterations per epoch: %d" % n_iters_per_epoch)

    #config = tf.ConfigProto(allow_soft_placement = True,device_count = {'GPU': 0}, log_device_placement=True)
    config = tf.ConfigProto(log_device_placement=True)
    run_options = tf.RunOptions()
    #config.gpu_options.per_process_gpu_memory_fraction=0.9
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        summary_writer = tf.summary.FileWriter(
            self.log_path + '/train/', graph=tf.get_default_graph())
        val_summary_writer = tf.summary.FileWriter(
            self.log_path + '/val/',
            graph=tf.get_default_graph())  ### Added by Zaheer
        saver = tf.train.Saver(max_to_keep=100)  # given value was 40

        if self.pretrained_model is not None:
            print("Start training with pretrained Model..")
            saver.restore(sess, self.pretrained_model)

        prev_loss = -1
        curr_loss = 0
        start_t = time.time()

        for e in range(self.n_epochs):
            tik_epoch = time.time()
            print("epoch:", e)
            # One permutation is applied to both arrays so caption/image
            # pairs stay aligned after shuffling.
            rand_idxs = np.random.permutation(n_examples)
            captions = captions[rand_idxs]
            image_idxs = image_idxs[rand_idxs]

            for i in range(n_iters_per_epoch):
                tik_batch = time.time()
                captions_batch = captions[i * self.batch_size:(i + 1) *
                                          self.batch_size]
                image_idxs_batch = image_idxs[i * self.batch_size:(i + 1) *
                                              self.batch_size]
                features_batch = features[image_idxs_batch]
                feed_dict = {
                    self.model.features: features_batch,
                    self.model.captions: captions_batch
                }
                print(i)
                # Full learning rate for epochs 0..30, learning_rate / 10
                # afterwards.
                if (e <= 30):
                    _, l = sess.run([train_op, loss],
                                    feed_dict,
                                    options=run_options)
                # elif (e<=60 and e>30):
                #     _, l = sess.run([train_op0, loss], feed_dict)
                else:
                    _, l = sess.run([train_op0, loss],
                                    feed_dict,
                                    options=run_options)
                curr_loss += l
                #print('Time to execute a batch:',time.time()-tik_batch)

                # write summary for tensorboard visualization
                if i % 10 == 0:
                    summary = sess.run(train_loss_sum, feed_dict)
                    summary_writer.add_summary(summary,
                                               e * n_iters_per_epoch + i)

                if ((i + 1) % self.print_every == 0):
                    print(
                        "\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f"
                        % (e + 1, i + 1, l))
                    # Every caption whose image index matches the first
                    # example of the batch.
                    ground_truths = captions[image_idxs ==
                                             image_idxs_batch[0]]
                    decoded = decode_captions(ground_truths,
                                              self.model.idx_to_word)
                    for j, gt in enumerate(decoded):
                        print("Ground truth %d: %s" % (j + 1, gt))
                    gen_caps = sess.run(generated_captions, feed_dict)
                    decoded = decode_captions(gen_caps,
                                              self.model.idx_to_word)
                    print("Generated caption: %s\n" % decoded[0])

            #print('Time to execute an epoch:', time.time() - tik_epoch)
            print("Previous epoch loss: ", prev_loss)
            print("Current epoch loss: ", curr_loss)
            print("Elapsed time: ", time.time() - start_t)
            prev_loss = curr_loss
            curr_loss = 0
            curr_val_loss = 0.0

            # print out BLEU scores and file write
            if self.print_bleu:
                # 101 matches the max_len given to build_sampler above.
                all_gen_cap = np.ndarray((val_features.shape[0], 101))
                for i in range(n_iters_val):
                    val_features_batch = val_features[i * self.batch_size:
                                                      (i + 1) *
                                                      self.batch_size]
                    # NOTE(review): captions are sliced with the same row
                    # range as the features -- assumes one caption row per
                    # feature row in val_data; TODO confirm
                    val_captions_batch = val_captions[i * self.batch_size:
                                                      (i + 1) *
                                                      self.batch_size]
                    feed_dict = {
                        self.model.features: val_features_batch,
                        self.model.captions: val_captions_batch
                    }
                    gen_cap, val_l = sess.run(
                        [generated_captions, val_loss],
                        feed_dict=feed_dict)  #### Added by Zaheer
                    all_gen_cap[i * self.batch_size:(i + 1) *
                                self.batch_size] = gen_cap
                    curr_val_loss += val_l
                    ### Added by Zaheer
                    if i % 10 == 0:
                        val_summary = sess.run(val_loss_sum, feed_dict)
                        val_summary_writer.add_summary(
                            val_summary, e * n_iters_val + i)
                # NOTE(review): assumed to print once per epoch, after the
                # validation loop -- TODO confirm
                print(curr_val_loss)
                #### End
                all_decoded = decode_captions(all_gen_cap,
                                              self.model.idx_to_word)
                GroundTruth_decoded = decode_captions(
                    self.val_data['captions'], self.model.idx_to_word)
                print("Validation Sample Generated Caption")
                # NOTE(review): the index is drawn from the caption count but
                # also indexes all_decoded, which has one row per feature.
                sample_gen = random.randint(0, val_captions.shape[0] - 1)
                print("Ground truth: %s" %
                      (GroundTruth_decoded[sample_gen]))
                print("Generated caption: %s\n" % all_decoded[sample_gen])
                # NOTE(review): validation captions are stored and scored
                # under the 'test' split name.
                save_pickle(all_decoded,
                            self.path + "test/test.candidate.captions.pkl")
                scores = evaluate(data_path=self.path,
                                  split='test',
                                  get_scores=True)
                write_bleu(scores=scores, path=self.path, epoch=e)

            # save model's parameters
            if (e + 1) % self.save_every == 0:
                saver.save(sess,
                           os.path.join(self.model_path, 'model'),
                           global_step=e + 1)
                print("model-%s saved." % (e + 1))
    #self.test(self.test_data,alphas=alph, betas=bts, sampled_captions=generated_captions)
    # NOTE(review): assumed to run after the session block has exited --
    # TODO confirm placement
    tf.reset_default_graph()
def test(self,
         data,
         split='train',
         attention_visualization=True,
         save_sampled_captions=False):
    '''Sample captions for `data`, save them, plot attention and score them.

    Args:
        - data: dictionary; this method reads data['features'] and
          data['file_names']. The original author documented:
            - features: Feature vectors of shape (5000, 196, 512)
            - file_names: Image file names of shape (5000, )
        - split: 'train', 'val' or 'test'; only used to name the pickle
          written when save_sampled_captions is True.
        - attention_visualization: If True, save one attention figure per
          image (original image plus one panel per sampled word).
        - save_sampled_captions: If True, re-sample captions batch by batch
          and save them to a pkl file for computing BLEU scores.
    '''
    # One constant drives both the sampler and the caption buffer; the
    # buffer used to be 20 wide while the sampler was built with 25.
    # assumes sampled_captions has max_len columns -- TODO confirm
    max_len = 25

    features = data['features']

    # build a graph to sample captions
    alphas, betas, sampled_captions = self.model.build_sampler(
        max_len=max_len)  # (N, max_len, L), (N, max_len)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, self.test_model)

        # The whole feature set is pushed through the sampler in one run.
        features_batch, image_files = data['features'], data['file_names']
        feed_dict = {self.model.features: features_batch}
        alps, bts, sam_cap = sess.run(
            [alphas, betas, sampled_captions],
            feed_dict)  # (N, max_len, L), (N, max_len)
        decoded = decode_captions(sam_cap, self.model.idx_to_word)
        save_pickle(
            decoded,
            r".\image_data_to_be_labeled\Object_feature\our_data\train\val.candidate.captions.pkl"
        )

        if attention_visualization:
            for n, image_file in enumerate(image_files):
                print("Sampled Caption: %s" % decoded[n])

                # Plot original image
                img = ndimage.imread(image_file)
                plt.subplot(4, 5, 1)
                plt.imshow(img)
                plt.axis('off')

                # Plot images with attention weights; the 4x5 grid has room
                # for the image plus 19 words.
                words = decoded[n].split(" ")
                for t, word in enumerate(words[:19]):
                    plt.subplot(4, 5, t + 2)
                    plt.text(0,
                             1,
                             '%s(%.2f)' % (word, bts[n, t]),
                             color='black',
                             backgroundcolor='white',
                             fontsize=8)
                    plt.imshow(img)
                    # Only the first 196 attention weights form the 14x14 map.
                    alp_curr = alps[n, t, :196].reshape(14, 14)
                    alp_img = skimage.transform.pyramid_expand(alp_curr,
                                                               upscale=16,
                                                               sigma=20)
                    plt.imshow(alp_img, alpha=0.85)
                    plt.axis('off')
                plt.suptitle(decoded[n])
                # str.rstrip('.jpg') strips any trailing '.', 'j', 'p' or
                # 'g' characters (e.g. 'dog.jpg' -> 'do'), so the extension
                # is removed with splitext instead.
                stem = os.path.splitext(os.path.basename(image_file))[0]
                savename = 'caption_' + stem + '.png'
                plt.savefig(
                    os.path.join(
                        r'C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\Object_feature\results',
                        savename))
                plt.close('all')

        ref_path = r'.\image_data_to_be_labeled\Object_feature\our_data\train\train.references.pkl'
        cand_path = r'.\image_data_to_be_labeled\Object_feature\our_data\train\val.candidate.captions.pkl'
        scores = evaluate(ref_path, cand_path, get_scores=True)

        if save_sampled_captions:
            n_examples = features.shape[0]
            all_sam_cap = np.ndarray((n_examples, max_len))
            num_iter = int(np.ceil(float(n_examples) / self.batch_size))
            for i in range(num_iter):
                lo, hi = i * self.batch_size, (i + 1) * self.batch_size
                feed_dict = {self.model.features: features[lo:hi]}
                all_sam_cap[lo:hi] = sess.run(sampled_captions, feed_dict)
            all_decoded = decode_captions(all_sam_cap, self.model.idx_to_word)
            save_pickle(
                all_decoded,
                "./data/%s/%s.candidate.captions.pkl" % (split, split))
alpha_c=1.0, selector=True, dropout=True) solver = CaptioningSolver(model, data, data, n_epochs=20, batch_size=128, update_rule='adam', learning_rate=0.0025, print_every=2000, save_every=1, image_path='./image/val2014_resized', pretrained_model=None, model_path='./model/preview_model/', test_model='./model/preview_model/model-20', print_bleu=False, log_path='./log/') #solver.test(data, split='val') #test = load_coco_data(data_path='./data/coco_data', split='test') #tf.get_variable_scope().reuse_variables() solver.test(data, split='test') #evaluate(data_path='./data/coco_data', split='val') evaluate(data_path='./data/coco_data', split='test') #solver.test(data, split='test') # #evaluate(data_path='./data', split='test')
def main():
    """Pre-train a CaptionGenerator for 10 epochs, validating every 512 steps.

    Training captions and image indices are read from pickles; image and
    detection features are loaded per example from hickle files. Every 512
    steps the validation features are captioned, saved and scored. A
    checkpoint is written every ``save_every`` epochs. The reinforcement
    stage that follows is configured for zero epochs and never executes.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the original file.
    """
    batch_size = 32
    val_batch_size = 12
    save_every = 1
    n_pretrain_epochs = 10
    n_reinforce_epochs = 0  # the reinforcement stage is disabled

    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    model_path = 'model_residue_cascade_attention_detect_10/'

    sess = tf.Session()
    model = CaptionGenerator(sess, word_to_idx, dim_feature=[49, 2048],
                             dim_embed=512, dim_hidden=512, n_time_step=21,
                             prev2out=True, ctx2out=True, alpha_c=1.0,
                             selector=True, dropout=True)

    n_examples = 117208
    # load val dataset to print out bleu scores
    val_data = load_coco_data(data_path='./data', split='val')
    n_iters_per_epoch = int(np.ceil(float(n_examples) / batch_size))

    with open('./data/train/train.captions.pkl', 'rb') as f:
        captions = pickle.load(f)
    with open('./data/train/train.image.idxs.pkl', 'rb') as f:
        image_idxs = pickle.load(f)
    print(image_idxs)

    val_features = val_data['features']
    n_val = val_features.shape[0]
    print(n_val)
    n_iters_val = int(np.ceil(float(n_val) / val_batch_size))

    model.build()
    saver = tf.train.Saver()

    print('start pre-traininig')
    for epoch in range(1, n_pretrain_epochs + 1):
        # Shuffle captions and image indices with the same permutation.
        rand_idxs = np.random.permutation(n_examples)
        captions = captions[rand_idxs]
        image_idxs = image_idxs[rand_idxs]

        for step in range(1, n_iters_per_epoch + 1):
            # step is 1-based, so batch k lives at (k - 1) * batch_size. The
            # slices used to start at step * batch_size, which skipped the
            # first batch of every epoch and made the last one empty.
            lo = (step - 1) * batch_size
            captions_batch = captions[lo:lo + batch_size]
            image_idxs_batch = image_idxs[lo:lo + batch_size]

            # Only full batches are trained on; check before touching disk
            # so features are not loaded for a batch that is then discarded.
            if captions_batch.shape[0] == batch_size:
                features_batch = np.empty((batch_size, 49, 2048))
                features_detect_batch = np.empty((batch_size, 10, 4096))
                for j, i in enumerate(image_idxs_batch):
                    features_batch[j, :] = hickle.load(
                        './data_residue_single/train/' + 'train_' + str(i) +
                        '.features.hkl')
                    features_detect_single = hickle.load(
                        './data_residue_detect/train/' + 'train_' + str(i) +
                        '.features.hkl')
                    # Only the last 10 rows of each detection file are used.
                    features_detect_batch[j, :] = features_detect_single[-10:, :]
                model.pre_train_batch(features_batch, features_detect_batch,
                                      captions_batch)

            if step % 10 == 0:
                print('epoch %s' % epoch)
                print('step %s' % step)

            if step % 512 == 0:
                all_gen_cap = np.ndarray((n_val, 30))
                for i in range(n_iters_val):
                    start = i * val_batch_size
                    # Clamp the last batch: the loop used to run a full
                    # val_batch_size and index past val_detect_batch when
                    # n_val is not a multiple of val_batch_size.
                    stop = min(start + val_batch_size, n_val)
                    val_features_batch = val_features[start:stop]
                    val_detect_batch = np.empty(
                        (len(val_features_batch), 10, 4096))
                    for m, j in enumerate(range(start, stop)):
                        val_detect_single = hickle.load(
                            './data_residue_detect/val/' + 'val_' + str(j) +
                            '.features.hkl')
                        val_detect_batch[m, :] = val_detect_single[-10:, :]
                    _, _, _, _, gen_cap = model.generate(val_features_batch,
                                                         val_detect_batch)
                    all_gen_cap[start:stop] = gen_cap

                all_decoded = decode_captions(all_gen_cap, model.idx_to_word)
                save_pickle(all_decoded,
                            "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val',
                                  get_scores=True)
                write_bleu(scores=scores, path=model_path, epoch=epoch)
                print("generative captions:%s\n" % all_decoded[0])

        if epoch % save_every == 0:
            saver.save(sess, os.path.join(model_path, 'model'),
                       global_step=epoch)
            print("model-%s saved." % (epoch))

    print('start reinforcement learning!')
    # NOTE(review): this stage never runs (zero epochs). It reads a
    # `features` array that is not defined in this function and calls
    # model.generate() with one argument, so it needs work before enabling.
    for epoch in range(1, n_reinforce_epochs + 1):
        rand_idxs = np.random.permutation(n_examples)
        captions = captions[rand_idxs]
        image_idxs = image_idxs[rand_idxs]

        for step in range(1, n_iters_per_epoch + 1):
            lo = (step - 1) * batch_size
            captions_batch = captions[lo:lo + batch_size]
            image_idxs_batch = image_idxs[lo:lo + batch_size]
            features_batch = features[image_idxs_batch]

            if captions_batch.shape[0] == batch_size:
                t = model.train_batch(features_batch, captions_batch)
                if step % 10 == 0:
                    print('epoch %s' % epoch)
                    print('step %s' % step)
                    print('time %s' % t)

            if step % 1024 == 0:
                ground_truths = captions[image_idxs == image_idxs_batch[0]]
                decoded = decode_captions(ground_truths, model.idx_to_word)
                for j, gt in enumerate(decoded):
                    print("Ground truth %d: %s" % (j + 1, gt))
                gen_caps = model.generate(features_batch)
                decoded = decode_captions(gen_caps, model.idx_to_word)
                print("Generated caption: %s\n" % decoded[0])

            if step % 1024 == 0:
                all_gen_cap = np.ndarray((n_val, 30))
                for i in range(n_iters_val):
                    val_features_batch = val_features[i * batch_size:
                                                      (i + 1) * batch_size]
                    gen_cap = model.generate(val_features_batch)
                    all_gen_cap[i * batch_size:(i + 1) * batch_size] = gen_cap

                all_decoded = decode_captions(all_gen_cap, model.idx_to_word)
                save_pickle(all_decoded,
                            "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val',
                                  get_scores=True)
                write_bleu(scores=scores, path=model_path, epoch=epoch)

        if epoch % save_every == 0:
            saver.save(sess, os.path.join(model_path, 'reinforcemodel'),
                       global_step=epoch)
            print("model-%s saved." % (epoch))