def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout)

    solver = CaptioningSolver(
        model,
        n_epochs=FLAGS.num_epochs,
        batch_size=FLAGS.batch_size,
        update_rule=FLAGS.optimizer,
        learning_rate=FLAGS.learning_rate,
        metric=FLAGS.metric,
        print_every=FLAGS.snapshot_steps,
        eval_every=FLAGS.eval_steps,
        pretrained_model=FLAGS.pretrained_model,
        start_from=FLAGS.start_from,
        checkpoint_dir=FLAGS.checkpoint_dir,
        log_path=FLAGS.log_path)

    solver.train(data, val_data, beam_size=FLAGS.beam_size)
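# A minimal sketch of the tf.app.flags definitions this FLAGS-driven entry
# point assumes (TensorFlow 1.x). The flag names come from the FLAGS.*
# references above; every default value here is an illustrative assumption,
# not the repository's actual configuration.
import tensorflow as tf

tf.app.flags.DEFINE_integer('image_feature_size', 196, 'number of spatial feature locations L')
tf.app.flags.DEFINE_integer('image_feature_depth', 512, 'depth D of each feature vector')
tf.app.flags.DEFINE_integer('embed_dim', 512, 'word embedding dimension')
tf.app.flags.DEFINE_integer('lstm_hidden_size', 1024, 'LSTM hidden state size')
tf.app.flags.DEFINE_integer('time_steps', 16, 'caption time steps (max length + 1)')
tf.app.flags.DEFINE_boolean('prev2out', True, 'feed previous word into the output layer')
tf.app.flags.DEFINE_boolean('ctx2out', True, 'feed context vector into the output layer')
tf.app.flags.DEFINE_boolean('enable_selector', True, 'gate the context vector with a selector scalar')
tf.app.flags.DEFINE_boolean('dropout', True, 'apply dropout during training')
tf.app.flags.DEFINE_integer('num_epochs', 20, 'number of training epochs')
tf.app.flags.DEFINE_integer('batch_size', 128, 'mini-batch size')
tf.app.flags.DEFINE_string('optimizer', 'adam', 'update rule')
tf.app.flags.DEFINE_float('learning_rate', 0.001, 'initial learning rate')
tf.app.flags.DEFINE_string('metric', 'CIDEr', 'validation metric to track')
tf.app.flags.DEFINE_integer('snapshot_steps', 1000, 'steps between log prints')
tf.app.flags.DEFINE_integer('eval_steps', 1000, 'steps between validation runs')
tf.app.flags.DEFINE_string('pretrained_model', None, 'optional pretrained weights')
tf.app.flags.DEFINE_string('start_from', None, 'checkpoint to resume from')
tf.app.flags.DEFINE_string('checkpoint_dir', 'model/', 'where checkpoints are written')
tf.app.flags.DEFINE_string('log_path', 'log/', 'TensorBoard log directory')
tf.app.flags.DEFINE_integer('beam_size', 3, 'beam width for caption decoding')

FLAGS = tf.app.flags.FLAGS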
def main():
    # load train dataset
    data = load_coco_data(data_path='/data1/junjiaot/data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='/data1/junjiaot/data', split='val')
    # print(data['file_names'].shape, data['captions'].shape, data['image_idxs'].shape)
    # print(max(data['image_idxs']))
    # model/adaptive_attention_REINFORCE/
    # model/adaptive_attention/3_26_2018/model-13

    model = CaptionGenerator(word_to_idx, dim_feature=[49, 2048], dim_embed=512,
                             dim_hidden=512, n_time_step=16, alpha_c=1.0, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=50, batch_size=56,
                              update_rule='adam', learning_rate=5e-4,
                              print_bleu_every=1000, save_every=1000,
                              image_path='./image/',
                              model_path='model/Resnet_Pretrain_adaptive_attribute9_new/',
                              test_model='model/lstm/model-5',
                              print_bleu=True, log_path='log/')
    solver.test()
def main(use_inception):
    # load train dataset
    print "Loading COCO training data..."
    data = load_coco_data(data_path='./data', split='train')
    print "Done!"
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    # Inception features are 8x8x2048 (L=64, D=2048);
    # VGG conv5_3 features are 14x14x512 (L=196, D=512).
    if use_inception:
        L = 64
        D = 2048
    else:
        L = 196
        D = 512

    from core.solver import CaptioningSolver
    from core.model import CaptionGenerator

    model = CaptionGenerator(word_to_idx, dim_feature=[L, D], dim_embed=512,
                             dim_hidden=1800, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=5.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=100, batch_size=256,
                              update_rule='adam', learning_rate=0.0005,
                              print_every=1000, summary_every=10000, save_every=1,
                              image_path='./image/', pretrained_model=None,
                              model_path='model/lstm/', test_model='model/lstm/model-10',
                              print_bleu=True, log_path='log/')
    solver.train()
def main():
    # build vocabulary
    word_to_idx = build_vocab(train_image_dir=args.train_image_dir,
                              val_image_dir=args.val_image_dir,
                              coco_dataset_files=[args.coco_dataset_train_dir,
                                                  args.coco_dataset_val_dir],
                              senticap_dataset_files=[args.senticap_dataset_dir],
                              max_length=args.max_length)
    with open(os.path.join(args.output_dir, 'word_to_idx.pkl'), 'wb') as f:
        pickle.dump(word_to_idx, f)
    print("*" * 16, "Vocabulary built", "*" * 16)

    # load the Senticap dataset
    train_senticap_data, val_senticap_data, test_senticap_data = load_senticap_data(
        vocab=word_to_idx,
        train_image_dir=args.train_image_dir,
        val_image_dir=args.val_image_dir,
        caption_file=args.senticap_dataset_dir,
        splits=args.senticap_dataset_portions,
        max_length=args.max_length)
    with open(os.path.join(args.output_dir, 'train_senticap_data.pkl'), 'wb') as f:
        pickle.dump(train_senticap_data, f)
    with open(os.path.join(args.output_dir, 'val_senticap_data.pkl'), 'wb') as f:
        pickle.dump(val_senticap_data, f)
    with open(os.path.join(args.output_dir, 'test_senticap_data.pkl'), 'wb') as f:
        pickle.dump(test_senticap_data, f)

    # load the COCO dataset
    train_coco_data = load_coco_data(vocab=word_to_idx,
                                     image_dir=args.train_image_dir,
                                     caption_file=args.coco_dataset_train_dir,
                                     splits=[args.coco_dataset_portions[0]],
                                     max_length=args.max_length)
    val_coco_data, test_coco_data = load_coco_data(vocab=word_to_idx,
                                                   image_dir=args.val_image_dir,
                                                   caption_file=args.coco_dataset_val_dir,
                                                   splits=args.coco_dataset_portions[1:],
                                                   max_length=args.max_length)
    with open(os.path.join(args.output_dir, 'train_coco_data.pkl'), 'wb') as f:
        pickle.dump(train_coco_data, f)
    with open(os.path.join(args.output_dir, 'val_coco_data.pkl'), 'wb') as f:
        pickle.dump(val_coco_data, f)
    with open(os.path.join(args.output_dir, 'test_coco_data.pkl'), 'wb') as f:
        pickle.dump(test_coco_data, f)
    print("*" * 16, "Dataset loaded", "*" * 16)
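# A minimal sketch of reading back the artifacts the preprocessing step above
# writes out. The file names match the pickle dumps in main(); output_dir is
# an assumed example path standing in for args.output_dir.
import os
import pickle

output_dir = './output'  # assumed; corresponds to args.output_dir above
with open(os.path.join(output_dir, 'word_to_idx.pkl'), 'rb') as f:
    word_to_idx = pickle.load(f)
with open(os.path.join(output_dir, 'train_senticap_data.pkl'), 'rb') as f:
    train_senticap_data = pickle.load(f)
print('vocabulary size:', len(word_to_idx))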
def main():
    # load train dataset
    data = load_coco_data(data_path='./data/coco_data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data/coco_data', split='val')

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=20, batch_size=128,
                              update_rule='adam', learning_rate=0.001,
                              print_every=1000, save_every=10, image_path='./image/',
                              pretrained_model=None, model_path='model/preview_model',
                              test_model='model/lstm/model-10',
                              print_bleu=True, log_path='log/preview_model_log/')
    solver.train()
def main():
    data = load_coco_data(data_path='./data', split='val', if_train=True)
    word_to_idx = data['word_to_idx']

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, data, n_epochs=10, batch_size=100,
                              update_rule='adam', learning_rate=0.001,
                              print_every=1000, save_every=5, image_path='./image/',
                              pretrained_model=None, model_path='model/lstm_hard/',
                              test_model='model/lstm_hard/model-40',
                              print_bleu=True, log_path='log/')

    test_data = load_coco_data(data_path='./data', split='test', if_train=False)
    solver.test(test_data, split='test')
def main():
    val_data = load_coco_data(data_path='./data', split='val')
    with open('data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=21, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, val_data, n_epochs=100, batch_size=128,
                              update_rule='adam', learning_rate=0.0012,
                              print_every=100, save_every=5, image_path='./image/',
                              pretrained_model='train_batch/model0.001/model.ckpt-30',
                              model_path='train_batch/model0.002/', test_model=None,
                              print_bleu=True, log_path='train_batch/log/')
    solver.train()
def main():
    data = load_coco_data(data_path='./data', split='val')
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, data, n_epochs=20, batch_size=128,
                              update_rule='adam', learning_rate=0.001,
                              print_every=1000, save_every=1,
                              image_path='./image/val2014_resized',
                              pretrained_model=None, model_path='model/lstmval/',
                              test_model='model/lstm/model-10',
                              print_bleu=True, log_path='log/')

    # solver.test(data, split='val')
    solver.test(data, split='test')
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=20, batch_size=128,
                              update_rule='adam', learning_rate=0.001,
                              print_every=1000, save_every=1, image_path='./image/',
                              pretrained_model=None, model_path='model/lstm/',
                              test_model='model/lstm/model-10',
                              print_bleu=True, log_path='log/')
    solver.train()
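# A hedged sketch of the dictionary load_coco_data() is expected to return,
# inferred from how its result is indexed throughout these scripts
# ('features', 'captions', 'image_idxs', 'word_to_idx', 'file_names').
# The array shapes and toy sizes below are illustrative assumptions, not the
# loader's actual contract.
import numpy as np

def load_coco_data_sketch(data_path='./data', split='train'):
    n_images, n_captions = 100, 500  # assumed toy sizes
    return {
        'features': np.zeros((n_images, 196, 512), dtype=np.float32),  # conv features (L x D)
        'captions': np.zeros((n_captions, 17), dtype=np.int32),        # word indices, n_time_step + 1
        'image_idxs': np.zeros(n_captions, dtype=np.int32),            # caption -> image mapping
        'word_to_idx': {'<NULL>': 0, '<START>': 1, '<END>': 2},        # vocabulary stub
        'file_names': np.array(['image/%s_%d.jpg' % (split, i) for i in range(n_images)]),
    }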
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(word_to_idx, dim_feature=[121, 1536], dim_embed=512,
                             dim_hidden=1024, n_time_step=26, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=50000, batch_size=64,
                              update_rule='adam', learning_rate=0.00005,
                              print_every=500, save_every=1, image_path='./image/',
                              pretrained_model=None, model_path='./model/rl_att_ciderD/',
                              test_model='./model/att/model-10', n_batches=10000,
                              print_bleu=True, log_path='./log/')
    solver.train()
def main():
    # load train dataset
    # data = load_coco_data(data_path='./our_data', split='train')
    # word_to_idx = data['word_to_idx']
    # # load val dataset to print out bleu scores every epoch
    # test_data = load_coco_data(data_path='./our_data', split='test')

    # our train (forward slashes avoid accidental escape sequences in the paths;
    # note that the "test" data here is loaded from the train split as well):
    data = load_coco_data(
        data_path='./image_data_to_be_labeled/Object_feature/our_data', split='train')
    our_test = load_coco_data(
        data_path='./image_data_to_be_labeled/Object_feature/our_data', split='train')
    word_to_idx = data['word_to_idx']

    model = CaptionGenerator(word_to_idx, dim_feature=[216, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=26, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=False)

    solver = CaptioningSolver(model, data, our_test, n_epochs=5000, batch_size=64,
                              update_rule='adam', learning_rate=1e-4,
                              print_every=1000, save_every=100, image_path='./image/',
                              pretrained_model=None, model_path='model/our_train0414/',
                              test_model='model/our_train0414/model-2000',
                              print_bleu=False, log_path='log/')
    # solver.train()
    solver.test(our_test)
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='debug')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    # val_data = load_coco_data(data_path='./data', split='val')
    # val_data = second_process(10, 16, val_data)

    model = CaptionGenerator(word_to_idx, dim_feature=[121, 1536], dim_embed=512,
                             dim_hidden=1024, n_time_step=26, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
def main(params):
    batch_size = params['batch_size']
    n_epochs = params['epoch']
    n_time_step = params['n_time_step']
    learning_rate = params['lr']
    model_path = params['model_path']
    log_path = params['log_path']

    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=256,
                             dim_hidden=1024, n_time_step=n_time_step, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=n_epochs,
                              batch_size=batch_size, update_rule='adam',
                              learning_rate=learning_rate, print_every=3000,
                              save_every=1, image_path='./image/',
                              pretrained_model=None, model_path=model_path,
                              test_model='./model/lstm/model-10',
                              print_bleu=True, log_path=log_path)
    solver.train()
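# Example invocation with an illustrative params dict; the keys match the
# lookups in main(params) above, while the values are assumptions chosen for
# demonstration only.
if __name__ == '__main__':
    params = {
        'batch_size': 128,
        'epoch': 20,
        'n_time_step': 16,
        'lr': 0.001,
        'model_path': 'model/lstm/',
        'log_path': 'log/',
    }
    main(params)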
def main():
    # load train dataset
    print "start loading data"
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')
    print "data loaded"  # elapsed time: 15.95

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,  # n_time_step is max caption length + 1
                             prev2out=True, ctx2out=True, alpha_c=1.0,
                             selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=20, batch_size=128,
                              update_rule='adam', learning_rate=0.001,
                              print_every=1000, save_every=1, image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',  # change model path accordingly
                              test_model='model/lstm/model-10',
                              print_bleu=True, log_path='log/')
    solver.train()
def main():
    # load train dataset
    data = load_coco_data(data_path='./new_data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./new_data', split='val')

    model = CaptionGenerator(word_to_idx, dim_att=[4, 512], dim_feature=[196, 512],
                             dim_embed=512, dim_hidden=1024, n_time_step=16,
                             prev2out=True, ctx2out=True, alpha_c=1.0,
                             selector=True, dropout=True)

    idx_to_word = {v: k for k, v in word_to_idx.iteritems()}

    solver = CaptioningSolver(model, data, val_data, idx_to_word, n_epochs=15,
                              batch_size=64, update_rule='adam', learning_rate=0.001,
                              print_every=50, save_every=5, image_path='./image/',
                              pretrained_model=None, model_path='model/lstm2/',
                              test_model='model/lstm2/model-15',
                              print_bleu=True, log_path='log/')
    solver.train()
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    discrim = Discriminator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                            dim_hidden=1024, n_time_step=16, prev2out=True,
                            ctx2out=True, alpha_c=0.0, selector=True, dropout=True,
                            learning_rate=0.01)

    solver = CaptioningSolver(model, discrim, data, data, n_epochs=20, batch_size=128,
                              gpu_list="1,2,3", update_rule='adam', learning_rate=0.001,
                              print_every=1000, save_every=1, image_path='./image/',
                              pretrained_model=None, model_path='model/lstm/',
                              train_new='./model/lstm/model-20',
                              test_model='model/lstm/model-21',
                              print_bleu=True, log_path='log/')
    solver.train()
def test(toy=None):
    # the toy flag selects the "toy_" file prefix used by the small debug dataset
    toy = 'toy_' if toy else ''
    data_path = os.path.join('./data', 'train')
    with open(os.path.join(data_path, '%sword_to_idx.pkl' % toy), 'rb') as f:
        word_to_idx = pickle.load(f)
    val_data = load_coco_data(data_path='./data', split='val', toy=toy)

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=512, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, None, val_data, n_epochs=20, batch_size=128,
                              update_rule='adam', learning_rate=0.001,
                              print_every=1000, save_every=1, image_path='./image/',
                              pretrained_model=None,
                              model_path='model/HighwayLSTM01_lstm/',
                              test_model='model/HighwayLSTM01_lstm/model-20',
                              print_bleu=True, log_path='log/')
    solver.test(val_data)
def main():
    # load dataset and vocab
    data = load_coco_data(data_path='./data', split=FLAGS.split)
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout)

    solver = CaptioningSolver(model, batch_size=FLAGS.batch_size,
                              test_checkpoint=FLAGS.test_checkpoint)
    solver.test(data, beam_size=3, attention_visualization=FLAGS.att_vis)
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    # CNN text discriminator: one convolution per filter width, max-pooled over time
    dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 12, 16]
    dis_num_filters = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100]
    dis_l2_reg_lambda = 0.2
    discrim = Discriminator(sequence_length=16, num_classes=2,
                            vocab_size=len(word_to_idx), embedding_size=128,
                            filter_sizes=dis_filter_sizes,
                            num_filters=dis_num_filters,
                            l2_reg_lambda=dis_l2_reg_lambda)

    solver = CaptioningSolver(model, discrim, data, data, n_epochs=20, batch_size=64,
                              gpu_list="0,1,2", update_rule='adam', learning_rate=0.0025,
                              print_every=20, save_every=1, image_path='./image/',
                              pretrained_model=None, model_path='model/lstm/',
                              train_new=None, test_model='model/lstm/model-42',
                              print_bleu=False, log_path='log/', num_rollout=10)
    solver.train_adversarial()
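# A compact sketch of the SeqGAN-style CNN text scorer that the Discriminator
# arguments above suggest: embed a token sequence, convolve with one filter
# width per entry in dis_filter_sizes, max-pool over time, and classify real
# vs. generated. This illustrates the architecture family under TF1 APIs; it
# is not this repository's Discriminator class.
import tensorflow as tf

def cnn_text_scores(tokens, vocab_size, embedding_size=128,
                    filter_sizes=(1, 2, 3), num_filters=(100, 200, 200),
                    num_classes=2):
    # tokens: int32 tensor of shape [batch, sequence_length]
    emb = tf.get_variable('emb', [vocab_size, embedding_size])
    x = tf.expand_dims(tf.nn.embedding_lookup(emb, tokens), -1)  # [B, T, E, 1]
    pooled = []
    for size, n in zip(filter_sizes, num_filters):
        conv = tf.layers.conv2d(x, n, [size, embedding_size],
                                activation=tf.nn.relu,
                                name='conv%d' % size)      # [B, T-size+1, 1, n]
        pooled.append(tf.reduce_max(conv, axis=1))          # max over time
    features = tf.reshape(tf.concat(pooled, axis=-1), [-1, sum(num_filters)])
    return tf.layers.dense(features, num_classes, name='logits')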
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='debug')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    # val_data = load_coco_data(data_path='./data', split='val')
    # val_data = second_process(10, 16, val_data)

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=256,
                             dim_hidden=256, n_time_step=10, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, data, n_epochs=50, batch_size=128,
                              update_rule='adam', learning_rate=0.001,
                              print_every=500, save_every=1, image_path='./image/',
                              pretrained_model=None, model_path='./model/lstm/',
                              test_model='./model/lstm/model-5',
                              print_bleu=True, log_path='./log/')
    solver.test(data, split='val')
import matplotlib.pyplot as plt
import cPickle as pickle

from core.solver import CaptioningSolver
from core.model import CaptionGenerator
from core.utils import load_coco_data
from core.bleu import evaluate

get_ipython().magic(u'matplotlib inline')
plt.rcParams['figure.figsize'] = (8.0, 6.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
get_ipython().magic(u'load_ext autoreload')
get_ipython().magic(u'autoreload 2')

# In[2]:
data = load_coco_data(data_path='./data', split='val')
with open('./data/train/word_to_idx.pkl', 'rb') as f:
    word_to_idx = pickle.load(f)

# In[3]:
model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                         dim_hidden=1500, n_time_step=16, prev2out=True,
                         ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
def main():
    batch_size = 32
    val_batch_size = 12
    save_every = 1
    # pretrained_model = None
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    model_path = 'model_residue_cascade_attention_detect_10/'

    # load val dataset to print out bleu scores every epoch
    sess = tf.Session()
    model = CaptionGenerator(sess, word_to_idx, dim_feature=[49, 2048], dim_embed=512,
                             dim_hidden=512, n_time_step=21, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    n_examples = 117208
    val_data = load_coco_data(data_path='./data', split='val')
    n_iters_per_epoch = int(np.ceil(float(n_examples) / batch_size))
    with open('./data/train/train.captions.pkl', 'rb') as f:
        captions = pickle.load(f)
    with open('./data/train/train.image.idxs.pkl', 'rb') as f:
        image_idxs = pickle.load(f)
    print image_idxs
    val_features = val_data['features']
    print val_features.shape[0]
    n_iters_val = int(np.ceil(float(val_features.shape[0]) / val_batch_size))

    model.build()
    saver = tf.train.Saver()
    # variables = slim.get_variables_to_restore()
    # variables_to_restore = [v for v in variables if string.find(v.name, 'discriminator') == -1]
    # saver = tf.train.Saver(variables_to_restore)
    # if pretrained_model is not None:
    #     saver = tf.train.import_meta_graph('./model_residue/model-10.meta')
    #     saver.restore(sess, pretrained_model)

    print 'start pre-training'
    for epoch in xrange(1, 10 + 1):
        rand_idxs = np.random.permutation(n_examples)
        captions = captions[rand_idxs]
        image_idxs = image_idxs[rand_idxs]
        for step in xrange(1, n_iters_per_epoch + 1):
            # slice with (step - 1) so the first batch is not skipped
            # (the original step * batch_size indexing dropped batch 0)
            captions_batch = captions[(step - 1) * batch_size:step * batch_size]
            image_idxs_batch = image_idxs[(step - 1) * batch_size:step * batch_size]
            # conv features are stored one image per hickle file
            features_batch = np.empty((batch_size, 49, 2048))
            for j, i in enumerate(image_idxs_batch):
                features_single = hickle.load('./data_residue_single/train/'
                                              + 'train_' + str(i) + '.features.hkl')
                features_batch[j, :] = features_single
            # keep the last 10 detection features per image
            features_detect_batch = np.empty((batch_size, 10, 4096))
            for j, i in enumerate(image_idxs_batch):
                features_detect_single = hickle.load('./data_residue_detect/train/'
                                                     + 'train_' + str(i) + '.features.hkl')
                features_detect_batch[j, :] = features_detect_single[-10:, :]
            if captions_batch.shape[0] == batch_size:
                model.pre_train_batch(features_batch, features_detect_batch, captions_batch)
            if step % 10 == 0:
                print 'epoch', epoch
                print 'step', step
            if step % 512 == 0:
                all_gen_cap = np.ndarray((val_features.shape[0], 30))
                for i in range(n_iters_val):
                    features_batch = val_features[i * val_batch_size:(i + 1) * val_batch_size]
                    val_detect_batch = np.empty((len(features_batch), 10, 4096))
                    # only iterate over the images actually present in this
                    # (possibly partial) final batch
                    for m, j in enumerate(range(i * val_batch_size,
                                                i * val_batch_size + len(features_batch))):
                        val_detect_single = hickle.load('./data_residue_detect/val/'
                                                        + 'val_' + str(j) + '.features.hkl')
                        val_detect_batch[m, :] = val_detect_single[-10:, :]
                    _, _, _, _, gen_cap = model.generate(features_batch, val_detect_batch)
                    all_gen_cap[i * val_batch_size:(i + 1) * val_batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, model.idx_to_word)
                save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val', get_scores=True)
                write_bleu(scores=scores, path=model_path, epoch=epoch)
                print "generated captions: %s\n" % all_decoded[0]
        if epoch % save_every == 0:
            saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
            print "model-%s saved." % (epoch)

    print 'start reinforcement learning!'
    # NOTE: written as xrange(1, 0 + 1), this loop never executes; raise the
    # upper bound to enable the reinforcement-learning epochs.
    for epoch in xrange(1, 0 + 1):
        rand_idxs = np.random.permutation(n_examples)
        captions = captions[rand_idxs]
        image_idxs = image_idxs[rand_idxs]
        for step in xrange(1, n_iters_per_epoch + 1):
            # (step - 1)-based slicing so the first batch is not skipped
            captions_batch = captions[(step - 1) * batch_size:step * batch_size]
            image_idxs_batch = image_idxs[(step - 1) * batch_size:step * batch_size]
            features_batch = features[image_idxs_batch]
            if captions_batch.shape[0] == batch_size:
                # gen_cap = model.generate(features_batch)
                # decoded_cap = decode_captions(gen_cap, model.idx_to_word)
                # decoded_reference = decode_captions(captions_batch, model.idx_to_word)
                # scores = evaluate_part(candidate=decoded_cap, split='train',
                #                        idx=image_idxs_batch, get_scores=True)
                # reward = (0.5 * scores['Bleu_1'] + 0.5 * scores['Bleu_2']
                #           + scores['Bleu_3'] + scores['Bleu_4']) / 3
                # print reward
                # reward = 1
                t = model.train_batch(features_batch, captions_batch)
            if step % 10 == 0:
                print 'epoch', epoch
                print 'step', step
                print 'time', t
            if step % 1024 == 0:
                ground_truths = captions[image_idxs == image_idxs_batch[0]]
                decoded = decode_captions(ground_truths, model.idx_to_word)
                for j, gt in enumerate(decoded):
                    print "Ground truth %d: %s" % (j + 1, gt)
                gen_caps = model.generate(features_batch)
                decoded = decode_captions(gen_caps, model.idx_to_word)
                print "Generated caption: %s\n" % decoded[0]
            if step % 1024 == 0:
                all_gen_cap = np.ndarray((val_features.shape[0], 30))
                for i in range(n_iters_val):
                    features_batch = val_features[i * batch_size:(i + 1) * batch_size]
                    gen_cap = model.generate(features_batch)
                    all_gen_cap[i * batch_size:(i + 1) * batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, model.idx_to_word)
                save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data', split='val', get_scores=True)
                write_bleu(scores=scores, path=model_path, epoch=epoch)
                # print "generated captions: %s\n" % all_decoded[0]
        if epoch % save_every == 0:
            saver.save(sess, os.path.join(model_path, 'reinforcemodel'), global_step=epoch)
            print "model-%s saved." % (epoch)
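# A hedged sketch of the sentence-level reward the commented-out lines above
# suggest: decode sampled captions, score them against references with
# evaluate_part, and mix the BLEU components using the same weights as the
# commented formula. evaluate_part's signature is taken directly from that
# comment; this helper is illustrative, not part of the repository.
def bleu_reward(model, features_batch, image_idxs_batch):
    gen_cap = model.generate(features_batch)
    decoded_cap = decode_captions(gen_cap, model.idx_to_word)
    scores = evaluate_part(candidate=decoded_cap, split='train',
                           idx=image_idxs_batch, get_scores=True)
    return (0.5 * scores['Bleu_1'] + 0.5 * scores['Bleu_2']
            + scores['Bleu_3'] + scores['Bleu_4']) / 3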
def main():
    # the training set is split into chunks; each run below warm-starts from the
    # checkpoint produced by the previous chunk. Chunks 0-2 are kept commented out.
    # load train dataset
    # data = load_coco_data(data_path='./data', split='train0', if_train=True)
    # word_to_idx = data['word_to_idx']
    # # load val dataset to print out bleu scores every epoch
    # val_data = load_coco_data(data_path='./data', split='val', if_train=False)
    # model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
    #                          dim_hidden=1024, n_time_step=16, prev2out=True,
    #                          ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    # solver = CaptioningSolver(model, data, val_data, n_epochs=10, batch_size=100,
    #                           update_rule='adam', learning_rate=0.001,
    #                           print_every=1000, save_every=5, image_path='./image/',
    #                           pretrained_model=None, model_path='model/lstm_hard/',
    #                           test_model='model/lstm_hard/model-10',
    #                           print_bleu=True, log_path='log_hard/')
    # solver.train(chunk=0)

    # data = load_coco_data(data_path='./data', split='train1', if_train=True)
    # word_to_idx = data['word_to_idx']
    # val_data = load_coco_data(data_path='./data', split='val', if_train=False)
    # model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
    #                          dim_hidden=1024, n_time_step=16, prev2out=True,
    #                          ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    # solver = CaptioningSolver(model, data, val_data, n_epochs=10, batch_size=100,
    #                           update_rule='adam', learning_rate=0.001,
    #                           print_every=1000, save_every=5, image_path='./image/',
    #                           pretrained_model='model/lstm_hard/model-10',
    #                           model_path='model/lstm_hard/',
    #                           test_model='model/lstm_hard/model-20',
    #                           print_bleu=True, log_path='log_hard/')
    # solver.train(chunk=1)

    # data = load_coco_data(data_path='./data', split='train2', if_train=True)
    # word_to_idx = data['word_to_idx']
    # val_data = load_coco_data(data_path='./data', split='val', if_train=False)
    # model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
    #                          dim_hidden=1024, n_time_step=16, prev2out=True,
    #                          ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    # solver = CaptioningSolver(model, data, val_data, n_epochs=10, batch_size=100,
    #                           update_rule='adam', learning_rate=0.001,
    #                           print_every=1000, save_every=5, image_path='./image/',
    #                           pretrained_model='model/lstm_hard/model-20',
    #                           model_path='model/lstm_hard/',
    #                           test_model='model/lstm_hard/model-30',
    #                           print_bleu=True, log_path='log_hard/')
    # solver.train(chunk=2)

    data = load_coco_data(data_path='./data', split='train3', if_train=True)
    word_to_idx = data['word_to_idx']
    val_data = load_coco_data(data_path='./data', split='val', if_train=False)

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=10, batch_size=100,
                              update_rule='adam', learning_rate=0.001,
                              print_every=1000, save_every=5, image_path='./image/',
                              pretrained_model='model/lstm_hard/model-30',
                              model_path='model/lstm_hard/',
                              test_model='model/lstm_hard/model-40',
                              print_bleu=True, log_path='log_hard/')
    solver.train(chunk=3)
"""
@author: xz
"""
import matplotlib.pyplot as plt
import cPickle as pickle
import tensorflow as tf

from core.solver import CaptioningSolver
from core.model import CaptionGenerator
from core.utils import load_coco_data
from core.bleu import evaluate

plt.rcParams['figure.figsize'] = (8.0, 6.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

data = load_coco_data(data_path='./data/coco_data/', split='test')
with open('./data/coco_data/train/word_to_idx.pkl', 'rb') as f:
    word_to_idx = pickle.load(f)

# print '~~~~~~~~~~~~~~~~~~~~~~~'
# for i in range(data['features'].shape[0]):
#     if data['file_names'][i] == 'image/train2014_resized/COCO_train2014_000000013140.jpg':
#         print i
#         print data['file_names'][i]
# print data['file_names'][1813]

model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
def train(self):
    # train/val dataset
    # n_examples = self.data['captions'].shape[0]
    # n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
    # features = self.data['features']
    # captions = self.data['captions']
    # image_idxs = self.data['image_idxs']
    # val_features = self.val_data['features']
    val_features = self.val_data['features']
    n_iters_val = int(np.ceil(float(val_features.shape[0]) / self.batch_size))

    # build graphs for training model and sampling captions
    loss = self.model.build_model()
    # tf.get_variable_scope().reuse_variables()
    # _, _, generated_captions = self.model.build_sampler(max_len=20)
    #
    # # train op
    # with tf.name_scope('optimizer'):
    #     optimizer = self.optimizer(learning_rate=self.learning_rate)
    #     grads = tf.gradients(loss, tf.trainable_variables())
    #     grads_and_vars = list(zip(grads, tf.trainable_variables()))
    #     train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        with tf.name_scope('optimizer'):
            # reuse the model's variables so the sampler shares weights
            tf.get_variable_scope().reuse_variables()
            _, _, generated_captions = self.model.build_sampler(max_len=20)
            optimizer = self.optimizer(learning_rate=self.learning_rate)
            grads = tf.gradients(loss, tf.trainable_variables())
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
            train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

    # summary op
    tf.summary.scalar('batch_loss', loss)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_and_vars:
        tf.summary.histogram(var.op.name + '/gradient', grad)
    summary_op = tf.summary.merge_all()

    print "Number of epochs: %d" % self.n_epochs
    # print "Data size: %d" % n_examples
    print "Batch size: %d" % self.batch_size
    # print "Iterations per epoch: %d" % n_iters_per_epoch

    config = tf.ConfigProto(allow_soft_placement=True)
    # os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    # config.gpu_options.per_process_gpu_memory_fraction = 0.9
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        summary_writer = tf.summary.FileWriter(self.log_path, graph=tf.get_default_graph())
        saver = tf.train.Saver(max_to_keep=20)
        if self.pretrained_model is not None:
            print "Start training with pretrained model..."
            saver.restore(sess, self.pretrained_model)

        prev_loss = -1
        curr_loss = 0
        start_t = time.time()
        for e in range(self.n_epochs):
            # the training set is stored in five parts; stream one part at a time
            for part_num in range(5):
                data = load_coco_data(data_path='./data', split='train', batch=part_num)
                n_examples = data['captions'].shape[0]
                n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
                features = data['features']
                captions = data['captions']
                image_idxs = data['image_idxs']
                rand_idxs = np.random.permutation(n_examples)
                captions = captions[rand_idxs]
                image_idxs = image_idxs[rand_idxs]

                for i in range(n_iters_per_epoch):
                    captions_batch = captions[i * self.batch_size:(i + 1) * self.batch_size]
                    image_idxs_batch = image_idxs[i * self.batch_size:(i + 1) * self.batch_size]
                    features_batch = features[image_idxs_batch]
                    feed_dict = {self.model.features: features_batch,
                                 self.model.captions: captions_batch}
                    _, l = sess.run([train_op, loss], feed_dict)
                    curr_loss += l

                    # write summary for tensorboard visualization
                    if i % 10 == 0:
                        summary = sess.run(summary_op, feed_dict)
                        summary_writer.add_summary(summary, e * n_iters_per_epoch + i)

                    if (i + 1) % self.print_every == 0:
                        print "\nTrain loss at epoch %d & part %d & iteration %d (mini-batch): %.5f" % (
                            e + 1, part_num + 1, i + 1, l)
                        ground_truths = captions[image_idxs == image_idxs_batch[0]]
                        decoded = decode_captions(ground_truths, self.model.idx_to_word)
                        for j, gt in enumerate(decoded):
                            print "Ground truth %d: %s" % (j + 1, gt)
                        gen_caps = sess.run(generated_captions, feed_dict)
                        decoded = decode_captions(gen_caps, self.model.idx_to_word)
                        print "Generated caption: %s\n" % decoded[0]
                del data

            print "Previous epoch loss: ", prev_loss
            print "Current epoch loss: ", curr_loss
            print "Elapsed time: ", time.time() - start_t
            prev_loss = curr_loss
            curr_loss = 0

            # print out BLEU scores and file write
            if self.print_bleu:
                all_gen_cap = np.ndarray((val_features.shape[0], 20))
                for i in range(n_iters_val):
                    features_batch = val_features[i * self.batch_size:(i + 1) * self.batch_size]
                    feed_dict = {self.model.features: features_batch}
                    gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
                    all_gen_cap[i * self.batch_size:(i + 1) * self.batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
                captions2json.captions2json(all_decoded, self.val_data['file_names'],
                                            './data/val/val_candidate_captions_json.json')
                compute_m1(json_predictions_file='./data/val/val_candidate_captions_json.json',
                           reference_file='./data/val/val_references_json.json')

            # save model's parameters
            if (e + 1) % self.save_every == 0:
                saver.save(sess, os.path.join(self.model_path, 'model.ckpt'),
                           global_step=e + 1)
                print "model-%s saved." % (e + 1)
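# A hedged sketch of what decode_captions() plausibly does, based on its use
# above: map rows of word indices back to strings via idx_to_word, stopping at
# the <END> token and skipping padding. The special-token names follow the
# vocabulary stubs used elsewhere in these scripts; the details are assumed.
def decode_captions_sketch(caption_idxs, idx_to_word):
    decoded = []
    for row in caption_idxs:
        words = []
        for idx in row:
            word = idx_to_word[int(idx)]
            if word == '<END>':
                break
            if word not in ('<NULL>', '<START>'):
                words.append(word)
        decoded.append(' '.join(words))
    return decoded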