def main(hf, f_type, capl=16, d_w2v=512, output_dim=512, feature_shape=None,
         lr=0.01, batch_size=64, total_epoch=100, file=None, pretrained_model=None):
    '''
    capl: the length of caption
    '''
    # Create vocabulary
    v2i, train_data, val_data, test_data = MsrDataUtil.create_vocabulary_word2vec(
        file, capl=capl, v2i={'': 0, 'UNK': 1, 'BOS': 2, 'EOS': 3})
    i2v = {i: v for v, i in v2i.items()}

    print('building model ...')
    voc_size = len(v2i)

    # Placeholders: video features, input caption tokens, and one-hot caption targets.
    input_video = tf.placeholder(tf.float32, shape=(None,) + feature_shape, name='input_video')
    input_captions = tf.placeholder(tf.int32, shape=(None, capl), name='input_captions')
    # float32 rather than int32: softmax_cross_entropy_with_logits expects probability-style labels
    y = tf.placeholder(tf.float32, shape=(None, capl, len(v2i)))

    captionModel = CaptionModel.CaptionModel(input_video, input_captions, voc_size, d_w2v, output_dim)
    predict_score, predict_words = captionModel.build_model()

    # Cross-entropy over the vocabulary plus any regularization losses collected by the model.
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=predict_score)
    loss = tf.reduce_mean(loss) + sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    optimizer = tf.train.RMSPropOptimizer(lr, decay=0.9, momentum=0.0, epsilon=1e-8)
    train = optimizer.minimize(loss)

    '''
    configure && runtime environment
    '''
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    config.log_device_placement = False
    sess = tf.Session(config=config)

    init = tf.global_variables_initializer()
    sess.run(init)

    with sess.as_default():
        saver = tf.train.Saver(sharded=True, max_to_keep=total_epoch)
        if pretrained_model is not None:
            saver.restore(sess, pretrained_model)
            print('restore pre-trained file: ' + pretrained_model)

        for epoch in xrange(total_epoch):
            # shuffle
            print('Epoch: %d/%d, Batch_size: %d' % (epoch + 1, total_epoch, batch_size))

            # train phase
            tic = time.time()
            total_loss = exe_train(sess, train_data, batch_size, v2i, hf, feature_shape,
                                   train, loss, input_video, input_captions, y, capl=capl)
            print(' --Train--, Loss: %.5f, .......Time:%.3f' % (total_loss, time.time() - tic))

            # validation phase
            tic = time.time()
            js = exe_test(sess, test_data, batch_size, v2i, i2v, hf, feature_shape,
                          predict_words, input_video, input_captions, y, capl=capl)
            print(' --Val--, .......Time:%.3f' % (time.time() - tic))

            # save model
            export_path = ('/home/xyj/usr/local/saved_model/msrvtt2017/s2s' + '_' + f_type
                           + '/' + 'lr' + str(lr) + '_f' + str(feature_shape[0]))
            if not os.path.exists(export_path + '/model'):
                os.makedirs(export_path + '/model')
                print('mkdir %s' % (export_path + '/model'))
            if not os.path.exists(export_path + '/res'):
                os.makedirs(export_path + '/res')
                print('mkdir %s' % (export_path + '/res'))

            # eval
            res_path = export_path + '/res/' + f_type + '_E' + str(epoch + 1) + '.json'
            evaluate_mode_by_shell(res_path, js)

            save_path = saver.save(sess, export_path + '/model/' + 'E' + str(epoch + 1)
                                   + '_L' + str(total_loss) + '.ckpt')
            print("Model saved in file: %s" % save_path)
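# For reference, a minimal sketch of how this entry point might be invoked. Every value below
# (feature file path, HDF5 layout, f_type tag, feature shape, annotation path) is an illustrative
# assumption, not something taken from this repository.
def _example_run():
    import h5py  # assumed: per-video CNN features stored in an HDF5 file keyed by video id
    hf = h5py.File('/path/to/video_features.h5', 'r')  # hypothetical feature file
    main(hf, 'resnet',
         capl=16, d_w2v=512, output_dim=512,
         feature_shape=(40, 2048),                     # assumed: 40 sampled frames x 2048-d features
         lr=0.01, batch_size=64, total_epoch=100,
         file='/path/to/msrvtt_caption_data',          # hypothetical annotation path
         pretrained_model=None)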
def main(hf1, hf2, f_type, capl=16, d_w2v=512, output_dim=512, feature_shape1=None, feature_shape2=None,
         lr=0.01, batch_size=64, total_epoch=100, file=None, pretrained_model=None):
    '''
    capl: the length of caption
    '''
    # Create vocabulary
    v2i, train_data, val_data, test_data = MsrDataUtil.create_vocabulary_word2vec(
        file, capl=capl, word_threshold=1, v2i={'': 0, 'UNK': 1, 'BOS': 2, 'EOS': 3})
    i2v = {i: v for v, i in v2i.items()}

    print('building model ...')
    voc_size = len(v2i)

    # Placeholders: two feature streams, input caption tokens, and sparse caption targets.
    input_video1 = tf.placeholder(tf.float32, shape=(None,) + feature_shape1, name='input_video1')
    input_video2 = tf.placeholder(tf.float32, shape=(None,) + feature_shape2, name='input_video2')
    input_captions = tf.placeholder(tf.int32, shape=(None, capl), name='input_captions')
    y = tf.placeholder(tf.int32, shape=(None, capl))

    attentionCaptionModel = mGRUAstarCaptionModel.mGRUAstarAttentionCaptionModel(
        input_video1, input_video2, input_captions, voc_size, d_w2v, output_dim, T_k=[1, 2, 4, 8])
    predict_score, predict_words, loss_mask = attentionCaptionModel.build_model()

    # Per-token cross-entropy, summed over the caption and normalised by the number of
    # valid (non-padded) tokens given by loss_mask, plus any regularization losses.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=predict_score)
    loss = tf.reduce_sum(loss, reduction_indices=[-1]) / tf.reduce_sum(loss_mask, reduction_indices=[-1]) \
        + sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    loss = tf.reduce_mean(loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9, beta2=0.999,
                                       epsilon=1e-08, use_locking=False, name='Adam')
    gvs = optimizer.compute_gradients(loss)
    # clip each gradient tensor to norm 10 individually before applying the update
    capped_gvs = [(tf.clip_by_global_norm([grad], 10)[0][0], var) for grad, var in gvs]
    train = optimizer.apply_gradients(capped_gvs)
    # optimizer = tf.train.RMSPropOptimizer(lr, decay=0.9, momentum=0.0, epsilon=1e-8)
    # train = optimizer.minimize(loss)

    '''
    configure && runtime environment
    '''
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    config.log_device_placement = False
    sess = tf.Session(config=config)

    init = tf.global_variables_initializer()
    sess.run(init)

    with sess.as_default():
        saver = tf.train.Saver(sharded=True, max_to_keep=total_epoch)
        if pretrained_model is not None:
            saver.restore(sess, pretrained_model)
            print('restore pre-trained file: ' + pretrained_model)

        for epoch in xrange(total_epoch):
            # shuffle
            print('Epoch: %d/%d, Batch_size: %d' % (epoch + 1, total_epoch, batch_size))

            # train phase
            tic = time.time()
            total_loss = exe_train(sess, train_data, batch_size, v2i, hf1, hf2,
                                   feature_shape1, feature_shape2, train, loss,
                                   input_video1, input_video2, input_captions, y, capl=capl)
            print(' --Train--, Loss: %.5f, .......Time:%.3f' % (total_loss, time.time() - tic))

            # validation phase
            tic = time.time()
            js = exe_test(sess, test_data, batch_size, v2i, i2v, hf1, hf2,
                          feature_shape1, feature_shape2, predict_words,
                          input_video1, input_video2, input_captions, y, capl=capl)
            print(' --Val--, .......Time:%.3f' % (time.time() - tic))

            # save model
            export_path = ('/home/xyj/usr/local/saved_model/msrvtt2017/' + f_type + '/'
                           + 'lr' + str(lr) + '_f' + str(feature_shape1[0]) + '_B' + str(batch_size))
            if not os.path.exists(export_path + '/model'):
                os.makedirs(export_path + '/model')
                print('mkdir %s' % (export_path + '/model'))
            if not os.path.exists(export_path + '/res'):
                os.makedirs(export_path + '/res')
                print('mkdir %s' % (export_path + '/res'))

            # eval
            res_path = export_path + '/res/' + f_type + '_E' + str(epoch + 1) + '.json'
            evaluate_mode_by_shell(res_path, js)

            save_path = saver.save(sess, export_path + '/model/' + 'E' + str(epoch + 1)
                                   + '_L' + str(total_loss) + '.ckpt')
            print("Model saved in file: %s" % save_path)
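# NOTE: `tf.clip_by_global_norm([grad], 10)[0][0]` above clips each gradient tensor to norm 10
# on its own, which is equivalent to per-tensor clipping. If the intent were to bound the joint
# global norm over all parameters instead, a helper like the sketch below would do it (an
# alternative formulation shown for illustration, not the training code used in this repository);
# it also skips the `None` gradients that compute_gradients returns for variables the loss
# does not depend on.
def _apply_global_norm_clipping(optimizer, loss, clip_norm=10.0):
    gvs = optimizer.compute_gradients(loss)
    grads, variables = zip(*[(g, v) for g, v in gvs if g is not None])  # drop vars with no gradient
    clipped_grads, _ = tf.clip_by_global_norm(list(grads), clip_norm)
    return optimizer.apply_gradients(list(zip(clipped_grads, variables)))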
def main(hf, f_type, capl=16, d_w2v=512, output_dim=512, feature_shape=None,
         unsup_training_feature_shape=None, lr=0.01, batch_size=64, total_epoch=100,
         unsup_epoch=None, file=None, pretrained_model=None):
    '''
    capl: the length of caption
    '''
    # Create vocabulary
    v2i, train_data, val_data, test_data = MsrDataUtil.create_vocabulary_word2vec(
        file, capl=capl, v2i={'': 0, 'UNK': 1, 'BOS': 2, 'EOS': 3})
    i2v = {i: v for v, i in v2i.items()}

    print('building model ...')
    voc_size = len(v2i)

    # Placeholders for the supervised captioning branch.
    input_video = tf.placeholder(tf.float32, shape=(None,) + feature_shape, name='input_video')
    input_captions = tf.placeholder(tf.int32, shape=(None, capl), name='input_captions')
    y = tf.placeholder(tf.int32, shape=(None, capl))

    # Placeholders for the unsupervised reconstruction branch.
    unsup_input_video = tf.placeholder(tf.float32, shape=(None,) + (40, 2048), name='unsup_input_video')
    unsup_decoder_feature = tf.placeholder(tf.float32, shape=(None,) + (40, 2048), name='unsup_decoder_feature')
    true_video = tf.placeholder(tf.float32, shape=(None,) + (40, 2048), name='true_video')

    attentionCaptionModel = CaptionModel.UnsupTrainingAttentionCaptionModel(
        input_video, input_captions, unsup_input_video, unsup_decoder_feature,
        voc_size, d_w2v, output_dim, T_k=[1, 2, 4, 8])
    predict_score, predict_words, predict_vector = attentionCaptionModel.build_model()

    # Unsupervised objective: Huber loss between the reconstructed and true video features.
    huber_Loss = Losses.Huber_Loss(predict_vector, true_video)
    unsup_training_loss = huber_Loss.build()
    print('unsup_training_loss.get_shape().as_list()', unsup_training_loss.get_shape().as_list())
    unsup_training_loss = tf.reduce_mean(
        tf.reduce_sum(unsup_training_loss, axis=[1, 2])
        + sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)))

    optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9, beta2=0.999,
                                       epsilon=1e-08, use_locking=False, name='Adam')
    gvs = optimizer.compute_gradients(unsup_training_loss)
    # clip each gradient tensor to norm 10 individually before applying the update
    capped_gvs = [(tf.clip_by_global_norm([grad], 10)[0][0], var) for grad, var in gvs]
    unsup_training = optimizer.apply_gradients(capped_gvs)

    # Supervised objective: sparse cross-entropy over the caption tokens.
    caption_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=predict_score) \
        + sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    caption_loss = tf.reduce_mean(caption_loss)
    caption_gvs = optimizer.compute_gradients(caption_loss)
    caption_capped_gvs = [(tf.clip_by_global_norm([grad], 10)[0][0], var) for grad, var in caption_gvs]
    caption_training = optimizer.apply_gradients(caption_capped_gvs)
    # caption_training = optimizer.minimize(caption_loss)

    '''
    configure && runtime environment
    '''
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    config.log_device_placement = False
    sess = tf.Session(config=config)

    init = tf.global_variables_initializer()
    sess.run(init)

    with sess.as_default():
        saver = tf.train.Saver(sharded=True, max_to_keep=total_epoch)
        if pretrained_model is not None:
            saver.restore(sess, pretrained_model)
            print('restore pre-trained file: ' + pretrained_model)

        export_path = ('/home/xyj/usr/local/saved_model/msrvtt2017/' + f_type + '/'
                       + 'lr' + str(lr) + '_f' + str(feature_shape[0]) + '_B' + str(batch_size))

        # unsupervised pre-training
        for epoch in xrange(unsup_epoch):
            print('Unsupervised Epoch: %d/%d, Batch_size: %d' % (epoch + 1, unsup_epoch, batch_size))

            # train phase
            tic = time.time()
            total_loss = exe_unsup_train(sess, train_data, batch_size, v2i, hf,
                                         unsup_training_feature_shape, unsup_training,
                                         unsup_training_loss, unsup_input_video,
                                         unsup_decoder_feature, true_video, capl=capl)
            print(' --Unsupervised Training--, Loss: %.5f, .......Time:%.3f' % (total_loss, time.time() - tic))

            # test phase
            tic = time.time()
            total_loss = exe_unsup_test(sess, test_data, batch_size, v2i, hf,
                                        unsup_training_feature_shape, unsup_training_loss,
                                        unsup_input_video, unsup_decoder_feature,
                                        true_video, capl=capl)
            print(' --Unsupervised Testing--, Loss: %.5f, .......Time:%.3f' % (total_loss, time.time() - tic))

            if not os.path.exists(export_path + '/unsupervised'):
                os.makedirs(export_path + '/unsupervised')
                print('mkdir %s' % (export_path + '/unsupervised'))
            save_path = saver.save(sess, export_path + '/unsupervised/' + 'E' + str(epoch + 1)
                                   + '_L' + str(total_loss) + '.ckpt')

        # supervised caption training
        for epoch in xrange(total_epoch):
            # shuffle
            print('Epoch: %d/%d, Batch_size: %d' % (epoch + 1, total_epoch, batch_size))

            # train phase
            tic = time.time()
            total_loss = exe_train(sess, train_data, batch_size, v2i, hf, feature_shape,
                                   caption_training, caption_loss, input_video,
                                   input_captions, y, capl=capl)
            print(' --Train--, Loss: %.5f, .......Time:%.3f' % (total_loss, time.time() - tic))

            # validation phase
            tic = time.time()
            js = exe_test(sess, test_data, batch_size, v2i, i2v, hf, feature_shape,
                          predict_words, input_video, input_captions, y, capl=capl)
            print(' --Val--, .......Time:%.3f' % (time.time() - tic))

            # save model
            if not os.path.exists(export_path + '/model'):
                os.makedirs(export_path + '/model')
                print('mkdir %s' % (export_path + '/model'))
            if not os.path.exists(export_path + '/res'):
                os.makedirs(export_path + '/res')
                print('mkdir %s' % (export_path + '/res'))

            # eval
            res_path = export_path + '/res/' + f_type + '_E' + str(epoch + 1) + '.json'
            evaluate_mode_by_shell(res_path, js)

            save_path = saver.save(sess, export_path + '/model/' + 'E' + str(epoch + 1)
                                   + '_L' + str(total_loss) + '.ckpt')
            print("Model saved in file: %s" % save_path)
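# `Losses.Huber_Loss` is defined elsewhere in the repository. For readers without that module, a
# minimal element-wise Huber loss of the kind this reconstruction objective suggests could look
# like the sketch below (delta=1.0 is an assumption; the repository's implementation may differ).
# It returns a tensor with the same shape as its inputs, so it can be reduced with
# tf.reduce_sum(..., axis=[1, 2]) as in the training code above.
def _huber_loss(predictions, targets, delta=1.0):
    residual = tf.abs(predictions - targets)
    quadratic = 0.5 * tf.square(residual)          # used where the error is small (<= delta)
    linear = delta * residual - 0.5 * delta ** 2   # linear penalty where the error is large
    return tf.where(residual <= delta, quadratic, linear)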