def main(hf,
         f_type,
         capl=16,
         d_w2v=512,
         output_dim=512,
         feature_shape=None,
         lr=0.01,
         batch_size=64,
         total_epoch=100,
         file=None,
         pretrained_model=None,
         export_root='/home/xyj/usr/local/saved_model/msrvtt2017'):
    '''Build the caption graph and run the train / evaluate / checkpoint loop.

    Args:
        hf: handed unchanged to exe_train and exe_test (presumably the opened
            feature file -- confirm against those helpers).
        f_type: string tag used in the export directory and result file names.
        capl: the length of caption; second dimension of the caption and
            label placeholders.
        d_w2v: forwarded to CaptionModel (presumably the word-embedding
            size -- confirm against the model class).
        output_dim: forwarded to CaptionModel.
        feature_shape: shape of one `input_video` sample, without the batch
            dimension. Required despite the None default.
        lr: learning rate given to the RMSProp optimizer.
        batch_size: forwarded to exe_train and exe_test.
        total_epoch: number of epochs to run; also used as the saver's
            `max_to_keep`, so every epoch's checkpoint is retained.
        file: forwarded as the first argument of
            MsrDataUtil.create_vocabulary_word2vec.
        pretrained_model: optional checkpoint path restored before training.
        export_root: directory under which the `s2s_<f_type>/...` export
            tree (checkpoints and result files) is written.

    Raises:
        TypeError: if feature_shape is None.
    '''
    # Fail before the vocabulary and graph are built instead of with an
    # opaque "can only concatenate tuple" error further down.
    if feature_shape is None:
        raise TypeError('feature_shape must be a tuple of ints, got None')
    feature_shape = tuple(feature_shape)

    # Create vocabulary. The validation split is returned but not used here.
    v2i, train_data, _, test_data = MsrDataUtil.create_vocabulary_word2vec(
        file, capl=capl, v2i={
            '': 0,
            'UNK': 1,
            'BOS': 2,
            'EOS': 3
        })

    i2v = {i: v for v, i in v2i.items()}

    print('building model ...')
    voc_size = len(v2i)

    input_video = tf.placeholder(tf.float32,
                                 shape=(None, ) + feature_shape,
                                 name='input_video')
    input_captions = tf.placeholder(tf.int32,
                                    shape=(None, capl),
                                    name='input_captions')
    # Labels carry one vocabulary-sized vector per caption position, as
    # required by the dense softmax cross-entropy below.
    y = tf.placeholder(tf.int32, shape=(None, capl, voc_size))

    captionModel = CaptionModel.CaptionModel(input_video, input_captions,
                                             voc_size, d_w2v, output_dim)
    predict_score, predict_words = captionModel.build_model()
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                   logits=predict_score)
    loss = tf.reduce_mean(loss) + sum(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    optimizer = tf.train.RMSPropOptimizer(lr,
                                          decay=0.9,
                                          momentum=0.0,
                                          epsilon=1e-8)
    train = optimizer.minimize(loss)

    # Export locations do not change between epochs, so resolve and create
    # them once, up front: an unwritable path then fails before the first
    # epoch is trained rather than after it.
    export_path = (export_root + '/s2s_' + f_type + '/lr' + str(lr) + '_f' +
                   str(feature_shape[0]))
    model_dir = export_path + '/model'
    res_dir = export_path + '/res'
    for directory in (model_dir, res_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
            print('mkdir %s' % directory)

    # configure && runtime environment
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    config.log_device_placement = False

    # Using the session as a context manager installs it as the default
    # session and closes it (releasing its resources) on exit, including
    # when an epoch raises.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(sharded=True, max_to_keep=total_epoch)
        if pretrained_model is not None:
            # Restoring after the initializer overwrites the fresh values.
            saver.restore(sess, pretrained_model)
            print('restore pre trained file:' + pretrained_model)

        for epoch in range(total_epoch):
            print('Epoch: %d/%d, Batch_size: %d' %
                  (epoch + 1, total_epoch, batch_size))

            # train phase
            tic = time.time()
            total_loss = exe_train(sess,
                                   train_data,
                                   batch_size,
                                   v2i,
                                   hf,
                                   feature_shape,
                                   train,
                                   loss,
                                   input_video,
                                   input_captions,
                                   y,
                                   capl=capl)

            print('    --Train--, Loss: %.5f, .......Time:%.3f' %
                  (total_loss, time.time() - tic))

            # Evaluation phase. The log line says "Val" but the data passed
            # to exe_test is the test split.
            tic = time.time()
            js = exe_test(sess,
                          test_data,
                          batch_size,
                          v2i,
                          i2v,
                          hf,
                          feature_shape,
                          predict_words,
                          input_video,
                          input_captions,
                          y,
                          capl=capl)
            print('    --Val--, .......Time:%.3f' % (time.time() - tic))

            # eval
            res_path = res_dir + '/' + f_type + '_E' + str(epoch + 1) + '.json'
            evaluate_mode_by_shell(res_path, js)

            # save model; the checkpoint name records epoch and train loss
            save_path = saver.save(
                sess, model_dir + '/' + 'E' + str(epoch + 1) + '_L' +
                str(total_loss) + '.ckpt')
            print("Model saved in file: %s" % save_path)
# Example #2
# 0
def main(hf1,
         hf2,
         f_type,
         capl=16,
         d_w2v=512,
         output_dim=512,
         feature_shape1=None,
         feature_shape2=None,
         lr=0.01,
         batch_size=64,
         total_epoch=100,
         file=None,
         pretrained_model=None,
         export_root='/home/xyj/usr/local/saved_model/msrvtt2017'):
    '''Build the two-stream attention caption graph and train / evaluate it.

    Args:
        hf1, hf2: handed unchanged to exe_train and exe_test (presumably the
            two opened feature files -- confirm against those helpers).
        f_type: string tag used in the export directory and result file names.
        capl: the length of caption; second dimension of the caption and
            label placeholders.
        d_w2v: forwarded to the caption model (presumably the word-embedding
            size -- confirm against the model class).
        output_dim: forwarded to the caption model.
        feature_shape1: shape of one `input_video1` sample, without the batch
            dimension. Required despite the None default.
        feature_shape2: shape of one `input_video2` sample, without the batch
            dimension. Required despite the None default.
        lr: learning rate given to the Adam optimizer.
        batch_size: forwarded to exe_train and exe_test; also part of the
            export directory name.
        total_epoch: number of epochs to run; also used as the saver's
            `max_to_keep`, so every epoch's checkpoint is retained.
        file: forwarded as the first argument of
            MsrDataUtil.create_vocabulary_word2vec.
        pretrained_model: optional checkpoint path restored before training.
        export_root: directory under which the `<f_type>/...` export tree
            (checkpoints and result files) is written.

    Raises:
        TypeError: if feature_shape1 or feature_shape2 is None.
    '''
    # Fail before the vocabulary and graph are built instead of with an
    # opaque "can only concatenate tuple" error further down.
    if feature_shape1 is None or feature_shape2 is None:
        raise TypeError(
            'feature_shape1 and feature_shape2 must be tuples of ints, '
            'got %r and %r' % (feature_shape1, feature_shape2))
    feature_shape1 = tuple(feature_shape1)
    feature_shape2 = tuple(feature_shape2)

    # Create vocabulary. The validation split is returned but not used here.
    v2i, train_data, _, test_data = MsrDataUtil.create_vocabulary_word2vec(
        file,
        capl=capl,
        word_threshold=1,
        v2i={
            '': 0,
            'UNK': 1,
            'BOS': 2,
            'EOS': 3
        })

    i2v = {i: v for v, i in v2i.items()}

    print('building model ...')
    voc_size = len(v2i)

    input_video1 = tf.placeholder(tf.float32,
                                  shape=(None, ) + feature_shape1,
                                  name='input_video1')
    input_video2 = tf.placeholder(tf.float32,
                                  shape=(None, ) + feature_shape2,
                                  name='input_video2')

    input_captions = tf.placeholder(tf.int32,
                                    shape=(None, capl),
                                    name='input_captions')
    # Labels are word indices (sparse cross-entropy), one per position.
    y = tf.placeholder(tf.int32, shape=(None, capl))

    attentionCaptionModel = mGRUAstarCaptionModel.mGRUAstarAttentionCaptionModel(
        input_video1,
        input_video2,
        input_captions,
        voc_size,
        d_w2v,
        output_dim,
        T_k=[1, 2, 4, 8])
    predict_score, predict_words, loss_mask = attentionCaptionModel.build_model(
    )
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=predict_score)

    # Per-sample loss: token losses summed over the last axis, divided by the
    # mask sum, plus the regularisation terms.
    # NOTE(review): the token losses are normalised by loss_mask but never
    # multiplied by it, so masked-out positions still contribute to the
    # numerator -- confirm this is intended.
    loss = tf.reduce_sum(loss, axis=[-1]) / tf.reduce_sum(
        loss_mask, axis=[-1]) + sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    loss = tf.reduce_mean(loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1e-08,
                                       use_locking=False,
                                       name='Adam')

    # clip_by_global_norm is called on each gradient by itself, so every
    # tensor is clipped to norm 10 independently rather than jointly.
    gvs = optimizer.compute_gradients(loss)
    capped_gvs = [(tf.clip_by_global_norm([grad], 10)[0][0], var)
                  for grad, var in gvs]
    train = optimizer.apply_gradients(capped_gvs)

    # Export locations do not change between epochs, so resolve and create
    # them once, up front: an unwritable path then fails before the first
    # epoch is trained rather than after it.
    export_path = (export_root + '/' + f_type + '/lr' + str(lr) + '_f' +
                   str(feature_shape1[0]) + '_B' + str(batch_size))
    model_dir = export_path + '/model'
    res_dir = export_path + '/res'
    for directory in (model_dir, res_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
            print('mkdir %s' % directory)

    # configure && runtime environment
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    config.log_device_placement = False

    # Using the session as a context manager installs it as the default
    # session and closes it (releasing its resources) on exit, including
    # when an epoch raises.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(sharded=True, max_to_keep=total_epoch)
        if pretrained_model is not None:
            # Restoring after the initializer overwrites the fresh values.
            saver.restore(sess, pretrained_model)
            print('restore pre trained file:' + pretrained_model)

        for epoch in range(total_epoch):
            print('Epoch: %d/%d, Batch_size: %d' %
                  (epoch + 1, total_epoch, batch_size))

            # train phase
            tic = time.time()
            total_loss = exe_train(sess,
                                   train_data,
                                   batch_size,
                                   v2i,
                                   hf1,
                                   hf2,
                                   feature_shape1,
                                   feature_shape2,
                                   train,
                                   loss,
                                   input_video1,
                                   input_video2,
                                   input_captions,
                                   y,
                                   capl=capl)

            print('    --Train--, Loss: %.5f, .......Time:%.3f' %
                  (total_loss, time.time() - tic))

            # Evaluation phase. The log line says "Val" but the data passed
            # to exe_test is the test split.
            tic = time.time()
            js = exe_test(sess,
                          test_data,
                          batch_size,
                          v2i,
                          i2v,
                          hf1,
                          hf2,
                          feature_shape1,
                          feature_shape2,
                          predict_words,
                          input_video1,
                          input_video2,
                          input_captions,
                          y,
                          capl=capl)
            print('    --Val--, .......Time:%.3f' % (time.time() - tic))

            # eval
            res_path = res_dir + '/' + f_type + '_E' + str(epoch + 1) + '.json'
            evaluate_mode_by_shell(res_path, js)

            # save model; the checkpoint name records epoch and train loss
            save_path = saver.save(
                sess, model_dir + '/' + 'E' + str(epoch + 1) + '_L' +
                str(total_loss) + '.ckpt')
            print("Model saved in file: %s" % save_path)
def main(hf, f_type, capl=16, d_w2v=512, output_dim=512,
         feature_shape=None, unsup_training_feature_shape=None,
         lr=0.01,
         batch_size=64, total_epoch=100, unsup_epoch=None,
         file=None, pretrained_model=None,
         export_root='/home/xyj/usr/local/saved_model/msrvtt2017'):
    '''Run optional unsupervised pre-training, then supervised caption training.

    Two training ops are built on one Adam optimizer: one minimising a Huber
    loss between the model's predicted vectors and `true_video`, and one
    minimising the caption cross-entropy. The unsupervised epochs run first,
    then the caption epochs; each epoch writes a checkpoint.

    Args:
        hf: handed unchanged to the exe_* helpers (presumably the opened
            feature file -- confirm against those helpers).
        f_type: string tag used in the export directory and result file names.
        capl: the length of caption; second dimension of the caption and
            label placeholders.
        d_w2v: forwarded to the caption model (presumably the word-embedding
            size -- confirm against the model class).
        output_dim: forwarded to the caption model.
        feature_shape: shape of one `input_video` sample, without the batch
            dimension. Required despite the None default.
        unsup_training_feature_shape: forwarded to exe_unsup_train and
            exe_unsup_test only; it does not size any placeholder here.
        lr: learning rate given to the Adam optimizer.
        batch_size: forwarded to the exe_* helpers; also part of the export
            directory name.
        total_epoch: number of supervised epochs; also used as the saver's
            `max_to_keep`.
        unsup_epoch: number of unsupervised pre-training epochs. None (the
            default) is treated as 0, i.e. pre-training is skipped.
        file: forwarded as the first argument of
            MsrDataUtil.create_vocabulary_word2vec.
        pretrained_model: optional checkpoint path restored before training.
        export_root: directory under which the `<f_type>/...` export tree
            (checkpoints and result files) is written.

    Raises:
        TypeError: if feature_shape is None.
    '''
    # Fail before the vocabulary and graph are built instead of with an
    # opaque "can only concatenate tuple" error further down.
    if feature_shape is None:
        raise TypeError('feature_shape must be a tuple of ints, got None')
    feature_shape = tuple(feature_shape)

    # The None default used to crash in the epoch loop; treat it as
    # "no unsupervised pre-training".
    n_unsup_epochs = unsup_epoch or 0

    # Create vocabulary. The validation split is returned but not used here.
    v2i, train_data, _, test_data = MsrDataUtil.create_vocabulary_word2vec(
        file, capl=capl, v2i={'': 0, 'UNK': 1, 'BOS': 2, 'EOS': 3})

    i2v = {i: v for v, i in v2i.items()}

    print('building model ...')
    voc_size = len(v2i)
    input_video = tf.placeholder(tf.float32,
                                 shape=(None,) + feature_shape,
                                 name='input_video')
    input_captions = tf.placeholder(tf.int32,
                                    shape=(None, capl),
                                    name='input_captions')
    # Labels are word indices (sparse cross-entropy), one per position.
    y = tf.placeholder(tf.int32, shape=(None, capl))

    # NOTE(review): these shapes are hard-coded to (40, 2048) although
    # unsup_training_feature_shape is a parameter -- confirm whether the
    # placeholders should follow it.
    unsup_input_video = tf.placeholder(tf.float32,
                                       shape=(None,) + (40, 2048),
                                       name='unsup_input_video')
    unsup_decoder_feature = tf.placeholder(tf.float32,
                                           shape=(None,) + (40, 2048),
                                           name='unsup_decoder_feature')
    true_video = tf.placeholder(tf.float32,
                                shape=(None,) + (40, 2048),
                                name='true_video')

    attentionCaptionModel = CaptionModel.UnsupTrainingAttentionCaptionModel(
        input_video, input_captions, unsup_input_video,
        unsup_decoder_feature, voc_size, d_w2v, output_dim,
        T_k=[1, 2, 4, 8])
    predict_score, predict_words, predict_vector = attentionCaptionModel.build_model()

    regularization_loss = sum(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # Unsupervised objective: Huber loss summed over axes 1 and 2 of each
    # sample, plus regularisation, averaged over the batch.
    huber_Loss = Losses.Huber_Loss(predict_vector, true_video)
    unsup_training_loss = huber_Loss.build()
    print('unsup_training_loss.get_shape().as_list()',
          unsup_training_loss.get_shape().as_list())
    unsup_training_loss = tf.reduce_mean(
        tf.reduce_sum(unsup_training_loss, axis=[1, 2]) + regularization_loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1e-08,
                                       use_locking=False,
                                       name='Adam')

    # clip_by_global_norm is called on each gradient by itself, so every
    # tensor is clipped to norm 10 independently rather than jointly.
    gvs = optimizer.compute_gradients(unsup_training_loss)
    capped_gvs = [(tf.clip_by_global_norm([grad], 10)[0][0], var)
                  for grad, var in gvs]
    unsup_training = optimizer.apply_gradients(capped_gvs)

    # Supervised objective: per-token cross-entropy plus regularisation,
    # averaged over all positions.
    caption_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=predict_score) + regularization_loss
    caption_loss = tf.reduce_mean(caption_loss)

    caption_gvs = optimizer.compute_gradients(caption_loss)
    caption_capped_gvs = [(tf.clip_by_global_norm([grad], 10)[0][0], var)
                          for grad, var in caption_gvs]
    caption_training = optimizer.apply_gradients(caption_capped_gvs)

    # Export locations do not change between epochs, so resolve and create
    # them once, up front: an unwritable path then fails before any epoch
    # is trained rather than after it.
    export_path = (export_root + '/' + f_type + '/lr' + str(lr) + '_f' +
                   str(feature_shape[0]) + '_B' + str(batch_size))
    unsup_dir = export_path + '/unsupervised'
    model_dir = export_path + '/model'
    res_dir = export_path + '/res'
    needed_dirs = [model_dir, res_dir]
    if n_unsup_epochs > 0:
        needed_dirs.insert(0, unsup_dir)
    for directory in needed_dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)
            print('mkdir %s' % directory)

    # configure && runtime environment
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    config.log_device_placement = False

    # Using the session as a context manager installs it as the default
    # session and closes it (releasing its resources) on exit, including
    # when an epoch raises.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(sharded=True, max_to_keep=total_epoch)
        if pretrained_model is not None:
            # Restoring after the initializer overwrites the fresh values.
            saver.restore(sess, pretrained_model)
            print('restore pre trained file:' + pretrained_model)

        # unsupervised training
        for epoch in range(n_unsup_epochs):
            print('Unsupervised Epoch: %d/%d, Batch_size: %d' %
                  (epoch + 1, n_unsup_epochs, batch_size))

            # train phase
            tic = time.time()
            total_loss = exe_unsup_train(sess, train_data, batch_size, v2i, hf,
                                         unsup_training_feature_shape,
                                         unsup_training, unsup_training_loss,
                                         unsup_input_video,
                                         unsup_decoder_feature, true_video,
                                         capl=capl)
            print('    --Unsupervised Training--, Loss: %.5f, .......Time:%.3f' %
                  (total_loss, time.time() - tic))

            # test phase; total_loss now holds the test loss, which is the
            # value recorded in the checkpoint name below
            tic = time.time()
            total_loss = exe_unsup_test(sess, test_data, batch_size, v2i, hf,
                                        unsup_training_feature_shape,
                                        unsup_training_loss,
                                        unsup_input_video,
                                        unsup_decoder_feature, true_video,
                                        capl=capl)
            print('    --Unsupervised Testing--, Loss: %.5f, .......Time:%.3f' %
                  (total_loss, time.time() - tic))

            saver.save(sess, unsup_dir + '/' + 'E' + str(epoch + 1) + '_L' +
                       str(total_loss) + '.ckpt')

        # supervised caption training
        for epoch in range(total_epoch):
            # train phase
            print('Epoch: %d/%d, Batch_size: %d' %
                  (epoch + 1, total_epoch, batch_size))
            tic = time.time()
            total_loss = exe_train(sess, train_data, batch_size, v2i, hf,
                                   feature_shape, caption_training,
                                   caption_loss, input_video, input_captions,
                                   y, capl=capl)

            print('    --Train--, Loss: %.5f, .......Time:%.3f' %
                  (total_loss, time.time() - tic))

            # Evaluation phase. The log line says "Val" but the data passed
            # to exe_test is the test split.
            tic = time.time()
            js = exe_test(sess, test_data, batch_size, v2i, i2v, hf,
                          feature_shape, predict_words, input_video,
                          input_captions, y, capl=capl)
            print('    --Val--, .......Time:%.3f' % (time.time() - tic))

            # eval
            res_path = res_dir + '/' + f_type + '_E' + str(epoch + 1) + '.json'
            evaluate_mode_by_shell(res_path, js)

            # save model; the checkpoint name records epoch and train loss
            save_path = saver.save(sess, model_dir + '/' + 'E' +
                                   str(epoch + 1) + '_L' + str(total_loss) +
                                   '.ckpt')
            print("Model saved in file: %s" % save_path)