# Imports assumed by this excerpt (TensorFlow 1.x, graph mode). DataUtil,
# CaptionModel, SamModel, exe_train, exe_test, beamsearch_exe_test and
# evaluate_mode_by_shell come from the surrounding project and are not shown.
import os
import time

import tensorflow as tf


def main(hf,
         f_type,
         capl=16,
         d_w2v=512,
         output_dim=512,
         feature_shape=None,
         lr=0.01,
         batch_size=64,
         total_epoch=100,
         file=None,
         pretrained_model=None):
    '''
    capl: the caption length (number of tokens)
    '''

    # Create vocabulary
    v2i, train_data, val_data, test_data = DataUtil.create_vocabulary_word2vec(
        file, capl=capl, v2i={
            '': 0,
            'UNK': 1,
            'BOS': 2,
            'EOS': 3
        })
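    # v2i maps tokens to integer ids; ids 0-3 are reserved for the padding,
    # unknown-word, begin-of-sentence and end-of-sentence markers.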

    i2v = {i: v for v, i in v2i.items()}

    print('building model ...')
    voc_size = len(v2i)

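    # Graph inputs: a batch of precomputed video features, the caption token
    # ids fed to the decoder, and one-hot targets for each caption position.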
    input_video = tf.placeholder(tf.float32,
                                 shape=(None, ) + feature_shape,
                                 name='input_video')
    input_captions = tf.placeholder(tf.int32,
                                    shape=(None, capl),
                                    name='input_captions')
    y = tf.placeholder(tf.int32, shape=(None, capl, len(v2i)))

    captionModel = CaptionModel.CaptionModel(input_video, input_captions,
                                             voc_size, d_w2v, output_dim)
    predict_score, predict_words = captionModel.build_model()
    # The dense softmax cross entropy op requires float labels matching the
    # logits dtype, so cast the one-hot int32 targets before computing loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(y, tf.float32),
                                                   logits=predict_score)
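    # Total objective: mean token-level cross entropy plus any weight
    # regularizers registered under tf.GraphKeys.REGULARIZATION_LOSSES.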
    loss = tf.reduce_mean(loss) + sum(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    optimizer = tf.train.RMSPropOptimizer(lr,
                                          decay=0.9,
                                          momentum=0.0,
                                          epsilon=1e-8)
    train = optimizer.minimize(loss)
    '''
    Configure the runtime environment.
    '''
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    config.log_device_placement = False

    sess = tf.Session(config=config)

    init = tf.global_variables_initializer()
    sess.run(init)

    with sess.as_default():
        saver = tf.train.Saver(sharded=True, max_to_keep=total_epoch)
        if pretrained_model is not None:
            saver.restore(sess, pretrained_model)
            print('restored pretrained model from: ' + pretrained_model)

        for epoch in range(total_epoch):
            # # shuffle
            print('Epoch: %d/%d, Batch_size: %d' %
                  (epoch + 1, total_epoch, batch_size))
            # # train phase
            tic = time.time()
            total_loss = exe_train(sess,
                                   train_data,
                                   batch_size,
                                   v2i,
                                   hf,
                                   feature_shape,
                                   train,
                                   loss,
                                   input_video,
                                   input_captions,
                                   y,
                                   capl=capl)

            print('    --Train--, Loss: %.5f, .......Time:%.3f' %
                  (total_loss, time.time() - tic))

            tic = time.time()
            js = exe_test(sess,
                          test_data,
                          batch_size,
                          v2i,
                          i2v,
                          hf,
                          feature_shape,
                          predict_words,
                          input_video,
                          input_captions,
                          y,
                          capl=capl)
            print('    --Val--, .......Time:%.3f' % (time.time() - tic))

            # save model
            export_path = ('/home/xyj/usr/local/saved_model/caption/s2s_' + f_type +
                           '/lr' + str(lr) + '_f' + str(feature_shape[0]))
            if not os.path.exists(export_path + '/model'):
                os.makedirs(export_path + '/model')
                print('mkdir %s' % (export_path + '/model'))
            if not os.path.exists(export_path + '/res'):
                os.makedirs(export_path + '/res')
                print('mkdir %s' % (export_path + '/res'))

            # eval
            res_path = export_path + '/res/' + f_type + '_E' + str(epoch + 1) + '.json'
            evaluate_mode_by_shell(res_path, js)

            save_path = saver.save(
                sess, export_path + '/model/' + 'E' + str(epoch + 1) + '_L' +
                str(total_loss) + '.ckpt')
            print("Model saved in file: %s" % save_path)

# Example 2

def main(hf, f_type,
		reduction_dim=512,
		centers_num=32, capl=16, d_w2v=512, output_dim=512,
		feature_shape=None, lr=0.01,
		batch_size=64, total_epoch=100,
		file=None, pretrained_model=None):
	# NOTE: feature_shape and lr were missing from the original signature even
	# though both are used below; they are added here with Example 1's defaults.
	# This example also references args, bidirectional, step, f_feature,
	# b_feature, avg_feature, input_feature, cond1, threshold, temp_t and
	# alpha_t, which are presumably module-level globals (e.g. from argparse)
	# in the original script and are not shown in this excerpt.

	# Create vocabulary
	v2i, train_data, val_data, test_data = DataUtil.create_vocabulary_word2vec(file=file, capl=capl,  v2i={'': 0, 'UNK':1,'BOS':2, 'EOS':3})

	i2v = {i:v for v,i in v2i.items()}

	print('building model ...')
	voc_size = len(v2i)

	input_video = tf.placeholder(tf.float32, shape=(None,)+feature_shape,name='input_video')
	input_captions = tf.placeholder(tf.int32, shape=(None,capl), name='input_captions')
	y = tf.placeholder(tf.int32,shape=(None, capl))

	if args.soft:
		captionModel = SamModel.SoftModel(input_video, input_captions, voc_size, d_w2v, output_dim,
								reduction_dim=reduction_dim,
								centers_num=centers_num,
								done_token=v2i['EOS'], max_len=capl, beamsearch_batchsize=1, beam_size=5)
	else:
		captionModel = SamModel.HardModel(input_video, input_captions, voc_size, d_w2v, output_dim,
								reduction_dim=reduction_dim,
								centers_num=centers_num,
								done_token=v2i['EOS'], max_len=capl, beamsearch_batchsize=1, beam_size=5)

	predict_score, predict_words, loss_mask, finished_beam, logprobs_finished_beams, past_symbols = captionModel.build_model()
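
	# Masked sequence loss: sum the per-token cross entropy over each caption
	# and normalize by the number of valid (non-padding) steps in loss_mask.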
	loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=predict_score)

	loss = tf.reduce_sum(loss, axis=-1) / tf.reduce_sum(loss_mask, axis=-1)

	loss = tf.reduce_mean(loss) + sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

	optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False, name='Adam')

	# Clip gradients by the global norm across all variables. The original
	# comprehension called tf.clip_by_global_norm on each gradient separately,
	# which is per-tensor clipping, not global-norm clipping.
	gvs = optimizer.compute_gradients(loss)
	grads, tvars = zip(*gvs)
	clipped_grads, _ = tf.clip_by_global_norm(grads, 10.0)
	train = optimizer.apply_gradients(list(zip(clipped_grads, tvars)))

	tf.summary.scalar('cross_entropy',loss)

	'''
		Configure the runtime environment.
	'''
	config = tf.ConfigProto()
	config.gpu_options.per_process_gpu_memory_fraction = 0.6
	config.log_device_placement=False

	sess = tf.Session(config=config)

	init = tf.global_variables_initializer()
	sess.run(init)
	
	'''
		TensorBoard configuration.
	'''
	merged = tf.summary.merge_all()
	export_path = 'saved_model/mvad/' + f_type + '/lr' + str(lr) + '_f' + str(feature_shape[0]) + '_B' + str(batch_size)

	if not os.path.exists(export_path + '/log'):
		os.makedirs(export_path + '/log')
		print('mkdir %s' % (export_path + '/log'))

	print('building writer')
	train_writer = tf.summary.FileWriter(export_path + '/log', sess.graph)
	with sess.as_default():
		saver = tf.train.Saver(sharded=True,max_to_keep=total_epoch)
		if pretrained_model is not None:
			saver.restore(sess, pretrained_model)
			print('restored pretrained model from: ' + pretrained_model)

		for epoch in range(total_epoch):
			# # shuffle
			print('Epoch: %d/%d, Batch_size: %d' %(epoch+1,total_epoch,batch_size))
			# # train phase
			tic = time.time()
			total_loss = exe_train(sess, train_data, epoch, batch_size, v2i, hf, feature_shape, train, loss, input_video, input_captions, y, 
				merged, train_writer, bidirectional=bidirectional, step=step, capl=capl,f_feature=f_feature,b_feature=b_feature,
				avg_feature=avg_feature,input_feature=input_feature,cond1=cond1,threshold=threshold, temp_t=temp_t, alpha_t=alpha_t)

			print('    --Train--, Loss: %.5f, .......Time:%.3f' %(total_loss,time.time()-tic))

			# tic = time.time()
			# js = exe_test(sess, test_data, batch_size, v2i, i2v, hf, feature_shape, 
			# 							predict_words, input_video, input_captions, y, step=step, capl=capl)
			# print('    --Val--, .......Time:%.3f' %(time.time()-tic))


			# beamsearch
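			# Decode with beam search (beam_size=5); the batch size is fixed at 1
			# to match the beamsearch_batchsize the model was built with.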
			tic = time.time()
			print('beam searching ...')
			js = beamsearch_exe_test(sess, test_data, 1, v2i, i2v, hf, feature_shape, 
										predict_words, input_video, input_captions, y, finished_beam, logprobs_finished_beams, past_symbols, step=step, capl=capl)
			print('    --Val--, .......Time:%.3f' %(time.time()-tic))

			# save model
			if not os.path.exists(export_path + '/model'):
				os.makedirs(export_path + '/model')
				print('mkdir %s' % (export_path + '/model'))
			if not os.path.exists(export_path + '/res'):
				os.makedirs(export_path + '/res')
				print('mkdir %s' % (export_path + '/res'))

			# eval
			res_path = export_path+'/res/E'+str(epoch+1)+'.json'
			evaluate_mode_by_shell(res_path,js)


			save_path = saver.save(sess, export_path+'/model/'+'E'+str(epoch+1)+'_L'+str(total_loss)+'.ckpt')
			print("Model saved in file: %s" % save_path)