Python MsrDataUtil.getBatchC3DVideoFeature Beispiele

Programmiersprache: Python

Namespace / Paketname: utils

Klasse / Typ: MsrDataUtil

Methode / Funktion: getBatchC3DVideoFeature

Beispiele auf hotexamples.com: 4

Python MsrDataUtil.getBatchC3DVideoFeature - 4 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Methode utils.MsrDataUtil.getBatchC3DVideoFeature, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

getBatchVideoFeature(10)

convertCaptionI2V(6)

getBatchVideoCategoriesInfo(5)

getBatchC3DVideoFeature(4)

getBatchTestCaptionWithSparseLabel(4)

getBatchTrainCaptionWithSparseLabel(4)

create_vocabulary_word2vec(3)

getBatchStepVideoFeature(2)

getBatchTestCaption(2)

getBatchVideoAudioInfo(2)

getBatchTrainCaption(1)

getCategoriesInfo(1)

getNewBatchTrainCaption(1)

Beispiel #1

Datei anzeigen

Datei: msrvtt_final_version_bi_3thstep.py Projekt: youjiangxu/VideoCaptioning

def exe_train(sess,
              data,
              cate_info,
              batch_size,
              v2i,
              hf1,
              hf2,
              feature_shape1,
              feature_shape2,
              train,
              loss,
              input_video1,
              input_video2,
              input_captions,
              input_categories,
              y,
              capl=16):
    """Run one pass over ``data`` in mini-batches and return the mean loss.

    ``data`` is shuffled in place and cut into consecutive slices of at most
    ``batch_size`` records. For every slice the two video feature batches,
    the caption/label batch and the category batch are built through
    ``MsrDataUtil`` and fed to ``sess.run([train, loss], ...)``.

    Args:
        sess: Object whose ``run(fetches, feed_dict=...)`` evaluates
            ``train`` and ``loss`` (presumably a TensorFlow session --
            TODO confirm).
        data: Mutable sequence of caption records; shuffled in place.
        cate_info: Forwarded to ``MsrDataUtil.getBatchVideoCategoriesInfo``.
        batch_size: Maximum number of records per batch (positive int).
        v2i: Forwarded to
            ``MsrDataUtil.getBatchTrainCaptionWithSparseLabel`` (presumably
            a vocabulary-to-index mapping -- TODO confirm).
        hf1: Feature source handed to ``MsrDataUtil.getBatchVideoFeature``.
        hf2: Feature source handed to
            ``MsrDataUtil.getBatchC3DVideoFeature``.
        feature_shape1: Shape argument for the first feature loader; also
            passed to the category helper.
        feature_shape2: Shape argument for the C3D feature loader.
        train: Fetch evaluated for its side effect (the training step).
        loss: Fetch whose value is accumulated per batch.
        input_video1: Feed key for the first feature batch.
        input_video2: Feed key for the C3D feature batch.
        input_captions: Feed key for the caption batch.
        input_categories: Feed key for the category batch.
        y: Feed key for the label batch.
        capl: Forwarded as the ``capl`` keyword of the caption helper
            (presumably the maximum caption length -- TODO confirm).

    Returns:
        The sum of the per-batch losses divided by the number of batches,
        or ``0.0`` when ``data`` is empty.
    """
    np.random.shuffle(data)

    total_data = len(data)
    # Ceiling division so a trailing partial batch is always processed.
    # The former int(round(total / batch_size)) skipped the tail whenever it
    # was smaller than half a batch, and produced zero batches (followed by
    # a ZeroDivisionError in the final average) for very small datasets.
    num_batch = (total_data + batch_size - 1) // batch_size
    if num_batch == 0:
        # Nothing to train on; avoid dividing by zero below.
        return 0.0

    total_loss = 0.0
    for batch_idx in xrange(num_batch):
        start = batch_idx * batch_size
        stop = min(start + batch_size, total_data)
        batch_caption = data[start:stop]

        data_v1 = MsrDataUtil.getBatchVideoFeature(batch_caption, hf1,
                                                   feature_shape1)
        data_v2 = MsrDataUtil.getBatchC3DVideoFeature(batch_caption, hf2,
                                                      feature_shape2)

        # Random augmentation: randint(0, 2) yields 0 or 1, so roughly half
        # of the batches get both feature arrays reversed along axis 1
        # (presumably the temporal axis -- TODO confirm).
        flag = np.random.randint(0, 2)
        if flag == 1:
            data_v1 = data_v1[:, ::-1, :]
            data_v2 = data_v2[:, ::-1, :]

        data_c, data_y = MsrDataUtil.getBatchTrainCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)
        data_cate = MsrDataUtil.getBatchVideoCategoriesInfo(
            batch_caption, cate_info, feature_shape1)

        _, batch_loss = sess.run(
            [train, loss],
            feed_dict={
                input_video1: data_v1,
                input_video2: data_v2,
                input_captions: data_c,
                input_categories: data_cate,
                y: data_y
            })
        total_loss += batch_loss
        print('    batch_idx:%d/%d, loss:%.5f' %
              (batch_idx + 1, num_batch, batch_loss))

    return total_loss / num_batch

Beispiel #2

Datei anzeigen

Datei: msrvtt_caption_s2s_merge_two_feature.py Projekt: youjiangxu/VideoCaptioning

def exe_train(sess, data, batch_size, v2i, hf1, hf2, feature_shape,
              train, loss, input_video, input_captions, y, capl=16):
    """Run one pass over ``data`` in mini-batches and return the mean loss.

    ``data`` is shuffled in place and cut into consecutive slices of at most
    ``batch_size`` records. For every slice two feature batches are loaded
    through ``MsrDataUtil``, concatenated along their last axis, and fed
    together with the caption/label batch to ``sess.run([train, loss], ...)``.

    Args:
        sess: Object whose ``run(fetches, feed_dict=...)`` evaluates
            ``train`` and ``loss`` (presumably a TensorFlow session --
            TODO confirm).
        data: Mutable sequence of caption records; shuffled in place.
        batch_size: Maximum number of records per batch (positive int).
        v2i: Forwarded to
            ``MsrDataUtil.getBatchTrainCaptionWithSparseLabel`` (presumably
            a vocabulary-to-index mapping -- TODO confirm).
        hf1: Feature source handed to ``MsrDataUtil.getBatchVideoFeature``.
        hf2: Feature source handed to
            ``MsrDataUtil.getBatchC3DVideoFeature``.
        feature_shape: Only ``feature_shape[0]`` is used; it is paired with
            the hard-coded widths 2048 and 4096 for the two loaders.
        train: Fetch evaluated for its side effect (the training step).
        loss: Fetch whose value is accumulated per batch.
        input_video: Feed key for the concatenated feature batch.
        input_captions: Feed key for the caption batch.
        y: Feed key for the label batch.
        capl: Forwarded as the ``capl`` keyword of the caption helper
            (presumably the maximum caption length -- TODO confirm).

    Returns:
        The sum of the per-batch losses divided by the number of batches,
        or ``0.0`` when ``data`` is empty.
    """
    np.random.shuffle(data)

    total_data = len(data)
    # Ceiling division so a trailing partial batch is always processed.
    # The former int(round(total / batch_size)) skipped the tail whenever it
    # was smaller than half a batch, and produced zero batches (followed by
    # a ZeroDivisionError in the final average) for very small datasets.
    num_batch = (total_data + batch_size - 1) // batch_size
    if num_batch == 0:
        # Nothing to train on; avoid dividing by zero below.
        return 0.0

    total_loss = 0.0
    for batch_idx in xrange(num_batch):
        start = batch_idx * batch_size
        stop = min(start + batch_size, total_data)
        batch_caption = data[start:stop]

        data_v1 = MsrDataUtil.getBatchVideoFeature(
            batch_caption, hf1, (feature_shape[0], 2048))
        data_v2 = MsrDataUtil.getBatchC3DVideoFeature(
            batch_caption, hf2, (feature_shape[0], 4096))

        # Both feature sets are merged into a single input by joining them
        # along the last axis.
        data_v = np.concatenate((data_v1, data_v2), axis=-1)
        data_c, data_y = MsrDataUtil.getBatchTrainCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)

        _, batch_loss = sess.run(
            [train, loss],
            feed_dict={input_video: data_v,
                       input_captions: data_c,
                       y: data_y})
        total_loss += batch_loss
        print('    batch_idx:%d/%d, loss:%.5f' %
              (batch_idx + 1, num_batch, batch_loss))

    return total_loss / num_batch

Beispiel #3

Datei anzeigen

def beamsearch_exe_test(sess, data, cate_info, batch_size, v2i, i2v, hf1, hf2, feature_shape1, feature_shape2,
                        predict_words, input_video1, input_video2, input_captions, input_categories, y,
                        finished_beam, logprobs_finished_beams, capl=16):
    """Generate a caption for every record in ``data`` and collect the results.

    ``data`` is processed in consecutive slices of at most ``batch_size``
    records. For each slice the feature, caption and category batches are
    built through ``MsrDataUtil``, the three fetches are evaluated with
    ``sess.run``, and the ``finished_beam`` output is converted to sentences
    via ``MsrDataUtil.convertCaptionI2V``.

    Args:
        sess: Object whose ``run(fetches, feed_dict=...)`` evaluates the
            fetches (presumably a TensorFlow session -- TODO confirm).
        data: Sequence of records; each record must support ``.keys()`` and
            its first key is used as the ``image_id``.
        cate_info: Forwarded to ``MsrDataUtil.getBatchVideoCategoriesInfo``.
        batch_size: Maximum number of records per batch (positive int).
        v2i: Forwarded to
            ``MsrDataUtil.getBatchTestCaptionWithSparseLabel``.
        i2v: Forwarded to ``MsrDataUtil.convertCaptionI2V``.
        hf1: Feature source handed to ``MsrDataUtil.getBatchVideoFeature``.
        hf2: Feature source handed to
            ``MsrDataUtil.getBatchC3DVideoFeature``.
        feature_shape1: Shape argument for the first feature loader; also
            passed to the category helper.
        feature_shape2: Shape argument for the C3D feature loader.
        predict_words: Fetch that is evaluated but whose value is unused.
        input_video1: Feed key for the first feature batch.
        input_video2: Feed key for the C3D feature batch.
        input_captions: Feed key for the caption batch.
        input_categories: Feed key for the category batch.
        y: Feed key for the label batch.
        finished_beam: Fetch whose value is decoded into captions.
        logprobs_finished_beams: Fetch that is evaluated but whose value is
            unused.
        capl: Forwarded as the ``capl`` keyword of the caption helper.

    Returns:
        A dict ``{'val_predictions': [...]}`` whose list holds one
        ``{'image_id': ..., 'caption': ...}`` entry per generated sentence.
    """
    caption_output = []
    total_data = len(data)
    # Ceiling division so every record receives a prediction. The former
    # int(round(total / batch_size)) dropped the trailing partial batch
    # whenever it was smaller than half a batch.
    num_batch = (total_data + batch_size - 1) // batch_size

    for batch_idx in xrange(num_batch):
        start = batch_idx * batch_size
        stop = min(start + batch_size, total_data)
        batch_caption = data[start:stop]

        data_v1 = MsrDataUtil.getBatchVideoFeature(batch_caption, hf1, feature_shape1)
        data_v2 = MsrDataUtil.getBatchC3DVideoFeature(batch_caption, hf2, feature_shape2)

        data_c, data_y = MsrDataUtil.getBatchTestCaptionWithSparseLabel(batch_caption, v2i, capl=capl)
        data_cate = MsrDataUtil.getBatchVideoCategoriesInfo(batch_caption, cate_info, feature_shape1)

        # All three fetches are still evaluated as before; only the
        # finished-beam output is needed for decoding.
        _, gw, _ = sess.run(
            [predict_words, finished_beam, logprobs_finished_beams],
            feed_dict={input_video1: data_v1,
                       input_video2: data_v2,
                       input_captions: data_c,
                       input_categories: data_cate,
                       y: data_y})

        generated_captions = MsrDataUtil.convertCaptionI2V(batch_caption, gw, i2v)

        for idx, sen in enumerate(generated_captions):
            # The first key of the record serves as its identifier.
            image_id = batch_caption[idx].keys()[0]
            print('%s : %s' % (image_id, sen))
            caption_output.append({'image_id': image_id, 'caption': sen})

    js = {}
    js['val_predictions'] = caption_output

    return js

Beispiel #4

Datei anzeigen

Datei: msrvtt_caption_s2s_merge_two_feature.py Projekt: youjiangxu/VideoCaptioning

def exe_test(sess, data, batch_size, v2i, i2v, hf1, hf2, feature_shape,
             predict_words, input_video, input_captions, y, finished_beam,
             logprobs_finished_beams, past_logprobs, beam_hidden_state,
             past_symbols_states, finished_beams_states, capl=16):
    """Caption every record in ``data`` using the ``predict_words`` fetch.

    ``data`` is walked in slices of at most ``batch_size`` records. Two
    feature batches are loaded through ``MsrDataUtil`` (widths 2048 and
    4096, paired with ``feature_shape[0]``), joined along the last axis and
    fed to ``sess.run`` together with the caption/label batch. The fetched
    words are turned into sentences by ``MsrDataUtil.convertCaptionI2V``.

    The parameters ``finished_beam``, ``logprobs_finished_beams``,
    ``past_logprobs``, ``beam_hidden_state``, ``past_symbols_states`` and
    ``finished_beams_states`` are accepted but not used by this function.

    Returns:
        A dict ``{'val_predictions': [...]}`` whose list holds one
        ``{'image_id': ..., 'caption': ...}`` entry per generated sentence;
        the ``image_id`` is the first key of the corresponding record.
    """
    predictions = []
    total_data = len(data)

    # Round up so the trailing partial batch is included.
    num_batch = int(math.ceil(float(total_data) / batch_size))
    print(num_batch)

    for batch_idx in xrange(num_batch):
        lo = batch_idx * batch_size
        hi = min(lo + batch_size, total_data)
        batch_caption = data[lo:hi]

        frames = feature_shape[0]
        feats_a = MsrDataUtil.getBatchVideoFeature(batch_caption, hf1, (frames, 2048))
        feats_b = MsrDataUtil.getBatchC3DVideoFeature(batch_caption, hf2, (frames, 4096))
        merged = np.concatenate((feats_a, feats_b), axis=-1)

        captions_in, labels_in = MsrDataUtil.getBatchTestCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)

        feed = {input_video: merged, input_captions: captions_in, y: labels_in}
        (words,) = sess.run([predict_words], feed_dict=feed)

        sentences = MsrDataUtil.convertCaptionI2V(batch_caption, words, i2v)

        for pos, sentence in enumerate(sentences):
            record_id = batch_caption[pos].keys()[0]
            print('%s : %s' % (record_id, sentence))
            predictions.append({'image_id': record_id, 'caption': sentence})

    return {'val_predictions': predictions}