Python MsrDataUtil.getBatchTrainCaptionWithSparseLabel Beispiele

Programmiersprache: Python

Namespace / Paketname: utils

Klasse / Typ: MsrDataUtil

Methode / Funktion: getBatchTrainCaptionWithSparseLabel

Beispiele auf hotexamples.com: 4

Python MsrDataUtil.getBatchTrainCaptionWithSparseLabel - 4 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Methode utils.MsrDataUtil.getBatchTrainCaptionWithSparseLabel, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um ihre Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

getBatchVideoFeature(10)

convertCaptionI2V(6)

getBatchVideoCategoriesInfo(5)

getBatchC3DVideoFeature(4)

getBatchTestCaptionWithSparseLabel(4)

getBatchTrainCaptionWithSparseLabel(4)

create_vocabulary_word2vec(3)

getBatchStepVideoFeature(2)

getBatchTestCaption(2)

getBatchVideoAudioInfo(2)

getBatchTrainCaption(1)

getCategoriesInfo(1)

getNewBatchTrainCaption(1)

Beispiel #1

Datei anzeigen

Datei: msrvtt_final_version_bi_3thstep.py Projekt: youjiangxu/VideoCaptioning

def exe_train(sess,
              data,
              cate_info,
              batch_size,
              v2i,
              hf1,
              hf2,
              feature_shape1,
              feature_shape2,
              train,
              loss,
              input_video1,
              input_video2,
              input_captions,
              input_categories,
              y,
              capl=16):
    """Run one training pass over ``data`` and return the mean batch loss.

    ``data`` is shuffled in place and cut into consecutive slices of at most
    ``batch_size`` items. For every slice the ``MsrDataUtil`` helpers build
    the feed values, then ``sess.run([train, loss], feed_dict=...)`` is
    called. With probability 1/2 per batch, both video feature arrays are
    reversed along axis 1 (presumably the frame axis -- confirm against
    ``MsrDataUtil``).

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)``; presumably a
            TensorFlow session -- confirm against the caller.
        data: Mutable sequence of caption records; shuffled in place.
        cate_info: Forwarded to ``MsrDataUtil.getBatchVideoCategoriesInfo``.
        batch_size: Maximum number of records per batch.
        v2i: Forwarded to ``getBatchTrainCaptionWithSparseLabel``
            (presumably a vocabulary-to-index mapping -- confirm).
        hf1: Feature source forwarded to ``getBatchVideoFeature``.
        hf2: Feature source forwarded to ``getBatchC3DVideoFeature``.
        feature_shape1: Shape forwarded with ``hf1``; also forwarded to
            ``getBatchVideoCategoriesInfo``.
        feature_shape2: Shape forwarded with ``hf2``.
        train: Fetch run for its side effect; its result is discarded.
        loss: Fetch whose per-batch value is accumulated.
        input_video1: ``feed_dict`` key for the ``hf1`` features.
        input_video2: ``feed_dict`` key for the ``hf2`` features.
        input_captions: ``feed_dict`` key for the caption inputs.
        input_categories: ``feed_dict`` key for the category inputs.
        y: ``feed_dict`` key for the caption labels.
        capl: Forwarded as ``capl=`` to
            ``getBatchTrainCaptionWithSparseLabel``.

    Returns:
        The sum of the per-batch losses divided by the number of batches,
        or ``0.0`` when ``data`` is empty.
    """
    np.random.shuffle(data)

    total_data = len(data)
    if total_data == 0:
        # No batches to run; avoid dividing by a zero batch count below.
        return 0.0

    # Ceiling division so a trailing partial batch is always trained on and
    # a dataset smaller than half a batch still yields one batch.
    num_batch = int(np.ceil(total_data * 1.0 / batch_size))

    total_loss = 0.0
    for batch_idx in range(num_batch):
        start = batch_idx * batch_size
        # Slicing clamps at the end of ``data``, so the last batch may be
        # shorter than ``batch_size``.
        batch_caption = data[start:start + batch_size]

        data_v1 = MsrDataUtil.getBatchVideoFeature(batch_caption, hf1,
                                                   feature_shape1)
        data_v2 = MsrDataUtil.getBatchC3DVideoFeature(batch_caption, hf2,
                                                      feature_shape2)

        # Coin flip: reverse both feature arrays along axis 1 together so
        # the two streams stay aligned.
        flag = np.random.randint(0, 2)
        if flag == 1:
            data_v1 = data_v1[:, ::-1, :]
            data_v2 = data_v2[:, ::-1, :]

        data_c, data_y = MsrDataUtil.getBatchTrainCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)
        data_cate = MsrDataUtil.getBatchVideoCategoriesInfo(
            batch_caption, cate_info, feature_shape1)

        _, l = sess.run(
            [train, loss],
            feed_dict={
                input_video1: data_v1,
                input_video2: data_v2,
                input_captions: data_c,
                input_categories: data_cate,
                y: data_y
            })
        total_loss += l
        print('    batch_idx:%d/%d, loss:%.5f' % (batch_idx + 1, num_batch, l))

    return total_loss / num_batch

Beispiel #2

Datei anzeigen

Datei: msrvtt_caption_s2s_merge_two_feature.py Projekt: youjiangxu/VideoCaptioning

def exe_train(sess, data, batch_size, v2i, hf1, hf2, feature_shape,
              train, loss, input_video, input_captions, y, capl=16):
    """Run one training pass over ``data`` and return the mean batch loss.

    ``data`` is shuffled in place and cut into consecutive slices of at most
    ``batch_size`` items. For every slice two feature arrays are fetched
    through ``MsrDataUtil``, concatenated along their last axis, and fed
    together with the caption inputs and labels to
    ``sess.run([train, loss], feed_dict=...)``.

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)``; presumably a
            TensorFlow session -- confirm against the caller.
        data: Mutable sequence of caption records; shuffled in place.
        batch_size: Maximum number of records per batch.
        v2i: Forwarded to ``getBatchTrainCaptionWithSparseLabel``
            (presumably a vocabulary-to-index mapping -- confirm).
        hf1: Feature source forwarded to ``getBatchVideoFeature``.
        hf2: Feature source forwarded to ``getBatchC3DVideoFeature``.
        feature_shape: Only ``feature_shape[0]`` is read; the second
            dimension is fixed at 2048 for ``hf1`` and 4096 for ``hf2``.
        train: Fetch run for its side effect; its result is discarded.
        loss: Fetch whose per-batch value is accumulated.
        input_video: ``feed_dict`` key for the concatenated features.
        input_captions: ``feed_dict`` key for the caption inputs.
        y: ``feed_dict`` key for the caption labels.
        capl: Forwarded as ``capl=`` to
            ``getBatchTrainCaptionWithSparseLabel``.

    Returns:
        The sum of the per-batch losses divided by the number of batches,
        or ``0.0`` when ``data`` is empty.
    """
    np.random.shuffle(data)

    total_data = len(data)
    if total_data == 0:
        # No batches to run; avoid dividing by a zero batch count below.
        return 0.0

    # Ceiling division so a trailing partial batch is always trained on and
    # a dataset smaller than half a batch still yields one batch.
    num_batch = int(np.ceil(total_data * 1.0 / batch_size))

    total_loss = 0.0
    for batch_idx in range(num_batch):
        start = batch_idx * batch_size
        # Slicing clamps at the end of ``data``, so the last batch may be
        # shorter than ``batch_size``.
        batch_caption = data[start:start + batch_size]

        data_v1 = MsrDataUtil.getBatchVideoFeature(
            batch_caption, hf1, (feature_shape[0], 2048))
        data_v2 = MsrDataUtil.getBatchC3DVideoFeature(
            batch_caption, hf2, (feature_shape[0], 4096))

        # The two feature streams are joined as-is (no normalisation) along
        # the last axis.
        data_v = np.concatenate((data_v1, data_v2), axis=-1)
        data_c, data_y = MsrDataUtil.getBatchTrainCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)

        _, l = sess.run([train, loss],
                        feed_dict={input_video: data_v,
                                   input_captions: data_c,
                                   y: data_y})
        total_loss += l
        print('    batch_idx:%d/%d, loss:%.5f' %(batch_idx+1,num_batch,l))

    return total_loss / num_batch

Beispiel #3

Datei anzeigen

def exe_train(sess,
              data,
              audio_info,
              cate_info,
              batch_size,
              v2i,
              hf,
              feature_shape,
              train,
              loss,
              input_video,
              input_captions,
              input_categories,
              input_audio,
              y,
              capl=16):
    """Run one training pass over ``data`` and return the mean batch loss.

    ``data`` is shuffled in place and cut into consecutive slices of at most
    ``batch_size`` items. For every slice the ``MsrDataUtil`` helpers build
    video, caption, category and audio feed values, then
    ``sess.run([train, loss], feed_dict=...)`` is called.

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)``; presumably a
            TensorFlow session -- confirm against the caller.
        data: Mutable sequence of caption records; shuffled in place.
        audio_info: Forwarded to ``MsrDataUtil.getBatchVideoAudioInfo``.
        cate_info: Forwarded to ``MsrDataUtil.getBatchVideoCategoriesInfo``.
        batch_size: Maximum number of records per batch.
        v2i: Forwarded to ``getBatchTrainCaptionWithSparseLabel``
            (presumably a vocabulary-to-index mapping -- confirm).
        hf: Feature source forwarded to ``getBatchVideoFeature``.
        feature_shape: Shape forwarded to the video, category and audio
            helpers.
        train: Fetch run for its side effect; its result is discarded.
        loss: Fetch whose per-batch value is accumulated.
        input_video: ``feed_dict`` key for the video features.
        input_captions: ``feed_dict`` key for the caption inputs.
        input_categories: ``feed_dict`` key for the category inputs.
        input_audio: ``feed_dict`` key for the audio inputs.
        y: ``feed_dict`` key for the caption labels.
        capl: Forwarded as ``capl=`` to
            ``getBatchTrainCaptionWithSparseLabel``.

    Returns:
        The sum of the per-batch losses divided by the number of batches,
        or ``0.0`` when ``data`` is empty.
    """
    np.random.shuffle(data)

    total_data = len(data)
    if total_data == 0:
        # No batches to run; avoid dividing by a zero batch count below.
        return 0.0

    # Ceiling division so a trailing partial batch is always trained on and
    # a dataset smaller than half a batch still yields one batch.
    num_batch = int(np.ceil(total_data * 1.0 / batch_size))

    total_loss = 0.0
    for batch_idx in range(num_batch):
        start = batch_idx * batch_size
        # Slicing clamps at the end of ``data``, so the last batch may be
        # shorter than ``batch_size``.
        batch_caption = data[start:start + batch_size]

        data_v = MsrDataUtil.getBatchVideoFeature(batch_caption, hf,
                                                  feature_shape)
        data_c, data_y = MsrDataUtil.getBatchTrainCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)
        data_cate = MsrDataUtil.getBatchVideoCategoriesInfo(
            batch_caption, cate_info, feature_shape)
        data_audio = MsrDataUtil.getBatchVideoAudioInfo(
            batch_caption, audio_info, feature_shape)

        _, l = sess.run(
            [train, loss],
            feed_dict={
                input_video: data_v,
                input_captions: data_c,
                input_categories: data_cate,
                input_audio: data_audio,
                y: data_y
            })
        total_loss += l
        print('    batch_idx:%d/%d, loss:%.5f' % (batch_idx + 1, num_batch, l))

    return total_loss / num_batch

Beispiel #4

Datei anzeigen

Datei: msrvtt_sparse_bi_attention_mGRU.py Projekt: youjiangxu/VideoCaptioning

def exe_train(sess,
              data,
              batch_size,
              v2i,
              hf,
              feature_shape,
              train,
              loss,
              input_video,
              input_captions,
              y,
              capl=16):
    """Run one training pass over ``data`` and return the mean batch loss.

    ``data`` is shuffled in place and cut into consecutive slices of at most
    ``batch_size`` items. For every slice the ``MsrDataUtil`` helpers build
    the feed values, then ``sess.run([train, loss], feed_dict=...)`` is
    called. With probability 1/2 per batch, the video feature array is
    reversed along axis 1 (presumably the frame axis -- confirm against
    ``MsrDataUtil``).

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)``; presumably a
            TensorFlow session -- confirm against the caller.
        data: Mutable sequence of caption records; shuffled in place.
        batch_size: Maximum number of records per batch.
        v2i: Forwarded to ``getBatchTrainCaptionWithSparseLabel``
            (presumably a vocabulary-to-index mapping -- confirm).
        hf: Feature source forwarded to ``getBatchVideoFeature``.
        feature_shape: Shape forwarded to ``getBatchVideoFeature``.
        train: Fetch run for its side effect; its result is discarded.
        loss: Fetch whose per-batch value is accumulated.
        input_video: ``feed_dict`` key for the video features.
        input_captions: ``feed_dict`` key for the caption inputs.
        y: ``feed_dict`` key for the caption labels.
        capl: Forwarded as ``capl=`` to
            ``getBatchTrainCaptionWithSparseLabel``.

    Returns:
        The sum of the per-batch losses divided by the number of batches,
        or ``0.0`` when ``data`` is empty.
    """
    np.random.shuffle(data)

    total_data = len(data)
    if total_data == 0:
        # No batches to run; avoid dividing by a zero batch count below.
        return 0.0

    # Ceiling division so a trailing partial batch is always trained on and
    # a dataset smaller than half a batch still yields one batch.
    num_batch = int(np.ceil(total_data * 1.0 / batch_size))

    total_loss = 0.0
    for batch_idx in range(num_batch):
        start = batch_idx * batch_size
        # Slicing clamps at the end of ``data``, so the last batch may be
        # shorter than ``batch_size``.
        batch_caption = data[start:start + batch_size]

        data_v = MsrDataUtil.getBatchVideoFeature(batch_caption, hf,
                                                  feature_shape)
        # Coin flip: reverse the feature array along axis 1.
        flag = np.random.randint(0, 2)
        if flag == 1:
            data_v = data_v[:, ::-1, :]
        data_c, data_y = MsrDataUtil.getBatchTrainCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)

        _, l = sess.run([train, loss],
                        feed_dict={
                            input_video: data_v,
                            input_captions: data_c,
                            y: data_y
                        })
        total_loss += l
        print('    batch_idx:%d/%d, loss:%.5f' % (batch_idx + 1, num_batch, l))

    return total_loss / num_batch