Exemple #1
0
def exe_test(sess,
             data,
             batch_size,
             v2i,
             i2v,
             hf,
             feature_shape,
             predict_words,
             input_video,
             input_captions,
             y,
             capl=16):
    """Run caption prediction over ``data`` in batches and collect the results.

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)`` (presumably a
            TensorFlow session -- confirm against the caller).
        data: Sliceable sequence of entries. Each entry must be a dict-like
            object; its first key is reported as the ``image_id``.
        batch_size: Maximum number of entries fed per ``sess.run`` call.
        v2i: Passed through to ``MsrDataUtil.getBatchTestCaption``.
        i2v: Passed through to ``MsrDataUtil.convertCaptionI2V``.
        hf: Passed through to ``MsrDataUtil.getBatchStepVideoFeature``.
        feature_shape: Passed through to
            ``MsrDataUtil.getBatchStepVideoFeature``.
        predict_words: Fetch evaluated by ``sess.run``; its value is handed to
            ``MsrDataUtil.convertCaptionI2V``.
        input_video: Feed key receiving the batch video features.
        input_captions: Feed key receiving the batch caption input.
        y: Feed key receiving the batch caption targets.
        capl: Forwarded as ``capl`` to ``MsrDataUtil.getBatchTestCaption``.

    Returns:
        A dict ``{'val_predictions': [...]}`` where each list item is
        ``{'image_id': <first key of the entry>, 'caption': <sentence>}``.
        The list is empty when ``data`` is empty.
    """
    total_data = len(data)
    # Ceiling division: exactly as many batches as are needed to cover the
    # data. The previous ``round(...) + 1`` could add a trailing empty batch
    # (e.g. when ``total_data`` is a multiple of ``batch_size``).
    num_batch = (total_data + batch_size - 1) // batch_size

    caption_output = []
    for batch_idx in range(num_batch):
        start = batch_idx * batch_size
        # Slicing clamps at the end of ``data``, so the last batch may be
        # shorter than ``batch_size``.
        batch_caption = data[start:start + batch_size]

        data_v = MsrDataUtil.getBatchStepVideoFeature(batch_caption, hf,
                                                      feature_shape)
        data_c, data_y = MsrDataUtil.getBatchTestCaption(batch_caption,
                                                         v2i,
                                                         capl=capl)
        [gw] = sess.run([predict_words],
                        feed_dict={
                            input_video: data_v,
                            input_captions: data_c,
                            y: data_y
                        })

        generated_captions = MsrDataUtil.convertCaptionI2V(
            batch_caption, gw, i2v)

        for idx, sen in enumerate(generated_captions):
            # ``next(iter(...))`` yields the same first key as ``keys()[0]``
            # did on Python 2, and also works on Python 3.
            image_id = next(iter(batch_caption[idx]))
            print('%s : %s' % (image_id, sen))
            caption_output.append({
                'image_id': image_id,
                'caption': sen
            })

    return {'val_predictions': caption_output}
Exemple #2
0
def exe_train(sess,
              data,
              batch_size,
              v2i,
              hf,
              feature_shape,
              train,
              loss,
              input_video,
              input_captions,
              y,
              capl=16,
              max_batches=500):
    """Run one shuffled training pass over ``data`` and return the mean loss.

    ``data`` is shuffled in place with ``np.random.shuffle`` before batching.

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)`` (presumably a
            TensorFlow session -- confirm against the caller).
        data: Mutable, sliceable sequence of entries; shuffled in place.
        batch_size: Maximum number of entries fed per ``sess.run`` call.
        v2i: Passed through to ``MsrDataUtil.getNewBatchTrainCaption``.
        hf: Passed through to ``MsrDataUtil.getBatchStepVideoFeature``.
        feature_shape: Passed through to
            ``MsrDataUtil.getBatchStepVideoFeature``.
        train: Fetch evaluated by ``sess.run`` for its side effect; its
            result is discarded.
        loss: Fetch evaluated by ``sess.run``; its value is accumulated.
        input_video: Feed key receiving the batch video features.
        input_captions: Feed key receiving the batch caption input.
        y: Feed key receiving the batch caption targets.
        capl: Forwarded as ``capl`` to
            ``MsrDataUtil.getNewBatchTrainCaption``.
        max_batches: Upper bound on the number of batches run in this call.
            Defaults to 500, the cap that was previously hard-coded; pass
            ``None`` to run every batch.

    Returns:
        The loss averaged over the batches actually run, or ``0.0`` when no
        batch is run (empty ``data`` or ``max_batches`` of 0).
    """
    np.random.shuffle(data)

    total_data = len(data)
    # Ceiling division so a trailing partial batch is not dropped.
    num_batch = (total_data + batch_size - 1) // batch_size

    # Never iterate past the end of the data (the old fixed ``xrange(500)``
    # fed empty slices whenever fewer than 500 batches existed).
    if max_batches is None:
        steps = num_batch
    else:
        steps = min(num_batch, max_batches)

    if steps <= 0:
        # Nothing to train on; avoid dividing by zero below.
        return 0.0

    total_loss = 0.0
    for batch_idx in range(steps):
        start = batch_idx * batch_size
        # Slicing clamps at the end of ``data``, so the last batch may be
        # shorter than ``batch_size``.
        batch_caption = data[start:start + batch_size]

        data_v = MsrDataUtil.getBatchStepVideoFeature(batch_caption, hf,
                                                      feature_shape)
        data_c, data_y = MsrDataUtil.getNewBatchTrainCaption(batch_caption,
                                                             v2i,
                                                             capl=capl)

        _, batch_loss = sess.run([train, loss],
                                 feed_dict={
                                     input_video: data_v,
                                     input_captions: data_c,
                                     y: data_y
                                 })
        total_loss += batch_loss
        print('    batch_idx:%d/%d, loss:%.5f' %
              (batch_idx + 1, steps, batch_loss))

    # Average over the batches that were actually executed, not over the
    # nominal batch count.
    return total_loss / steps