def main(unused_argv):
    extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
    writer = tf.python_io.TFRecordWriter(FLAGS.output_tfrecords_file)
    total_written = 0
    total_error = 0
    for video_file, labels in csv.reader(open(FLAGS.input_videos_csv)):
        rgb_features = []
        sum_rgb_features = None
        for rgb in frame_iterator(video_file,
                                  every_ms=1000.0 / FLAGS.frames_per_second):
            features = extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
            if sum_rgb_features is None:
                sum_rgb_features = features
            else:
                sum_rgb_features += features
            rgb_features.append(_bytes_feature(quantize(features)))

        if not rgb_features:
            print('Could not get features for ' + video_file, file=sys.stderr)
            total_error += 1
            continue

        mean_rgb_features = sum_rgb_features / len(rgb_features)

        # Create SequenceExample proto and write to output.
        feature_list = {
            FLAGS.image_feature_key:
            tf.train.FeatureList(feature=rgb_features),
        }
        context_features = {
            FLAGS.labels_feature_key:
            _int64_list_feature(sorted(map(int, labels.split(';')))),
            FLAGS.video_file_feature_key:
            _bytes_feature(_make_bytes(map(ord, video_file))),
            'mean_' + FLAGS.image_feature_key:
            tf.train.Feature(float_list=tf.train.FloatList(
                value=mean_rgb_features)),
        }

        if FLAGS.insert_zero_audio_features:
            zero_vec = [0] * 128
            feature_list['audio'] = tf.train.FeatureList(
                feature=[_bytes_feature(_make_bytes(zero_vec))] *
                len(rgb_features))
            context_features['mean_audio'] = tf.train.Feature(
                float_list=tf.train.FloatList(value=zero_vec))

        if FLAGS.skip_frame_level_features:
            example = tf.train.SequenceExample(context=tf.train.Features(
                feature=context_features))
        else:
            example = tf.train.SequenceExample(
                context=tf.train.Features(feature=context_features),
                feature_lists=tf.train.FeatureLists(feature_list=feature_list))
        writer.write(example.SerializeToString())
        total_written += 1

    writer.close()
    print('Successfully encoded %i out of %i videos' %
          (total_written, total_written + total_error))
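
These examples rely on a handful of small helpers that are not shown in the snippets: _bytes_feature, _int64_list_feature, _make_bytes, and quantize. A minimal sketch of what they do, modeled on the YouTube-8M reference extractor (treat the exact bodies as an assumption, not the verbatim upstream code):

import numpy
import tensorflow as tf


def _int64_list_feature(int64_list):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=int64_list))


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _make_bytes(int_array):
    # Pack a sequence of ints in [0, 255] into a bytes object (str on Python 2).
    if bytes == str:  # Python 2
        return ''.join(map(chr, int_array))
    return bytes(int_array)


def quantize(features, min_quantized_value=-2.0, max_quantized_value=2.0):
    # Linearly map float features in [min, max] onto uint8 values in [0, 255],
    # matching the 8-bit quantization used by the YouTube-8M dataset.
    features = numpy.clip(features, min_quantized_value, max_quantized_value)
    quantize_range = max_quantized_value - min_quantized_value
    features = (features - min_quantized_value) * (255.0 / quantize_range)
    return _make_bytes([int(round(f)) for f in features])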
def process_img(filename, output):
    print(filename)

    # prefix = path_leaf(filename).split(".")[0]  # prefix is simple filename
    # output = "%s/%s_output.tfrecord" % (dir, prefix)
    extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
    writer = tf.python_io.TFRecordWriter(output)

    writeTfrecord(filename, extractor, writer)
    writer.close()
    print('Successfully encoded path = %s ' % filename)
Example #3
def __init__(self, model_dir):
    # Directory storing the model files; defaults to ~/yt8m.
    self.model_dir = model_dir
    # This many frames per second will be processed.
    self.frames_per_second = 1
    # If set, frame-level features will not be written; only video-level
    # features will be written, with feature names mean_*.
    self.skip_frame_level_features = False
    # Labels will be written to a context feature with this key, as an
    # int64 list feature.
    self.labels_feature_key = 'labels'
    # Image features will be written to a sequence feature with this key,
    # as a bytes list feature, with only one entry per frame, containing
    # the quantized feature string.
    self.image_feature_key = 'rgb'
    # The input <video_file> will be written to a context feature with this
    # key, as a bytes list feature with only one entry, containing the file
    # path of the video. Useful for debugging but not for training or eval.
    self.video_file_feature_key = 'id'
    # If set, inserts features named "audio" as 128-D zero vectors, which
    # allows using the YouTube-8M pre-trained model.
    self.insert_zero_audio_features = True
    self.extractor = feature_extractor.YouTube8MFeatureExtractor(self.model_dir)
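
A hypothetical usage sketch of this config object; FeatureConfig is an assumed name for the class above (the original class name is not shown), and the dummy frame stands in for a real video frame:

import os
import numpy

# 'FeatureConfig' is an assumed name for the class defined above.
cfg = FeatureConfig(model_dir=os.path.expanduser('~/yt8m'))
# A dummy BGR frame; real code would grab one with OpenCV.
frame = numpy.zeros((300, 300, 3), dtype=numpy.uint8)
# Reverse the channel order (BGR -> RGB) as the other examples do.
features = cfg.extractor.extract_rgb_frame_features(frame[:, :, ::-1])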
def process_img(filename):
    print(filename)
    # print(filename.encode("utf-8"))

    prefix = path_leaf(filename).split(".")[0]  # prefix is simple filename
    output = "%s_output.tfrecord" % prefix
    extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
    writer = tf.python_io.TFRecordWriter(output)
    # print("$$$$$ %s" % (FLAGS.input_videos_csv))
    labels = "0"

    rgb_features = []
    # rgb = cv2.imread(filename, cv2.IMREAD_COLOR)
    rgb = cv_imread(filename)
    features = extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
    rgb_features.append(_bytes_feature(quantize(features)))
    if not rgb_features:
        print('Could not get features for ' + filename, file=sys.stderr)
        return

    # Create SequenceExample proto and write to output.
    feature_list = {
        FLAGS.image_feature_key: tf.train.FeatureList(feature=rgb_features),
    }
    # if FLAGS.insert_zero_audio_features:
    #     feature_list['audio'] = tf.train.FeatureList(
    #         feature=[_bytes_feature(_make_bytes([0] * 128))] * len(rgb_features))

    print(feature_list)
    example = tf.train.SequenceExample(
        context=tf.train.Features(
            feature={
                FLAGS.labels_feature_key:
                _int64_list_feature(sorted(map(int, labels.split(';')))),
                FLAGS.video_file_feature_key:
                _bytes_feature(
                    _make_bytes(map(ord, quote(filename)))
                ),  # quote() keeps the path ASCII; non-ASCII (e.g. Chinese) filenames would otherwise cause problems
            }),
        feature_lists=tf.train.FeatureLists(feature_list=feature_list))
    writer.write(example.SerializeToString())
    writer.close()
    print('Successfully encoded path = %s ' % filename)
def main(unused_argv):
    extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
    writer = tf.python_io.TFRecordWriter(FLAGS.output_tfrecords_file)
    total_written = 0
    total_error = 0
    for video_file, labels in csv.reader(open(FLAGS.input_videos_csv)):
        rgb_features = []
        for rgb in frame_iterator(video_file,
                                  every_ms=1000.0 / FLAGS.frames_per_second):
            features = extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
            rgb_features.append(_bytes_feature(quantize(features)))

        if not rgb_features:
            print('Could not get features for ' + video_file, file=sys.stderr)
            total_error += 1
            continue

        # Create SequenceExample proto and write to output.
        feature_list = {
            FLAGS.image_feature_key:
            tf.train.FeatureList(feature=rgb_features),
        }
        if FLAGS.insert_zero_audio_features:
            try:
                wav_file = video_file + '.wav'
                examples_batch = vggish_input.wavfile_to_examples(wav_file)
                pproc = vggish_postprocess.Postprocessor(
                    'vggish_pca_params.npz')
                with tf.Graph().as_default(), tf.Session() as sess:
                    # Define the model in inference mode, load the checkpoint, and
                    # locate input and output tensors.
                    vggish_slim.define_vggish_slim(training=False)
                    vggish_slim.load_vggish_slim_checkpoint(
                        sess, 'vggish_model.ckpt')
                    features_tensor = sess.graph.get_tensor_by_name(
                        vggish_params.INPUT_TENSOR_NAME)
                    embedding_tensor = sess.graph.get_tensor_by_name(
                        vggish_params.OUTPUT_TENSOR_NAME)
                    [embedding_batch] = sess.run(
                        [embedding_tensor],
                        feed_dict={features_tensor: examples_batch})
                    postprocessed_batch = pproc.postprocess(embedding_batch)
                    feature_list['audio'] = tf.train.FeatureList(feature=[
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[embedding.tobytes()]))
                        for embedding in postprocessed_batch
                    ])
            except Exception:  # fall back to zero audio features if VGGish fails
                feature_list['audio'] = tf.train.FeatureList(
                    feature=[_bytes_feature(_make_bytes([0] * 128))] *
                    len(rgb_features))

        example = tf.train.SequenceExample(
            context=tf.train.Features(
                feature={
                    FLAGS.labels_feature_key:
                    _int64_list_feature(sorted(map(int, labels.split(';')))),
                    FLAGS.video_file_key_feature_key:
                    _bytes_feature(_make_bytes(map(ord, video_file))),
                }),
            feature_lists=tf.train.FeatureLists(feature_list=feature_list))
        writer.write(example.SerializeToString())
        total_written += 1

    writer.close()
    print('Successfully encoded %i out of %i videos' %
          (total_written, total_written + total_error))
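
Most of these snippets iterate video frames through a frame_iterator helper that is not reproduced here. A minimal sketch of the idea, assuming OpenCV (cv2) and modeled on the YouTube-8M reference tooling:

import sys

import cv2


def frame_iterator(filename, every_ms=1000, max_num_frames=300):
    # Yield one BGR frame roughly every `every_ms` milliseconds.
    video_capture = cv2.VideoCapture()
    if not video_capture.open(filename):
        print('Error: Cannot open video file ' + filename, file=sys.stderr)
        return
    last_ts = -99999  # Timestamp (ms) of the last retrieved frame.
    num_retrieved = 0
    while num_retrieved < max_num_frames:
        # Skip frames until at least `every_ms` has elapsed.
        while video_capture.get(cv2.CAP_PROP_POS_MSEC) < every_ms + last_ts:
            if not video_capture.read()[0]:
                return
        last_ts = video_capture.get(cv2.CAP_PROP_POS_MSEC)
        has_frames, frame = video_capture.read()
        if not has_frames:
            break
        yield frame
        num_retrieved += 1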
Example #7
def process(videofile, tfrecordDir=''):
    file_arr = os.path.splitext(videofile)
    if tfrecordDir:
        filename = os.path.basename(videofile).split(".")[0]
        output_tfrecords_file = "%s/%s_output.tfrecord" % (tfrecordDir, filename)
    else:
        output_tfrecords_file = "%s_output.tfrecord" % file_arr[0]
    FLAGS.output_tfrecords_file = output_tfrecords_file
    print("output_tfrecords_file", output_tfrecords_file)

    extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
    writer = tf.python_io.TFRecordWriter(FLAGS.output_tfrecords_file)

    total_written = 0
    total_error = 0
    labels = "0"
    vh = cv_video_helper.VideoHelper()

    for num_retrieved, rgb in vh.frame_iterator(videofile,
                                                every_ms=1000.0 /
                                                FLAGS.frames_per_second):
        rgb_features = []
        features = extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
        rgb_features.append(_bytes_feature(quantize(features)))
        if not rgb_features:
            printError('Could not get features for ' + videofile +
                       ', frame order is ' + str(num_retrieved))
            total_error += 1
            continue

        # Create SequenceExample proto and write to output.
        feature_list = {
            FLAGS.image_feature_key:
            tf.train.FeatureList(feature=rgb_features),
        }
        if FLAGS.insert_zero_audio_features:
            feature_list['audio'] = tf.train.FeatureList(
                feature=[_bytes_feature(_make_bytes([0] * 128))] *
                len(rgb_features))

        # print(feature_list)
        # E:/work/ai_script/tmp.MP4 -> E:/work/ai_script/tmp/%d.MP4
        # url encode
        virtualPath = quote(file_arr[0]) + '/' + str(num_retrieved) + file_arr[1]
        print("virtualPath = ", virtualPath)
        example = tf.train.SequenceExample(
            context=tf.train.Features(
                feature={
                    FLAGS.labels_feature_key:
                    _int64_list_feature(sorted(map(int, labels.split(';')))),
                    FLAGS.video_file_feature_key:
                    _bytes_feature(_make_bytes(map(ord, virtualPath))),
                }),
            feature_lists=tf.train.FeatureLists(feature_list=feature_list))
        writer.write(example.SerializeToString())
        total_written += 1

    writer.close()
    print('Successfully encoded %i out of %i videos' %
          (total_written, total_written + total_error))
Example #8
def write_csv(all_video, writer):
    extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
    total_written = 0
    total_error = 0
    for video_file, labels in all_video.items():
        rgb_features = []
        sum_rgb_features = None
        if os.path.isfile(video_file[:-4] + '.wav'):
            print('wav file already exists')
        elif FLAGS.extract_wav:
            _check = convert.convert_wav(video_file)

        if not FLAGS.extract_wav:
            audio_feature = [0] * 128
            count = 0
            for rgb in frame_iterator(video_file,
                                      every_ms=1000.0 /
                                      FLAGS.frames_per_second):
                features = extractor.extract_rgb_frame_features(
                    rgb[:, :, ::-1])
                if sum_rgb_features is None:
                    sum_rgb_features = features
                else:
                    sum_rgb_features += features
                rgb_features.append(_bytes_feature(quantize(features)))

            if not rgb_features:
                print('Could not get features for ' + video_file, file=sys.stderr)
                total_error += 1
                continue

            mean_rgb_features = sum_rgb_features / len(rgb_features)

            # Create SequenceExample proto and write to output.
            feature_list = {
                FLAGS.image_feature_key:
                tf.train.FeatureList(feature=rgb_features),
            }
            context_features = {
                FLAGS.labels_feature_key:
                _int64_list_feature(sorted(map(int, labels.split(';')))),
                FLAGS.video_file_feature_key:
                _bytes_feature(_make_bytes(map(ord, video_file))),
                'mean_' + FLAGS.image_feature_key:
                tf.train.Feature(float_list=tf.train.FloatList(
                    value=mean_rgb_features)),
            }

            zero_vec = audio_feature  # the 128-D zero vector built above
            feature_list['audio'] = tf.train.FeatureList(
                feature=[_bytes_feature(_make_bytes(zero_vec))] *
                len(rgb_features))
            context_features['mean_audio'] = tf.train.Feature(
                float_list=tf.train.FloatList(value=zero_vec))
            count += 1

            if FLAGS.skip_frame_level_features:
                example = tf.train.SequenceExample(context=tf.train.Features(
                    feature=context_features))
            else:
                example = tf.train.SequenceExample(
                    context=tf.train.Features(feature=context_features),
                    feature_lists=tf.train.FeatureLists(
                        feature_list=feature_list))
            writer.write(example.SerializeToString())
            total_written += 1
        else:
            if os.path.isfile(video_file[:-4] + '.wav'):
                audio_feature = vggish_inference.extract(video_file[:-4] +
                                                         '.wav')
                count = 0
                for rgb in frame_iterator(video_file,
                                          every_ms=1000.0 /
                                          FLAGS.frames_per_second):
                    features = extractor.extract_rgb_frame_features(
                        rgb[:, :, ::-1])
                    if sum_rgb_features is None:
                        sum_rgb_features = features
                    else:
                        sum_rgb_features += features
                    rgb_features.append(_bytes_feature(quantize(features)))

                if not rgb_features:
                    print('Could not get features for ' + video_file,
                          file=sys.stderr)
                    total_error += 1
                    continue

                mean_rgb_features = sum_rgb_features / len(rgb_features)

                # Create SequenceExample proto and write to output.
                feature_list = {
                    FLAGS.image_feature_key:
                    tf.train.FeatureList(feature=rgb_features),
                }
                context_features = {
                    FLAGS.labels_feature_key:
                    _int64_list_feature(sorted(map(int, labels.split(';')))),
                    FLAGS.video_file_feature_key:
                    _bytes_feature(_make_bytes(map(ord, video_file))),
                    'mean_' + FLAGS.image_feature_key:
                    tf.train.Feature(float_list=tf.train.FloatList(
                        value=mean_rgb_features)),
                }

                zero_vec = audio_feature[count]  # count-th VGGish embedding (name kept from the zero-audio path)
                feature_list['audio'] = tf.train.FeatureList(
                    feature=[_bytes_feature(_make_bytes(zero_vec))] *
                    len(rgb_features))
                context_features['mean_audio'] = tf.train.Feature(
                    float_list=tf.train.FloatList(value=zero_vec))
                count += 1

                if FLAGS.skip_frame_level_features:
                    example = tf.train.SequenceExample(
                        context=tf.train.Features(feature=context_features))
                else:
                    example = tf.train.SequenceExample(
                        context=tf.train.Features(feature=context_features),
                        feature_lists=tf.train.FeatureLists(
                            feature_list=feature_list))
                writer.write(example.SerializeToString())
                total_written += 1
                # The slice video_file[38:-4] assumes a fixed-length download
                # path prefix before the VideoID and a 4-character extension.
                update_cmd = ('UPDATE adnetwork_task.youtube_crawler '
                              'SET status_process = 1 '
                              'WHERE VideoID = "' + video_file[38:-4] + '"')
                cursor.execute(update_cmd)
                db.commit()
                os.remove(video_file)
                os.remove(video_file[:-4] + '.wav')

    print('Successfully encoded %i out of %i videos' %
          (total_written, total_written + total_error))
def printError(msg):
    print(msg, file=sys.stderr)


def getTimeInMills():
    return int(round(time.time() * 1000))


# MAIN support gen for multi image files.
# py out-dir file1 file2 ...
if len(sys.argv) < 3:
    printError("Error: not enough arguments.")
else:
    outDir = sys.argv[1].strip()
    extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
    # tfs_time_outputs.tfrecord
    tfs_path = 'tfs_%s_outputs.tfrecord' % str(getTimeInMills())
    tfs_full_path = os.path.join(outDir, tfs_path)
    # tfs_config.txt
    tfsConfigWriter = open(os.path.join(outDir, 'tfs_config.txt'), 'at')  # append

    tfWriter = tf.python_io.TFRecordWriter(tfs_full_path)
    first = True

    size = len(sys.argv)
    for i in range(2, size):
        file_path = sys.argv[i].strip()

        # check file
Example #10
def setUp(self):
    self._extractor = feature_extractor.YouTube8MFeatureExtractor()
def extract_features(output_tfrecords_file,
                     videos_urls,
                     videos_labels,
                     streaming=False,
                     model_dir='yt8m',
                     frames_per_second=1,
                     skip_frame_level_features=False,
                     labels_feature_key='labels',
                     image_feature_key='rgb',
                     video_file_feature_key='id',
                     insert_zero_audio_features=True):

    extractor = feature_extractor.YouTube8MFeatureExtractor(model_dir)
    writer = tf.python_io.TFRecordWriter(output_tfrecords_file)
    total_written = 0
    total_error = 0
    ydl = youtube_dl.YoutubeDL({})
    for video, labels in zip(videos_urls, videos_labels):
        rgb_features = []
        sum_rgb_features = None
        for rgb in frame_iterator(video,
                                  ydl,
                                  streaming,
                                  every_ms=1000.0 / frames_per_second):
            features = extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
            if sum_rgb_features is None:
                sum_rgb_features = features
            else:
                sum_rgb_features += features
            rgb_features.append(_bytes_feature(quantize(features)))

        if not rgb_features:
            print('Could not get features for ' + video, file=sys.stderr)
            total_error += 1
            continue

        mean_rgb_features = sum_rgb_features / len(rgb_features)

        # Create SequenceExample proto and write to output.
        feature_list = {
            image_feature_key: tf.train.FeatureList(feature=rgb_features),
        }
        context_features = {
            labels_feature_key:
            _int64_list_feature(sorted(map(int, labels.split(',')))),
            video_file_feature_key:
            _bytes_feature(_make_bytes(list(map(ord, video)))),
            'mean_' + image_feature_key:
            tf.train.Feature(float_list=tf.train.FloatList(
                value=mean_rgb_features)),
        }

        if insert_zero_audio_features:
            zero_vec = [0] * 128
            feature_list['audio'] = tf.train.FeatureList(
                feature=[_bytes_feature(_make_bytes(zero_vec))] *
                len(rgb_features))
            context_features['mean_audio'] = tf.train.Feature(
                float_list=tf.train.FloatList(value=zero_vec))

        if skip_frame_level_features:
            example = tf.train.SequenceExample(context=tf.train.Features(
                feature=context_features))
        else:
            example = tf.train.SequenceExample(
                context=tf.train.Features(feature=context_features),
                feature_lists=tf.train.FeatureLists(feature_list=feature_list))
        writer.write(example.SerializeToString())
        total_written += 1

    writer.close()
    print('Successfully encoded %i out of %i videos' %
          (total_written, total_written + total_error))
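
A hypothetical call to extract_features; the URL and label string below are placeholders, not values from the original code:

extract_features(
    output_tfrecords_file='features.tfrecord',
    videos_urls=['https://www.youtube.com/watch?v=dQw4w9WgXcQ'],
    videos_labels=['0,1'],  # comma-separated label ids, as parsed above
    streaming=True)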
Example #12
def extract_rgb(video_file,
                output_tfrecords_file,
                labels,
                id,
                model_dir=model_dir,
                insert_zero_audio_features=True,
                skip_frame_level_features=False):
    extractor = feature_extractor.YouTube8MFeatureExtractor(model_dir)
    writer = tf.python_io.TFRecordWriter(output_tfrecords_file)
    total_written = 0
    total_error = 0

    rgb_features = []
    sum_rgb_features = None
    for rgb in frame_iterator(video_file):
        features = extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
        if sum_rgb_features is None:
            sum_rgb_features = features
        else:
            sum_rgb_features += features
        rgb_features.append(_bytes_feature(quantize(features)))

    if not rgb_features:
        print('Could not get features for ' + video_file)
        return

    mean_rgb_features = sum_rgb_features / len(rgb_features)

    # Create SequenceExample proto and write to output.
    feature_list = {
        'rgb': tf.train.FeatureList(feature=rgb_features),
    }
    context_features = {
        'labels':
        _int64_list_feature([labels]),
        'id':
        _bytes_feature(_make_bytes(id.encode('utf-8'))),
        'mean_rgb':
        tf.train.Feature(float_list=tf.train.FloatList(
            value=mean_rgb_features)),
    }

    if insert_zero_audio_features:
        zero_vec = [0] * 128
        feature_list['audio'] = tf.train.FeatureList(
            feature=[_bytes_feature(_make_bytes(zero_vec))] *
            len(rgb_features))
        context_features['mean_audio'] = tf.train.Feature(
            float_list=tf.train.FloatList(value=zero_vec))

    if skip_frame_level_features:
        example = tf.train.SequenceExample(context=tf.train.Features(
            feature=context_features))
    else:
        example = tf.train.SequenceExample(
            context=tf.train.Features(feature=context_features),
            feature_lists=tf.train.FeatureLists(feature_list=feature_list))
    print(example)
    writer.write(example.SerializeToString())
    total_written += 1

    writer.close()
    print('Successfully encoded %i out of %i videos' %
          (total_written, total_written + total_error))
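
For completeness, a sketch of reading one of these records back with the same TF1-style API family used above; the feature keys assume the defaults ('labels', 'id', 'rgb', 'audio') from these examples:

import tensorflow as tf


def parse_sequence_example(serialized):
    # Parse the context and frame-level features written by the examples above.
    context, sequences = tf.parse_single_sequence_example(
        serialized,
        context_features={
            'labels': tf.VarLenFeature(tf.int64),
            'id': tf.FixedLenFeature([], tf.string),
        },
        sequence_features={
            'rgb': tf.FixedLenSequenceFeature([], tf.string),
            'audio': tf.FixedLenSequenceFeature([], tf.string),
        })
    return context, sequences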
Example #13
def main(unused_argv):
    extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
    writer = tf.python_io.TFRecordWriter(FLAGS.output_tfrecords_file)
    total_written = 0
    total_error = 0
    for video_file, labels in csv.reader(open(FLAGS.input_videos_csv)):
        rgb_features = []
        sum_rgb_features = None
        for rgb in frame_iterator(video_file,
                                  every_ms=1000.0 / FLAGS.frames_per_second):
            features = extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
            if sum_rgb_features is None:
                sum_rgb_features = features
            else:
                sum_rgb_features += features
            rgb_features.append(_float_list_feature(features))

        if not rgb_features:
            print('Could not get features for ' + video_file, file=sys.stderr)
            total_error += 1
            continue

        mean_rgb_features = sum_rgb_features / len(rgb_features)
        if not FLAGS.insert_zero_audio_features:
            aud_features = []
            sum_aud_features = None
            sound = AudioSegment.from_file(video_file, video_file[-3:])
            sound = sound.set_channels(1)
            sound.export(video_file[:-4] + '.wav', format="wav")
            aud_file = video_file[:-4] + '.wav'
            [Fs, x] = audioBasicIO.readAudioFile(aud_file)
            a_feats, f_names = audioFeatureExtraction.stFeatureExtraction(
                x, Fs, 0.08 * Fs, 0.04 * Fs)
            a_feats = [
                numpy.array([a_feats[i][j] for i in range(len(a_feats))])
                for j in range(len(a_feats[0]))
            ]
            sum_aud_features = sum(a_feats)
            mean_aud_features = sum_aud_features / len(a_feats)
            for i in range(len(a_feats)):
                aud_features.append(_float_list_feature(a_feats[i]))
        # Create SequenceExample proto and write to output.
        feature_list = {
            FLAGS.image_feature_key:
            tf.train.FeatureList(feature=rgb_features),
        }
        context_features = {
            FLAGS.labels_feature_key:
            _int64_list_feature(sorted(map(int, labels.split(';')))),
            FLAGS.video_file_feature_key:
            _bytes_feature(_make_bytes(map(ord, video_file))),
            'mean_' + FLAGS.image_feature_key:
            tf.train.Feature(float_list=tf.train.FloatList(
                value=mean_rgb_features)),
        }

        if FLAGS.insert_zero_audio_features:
            aud_vec = [0] * 128
            feature_list['audio'] = tf.train.FeatureList(
                feature=[_bytes_feature(_make_bytes(aud_vec))] *
                len(rgb_features))
            context_features['mean_audio'] = tf.train.Feature(
                float_list=tf.train.FloatList(value=aud_vec))
        else:
            aud_vec = aud_features
            feature_list['audio'] = tf.train.FeatureList(feature=aud_vec)
            context_features['mean_audio'] = tf.train.Feature(
                float_list=tf.train.FloatList(value=mean_aud_features))

        if FLAGS.skip_frame_level_features:
            example = tf.train.SequenceExample(context=tf.train.Features(
                feature=context_features))
        else:
            example = tf.train.SequenceExample(
                context=tf.train.Features(feature=context_features),
                feature_lists=tf.train.FeatureLists(feature_list=feature_list))
        writer.write(example.SerializeToString())
        total_written += 1
        print(str(total_written) + ' vids done')

    writer.close()
    print('Successfully encoded %i out of %i videos' %
          (total_written, total_written + total_error))
def main(unused_argv):
  extractor = feature_extractor.YouTube8MFeatureExtractor(FLAGS.model_dir)
  writer = tf.python_io.TFRecordWriter(FLAGS.output_tfrecords_file)
  total_written = 0
  total_error = 0

  linecountwriter = csv.writer(
      open(FLAGS.output_video_name_line_count, 'w', newline=''), delimiter=',')

  for video_file, labels in csv.reader(open(FLAGS.input_videos_csv)):
    print(video_file)
    rgb_features = []
    rgb_features_json = []
    sum_rgb_features = None
    for rgb in frame_iterator(
        video_file, every_ms=1000.0/FLAGS.frames_per_second):
      features = extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
      if sum_rgb_features is None:
        sum_rgb_features = features
      else:
        sum_rgb_features += features
      
      qfeatures = quantize(features, return_as_bytes=False)
      
      rgb_features.append(_bytes_feature(_make_bytes(qfeatures)))
      rgb_features_json.append(features.tolist())

    if not rgb_features:
      print('Could not get features for ' + video_file, file=sys.stderr)
      total_error += 1
      continue

    mean_rgb_features = sum_rgb_features / len(rgb_features)

    print("finished feature extraction")
    print(len(rgb_features))
    linecountwriter.writerow([video_file, len(rgb_features)])

    # Create SequenceExample proto and write to output.
    feature_list = {
        FLAGS.image_feature_key: tf.train.FeatureList(feature=rgb_features),
    }
    context_features = {
        FLAGS.labels_feature_key: _int64_list_feature(
            sorted(map(int, labels.split(';')))),
        FLAGS.video_file_feature_key: _bytes_feature(_make_bytes(
            map(ord, video_file))),
        'mean_' + FLAGS.image_feature_key: tf.train.Feature(
            float_list=tf.train.FloatList(value=mean_rgb_features)),
    }

    if FLAGS.insert_zero_audio_features:
      zero_vec = [0] * 128
      feature_list['audio'] = tf.train.FeatureList(
          feature=[_bytes_feature(_make_bytes(zero_vec))] * len(rgb_features))
      context_features['mean_audio'] = tf.train.Feature(
          float_list=tf.train.FloatList(value=zero_vec))

    if FLAGS.skip_frame_level_features:
      print('skip_frame_level_features true')
      example = tf.train.SequenceExample(
          context=tf.train.Features(feature=context_features))
    else:
      print('skip_frame_level_features false')
      example = tf.train.SequenceExample(
          context=tf.train.Features(feature=context_features),
          feature_lists=tf.train.FeatureLists(feature_list=feature_list))
    
    writer.write(example.SerializeToString())
    total_written += 1


    # Write out to JSON (median_rgb is computed inline below).

    jsout = {
      'filename': os.path.basename(video_file),
      'frames_per_second': FLAGS.frames_per_second,
      'labels': labels.split(';'),
      'mean_rgb': mean_rgb_features.tolist(),
      'rgb': rgb_features_json,
      'median_rgb': numpy.median(rgb_features_json, axis=0).tolist(),
    }
    
    outputfile = os.path.join(
        os.path.dirname(FLAGS.output_tfrecords_file),
        '{}.json'.format(os.path.basename(video_file)))

    with open(outputfile, 'w') as outfile:
      json.dump(jsout, outfile)
    
  writer.close()
  print('Successfully encoded %i out of %i videos' % (
      total_written, total_written + total_error))
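
To recover approximate float features from the quantized byte strings written above, the mapping in quantize can be inverted; a minimal sketch matching the [-2, 2] range assumed throughout:

import numpy


def dequantize(feat_vector, max_quantized_value=2.0, min_quantized_value=-2.0):
    # Invert the [-2, 2] -> [0, 255] quantization applied by quantize().
    assert max_quantized_value > min_quantized_value
    quantized_range = max_quantized_value - min_quantized_value
    scalar = quantized_range / 255.0
    bias = (quantized_range / 512.0) + min_quantized_value
    return feat_vector * scalar + bias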