Exemple #1
0
def main(unused_argv):
    """Process every selected input file that has no 'C'-prefixed output yet.

    Input files are globbed from ``FLAGS.input_data_pattern`` and narrowed to
    the slice ``[FLAGS.file_from:FLAGS.file_to]``. For each remaining input,
    ``process_one_file`` is called with an ``(input_path, output_path)``
    tuple, where the output path is ``FLAGS.output_path + 'C' + basename``.

    Args:
        unused_argv: ignored.
    """
    print("tensorflow version: %s" % tf.__version__)

    all_frame_files = gfile.Glob(FLAGS.input_data_pattern)
    f_fullpath = all_frame_files[FLAGS.file_from:FLAGS.file_to]

    # Map existing outputs back to the input basename they were made from.
    # NOTE(review): only names containing 'CAtr' are mapped back; any other
    # 'C*' output never matches its input and would be processed again.
    exist_files = gfile.Glob(FLAGS.output_path + "C*tfrecord")
    exist_fn = {x.split('/')[-1].replace('CAtr', 'Atr') for x in exist_files}

    yet_2_split = [x for x in f_fullpath if x.split('/')[-1] not in exist_fn]

    vf = [FLAGS.output_path + 'C' + x.split('/')[-1] for x in yet_2_split]

    mylog('number of files suggested: %d' % len(f_fullpath))
    mylog('number of files yet to process: %d' % len(yet_2_split))

    if FLAGS.parallel:
        from concurrent import futures
        # The context manager waits for all workers before "done" is logged.
        with futures.ProcessPoolExecutor(max_workers=2) as executor:
            # Draining the result iterator re-raises a worker exception here
            # instead of silently dropping it.
            for _ in executor.map(process_one_file, zip(yet_2_split, vf)):
                pass
    else:
        for filenames in zip(yet_2_split, vf):
            process_one_file(filenames)

    mylog("done")
Exemple #2
0
def main(unused_argv):
    """Split each pending input file into 'O', 'E' and 'F' prefixed outputs.

    Inputs come from ``FLAGS.input_data_pattern`` restricted to the slice
    ``[FLAGS.file_from:FLAGS.file_to]``. An input counts as already handled
    when an 'E*tfrecord' file in ``FLAGS.output_path`` maps back to its
    basename. The remaining inputs are handed to ``split_files`` together
    with their three output paths, inside a fresh default graph populated by
    ``build_graph``.

    Args:
        unused_argv: ignored.
    """
    logging.set_verbosity(tf.logging.ERROR)
    print("tensorflow version: %s" % tf.__version__)

    candidates = gfile.Glob(
        FLAGS.input_data_pattern)[FLAGS.file_from:FLAGS.file_to]

    # Undo the 'E' prefix on existing outputs to recover the input basenames.
    prefix_rewrites = (('Etr', 'tr'), ('Eval', 'val'), ('Etes', 'tes'))
    done_names = set()
    for done_path in gfile.Glob(FLAGS.output_path + "E*tfrecord"):
        base = done_path.split('/')[-1]
        for prefixed, plain in prefix_rewrites:
            base = base.replace(prefixed, plain)
        done_names.add(base)

    pending = [
        path for path in candidates if path.split('/')[-1] not in done_names
    ]
    pending_names = [path.split('/')[-1] for path in pending]

    out_o = [FLAGS.output_path + 'O' + name for name in pending_names]
    out_e = [FLAGS.output_path + 'E' + name for name in pending_names]
    out_f = [FLAGS.output_path + 'F' + name for name in pending_names]

    mylog('number of files suggested: %d' % len(candidates))
    mylog('number of files yet to process: %d' % len(pending))

    with tf.Graph().as_default():
        build_graph()
        split_files(zip(pending, out_o, out_e, out_f))
    mylog("done")
Exemple #3
0
def pick_features_from_file(input_fn, out_fn, feats=None):
    """Rewrite one ZLIB-compressed TFRecord file through a feature selector.

    Every record of ``input_fn`` is parsed as a ``tf.train.Example``, passed
    to ``select_features_from_tfexample`` along with ``feats``, and the
    result is serialized into ``out_fn`` (also ZLIB-compressed). A summary
    line with timing and throughput is logged at the end.

    Args:
        input_fn: path of the TFRecord file to read.
        out_fn: path of the TFRecord file to write.
        feats: forwarded unchanged to ``select_features_from_tfexample``.
    """
    started = time.time()
    zlib_opts = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.ZLIB)
    records = tf.python_io.tf_record_iterator(input_fn, options=zlib_opts)

    with tf.python_io.TFRecordWriter(out_fn, options=zlib_opts) as writer:
        # Read the whole input before writing, so reads and writes are not
        # interleaved.
        picked = [
            select_features_from_tfexample(
                tf.train.Example.FromString(raw), feats) for raw in records
        ]
        for picked_example in picked:
            writer.write(picked_example.SerializeToString())
    num_examples = len(picked)

    elapsed = time.time() - started
    rate = num_examples / elapsed
    summary = (
        "Processed in {:.0f} sec: {}, Examples: {}, Examples/second: {:.0f}.".
        format(elapsed, input_fn, num_examples, rate))
    mylog(summary)
Exemple #4
0
]
# Training file paths built from the suffixes in `trfs`.
# NOTE(review): `trfs` is defined above this excerpt; its contents are not
# visible here.
trfiles = [
    "YouTube.Kaggle/input/frame_level_link/train/train" + x + ".tfrecord"
    for x in trfs
]

# `allFiles` is reassigned four times in a row, so only the last glob (the
# single Atest-a.tfrecord pattern) is used. The earlier glob calls still run
# but their results are discarded.
# NOTE(review): these look like manually toggled alternatives - confirm.
allFiles = glob.glob(
    "YouTube.Kaggle/input/frame_level_link/validate/*.tfrecord")
allFiles = glob.glob("YouTube.Kaggle/input/video_level/test/*.tfrecord")
allFiles = glob.glob("YouTube.Kaggle/input/GENERATED_DATA/f2test/*.tfrecord")
allFiles = glob.glob(
    "YouTube.Kaggle/input/GENERATED_DATA/f2test/Atest-a.tfrecord")

# Full-range slice: a copy of every matched path.
files = allFiles[0:len(allFiles)]
#files = trfiles
mylog("Number of files to review: {}.".format(len(files)))

file_cnt = 0  # number of files visited so far
i = 0  # not used in the visible part of this script
opts = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)

# CSV report with a header row, one line per file expected to follow.
# NOTE(review): opened without a `with` block; no close() is visible in this
# excerpt - confirm it is closed further down.
num_examples_file = open('/tmp/num_examples_by_file.csv', 'w')
num_examples_file.write('filename,num_examples\n')

for filename in files:
    file_cnt = file_cnt + 1
    #print('checking %d/%d %s' % (cnt, filesSize, filename))
    # Log progress every 200 files.
    if file_cnt % 200 == 0:
        mylog("Checked {} files.".format(file_cnt))

    # Per-file example counter, reset for each file.
    num_examples = 0
Exemple #5
0
def split_files(filenames):
    """Write full, first-half and second-half variants of each input file.

    For every ``(infn, outfn0, outfn1, outfn2)`` group, each record of
    ``infn`` is fed to the ``seq_example_bytes`` placeholder and the
    ``vid_tsr``, ``labs_tsr``, ``rgb_tsr`` and ``audio_tsr`` tensors are
    evaluated. All of these are looked up in the default graph's
    collections. Three examples are built with ``np_2_vExample`` and written
    ZLIB-compressed:

    * ``outfn0``: all frames,
    * ``outfn1``: frames ``[:half]``,
    * ``outfn2``: frames ``[half:]``,

    where ``half`` is the frame count floor-divided by two. Videos with ten
    frames or fewer are logged. They are skipped when ``FLAGS.skip_shorts``
    is set; otherwise all three outputs receive the full, unsplit features.

    Args:
        filenames: iterable of 4-tuples
            ``(input_path, full_out, first_half_out, second_half_out)``.

    Returns:
        The zero-based index of the last file group processed, or ``-1``
        when ``filenames`` is empty.
    """
    t0 = time.time()
    # Keeps the summary log and return value defined for an empty iterable.
    k = -1
    with tf.Session() as sess:
        vid_tsr = tf.get_collection("vid_tsr")[0]
        labs_tsr = tf.get_collection("labs_tsr")[0]
        rgb_tsr = tf.get_collection("rgb_tsr")[0]
        audio_tsr = tf.get_collection("audio_tsr")[0]
        seq_example_bytes = tf.get_collection("seq_example_bytes")[0]

        # Writer options are the same for every output file.
        opts = tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB)

        for k, file_grp in enumerate(filenames):
            start_time = time.time()
            infn, outfn0, outfn1, outfn2 = file_grp

            v0Examples = []
            v1Examples = []
            v2Examples = []
            for in_ex in tf.python_io.tf_record_iterator(infn):
                vid, labs, rgb, audio = sess.run(
                    [vid_tsr, labs_tsr, rgb_tsr, audio_tsr],
                    feed_dict={seq_example_bytes: in_ex})

                nframes = audio.shape[0]
                # Floor division replaces np.int(), which newer NumPy
                # releases no longer provide.
                half = nframes // 2

                if nframes > 10:
                    rgb_1, rgb_2 = rgb[:half], rgb[half:]
                    audio_1, audio_2 = audio[:half], audio[half:]
                else:
                    # Too short to split: skip it, or reuse the full
                    # features for both halves.
                    mylog(
                        "One frame video encountered: {}, num_frames: {}, labels: {}"
                        .format(vid, nframes, labs))
                    if FLAGS.skip_shorts:
                        continue
                    rgb_1 = rgb_2 = rgb
                    audio_1 = audio_2 = audio

                v0Examples.append(np_2_vExample(vid, labs, rgb, audio))
                v1Examples.append(np_2_vExample(vid, labs, rgb_1, audio_1))
                v2Examples.append(np_2_vExample(vid, labs, rgb_2, audio_2))

            outputs = ((outfn0, v0Examples), (outfn1, v1Examples),
                       (outfn2, v2Examples))
            for out_path, examples in outputs:
                with tf.python_io.TFRecordWriter(out_path,
                                                 options=opts) as tfwriter:
                    for example in examples:
                        tfwriter.write(example.SerializeToString())

            seconds_per_file = time.time() - start_time
            num_examples_per_sec = len(v1Examples) / seconds_per_file

            mylog(
                "Processed file number {} in {:.2f} sec: {}, Examples: {}, Examples/second: {:.0f}."
                .format(k, seconds_per_file,
                        infn.split('/')[-1], len(v1Examples),
                        num_examples_per_sec))

    ttl_time = time.time() - t0
    # k is the last zero-based index, so the file count is k + 1.
    mylog("Processed total {} files in {:.2f}.".format(k + 1, ttl_time))
    return k