Example #1
import os

import tensorflow as tf

# FLAGS and the helper routines used below (FindPatternFiles, PrintSequencesInfo,
# ShardSequences, Progress, CheckRecord, AddSequence) are assumed to be defined
# elsewhere in this module.


def AddSequences():
    """Converts the discovered input sequences into TFRecord dataset files."""
    errors = []

    # Generate the dataset file lists.
    sequences = FindPatternFiles(FLAGS.input_dir, FLAGS.view_pattern, errors)
    num_frames = PrintSequencesInfo(sequences,
                                    'Found the following datasets and files:')

    # Sharding and randomizing sets.
    if FLAGS.max_per_shard > 0:
        sequences = ShardSequences(sequences, FLAGS.max_per_shard)
        num_frames = PrintSequencesInfo(sequences, 'After sharding:')
        tf.logging.info('')

    # Process sets.
    progress = Progress(num_frames)
    output_list = []
    for sequence in sequences:
        record_name = os.path.join(FLAGS.output_dir,
                                   '%s.tfrecord' % sequence['name'])
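        # Reuse an existing output record only if its contents match the sequence.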
        if tf.gfile.Exists(record_name) and not FLAGS.overwrite:
            ok, num_frames = CheckRecord(record_name, sequence)
            if ok:
                progress.Add(num_frames)
                tf.logging.info('Skipping existing output file: %s' %
                                record_name)
                continue
            else:
                tf.logging.info(
                    'File does not match sequence, reprocessing...')
        output_dir = os.path.dirname(record_name)
        if not tf.gfile.Exists(output_dir):
            tf.logging.info('Creating output directory: %s' % output_dir)
            tf.gfile.MakeDirs(output_dir)
        output_list.append(record_name)
        tf.logging.info('Writing to ' + record_name)
        writer = tf.python_io.TFRecordWriter(record_name)
        AddSequence(sequence, writer, progress, errors)
        writer.close()
    tf.logging.info('Wrote dataset files: ' + str(output_list))
    tf.logging.info('All errors (%d): %s' % (len(errors), str(errors)))
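
For reference, here is a minimal sketch of how the flags referenced above might be defined and the converter invoked as a standalone TF 1.x script. The flag names come from the references in AddSequences; the defaults, help strings, and the main wrapper are illustrative assumptions, not part of the original module.

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
# Flag names taken from the references in AddSequences; defaults are placeholders.
tf.app.flags.DEFINE_string('input_dir', '', 'Directory containing the input sequences.')
tf.app.flags.DEFINE_string('output_dir', '', 'Directory to write TFRecord files to.')
tf.app.flags.DEFINE_string('view_pattern', '*.jpg', 'Glob pattern matching the per-view files.')
tf.app.flags.DEFINE_integer('max_per_shard', 0, 'Maximum frames per shard; 0 disables sharding.')
tf.app.flags.DEFINE_boolean('overwrite', False, 'Whether to overwrite existing TFRecord files.')


def main(_):
    AddSequences()


if __name__ == '__main__':
    tf.app.run()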