def AddSequences():
  """Converts all discovered input sequences to TFRecord dataset files.

  Scans FLAGS.input_dir for sequences matching FLAGS.view_pattern, optionally
  shards them (FLAGS.max_per_shard), and writes one '<name>.tfrecord' file per
  sequence under FLAGS.output_dir. Existing output files are skipped when they
  already match their sequence and FLAGS.overwrite is False.

  Side effects:
    Creates output directories and TFRecord files; logs progress via
    tf.logging. Accumulated per-sequence errors are logged at the end.
  """
  errors = []
  # Generate datasets file lists.
  sequences = FindPatternFiles(FLAGS.input_dir, FLAGS.view_pattern, errors)
  num_frames = PrintSequencesInfo(sequences,
                                  'Found the following datasets and files:')

  # Sharding and randomizing sets.
  if FLAGS.max_per_shard > 0:
    sequences = ShardSequences(sequences, FLAGS.max_per_shard)
    num_frames = PrintSequencesInfo(sequences, 'After sharding:')
  tf.logging.info('')

  # Process sets.
  progress = Progress(num_frames)
  output_list = []
  for sequence in sequences:
    record_name = os.path.join(FLAGS.output_dir,
                               '%s.tfrecord' % sequence['name'])
    if tf.gfile.Exists(record_name) and not FLAGS.overwrite:
      # Reuse an existing record only if it still matches the sequence;
      # CheckRecord returns the frame count so progress stays accurate.
      ok, num_frames = CheckRecord(record_name, sequence)
      if ok:
        progress.Add(num_frames)
        tf.logging.info('Skipping existing output file: %s' % record_name)
        continue
      else:
        tf.logging.info('File does not match sequence, reprocessing...')
    output_dir = os.path.dirname(record_name)
    if not tf.gfile.Exists(output_dir):
      tf.logging.info('Creating output directory: %s' % output_dir)
      tf.gfile.MakeDirs(output_dir)
    output_list.append(record_name)
    tf.logging.info('Writing to ' + record_name)
    # Context manager guarantees the writer is closed (and the file
    # flushed) even if AddSequence raises mid-sequence.
    with tf.python_io.TFRecordWriter(record_name) as writer:
      AddSequence(sequence, writer, progress, errors)
  tf.logging.info('Wrote dataset files: ' + str(output_list))
  tf.logging.info('All errors (%d): %s' % (len(errors), str(errors)))