Ejemplo n.º 1
0
def run(output_dir):
    '''Convert the '1M' dataset into sharded TFRecord files.

    Loads the training and test splits with ``_get_dataset('1M')`` and writes
    each split to a series of TFRecord files that hold at most
    ``SAMPLES_PER_FILES`` samples each.  File names are produced by
    ``_get_output_filename(dir_, fidx, name=name)`` where ``fidx`` counts up
    from 1 and ``name`` is ``'train'`` or ``'test'``.

    Args:
        output_dir: Either a single directory path (``str``/``bytes``), used
            for both splits, or an iterable yielding the training directory
            followed by the test directory.
    '''
    SAMPLES_PER_FILES = 100

    # zip() would walk a bare path string character by character, handing
    # 'o', 'u', ... to the filename helper instead of the directory.  Treat a
    # lone path as "use this directory for both splits".
    # NOTE(review): assumes _get_output_filename folds `name` into the file
    # name so train/test shards cannot collide in a shared directory - confirm.
    if isinstance(output_dir, (str, bytes)):
        output_dirs = [output_dir, output_dir]
    else:
        output_dirs = output_dir

    training_set, test_set = _get_dataset('1M')

    for data_set, name, dir_ in zip([training_set, test_set], ['train', 'test'], output_dirs):

        num_samples = len(data_set)

        # One output file per chunk of SAMPLES_PER_FILES samples; an empty
        # split produces no files at all.
        for fidx, start in enumerate(range(0, num_samples, SAMPLES_PER_FILES), start=1):

            tf_filename = _get_output_filename(dir_, fidx, name=name)
            stop = min(start + SAMPLES_PER_FILES, num_samples)

            with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:

                for i in range(start, stop):

                    # '\r' rewrites the same console line as a progress meter.
                    sys.stdout.write('\r>> Converting sample %d/%d' % (i+1, num_samples))
                    sys.stdout.flush()

                    _add_to_tfrecord(data_set[i], tfrecord_writer)

    print('\nFinished converting the dataset!')
Ejemplo n.º 2
0
def main():
    '''Convert a dataset into sharded TFRecord files, driven by sys.argv.

    Command-line arguments:
        sys.argv[1]: passed to ``_get_dataset`` to load the training and
            test splits.
        sys.argv[2]: first argument given to ``_get_output_filename`` for the
            training shards (presumably the output directory - confirm).
        sys.argv[3]: same, for the test shards.

    Each split is written in shards of at most 100 samples; shard indices
    start at 1.
    '''
    shard_size = 100

    train_split, test_split = _get_dataset(sys.argv[1])

    # Both remaining argv entries are read up front, before any file is
    # written, exactly as when building the zip() inputs.
    splits = zip(
        (train_split, test_split),
        ('train', 'test'),
        (sys.argv[2], sys.argv[3]),
    )

    for samples, split_name, target_dir in splits:
        total = len(samples)

        # Walk the split one shard at a time; an empty split yields no files.
        shard_starts = range(0, total, shard_size)
        for shard_no, first in enumerate(shard_starts, start=1):
            shard_path = _get_output_filename(target_dir, shard_no, name=split_name)
            last = min(first + shard_size, total)

            with tf.python_io.TFRecordWriter(shard_path) as writer:
                for pos in range(first, last):
                    # '\r' keeps the progress counter on a single line.
                    progress = '\r>> Converting sample %d/%d' % (pos + 1, total)
                    sys.stdout.write(progress)
                    sys.stdout.flush()

                    _add_to_tfrecord(samples[pos], writer)

    print('\nFinished converting the dataset!')