Example #1
def make_tfrecord(dataset_name,
                  dataset_dir,
                  train_fraction=0.9,
                  num_channels=3,
                  num_shards=4,
                  remove_original_images=False):
    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)

    if _dataset_exists(dataset_name, dataset_dir, num_shards):
        print('Dataset files already exist. Exiting without re-creating them.')
        return False

    random.seed(_RANDOM_SEED)
    photo_filenames, class_names = _get_filenames_and_classes(dataset_dir)
    print("Now let's start to make the tfrecord dataset files!")
    random.shuffle(photo_filenames)

    num_train = int(len(photo_filenames) * train_fraction)
    training_filenames = photo_filenames[:num_train]
    print("cnt", len(training_filenames))
    labels_to_class_names = dict(zip(range(len(class_names)), class_names))
    import json
    with open(os.path.join(dataset_dir, "labels.json"), "w") as labels_file:
        json.dump(labels_to_class_names, labels_file)
    np.save(os.path.join(dataset_dir, "file_names.npy"), training_filenames)
    validation_filenames = photo_filenames[num_train:]
    class_names_to_ids = dict(zip(class_names, range(len(class_names))))
    # First, convert the training and validation sets.
    if len(training_filenames) > 0:
        _convert_dataset(dataset_name, 'train', training_filenames,
                         class_names_to_ids, dataset_dir, num_shards,
                         num_channels)
    if len(validation_filenames) > 0:
        _convert_dataset(dataset_name, 'validation', validation_filenames,
                         class_names_to_ids, dataset_dir, num_shards,
                         num_channels)

    # Finally, write the labels file:
    dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    if remove_original_images:
        _clean_up_temporary_files(dataset_dir)
    return True
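The helpers _get_filenames_and_classes, _convert_dataset, and dataset_utils.write_label_file are not shown in this snippet. Assuming dataset_dir holds one sub-directory of images per class (the layout such helpers usually expect), a call might look like the following sketch; the dataset name and path are placeholders:

# Hypothetical invocation; 'my_dataset' and '/tmp/my_dataset' are placeholders.
created = make_tfrecord(dataset_name='my_dataset',
                        dataset_dir='/tmp/my_dataset',
                        train_fraction=0.9,
                        num_shards=4,
                        remove_original_images=False)
if created:
    print('TFRecord shards written to /tmp/my_dataset')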
Example #2
def run(config):
    """Runs the download and conversion operation.

    Args:
      dataset_dir: The dataset directory where the dataset is stored.
    """
    dataset_dir = config.dataset_dir
    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)

    training_filename = _get_output_filename(dataset_dir, config.train_name)
    testing_filename = _get_output_filename(dataset_dir, config.validation_name)

    if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)

    # First, process the training data:
    with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
        offset = 0
        for i in range(_NUM_TRAIN_FILES):
            filename = os.path.join(dataset_dir,
                                    'cifar-10-batches-py',
                                    'data_batch_%d' % (i + 1))  # 1-indexed.
            offset = _add_to_tfrecord(filename, tfrecord_writer, offset)

    # Next, process the testing data:
    with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
        filename = os.path.join(dataset_dir,
                                'cifar-10-batches-py',
                                'test_batch')
        _add_to_tfrecord(filename, tfrecord_writer)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
    dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    if config.remove_original_images:
        _clean_up_temporary_files(dataset_dir)
    print('\nFinished converting the Cifar10 dataset!')
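run() only reads four attributes from config, so any object exposing them will do. A minimal stand-in (a sketch, not the project's actual config class) can be built with SimpleNamespace; the path below is a placeholder:

from types import SimpleNamespace

# Minimal stand-in for the unshown config object; only the attributes
# that run() actually reads are provided. '/tmp/cifar10' is a placeholder.
config = SimpleNamespace(dataset_dir='/tmp/cifar10',
                         train_name='train',
                         validation_name='test',
                         remove_original_images=False)
run(config)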
Example #3
def run(config):
    """Runs the download and conversion operation.

    Args:
      dataset_dir: The dataset directory where the dataset is stored.
    """
    dataset_dir = config.dataset_dir
    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)

    training_filename = _get_output_filename(dataset_dir, config.train_name)
    testing_filename = _get_output_filename(dataset_dir,
                                            config.validation_name)

    if tf.gfile.Exists(training_filename) and tf.gfile.Exists(
            testing_filename):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    _download_dataset(dataset_dir)

    # First, process the training data:
    with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
        data_filename = os.path.join(dataset_dir, _TRAIN_DATA_FILENAME)
        labels_filename = os.path.join(dataset_dir, _TRAIN_LABELS_FILENAME)
        _add_to_tfrecord(data_filename, labels_filename, 60000,
                         tfrecord_writer)

    # Next, process the testing data:
    with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
        data_filename = os.path.join(dataset_dir, _TEST_DATA_FILENAME)
        labels_filename = os.path.join(dataset_dir, _TEST_LABELS_FILENAME)
        _add_to_tfrecord(data_filename, labels_filename, 10000,
                         tfrecord_writer)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
    dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    if config.remove_original_images:
        _clean_up_temporary_files(dataset_dir)
    print('\nFinished converting the MNIST dataset!')
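As a quick sanity check (a sketch, not part of the original snippet), the number of serialized examples in the training file can be counted with the TF 1.x record iterator. The path below is a placeholder and must match what _get_output_filename(dataset_dir, config.train_name) produced above; the feature keys themselves depend on _add_to_tfrecord, which is not shown.

import tensorflow as tf

# Count the serialized examples in the training TFRecord written above.
train_tfrecord = '/tmp/mnist/mnist_train.tfrecord'  # placeholder path
num_records = sum(1 for _ in tf.python_io.tf_record_iterator(train_tfrecord))
print('training records:', num_records)  # run() wrote 60000 training examples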
Example #4
def run(config):
    """Runs the download and conversion operation.

    Args:
      dataset_dir: The dataset directory where the dataset is stored.
    """
    dataset_dir = config.dataset_dir
    if not tf.gfile.Exists(dataset_dir):
        tf.gfile.MakeDirs(dataset_dir)

    if _dataset_exists(dataset_dir):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
    photo_filenames, class_names = _get_filenames_and_classes(dataset_dir)
    class_names_to_ids = dict(zip(class_names, range(len(class_names))))

    # Divide into train and test:
    random.seed(_RANDOM_SEED)
    random.shuffle(photo_filenames)
    training_filenames = photo_filenames[_NUM_VALIDATION:]
    validation_filenames = photo_filenames[:_NUM_VALIDATION]

    # First, convert the training and validation sets.
    _convert_dataset(config.train_name, training_filenames,
                     class_names_to_ids, dataset_dir)
    _convert_dataset(config.validation_name, validation_filenames,
                     class_names_to_ids, dataset_dir)

    # Finally, write the labels file:
    labels_to_class_names = dict(zip(range(len(class_names)), class_names))
    dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

    if config.remove_original_images:
        _clean_up_temporary_files(dataset_dir)
    print('\nFinished converting the Flowers dataset!')
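_get_filenames_and_classes is not included in the snippet. Assuming the usual one-sub-directory-per-class layout of the extracted flowers tarball, a plausible sketch of such a helper is shown below; the 'flower_photos' sub-folder name is an assumption, not taken from the code above.

import os

def _get_filenames_and_classes_sketch(dataset_dir):
    """Hypothetical helper: maps a class-per-folder layout to
    (photo_filenames, class_names). The real _get_filenames_and_classes
    used above is not part of this snippet."""
    root = os.path.join(dataset_dir, 'flower_photos')  # assumed sub-folder
    class_names = sorted(
        d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d)))
    photo_filenames = [
        os.path.join(root, class_name, filename)
        for class_name in class_names
        for filename in os.listdir(os.path.join(root, class_name))]
    return photo_filenames, class_names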