Ejemplo n.º 1
0
def prepare(dataset_dir):
    """Runs the conversion operation for the MNIST dataset.

    Creates the dataset directory, builds an image-to-TFExample converter,
    hands it to `prepare_dataset` for the train and predict modes, and finally
    dumps the converter's meta data (extended with sample counts and item
    descriptions) as JSON.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    # Split sizes, named once so the meta data below cannot drift from the
    # values handed to `prepare_dataset`.
    num_train_items = 60000
    num_eval_items = 10000
    num_test_items = 10000

    make_dataset_dir(dataset_dir)

    image_reader = PNGNumpyImageReader(shape=(_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS))
    # One name per digit; the entry for digit 6 used to be misspelled 'size'.
    classes = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
    converter = ImagesToTFExampleConverter(
        classes=classes, colorspace='grayscale', image_format='png',
        channels=_NUM_CHANNELS, image_reader=image_reader, height=_IMAGE_SIZE, width=_IMAGE_SIZE)

    prepare_dataset(converter, dataset_dir, Modes.TRAIN, num_train_items, num_eval=num_eval_items)
    prepare_dataset(converter, dataset_dir, Modes.PREDICT, num_test_items)

    # Finally, write the meta data:
    with open(META_DATA_FILENAME_FORMAT.format(dataset_dir), 'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        # NOTE(review): assumes `prepare_dataset` carves the eval items out of
        # the train items (60000 - 10000 = 50000) -- confirm against its code.
        meta_data['num_samples'] = {Modes.TRAIN: num_train_items - num_eval_items,
                                    Modes.EVAL: num_eval_items,
                                    Modes.PREDICT: num_test_items}
        meta_data['items_to_descriptions'] = {
            'image': 'A image of fixed size 28.',
            'label': 'A single integer between 0 and 9',
        }
        json.dump(meta_data, meta_data_file)

    print('\nFinished converting the MNIST dataset!')
Ejemplo n.º 2
0
def prepare(dataset_dir):
    """Runs download and conversion operation for the flowers17 dataset.

    Ensures the dataset directory exists, fetches the archive through
    `download_datasets`, converts the images via `prepare_dataset`, and
    writes the meta data JSON file.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)
    download_datasets(dataset_dir, _DATA_URL, [_FILENAME], uncompress=True)

    reader = JPEGImageReader(channels=_NUM_CHANNELS)
    label_ids = list(range(17))
    converter = ImagesToTFExampleConverter(
        classes=label_ids,
        colorspace=_IMAGE_COLORSPACE,
        image_format=_IMAGE_FORMAT,
        channels=_NUM_CHANNELS,
        image_reader=reader,
        height=_IMAGE_SIZE,
        width=_IMAGE_SIZE,
    )

    prepare_dataset(converter, dataset_dir, 1360, folds=_FOLDS)

    # Last step: persist the meta data next to the converted records.
    meta_path = META_DATA_FILENAME_FORMAT.format(dataset_dir)
    with open(meta_path, 'w') as out_file:
        info = converter.get_meta_data()

        # One fold each goes to eval and predict; the rest stays in train.
        fold_size = 1360 // _FOLDS
        info['num_samples'] = {
            Modes.TRAIN: 1360 - 2 * fold_size,
            Modes.EVAL: fold_size,
            Modes.PREDICT: fold_size,
        }

        image_description = 'A image of colorspace {} resized to {}.'.format(
            _IMAGE_COLORSPACE, _IMAGE_SIZE)
        info['items_to_descriptions'] = {
            'image': image_description,
            'label': 'A single integer between 0 and 16',
        }
        json.dump(info, out_file)

    print('\nFinished converting the flowers17 dataset!')
Ejemplo n.º 3
0
def prepare(dataset_dir):
    """Runs download and conversion operation for the cifar10 dataset.

    Creates the dataset directory, fetches the archive through
    `download_datasets`, converts the data batches for the train, eval and
    'test' modes via `prepare_dataset`, and writes the meta data JSON file
    with per-mode sample counts read back from the generated record files.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)

    download_datasets(dataset_dir, _DATA_URL, [_FILENAME], uncompress=True)

    image_reader = PNGNumpyImageReader(
        shape=(_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS))
    classes = [
        'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
        'horse', 'ship', 'truck'
    ]
    converter = ImagesToTFExampleConverter(classes=classes,
                                           colorspace=_IMAGE_COLORSPACE,
                                           image_format=_IMAGE_FORMAT,
                                           channels=_NUM_CHANNELS,
                                           image_reader=image_reader,
                                           height=_IMAGE_SIZE,
                                           width=_IMAGE_SIZE)

    # Data batches 1-4 feed the train split, batch 5 the eval split, and the
    # dedicated test batch the 'test' split.
    train_batches = [
        _DATA_BATCH_FILENAME_FORMAT.format(dataset_dir, i)
        for i in range(1, 5)
    ]
    eval_batches = [_DATA_BATCH_FILENAME_FORMAT.format(dataset_dir, 5)]
    test_batches = [_TEST_DATA_BATCH_FILENAME.format(dataset_dir)]

    prepare_dataset(converter, dataset_dir, ModeKeys.TRAIN, train_batches)
    prepare_dataset(converter, dataset_dir, ModeKeys.EVAL, eval_batches)
    prepare_dataset(converter, dataset_dir, 'test', test_batches)

    # Finally, write the meta data:
    # NOTE(review): the constant is spelled MEAT_ (not META_) here; it is
    # defined outside this function, so the name is kept as-is -- confirm.
    with open(MEAT_DATA_FILENAME_FORMAT.format(dataset_dir),
              'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        # Count the samples from the record files instead of hard-coding them.
        meta_data['num_samples'] = {
            mode: count_tfrecord_file_content(
                RECORD_FILE_NAME_FORMAT.format(dataset_dir, mode))
            for mode in (ModeKeys.TRAIN, ModeKeys.EVAL, 'test')
        }
        meta_data['items_to_descriptions'] = {
            'image':
            'A image of colorspace {} resized to {}.'.format(
                _IMAGE_COLORSPACE, _IMAGE_SIZE),
            # The upper bound is inclusive, so the largest label id for N
            # classes is N - 1 (previously reported as N).
            'label':
            'A single integer between 0 and {}'.format(len(classes) - 1),
        }
        json.dump(meta_data, meta_data_file)

    print('\nFinished converting the cifar10 dataset!')
Ejemplo n.º 4
0
def prepare(dataset_dir):
    """Runs download and conversion operation for the IMDB dataset.

    Returns early when the record files for all three modes already exist.
    Otherwise downloads the pickled dataset, converts the train and test sets
    via `prepare_dataset`, writes the meta data JSON file, and removes the
    downloaded file again through `delete_datasets`.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)

    record_files = [
        RECORD_FILE_NAME_FORMAT.format(dataset_dir, mode)
        for mode in (Modes.TRAIN, Modes.EVAL, Modes.PREDICT)
    ]
    if all(tf.gfile.Exists(record_file) for record_file in record_files):
        # The placeholder used to be printed verbatim because `.format` was
        # never called; report which directory was skipped.
        print('`{}` Dataset files already exist.'.format(dataset_dir))
        return

    download_datasets(dataset_dir, _DATA_URL, [_FILENAME])
    # NOTE(security): unpickling can execute arbitrary code; this is only
    # safe as long as the file behind `_DATA_URL` is trusted.
    with open(os.path.join(dataset_dir, _FILENAME), 'rb') as f:
        # The file holds two consecutive pickles: train set, then test set.
        train_set = pickle.load(f)
        test_set = pickle.load(f)

    converter = SequenceToTFExampleConverter(
        sequence_features_types={'source_token': 'int'},
        context_features_types={'label': 'int'})

    num_items = len(train_set[0])
    # 10% of the training items are reserved for evaluation.
    len_eval_data = int(num_items * 0.1)
    len_test_data = len(test_set[0])
    prepare_dataset(converter, dataset_dir, train_set, Modes.TRAIN, num_items,
                    len_eval_data)
    prepare_dataset(converter, dataset_dir, test_set, Modes.PREDICT,
                    len_test_data)

    # Finally, write the meta data:
    with open(META_DATA_FILENAME_FORMAT.format(dataset_dir),
              'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        meta_data['num_samples'] = {
            Modes.TRAIN: num_items - len_eval_data,
            Modes.EVAL: len_eval_data,
            Modes.PREDICT: len_test_data
        }
        meta_data['items_to_descriptions'] = {
            'source_token': 'A sequence of word ids.',
            'label': 'A single integer 0 or 1',
        }
        meta_data['num_classes'] = 2
        json.dump(meta_data, meta_data_file)

    delete_datasets(dataset_dir, [_FILENAME])
    print('\nFinished converting the IMDB dataset!')
Ejemplo n.º 5
0
def prepare(dataset_dir):
    """Runs the conversion operation for the MNIST dataset.

    Creates the dataset directory, builds an image-to-TFExample converter,
    hands it to `prepare_dataset` for the train and 'test' modes, and finally
    dumps the converter's meta data (extended with sample counts and item
    descriptions) as JSON.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    # Split sizes, named once so the meta data below cannot drift from the
    # values handed to `prepare_dataset`.
    num_train_items = 60000
    num_eval_items = 10000
    num_test_items = 10000

    make_dataset_dir(dataset_dir)

    image_reader = PNGNumpyImageReader(shape=(_IMAGE_SIZE, _IMAGE_SIZE,
                                              _NUM_CHANNELS))
    # One name per digit; the entry for digit 6 used to be misspelled 'size'.
    classes = [
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
        'eight', 'nine'
    ]
    converter = ImagesToTFExampleConverter(classes=classes,
                                           colorspace='grayscale',
                                           image_format='png',
                                           channels=_NUM_CHANNELS,
                                           image_reader=image_reader,
                                           height=_IMAGE_SIZE,
                                           width=_IMAGE_SIZE)

    prepare_dataset(converter,
                    dataset_dir,
                    ModeKeys.TRAIN,
                    num_train_items,
                    num_eval=num_eval_items)
    prepare_dataset(converter, dataset_dir, 'test', num_test_items)

    # Finally, write the meta data:
    # NOTE(review): the constant is spelled MEAT_ (not META_) here; it is
    # defined outside this function, so the name is kept as-is -- confirm.
    with open(MEAT_DATA_FILENAME_FORMAT.format(dataset_dir),
              'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        # NOTE(review): the records above are prepared under the 'test' mode
        # while the count is stored under ModeKeys.PREDICT -- verify that
        # readers of this file expect that key.
        # NOTE(review): assumes `prepare_dataset` carves the eval items out of
        # the train items (60000 - 10000 = 50000) -- confirm against its code.
        meta_data['num_samples'] = {
            ModeKeys.TRAIN: num_train_items - num_eval_items,
            ModeKeys.EVAL: num_eval_items,
            ModeKeys.PREDICT: num_test_items
        }
        meta_data['items_to_descriptions'] = {
            'image': 'A image of fixed size 28.',
            'label': 'A single integer between 0 and 9',
        }
        json.dump(meta_data, meta_data_file)

    print('\nFinished converting the MNIST dataset!')
Ejemplo n.º 6
0
def prepare(dataset_dir):
    """Runs download and conversion operation for the flowers17 dataset.

    Makes sure the dataset directory exists, calls `download_datasets` for
    the archive, runs `prepare_dataset` with the image converter, and stores
    the resulting meta data as JSON.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)
    download_datasets(dataset_dir, _DATA_URL, [_FILENAME], uncompress=True)

    jpeg_reader = JPEGImageReader(channels=_NUM_CHANNELS)
    converter = ImagesToTFExampleConverter(
        classes=list(range(17)), colorspace=_IMAGE_COLORSPACE,
        image_format=_IMAGE_FORMAT, channels=_NUM_CHANNELS,
        image_reader=jpeg_reader, height=_IMAGE_SIZE, width=_IMAGE_SIZE)

    prepare_dataset(converter, dataset_dir, 1360, folds=_FOLDS)

    # Write the meta data file as the final step.
    meta_file_path = MEAT_DATA_FILENAME_FORMAT.format(dataset_dir)
    with open(meta_file_path, 'w') as handle:
        summary = converter.get_meta_data()

        # Eval and predict each receive one fold; train keeps the remainder.
        per_fold = 1360 // _FOLDS
        sample_counts = {}
        sample_counts[Modes.TRAIN] = 1360 - 2 * per_fold
        sample_counts[Modes.EVAL] = per_fold
        sample_counts[Modes.PREDICT] = per_fold
        summary['num_samples'] = sample_counts

        descriptions = {}
        descriptions['image'] = 'A image of colorspace {} resized to {}.'.format(
            _IMAGE_COLORSPACE, _IMAGE_SIZE)
        descriptions['label'] = 'A single integer between 0 and 16'
        summary['items_to_descriptions'] = descriptions

        json.dump(summary, handle)

    print('\nFinished converting the flowers17 dataset!')