def download_mnist(data_dir):
    """Download MNIST dataset and convert it to the same format as the Bandcamp dataset (useful as a sanity check)."""
    response = requests.get('http://deeplearning.net/data/mnist/mnist.pkl.gz')
    with GzipFile(fileobj=StringIO(response.content), mode='rb') as unzipped:
        raw_data = cPickle.load(unzipped)
    dataset = {
        name: (d[0], d[1].astype('int32'))
        for name, d in zip(['training', 'validation', 'testing'], raw_data)
    }
    label_to_index = dict(zip(map(str, range(10)), range(10)))
    with open(os.path.join(data_dir, 'mnist.pkl.zip'), 'wb') as out:
        pkl_utils.dump((dataset, label_to_index), out)
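# Illustrative sanity check (not part of the original module): reload the pickle written by
# download_mnist and report each subset's shape. It assumes only pkl_utils.load, the counterpart
# of pkl_utils.dump used above, which is also how resize_rgb_dataset below reads datasets.
def _check_mnist_pickle(data_dir):
    """Print subset shapes for the MNIST pickle to verify it matches the Bandcamp dataset format."""
    with open(os.path.join(data_dir, 'mnist.pkl.zip'), 'rb') as dataset_file:
        dataset, label_to_index = pkl_utils.load(dataset_file)
    for subset_name, (instances, labels) in dataset.iteritems():
        print('%s: instances %s, labels %s' % (subset_name, instances.shape, labels.shape))
    print('Labels: %s' % sorted(label_to_index))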
def create_datasets(image_dir, out_dir, skip_full_pickle=False):
    """Create the dataset pickles and JSONs.

    This is a wrapper around collect_dataset_filenames and load_raw_dataset that creates both the local and full
    datasets.

    On systems with little memory, pass in skip_full_pickle=True to skip creating the pickle for the full dataset.
    """
    for dataset_name, json_path in collect_dataset_filenames(image_dir, out_dir).iteritems():
        if skip_full_pickle and dataset_name == 'full':
            continue
        # Create both a greyscale representation (no suffix) and an RGB representation (.rgb suffix)
        for suffix, as_grey, flatten in [('', True, True), ('.rgb', False, False)]:
            with open(os.path.join(out_dir, '%s%s.pkl.zip' % (dataset_name, suffix)), 'wb') as out:
                pkl_utils.dump(load_raw_dataset(json_path, as_grey=as_grey, flatten=flatten), out)
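# Note (inferred from the '%s%s.pkl.zip' pattern above and the 'local'/'full' dataset names in the
# docstring): with default arguments this is expected to write local.pkl.zip, local.rgb.pkl.zip,
# full.pkl.zip and full.rgb.pkl.zip to out_dir; the full pickles are skipped when skip_full_pickle=True.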
def resize_rgb_dataset(in_filename, out_filename, x_size=0, y_size=0):
    """Resize an RGB dataset so that every image is of the given shape."""
    with open(in_filename, 'rb') as dataset_file:
        in_dataset, label_to_index = pkl_utils.load(dataset_file)

    shape = (x_size, y_size)
    out_dataset = {}
    for subset, (instances, labels) in in_dataset.iteritems():
        resized_instances = np.zeros(shape=(len(instances), 3) + shape, dtype=instances.dtype)
        for i, instance in enumerate(instances):
            # Transpose to (height, width, channels) for resize, transpose back to (channels, height, width),
            # and scale the [0, 1] float output back to [0, 255]; assigning into the preallocated array
            # casts the result to the input dtype.
            resized_instances[i] = resize(instance.transpose(1, 2, 0), shape).transpose(2, 0, 1) * 255
        out_dataset[subset] = (resized_instances, labels)

    with open(out_filename, 'wb') as out:
        pkl_utils.dump((out_dataset, label_to_index), out)
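# Illustrative usage (paths and target size are hypothetical): shrink the RGB dataset written by
# create_datasets so that every image has the same shape before training, e.g.
#   resize_rgb_dataset('full.rgb.pkl.zip', 'full.rgb-64.pkl.zip', x_size=64, y_size=64)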
def _save_model_snapshot(output_layer, snapshot_prefix, next_epoch):
    """Save a snapshot of the model's output layer together with the next epoch number."""
    snapshot_path = '%s.snapshot-%s.pkl.zip' % (snapshot_prefix, next_epoch)
    print('Saving snapshot to %s' % snapshot_path)
    with open(snapshot_path, 'wb') as out:
        pkl_utils.dump((next_epoch, output_layer), out)
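# Illustrative counterpart to _save_model_snapshot (not part of the original code): load a snapshot
# back with pkl_utils.load, mirroring the (next_epoch, output_layer) tuple dumped above.
def _load_model_snapshot(snapshot_path):
    with open(snapshot_path, 'rb') as snapshot_file:
        return pkl_utils.load(snapshot_file)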