Esempio n. 1
0
    def extract_images(self, overwrite=False):
        """
        Export the CIFAR-10 train and validation images as PNG files.

        The dataset is loaded un-normalized through ``neon.data.CIFAR10``
        with ``self.out_dir`` as its path.  Every image is reshaped to
        (3, 32, 32), padded with ``self.pad_width`` (mean padding) and saved
        as ``<out_dir>/<set>/<label>/<index>.png``.  Once the training set
        has been written, ``self.pixel_mean`` is set to the per-channel mean
        of the training data, in reversed channel order.

        Arguments:
            overwrite (bool): accepted but never consulted by this method;
                              the images are always (re)written.
        """
        from neon.data import CIFAR10
        from PIL import Image

        loader = CIFAR10(path=self.out_dir, normalize=False)
        train_set, val_set, _ = loader.load_data()
        splits = {'train': train_set, 'val': val_set}

        for split_name in ('train', 'val'):
            pixels, classes = splits[split_name]
            split_dir = os.path.join(self.out_dir, split_name)

            # Create one sub-directory per distinct label value.
            for cls in np.unique(classes):
                cls_dir = os.path.join(split_dir, str(cls))
                if not os.path.exists(cls_dir):
                    os.makedirs(cls_dir)

            n_images = pixels.shape[0]
            for row in range(n_images):
                chw = pixels[row].reshape((3, 32, 32))
                padded = np.pad(chw, self.pad_width, mode='mean')
                # Move the channel axis last before handing the array to PIL.
                hwc = np.transpose(padded, axes=[1, 2, 0]).copy()
                target = os.path.join(split_dir, str(classes[row][0]),
                                      str(row) + '.png')
                Image.fromarray(np.uint8(hwc)).save(target, format='PNG')

            if split_name == 'train':
                channel_means = pixels.mean(axis=0).reshape(3, -1).mean(axis=1)
                # Reversed: we will see this in BGR order b/c of opencv.
                self.pixel_mean = list(channel_means)[::-1]
Esempio n. 2
0
def extract_images(out_dir, padded_size):
    """
    Save the CIFAR-10 train and validation images as PNG files.

    The dataset is loaded un-normalized through ``neon.data.CIFAR10`` with
    ``out_dir`` as its path.  Each image is reshaped to (3, 32, 32), padded
    equally on all four sides using mean padding, and written to
    ``<out_dir>/<set>/<label>/<index>.png``.

    Arguments:
        out_dir (str): directory handed to the CIFAR10 loader and used as
                       the root of the PNG tree.
        padded_size (int): requested edge length; the border on each side is
                           ``(padded_size - 32) // 2``, or 0 when
                           ``padded_size`` is not greater than 32.
    """
    import numpy as np
    from neon.data import CIFAR10
    from PIL import Image

    loader = CIFAR10(path=out_dir, normalize=False)
    train_set, val_set, _ = loader.load_data()
    splits = {'train': train_set, 'val': val_set}

    if padded_size > 32:
        border = (padded_size - 32) // 2
    else:
        border = 0
    # No padding on the channel axis, `border` pixels on each spatial side.
    pad_spec = ((0, 0), (border, border), (border, border))

    for split_name in ('train', 'val'):
        pixels, classes = splits[split_name]
        split_dir = os.path.join(out_dir, split_name)

        # Create one sub-directory per distinct label value.
        for cls in np.unique(classes):
            cls_dir = os.path.join(split_dir, str(cls))
            if not os.path.exists(cls_dir):
                os.makedirs(cls_dir)

        n_images = pixels.shape[0]
        for row in range(n_images):
            chw = pixels[row].reshape((3, 32, 32))
            padded = np.pad(chw, pad_spec, mode='mean')
            # Move the channel axis last before handing the array to PIL.
            hwc = np.transpose(padded, axes=[1, 2, 0]).copy()
            target = os.path.join(split_dir, str(classes[row][0]),
                                  str(row) + '.png')
            Image.fromarray(np.uint8(hwc)).save(target, format='PNG')
Esempio n. 3
0
def ingest_cifar10(out_dir, padded_size, overwrite=False):
    """
    Write CIFAR-10 as PNG files plus per-set CSV manifest files.

    A ``train.cfg`` file is (re)written next to this source file on every
    call, before the skip check below.  Unless both manifests already exist
    and ``overwrite`` is false, the function then writes one label file per
    class under ``<out_dir>/labels``, the mean-padded images under
    ``<out_dir>/<set>/<label>/`` and one ``<set>-index.csv`` manifest per
    set.  Each manifest row holds an image path and a label-file path, both
    relative to ``out_dir``.

    Arguments:
        out_dir (str): directory handed to the CIFAR10 loader and used as
                       the root for everything written except train.cfg.
        padded_size (int): requested edge length; the border on each side is
                           ``(padded_size - 32) // 2``, or 0 when
                           ``padded_size`` is not greater than 32.
        overwrite (bool): when false and both manifests exist, skip ingest.

    Returns:
        list: paths of the train and val manifest files, in that order.
    """
    loader = CIFAR10(path=out_dir, normalize=False)
    train_set, val_set, _ = loader.load_data()
    splits = {'train': train_set, 'val': val_set}

    border = (padded_size - 32) // 2 if padded_size > 32 else 0
    # No padding on the channel axis, `border` pixels on each spatial side.
    pad_spec = ((0, 0), (border, border), (border, border))

    set_names = ('train', 'val')
    manifest_files = [os.path.join(out_dir, name + '-index.csv')
                      for name in set_names]

    script_dir = os.path.dirname(os.path.realpath(__file__))
    cfg_file = os.path.join(script_dir, 'train.cfg')
    log_file = os.path.join(out_dir, 'train.log')
    manifest_list_cfg = ', '.join(
        name + ':' + path for name, path in zip(set_names, manifest_files))

    cfg_lines = [
        'manifest = [{}]\n'.format(manifest_list_cfg),
        'manifest_root = {}\n'.format(out_dir),
        'log = {}\n'.format(log_file),
        'epochs = 165\nrng_seed = 0\nverbose = True\neval_freq = 1\n',
        'backend = gpu\nbatch_size = 64\n',
    ]
    with open(cfg_file, 'w') as cfg:
        cfg.writelines(cfg_lines)

    if not overwrite and all(os.path.exists(path) for path in manifest_files):
        return manifest_files

    # Write out label files and set up the directory structure.
    lbl_paths = {}
    img_dirs = {'train': {}, 'val': {}}
    for lbl in range(10):
        lbl_file = os.path.join(out_dir, 'labels', str(lbl) + '.txt')
        lbl_paths[lbl] = ensure_dirs_exist(lbl_file)
        np.savetxt(lbl_paths[lbl], [lbl], fmt='%d')
        for name in ('train', 'val'):
            class_dir = os.path.join(out_dir, name, str(lbl) + '/')
            img_dirs[name][lbl] = ensure_dirs_exist(class_dir)

    # Now write out the image files and the manifests.
    for name, manifest in zip(set_names, manifest_files):
        rows = []
        for idx, (flat, lbl) in enumerate(tqdm(zip(*splits[name]))):
            cls = lbl[0]
            png_path = os.path.join(img_dirs[name][cls], str(idx) + '.png')
            padded = np.pad(flat.reshape((3, 32, 32)), pad_spec, mode='mean')
            # Move the channel axis last before handing the array to PIL.
            hwc = np.transpose(padded, axes=[1, 2, 0]).copy()
            Image.fromarray(np.uint8(hwc)).save(png_path, format='PNG')
            rows.append((os.path.relpath(png_path, out_dir),
                         os.path.relpath(lbl_paths[cls], out_dir)))
        np.savetxt(manifest, rows, fmt='%s,%s')

    return manifest_files
Esempio n. 4
0
def ingest_cifar10(out_dir, padded_size, overwrite=False):
    """
    Save CIFAR-10 dataset as PNG files and write tab-separated manifests.

    A ``train.cfg`` file is (re)written next to this source file on every
    call, before the skip check below.  Unless both manifests already exist
    and ``overwrite`` is false, every image is mean-padded and saved as
    ``<out_dir>/<set>/<label>_<index>.png``, and one ``<set>-index.csv``
    manifest is written per set.  Each manifest starts with the row
    ``@FILE<TAB>STRING`` followed by one ``<relative image path><TAB><label>``
    row per image.

    Arguments:
        out_dir (str): directory handed to the CIFAR10 loader and used as
                       the root for everything written except train.cfg.
        padded_size (int): requested edge length; the border on each side is
                           ``(padded_size - 32) // 2``, or 0 when
                           ``padded_size`` is not greater than 32.
        overwrite (bool): when false and both manifests exist, skip ingest.

    Returns:
        list: paths of the train and val manifest files, in that order.
              Returned on both the skip path and the full-ingest path.
    """
    dataset = dict()
    cifar10 = CIFAR10(path=out_dir, normalize=False)
    dataset['train'], dataset['val'], _ = cifar10.load_data()
    pad_size = (padded_size - 32) // 2 if padded_size > 32 else 0
    # No padding on the channel axis, `pad_size` pixels on each spatial side.
    pad_width = ((0, 0), (pad_size, pad_size), (pad_size, pad_size))

    set_names = ('train', 'val')
    manifest_files = [
        os.path.join(out_dir, setn + '-index.csv') for setn in set_names
    ]

    cfg_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'train.cfg')
    log_file = os.path.join(out_dir, 'train.log')
    manifest_list_cfg = ', '.join(
        [k + ':' + v for k, v in zip(set_names, manifest_files)])

    with open(cfg_file, 'w') as f:
        f.write('manifest = [{}]\n'.format(manifest_list_cfg))
        f.write('manifest_root = {}\n'.format(out_dir))
        f.write('log = {}\n'.format(log_file))
        f.write('epochs = 165\nrng_seed = 0\nverbose = True\neval_freq = 1\n')
        f.write('backend = gpu\nbatch_size = 64\n')

    if (all([os.path.exists(manifest) for manifest in manifest_files])
            and not overwrite):
        print(
            "Found existing manfiest files, skipping ingest, use --overwrite to rerun ingest."
        )
        return manifest_files

    # Now write out image files and manifests
    for setn, manifest in zip(set_names, manifest_files):
        img_path = os.path.join(out_dir, setn)
        if not os.path.isdir(img_path):
            os.makedirs(img_path)

        # First manifest row; presumably the header the consumer of this
        # manifest format expects -- confirm against the loader's docs.
        records = [('@FILE', 'STRING')]

        for idx, (img, lbl) in enumerate(tqdm(zip(*dataset[setn]))):
            fname = os.path.join(img_path, '{}_{:05d}.png'.format(lbl[0], idx))
            im = np.pad(img.reshape((3, 32, 32)), pad_width, mode='mean')
            # Move the channel axis last before handing the array to PIL.
            im = Image.fromarray(
                np.uint8(np.transpose(im, axes=[1, 2, 0]).copy()))
            im.save(fname, format='PNG')
            records.append((os.path.relpath(fname, out_dir), lbl[0]))

        np.savetxt(manifest, records, fmt='%s\t%s')

    print("Manifest files written to:\n" + "\n".join(manifest_files))
    # Return the same value as the skip path so callers get the manifest
    # locations regardless of whether ingest actually ran.
    return manifest_files
Esempio n. 5
0
def ingest_cifar10(out_dir, overwrite=False):
    """
    Write CIFAR-10 as PNG files plus shuffled per-set CSV manifest files.

    Unless both manifests already exist and ``overwrite`` is false, the
    function writes one label file per class under ``<out_dir>/labels``,
    the (unpadded) images under ``<out_dir>/<set>/<label>/`` and one
    ``<set>-index.csv`` manifest per set.  Each manifest row holds the image
    path and the label-file path exactly as they were built from
    ``out_dir``.  The rows are shuffled with numpy's global RNG, which is
    seeded with 0 once before the first set is processed.

    Arguments:
        out_dir (str): directory handed to the CIFAR10 loader and used as
                       the root for everything written.
        overwrite (bool): when false and both manifests exist, skip ingest.

    Returns:
        list: paths of the train and val manifest files, in that order.
    """
    loader = CIFAR10(path=out_dir, normalize=False)
    train_set, val_set, _ = loader.load_data()
    splits = {'train': train_set, 'val': val_set}

    set_names = ('train', 'val')
    manifest_files = [os.path.join(out_dir, name + '-index.csv')
                      for name in set_names]

    if not overwrite and all(os.path.exists(path) for path in manifest_files):
        return manifest_files

    # Write out label files and set up the directory structure.
    lbl_paths = {}
    img_dirs = {'train': {}, 'val': {}}
    for lbl in range(10):
        lbl_file = os.path.join(out_dir, 'labels', str(lbl) + '.txt')
        lbl_paths[lbl] = ensure_dirs_exist(lbl_file)
        np.savetxt(lbl_paths[lbl], [lbl], fmt='%d')
        for name in ('train', 'val'):
            class_dir = os.path.join(out_dir, name, str(lbl) + '/')
            img_dirs[name][lbl] = ensure_dirs_exist(class_dir)

    np.random.seed(0)
    # Now write out the image files and the manifests.
    for name, manifest in zip(set_names, manifest_files):
        rows = []
        for idx, (flat, lbl) in tqdm(enumerate(zip(*splits[name]))):
            cls = lbl[0]
            png_path = os.path.join(img_dirs[name][cls], str(idx) + '.png')
            # Move the channel axis last before handing the array to PIL.
            hwc = np.transpose(flat.reshape((3, 32, 32)), axes=[1, 2, 0]).copy()
            Image.fromarray(np.uint8(hwc)).save(png_path, format='PNG')
            rows.append((png_path, lbl_paths[cls]))

        np.random.shuffle(rows)
        np.savetxt(manifest, rows, fmt='%s,%s')

    return manifest_files
Esempio n. 6
0
from neon import logger as neon_logger
from neon.data import CIFAR10
from neon.initializers import Uniform
from neon.layers import GeneralizedCost, Affine
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Misclassification, CrossEntropyBinary, Logistic, Rectlin
from neon.callbacks.callbacks import Callbacks
from neon.util.argparser import NeonArgparser

# parse the command line arguments; the module docstring is handed to the
# parser
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# Build the CIFAR-10 dataset from args.data_dir with normalization enabled
# and both contrast normalization and whitening disabled.
dataset = CIFAR10(path=args.data_dir,
                  normalize=True,
                  contrast_normalize=False,
                  whiten=False)
# NOTE: the name `test` is bound to the dataset's *validation* iterator.
train = dataset.train_iter
test = dataset.valid_iter

# Uniform initializer over [-0.1, 0.1], shared by both layers below, and a
# gradient-descent-with-momentum optimizer (learning rate 0.01, momentum 0.9).
init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)

# set up the model layers: a 200-unit Affine layer with Rectlin activation
# followed by a 10-unit Affine layer with Logistic activation
layers = [
    Affine(nout=200, init=init_uni, activation=Rectlin()),
    Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True))
]

# Cost: binary cross-entropy wrapped in GeneralizedCost.
cost = GeneralizedCost(costfunc=CrossEntropyBinary())