Example #1
# Imports assumed by this test (standard Fuel test-suite dependencies):
import numpy
from numpy.testing import assert_raises

from fuel import config
from fuel.datasets import CIFAR10
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream


def test_cifar10():
    train = CIFAR10(('train', ), load_in_memory=False)
    assert train.num_examples == 50000
    handle = train.open()
    features, targets = train.get_data(handle, slice(49990, 50000))
    assert features.shape == (10, 3, 32, 32)
    assert targets.shape == (10, 1)
    train.close(handle)

    test = CIFAR10(('test', ), load_in_memory=False)
    handle = test.open()
    features, targets = test.get_data(handle, slice(0, 10))
    assert features.shape == (10, 3, 32, 32)
    assert targets.shape == (10, 1)
    assert features.dtype == numpy.uint8
    assert targets.dtype == numpy.uint8
    test.close(handle)

    stream = DataStream.default_stream(test,
                                       iteration_scheme=SequentialScheme(
                                           10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX

    assert_raises(ValueError, CIFAR10, ('valid', ))
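
The [0, 1] range and floatX dtype asserted above come from the transformers
that default_stream attaches for CIFAR10. A minimal hand-rolled equivalent,
assuming Fuel's ScaleAndShift and Cast transformers, would be:

from fuel.transformers import Cast, ScaleAndShift

test = CIFAR10(('test', ), load_in_memory=False)
raw = DataStream(test, iteration_scheme=SequentialScheme(10, 10))
scaled = ScaleAndShift(raw, scale=1 / 255.0, shift=0,
                       which_sources=('features',))
stream = Cast(scaled, dtype=config.floatX, which_sources=('features',))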
Example #2
# Imports assumed by this test; cPickle is the Python 2 pickle module:
import cPickle

import numpy
from numpy.testing import assert_raises

from fuel.datasets import CIFAR10


def test_cifar10():
    cifar10_train = CIFAR10('train', start=20000)
    assert len(cifar10_train.features) == 30000
    assert len(cifar10_train.targets) == 30000
    assert cifar10_train.num_examples == 30000
    cifar10_test = CIFAR10('test', sources=('targets', ))
    assert len(cifar10_test.targets) == 10000
    assert cifar10_test.num_examples == 10000

    first_feature, first_target = cifar10_train.get_data(request=[0])
    assert first_feature.shape == (1, 3072)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_two_targets, = cifar10_test.get_data(request=[0, 1])
    assert first_two_targets.shape == (2, 1)

    assert_raises(ValueError, CIFAR10, 'valid')

    cifar10_test = cPickle.loads(cPickle.dumps(cifar10_test))
    assert len(cifar10_test.targets) == 10000

    cifar_10_test_unflattened = CIFAR10('test', flatten=False)
    assert cifar_10_test_unflattened.features.shape == (10000, 3, 32, 32)
Example #3
# Import assumed by this helper (create_streams is a project-local utility):
from fuel.datasets import CIFAR10


def create_cifar10_streams(training_batch_size, monitoring_batch_size):
    """Creates CIFAR10 data streams.

    Parameters
    ----------
    training_batch_size : int
        Batch size for training.
    monitoring_batch_size : int
        Batch size for monitoring.

    Returns
    -------
    rval : tuple of data streams
        Data streams for the main loop, the training set monitor,
        the validation set monitor and the test set monitor.

    """
    train_set = CIFAR10(('train',), sources=('features',),
                        subset=slice(0, 45000))
    valid_set = CIFAR10(('train',), sources=('features',),
                        subset=slice(45000, 50000))
    test_set = CIFAR10(('test',), sources=('features',))

    return create_streams(train_set, valid_set, test_set, training_batch_size,
                          monitoring_batch_size)
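
A brief usage sketch; the batch sizes are illustrative, and create_streams (a
project-local helper) is assumed to return the four streams in the order the
docstring documents:

main_loop_stream, train_monitor, valid_monitor, test_monitor = \
    create_cifar10_streams(training_batch_size=128,
                           monitoring_batch_size=500)
for batch in main_loop_stream.get_epoch_iterator():
    features, = batch  # only the 'features' source was requested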
Example #4
# Imports assumed by this helper:
from fuel.datasets import CIFAR10
from fuel.schemes import ShuffledScheme
from fuel.streams import DataStream


def create_cifar10_data_streams(batch_size, monitoring_batch_size, rng=None):
    train_set = CIFAR10(
        ('train',), sources=('features',), subset=slice(0, 45000))
    valid_set = CIFAR10(
        ('train',), sources=('features',), subset=slice(45000, 50000))
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            5000, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(
            5000, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
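
Unlike Example #3, no test-set stream is built, and both monitor streams draw
5000 shuffled examples. A usage sketch (the RandomState seed is illustrative):

import numpy

rng = numpy.random.RandomState(2018)
main_loop_stream, train_monitor, valid_monitor = create_cifar10_data_streams(
    batch_size=128, monitoring_batch_size=500, rng=rng)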
Example #5
def get_cifar10(split, sources, load_in_memory):
    from fuel.datasets import CIFAR10
    if 'test' not in split:
        subset = slice(0, 45000) if 'train' in split else slice(45000, 50000)
        split = ('train', )
    else:
        subset = None
    return CIFAR10(split,
                   sources=sources,
                   subset=subset,
                   load_in_memory=load_in_memory)
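
The helper folds a 'valid' request onto the last 5000 training examples
(45000:50000), any other non-test split onto the first 45000, and passes
'test' through untouched. A sketch of the three supported calls:

train = get_cifar10(('train',), ('features', 'targets'), load_in_memory=True)
valid = get_cifar10(('valid',), ('features', 'targets'), load_in_memory=True)
test = get_cifar10(('test',), ('features', 'targets'), load_in_memory=True)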
Example #6
def train(args, model_args):

    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'

    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(
            DataStream.default_stream(
                dataset_train,
                iteration_scheme=ShuffledScheme(
                    examples=dataset_train.num_examples -
                    (dataset_train.num_examples % args.batch_size),
                    batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)
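    # Sketch for the TODO above: a streaming estimate of scl/shft over the
    # full dataset rather than one minibatch (illustrative only):
    #     n, s, ss = 0, 0., 0.
    #     for (xb,) in train_stream.get_epoch_iterator():
    #         n += xb.size
    #         s += xb.sum()
    #         ss += (xb ** 2).sum()
    #     mean = s / n
    #     scl = 1. / np.sqrt(ss / n - mean ** 2)
    #     shft = -mean * scl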

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    '''
    x = T.matrix('x', dtype='float32')
    temp  = T.scalar('temp', dtype='float32')
    f=transition_operator(tparams, model_options, x, temp)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f([data[0], 1.0, 1])
        #ipdb.set_trace()
    '''
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    for param in tparams:
        print param
        print tparams[param].get_value().shape

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():

            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,
                                                3 * 32 * 32), )
                else:
                    continue
            else:
                # non-CIFAR10 batches are used as-is; without this branch
                # data_use would be undefined below
                data_use = data
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()

            t1 = time.time()

            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        [data_run, temperature_forward, 1])
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)

            ud = time.time() - ud_start

            #gradient_updates_ = get_grads(data_use[0],args.temperature)

            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            t1 = time.time()
            #print time.time() - t1, "time to get grads"
            t1 = time.time()
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            #'Norm_1': np.linalg.norm(gradient_updates_[0]),
            #'Norm_2': np.linalg.norm(gradient_updates_[1]),
            #'Norm_3': np.linalg.norm(gradient_updates_[2]),
            #'Norm_4': np.linalg.norm(gradient_updates_[3])})
            #print time.time() - t1, "time to log"

            #print time.time() - t0, "total time in batch"
            t5 = time.time()

            if batch_index % 20 == 0:
                print batch_index, "cost", cost

            if batch_index % 200 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))
                temperature_forward = args.temperature

                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [data_use[0], temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/' + "batch_" +
                            str(batch_index) + '_corrupted' + 'epoch_' +
                            str(count_sample) + '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [x_data, temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/batch_' + str(batch_index) +
                            '_corrupted' + '_epoch_' + str(count_sample) +
                            '_time_step_' + str(num_step))

                    temperature_forward = temperature_forward * args.temperature_factor

                x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH,
                                              WIDTH)
                plot_images(
                    x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) +
                    '_batch_index_' + str(batch_index))

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/' + "batch_" +
                        str(batch_index) + '_samples_backward_' + 'epoch_' +
                        str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

                if args.noise == "gaussian":
                    x_sampled = np.random.normal(
                        0.5, 2.0,
                        size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    s = np.random.binomial(1, 0.5, INPUT_SIZE)

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/batch_index_' +
                        str(batch_index) + '_inference_' + 'epoch_' +
                        str(count_sample) + '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

    ipdb.set_trace()
Example #7
def train(args, model_args):

    model_id = '/data/lisatmp4/anirudhg/spiral_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=20000,
                           classes=1,
                           cycles=1.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
    elif args.dataset == 'Circle':
        print 'loading Circle'
        train_set = Circle(num_examples=20000,
                           classes=1,
                           cycles=1.,
                           noise=0.0,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
        iter_per_epoch = train_set.num_examples
    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    train_stream = dataset_train

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'
    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 0
    print 'Number of steps....', args.num_steps
    print 'Done'
    count_sample = 1
    batch_index = 0
    for eidx in xrange(max_epochs):
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
            if eidx == 30:
                ipdb.set_trace()
        n_samples = 0
        print 'Starting Next Epoch ', eidx

        for data in train_stream.get_epoch_iterator():
            batch_index += 1
            n_samples += len(data[0])
            uidx += 1
            if data[0] is None:
                print 'No data '
                uidx -= 1
                continue
            data_run = data[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        data_run, temperature_forward)
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            empty = []
            spiral_x = [empty for i in range(args.num_steps)]
            spiral_corrupted = []
            spiral_sampled = []
            grad_forward = []
            grad_back = []
            x_data_time = []
            x_tilt_time = []
            if batch_index % 8 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                temperature_forward = args.temperature
                for num_step in range(args.num_steps):
                    if num_step == 0:
                        x_data_time.append(data[0])
                        plot_images(
                            data[0], model_dir + '/' + 'orig_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index))
                        x_data, mu_data, _, _ = forward_diffusion(
                            data[0], temperature_forward)

                        plot_images(
                            x_data, model_dir + '/' + 'corrupted_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index) +
                            '_time_step_' + str(num_step))
                        x_data_time.append(x_data)
                        temp_grad = np.concatenate(
                            (x_data_time[-2], x_data_time[-1]), axis=1)
                        grad_forward.append(temp_grad)

                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        spiral_corrupted.append(x_data)
                        mu_data = np.asarray(mu_data).astype(
                            'float32').reshape(args.batch_size, INPUT_SIZE)
                        mu_data = mu_data.reshape(args.batch_size, 2)
                    else:
                        x_data_time.append(x_data)
                        x_data, mu_data, _, _ = forward_diffusion(
                            x_data, temperature_forward)
                        plot_images(
                            x_data, model_dir + '/' + 'corrupted_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index) +
                            '_time_step_' + str(num_step))
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        spiral_corrupted.append(x_data)

                        mu_data = np.asarray(mu_data).astype(
                            'float32').reshape(args.batch_size, INPUT_SIZE)
                        mu_data = mu_data.reshape(args.batch_size, 2)
                        x_data_time.append(x_data)
                        temp_grad = np.concatenate(
                            (x_data_time[-2], x_data_time[-1]), axis=1)
                        grad_forward.append(temp_grad)
                    temperature_forward = temperature_forward * args.temperature_factor

                mean_sampled = x_data.mean()
                var_sampled = x_data.var()

                x_temp2 = data[0].reshape(args.batch_size, 2)
                plot_2D(
                    spiral_corrupted, args.num_steps,
                    model_dir + '/' + 'corrupted_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                plot_2D(
                    x_temp2, 1, model_dir + '/' + 'orig_' + 'epoch_' +
                    str(count_sample) + '_batch_index_' + str(batch_index))
                plot_grad(
                    grad_forward,
                    model_dir + '/' + 'grad_forward_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                for i in range(args.num_steps + args.extra_steps):
                    x_tilt_time.append(x_data)
                    x_data, sampled_mean = f_sample(x_data, temperature)
                    plot_images(
                        x_data, model_dir + '/' + 'sampled_' + 'epoch_' +
                        str(count_sample) + '_batch_' + str(batch_index) +
                        '_time_step_' + str(i))
                    x_tilt_time.append(x_data)
                    temp_grad = np.concatenate(
                        (x_tilt_time[-2], x_tilt_time[-1]), axis=1)
                    grad_back.append(temp_grad)

                    ###print 'Recons, On step number, using temperature', i, temperature
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

                plot_grad(
                    grad_back, model_dir + '/' + 'grad_back_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                plot_2D(
                    x_tilt_time, args.num_steps,
                    model_dir + '/' + 'sampled_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))

                s = np.random.normal(mean_sampled, var_sampled,
                                     [args.batch_size, 2])
                x_sampled = s

                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps + args.extra_steps):
                    x_data, sampled_mean = f_sample(x_data, temperature)
                    spiral_sampled.append(x_data)
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor
                plot_2D(
                    spiral_sampled, args.num_steps,
                    model_dir + '/' + 'inference_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
    ipdb.set_trace()
Example #8
def train(args, model_args):

    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'

    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(
            DataStream.default_stream(
                dataset_train,
                iteration_scheme=ShuffledScheme(
                    examples=dataset_train.num_examples -
                    (dataset_train.num_examples % args.batch_size),
                    batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    x, cost, start_temperature, step_chain = build_model(
        tparams, model_options)
    # theano.function inputs must be root variables, not computed
    # expressions such as x.astype('float32'):
    inps = [x, start_temperature, step_chain]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    step_chain_part = T.scalar('step_chain_part', dtype='int32')

    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature, step_chain_part)

    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    #for param in tparams:
    #    print param
    #    print tparams[param].get_value().shape

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    save_data = []
    for eidx in xrange(1):
        print 'Starting Next Epoch ', eidx
        for data_ in range(500):  #train_stream.get_epoch_iterator():
            if args.noise == "gaussian":
                x_sampled = np.random.normal(0.5,
                                             2.0,
                                             size=(args.batch_size,
                                                   INPUT_SIZE)).clip(0.0, 1.0)
            else:
                s = np.random.binomial(1, 0.5, INPUT_SIZE)

            temperature = args.temperature * (args.temperature_factor**(
                args.num_steps * args.meta_steps - 1))
            x_data = np.asarray(x_sampled).astype('float32')
            for i in range(args.num_steps * args.meta_steps +
                           args.extra_steps):
                x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                    x_data.astype('float32'), temperature,
                    args.num_steps * args.meta_steps - i - 1)
                print 'On step number, using temperature', i, temperature
                #reverse_time(scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i))
                x_data = np.asarray(x_data).astype('float32')
                x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                if temperature != args.temperature:
                    temperature /= args.temperature_factor

            count_sample = count_sample + 1
            save_data.append(x_data)
            fname = model_dir + '/batch_index_' + str(
                batch_index) + '_inference_' + 'epoch_' + str(
                    count_sample) + '_step_' + str(i)
            np.savez(fname + '.npz', x_data)

        save2_data = np.asarray(save_data).astype('float32')
        fname = model_dir + '/generated_images_50000'  #+ args.saveto_filename
        np.savez(fname + '.npz', save2_data)

    ipdb.set_trace()
Example #9
    def transform_batch(self, batch):
        targets = batch[1]
        for i in range(len(targets)):
            targets[i] = self.relabel[targets[i]]
        batch[1] = targets

        return batch



if __name__ == '__main__':
    from fuel.datasets import CIFAR10
    from fuel.schemes import ShuffledScheme
    from fuel.streams import DataStream

    dataset = CIFAR10(('train',), sources=('features', 'targets'),
                      subset=slice(0, 45000))

    stream = FilterLabelsTransformer(10, 10,
                                     DataStream(
                                         dataset=dataset,
                                         iteration_scheme=ShuffledScheme(
                                             dataset.num_examples,
                                             200)),
                                     produces_examples=False)

    epitr = stream.get_epoch_iterator()
    out = next(epitr)
    print out[0].shape, out[1].shape
    print out[1][:10]
Example #10
    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features',))
        dataset_test = MNIST(['test'], sources=('features',))
        n_colors = 1
        spatial_width = 28
    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32
    elif args.dataset == 'IMAGENET':
        from imagenet_data import IMAGENET
        spatial_width = 128
        dataset_train = IMAGENET(['train'], width=spatial_width)
        dataset_test = IMAGENET(['test'], width=spatial_width)
        n_colors = 3
    else:
        raise ValueError("Unknown dataset %s."%args.dataset)

    train_stream = Flatten(DataStream.default_stream(dataset_train,
                              iteration_scheme=ShuffledScheme(
                                  examples=dataset_train.num_examples,
                                  batch_size=args.batch_size)))
Example #11
def train(args, model_args, lrate):

    model_id = '/data/lisatmp4/anirudhg/minst_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    logger = mimir.Logger(filename=model_dir2 + '/' + model_id2 +
                          'log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    train_stream = Flatten(
        DataStream.default_stream(dataset_train,
                                  iteration_scheme=ShuffledScheme(
                                      examples=dataset_train.num_examples,
                                      batch_size=args.batch_size)))

    shp = next(train_stream.get_epoch_iterator())[0].shape
    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_ and os.path.exists(args.saveto_filename):
        print 'Reloading Parameters'
        print args.saveto_filename
        params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    '''
    x = T.matrix('x', dtype='float32')
    f=transition_operator(tparams, model_options, x, 1)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f(data[0])
        print a
        ipdb.set_trace()
    '''
    x, cost = build_model(tparams, model_options)
    inps = [x]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    print 'Building f_cost...',
    f_cost = theano.function(inps, cost)
    print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 0
    print 'Number of steps....'
    print args.num_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():
            batch_index += 1
            n_samples += len(data[0])
            uidx += 1
            if data[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()
            cost = f_grad_shared(data[0])
            f_update(lrate)
            ud = time.time() - ud_start

            if batch_index % 1 == 0:
                print 'Cost is this', cost
                count_sample += 1

                from impainting import change_image, inpainting
                train_temp = data[0]
                print data[0].shape
                change_image(train_temp.reshape(args.batch_size, 1, 28, 28), 3)
                train_temp = train_temp.reshape(args.batch_size, 784)
                output = inpainting(train_temp)
                change_image(output.reshape(args.batch_size, 1, 28, 28), 1)

                reverse_time(
                    scl, shft, output,
                    model_dir + '/' + 'impainting_orig_' + 'epoch_' +
                    str(count_sample) + '_batch_index_' + str(batch_index))
                x_data = np.asarray(output).astype('float32')
                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                temperature = args.temperature  #* (args.temperature_factor ** (args.num_steps -1 ))
                orig_impainted_data = np.asarray(data[0]).astype('float32')

                for i in range(args.num_steps + args.extra_steps + 5):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        x_data, temperature)
                    print 'Impainting using temperature', i, temperature
                    x_data = do_half_image(x_data, orig_impainted_data)
                    reverse_time(
                        scl, shft, x_data, model_dir + '/' +
                        'impainting_orig_' + 'epoch_' + str(count_sample) +
                        '_batch_index_' + str(batch_index) + 'step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    # temperature intentionally held constant (annealing
                    # step disabled):
                    #temperature /= args.temperature_factor
    ipdb.set_trace()
Example #12
def _cifir():
    from fuel.datasets import CIFAR10
    import os
    os.environ["FUEL_DATA_PATH"] = os.getcwd() + "/data/"

    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.)
    }

    batch_size = 64
    data_train = CIFAR10(which_sets=['train'], sources=['features'])
    streams = DataStream(
        data_train, iteration_scheme=SequentialScheme(
            data_train.num_examples, batch_size))

    train_stream = Flatten(streams)

    # print train_stream.get_epoch_iterator(as_dict=True).next()
    # raise

    features_size = 32 * 32 * 3
    inputs = T.matrix('features')
    inputs = ((inputs / 255.) * 2. - 1.)

    rng = MRG_RandomStreams(123)

    prior = Z_prior(dim=512)
    gen = Generator(input_dim=512, dims=[512, 512, 512, 512,
                                         features_size],
                    alpha=0.1, **inits)

    dis = Discriminator(dims=[features_size, 512, 512, 512, 512],
                        alpha=0.1, **inits)

    gan = GAN(dis=dis, gen=gen, prior=prior)
    gan.initialize()

    # gradient penalty
    fake_samples, _ = gan.sampling(inputs.shape[0])
    e = rng.uniform(size=(inputs.shape[0], 1))

    mixed_input = (e * fake_samples) + (1 - e) * inputs

    output_d_mixed = gan._dis.apply(mixed_input)

    grad_mixed = T.grad(T.sum(output_d_mixed), mixed_input)

    norm_grad_mixed = T.sqrt(T.sum(T.square(grad_mixed), axis=1))
    grad_penalty = T.mean(T.square(norm_grad_mixed - 1))

    y_hat1, y_hat0, z = gan.apply(inputs)

    d_loss_real = y_hat1.mean()
    d_loss_fake = y_hat0.mean()
    d_loss = - d_loss_real + d_loss_fake + 10 * grad_penalty
    g_loss = - d_loss_fake


    dis_obj = d_loss
    gen_obj = g_loss

    model = Model([y_hat0, y_hat1])

    em_loss = -d_loss_real + d_loss_fake

    em_loss.name = "Earth Move loss"
    dis_obj.name = 'Discriminator loss'
    gen_obj.name = 'Generator loss'

    cg = ComputationGraph([gen_obj, dis_obj])

    gen_filter = VariableFilter(roles=[PARAMETER],
                                bricks=gen.linear_transformations)

    dis_filter = VariableFilter(roles=[PARAMETER],
                                bricks=dis.linear_transformations)

    gen_params = gen_filter(cg.variables)
    dis_params = dis_filter(cg.variables)

    # Prepare the dropout
    _inputs = []
    for brick_ in [gen]:
        _inputs.extend(VariableFilter(
            roles=[INPUT],
            bricks=brick_.linear_transformations)(cg.variables))

    cg_dropout = apply_dropout(cg, _inputs, 0.02)

    gen_obj = cg_dropout.outputs[0]
    dis_obj = cg_dropout.outputs[1]

    gan.dis_params = dis_params
    gan.gen_params = gen_params

    # gradient penalty

    algo = AdverserialTraning(gen_obj=gen_obj, dis_obj=dis_obj,
                              model=gan, dis_iter=5, gradient_clip=None,
                              step_rule=RMSProp(learning_rate=1e-4),
                              gen_consider_constant=z)

    neg_sample = gan.sampling(size=25)

    from blocks.monitoring.aggregation import mean

    monitor = TrainingDataMonitoring(variables=[mean(gen_obj), mean(dis_obj),
                                                mean(em_loss)],
                                     prefix="train", after_batch=True)

    subdir = './exp/' + 'CIFAR10' + "-" + time.strftime("%Y%m%d-%H%M%S")

    check_point = Checkpoint("{}/{}".format(subdir, 'CIFAR10'),
                             every_n_epochs=100,
                             save_separately=['log', 'model'])

    neg_sampling = GenerateNegtiveSample(inputs, neg_sample,
                                         img_size=(25, 32, 32, 3),
                                         every_n_epochs=10)

    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(algorithm=algo, model=model,
                         data_stream=train_stream,
                         extensions=[Printing(), ProgressBar(), monitor,
                                     check_point, neg_sampling])

    main_loop.run()
Example #13
def main(num_epochs=100):
    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    num_filters = 64
    layers = [ConvolutionalLayer(filter_size=(8, 8), num_filters=num_filters,
                                 num_channels=3, step=(1, 1),
                                 border_mode='valid'),
              MaxPooling(pooling_size=(2, 2)),
              ConvolutionalLayer(filter_size=(5, 5), num_filters=num_filters,
                                 num_channels=num_filters, step=(1, 1),
                                 border_mode='valid')]
    convnet = ConvolutionalNetwork(layers=layers)
    output_shape = convnet.get_output_shape((32, 32))
    convnet.set_input_shape((32, 32))
    fc_net = NeuralSoftmax(input_dim=numpy.prod(output_shape) * num_filters,
                           n_classes=10, n_hidden=[500])

    convnet_features = convnet.apply(x)
    probs = fc_net.get_probs(features=convnet_features.flatten(2))
    params = convnet.get_params() + fc_net.get_params()
    weights = convnet.get_weights()
    cost = fc_net.get_cost(probs=probs, targets=y).mean()
    cost.name = 'cost'
    misclassification = fc_net.get_misclassification(
        probs=probs, targets=y
    ).mean()
    misclassification.name = 'misclassification'

    train_dataset = CIFAR10('train', flatten=False)
    test_dataset = CIFAR10('test', flatten=False)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=Momentum(learning_rate=.01,
                           momentum=0.1))

    train_data_stream = Mapping(
        data_stream=ForceFloatX(
            data_stream=DataStream(
                dataset=train_dataset,
                iteration_scheme=ShuffledScheme(
                    examples=range(50000),
                    batch_size=100,
                )
            )
        ),
        mapping=Rescale(scale=1/127.5, shift=-127.5)
    )
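    # Rescale presumably computes scale * (data + shift), mapping the uint8
    # pixels from [0, 255] into [-1, 1]; the same mapping is reused for the
    # validation and test streams below.
    # NOTE: the validation stream below reads examples 40000-49999 of the
    # training set, which the shuffled training stream above also visits,
    # so this is not a held-out split.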
    valid_data_stream = Mapping(
        data_stream=ForceFloatX(
            data_stream=DataStream(
                dataset=train_dataset,
                iteration_scheme=SequentialScheme(
                    examples=range(40000, 50000),
                    batch_size=1000,
                )
            )
        ),
        mapping=Rescale(scale=1/127.5, shift=-127.5)
    )
    test_data_stream = Mapping(
        data_stream=ForceFloatX(
            data_stream=DataStream(
                dataset=test_dataset,
                iteration_scheme=SequentialScheme(
                    examples=test_dataset.num_examples,
                    batch_size=100,
                )
            )
        ),
        mapping=Rescale(scale=1/127.5, shift=-127.5)
    )

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        test_data_stream,
        prefix='test'))
    extensions.append(DataStreamMonitoring(
        [cost, misclassification],
        valid_data_stream,
        prefix='valid'))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))

    plotters = []
    plotters.append(Plotter(
        channels=[['test_cost', 'test_misclassification',
                   'train_cost', 'train_misclassification']],
        titles=['Costs']))
    display_train = ImageDataStreamDisplay(
        data_stream=copy.deepcopy(train_data_stream),
        image_shape=(3, 32, 32),
        axes=('c', 0, 1),
        shift=0,
        rescale=1.,
    )
    weight_display = WeightDisplay(
        weights=weights,
        transpose=(0, 1, 2, 3),
        image_shape=(3, 8, 8),
        axes=('c', 0, 1),
        shift=-0.5,
        rescale=2.,
    )

    # Feature maps
    one_example_train_data_stream = Mapping(
        data_stream=ForceFloatX(
            data_stream=DataStream(
                dataset=train_dataset,
                iteration_scheme=ShuffledScheme(
                    examples=train_dataset.num_examples,
                    batch_size=1,
                )
            )
        ),
        mapping=Rescale(scale=1/127.5, shift=-127.5)
    )
    displayable_convnet_features = convnet_features.dimshuffle((1, 0, 2, 3))
    convnet_features_normalizer = abs(
        displayable_convnet_features
    ).max(axis=(1, 2, 3))
    displayable_convnet_features = displayable_convnet_features \
        / convnet_features_normalizer.dimshuffle((0, 'x', 'x', 'x'))
    get_displayable_convnet_features = theano.function(
        [x], displayable_convnet_features
    )
    display_feature_maps_data_stream = Mapping(
        data_stream=one_example_train_data_stream,
        mapping=TupleMapping(get_displayable_convnet_features,
                             same_len_out=True)
    )
    display_feature_maps = ImageDataStreamDisplay(
        data_stream=display_feature_maps_data_stream,
        image_shape=(1, ) + output_shape,
        axes=('c', 0, 1),
        shift=0,
        rescale=1.,
    )

    # Saliency map
    displayable_saliency_map = tensor.grad(cost, x)
    saliency_map_normalizer = abs(
        displayable_saliency_map
    ).max(axis=(1, 2, 3))
    displayable_saliency_map = displayable_saliency_map \
        / saliency_map_normalizer.dimshuffle((0, 'x', 'x', 'x'))
    get_displayable_saliency_map = theano.function(
        [x, y], displayable_saliency_map
    )

    display_saliency_map_data_stream = Mapping(
        data_stream=copy.deepcopy(train_data_stream),
        mapping=TupleMapping(get_displayable_saliency_map,
                             same_len_out=True,
                             same_len_in=True)
    )
    display_saliency_map = ImageDataStreamDisplay(
        data_stream=display_saliency_map_data_stream,
        image_shape=(3, 32, 32),
        axes=('c', 0, 1),
        shift=0,
        rescale=1.,
    )

    # Deconvolution
    x_repeated = x.repeat(num_filters, axis=0)
    convnet_features_repeated = convnet.apply(x_repeated)
    convnet_features_selected = convnet_features_repeated \
        * tensor.eye(num_filters).repeat(
            x.shape[0], axis=0
        ).dimshuffle((0, 1, 'x', 'x'))
    displayable_deconvolution = tensor.grad(misclassification, x_repeated,
                                            known_grads={
                                                convnet_features_selected:
                                                    convnet_features_selected
                                            })
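    # The known_grads entry above short-circuits backpropagation: the
    # "gradient" arriving at each selected feature map is taken to be the
    # map itself, so the images computed below show what the filters
    # respond to (a deconvnet-style visualization) rather than a true
    # gradient of the misclassification rate.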
    deconvolution_normalizer = abs(
        displayable_deconvolution
    ).max(axis=(1, 2, 3))
    displayable_deconvolution = displayable_deconvolution \
        / deconvolution_normalizer.dimshuffle((0, 'x', 'x', 'x'))
    get_displayable_deconvolution = theano.function(
        [x], displayable_deconvolution
    )
    display_deconvolution_data_stream = Mapping(
        data_stream=one_example_train_data_stream,
        mapping=TupleMapping(get_displayable_deconvolution,
                             same_len_out=True)
    )
    display_deconvolution = ImageDataStreamDisplay(
        data_stream=display_deconvolution_data_stream,
        image_shape=(3, 32, 32),
        axes=('c', 0, 1),
        shift=0,
        rescale=1.,
    )

    images_displayer = DisplayImage(
        image_getters=[display_train, weight_display, display_feature_maps,
                       display_saliency_map, display_deconvolution],
        titles=['Training examples', 'Convolutional weights', 'Feature maps',
                'Sensitivity', 'Deconvolution']
    )
    plotters.append(images_displayer)

    extensions.append(PlotManager('CIFAR-10 convnet examples',
                                  plotters=plotters,
                                  after_epoch=False,
                                  every_n_epochs=10,
                                  after_training=True))
    extensions.append(Printing())
    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
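The saliency map in the example above is nothing more than the gradient of the scalar cost with respect to the input pixels. A minimal, self-contained Theano sketch of the same idea; the linear scoring model and all names here are hypothetical stand-ins, not the classifier from the example:

import numpy
import theano
from theano import tensor

x = tensor.tensor4('x')  # (batch, channel, row, col), as in the example
# Hypothetical stand-in model: a linear score over the flattened image.
W = theano.shared(
    numpy.random.randn(3 * 32 * 32).astype(theano.config.floatX))
score = tensor.dot(x.flatten(2), W).sum()
# The saliency map has the same shape as the input.
saliency = tensor.grad(score, x)
get_saliency = theano.function([x], saliency)

images = numpy.random.rand(2, 3, 32, 32).astype(theano.config.floatX)
print(get_saliency(images).shape)  # (2, 3, 32, 32)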
Exemple #14
0
import numpy

from transformers import ZCA, ContrastNorm, Whitening
from fuel.transformers import ForceFloatX
from fuel.datasets import CIFAR10
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme, SequentialScheme

train_dataset = CIFAR10(['train'], sources=('features', 'targets'))
test_dataset = CIFAR10(['test'], sources=('features', 'targets'))

batch_size = 400  # Batch size for training
batch_size_mon = 4000  # Batch size for monitoring and batch normalization
n_batches = int(numpy.ceil(float(train_dataset.num_examples) / batch_size_mon))
num_protos = 10

train_data = train_dataset.get_data(
    request=range(train_dataset.num_examples))[0]
print(train_data.shape)
#cnorm = ContrastNorm()
#train_data = cnorm.apply(train_data)
#whiten = ZCA()
#whiten.fit(3072, train_data)


def preprocessing(data_stream, num_examples, batch_size):
    # Whitening is disabled for now; the full pipeline would be:
    # return ForceFloatX(Whitening(data_stream, ShuffledScheme(num_examples, batch_size), whiten, cnorm), which_sources=('features',))
    return data_stream


train_stream_mon = preprocessing(DataStream(train_dataset),
                                 train_dataset.num_examples, batch_size_mon)
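For reference, the whitening that the commented-out lines would perform is contrast normalization followed by ZCA. A plain-numpy sketch of the textbook ZCA transform (an illustration, not necessarily the exact behaviour of the local ZCA class):

import numpy

def zca_fit(X, eps=1e-5):
    # X: (n_examples, n_features) float array.
    mean = X.mean(axis=0)
    Xc = X - mean
    cov = numpy.dot(Xc.T, Xc) / Xc.shape[0]
    U, S, _ = numpy.linalg.svd(cov)
    # Whitening matrix that decorrelates the features while staying as
    # close as possible to the original pixel space ("zero-phase").
    W = U.dot(numpy.diag(1.0 / numpy.sqrt(S + eps))).dot(U.T)
    return mean, W

def zca_apply(X, mean, W):
    return numpy.dot(X - mean, W)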
Exemple #15
0
def main(save_to, num_epochs,
         weight_decay=0.0001, noise_pressure=0, subset=None, num_batches=None,
         batch_size=None, histogram=None, resume=False):
    output_size = 10

    prior_noise_level = -10
    noise_step_rule = Scale(1e-6)
    noise_rate = theano.shared(numpy.asarray(1e-5, dtype=theano.config.floatX))
    convnet = create_res_net(out_noise=True, tied_noise=True, tied_sigma=True,
            noise_rate=noise_rate,
            prior_noise_level=prior_noise_level)

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    test_probs = convnet.apply(x)
    test_cost = (CategoricalCrossEntropy().apply(y.flatten(), test_probs)
            .copy(name='cost'))
    test_error_rate = (MisclassificationRate().apply(y.flatten(), test_probs)
                  .copy(name='error_rate'))
    test_confusion = (ConfusionMatrix().apply(y.flatten(), test_probs)
                  .copy(name='confusion'))
    test_confusion.tag.aggregation_scheme = Sum(test_confusion)

    test_cg = ComputationGraph([test_cost, test_error_rate])

    # Apply dropout to all layer outputs except final softmax
    # dropout_vars = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables)
    # drop_cg = apply_dropout(test_cg, dropout_vars, 0.5)

    # Apply 0.2 dropout to the pre-averaging layer
    # dropout_vars_2 = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_8_apply_output$")(test_cg.variables)
    # train_cg = apply_dropout(test_cg, dropout_vars_2, 0.2)

    # Apply 0.2 dropout to the input, as in the paper
    # train_cg = apply_dropout(test_cg, [x], 0.2)
    # train_cg = drop_cg
    # train_cg = apply_batch_normalization(test_cg)

    # train_cost, train_error_rate, train_components = train_cg.outputs

    with batch_normalization(convnet):
        with training_noise(convnet):
            train_probs = convnet.apply(x)
    train_cost = (CategoricalCrossEntropy().apply(y.flatten(), train_probs)
                .copy(name='cost'))
    train_components = (ComponentwiseCrossEntropy().apply(y.flatten(),
                train_probs).copy(name='components'))
    train_error_rate = (MisclassificationRate().apply(y.flatten(),
                train_probs).copy(name='error_rate'))
    train_cg = ComputationGraph([train_cost,
                train_error_rate, train_components])
    population_updates = get_batch_normalization_updates(train_cg)
    bn_alpha = 0.9
    extra_updates = [(p, p * bn_alpha + m * (1 - bn_alpha))
                for p, m in population_updates]
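    # i.e. population <- 0.9 * population + 0.1 * minibatch_statistic after
    # every step: an exponential moving average of the batch statistics,
    # used in place of minibatch estimates at test time.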

    # Shared variable so the noise penalty can be annealed during training
    nit_penalty = theano.shared(numpy.asarray(noise_pressure, dtype=theano.config.floatX))
    nit_penalty.name = 'nit_penalty'

    # Compute noise rates for training graph
    train_logsigma = VariableFilter(roles=[LOG_SIGMA])(train_cg.variables)
    train_mean_log_sigma = tensor.concatenate([n.flatten() for n in train_logsigma]).mean()
    train_mean_log_sigma.name = 'mean_log_sigma'
    train_nits = VariableFilter(roles=[NITS])(train_cg.auxiliary_variables)
    train_nit_rate = tensor.concatenate([n.flatten() for n in train_nits]).mean()
    train_nit_rate.name = 'nit_rate'
    train_nit_regularization = nit_penalty * train_nit_rate
    train_nit_regularization.name = 'nit_regularization'

    # Apply regularization to the cost
    trainable_parameters = VariableFilter(roles=[WEIGHT, BIAS])(
            train_cg.parameters)
    mask_parameters = [p for p in trainable_parameters
            if get_brick(p).name == 'mask']
    noise_parameters = VariableFilter(roles=[NOISE])(train_cg.parameters)
    biases = VariableFilter(roles=[BIAS])(train_cg.parameters)
    weights = VariableFilter(roles=[WEIGHT])(train_cg.variables)
    nonmask_weights = [p for p in weights if get_brick(p).name != 'mask']
    l2_norm = sum([(W ** 2).sum() for W in nonmask_weights])
    l2_norm.name = 'l2_norm'
    l2_regularization = weight_decay * l2_norm
    l2_regularization.name = 'l2_regularization'

    # Test version of the cost
    test_cost = test_cost + l2_regularization
    test_cost.name = 'cost_with_regularization'

    # Training version of cost
    train_cost_without_regularization = train_cost
    train_cost_without_regularization.name = 'cost_without_regularization'
    train_cost = train_cost + l2_regularization + train_nit_regularization
    train_cost.name = 'cost_with_regularization'

    cifar10_train = CIFAR10(("train",))
    cifar10_train_stream = RandomPadCropFlip(
        NormalizeBatchLevels(DataStream.default_stream(
            cifar10_train, iteration_scheme=ShuffledScheme(
                cifar10_train.num_examples, batch_size)),
        which_sources=('features',)),
        (32, 32), pad=4, which_sources=('features',))
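    # RandomPadCropFlip presumably applies the standard CIFAR-10
    # augmentation: pad each image by 4 pixels on every side, take a random
    # 32x32 crop, and flip horizontally at random.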

    test_batch_size = 128
    cifar10_test = CIFAR10(("test",))
    cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream(
        cifar10_test,
        iteration_scheme=ShuffledScheme(
            cifar10_test.num_examples, test_batch_size)),
        which_sources=('features',))

    momentum = Momentum(0.01, 0.9)

    # Create a step rule that doubles the learning rate of biases, like Caffe.
    # scale_bias = Restrict(Scale(2), biases)
    # step_rule = CompositeRule([scale_bias, momentum])

    # Create a step rule that reduces the learning rate of noise
    scale_mask = Restrict(noise_step_rule, mask_parameters)
    step_rule = CompositeRule([scale_mask, momentum])

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=train_cost, parameters=trainable_parameters,
        step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Debugging variant: guard against NaNs/Infs in the compiled updates:
    # from theano.compile.nanguardmode import NanGuardMode
    # algorithm = GradientDescent(
    #     cost=train_cost, parameters=trainable_parameters,
    #     step_rule=step_rule,
    #     theano_func_kwargs={'mode': NanGuardMode(
    #         nan_is_error=True, inf_is_error=True, big_is_error=True)})

    exp_name = save_to.replace('.%d', '')

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  EpochSchedule(momentum.learning_rate, [
                      (0, 0.01),     # Warm up with 0.01 learning rate
                      (50, 0.1),     # Then raise to 0.1
                      (100, 0.01),
                      (150, 0.001)
                      # (83, 0.01),  # Follow the schedule in the paper
                      # (125, 0.001)
                  ]),
                  EpochSchedule(noise_step_rule.learning_rate, [
                      (0, 1e-2),
                      (2, 1e-1),
                      (4, 1)
                      # (0, 1e-6),
                      # (2, 1e-5),
                      # (4, 1e-4)
                  ]),
                  EpochSchedule(noise_rate, [
                      (0, 1e-2),
                      (2, 1e-1),
                      (4, 1)
                      # (0, 1e-6),
                      # (2, 1e-5),
                      # (4, 1e-4),
                      # (6, 3e-4),
                      # (8, 1e-3), # Causes nit rate to jump
                      # (10, 3e-3),
                      # (12, 1e-2),
                      # (15, 3e-2),
                      # (19, 1e-1),
                      # (24, 3e-1),
                      # (30, 1)
                  ]),
                  NoiseExtension(
                      noise_parameters=noise_parameters),
                  NoisyDataStreamMonitoring(
                      [test_cost, test_error_rate, test_confusion],
                      cifar10_test_stream,
                      noise_parameters=noise_parameters,
                      prefix="test"),
                  TrainingDataMonitoring(
                      [train_cost, train_error_rate, train_nit_rate,
                       train_cost_without_regularization,
                       l2_regularization,
                       train_nit_regularization,
                       momentum.learning_rate,
                       train_mean_log_sigma,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      every_n_batches=17),
                      # after_epoch=True),
                  Plot('Training performance for ' + exp_name,
                      channels=[
                          ['train_cost_with_regularization',
                           'train_cost_without_regularization',
                           'train_nit_regularization',
                           'train_l2_regularization'],
                          ['train_error_rate'],
                          ['train_total_gradient_norm'],
                          ['train_mean_log_sigma'],
                      ],
                      every_n_batches=17),
                  Plot('Test performance for ' + exp_name,
                      channels=[[
                          'train_error_rate',
                          'test_error_rate',
                          ]],
                      after_epoch=True),
                  EpochCheckpoint(save_to, use_cpickle=True, after_epoch=True),
                  ProgressBar(),
                  Printing()]

    if histogram:
        attribution = AttributionExtension(
            components=train_components,
            parameters=train_cg.parameters,
            components_size=output_size,
            after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(exp_name, True, True))

    model = Model(train_cost)

    main_loop = MainLoop(
        algorithm,
        cifar10_train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
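EpochSchedule above appears to do one thing: assign a new value to a shared quantity (the momentum step rule's learning rate, the noise step rule's learning rate, the noise rate) at the start of the listed epochs. A minimal sketch of that mechanism with a plain Theano shared variable; the names and the per-epoch callback are hypothetical, not the extension's real interface:

import numpy
import theano

learning_rate = theano.shared(
    numpy.asarray(0.01, dtype=theano.config.floatX))
schedule = [(0, 0.01), (50, 0.1), (100, 0.01), (150, 0.001)]

def apply_schedule(epoch):
    # Assign the scheduled value when the epoch matches an entry.
    for start_epoch, value in schedule:
        if epoch == start_epoch:
            learning_rate.set_value(
                numpy.asarray(value, dtype=theano.config.floatX))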
Exemple #16
0
def main(save_to,
         num_epochs,
         regularization=0.001,
         subset=None,
         num_batches=None,
         batch_size=None,
         histogram=None,
         resume=False):
    output_size = 10
    convnet = create_all_conv_net()

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    test_cost = (CategoricalCrossEntropy().apply(y.flatten(),
                                                 probs).copy(name='cost'))
    test_components = (ComponentwiseCrossEntropy().apply(
        y.flatten(), probs).copy(name='components'))
    test_error_rate = (MisclassificationRate().apply(
        y.flatten(), probs).copy(name='error_rate'))
    test_confusion = (ConfusionMatrix().apply(y.flatten(),
                                              probs).copy(name='confusion'))
    test_confusion.tag.aggregation_scheme = Sum(test_confusion)

    test_cg = ComputationGraph([test_cost, test_error_rate, test_components])

    # Apply dropout to all layer outputs except final softmax
    dropout_vars = VariableFilter(
        roles=[OUTPUT],
        bricks=[Convolutional],
        theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables)
    drop_cg = apply_dropout(test_cg, dropout_vars, 0.5)

    # Apply 0.2 dropout to the pre-averaging layer
    # dropout_vars_2 = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_8_apply_output$")(drop_cg.variables)
    # train_cg = apply_dropout(drop_cg, dropout_vars_2, 0.2)

    # Apply 0.2 dropout to the input, as in the paper
    # train_cg = apply_dropout(drop_cg, [x], 0.2)
    train_cg = drop_cg
    # train_cg = test_cg

    train_cost, train_error_rate, train_components = train_cg.outputs

    # Apply regularization to the cost
    biases = VariableFilter(roles=[BIAS])(train_cg.parameters)
    weights = VariableFilter(roles=[WEIGHT])(train_cg.variables)
    l2_norm = sum([(W**2).sum() for W in weights])
    l2_norm.name = 'l2_norm'
    l2_regularization = regularization * l2_norm
    l2_regularization.name = 'l2_regularization'
    test_cost = test_cost + l2_regularization
    test_cost.name = 'cost_with_regularization'

    # Training version of cost
    train_cost_without_regularization = train_cost
    train_cost_without_regularization.name = 'cost_without_regularization'
    train_cost = train_cost + regularization * l2_norm
    train_cost.name = 'cost_with_regularization'

    cifar10_train = CIFAR10(("train", ))
    #cifar10_train_stream = RandomPadCropFlip(
    #    NormalizeBatchLevels(DataStream.default_stream(
    #        cifar10_train, iteration_scheme=ShuffledScheme(
    #            cifar10_train.num_examples, batch_size)),
    #    which_sources=('features',)),
    #    (32, 32), pad=5, which_sources=('features',))
    cifar10_train_stream = NormalizeBatchLevels(
        DataStream.default_stream(
            cifar10_train,
            iteration_scheme=ShuffledScheme(cifar10_train.num_examples,
                                            batch_size)),
        which_sources=('features',))

    test_batch_size = 1000
    cifar10_test = CIFAR10(("test", ))
    cifar10_test_stream = NormalizeBatchLevels(
        DataStream.default_stream(
            cifar10_test,
            iteration_scheme=ShuffledScheme(cifar10_test.num_examples,
                                            test_batch_size)),
        which_sources=('features',))

    momentum = Momentum(0.002, 0.9)

    # Create a step rule that doubles the learning rate of biases, like Caffe.
    # scale_bias = Restrict(Scale(2), biases)
    # step_rule = CompositeRule([scale_bias, momentum])
    # step_rule = CompositeRule([StepClipping(100), momentum])
    step_rule = momentum

    # Train with simple SGD
    algorithm = GradientDescent(cost=train_cost,
                                parameters=train_cg.parameters,
                                step_rule=step_rule)

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        EpochSchedule(momentum.learning_rate, [(1, 0.005), (3, 0.01),
                                               (5, 0.02), (200, 0.002),
                                               (250, 0.0002), (300, 0.00002)]),
        DataStreamMonitoring([test_cost, test_error_rate, test_confusion],
                             cifar10_test_stream,
                             prefix="test"),
        TrainingDataMonitoring([
            train_cost, train_error_rate, train_cost_without_regularization,
            l2_regularization, momentum.learning_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               every_n_batches=10),
        # after_epoch=True),
        Plot('Training performance for ' + save_to,
             channels=[
                 [
                     'train_cost_with_regularization',
                     'train_cost_without_regularization',
                     'train_l2_regularization'
                 ],
                 ['train_error_rate'],
                 ['train_total_gradient_norm'],
             ],
             every_n_batches=10),
        # after_batch=True),
        Plot('Test performance for ' + save_to,
             channels=[[
                 'train_error_rate',
                 'test_error_rate',
             ]],
             after_epoch=True),
        Checkpoint(save_to),
        ProgressBar(),
        Printing()
    ]

    if histogram:
        attribution = AttributionExtension(components=train_components,
                                           parameters=train_cg.parameters,
                                           components_size=output_size,
                                           after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(save_to, True, True))

    model = Model(train_cost)

    main_loop = MainLoop(algorithm,
                         cifar10_train_stream,
                         model=model,
                         extensions=extensions)

    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
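Both of the last two examples wrap their streams in NormalizeBatchLevels, a project-local transformer. Judging only by its name and by which_sources=('features',), it standardizes the image batches before they reach the network; a hypothetical numpy sketch of one plausible behaviour (an assumption, not the transformer's actual code):

import numpy

def normalize_batch_levels(features, eps=1e-8):
    # Hypothetical: standardize a batch of images to zero mean and unit
    # standard deviation per channel, computed over the whole batch.
    features = features.astype('float32')
    mean = features.mean(axis=(0, 2, 3), keepdims=True)
    std = features.std(axis=(0, 2, 3), keepdims=True)
    return (features - mean) / (std + eps)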
Exemple #17
0
    slice_100 = slice(0, 32 * 10)
else:
    host_plot = 'http://hades.calculquebec.ca:5042'
    slice_train = slice(0, n_ex)
    slice_test = slice(45000, 50000 - 8)
    slice_valid = slice(40000, 45000 - 8)
    slice_100 = slice(0, 50000)

## Load cifar10 stream
batch_size = 32
num_train_example = slice_train.stop - slice_train.start
num_valid_example = slice_valid.stop - slice_valid.start
num_test_example = slice_test.stop - slice_test.start
num_train_cifar100 = slice_100.stop - slice_100.start

train_dataset = CIFAR10(('train', ), subset=slice_train)
train_stream = DataStream.default_stream(train_dataset,
                                         iteration_scheme=SequentialScheme(
                                             train_dataset.num_examples,
                                             batch_size))
train_stream = OneHotEncode10(train_stream, which_sources=('targets', ))
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features', ))
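# OneHotEncode10 presumably expands the integer targets into length-10
# one-hot vectors; RandomHorizontalFlip presumably mirrors each image
# left-right with probability 0.5 (reversing the column axis), the usual
# CIFAR-10 augmentation.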

valid_dataset = CIFAR10(('train', ), subset=slice_valid)
valid_stream = DataStream.default_stream(valid_dataset,
                                         iteration_scheme=SequentialScheme(
                                             valid_dataset.num_examples,
                                             batch_size))
valid_stream = OneHotEncode10(valid_stream, which_sources=('targets', ))

test_dataset = CIFAR10(('train', ), subset=slice_test)
    # action.add_argument('-t', '--train', help="Start training the model")
    # action.add_argument('-s', '--sample', help='Sample images from the trained model')
    #
    # parser.add_argument('--experiment', nargs=1, type=str,
    #     help="Change default location to run experiment")
    # parser.add_argument('--path', nargs=1, type=str,
    #     help="Change default location to save model")

    if dataset == 'mnist':
        data = MNIST(("train", ), sources=('features', ))
        data_test = MNIST(("test", ), sources=('features', ))
    elif dataset == 'binarized_mnist':
        data = BinarizedMNIST(("train", ), sources=('features', ))
        data_test = BinarizedMNIST(("test", ), sources=('features', ))
    elif dataset == "cifar10":
        data = CIFAR10(("train", ))
        data_test = CIFAR10(("test", ))
    else:
        pass  # Add CIFAR 10
    training_stream = DataStream(data,
                                 iteration_scheme=ShuffledScheme(
                                     data.num_examples, batch_size))
    test_stream = DataStream(data_test,
                             iteration_scheme=ShuffledScheme(
                                 data_test.num_examples, batch_size))
    logger.info("Dataset: {} loaded".format(dataset))

    if train:
        cost, cost_bits_dim = create_network()
        model, algorithm, extensions = prepare_opti(cost, test_stream,
                                                    cost_bits_dim)