Example #1
def load_datastream(train_batch_size=100):
    from fuel.datasets.mnist import MNIST
    from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
    from fuel.streams import DataStream
    from fuel.schemes import SequentialScheme, ShuffledScheme

    MNIST.default_transformers = (
        (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
        (Cast, [np.float32], {'which_sources': 'features'}),
    )

    mnist_train = MNIST(('train',), subset=slice(None, 50000))
    mnist_train_stream = DataStream.default_stream(
        mnist_train,
        iteration_scheme=ShuffledScheme(mnist_train.num_examples, train_batch_size)
    )

    mnist_validation = MNIST(('train',), subset=slice(50000, None))
    mnist_validation_stream = DataStream.default_stream(
        mnist_validation,
        iteration_scheme=SequentialScheme(mnist_validation.num_examples, 250)
    )

    mnist_test = MNIST(('test',))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 250)
    )

    return {
        'train': mnist_train_stream,
        'validation': mnist_validation_stream,
        'test': mnist_test_stream
    }
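A minimal consumption sketch (not part of the original example); it assumes Fuel is installed, numpy is imported as np at module level, and the MNIST HDF5 file is on Fuel's data path (fuel-download mnist followed by fuel-convert mnist). With the default transformers set above, each feature batch arrives as float32 scaled into [-1, 1].

streams = load_datastream(train_batch_size=100)
for features, targets in streams['train'].get_epoch_iterator():
    # features: float32, shape (100, 1, 28, 28), values scaled into [-1, 1]
    # targets: shape (100, 1)
    break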
Example #2
def get_streams(num_train_examples, batch_size, use_test=True):
    dataset = MNIST(("train",))
    all_ind = numpy.arange(dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)

    indices_train = all_ind[:num_train_examples]
    indices_valid = all_ind[num_train_examples:]

    train_stream = Flatten(
        DataStream.default_stream(dataset, iteration_scheme=ShuffledScheme(indices_train, batch_size))
    )

    valid_stream = None
    if len(indices_valid) != 0:
        valid_stream = Flatten(
            DataStream.default_stream(dataset, iteration_scheme=ShuffledScheme(indices_valid, batch_size))
        )

    test_stream = None
    if use_test:
        dataset = MNIST(("test",))
        ind = numpy.arange(dataset.num_examples)
        rng = numpy.random.RandomState(seed=1)
        rng.shuffle(ind)

        test_stream = Flatten(DataStream.default_stream(dataset, iteration_scheme=ShuffledScheme(ind, batch_size)))

    return train_stream, valid_stream, test_stream
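A hypothetical call (not from the original source), assuming the MNIST file is available; asking for 50,000 training examples leaves the remaining 10,000 as the validation split.

train_stream, valid_stream, test_stream = get_streams(
    num_train_examples=50000, batch_size=100, use_test=True)
features, targets = next(train_stream.get_epoch_iterator())
# features is flattened to shape (100, 784); MNIST's default transformers
# rescale the stored uint8 pixels to floatX values in [0, 1]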
Example #3
def maxout_vae_mnist_test(path_vae_mnist):

    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    for i, t in enumerate(cg.parameters):
        t.name = t.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y, y_hat) 

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_test = Flatten(DataStream.default_stream(
            test_set, iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")


    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=50),
                    Printing(every_n_epochs=1)
                  ]

    main_loop = MainLoop(data_stream=data_stream_train,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'wb')) as f:
        dump(maxout, f)
Example #4
def prepare_cifar10():
	class Dataset:
		pass

	result = Dataset()

	CIFAR10.default_transformers = (
		(ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
		(Cast, [np.float32], {'which_sources': 'features'}))

	mean = cifar10_mean()

	def patch_get_epoch_iterator(stream):
		def get_epoch_iterator(self):
			for X, Y in self._get_epoch_iterator():
				# 0 degrees
				X -= mean[numpy.newaxis,:,:,:]

				yield augument(X, 25), Y

		stream._get_epoch_iterator = stream.get_epoch_iterator
		stream.get_epoch_iterator = types.MethodType(get_epoch_iterator, stream)

	def patch_get_epoch_iterator_test(stream):
		def get_epoch_iterator(self):
			for X, Y in self._get_epoch_iterator():
				# 0 degrees
				X -= mean[numpy.newaxis,:,:,:]
				yield X, Y

		stream._get_epoch_iterator = stream.get_epoch_iterator
		stream.get_epoch_iterator = types.MethodType(get_epoch_iterator, stream)


	result.train = train = CIFAR10(("train",), subset = slice(None, 40000))
	result.train_stream = DataStream.default_stream(
		result.train,
		iteration_scheme = ShuffledScheme(result.train.num_examples, 25))

	patch_get_epoch_iterator(result.train_stream)

	result.validation = CIFAR10(("train",), subset=slice(40000, None))
	result.validation_stream = DataStream.default_stream(
		result.validation, 
		iteration_scheme = SequentialScheme(result.validation.num_examples, 100))

	patch_get_epoch_iterator(result.validation_stream)

	result.test = CIFAR10(("test",))
	result.test_stream = DataStream.default_stream(
		result.test, 
		iteration_scheme = SequentialScheme(result.test.num_examples, 100))

	patch_get_epoch_iterator_test(result.test_stream)

	return result
Example #5
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
Example #6
def get_stream(batch_size, input_size, test=False):
    from fuel.datasets.dogs_vs_cats import DogsVsCats
    from fuel.streams import DataStream
    from fuel.schemes import ShuffledScheme, SequentialScheme, SequentialExampleScheme
    from fuel.transformers.image import RandomFixedSizeCrop
    from fuel.transformers import Flatten #, ForceFloatX
    from ScikitResize import ScikitResize
    from fuel.transformers import Cast
    # Load the training set
    if test:
        train = DogsVsCats(('train',), subset=slice(0, 30))
        valid = DogsVsCats(('train',), subset=slice(19980, 20000))
        test = DogsVsCats(('test',), subset=slice(0, 4))
    else:
        train = DogsVsCats(('train',), subset=slice(0, 22000))
        valid = DogsVsCats(('train',), subset=slice(22000, 25000))
        test = DogsVsCats(('test',))
    #Generating stream
    train_stream = DataStream.default_stream(
        train,
        iteration_scheme=ShuffledScheme(train.num_examples, batch_size)
    )

    valid_stream = DataStream.default_stream(
        valid,
        iteration_scheme=ShuffledScheme(valid.num_examples, batch_size)
    )
    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=SequentialScheme(test.num_examples, 1)
#        iteration_scheme=SequentialExampleScheme(test.num_examples)
    )
    #Reshaping procedure
    #Apply crop and resize to desired square shape
    train_stream = ScikitResize(train_stream, input_size, which_sources=('image_features',))
    valid_stream = ScikitResize(valid_stream, input_size, which_sources=('image_features',))
    test_stream = ScikitResize(test_stream, input_size, which_sources=('image_features',))

    #ForceFloatX, to spare you from possible bugs
    #train_stream = ForceFloatX(train_stream)
    #valid_stream = ForceFloatX(valid_stream)
    #test_stream = ForceFloatX(test_stream)

    #Cast instead of forcefloatX
    train_stream = Cast(train_stream, dtype='float32', which_sources=('image_features',))
    valid_stream = Cast(valid_stream, dtype='float32', which_sources=('image_features',))
    test_stream = Cast(test_stream, dtype='float32', which_sources=('image_features',))
    return train_stream, valid_stream, test_stream
Example #7
def test_cifar10():
    train = CIFAR10(('train',), load_in_memory=False)
    assert train.num_examples == 50000
    handle = train.open()
    features, targets = train.get_data(handle, slice(49990, 50000))
    assert features.shape == (10, 3, 32, 32)
    assert targets.shape == (10, 1)
    train.close(handle)

    test = CIFAR10(('test',), load_in_memory=False)
    handle = test.open()
    features, targets = test.get_data(handle, slice(0, 10))
    assert features.shape == (10, 3, 32, 32)
    assert targets.shape == (10, 1)
    assert features.dtype == numpy.uint8
    assert targets.dtype == numpy.uint8
    test.close(handle)

    stream = DataStream.default_stream(
        test, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX

    assert_raises(ValueError, CIFAR10, ('valid',))

    dummy = CIFAR10(('train',), subset=slice(50000, 60000))
    handle = dummy.open()
    assert_raises(ValueError, dummy.get_data, handle, slice(0, 10000))
    dummy.close(handle)
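The floating-point assertions above rely on DataStream.default_stream applying the dataset's default_transformers, which rescale the stored uint8 pixels to floatX values in [0, 1]; a plain DataStream leaves the data untouched. A small contrast sketch under the same assumption that the CIFAR-10 HDF5 file is available:

from fuel.datasets.cifar10 import CIFAR10
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme

cifar_test = CIFAR10(('test',), load_in_memory=False)

raw_stream = DataStream(cifar_test, iteration_scheme=SequentialScheme(10, 10))
raw = next(raw_stream.get_epoch_iterator())[0]
# raw is uint8, exactly as stored in the HDF5 file

cooked_stream = DataStream.default_stream(
    cifar_test, iteration_scheme=SequentialScheme(10, 10))
cooked = next(cooked_stream.get_epoch_iterator())[0]
# cooked is floatX in [0, 1]: default_stream applied the dataset's
# scale-and-cast default transformers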
Example #8
def create_dataset(dataset):
    if trainning:
        scheme = ShuffledScheme(dataset.num_examples, 32)
    else:
        scheme = SequentialScheme(dataset.num_examples, 32)
    stream = DataStream.default_stream(dataset, iteration_scheme=scheme)
    return ResizeTransformer(stream, image_size)
Example #9
def get_mnist_video_streams(batch_size):
    train_dataset = ClutteredMNISTVideo(which_sets=["train"])
    valid_dataset = ClutteredMNISTVideo(which_sets=["valid"])
    train_ind = numpy.arange(train_dataset.num_examples)
    valid_ind = numpy.arange(valid_dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(train_ind)
    rng.shuffle(valid_ind)

    train_datastream = DataStream.default_stream(train_dataset, iteration_scheme=ShuffledScheme(train_ind, batch_size))
    train_datastream = PreprocessTransformer(train_datastream)

    valid_datastream = DataStream.default_stream(valid_dataset, iteration_scheme=ShuffledScheme(valid_ind, batch_size))
    valid_datastream = PreprocessTransformer(valid_datastream)

    return train_datastream, valid_datastream
Example #10
def monk_music_stream(which_sets=('train',), batch_size=64,
        seq_size=128, frame_size=160, num_examples=None,
        which_sources=('features',)):

    """
    This function generates the stream for the monk_music dataset.
    It doesn't compute incremental windows and instead simply separates the
    dataset into sequences
    """

    dataset = MonkMusic(which_sets=which_sets, filename="dataset.hdf5",
        load_in_memory=True)

    large_batch_size = batch_size * frame_size * seq_size
    if not num_examples:
        num_examples = large_batch_size * (dataset.num_examples // large_batch_size)

    # If there are memory problems revert to SequentialScheme
    data_stream = DataStream.default_stream(
            dataset, iteration_scheme=SequentialScheme(
            num_examples,
            large_batch_size))

    data_stream = ScaleAndShift(data_stream,
            scale=1. / data_stats["std"],
            shift=-data_stats["mean"] / data_stats["std"])

    data_stream = Mapping(data_stream,
            lambda data: _get_subsequences(data, batch_size, seq_size, frame_size))

    data_stream = ForceFloatX(data_stream)

    return data_stream
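The ScaleAndShift step above standardizes the frames using precomputed statistics (data_stats is defined elsewhere in that project). A self-contained toy sketch of the same transform on made-up data, just to show the shape of the API:

import numpy
from fuel.datasets import IndexableDataset
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme
from fuel.transformers import ScaleAndShift

features = (numpy.random.randn(100, 160) * 3.0 + 5.0).astype('float32')
toy_dataset = IndexableDataset({'features': features})
stats = {'mean': features.mean(), 'std': features.std()}

toy_stream = DataStream(
    toy_dataset,
    iteration_scheme=SequentialScheme(toy_dataset.num_examples, 10))
toy_stream = ScaleAndShift(toy_stream,
                           scale=1. / stats['std'],
                           shift=-stats['mean'] / stats['std'],
                           which_sources=('features',))

batch, = next(toy_stream.get_epoch_iterator())
# batch is now roughly zero-mean with unit variance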
Example #11
def get_cmv_v1_streams(batch_size):
    train_dataset = CMVv1(which_sets=["train"])
    valid_dataset = CMVv1(which_sets=["valid"])
    train_ind = numpy.arange(train_dataset.num_examples)
    valid_ind = numpy.arange(valid_dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(train_ind)
    rng.shuffle(valid_ind)

    train_datastream = DataStream.default_stream(train_dataset, iteration_scheme=ShuffledScheme(train_ind, batch_size))
    train_datastream = Preprocessor_CMV_v1(train_datastream)

    valid_datastream = DataStream.default_stream(valid_dataset, iteration_scheme=ShuffledScheme(valid_ind, batch_size))
    valid_datastream = Preprocessor_CMV_v1(valid_datastream)

    return train_datastream, valid_datastream
Example #12
def DStream(datatype, config):

    if datatype == 'train':
        filename = config['train_file']
    elif datatype == 'valid':
        filename = config['valid_file']
    elif datatype == 'test':
        filename = config['test_file']
    else:
        logger.error('wrong datatype: expected train, valid, or test')
        raise ValueError('wrong datatype: %s' % datatype)


    data = TextFile(files=[filename],
                    dictionary=pickle.load(open(config['train_dic'],'rb')),
                    unk_token=config['unk_token'],
                    level='word',
                    bos_token=config['bos_token'],
                    eos_token=config['eos_token'])

    data_stream = DataStream.default_stream(data)
    data_stream.sources = ('sentence',)


    # organize data in batches and pad shorter sequences with zeros
    batch_size = config['batch_size']
    data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(batch_size))
    data_stream = Padding(data_stream)
    return data_stream
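A hedged consumption sketch; the config values below are placeholders, and it relies on Fuel's Padding transformer adding a 'sentence_mask' source alongside the padded 'sentence' source.

config = {
    'train_file': 'train.txt',   # placeholder paths and tokens
    'valid_file': 'valid.txt',
    'test_file': 'test.txt',
    'train_dic': 'vocab.pkl',
    'unk_token': '<UNK>',
    'bos_token': '<S>',
    'eos_token': '</S>',
    'batch_size': 32,
}

stream = DStream('train', config)
for sentence, sentence_mask in stream.get_epoch_iterator():
    # sentence: (batch_size, max_len) integer matrix, zero-padded
    # sentence_mask: same shape, 1.0 for real tokens and 0.0 for padding
    break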
Example #13
def test_cifar100():
    train = CIFAR100('train', load_in_memory=False)
    assert train.num_examples == 50000
    handle = train.open()
    coarse_labels, features, fine_labels = train.get_data(handle,
                                                          slice(49990, 50000))

    assert features.shape == (10, 3, 32, 32)
    assert coarse_labels.shape == (10, 1)
    assert fine_labels.shape == (10, 1)
    train.close(handle)

    test = CIFAR100('test', load_in_memory=False)
    handle = test.open()
    coarse_labels, features, fine_labels = test.get_data(handle,
                                                         slice(0, 10))

    assert features.shape == (10, 3, 32, 32)
    assert coarse_labels.shape == (10, 1)
    assert fine_labels.shape == (10, 1)

    assert features.dtype == numpy.uint8
    assert coarse_labels.dtype == numpy.uint8
    assert fine_labels.dtype == numpy.uint8

    test.close(handle)

    stream = DataStream.default_stream(
        test, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[1]

    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX

    assert_raises(ValueError, CIFAR100, 'valid')
Example #14
def open_stream(which_sets=('train',), port=5557, num_examples=None):

    dataset = Blizzard(which_sets=which_sets)

    if num_examples is None:
        num_examples = dataset.num_examples

    data_stream = DataStream.default_stream(
            dataset, iteration_scheme=SequentialScheme(
            num_examples, batch_size))

    data_stream = ScaleAndShift(data_stream, scale=1 / data_std,
                                shift=-data_mean / data_std)
    data_stream = Mapping(data_stream, _downsample_and_upsample,
                          add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual'))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    data_stream = ForceFloatX(data_stream)

    start_server(data_stream, port=port)
Example #15
def fuel_data_to_list(fuel_data, shuffle):
    if shuffle:
        scheme = ShuffledScheme(fuel_data.num_examples, fuel_data.num_examples)
    else:
        scheme = SequentialScheme(fuel_data.num_examples, fuel_data.num_examples)
    fuel_data_stream = DataStream.default_stream(fuel_data, iteration_scheme=scheme)
    return next(fuel_data_stream.get_epoch_iterator())
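A hypothetical call (assumes the MNIST HDF5 file is available): since the scheme's batch size equals num_examples, the single batch it returns is the entire dataset.

from fuel.datasets.mnist import MNIST

mnist_test = MNIST(('test',))
features, targets = fuel_data_to_list(mnist_test, shuffle=False)
# features holds all 10,000 test images in one array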
Example #16
def create_svhn_data_streams(batch_size, monitoring_batch_size, rng=None):
    train_set = SVHN(2, ('extra',), sources=('features',))
    valid_set = SVHN(2, ('train',), sources=('features',))
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            5000, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(
            5000, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
Example #17
def cifar10_mean():
	train = CIFAR10(("train",), subset=slice(None, 40000))
	train_stream = DataStream.default_stream(train, iteration_scheme = SequentialScheme(train.num_examples, 100))

	X = numpy.array([numpy.mean(X, 0) for X, _ in train_stream.get_epoch_iterator()])
	X = numpy.mean(X, 0)

	return X
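For reference, the statistic computed above is a per-pixel mean image; Example #4's prepare_cifar10 subtracts it from every batch by broadcasting. A hedged check:

mean = cifar10_mean()
assert mean.shape == (3, 32, 32)   # one value per channel and pixel
# inside an epoch loop: X -= mean[numpy.newaxis, :, :, :]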
Example #18
def create_celeba_data_streams(batch_size, monitoring_batch_size,
                               sources=('features', ), rng=None):
    train_set = CelebA('64', ('train',), sources=sources)
    valid_set = CelebA('64', ('valid',), sources=sources)
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            5000, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(
            5000, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
Example #19
def create_tiny_imagenet_data_streams(batch_size, monitoring_batch_size,
                                      rng=None):
    train_set = TinyILSVRC2012(('train',), sources=('features',))
    valid_set = TinyILSVRC2012(('valid',), sources=('features',))
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            4096, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(
            4096, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
Example #20
def set_datastream(data_path, batch_size):
    dataset = H5PYDataset(file_or_path=data_path,
                          which_sets=('train',),
                          sources=('input_feature', 'target_feature'))
    data_stream = DataStream.default_stream(dataset=dataset,
                                            iteration_scheme=ShuffledScheme(batch_size=batch_size,
                                                                            examples=dataset.num_examples))
    return data_stream
Example #21
def create_data(data):
    stream = DataStream.default_stream(data, iteration_scheme=ShuffledScheme(data.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(stream, image_size, which_sources=('image_features',))
    #stream_rotate = Random2DRotation(stream_downscale, which_sources=('image_features',))
    stream_max = ScikitResize(stream_downscale, image_size, which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_max, 1./255, 0, which_sources=('image_features',))
    stream_cast = Cast(stream_scale, dtype='float32', which_sources=('image_features',))
    #stream_flat = Flatten(stream_scale, which_sources=('image_features',))
    return stream_cast
Example #22
    def create_act_table(self, save_to, act_table):
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outmap = OrderedDict((full_brick_name(get_brick(out)), out)
                for out in VariableFilter(
                    roles=[OUTPUT], bricks=[Convolutional, Linear])(
                        cg.variables))
        # Generate pics for biases
        biases = VariableFilter(roles=[BIAS])(cg.parameters)

        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Figure work count
        error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                      .copy(name='error_rate'))
        max_activation_table = (MaxActivationTable().apply(
                outs).copy(name='max_activation_table'))
        max_activation_table.tag.aggregation_scheme = (
                Concatenate(max_activation_table))

        model = Model([
            error_rate,
            max_activation_table])

        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)

        mnist_test_stream = DataStream.default_stream(
            self.mnist_test,
            iteration_scheme=SequentialScheme(
                self.mnist_test.num_examples, batch_size))

        evaluator = DatasetEvaluator([
            error_rate,
            max_activation_table
            ])
        results = evaluator.evaluate(mnist_test_stream)
        table = results['max_activation_table']
        pickle.dump(table, open(act_table, 'wb'))
        return table
Example #23
def get_stream(self, which_set, scheme=None):
    if not scheme:
        scheme = ShuffledScheme(
            self.datasets[which_set].num_examples
            // self.shrink_dataset_by,
            self.batch_size)
    return DataStream.default_stream(
        dataset=self.datasets[which_set],
        iteration_scheme=scheme)
Example #24
def create_cifar10_data_streams(batch_size, monitoring_batch_size, rng=None):
    train_set = CIFAR10(
        ('train',), sources=('features',), subset=slice(0, 45000))
    valid_set = CIFAR10(
        ('train',), sources=('features',), subset=slice(45000, 50000))
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            5000, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(
            5000, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
Example #25
def create_streams(train_set, valid_set, test_set, training_batch_size,
                   monitoring_batch_size):
    """Creates data streams for training and monitoring.

    Parameters
    ----------
    train_set : :class:`fuel.datasets.Dataset`
        Training set.
    valid_set : :class:`fuel.datasets.Dataset`
        Validation set.
    test_set : :class:`fuel.datasets.Dataset`
        Test set.
    training_batch_size : int
        Batch size for the main loop (training) stream.
    monitoring_batch_size : int
        Batch size for monitoring.

    Returns
    -------
    rval : tuple of data streams
        Data streams for the main loop, the training set monitor,
        the validation set monitor and the test set monitor.

    """
    main_loop_stream = DataStream.default_stream(
        dataset=train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, training_batch_size))
    train_monitor_stream = DataStream.default_stream(
        dataset=train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, monitoring_batch_size))
    valid_monitor_stream = DataStream.default_stream(
        dataset=valid_set,
        iteration_scheme=ShuffledScheme(
            valid_set.num_examples, monitoring_batch_size))
    test_monitor_stream = DataStream.default_stream(
        dataset=test_set,
        iteration_scheme=ShuffledScheme(
            test_set.num_examples, monitoring_batch_size))

    return (main_loop_stream, train_monitor_stream, valid_monitor_stream,
            test_monitor_stream)
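A hypothetical call using the MNIST splits that appear elsewhere in this collection (assumes the MNIST HDF5 file is available):

from fuel.datasets.mnist import MNIST

train_set = MNIST(('train',), subset=slice(None, 50000))
valid_set = MNIST(('train',), subset=slice(50000, None))
test_set = MNIST(('test',))

(main_loop_stream, train_monitor_stream,
 valid_monitor_stream, test_monitor_stream) = create_streams(
    train_set, valid_set, test_set,
    training_batch_size=128, monitoring_batch_size=500)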
Example #26
def getTextFile(filename, dic_path, config):
    data = TextFile(files=[filename],
                    dictionary=pickle.load(open(dic_path,'rb')),
                    unk_token=config['unk_token'],
                    level='word',
                    bos_token=config['bos_token'],
                    eos_token=config['eos_token'])
    data_stream = DataStream.default_stream(data)
    data_stream.sources = ('sentence',)
    return data_stream
Example #27
def _test_dataset():
    train = DogsVsCats(('train',))
    assert train.num_examples == 25000
    assert_raises(ValueError, DogsVsCats, ('valid',))

    test = DogsVsCats(('test',))
    stream = DataStream.default_stream(
        test, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0][0]
    assert data.dtype.kind == 'f'
Example #28
def create_main_loop(dataset, nvis, nhid, num_epochs, debug_level=0, lrate=1e-3):
    seed = 188229
    n_inference_steps = 6
    num_examples = dataset.num_examples
    batch_size = num_examples

    train_loop_stream = Flatten(
        DataStream.default_stream(
            dataset=dataset,
            iteration_scheme=SequentialScheme(dataset.num_examples, batch_size)  # Repeat(
            # , n_inference_steps)
            #            ShuffledScheme(dataset.num_examples, batch_size), n_inference_steps))
        ),
        which_sources=("features",),
    )

    model_brick = FivEM(
        nvis=nvis,
        nhid=nhid,
        epsilon=0.01,
        batch_size=batch_size,
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0),
        noise_scaling=1,
        debug=debug_level,
        lateral_x=False,
        lateral_h=False,
        n_inference_steps=n_inference_steps,
    )
    model_brick.initialize()

    x = tensor.matrix("features")

    cost = model_brick.cost(x)
    computation_graph = ComputationGraph([cost])
    model = Model(cost)
    # step_rule = Adam(learning_rate=2e-5, beta1=0.1, beta2=0.001, epsilon=1e-8,
    #                 decay_factor=(1 - 1e-8))
    step_rule = Momentum(learning_rate=lrate, momentum=0.95)
    # step_rule = AdaDelta()
    # step_rule = RMSProp(learning_rate=0.01)
    # step_rule = AdaGrad(learning_rate=1e-4)
    algorithm = GradientDescent(cost=cost, params=computation_graph.parameters, step_rule=step_rule)
    algorithm.add_updates(computation_graph.updates)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        TrainingDataMonitoring([cost] + computation_graph.auxiliary_variables, after_batch=False, after_epoch=True),
        #                       every_n_epochs=1),
        Printing(after_epoch=True, after_batch=False),  # every_n_epochs=1,
        # Checkpoint(path="./fivem.zip",every_n_epochs=10,after_training=True)
    ]
    main_loop = MainLoop(model=model, data_stream=train_loop_stream, algorithm=algorithm, extensions=extensions)
    return main_loop
Example #29
def getDataStream(dataset, batch_size):
    stream = Flatten(DataStream.default_stream(
        dataset=dataset,
        iteration_scheme=ShuffledScheme(dataset.num_examples, batch_size=batch_size)))
    stream = Digit2String(stream, which_sources=('targets',))
    stream = Words2Indices(stream, which_sources=('targets',))
    stream = Padding(stream)

    stream = FilterSources(stream, sources=("features", "targets"))

    return stream
Example #30
def get_datastream(self, kind, indices):
    split = {
        'trn': self.trn,
        'val': self.val,
        'tst': self.tst,
    }[kind]
    indices = indices if indices is not None else split.ind
    assert len(set(indices) - set(split.ind)) == 0, 'requested indices outside of split'
    ds = DataStream.default_stream(
        split.set, iteration_scheme=ShuffledScheme(indices, split.batch_size))
    return ds
Example #31
def evaluate_lenet5(train,
                    test,
                    valid,
                    learning_rate=0.1,
                    n_epochs=200,
                    nkerns=[20, 50],
                    batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :param dataset train: Fuel dataset to use for training.
    :param dataset test: Fuel dataset to use for testing.
    :param dataset valid: Fuel dataset to use for validation.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    train_stream = DataStream.default_stream(train,
                                             iteration_scheme=SequentialScheme(
                                                 train.num_examples,
                                                 batch_size))
    valid_stream = DataStream.default_stream(valid,
                                             iteration_scheme=SequentialScheme(
                                                 valid.num_examples,
                                                 batch_size))
    test_stream = DataStream.default_stream(test,
                                            iteration_scheme=SequentialScheme(
                                                test.num_examples,
                                                batch_size))

    x = T.tensor4('x')
    yi = T.imatrix('y')
    y = yi.reshape((yi.shape[0], ))

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=x,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([x, yi], layer3.errors(y))

    validate_model = theano.function([x, yi], layer3.errors(y))

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function([x, yi], cost, updates=updates)

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found

    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995

    n_train_batches = (train.num_examples + batch_size - 1) // batch_size

    # go through this many minibatches before checking the network on
    # the validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    iter = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        minibatch_index = 0
        for minibatch in train_stream.get_epoch_iterator():
            iter += 1
            minibatch_index += 1
            if iter % 100 == 0:
                print 'training @ iter = ', iter

            error = train_model(minibatch[0], minibatch[1])

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(vb[0], vb[1])
                    for vb in valid_stream.get_epoch_iterator()
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(tb[0], tb[1])
                        for tb in test_stream.get_epoch_iterator()
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code ran for %.2fm' %
                          ((end_time - start_time) / 60.))
Example #32
def main(save_to):
    batch_size = 365
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #    feature_maps = [20, 50]
    #    mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(
                i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    x = tensor.tensor4('features')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cg = ComputationGraph([probs])
    outs = VariableFilter(
            roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables)

    # Create an interior activation model
    model = Model([probs] + outs)

    # Load it with trained parameters
    params = load_parameters(open(save_to, 'rb'))
    model.set_parameter_values(params)

    algorithm = MaximumActivationSearch(outputs=outs)

    # Use the mnist test set, unshuffled
    mnist_test = MNIST(("test",), sources=['features'])
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(
            mnist_test.num_examples, batch_size))

    extensions = [Timing(),
                  FinishAfter(after_n_epochs=1),
                  DataStreamMonitoring(
                      [],
                      mnist_test_stream,
                      prefix="test"),
                  Checkpoint("maxact.tar"),
                  ProgressBar(),
                  Printing()]

    main_loop = MainLoop(
        algorithm,
        mnist_test_stream,
        model=model,
        extensions=extensions)

    main_loop.run()

    examples = mnist_test.get_example_stream()
    example = examples.get_data(0)[0]
    layers = convnet.layers
    for output, record in algorithm.maximum_activations.items():
        layer = get_brick(output)
        activations, indices, snapshots = (
                r.get_value() if r else None for r in record[1:])
        filmstrip = Filmstrip(
            example.shape[-2:], (indices.shape[1], indices.shape[0]),
            background='blue')
        if layer in layers:
            fieldmap = layerarray_fieldmap(layers[0:layers.index(layer) + 1])
            for unit in range(indices.shape[1]):
                for index in range(100):
                    mask = make_mask(example.shape[-2:], fieldmap, numpy.clip(
                        snapshots[index, unit, :, :], 0, numpy.inf))
                    imagenum = indices[index, unit, 0]
                    filmstrip.set_image((unit, index),
                            examples.get_data(imagenum)[0], mask)
        else:
            for unit in range(indices.shape[1]):
                for index in range(100):
                    imagenum = indices[index, unit]
                    filmstrip.set_image((unit, index),
                            examples.get_data(imagenum)[0])
        filmstrip.save(layer.name + '_maxact.jpg')
Example #33
from fuel.datasets.hdf5 import H5PYDataset

train_set = H5PYDataset(
    './data/data.hdf5',
    which_sets=('train', ),
    subset=slice(0, 290000),  #
    load_in_memory=True)

valid_set = H5PYDataset(
    './data/data.hdf5',
    which_sets=('train', ),
    subset=slice(290000, 300000),  #
    load_in_memory=True)

train_stream = DataStream.default_stream(train_set,
                                         iteration_scheme=ShuffledScheme(
                                             train_set.num_examples,
                                             batch_size=1000))

valid_stream = DataStream.default_stream(valid_set,
                                         iteration_scheme=ShuffledScheme(
                                             valid_set.num_examples,
                                             batch_size=1000))

# compute mean target values
print('Computing mean target values...')
cps = []
deps = []
primes = []
hascar = []
cp_index = train_set.provides_sources.index('codepostal')
prime_index = train_set.provides_sources.index('labels')
Example #34
def create_main_loop(save_to,
                     num_epochs,
                     unit_order=None,
                     batch_size=500,
                     num_batches=None):
    image_size = (28, 28)
    output_size = 10
    convnet = create_lenet_5()
    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    case_costs = CasewiseCrossEntropy().apply(y.flatten(), probs)
    cost = case_costs.mean().copy(name='cost')
    # cost = (CategoricalCrossEntropy().apply(y.flatten(), probs)
    #         .copy(name='cost'))
    error_rate = (MisclassificationRate().apply(y.flatten(),
                                                probs).copy(name='error_rate'))

    cg = ComputationGraph([cost, error_rate])

    # Apply regularization to the cost
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + sum([0.0003 * (W**2).sum() for W in weights])
    cost.name = 'cost_with_regularization'

    mnist_train = MNIST(("train", ))
    mnist_train_stream = DataStream.default_stream(
        mnist_train,
        iteration_scheme=ShuffledScheme(mnist_train.num_examples, batch_size))

    mnist_test = MNIST(("test", ))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size))

    # Generate pics for biases
    biases = VariableFilter(roles=[BIAS])(cg.parameters)

    # Train with simple SGD
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=AdaDelta())

    # Find layer outputs to probe
    outs = OrderedDict(
        reversed(
            list((get_brick(out).name, out)
                 for out in VariableFilter(roles=[OUTPUT],
                                           bricks=[Convolutional, Linear])(
                                               cg.variables))))

    actpic_extension = ActpicExtension(actpic_variables=outs,
                                       case_labels=y,
                                       pics=x,
                                       label_count=output_size,
                                       rectify=-1,
                                       data_stream=mnist_test_stream,
                                       after_batch=True)

    synpic_extension = SynpicExtension(synpic_parameters=biases,
                                       case_costs=case_costs,
                                       case_labels=y,
                                       pics=x,
                                       batch_size=batch_size,
                                       pic_size=image_size,
                                       label_count=output_size,
                                       after_batch=True)

    # Impose an ordering for the SaveImages extension
    if unit_order is not None:
        with open(unit_order, 'rb') as handle:
            histograms = pickle.load(handle)
        unit_order = compute_unit_order(histograms)

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        actpic_extension, synpic_extension,
        SaveImages(picsources=[synpic_extension, actpic_extension],
                   title="LeNet-5: batch {i}, " +
                   "cost {cost_with_regularization:.2f}, " +
                   "trainerr {error_rate:.3f}",
                   data=[cost, error_rate],
                   graph='error_rate',
                   graph_len=500,
                   unit_order=unit_order,
                   after_batch=True),
        DataStreamMonitoring([cost, error_rate],
                             mnist_test_stream,
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        ProgressBar(),
        Printing()
    ]
    model = Model(cost)
    main_loop = MainLoop(algorithm,
                         mnist_train_stream,
                         model=model,
                         extensions=extensions)

    return main_loop
Example #35
data_train = H5PYDataset('/home/xuehongyang/TGIF_open_161217.hdf5',
                         which_sets=('train', ),
                         subset=slice(0, 230689 // bs * bs))

data_test = H5PYDataset('/home/xuehongyang/TGIF_open_161217.hdf5',
                        which_sets=('test', ),
                        sources=(
                            'question_features',
                            'question_features_reverse',
                            'mask_matrix',
                            'visual_features',
                        ),
                        subset=slice(0, 32378 // bs * bs))

data_stream_train = DataStream.default_stream(data_train,
                                              iteration_scheme=ShuffledScheme(
                                                  data_train.num_examples,
                                                  batch_size=bs))

data_stream_test = DataStream.default_stream(data_test,
                                             iteration_scheme=SequentialScheme(
                                                 data_test.num_examples,
                                                 batch_size=bs))

learning_rate = 0.0002
n_epochs = 100
algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            on_unused_sources='ignore',
                            step_rule=CompositeRule([
                                StepClipping(10.),
                                Adam(learning_rate),
Example #36
def s(s):
    return Flatten(
        DataStream.default_stream(s,
                                  iteration_scheme=ShuffledScheme(
                                      s.num_examples, batch_size=256)))
Example #37
    '''
    return net['conv1_1']


if __name__ == '__main__':

    from fuel.datasets import MNIST
    dataset_train = MNIST(['train'], sources=('features', ))
    dataset_test = MNIST(['test'], sources=('features', ))
    n_colors = 1
    spatial_width = 28
    train_stream = Flatten(
        DataStream.default_stream(dataset_train,
                                  iteration_scheme=ShuffledScheme(
                                      examples=dataset_train.num_examples -
                                      (dataset_train.num_examples % 32),
                                      batch_size=32)))
    shp = next(train_stream.get_epoch_iterator())[0].shape

    input_ = T.tensor4('inputs_var')
    unet = buildUnet(1, dropout=True, input_var=input_, trainable=True)
    output = unet.get_output_for(input_)
    test_prediction = lasagne.layers.get_output(unet, deterministic=True)[0]
    #test_prediction_dimshuffle = test_prediction.dimshuffle((0, 2, 3, 1))
    pred_fcn_fn = theano.function([input_], test_prediction)

    for data in train_stream.get_epoch_iterator():
        data_use = (data[0].reshape(32, 1, 28, 28), )
        out_put = pred_fcn_fn(data_use[0])
        import ipdb
Example #38
def train(args, model_args):

    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'

    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(
            DataStream.default_stream(
                dataset_train,
                iteration_scheme=ShuffledScheme(
                    examples=dataset_train.num_examples -
                    (dataset_train.num_examples % args.batch_size),
                    batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    '''
    x = T.matrix('x', dtype='float32')
    temp  = T.scalar('temp', dtype='float32')
    f=transition_operator(tparams, model_options, x, temp)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f([data[0], 1.0, 1])
        #ipdb.set_trace()
    '''
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

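    # Replace any NaN gradients with zeros so a single bad minibatch cannot
    # corrupt the parameters.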
    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    for param in tparams:
        print param
        print tparams[param].get_value().shape

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():

            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,
                                                3 * 32 * 32), )
                else:
                    continue
            else:
                data_use = (data[0], )
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()

            t1 = time.time()

            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
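            # Each meta step takes a gradient step on the current batch and,
            # if more meta steps follow, pushes the batch one step further
            # through the forward diffusion at an increased temperature.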
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        [data_run, temperature_forward, 1])
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)

            ud = time.time() - ud_start

            #gradient_updates_ = get_grads(data_use[0],args.temperature)

            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            t1 = time.time()
            #print time.time() - t1, "time to get grads"
            t1 = time.time()
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            #'Norm_1': np.linalg.norm(gradient_updates_[0]),
            #'Norm_2': np.linalg.norm(gradient_updates_[1]),
            #'Norm_3': np.linalg.norm(gradient_updates_[2]),
            #'Norm_4': np.linalg.norm(gradient_updates_[3])})
            #print time.time() - t1, "time to log"

            #print time.time() - t0, "total time in batch"
            t5 = time.time()

            if batch_index % 20 == 0:
                print batch_index, "cost", cost

            if batch_index % 200 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))
                temperature_forward = args.temperature

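                # Visualize the forward (corruption) trajectory: repeatedly
                # apply one step of diffusion to the batch, raising the
                # temperature geometrically at every step.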
                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [data_use[0], temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/' + "batch_" +
                            str(batch_index) + '_corrupted' + 'epoch_' +
                            str(count_sample) + '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [x_data, temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/batch_' + str(batch_index) +
                            '_corrupted' + '_epoch_' + str(count_sample) +
                            '_time_step_' + str(num_step))

                    temperature_forward = temperature_forward * args.temperature_factor

                x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH,
                                              WIDTH)
                plot_images(
                    x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) +
                    '_batch_index_' + str(batch_index))

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

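                # Reverse chain: start from the fully corrupted x_data and
                # repeatedly sample backwards, annealing the temperature down
                # towards args.temperature.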
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/' + "batch_" +
                        str(batch_index) + '_samples_backward_' + 'epoch_' +
                        str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

                if args.noise == "gaussian":
                    x_sampled = np.random.normal(
                        0.5, 2.0,
                        size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    s = np.random.binomial(1, 0.5, INPUT_SIZE)

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

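                # Generation from scratch: start from random noise and run the
                # same reverse chain to produce samples.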
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/batch_index_' +
                        str(batch_index) + '_inference_' + 'epoch_' +
                        str(count_sample) + '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

    ipdb.set_trace()
Example #39
0
def main(job_id, params):
    config = ConfigParser.ConfigParser()
    config.readfp(open('./params'))
    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
    input_dropout_ratio = float(
        config.get('hyperparams', 'input_dropout_ratio', 0.2))
    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    side = config.get('hyperparams', 'side', 'b')
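
    # The './params' file is read with ConfigParser, so it is expected to be an
    # ini-style file.  A hypothetical example (option names taken from the
    # get() calls above; the values shown are only illustrative):
    #
    #   [hyperparams]
    #   max_iter = 100
    #   base_lr = 0.01
    #   train_batch = 256
    #   valid_batch = 512
    #   hidden_units = 32
    #   data_file = /path/to/features.hdf5
    #   side = b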

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([
            AdaGrad(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])
    else:
        solver_type = CompositeRule([
            RMSProp(learning_rate=base_lr),
            VariableClipping(threshold=max_norm)
        ])

    input_dim = {'l': 11427, 'r': 10519, 'b': 10519 + 11427}
    data_file = config.get('hyperparams', 'data_file')

    if 'b' in side:
        train = H5PYDataset(data_file, which_set='train')
        valid = H5PYDataset(data_file, which_set='valid')
        test = H5PYDataset(data_file, which_set='test')
        x_l = tensor.matrix('l_features')
        x_r = tensor.matrix('r_features')
        x = tensor.concatenate([x_l, x_r], axis=1)

    else:
        train = H5PYDataset(data_file,
                            which_set='train',
                            sources=['{}_features'.format(side), 'targets'])
        valid = H5PYDataset(data_file,
                            which_set='valid',
                            sources=['{}_features'.format(side), 'targets'])
        test = H5PYDataset(data_file,
                           which_set='test',
                           sources=['{}_features'.format(side), 'targets'])
        x = tensor.matrix('{}_features'.format(side))

    y = tensor.lmatrix('targets')

    # Define a feed-forward net with an input, two hidden layers, and a softmax output:
    model = MLP(activations=[
        Rectifier(name='h1'),
        Rectifier(name='h2'),
        Softmax(name='output'),
    ],
                dims=[input_dim[side], hidden_units, hidden_units, 2],
                weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                biases_init=IsotropicGaussian(b_sd, b_mu))

    # Don't forget to initialize params:
    model.initialize()

    # y_hat is the output of the neural net with x as its inputs
    y_hat = model.apply(x)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
    cost += weight_decay * l2_norm(W)
    cost.name = 'entropy'

    # computational graph with l2 reg
    cost_graph = ComputationGraph([cost])

    # Apply dropout to inputs:
    inputs = VariableFilter([INPUT])(cost_graph.variables)
    dropout_inputs = [
        input for input in inputs if input.name.startswith('linear_')
    ]
    dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]],
                                  input_dropout_ratio)
    dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:],
                                  dropout_ratio)
    dropout_cost = dropout_graph.outputs[0]
    dropout_cost.name = 'dropout_entropy'

    # Learning Algorithm (notice: we use the dropout cost for learning):
    algo = GradientDescent(step_rule=solver_type,
                           params=dropout_graph.parameters,
                           cost=dropout_cost)

    # algo.step_rule.learning_rate.name = 'learning_rate'

    # Data stream used for training model:
    training_stream = Flatten(
        DataStream.default_stream(dataset=train,
                                  iteration_scheme=ShuffledScheme(
                                      train.num_examples,
                                      batch_size=train_batch)))

    training_monitor = TrainingDataMonitoring([
        dropout_cost,
        aggregation.mean(error),
        aggregation.mean(algo.total_gradient_norm)
    ],
                                              after_batch=True)

    # Use the 'valid' set for validation during training:
    validation_stream = Flatten(
        DataStream.default_stream(dataset=valid,
                                  iteration_scheme=ShuffledScheme(
                                      valid.num_examples,
                                      batch_size=valid_batch)))

    validation_monitor = DataStreamMonitoring(variables=[cost, error],
                                              data_stream=validation_stream,
                                              prefix='validation',
                                              after_epoch=True)

    test_stream = Flatten(
        DataStream.default_stream(
            dataset=test,
            iteration_scheme=ShuffledScheme(test.num_examples,
                                            batch_size=test_batch)))

    test_monitor = DataStreamMonitoring(variables=[error],
                                        data_stream=test_stream,
                                        prefix='test',
                                        after_training=True)

    plotting = Plot('AdniNet_{}'.format(side),
                    channels=[
                        ['dropout_entropy', 'validation_entropy'],
                        ['error', 'validation_error'],
                    ],
                    after_batch=False)

    # Checkpoint class used to save model and log:
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H:%M')
    checkpoint = Checkpoint('./models/{}net/{}'.format(side, stamp),
                            save_separately=['model', 'log'],
                            every_n_epochs=1)

    # Home-brewed class for early stopping when we detect we have started to overfit
    early_stopper = FinishIfOverfitting(error_name='error',
                                        validation_name='validation_error',
                                        threshold=0.1,
                                        epochs=5,
                                        burn_in=100)

    # The main loop will train the network and output reports, etc
    main_loop = MainLoop(data_stream=training_stream,
                         model=model,
                         algorithm=algo,
                         extensions=[
                             validation_monitor,
                             training_monitor,
                             plotting,
                             FinishAfter(after_n_epochs=max_epoch),
                             early_stopper,
                             Printing(),
                             ProgressBar(),
                             checkpoint,
                             test_monitor,
                         ])
    main_loop.run()

    ve = float(main_loop.log.last_epoch_row['validation_error'])
    te = float(main_loop.log.last_epoch_row['error'])
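    # Validation error plus the train/validation gap, so Spearmint is steered
    # away from configurations that overfit.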
    spearmint_loss = ve + abs(te - ve)
    print 'Spearmint Loss: {}'.format(spearmint_loss)
    return spearmint_loss
Example #40
0
def run(epochs=1, corpus="data/", HIDDEN_DIMS=100, path="./"):
    brown = BrownDataset(corpus)

    INPUT_DIMS = brown.get_vocabulary_size()

    OUTPUT_DIMS = brown.get_vocabulary_size()

    # These are theano variables
    x = tensor.lmatrix('context')
    y = tensor.ivector('output')

    # Construct the graph
    input_to_hidden = LookupTable(name='input_to_hidden', length=INPUT_DIMS,
                                  dim=HIDDEN_DIMS)

    # Compute the weight matrix for every word in the context and then compute
    # the average.
    h = tensor.mean(input_to_hidden.apply(x), axis=1)

    hidden_to_output = Linear(name='hidden_to_output', input_dim=HIDDEN_DIMS,
                              output_dim=OUTPUT_DIMS)
    y_hat = Softmax().apply(hidden_to_output.apply(h))

    # And initialize with random variables and set the bias vector to 0
    weights = IsotropicGaussian(0.01)
    input_to_hidden.weights_init = hidden_to_output.weights_init = weights
    input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
    input_to_hidden.initialize()
    hidden_to_output.initialize()

    # And now the cost function
    cost = CategoricalCrossEntropy().apply(y, y_hat)
    cg = ComputationGraph(cost)

    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + 0.01 * (W1 ** 2).sum() + 0.01 * (W2 ** 2).sum()
    cost.name = 'cost_with_regularization'

    mini_batch = SequentialScheme(brown.num_instances(), 512)
    data_stream = DataStream.default_stream(brown, iteration_scheme=mini_batch)

    # Now we tie up loose ends and construct the algorithm for the training
    # and define what happens in the main loop.
    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))

    extensions = [
        ProgressBar(),
        FinishAfter(after_n_epochs=epochs),
        Printing(),
        # TrainingDataMonitoring(variables=[cost]),
        SaveWeights(layers=[input_to_hidden, hidden_to_output],
                    prefixes=['%sfirst' % path, '%ssecond' % path]),
        # Plot(
        #     'Word Embeddings',
        #     channels=[
        #         [
        #             'cost_with_regularization'
        #         ]
        #     ])
    ]

    logger.info("Starting main loop...")
    main = MainLoop(data_stream=data_stream,
                    algorithm=algorithm,
                    extensions=extensions)

    main.run()

    pickle.dump(cg, open('%scg.pickle' % path, 'wb'))
Example #41
0
def _segment_axis(data):
    x = numpy.array([segment_axis(x, frame_size, 0) for x in data[0]])
    return (x, )


data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'blizzard_standardize.npz')

data_stats = numpy.load(data_dir)
data_mean = data_stats['data_mean']
data_std = data_stats['data_std']

dataset = Blizzard(which_sets=('train', ))
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            dataset.num_examples, batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
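# ScaleAndShift with these precomputed corpus statistics standardizes each
# batch: x -> (x - data_mean) / data_std (the scale is applied before the shift).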
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Mapping(data_stream, _transpose)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64 * 5
dataset = Blizzard(which_sets=('valid', ))
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            num_valid_examples,
                                            10 * batch_size))
Example #42
0
def run():
    name = 'colored-mnist'
    epochs = 200
    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    if not os.path.isdir(subdir):
        os.mkdir(subdir)
    

    bs = 150
    data_train = CaptionedMNIST(banned=[np.random.randint(0,10) for i in xrange(12)], dataset='train', num=50000, bs=bs)
    data_valid = CaptionedMNIST(banned=[np.random.randint(0,10) for i in xrange(12)], dataset='valid', num=10000, bs=bs)

    train_stream = DataStream.default_stream(data_train, iteration_scheme=SequentialScheme(data_train.num_examples, bs))
    valid_stream = DataStream.default_stream(data_valid, iteration_scheme=SequentialScheme(data_valid.num_examples, bs))


    img_height, img_width = (60,60)

    
    x = T.matrix('features')
    #x.tag.test_value = np.random.rand(bs, 60*60).astype('float32')
    y = T.lmatrix('captions')
    #y.tag.test_value = np.random.rand(bs, 12).astype(int)
    mask = T.lmatrix('mask')
    #mask.tag.test_value = np.ones((bs,12)).astype(int)

    K = 29
    lang_N = 14
    N = 32
    read_size = 8
    write_size = 8
    m = 256
    gen_dim = 300
    infer_dim = 300
    z_dim = 150
    l = 512

    model = ImageModel(bs, K, lang_N, N, read_size, write_size, m, gen_dim, infer_dim, z_dim, l, image_size=60*60, cinit=-10, channels=3)
    model._inputs = [x,y,mask]

    kl, log_recons, log_likelihood, c = model.train(x,y,mask)
    kl.name = 'kl'
    log_recons.name = 'log_recons'
    log_likelihood.name = 'log_likelihood'
    c.name = 'c'

    model._outputs = [kl, log_recons, log_likelihood, c]

    params = model.params

    from solvers.RMSProp import RMSProp as solver
    lr = theano.shared(np.asarray(0.001).astype(theano.config.floatX))
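    # Keeping the learning rate in a shared variable lets the DropLearningRate
    # extension below anneal it during training.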
    updates = solver(log_likelihood, params, lr=lr)#0.001)#, clipnorm=10.0)
    model._updates = updates

    logger.info('Compiling sample function')
    model.build_sample_function(y, mask)
    logger.info('Compiled sample function')

    # ============= TRAIN =========
    plots = [['train_kl','valid_kl'],
             ['train_log_recons','valid_log_recons'],
             ['train_log_likelihood','valid_log_likelihood']]
    main_loop = MainLoop(model, train_stream,
                         [FinishAfter(epochs),
                          Track(variables=['kl','log_recons','log_likelihood'], prefix='train'),
                          #TrackBest(variables=['kl'], prefix='train'),
                          DataStreamTrack(valid_stream, ['kl','log_recons','log_likelihood'], prefix='valid'),
                          SampleSentences(subdir, bs, 60, 60),
                          DropLearningRate(lr, 110, 0.00001),
                          Plot(name, plots, 'http://nameless-wave-6526.herokuapp.com/'),
                          SaveModel(subdir, name+'.model'),
                          TimeProfile(),
                          Printing()])
    main_loop.run()
Example #43
0
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server
from functions.custom_transformers import RandomDownscale, RandomFixedSizeCrop, RandomRotate, Normalize, Cast
import math

train_set = H5PYDataset('../data/data_1.hdf5',
                        which_sets=('train', ),
                        subset=slice(0, 20000),
                        load_in_memory=True)

index_images = 0
index_labels = 1

stream = DataStream.default_stream(train_set,
                                   iteration_scheme=ShuffledScheme(
                                       train_set.num_examples, 125))

#downscaled_stream = RandomDownscale(stream, 140)
stream = RandomRotate(stream, 20)
#cropped_stream    = RandomFixedSizeCrop(rotated_stream, (130,130))
stream = Normalize(stream)
stream = Cast(stream, 'floatX')

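# hwm is the ZeroMQ high-water mark: roughly, how many batches the server may
# buffer ahead of the consuming training process.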
start_server(stream, hwm=10)
def run():
    configs = [0]
    for config in configs:
        bs = 48
        feature_dim = 4000

        from uniform_dataset import UniformDataset
        data_test = UniformDataset(bs=bs,
                                   filename='/ssd2/hmdb/hmdb-tdd-1.hdf5',
                                   which_sets=['test'],
                                   sources=['features', 'time_mask', 'labels'])

        test_stream = DataStream.default_stream(
            data_test,
            iteration_scheme=SequentialScheme(data_test.num_examples, bs))

        x = T.tensor3('features')
        time_mask = T.wmatrix('time_mask')
        y = T.imatrix('labels')

        classes = eval(sys.argv[1])
        outputs = []
        for clas in classes:
            print 'Loading', clas
            model = cPickle.load(open('models/learned_' + str(clas), 'rb'))
            prob, loss, (tp, tn, fp, fn) = model.run(x, time_mask, y)
            prob.name = 'prob_' + str(clas)

            outputs += [prob]
        # prob is Nx1
        # outputs is 51xNx1
        # stack and take max along 51-class index
        outputs = T.stacklists(outputs)
        preds = T.argmax(outputs, axis=0)

        # predicted class is now outputs
        # which is shape Nx1, reshape to vector of N
        preds = preds.reshape((preds.shape[0], 1))

        num_err = T.neq(preds, y).sum()
        acc = 1 - (num_err / y.shape[0])

        test_func = theano.function([x, time_mask, y],
                                    outputs,
                                    on_unused_input='warn')

        data = test_stream.get_epoch_iterator(as_dict=True)
        total_acc = 0
        num = 0
        res = None
        labs = None
        for batch in data:
            o = test_func(batch['features'], batch['time_mask'],
                          batch['labels'])
            if res is None:
                res = o
                labs = batch['labels']
            else:
                # append on axis 1, to get 51xDs_size
                res = np.append(res, o, axis=1)
                labs = np.append(labs, batch['labels'], axis=0)
            continue

            total_acc += acc
            num += 1
            print acc
        np.save('results' + sys.argv[2], res)
        np.save('labs' + sys.argv[2], labs)
Example #45
0
def main(save_to,
         num_epochs,
         feature_maps=None,
         mlp_hiddens=None,
         conv_sizes=None,
         pool_sizes=None,
         batch_size=500):
    if feature_maps is None:
        feature_maps = [20, 50]
    if mlp_hiddens is None:
        mlp_hiddens = [500]
    if conv_sizes is None:
        conv_sizes = [5, 5]
    if pool_sizes is None:
        pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations,
                    1,
                    image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='full',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info(
        "Input dim: {} {} {}".format(*convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        logging.info("Layer {} dim: {} {} {}".format(i,
                                                     *layer.get_dim('output')))

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cost = named_copy(CategoricalCrossEntropy().apply(y.flatten(), probs),
                      'cost')
    error_rate = named_copy(MisclassificationRate().apply(y.flatten(), probs),
                            'error_rate')

    cg = ComputationGraph([cost, error_rate])

    mnist_train = MNIST(("train", ))
    mnist_train_stream = DataStream.default_stream(
        mnist_train,
        iteration_scheme=ShuffledScheme(mnist_train.num_examples, batch_size))

    mnist_test = MNIST(("test", ))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size))

    # Train with simple SGD
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             mnist_test_stream,
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        ProgressBar(),
        Printing()
    ]

    model = Model(cost)

    main_loop = MainLoop(algorithm,
                         mnist_train_stream,
                         model=model,
                         extensions=extensions)

    main_loop.run()
Example #46
0
cg = ComputationGraph(cost)

W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
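# Add an L1 penalty on both weight matrices (a sparsity-inducing regularizer).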
cost = cost + 0.001 * abs(W1).sum() + 0.001 * abs(W2).sum()
cost.name = 'cost'

error_rate = MisclassificationRate().apply(y.argmax(axis=1), y_hat)
error_rate.name = 'error_rate'

algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.1))

train_set = H5PYDataset('mushrooms.hdf5', which_sets=('train', ))
train_stream = DataStream.default_stream(train_set,
                                         iteration_scheme=SequentialScheme(
                                             train_set.num_examples,
                                             batch_size=128))

test_set = H5PYDataset('mushrooms.hdf5', which_sets=('test', ))
test_stream = DataStream.default_stream(test_set,
                                        iteration_scheme=SequentialScheme(
                                            test_set.num_examples,
                                            batch_size=128))

main = MainLoop(model=Model(cost),
                data_stream=train_stream,
                algorithm=algorithm,
                extensions=[
                    FinishAfter(after_n_epochs=10),
                    Printing(),
                    TrainingDataMonitoring([cost, error_rate],
Example #47
0
    def __init__(self, save_to):
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        mnist_test = MNIST(("test", ), sources=['features', 'targets'])

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outmap = OrderedDict(
            (full_brick_name(get_brick(out)), out) for out in VariableFilter(
                roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))
        # Generate pics for biases
        biases = VariableFilter(roles=[BIAS])(cg.parameters)

        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Figure work count
        error_rate = (MisclassificationRate().apply(
            y.flatten(), probs).copy(name='error_rate'))
        sensitive_unit_count = (SensitiveUnitCount().apply(
            y.flatten(), probs, biases).copy(name='sensitive_unit_count'))
        sensitive_unit_count.tag.aggregation_scheme = (
            Concatenate(sensitive_unit_count))
        active_unit_count = (ActiveUnitCount().apply(outs).copy(
            name='active_unit_count'))
        active_unit_count.tag.aggregation_scheme = (
            Concatenate(active_unit_count))
        ignored_unit_count = (IgnoredUnitCount().apply(
            y.flatten(), probs, biases, outs).copy(name='ignored_unit_count'))
        ignored_unit_count.tag.aggregation_scheme = (
            Concatenate(ignored_unit_count))

        model = Model([
            error_rate, sensitive_unit_count, active_unit_count,
            ignored_unit_count
        ])

        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)

        mnist_test = MNIST(("test", ))
        mnist_test_stream = DataStream.default_stream(
            mnist_test,
            iteration_scheme=SequentialScheme(mnist_test.num_examples,
                                              batch_size))

        evaluator = DatasetEvaluator([
            error_rate, sensitive_unit_count, active_unit_count,
            ignored_unit_count
        ])
        results = evaluator.evaluate(mnist_test_stream)

        def save_ranked_image(scores, filename):
            sorted_instances = scores.argsort()
            filmstrip = Filmstrip(image_shape=(28, 28), grid_shape=(100, 100))
            for i, index in enumerate(sorted_instances):
                filmstrip.set_image((i // 100, i % 100),
                                    mnist_test.get_data(request=index)[0])
            filmstrip.save(filename)

        save_ranked_image(results['sensitive_unit_count'], 'sensitive.jpg')
        save_ranked_image(results['active_unit_count'], 'active.jpg')
        save_ranked_image(results['ignored_unit_count'], 'ignored.jpg')
Example #48
0
def train():

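    # Resume from a previously checkpointed MainLoop if one exists; otherwise
    # build the model, algorithm and data stream from scratch.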
    if os.path.isfile('trainingdata.tar'):
        with open('trainingdata.tar', 'rb') as f:
            main = load(f)
    else:
        hidden_size = 512
        filename = 'warpeace.hdf5'

        encoder = HDF5CharEncoder('warpeace_input.txt', 1000)
        encoder.write(filename)
        alphabet_len = encoder.length

        x = theano.tensor.lmatrix('x')

        readout = Readout(
            readout_dim=alphabet_len,
            feedback_brick=LookupFeedback(alphabet_len, hidden_size, name='feedback'),
            source_names=['states'],
            emitter=RandomSoftmaxEmitter(),
            name='readout'
        )

        transition = GatedRecurrent(
            activation=Tanh(),
            dim=hidden_size)
        transition.weights_init = IsotropicGaussian(0.01)

        gen = SequenceGenerator(readout=readout,
                                transition=transition,
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0),
                                name='sequencegenerator')

        gen.push_initialization_config()
        gen.initialize()

        cost = gen.cost(outputs=x)
        cost.name = 'cost'

        cg = ComputationGraph(cost)

        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=Scale(0.5))

        train_set = encoder.get_dataset()
        train_stream = DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(
                train_set.num_examples, batch_size=128))

        main = MainLoop(
            model=Model(cost),
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=[
                FinishAfter(),
                Printing(),
                Checkpoint('trainingdata.tar', every_n_epochs=10),
                ShowOutput(every_n_epochs=10)
            ])

    main.run()
Example #49
0
                            parameters=cg.parameters,
                            step_rule=CompositeRule(step_rules),
                            on_unused_sources='ignore')

from blocks.extensions import Timing, FinishAfter, Printing, ProgressBar
from blocks.extensions.monitoring import TrainingDataMonitoring
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme
from blocks.main_loop import MainLoop
from blocks.extensions.saveload import Checkpoint

from blocks.model import Model

main_loop = MainLoop(algorithm=algorithm,
                     data_stream=DataStream.default_stream(
                         dataset=train_dataset,
                         iteration_scheme=SequentialScheme(
                             train_dataset.num_examples, batch_size=100)),
                     model=Model(y_est),
                     extensions=[
                         Timing(),
                         FinishAfter(after_n_epochs=200),
                         TrainingDataMonitoring(variables=[cost],
                                                prefix="train",
                                                after_epoch=True),
                         Printing(),
                         ProgressBar(),
                         Checkpoint(path="./checkpoint.zip")
                     ])

main_loop.run()
Example #50
0
valid_set = H5PYDataset(
	'./data_kaggle/kaggle_heart.hdf5',
	which_sets=('train',),
	#subset=slice(451, 494), 
	subset=slice(451, 491), 
	load_in_memory=True
)

index_cases    = 0
index_position = 1
index_mult     = 2
index_sax      = 3
index_images   = 4
index_targets  = 5

stream = DataStream.default_stream(
    valid_set,
    iteration_scheme=ShuffledScheme(valid_set.num_examples, 10)
)

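# Chain of (mostly project-specific) transformers: mask the images, reorder the
# sources, take a random 64x64 crop, normalize, zero-pad and cast to floatX
# before serving the batches over the Fuel server below.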
#downscaled_stream = RandomDownscale(stream, 70)
masked_stream     = ApplyMask(stream)
order_stream      = OrderFeatures(masked_stream)
cropped_stream    = RandomFixedSizeCrop(order_stream, (64,64))
float_stream      = Normalize(cropped_stream)
padded_stream     = ZeroPadding(float_stream)
casted_stream     = Cast(padded_stream, 'floatX')

start_server(casted_stream, port=5558, hwm=10)

Example #51
0
def main(dataset_path, use_c, log_min, log_max, num_steps):
    train_set = H5PYDataset(
        dataset_path, which_sets=('train',), sources=('features', 'targets'),
        subset=slice(0, 63257), load_in_memory=True)
    train_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledExampleScheme(train_set.num_examples))

    def get_class_balanced_batch(iterator):
        train_features = [[] for _ in range(10)]
        train_targets = [[] for _ in range(10)]
        batch_size = 0
        while batch_size < 1000:
            f, t = next(iterator)
            t = t[0]
            if len(train_features[t]) < 100:
                train_features[t].append(f)
                train_targets[t].append(t)
                batch_size += 1
        train_features = numpy.vstack(sum(train_features, []))
        train_targets = numpy.vstack(sum(train_targets, []))
        return train_features, train_targets

    train_features, train_targets = get_class_balanced_batch(
        train_stream.get_epoch_iterator())

    valid_set = H5PYDataset(
        dataset_path, which_sets=('train',), sources=('features', 'targets'),
        subset=slice(63257, 73257), load_in_memory=True)
    valid_features, valid_targets = valid_set.data_sources

    test_set = H5PYDataset(
        dataset_path, which_sets=('test',), sources=('features', 'targets'),
        load_in_memory=True)
    test_features, test_targets = test_set.data_sources

    if use_c is None:
        best_error_rate = 1.0
        best_C = None
        for log_C in numpy.linspace(log_min, log_max, num_steps):
            C = numpy.exp(log_C)
            svm = LinearSVC(C=C)
            svm.fit(train_features, train_targets.ravel())
            error_rate = 1 - numpy.mean(
                [svm.score(valid_features[1000 * i: 1000 * (i + 1)],
                           valid_targets[1000 * i: 1000 * (i + 1)].ravel())
                 for i in range(10)])
            if error_rate < best_error_rate:
                best_error_rate = error_rate
                best_C = C
            print('C = {}, validation error rate = {} '.format(C, error_rate) +
                  '(best is {}, {})'.format(best_C, best_error_rate))
    else:
        best_C = use_c

    error_rates = []
    for _ in range(10):
        train_features, train_targets = get_class_balanced_batch(
            train_stream.get_epoch_iterator())
        svm = LinearSVC(C=best_C)
        svm.fit(train_features, train_targets.ravel())
        error_rates.append(1 - numpy.mean(
            [svm.score(valid_features[1000 * i: 1000 * (i + 1)],
                       valid_targets[1000 * i: 1000 * (i + 1)].ravel())
             for i in range(10)]))

    print('Validation error rate = {} +- {} '.format(numpy.mean(error_rates),
                                                     numpy.std(error_rates)))

    error_rates = []
    for _ in range(100):
        train_features, train_targets = get_class_balanced_batch(
            train_stream.get_epoch_iterator())
        svm = LinearSVC(C=best_C)
        svm.fit(train_features, train_targets.ravel())
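        # The SVHN test set has 26032 images: score 26 full chunks of 1000,
        # then the remaining 32 examples, and average over all of them.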
        s = 1000 * numpy.sum(
            [svm.score(test_features[1000 * i: 1000 * (i + 1)],
                       test_targets[1000 * i: 1000 * (i + 1)].ravel())
             for i in range(26)])
        s += 32 * svm.score(test_features[-32:], test_targets[-32:].ravel())
        s = s / 26032.0
        error_rates.append(1 - s)

    print('Test error rate = {} +- {} '.format(numpy.mean(error_rates),
                                               numpy.std(error_rates)))
Example #52
0
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server
from config import basepath, minibatch_size
from transformers.custom_transformers import Standardize

submit_set = H5PYDataset(
    basepath + 'data.hdf5',
    which_sets=('submit', ),
    #subset=slice(0,50),
    sources=['features', 'image_name'],
    load_in_memory=False)

stream = DataStream.default_stream(submit_set,
                                   iteration_scheme=SequentialScheme(
                                       submit_set.num_examples,
                                       minibatch_size))

print('I provide sources ', submit_set.sources)
print('Number of examples', submit_set.num_examples)

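# Standardize is a project-specific transformer; presumably it rescales the raw
# pixel values by the factor of 255 passed here.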
standardized_stream = Standardize(stream, 255)

start_server(standardized_stream)
Example #53
0
    def __init__(self, save_to):
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        logging.info("Input dim: {} {} {}".format(
            *convnet.children[0].get_dim('input_')))
        for i, layer in enumerate(convnet.layers):
            if isinstance(layer, Activation):
                logging.info("Layer {} ({})".format(
                    i, layer.__class__.__name__))
            else:
                logging.info("Layer {} ({}) dim: {} {} {}".format(
                    i, layer.__class__.__name__, *layer.get_dim('output')))

        mnist_test = MNIST(("test",), sources=['features', 'targets'])
        basis = create_fair_basis(mnist_test, 10, 10)

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outs = OrderedDict((full_brick_name(get_brick(out)), out)
                for out in VariableFilter(
                    roles=[OUTPUT], bricks=[Convolutional, Linear])(
                        cg.variables))

        # Normalize input and apply the convnet
        error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                      .copy(name='error_rate'))
        confusion = (ConfusionMatrix().apply(y.flatten(), probs)
                      .copy(name='confusion'))
        confusion.tag.aggregation_scheme = Sum(confusion)
        confusion_image = (ConfusionImage().apply(y.flatten(), probs, x)
                      .copy(name='confusion_image'))
        confusion_image.tag.aggregation_scheme = Sum(confusion_image)

        model = Model(
                [error_rate, confusion, confusion_image] + list(outs.values()))

        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)

        mnist_test = MNIST(("test",))
        mnist_test_stream = DataStream.default_stream(
            mnist_test,
            iteration_scheme=SequentialScheme(
                mnist_test.num_examples, batch_size))

        self.model = model
        self.mnist_test_stream = mnist_test_stream
        self.evaluator = DatasetEvaluator(
                [error_rate, confusion, confusion_image])
        self.base_results = self.evaluator.evaluate(mnist_test_stream)

        # TODO: allow target layer to be parameterized
        self.target_layer = '/lenet/mlp/linear_0'
        self.next_layer_param = '/lenet/mlp/linear_1.W'
        self.base_sample = extract_sample(
                outs[self.target_layer], mnist_test_stream)
        self.base_param_value = (
            model.get_parameter_dict()[
                self.next_layer_param].get_value().copy())
Example #54
0
dropout_inputs = [input for input in inputs if input.name.startswith('linear_')]
dropout_graph = apply_dropout(cost_graph, dropout_inputs, dropout_ratio)
dropout_cost = dropout_graph.outputs[0]
dropout_cost.name = 'dropout_entropy'

# Learning Algorithm:
algo = GradientDescent(
    step_rule=solver_type,
    params=dropout_graph.parameters,
    cost=dropout_cost)

# Data stream used for training model:
training_stream = Flatten(
    DataStream.default_stream(
        dataset=train,
        iteration_scheme=ShuffledScheme(
            train.num_examples,
            batch_size=train_batch)))

training_monitor = TrainingDataMonitoring([cost], after_batch=True)

# Use the 'valid' set for validation during training:
validation_stream = Flatten(
    DataStream.default_stream(
        dataset=valid,
        iteration_scheme=ShuffledScheme(
            valid.num_examples,
            batch_size=valid_batch)))

validation_monitor = DataStreamMonitoring(
    variables=[cost],
Example #55
0
def main(save_to, num_epochs,
         weight_decay=0.0001, noise_pressure=0, subset=None, num_batches=None,
         batch_size=None, histogram=None, resume=False):
    output_size = 10

    prior_noise_level = -10
    noise_step_rule = Scale(1e-6)
    noise_rate = theano.shared(numpy.asarray(1e-5, dtype=theano.config.floatX))
    convnet = create_res_net(out_noise=True, tied_noise=True, tied_sigma=True,
            noise_rate=noise_rate,
            prior_noise_level=prior_noise_level)

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    test_probs = convnet.apply(x)
    test_cost = (CategoricalCrossEntropy().apply(y.flatten(), test_probs)
            .copy(name='cost'))
    test_error_rate = (MisclassificationRate().apply(y.flatten(), test_probs)
                  .copy(name='error_rate'))
    test_confusion = (ConfusionMatrix().apply(y.flatten(), test_probs)
                  .copy(name='confusion'))
    test_confusion.tag.aggregation_scheme = Sum(test_confusion)

    test_cg = ComputationGraph([test_cost, test_error_rate])

    # Apply dropout to all layer outputs except final softmax
    # dropout_vars = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables)
    # drop_cg = apply_dropout(test_cg, dropout_vars, 0.5)

    # Apply 0.2 dropout to the pre-averaging layer
    # dropout_vars_2 = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_8_apply_output$")(test_cg.variables)
    # train_cg = apply_dropout(test_cg, dropout_vars_2, 0.2)

    # Apply 0.2 dropout to the input, as in the paper
    # train_cg = apply_dropout(test_cg, [x], 0.2)
    # train_cg = drop_cg
    # train_cg = apply_batch_normalization(test_cg)

    # train_cost, train_error_rate, train_components = train_cg.outputs

    with batch_normalization(convnet):
        with training_noise(convnet):
            train_probs = convnet.apply(x)
    train_cost = (CategoricalCrossEntropy().apply(y.flatten(), train_probs)
                .copy(name='cost'))
    train_components = (ComponentwiseCrossEntropy().apply(y.flatten(),
                train_probs).copy(name='components'))
    train_error_rate = (MisclassificationRate().apply(y.flatten(),
                train_probs).copy(name='error_rate'))
    train_cg = ComputationGraph([train_cost,
                train_error_rate, train_components])
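    # Exponential-moving-average updates (alpha = 0.9) for the batch-norm
    # population statistics, applied alongside the gradient updates.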
    population_updates = get_batch_normalization_updates(train_cg)
    bn_alpha = 0.9
    extra_updates = [(p, p * bn_alpha + m * (1 - bn_alpha))
                for p, m in population_updates]

    # for annealing
    nit_penalty = theano.shared(numpy.asarray(noise_pressure, dtype=theano.config.floatX))
    nit_penalty.name = 'nit_penalty'

    # Compute noise rates for training graph
    train_logsigma = VariableFilter(roles=[LOG_SIGMA])(train_cg.variables)
    train_mean_log_sigma = tensor.concatenate([n.flatten() for n in train_logsigma]).mean()
    train_mean_log_sigma.name = 'mean_log_sigma'
    train_nits = VariableFilter(roles=[NITS])(train_cg.auxiliary_variables)
    train_nit_rate = tensor.concatenate([n.flatten() for n in train_nits]).mean()
    train_nit_rate.name = 'nit_rate'
    train_nit_regularization = nit_penalty * train_nit_rate
    train_nit_regularization.name = 'nit_regularization'

    # Apply regularization to the cost
    trainable_parameters = VariableFilter(roles=[WEIGHT, BIAS])(
            train_cg.parameters)
    mask_parameters = [p for p in trainable_parameters
            if get_brick(p).name == 'mask']
    noise_parameters = VariableFilter(roles=[NOISE])(train_cg.parameters)
    biases = VariableFilter(roles=[BIAS])(train_cg.parameters)
    weights = VariableFilter(roles=[WEIGHT])(train_cg.variables)
    nonmask_weights = [p for p in weights if get_brick(p).name != 'mask']
    l2_norm = sum([(W ** 2).sum() for W in nonmask_weights])
    l2_norm.name = 'l2_norm'
    l2_regularization = weight_decay * l2_norm
    l2_regularization.name = 'l2_regularization'

    # testversion
    test_cost = test_cost + l2_regularization
    test_cost.name = 'cost_with_regularization'

    # Training version of cost
    train_cost_without_regularization = train_cost
    train_cost_without_regularization.name = 'cost_without_regularization'
    train_cost = train_cost + l2_regularization + train_nit_regularization
    train_cost.name = 'cost_with_regularization'

    cifar10_train = CIFAR10(("train",))
    cifar10_train_stream = RandomPadCropFlip(
        NormalizeBatchLevels(DataStream.default_stream(
            cifar10_train, iteration_scheme=ShuffledScheme(
                cifar10_train.num_examples, batch_size)),
        which_sources=('features',)),
        (32, 32), pad=4, which_sources=('features',))

    test_batch_size = 128
    cifar10_test = CIFAR10(("test",))
    cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream(
        cifar10_test,
        iteration_scheme=ShuffledScheme(
            cifar10_test.num_examples, test_batch_size)),
        which_sources=('features',))

    momentum = Momentum(0.01, 0.9)

    # Create a step rule that doubles the learning rate of biases, like Caffe.
    # scale_bias = Restrict(Scale(2), biases)
    # step_rule = CompositeRule([scale_bias, momentum])

    # Create a step rule that reduces the learning rate of noise
    scale_mask = Restrict(noise_step_rule, mask_parameters)
    step_rule = CompositeRule([scale_mask, momentum])

    # from theano.compile.nanguardmode import NanGuardMode

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=train_cost, parameters=trainable_parameters,
        step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    #,
    #    theano_func_kwargs={
    #        'mode': NanGuardMode(
    #            nan_is_error=True, inf_is_error=True, big_is_error=True)})

    exp_name = save_to.replace('.%d', '')

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  EpochSchedule(momentum.learning_rate, [
                      (0, 0.01),     # Warm up with 0.01 learning rate
                      (50, 0.1),     # Then go back to 0.1
                      (100, 0.01),
                      (150, 0.001)
                      # (83, 0.01),  # Follow the schedule in the paper
                      # (125, 0.001)
                  ]),
                  EpochSchedule(noise_step_rule.learning_rate, [
                      (0, 1e-2),
                      (2, 1e-1),
                      (4, 1)
                      # (0, 1e-6),
                      # (2, 1e-5),
                      # (4, 1e-4)
                  ]),
                  EpochSchedule(noise_rate, [
                      (0, 1e-2),
                      (2, 1e-1),
                      (4, 1)
                      # (0, 1e-6),
                      # (2, 1e-5),
                      # (4, 1e-4),
                      # (6, 3e-4),
                      # (8, 1e-3), # Causes nit rate to jump
                      # (10, 3e-3),
                      # (12, 1e-2),
                      # (15, 3e-2),
                      # (19, 1e-1),
                      # (24, 3e-1),
                      # (30, 1)
                  ]),
                  NoiseExtension(
                      noise_parameters=noise_parameters),
                  NoisyDataStreamMonitoring(
                      [test_cost, test_error_rate, test_confusion],
                      cifar10_test_stream,
                      noise_parameters=noise_parameters,
                      prefix="test"),
                  TrainingDataMonitoring(
                      [train_cost, train_error_rate, train_nit_rate,
                       train_cost_without_regularization,
                       l2_regularization,
                       train_nit_regularization,
                       momentum.learning_rate,
                       train_mean_log_sigma,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      every_n_batches=17),
                      # after_epoch=True),
                  Plot('Training performance for ' + exp_name,
                      channels=[
                          ['train_cost_with_regularization',
                           'train_cost_without_regularization',
                           'train_nit_regularization',
                           'train_l2_regularization'],
                          ['train_error_rate'],
                          ['train_total_gradient_norm'],
                          ['train_mean_log_sigma'],
                      ],
                      every_n_batches=17),
                  Plot('Test performance for ' + exp_name,
                      channels=[[
                          'train_error_rate',
                          'test_error_rate',
                          ]],
                      after_epoch=True),
                  EpochCheckpoint(save_to, use_cpickle=True, after_epoch=True),
                  ProgressBar(),
                  Printing()]

    if histogram:
        attribution = AttributionExtension(
            components=train_components,
            parameters=cg.parameters,
            components_size=output_size,
            after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(exp_name, True, True))

    model = Model(train_cost)

    main_loop = MainLoop(
        algorithm,
        cifar10_train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
Example #56
0
else:
    from model import build_model
    host_plot = 'http://hades.calculquebec.ca:5042'
    slice_train = slice(0, n_ex)
    slice_test = slice(45000, 50000 - 8)
    slice_valid = slice(40000, 45000 - 8)

## Load cifar10 stream
batch_size = 32
num_train_example = slice_train.stop - slice_train.start
num_valid_example = slice_valid.stop - slice_valid.start
num_test_example = slice_test.stop - slice_test.start

train_dataset = CIFAR10(('train', ), subset=slice_train)
train_stream = DataStream.default_stream(train_dataset,
                                         iteration_scheme=SequentialScheme(
                                             train_dataset.num_examples,
                                             batch_size))
train_stream = OneHotEncode10(train_stream, which_sources=('targets', ))
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features', ))
train_stream = MinimumImageDimensions(train_stream, (224, 224),
                                      which_sources=('features', ))
train_stream = ScaleAndShift(train_stream, 1., 0, which_sources=('features', ))
train_stream = Cast(train_stream, 'floatX', which_sources=('features', ))

valid_dataset = CIFAR10(('train', ), subset=slice_valid)
valid_stream = DataStream.default_stream(valid_dataset,
                                         iteration_scheme=SequentialScheme(
                                             valid_dataset.num_examples,
                                             batch_size))
valid_stream = OneHotEncode10(valid_stream, which_sources=('targets', ))
valid_stream = MinimumImageDimensions(valid_stream, (224, 224),
                                      which_sources=('features', ))
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32
    elif args.dataset == 'IMAGENET':
        from imagenet_data import IMAGENET
        spatial_width = 128
        dataset_train = IMAGENET(['train'], width=spatial_width)
        dataset_test = IMAGENET(['test'], width=spatial_width)
        n_colors = 3
    else:
        raise ValueError("Unknown dataset %s."%args.dataset)

    train_stream = Flatten(DataStream.default_stream(dataset_train,
                              iteration_scheme=ShuffledScheme(
                                  examples=dataset_train.num_examples,
                                  batch_size=args.batch_size)))
    test_stream = Flatten(DataStream.default_stream(dataset_test,
                             iteration_scheme=ShuffledScheme(
                                 examples=dataset_test.num_examples,
                                 batch_size=args.batch_size))
                             )

    shp = next(train_stream.get_epoch_iterator())[0].shape

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1./np.sqrt(np.mean((Xbatch-np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch*scl)
    # scale is applied before shift
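    # Sketch (not part of the original snippet) of the TODO above: accumulate
    # running sums over one full epoch instead of estimating the statistics
    # from a single minibatch.  Assumes the stream yields only the flattened
    # 'features' source; `scl_full` and `shft_full` are hypothetical names.
    total = 0.
    total_sq = 0.
    count = 0
    for batch_tuple in train_stream.get_epoch_iterator():
        batch = np.asarray(batch_tuple[0], dtype='float64')
        total += batch.sum()
        total_sq += (batch ** 2).sum()
        count += batch.size
    full_mean = total / count
    full_std = np.sqrt(total_sq / count - full_mean ** 2)
    scl_full = 1. / full_std
    shft_full = -full_mean * scl_full  # scale is applied before shift
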
def train(args, model_args):

    model_id = '/data/lisatmp4/anirudhg/spiral_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=20000,
                           classes=1,
                           cycles=1.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
    elif args.dataset == 'Circle':
        print 'loading Circle'
        train_set = Circle(num_examples=20000,
                           classes=1,
                           cycles=1.,
                           noise=0.0,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
        iter_per_epoch = train_set.num_examples
    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    train_stream = dataset_train

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)
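    # Sketch (not part of the original) of how the commented-out normalization
    # above could be wired in with Fuel's ScaleAndShift transformer;
    # `normalized_train_stream` is a hypothetical name and is not used below.
    from fuel.transformers import ScaleAndShift
    normalized_train_stream = ScaleAndShift(train_stream, scl, shft,
                                            which_sources=('features',))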

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    # Replace NaN entries in the gradients with zeros so a single bad batch
    # does not corrupt the parameter update.
    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'
    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 0
    print 'Number of steps....', args.num_steps
    print 'Done'
    count_sample = 1
    batch_index = 0
    for eidx in xrange(max_epochs):
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
            if eidx == 30:
                ipdb.set_trace()
        n_samples = 0
        print 'Starting Next Epoch ', eidx

        for data in train_stream.get_epoch_iterator():
            batch_index += 1
            # Skip empty batches before counting their samples.
            if data[0] is None:
                print 'No data '
                continue
            n_samples += len(data[0])
            uidx += 1
            data_run = data[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        data_run, temperature_forward)
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            empty = []
            spiral_x = [empty for i in range(args.num_steps)]
            spiral_corrupted = []
            spiral_sampled = []
            grad_forward = []
            grad_back = []
            x_data_time = []
            x_tilt_time = []
            if batch_index % 8 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                temperature_forward = args.temperature
                for num_step in range(args.num_steps):
                    if num_step == 0:
                        x_data_time.append(data[0])
                        plot_images(
                            data[0], model_dir + '/' + 'orig_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index))
                        x_data, mu_data, _, _ = forward_diffusion(
                            data[0], temperature_forward)

                        plot_images(
                            x_data, model_dir + '/' + 'corrupted_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index) +
                            '_time_step_' + str(num_step))
                        x_data_time.append(x_data)
                        temp_grad = np.concatenate(
                            (x_data_time[-2], x_data_time[-1]), axis=1)
                        grad_forward.append(temp_grad)

                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        spiral_corrupted.append(x_data)
                        mu_data = np.asarray(mu_data).astype(
                            'float32').reshape(args.batch_size, INPUT_SIZE)
                        mu_data = mu_data.reshape(args.batch_size, 2)
                    else:
                        x_data_time.append(x_data)
                        x_data, mu_data, _, _ = forward_diffusion(
                            x_data, temperature_forward)
                        plot_images(
                            x_data, model_dir + '/' + 'corrupted_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index) +
                            '_time_step_' + str(num_step))
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        spiral_corrupted.append(x_data)

                        mu_data = np.asarray(mu_data).astype(
                            'float32').reshape(args.batch_size, INPUT_SIZE)
                        mu_data = mu_data.reshape(args.batch_size, 2)
                        x_data_time.append(x_data)
                        temp_grad = np.concatenate(
                            (x_data_time[-2], x_data_time[-1]), axis=1)
                        grad_forward.append(temp_grad)
                    temperature_forward = temperature_forward * args.temperature_factor

                mean_sampled = x_data.mean()
                var_sampled = x_data.var()

                x_temp2 = data[0].reshape(args.batch_size, 2)
                plot_2D(
                    spiral_corrupted, args.num_steps,
                    model_dir + '/' + 'corrupted_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                plot_2D(
                    x_temp2, 1, model_dir + '/' + 'orig_' + 'epoch_' +
                    str(count_sample) + '_batch_index_' + str(batch_index))
                plot_grad(
                    grad_forward,
                    model_dir + '/' + 'grad_forward_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                for i in range(args.num_steps + args.extra_steps):
                    x_tilt_time.append(x_data)
                    x_data, sampled_mean = f_sample(x_data, temperature)
                    plot_images(
                        x_data, model_dir + '/' + 'sampled_' + 'epoch_' +
                        str(count_sample) + '_batch_' + str(batch_index) +
                        '_time_step_' + str(i))
                    x_tilt_time.append(x_data)
                    temp_grad = np.concatenate(
                        (x_tilt_time[-2], x_tilt_time[-1]), axis=1)
                    grad_back.append(temp_grad)

                    ###print 'Recons, On step number, using temperature', i, temperature
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

                plot_grad(
                    grad_back, model_dir + '/' + 'grad_back_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                plot_2D(
                    x_tilt_time, args.num_steps,
                    model_dir + '/' + 'sampled_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))

                # np.random.normal takes a standard deviation (not a
                # variance) as its scale argument.
                s = np.random.normal(mean_sampled, np.sqrt(var_sampled),
                                     [args.batch_size, 2])
                x_sampled = s

                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps + args.extra_steps):
                    x_data, sampled_mean = f_sample(x_data, temperature)
                    spiral_sampled.append(x_data)
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor
                plot_2D(
                    spiral_sampled, args.num_steps,
                    model_dir + '/' + 'inference_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
    ipdb.set_trace()
Example #59
0
    logger = logging.getLogger(__name__)
    FORMAT = '[%(asctime)s] %(name)s %(message)s'
    DATEFMT = "%H:%M:%S"
    logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.DEBUG)

    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.)
    }
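    # Illustration (not in the original) of how these init kwargs are usually
    # consumed: pass them to a brick at construction time and call
    # initialize() once; `example_layer` is a hypothetical brick not used
    # elsewhere in this snippet.
    from blocks.bricks import Linear
    example_layer = Linear(input_dim=28 * 28, output_dim=128,
                           name='example_layer', **inits)
    example_layer.initialize()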

    batch_size = 100
    data_train = MNIST(which_sets=['train'], sources=['features'])

    train_stream = Flatten(
        DataStream.default_stream(data_train,
                                  iteration_scheme=SequentialScheme(
                                      data_train.num_examples, batch_size)))

    features_size = 28 * 28 * 1

    inputs = T.matrix('features')

    test_data = {
        inputs:
        255 * np.random.normal(size=(batch_size, 28 * 28)).astype('float32')
    }

    prior = Z_prior(dim=128)

    gen = Generator(input_dim=128,
                    dims=[128, 64, 64, features_size],
Example #60
0
def main(save_to,
         num_epochs,
         regularization=0.001,
         subset=None,
         num_batches=None,
         batch_size=None,
         histogram=None,
         resume=False):
    output_size = 10
    convnet = create_all_conv_net()

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    test_cost = (CategoricalCrossEntropy().apply(y.flatten(),
                                                 probs).copy(name='cost'))
    test_components = (ComponentwiseCrossEntropy().apply(
        y.flatten(), probs).copy(name='components'))
    test_error_rate = (MisclassificationRate().apply(
        y.flatten(), probs).copy(name='error_rate'))
    test_confusion = (ConfusionMatrix().apply(y.flatten(),
                                              probs).copy(name='confusion'))
    test_confusion.tag.aggregation_scheme = Sum(test_confusion)

    test_cg = ComputationGraph([test_cost, test_error_rate, test_components])

    # Apply dropout to all layer outputs except final softmax
    dropout_vars = VariableFilter(
        roles=[OUTPUT],
        bricks=[Convolutional],
        theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables)
    drop_cg = apply_dropout(test_cg, dropout_vars, 0.5)

    # Apply 0.2 dropout to the pre-averaging layer
    # dropout_vars_2 = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_8_apply_output$")(drop_cg.variables)
    # train_cg = apply_dropout(drop_cg, dropout_vars_2, 0.2)

    # Apply 0.2 dropout to the input, as in the paper
    # train_cg = apply_dropout(drop_cg, [x], 0.2)
    train_cg = drop_cg
    # train_cg = test_cg

    train_cost, train_error_rate, train_components = train_cg.outputs

    # Apply regularization to the cost
    biases = VariableFilter(roles=[BIAS])(train_cg.parameters)
    weights = VariableFilter(roles=[WEIGHT])(train_cg.variables)
    l2_norm = sum([(W**2).sum() for W in weights])
    l2_norm.name = 'l2_norm'
    l2_regularization = regularization * l2_norm
    l2_regularization.name = 'l2_regularization'
    test_cost = test_cost + l2_regularization
    test_cost.name = 'cost_with_regularization'

    # Training version of cost
    train_cost_without_regularization = train_cost
    train_cost_without_regularization.name = 'cost_without_regularization'
    train_cost = train_cost + regularization * l2_norm
    train_cost.name = 'cost_with_regularization'

    cifar10_train = CIFAR10(("train", ))
    #cifar10_train_stream = RandomPadCropFlip(
    #    NormalizeBatchLevels(DataStream.default_stream(
    #        cifar10_train, iteration_scheme=ShuffledScheme(
    #            cifar10_train.num_examples, batch_size)),
    #    which_sources=('features',)),
    #    (32, 32), pad=5, which_sources=('features',))
    cifar10_train_stream = NormalizeBatchLevels(DataStream.default_stream(
        cifar10_train,
        iteration_scheme=ShuffledScheme(cifar10_train.num_examples,
                                        batch_size)),
                                                which_sources=('features', ))

    test_batch_size = 1000
    cifar10_test = CIFAR10(("test", ))
    cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream(
        cifar10_test,
        iteration_scheme=ShuffledScheme(cifar10_test.num_examples,
                                        test_batch_size)),
                                               which_sources=('features', ))

    momentum = Momentum(0.002, 0.9)

    # Create a step rule that doubles the learning rate of biases, like Caffe.
    # scale_bias = Restrict(Scale(2), biases)
    # step_rule = CompositeRule([scale_bias, momentum])
    # step_rule = CompositeRule([StepClipping(100), momentum])
    step_rule = momentum

    # Train with simple SGD
    algorithm = GradientDescent(cost=train_cost,
                                parameters=train_cg.parameters,
                                step_rule=step_rule)

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        EpochSchedule(momentum.learning_rate, [(1, 0.005), (3, 0.01),
                                               (5, 0.02), (200, 0.002),
                                               (250, 0.0002), (300, 0.00002)]),
        DataStreamMonitoring([test_cost, test_error_rate, test_confusion],
                             cifar10_test_stream,
                             prefix="test"),
        TrainingDataMonitoring([
            train_cost, train_error_rate, train_cost_without_regularization,
            l2_regularization, momentum.learning_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               every_n_batches=10),
        # after_epoch=True),
        Plot('Training performance for ' + save_to,
             channels=[
                 [
                     'train_cost_with_regularization',
                     'train_cost_without_regularization',
                     'train_l2_regularization'
                 ],
                 ['train_error_rate'],
                 ['train_total_gradient_norm'],
             ],
             every_n_batches=10),
        # after_batch=True),
        Plot('Test performance for ' + save_to,
             channels=[[
                 'train_error_rate',
                 'test_error_rate',
             ]],
             after_epoch=True),
        Checkpoint(save_to),
        ProgressBar(),
        Printing()
    ]

    if histogram:
        attribution = AttributionExtension(components=train_components,
                                           parameters=train_cg.parameters,
                                           components_size=output_size,
                                           after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(save_to, True, True))

    model = Model(train_cost)

    main_loop = MainLoop(algorithm,
                         cifar10_train_stream,
                         model=model,
                         extensions=extensions)

    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
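
    # NumpyEncoder is defined elsewhere in the original project; a typical
    # (assumed) implementation subclasses json.JSONEncoder and converts
    # numpy scalars and arrays to plain Python types, e.g.:
    #
    #     class NumpyEncoder(json.JSONEncoder):
    #         def default(self, obj):
    #             if isinstance(obj, numpy.integer):
    #                 return int(obj)
    #             if isinstance(obj, numpy.floating):
    #                 return float(obj)
    #             if isinstance(obj, numpy.ndarray):
    #                 return obj.tolist()
    #             return json.JSONEncoder.default(self, obj)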