Example #1
    def __init__(self, batch_size, output_length,
                 visual_dim, word_dim,
                 visual_feature_dim,
                 question_feature_dim,
                 joint_dim,
                 memory_dim,
                 output_dim,
                 fc1_dim,
                 fc2_dim,
                 voc_size):
        # the video encoder
        self.video_encoder = visualEncoder(
            visual_dim,
            visual_feature_dim)
        self.sentence_encoder = questionEncoder(
            word_dim,
            question_feature_dim)
        self.toJoint = embeddingLayer(
            2 * question_feature_dim,
            2 * visual_feature_dim,
            joint_dim)
        self.rewatcher = videoAttentionLayer(
            joint_dim,
            memory_dim,
            output_dim)

        self.seq_gen = seqDecoder(
            joint_dim,
            output_dim,
            fc1_dim,
            fc2_dim)
        self.softmax_layer = Softmax()
        self.bs = batch_size
        self.output_length = output_length
        self.voc_size = voc_size
Example #2
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config, **kwargs)

        self.dest_mlp = MLP(
            activations=[Rectifier()
                         for _ in config.dim_hidden_dest] + [Softmax()],
            dims=[config.dim_hidden[-1]] + config.dim_hidden_dest +
            [config.dim_output_dest],
            name='dest_mlp')
        self.time_mlp = MLP(
            activations=[Rectifier()
                         for _ in config.dim_hidden_time] + [Softmax()],
            dims=[config.dim_hidden[-1]] + config.dim_hidden_time +
            [config.dim_output_time],
            name='time_mlp')

        self.dest_classes = theano.shared(numpy.array(
            config.dest_tgtcls, dtype=theano.config.floatX),
                                          name='dest_classes')
        self.time_classes = theano.shared(numpy.array(
            config.time_tgtcls, dtype=theano.config.floatX),
                                          name='time_classes')

        self.inputs.append('input_time')
        self.children.extend([self.dest_mlp, self.time_mlp])
Example #3
def build_training(lr=0.002, model=None):
    x = T.tensor4('x')
    y = T.imatrix()
    if model is None:
        model = build_model()
    y_prev = model.apply(x)
    y_softmax = Softmax().apply(y_prev)
    ##### prediction #####
    #cost = CategoricalCrossEntropy().apply(y.flatten(), y_prev).mean()
    cost = Softmax().categorical_cross_entropy(y.flatten(), y_prev).mean()
    error = MisclassificationRate().apply(y.flatten(), y_softmax).mean()
    W, B = get_Params(y_prev)
    params = W + B
    regulizer_full = sum([w.norm(2) for w in W[0:2]])
    regulizer_conv = sum([w.norm(2) for w in W[2:]])
    cost = cost  #+ 0.01*regulizer_conv #+ 0.001*regulizer_conv
    updates, updates_init = RMSProp(cost, params, lr)
    #updates, updates_init = Adam(cost, params, lr)
    #updates = Sgd(cost, params, lr)
    train_function = theano.function([x, y], cost, updates=updates,
                                     allow_input_downcast=True)
    valid_function = theano.function([x, y], cost,
                                     allow_input_downcast=True)
    test_function = theano.function([x, y], error,
                                    allow_input_downcast=True)
    reinit = theano.function([], T.zeros((1,)), updates=updates_init)
    observation = theano.function([], [w.norm(2) for w in W])
    """
    reg_function = theano.function([], T.zeros((1,)), updates=clip(W),
                                   allow_input_downcast=True)

    observation = theano.function([], [w.norm(2) for w in W])
    """
    return train_function, valid_function, test_function, model, reinit
Example #4
    def __init__(self, feature_dim, hidden_dim, output_dim):
        self.image_embed = Linear(input_dim=feature_dim,
                                  output_dim=hidden_dim,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  use_bias=False,
                                  name='image_embed')
        self.word_embed = Linear(input_dim=feature_dim,
                                 output_dim=hidden_dim,
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0),
                                 use_bias=False,
                                 name='word_embed')
        self.r_embed = Linear(input_dim=feature_dim,
                              output_dim=hidden_dim,
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0),
                              use_bias=False,
                              name='r_embed')
        self.m_to_s = Linear(input_dim=hidden_dim,
                             output_dim=1,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='m_to_s')
        self.attention_dist = Softmax(name='attention_dist_softmax')
        self.r_to_r = Linear(input_dim=feature_dim,
                             output_dim=feature_dim,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='r_to_r')
        # self.r_to_g = Linear(input_dim=feature_dim,
        #                      output_dim=output_dim,
        #                      weights_init=IsotropicGaussian(0.01),
        #                      biases_init=Constant(0),
        #                      use_bias=False,
        #                      name='r_to_g')
        self.image_embed.initialize()
        self.word_embed.initialize()
        self.r_embed.initialize()
        self.m_to_s.initialize()
        self.r_to_r.initialize()
        # self.r_to_g.initialize()

        # the sequence to sequence LSTM
        self.seq = LSTM(output_dim,
                        name='rewatcher_seq',
                        weights_init=IsotropicGaussian(0.01),
                        biases_init=Constant(0))
        self.seq_embed = Linear(feature_dim,
                                output_dim * 4,
                                name='rewatcher_seq_embed',
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0),
                                use_bias=False)

        self.seq.initialize()
        self.seq_embed.initialize()
Example #5
def maxout_vae_mnist_test(path_vae_mnist):

    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    temp = cg.parameters
    for t, i in zip(temp, range(len(temp))):
        t.name = t.name+str(i)+"maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y, y_hat) 

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_test = Flatten(DataStream.default_stream(
            test_set, iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")


    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=50),
                    Printing(every_n_epochs=1)
                  ]

    main_loop = MainLoop(data_stream=data_stream_train,
                        algorithm=algorithm, model=Model(cost),
                        extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'w')) as f:
        dump(maxout, f)
Example #6
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config,
                                    output_dim=config.tgtcls.shape[0],
                                    **kwargs)
        self.classes = theano.shared(numpy.array(config.tgtcls,
                                                 dtype=theano.config.floatX),
                                     name='classes')
        self.softmax = Softmax()
        self.children.append(self.softmax)
Example #7
        def onestepContextAttn(hContextAttn):

            preContextatt = attentionmlpContext.apply(hContextAttn)
            attContextsoft = Softmax()
            attContextpyx = attContextsoft.apply(preContextatt.flatten())
            attContextpred = attContextpyx.flatten()
            attcontext = T.mul(hContextAttn.dimshuffle(1,0), attContextpred).dimshuffle(1,0)

            return attcontext
Example #8
        def onestepEncAttn(hEncAttn):

            preEncattn = attentionmlpEnc.apply(hEncAttn)
            attEncsoft = Softmax()
            attEncpyx = attEncsoft.apply(preEncattn.flatten())
            attEncpred = attEncpyx.flatten()
            attenc = T.mul(hEncAttn.dimshuffle(1,0), attEncpred).dimshuffle(1,0)

            return attenc
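
Both helpers above follow the same pattern: a softmax over per-timestep attention energies, whose weights then rescale the corresponding hidden states. Below is a minimal self-contained sketch of that pattern; the variable names and shapes are assumed for illustration and are not part of the original code.

import numpy
import theano
import theano.tensor as T
from blocks.bricks import Softmax

h = T.matrix('h')                # assumed shape: (timesteps, hidden_dim)
energies = T.vector('energies')  # assumed shape: (timesteps,), unnormalized scores

# softmax over timesteps, computed on a dummy batch of size one
weights = Softmax().apply(energies.dimshuffle('x', 0)).flatten()
# broadcast the weights over the hidden dimension, as in the helpers above
weighted_h = T.mul(h.dimshuffle(1, 0), weights).dimshuffle(1, 0)

f = theano.function([h, energies], weighted_h)
out = f(numpy.ones((5, 3), dtype=theano.config.floatX),
        numpy.zeros(5, dtype=theano.config.floatX))
# with equal energies, every timestep is scaled by 1/5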
Example #9
def get_config(config):
    config1 = {}

    if config == '5layers':
        config1['num_epochs'] = 150
        config1['num_channels'] = 3
        config1['image_shape'] = (192, 192)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [32, 48, 64, 128, 256]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000, 100]
        config1['output_size'] = 2
        config1['batch_size'] = 16
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [
            Rectifier().apply for _ in config1['mlp_hiddens']
        ] + [Softmax().apply]
        config1['num_batches'] = None
    elif config == '4layers':
        config1['num_epochs'] = 100
        config1['num_channels'] = 3
        config1['image_shape'] = (160, 160)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [32, 64, 128, 128]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000, 100]
        config1['output_size'] = 2
        config1['batch_size'] = 32
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [
            Rectifier().apply for _ in config1['mlp_hiddens']
        ] + [Softmax().apply]
        config1['num_batches'] = None
    else:
        config1['num_epochs'] = 100
        config1['num_channels'] = 3
        config1['image_shape'] = (128, 128)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [20, 50, 80]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000]
        config1['output_size'] = 2
        config1['batch_size'] = 64
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [
            Rectifier().apply for _ in config1['mlp_hiddens']
        ] + [Softmax().apply]
        config1['num_batches'] = 11000

        if config == 'test':
            print("Test run...")
            config1['test'] = True
        else:
            print("Using default config..")

    return config1
Example #10
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config,
                                    rec_input_len=4,
                                    output_dim=config.tgtcls.shape[0],
                                    **kwargs)
        self.classes = theano.shared(numpy.array(config.tgtcls,
                                                 dtype=theano.config.floatX),
                                     name='classes')
        self.softmax = Softmax()
        self.sequences.extend(['latitude_lag', 'longitude_lag'])
        self.children.append(self.softmax)
Example #11
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim / 2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]
Example #12
    def __init__(self, config, prefix_encoder, candidate_encoder, **kwargs):
        super(MemoryNetworkBase, self).__init__(**kwargs)

        self.prefix_encoder = prefix_encoder
        self.candidate_encoder = candidate_encoder
        self.config = config

        self.softmax = Softmax()
        self.children = [self.softmax, prefix_encoder, candidate_encoder]

        self.inputs = self.prefix_encoder.apply.inputs \
                      + ['candidate_%s'%x for x in self.candidate_encoder.apply.inputs] \
                      + ['candidate_destination_latitude', 'candidate_destination_longitude']
Example #13
    def build_pretrain_model(self, data_dict, hyper_params):
        """
        pretrain-method specific;
        constructs an SCE net;
        works with any network structure of the pipeline
        :param data_dict:
        :param hyper_params:
        :return:
        """
        from theano import tensor
        from blocks.model import Model

        # Note: this has to match the sources defined in the dataset
        indices = [tensor.ivector('{}_indices'.format(i)) for i in range(3)]

        pipeline = self.encoder_pipeline_factory.build_pipeline(
            input_shape=data_dict.get_value().shape, params=hyper_params)

        # compute feature representation
        rep = [pipeline.apply(data_dict[indices[i]]) for i in range(3)]
        # for r in rep: print r.type

        # flatten representations
        rep = [r.flatten(ndim=2) for r in rep]
        # for r in rep: print r.type

        # compute similarities
        rval = []
        for i in range(1, 3):
            r = (rep[0] * rep[i]).sum(
                axis=1)  # element-wise multiplication and row sum
            r = tensor.reshape(r, (r.shape[0], 1))
            rval.append(r)
        rval = tensor.concatenate(rval, axis=1)
        # print rval.type

        # optional softmax layer (normalization to sum = 1)
        if 'apply_softmax' in hyper_params and hyper_params[
                'apply_softmax']:  # default=False
            from blocks.bricks import Softmax
            rval = Softmax().apply(rval)

        # optional argmax (int output instead of scores)
        if 'return_probs' in hyper_params and hyper_params[
                'return_probs'] is False:  # default=True
            rval = rval.argmax(axis=1)

        return Model(rval)
Example #14
def test_activations():
    x = tensor.vector()
    x_val = numpy.random.rand(8).astype(theano.config.floatX)
    exp_x_val = numpy.exp(x_val)

    assert_allclose(x_val, Identity().apply(x).eval({x: x_val}))
    assert_allclose(numpy.tanh(x_val),
                    Tanh().apply(x).eval({x: x_val}),
                    rtol=1e-06)
    assert_allclose(numpy.log(1 + exp_x_val),
                    Softplus(x).apply(x).eval({x: x_val}),
                    rtol=1e-6)
    assert_allclose(exp_x_val / numpy.sum(exp_x_val),
                    Softmax(x).apply(x).eval({
                        x: x_val
                    }).flatten(),
                    rtol=1e-6)
    assert_allclose(1.0 / (1.0 + numpy.exp(-x_val)),
                    Logistic(x).apply(x).eval({x: x_val}),
                    rtol=1e-6)
    leaky_out_1 = x_val - 0.5
    leaky_out_1[leaky_out_1 < 0] *= 0.01
    assert_allclose(leaky_out_1,
                    LeakyRectifier().apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
    leaky_out_2 = x_val - 0.5
    leaky_out_2[leaky_out_2 < 0] *= 0.05
    assert_allclose(leaky_out_2,
                    LeakyRectifier(leak=0.05).apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
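
As a complement to the activation test above, here is a small sketch (variable names assumed) checking that Softmax().categorical_cross_entropy(y, x) agrees with the negative log of the softmax probability assigned to the target class.

import numpy
import theano
import theano.tensor as tensor
from blocks.bricks import Softmax

x = tensor.matrix('x')    # unnormalized scores, shape (batch, n_classes)
y = tensor.lvector('y')   # integer targets, shape (batch,)

probs = Softmax().apply(x)
xent = Softmax().categorical_cross_entropy(y, x)

f = theano.function([x, y], [probs, xent])
scores = numpy.random.randn(3, 5).astype(theano.config.floatX)
targets = numpy.array([0, 2, 4], dtype='int64')
p, ce = f(scores, targets)
# each ce[i] should be close to -numpy.log(p[i, targets[i]])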
Example #15
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(input_dim=10000,
                          dim=500,
                          mlp_hidden_dims=[2000, 500, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    model.initialize()
    h, c = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    return cost, error_rate
Example #16
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #17
class SingleSoftmax(Initializable):
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim/2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]

    # some day: @application(...) def feedback(self, h)

    @application(inputs=['cs', 'y'], outputs=['cost'])
    def cost(self, cs, y, n_patches):
        energies = [self.mlp.apply(cs[:, t, :])
                    for t in xrange(n_patches)]
        cross_entropies = [self.softmax.categorical_cross_entropy(y.flatten(), energy)
                           for energy in energies]
        error_rates = [T.neq(y, energy.argmax(axis=1)).mean(axis=0)
                       for energy in energies]
        # train on final prediction
        cost = util.named(cross_entropies[-1], "cost")
        # monitor final prediction
        self.add_auxiliary_variable(cross_entropies[-1], name="cross_entropy")
        self.add_auxiliary_variable(error_rates[-1], name="error_rate")
        return cost
Example #18
def build_model(images, labels):

    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 64, (150, 150))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    top_mlp = MLP([
        LeakyRectifier(name='non_linear_9'),
        LeakyRectifier(name='non_linear_10'),
        Softmax(name='non_linear_11')
    ], [conv_out_dim, 2048, 612, 10],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(1))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence(
        [bottom_conv_sequence.apply, flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    return cost
Example #19
    def __init__(self, config):
        self.X = T.tensor4("features")
        c = config

        seq = BrickSequence(
            input_dim=(3, 32, 32),
            bricks=[
                conv3(c['n_l1']),
                conv3(c['n_l2']),
                max_pool(),
                conv3(c['n_l3']),
                conv3(c['n_l4']),
                max_pool(),
                #conv3(10),
                #conv3(10),
                Flattener(),
                linear(c['n_l5']),
                Softmax()
            ])

        seq.initialize()

        self.pred = seq.apply(self.X)
        self.Y = T.imatrix("targets")

        self.cost = CategoricalCrossEntropy().apply(self.Y.flatten(),
                                                    self.pred)
        self.cost.name = "cost"

        self.accur = 1.0 - MisclassificationRate().apply(
            self.Y.flatten(), self.pred)
        self.accur.name = "accur"
Example #20
class SingleSoftmax(Initializable):
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim / 2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]

    # some day: @application(...) def feedback(self, h)

    @application(inputs=['cs', 'y'], outputs=['cost'])
    def cost(self, cs, y, n_patches):
        energies = [self.mlp.apply(cs[:, t, :]) for t in xrange(n_patches)]
        cross_entropies = [
            self.softmax.categorical_cross_entropy(y.flatten(), energy)
            for energy in energies
        ]
        error_rates = [
            T.neq(y, energy.argmax(axis=1)).mean(axis=0) for energy in energies
        ]
        # train on final prediction
        cost = util.named(cross_entropies[-1], "cost")
        # monitor final prediction
        self.add_auxiliary_variable(cross_entropies[-1], name="cross_entropy")
        self.add_auxiliary_variable(error_rates[-1], name="error_rate")
        return cost
Example #21
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [2])

    weights = mlp.apply(r)

    final = tensor.dot(x, weights)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost, error_rate] = cg.outputs

    return cost, error_rate
Example #22
def create_lenet_5():
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #    feature_maps = [20, 50]
    #    mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()

    return convnet
Example #23
    def __init__(self, **kwargs):
        conv_layers = [
            Convolutional(filter_size=(3, 3), num_filters=64,
                          border_mode=(1, 1), name='conv_1'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=64,
                          border_mode=(1, 1), name='conv_2'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_2'),

            Convolutional(filter_size=(3, 3), num_filters=128,
                          border_mode=(1, 1), name='conv_3'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=128,
                          border_mode=(1, 1), name='conv_4'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_4'),

            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_5'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_6'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_7'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_7'),

            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_8'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_9'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_10'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_10'),

            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_11'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_12'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_13'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_13'),
        ]

        mlp = MLP([Rectifier(name='fc_14'), Rectifier(name='fc_15'), Softmax()],
                  [25088, 4096, 4096, 1000],
                  )
        conv_sequence = ConvolutionalSequence(
            conv_layers, 3, image_size=(224, 224))

        super(VGGNet, self).__init__(
            [conv_sequence.apply, Flattener().apply, mlp.apply], **kwargs)
Example #24
    def __init__(self,
                 image_shape=None,
                 output_size=None,
                 noise_batch_size=None,
                 noise_without_rectifier=False,
                 noise_after_rectifier=False,
                 **kwargs):
        self.num_channels = 3
        self.image_shape = image_shape or (32, 32)
        self.output_size = output_size or 10
        self.noise_batch_size = noise_batch_size
        conv_parameters = [(96, 3, 1, 'half', Convolutional),
                           (96, 3, 1, 'half', Convolutional),
                           (96, 3, 2, 'half', NoisyConvolutional),
                           (192, 3, 1, 'half', Convolutional),
                           (192, 3, 1, 'half', Convolutional),
                           (192, 3, 2, 'half', NoisyConvolutional),
                           (192, 3, 1, 'half', Convolutional),
                           (192, 1, 1, 'valid', Convolutional),
                           (10, 1, 1, 'valid', Convolutional)]
        fc_layer = 10

        self.convolutions = []
        layers = []
        for i, (num_filters, filter_size, conv_step, border_mode,
                cls) in enumerate(conv_parameters):
            if cls == NoisyConvolutional and noise_after_rectifier:
                cls = NoisyConvolutional2
            layer = cls(filter_size=(filter_size, filter_size),
                        num_filters=num_filters,
                        step=(conv_step, conv_step),
                        border_mode=border_mode,
                        tied_biases=True,
                        name='conv_{}'.format(i))
            if cls == NoisyConvolutional or cls == NoisyConvolutional2:
                layer.noise_batch_size = self.noise_batch_size
            self.convolutions.append(layer)
            layers.append(layer)
            if cls != NoisyConvolutional2 and not noise_without_rectifier:
                layers.append(Rectifier())

        self.conv_sequence = ConvolutionalSequence(layers,
                                                   self.num_channels,
                                                   image_size=self.image_shape)

        # The AllConvNet applies average pooling to combine top-level
        # features across the image.
        self.flattener = GlobalAverageFlattener()

        # Then it inserts one final 10-way FC layer before softmax
        # self.top_mlp = MLP([Rectifier(), Softmax()],
        #     [conv_parameters[-1][0], fc_layer, self.output_size])
        self.top_softmax = Softmax()

        application_methods = [
            self.conv_sequence.apply, self.flattener.apply,
            self.top_softmax.apply
        ]

        super(NoisyAllConvNet, self).__init__(application_methods, **kwargs)
Example #25
    def __init__(self, hidden_dim, n_classes, batch_normalize, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = masonry.construct_mlp(
            activations=[None, Identity()],
            input_dim=hidden_dim,
            hidden_dims=[hidden_dim / 2, self.n_classes],
            batch_normalize=batch_normalize,
            weights_init=Orthogonal(),
            biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]
Example #26
class Model(RNN):
    @lazy()
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config,
                                    rec_input_len=4,
                                    output_dim=config.tgtcls.shape[0],
                                    **kwargs)
        self.classes = theano.shared(numpy.array(config.tgtcls,
                                                 dtype=theano.config.floatX),
                                     name='classes')
        self.softmax = Softmax()
        self.sequences.extend(['latitude_lag', 'longitude_lag'])
        self.children.append(self.softmax)

    def before_predict_all(self, kwargs):
        super(Model, self).before_predict_all(kwargs)
        kwargs['latitude_lag'] = tensor.extra_ops.repeat(kwargs['latitude'],
                                                         2,
                                                         axis=0)
        kwargs['longitude_lag'] = tensor.extra_ops.repeat(kwargs['longitude'],
                                                          2,
                                                          axis=0)

    def process_rto(self, rto):
        return tensor.dot(self.softmax.apply(rto), self.classes)

    def rec_input(self, latitude, longitude, latitude_lag, longitude_lag,
                  **kwargs):
        return (tensor.shape_padright(latitude),
                tensor.shape_padright(longitude),
                tensor.shape_padright(latitude_lag),
                tensor.shape_padright(longitude_lag))
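
The process_rto method above shows a pattern that recurs throughout these examples: a softmax over a fixed table of target classes (cluster centroids), followed by an expectation to obtain a continuous prediction. Here is a minimal standalone sketch of just that step; the centroid table is random and purely illustrative, standing in for config.tgtcls.

import numpy
import theano
import theano.tensor as tensor
from blocks.bricks import Softmax

# hypothetical table of 100 latitude/longitude centroids
tgtcls = numpy.random.rand(100, 2).astype(theano.config.floatX)
classes = theano.shared(tgtcls, name='classes')

rto = tensor.matrix('rto')  # unnormalized scores, shape (batch, 100)
# softmax weights over the centroids, then their weighted mean
destination = tensor.dot(Softmax().apply(rto), classes)

f = theano.function([rto], destination)
print(f(numpy.random.randn(4, 100).astype(theano.config.floatX)).shape)  # (4, 2)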
Example #27
class MemoryNetworkBase(Initializable):
    def __init__(self, config, prefix_encoder, candidate_encoder, **kwargs):
        super(MemoryNetworkBase, self).__init__(**kwargs)

        self.prefix_encoder = prefix_encoder
        self.candidate_encoder = candidate_encoder
        self.config = config

        self.softmax = Softmax()
        self.children = [self.softmax, prefix_encoder, candidate_encoder]

        self.inputs = self.prefix_encoder.apply.inputs \
                      + ['candidate_%s'%x for x in self.candidate_encoder.apply.inputs] \
                      + ['candidate_destination_latitude', 'candidate_destination_longitude']

    def candidate_destination(self, **kwargs):
        return tensor.concatenate(
            (tensor.shape_padright(kwargs['candidate_destination_latitude']),
             tensor.shape_padright(kwargs['candidate_destination_longitude'])),
            axis=1)

    @application(outputs=['cost'])
    def cost(self, **kwargs):
        y_hat = self.predict(**kwargs)
        y = tensor.concatenate((kwargs['destination_latitude'][:, None],
                                kwargs['destination_longitude'][:, None]),
                               axis=1)

        return error.erdist(y_hat, y).mean()

    @application(outputs=['destination'])
    def predict(self, **kwargs):
        prefix_representation = self.prefix_encoder.apply(
            **{x: kwargs[x]
               for x in self.prefix_encoder.apply.inputs})
        candidate_representation = self.candidate_encoder.apply(
            **{
                x: kwargs['candidate_' + x]
                for x in self.candidate_encoder.apply.inputs
            })

        if self.config.normalize_representation:
            prefix_representation = prefix_representation \
                    / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True))
            candidate_representation = candidate_representation \
                    / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True))

        similarity_score = tensor.dot(prefix_representation,
                                      candidate_representation.T)
        similarity = self.softmax.apply(similarity_score)

        return tensor.dot(similarity, self.candidate_destination(**kwargs))

    @predict.property('inputs')
    def predict_inputs(self):
        return self.inputs

    @cost.property('inputs')
    def cost_inputs(self):
        return self.inputs + ['destination_latitude', 'destination_longitude']
Example #28
def build_model(images, labels):
    
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3,3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()
    
    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    #top_mlp = MLP([Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')], [conv_out_dim, 1024, 10], weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))
    
    # Construct feedforward sequence
    ss_seq = FeedforwardSequence(
        [bottom_conv_sequence.apply, flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()
    
    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]

    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()


    return cost
Example #29
def training(repo, learning_rate, batch_size, filenames):

    print 'LOAD DATA'
    (x_train,
     y_train), (x_valid,
                y_valid), (x_test,
                           y_test) = load_datasets_mnist(repo, filenames)

    print 'BUILD MODEL'
    train_f, valid_f, test_f, model, fisher, params = build_training()
    x_train = x_train[:1000]
    y_train = y_train[:1000]

    x = T.tensor4()
    y = T.imatrix()
    output = model.apply(x)
    output = output.reshape(
        (x.shape[0],
         model.get_dim('output')))  #TO DO : get_dim('name') for Architecture
    cost = Softmax().categorical_cross_entropy(y.flatten(), output).mean()
    cg = ComputationGraph(cost)

    inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
    outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    dico = OrderedDict([('conv_output', outputs_conv[0])])
    [grad_s] = T.grad(cost, outputs_conv)
    dico['conv_output'] = grad_s

    f = theano.function([x, y],
                        grad_s,
                        allow_input_downcast=True,
                        on_unused_input='ignore')
    print np.mean(f(x_train[:10], y_train[:10]))
Example #30
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config, output_dim=config.tgtcls.shape[0], **kwargs)

        self.classes = theano.shared(numpy.array(config.tgtcls, dtype=theano.config.floatX),
                                     name='classes')
        self.softmax = Softmax()
        self.children.append(self.softmax)
Example #31
class rewatching:
    def __init__(self, batch_size, output_length, visual_dim, word_dim,
                 visual_feature_dim, question_feature_dim, joint_dim,
                 memory_dim, output_dim, fc1_dim, fc2_dim, voc_size):
        # the video encoder
        self.video_encoder = visualEncoder(visual_dim, visual_feature_dim)
        self.sentence_encoder = questionEncoder(word_dim, question_feature_dim)
        self.toJoint = embeddingLayer(2 * question_feature_dim,
                                      2 * visual_feature_dim, joint_dim)
        self.rewatcher = impatientLayer(joint_dim, memory_dim, output_dim)

        self.seq_gen = seqDecoder(joint_dim, output_dim, fc1_dim, fc2_dim)
        self.softmax_layer = Softmax()
        self.bs = batch_size
        self.output_length = output_length
        self.voc_size = voc_size

    def build_model(self, frame, q, q_rev, mask, maskMat, mask01, padding):
        bs = self.bs
        # visual dim -> visual feature dim
        video_embedding = self.video_encoder.apply(frame)
        # word_dim -> question feature dim
        question_embedding, u1, u2 = self.sentence_encoder.apply(
            q, q_rev, mask, bs)
        # -> joint_dim
        questionJoint, videoJoint, u = self.toJoint.apply(
            words=question_embedding, video=video_embedding, u1=u1, u2=u2)
        # bs x joint_dim, bs x output_dim
        question = questionJoint[:, -1, :]
        #video = videoJoint[:, -1, :]

        r_q, seq_r_q = self.rewatcher.apply(videoJoint, questionJoint, mask,
                                            bs)
        fc_r = self.seq_gen.apply(self.output_length, r_q, question, padding)
        fc = fc_r.reshape((self.bs * self.output_length, self.voc_size))
        self.softmax_result = self.softmax_layer.apply(fc)
        self.pred = T.argmax(self.softmax_result, axis=1)
        self.pred = self.pred.reshape((self.bs, self.output_length))

    # groundtruth_: batch_size x output_length
    # mask_01: (batch_size x output_length)
    # this mask is a 0-1 matrix where 0 indicates padding area of the answer
    def loss(self, groundtruth_, mask_01):
        mask = mask_01.flatten()
        gt = groundtruth_.flatten()

        self.p = self.softmax_result[T.arange(self.bs * self.output_length),
                                     gt]
        self.cost_ = T.log(self.p + 1e-20)
        self.cost = -T.sum(self.cost_ * mask) / self.bs
        self.cost.name = 'softmax_cost'
        return self.cost

    def error(self, groundtruth, mask_01):
        return T.neq(T.sum(T.neq(self.pred, groundtruth) * mask_01, axis=1),
                     0).sum() / self.bs

    def predict(self):
        return self.pred
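
The loss method above computes a masked negative log-likelihood by indexing the softmax output directly. The sketch below (names assumed) expresses the same idea through Softmax().categorical_cross_entropy; note it normalizes by the number of unmasked tokens rather than by batch size, a deliberate variation.

import theano.tensor as T
from blocks.bricks import Softmax

fc = T.matrix('fc')      # (batch * output_length, voc_size) unnormalized scores
gt = T.lvector('gt')     # flattened groundtruth word indices
mask = T.vector('mask')  # flattened 0/1 mask, 0 over answer padding

per_token_nll = Softmax().categorical_cross_entropy(gt, fc)
cost = T.sum(per_token_nll * mask) / T.maximum(T.sum(mask), 1)
cost.name = 'softmax_cost'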
Example #32
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(Emitter, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        # TODO: use TensorLinear or some such
        self.emitters = [
            MLP(activations=[Rectifier(), Identity()],
                dims=[hidden_dim, hidden_dim / 2, n],
                name="mlp_%i" % i,
                weights_init=Orthogonal(),
                biases_init=Constant(0)) for i, n in enumerate(self.n_classes)
        ]
        self.softmax = Softmax()

        self.children = self.emitters + [self.softmax]
Example #33
class Emitter(Initializable):
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(Emitter, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        # TODO: use TensorLinear or some such
        self.emitters = [
            MLP(activations=[Rectifier(), Identity()],
                dims=[hidden_dim, hidden_dim / 2, n],
                name="mlp_%i" % i,
                weights_init=Orthogonal(),
                biases_init=Constant(0)) for i, n in enumerate(self.n_classes)
        ]
        self.softmax = Softmax()

        self.children = self.emitters + [self.softmax]

    # some day: @application(...) def feedback(self, h)

    @application(inputs=['cs', 'y'], outputs=['cost'])
    def cost(self, cs, y, n_patches):
        max_length = len(self.n_classes) - 1
        _length_masks = theano.shared(np.tril(
            np.ones((max_length, max_length), dtype='int8')),
                                      name='shared_length_masks')
        lengths = y[:, -1]
        length_masks = _length_masks[lengths]

        mean_cross_entropies = []
        error_rates = []
        for t in xrange(n_patches):
            energies = [
                emitter.apply(cs[:, t, :]) for emitter in self.emitters
            ]
            mean_cross_entropies.append(
                sum(
                    self.softmax.categorical_cross_entropy(y[:, i], energy)
                    # to avoid punishing predictions of nonexistent digits:
                    * (length_masks[:, i] if i < max_length else 1)
                    for i, energy in enumerate(energies)).mean())
            # FIXME: do proper logprob-minimizing prediction of length
            error_rates.append(
                T.stack(*[
                    T.neq(y[:, i], energy.argmax(axis=1))
                    # to avoid punishing predictions of nonexistent digits:
                    * (length_masks[:, i] if i < max_length else 1)
                    for i, energy in enumerate(energies)
                ]).any(axis=0).mean())

        self.add_auxiliary_variable(mean_cross_entropies[-1],
                                    name="cross_entropy")
        self.add_auxiliary_variable(error_rates[-1], name="error_rate")

        # minimize the mean cross entropy over time and over batch
        cost = mean_cross_entropies[-1]
        return cost
Example #34
class Model(RNN):
    @lazy()
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config, output_dim=config.tgtcls.shape[0], **kwargs)
        self.classes = theano.shared(numpy.array(config.tgtcls, dtype=theano.config.floatX), name='classes')
        self.softmax = Softmax()
        self.children.append(self.softmax)

    def process_rto(self, rto):
        return tensor.dot(self.softmax.apply(rto), self.classes)
Example #35
class Emitter(Initializable):
    def __init__(self, hidden_dim, n_classes, batch_normalize, **kwargs):
        super(Emitter, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        # TODO: use TensorLinear or some such
        self.emitters = [
            masonry.construct_mlp(
                activations=[None, Identity()],
                input_dim=hidden_dim,
                hidden_dims=[hidden_dim/2, n],
                name="mlp_%i" % i,
                batch_normalize=batch_normalize,
                initargs=dict(weights_init=Orthogonal(),
                              biases_init=Constant(0)))
            for i, n in enumerate(self.n_classes)]
        self.softmax = Softmax()

        self.children = self.emitters + [self.softmax]

    # some day: @application(...) def feedback(self, h)

    @application(inputs=['cs', 'y'], outputs=['cost'])
    def cost(self, cs, y, n_patches):
        max_length = len(self.n_classes) - 1
        _length_masks = theano.shared(
            np.tril(np.ones((max_length, max_length), dtype='int8')),
            name='shared_length_masks')
        lengths = y[:, -1]
        length_masks = _length_masks[lengths]

        mean_cross_entropies = []
        error_rates = []
        for t in xrange(n_patches):
            energies = [emitter.apply(cs[:, t, :]) for emitter in self.emitters]
            mean_cross_entropies.append(
                sum(self.softmax.categorical_cross_entropy(y[:, i], energy)
                    # to avoid punishing predictions of nonexistent digits:
                    * (length_masks[:, i] if i < max_length else 1)
                    for i, energy in enumerate(energies)).mean())
            # FIXME: do proper logprob-minimizing prediction of length
            error_rates.append(
                T.stack(*[T.neq(y[:, i], energy.argmax(axis=1))
                          # to avoid punishing predictions of nonexistent digits:
                          * (length_masks[:, i] if i < max_length else 1)
                          for i, energy in enumerate(energies)]).any(axis=0).mean())

        self.add_auxiliary_variable(mean_cross_entropies[-1], name="cross_entropy")
        self.add_auxiliary_variable(error_rates[-1], name="error_rate")

        # minimize the mean cross entropy over time and over batch
        cost = mean_cross_entropies[-1]
        return cost
Example #36
class SoftmaxLinear(Initializable):
    def __init__(self, input_dim, output_dim, **kwargs):
        super(SoftmaxLinear, self).__init__(**kwargs)
        self.linear = Linear(input_dim=input_dim, output_dim=output_dim)
        self.softmax = Softmax()

        self.children = [self.linear, self.softmax]

    def apply(self, input_):
        output = self.softmax.apply(self.linear.apply(input_))
        return output
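
A hypothetical usage sketch for the SoftmaxLinear brick above; the initialization scheme and dimensions are assumed for illustration and are not part of the original example.

import numpy
import theano
import theano.tensor as tensor
from blocks.initialization import IsotropicGaussian, Constant

layer = SoftmaxLinear(input_dim=20, output_dim=5,
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0))
layer.initialize()  # pushes the initialization down to the Linear child

x = tensor.matrix('x')
probs = layer.apply(x)

f = theano.function([x], probs)
# each row of the output sums to one over the 5 classes
print(f(numpy.random.rand(3, 20).astype(theano.config.floatX)).sum(axis=1))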
Example #37
class MemoryNetworkBase(Initializable):
    def __init__(self, config, prefix_encoder, candidate_encoder, **kwargs):
        super(MemoryNetworkBase, self).__init__(**kwargs)

        self.prefix_encoder = prefix_encoder
        self.candidate_encoder = candidate_encoder
        self.config = config

        self.softmax = Softmax()
        self.children = [ self.softmax, prefix_encoder, candidate_encoder ]

        self.inputs = self.prefix_encoder.apply.inputs \
                      + ['candidate_%s'%x for x in self.candidate_encoder.apply.inputs] \
                      + ['candidate_destination_latitude', 'candidate_destination_longitude']

    def candidate_destination(self, **kwargs):
        return tensor.concatenate(
                (tensor.shape_padright(kwargs['candidate_destination_latitude']),
                 tensor.shape_padright(kwargs['candidate_destination_longitude'])),
                axis=1)

    @application(outputs=['cost'])
    def cost(self, **kwargs):
        y_hat = self.predict(**kwargs)
        y = tensor.concatenate((kwargs['destination_latitude'][:, None],
                                kwargs['destination_longitude'][:, None]), axis=1)

        return error.erdist(y_hat, y).mean()

    @application(outputs=['destination'])
    def predict(self, **kwargs):
        prefix_representation = self.prefix_encoder.apply(**{ x: kwargs[x] for x in self.prefix_encoder.apply.inputs })
        candidate_representation = self.candidate_encoder.apply(**{ x: kwargs['candidate_'+x] for x in self.candidate_encoder.apply.inputs })

        if self.config.normalize_representation:
            prefix_representation = prefix_representation \
                    / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True))
            candidate_representation = candidate_representation \
                    / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True))

        similarity_score = tensor.dot(prefix_representation, candidate_representation.T)
        similarity = self.softmax.apply(similarity_score)

        return tensor.dot(similarity, self.candidate_destination(**kwargs))

    @predict.property('inputs')
    def predict_inputs(self):
        return self.inputs

    @cost.property('inputs')
    def cost_inputs(self):
        return self.inputs + ['destination_latitude', 'destination_longitude']
Example #38
    def __init__(self, config, prefix_encoder, candidate_encoder, **kwargs):
        super(MemoryNetworkBase, self).__init__(**kwargs)

        self.prefix_encoder = prefix_encoder
        self.candidate_encoder = candidate_encoder
        self.config = config

        self.softmax = Softmax()
        self.children = [ self.softmax, prefix_encoder, candidate_encoder ]

        self.inputs = self.prefix_encoder.apply.inputs \
                      + ['candidate_%s'%x for x in self.candidate_encoder.apply.inputs] \
                      + ['candidate_destination_latitude', 'candidate_destination_longitude']
Example #39
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim/2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]
Example #40
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(Emitter, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        # TODO: use TensorLinear or some such
        self.emitters = [MLP(activations=[Rectifier(), Identity()],
                             dims=[hidden_dim, hidden_dim/2, n],
                             name="mlp_%i" % i,
                             weights_init=Orthogonal(),
                             biases_init=Constant(0))
                         for i, n in enumerate(self.n_classes)]
        self.softmax = Softmax()

        self.children = self.emitters + [self.softmax]
Example #41
    def __init__(self, hidden_dim, n_classes, batch_normalize, **kwargs):
        super(Emitter, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        # TODO: use TensorLinear or some such
        self.emitters = [
            masonry.construct_mlp(
                activations=[None, Identity()],
                input_dim=hidden_dim,
                hidden_dims=[hidden_dim/2, n],
                name="mlp_%i" % i,
                batch_normalize=batch_normalize,
                initargs=dict(weights_init=Orthogonal(),
                              biases_init=Constant(0)))
            for i, n in enumerate(self.n_classes)]
        self.softmax = Softmax()

        self.children = self.emitters + [self.softmax]
Example #42
class Model(RNN):
    @lazy()
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config, rec_input_len=4, output_dim=config.tgtcls.shape[0], **kwargs)
        self.classes = theano.shared(numpy.array(config.tgtcls, dtype=theano.config.floatX), name='classes')
        self.softmax = Softmax()
        self.sequences.extend(['latitude_lag', 'longitude_lag'])
        self.children.append(self.softmax)

    def before_predict_all(self, kwargs):
        super(Model, self).before_predict_all(kwargs)
        kwargs['latitude_lag'] = tensor.extra_ops.repeat(kwargs['latitude'], 2, axis=0)
        kwargs['longitude_lag'] = tensor.extra_ops.repeat(kwargs['longitude'], 2, axis=0)

    def process_rto(self, rto):
        return tensor.dot(self.softmax.apply(rto), self.classes)

    def rec_input(self, latitude, longitude, latitude_lag, longitude_lag, **kwargs):
        return (tensor.shape_padright(latitude),
                tensor.shape_padright(longitude),
                tensor.shape_padright(latitude_lag),
                tensor.shape_padright(longitude_lag))
Example #43
    def __init__(self, io_dim, hidden_dims, cond_cert, activation=None, **kwargs):
        super(CCHLSTM, self).__init__(**kwargs)

        self.cond_cert = cond_cert

        self.io_dim = io_dim
        self.hidden_dims = hidden_dims

        self.children = []
        self.layers = []

        self.softmax = Softmax()
        self.children.append(self.softmax)

        for i, d in enumerate(hidden_dims):
            i0 = LookupTable(length=io_dim,
                             dim=4*d,
                             name='i0-%d'%i)
            self.children.append(i0)

            if i > 0:
                i1 = Linear(input_dim=hidden_dims[i-1],
                            output_dim=4*d,
                            name='i1-%d'%i)
                self.children.append(i1)
            else:
                i1 = None

            lstm = LSTM(dim=d, activation=activation,
                        name='LSTM-%d'%i)
            self.children.append(lstm)

            o = Linear(input_dim=d,
                       output_dim=io_dim,
                       name='o-%d'%i)
            self.children.append(o)

            self.layers.append((i0, i1, lstm, o))
Example #44
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.context_embedder = ContextEmbedder(config)

        self.prefix_encoder = MLP(
            activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden] + [config.representation_activation()],
            dims=[config.prefix_encoder.dim_input] + config.prefix_encoder.dim_hidden + [config.representation_size],
            name="prefix_encoder",
        )
        self.candidate_encoder = MLP(
            activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden]
            + [config.representation_activation()],
            dims=[config.candidate_encoder.dim_input]
            + config.candidate_encoder.dim_hidden
            + [config.representation_size],
            name="candidate_encoder",
        )
        self.softmax = Softmax()

        self.prefix_extremities = {
            "%s_k_%s" % (side, ["latitude", "longitude"][axis]): axis for side in ["first", "last"] for axis in [0, 1]
        }
        self.candidate_extremities = {
            "candidate_%s_k_%s" % (side, ["latitude", "longitude"][axis]): axis
            for side in ["first", "last"]
            for axis in [0, 1]
        }

        self.inputs = (
            self.context_embedder.inputs
            + ["candidate_%s" % k for k in self.context_embedder.inputs]
            + self.prefix_extremities.keys()
            + self.candidate_extremities.keys()
        )
        self.children = [self.context_embedder, self.prefix_encoder, self.candidate_encoder, self.softmax]
Exemple #45
0
class Model(Initializable):
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.context_embedder = ContextEmbedder(config)

        self.prefix_encoder = MLP(
            activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden] + [config.representation_activation()],
            dims=[config.prefix_encoder.dim_input] + config.prefix_encoder.dim_hidden + [config.representation_size],
            name="prefix_encoder",
        )
        self.candidate_encoder = MLP(
            activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden]
            + [config.representation_activation()],
            dims=[config.candidate_encoder.dim_input]
            + config.candidate_encoder.dim_hidden
            + [config.representation_size],
            name="candidate_encoder",
        )
        self.softmax = Softmax()

        self.prefix_extremities = {
            "%s_k_%s" % (side, ["latitude", "longitude"][axis]): axis for side in ["first", "last"] for axis in [0, 1]
        }
        self.candidate_extremities = {
            "candidate_%s_k_%s" % (side, ["latitude", "longitude"][axis]): axis
            for side in ["first", "last"]
            for axis in [0, 1]
        }

        self.inputs = (
            self.context_embedder.inputs
            + ["candidate_%s" % k for k in self.context_embedder.inputs]
            + self.prefix_extremities.keys()
            + self.candidate_extremities.keys()
        )
        self.children = [self.context_embedder, self.prefix_encoder, self.candidate_encoder, self.softmax]

    def _push_initialization_config(self):
        for (mlp, config) in [
            [self.prefix_encoder, self.config.prefix_encoder],
            [self.candidate_encoder, self.config.candidate_encoder],
        ]:
            mlp.weights_init = config.weights_init
            mlp.biases_init = config.biases_init

    @application(outputs=["destination"])
    def predict(self, **kwargs):
        prefix_embeddings = tuple(self.context_embedder.apply(**{k: kwargs[k] for k in self.context_embedder.inputs}))
        prefix_extremities = tuple(
            (kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.prefix_extremities.items()
        )
        prefix_inputs = tensor.concatenate(prefix_extremities + prefix_embeddings, axis=1)
        prefix_representation = self.prefix_encoder.apply(prefix_inputs)
        if self.config.normalize_representation:
            prefix_representation = prefix_representation / tensor.sqrt(
                (prefix_representation ** 2).sum(axis=1, keepdims=True)
            )

        candidate_embeddings = tuple(
            self.context_embedder.apply(**{k: kwargs["candidate_%s" % k] for k in self.context_embedder.inputs})
        )
        candidate_extremities = tuple(
            (kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.candidate_extremities.items()
        )
        candidate_inputs = tensor.concatenate(candidate_extremities + candidate_embeddings, axis=1)
        candidate_representation = self.candidate_encoder.apply(candidate_inputs)
        if self.config.normalize_representation:
            candidate_representation = candidate_representation / tensor.sqrt(
                (candidate_representation ** 2).sum(axis=1, keepdims=True)
            )

        # Dot products between prefix and candidate representations give similarity
        # scores; the softmax turns them into weights over the candidates.
        similarity_score = tensor.dot(prefix_representation, candidate_representation.T)
        similarity = self.softmax.apply(similarity_score)

        candidate_destination = tensor.concatenate(
            (
                tensor.shape_padright(kwargs["candidate_last_k_latitude"][:, -1]),
                tensor.shape_padright(kwargs["candidate_last_k_longitude"][:, -1]),
            ),
            axis=1,
        )

        return tensor.dot(similarity, candidate_destination)

    @predict.property("inputs")
    def predict_inputs(self):
        return self.inputs

    @application(outputs=["cost"])
    def cost(self, **kwargs):
        y_hat = self.predict(**kwargs)
        y = tensor.concatenate(
            (kwargs["destination_latitude"][:, None], kwargs["destination_longitude"][:, None]), axis=1
        )

        return error.erdist(y_hat, y).mean()

    @cost.property("inputs")
    def cost_inputs(self):
        return self.inputs + ["destination_latitude", "destination_longitude"]
Exemple #46
0
rect = Rectifier()
mlp = MLP(dims=[784, 1200, 1200, 200], activations=[rect, rect, rect], seed=10)
mlp.weights_init = Uniform(0.0, 0.01)
mlp.biases_init = Constant(0.0)
mlp.initialize()

lin = Linear(200, 10, use_bias=True)
lin.weights_init = Uniform(0.0, 0.01)
lin.biases_init = Constant(0.0)
lin.initialize()

train_out = lin.apply(mlp.apply(flat_x))
test_out = lin.apply(mlp.apply(flat_x))

sm = Softmax(name='softmax')
loss = sm.categorical_cross_entropy(flat_y, train_out).mean()
loss.name = 'nll'
misclass = MisclassificationRate().apply(flat_y, train_out)
misclass.name = 'misclass'

test_loss = sm.categorical_cross_entropy(flat_y, test_out).mean()
test_loss.name = 'nll'
test_misclass = MisclassificationRate().apply(flat_y, test_out)
test_misclass.name = 'misclass'

model = Model(loss)

######################
# Data
######################
def build_model_vanilla(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())
                   for _ in range(layers)]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # If skip_connections: dim = layers * state_dim
    # else: dim = state_dim
    output_layer = Linear(
        input_dim=skip_connections * layers *
        state_dim + (1 - skip_connections) * state_dim,
        output_dim=vocab_size, name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs'] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # We have
    # h = [state, state_1, state_2 ...] if layers > 1
    # h = state if layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    if layers > 1:
        # Save all the last states
        for d in range(layers):
            last_states[d] = h[d][-1, :, :]
        if skip_connections:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        last_states[0] = h[-1, :, :]
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(),
        presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates
Exemple #48
0
def main(save_to, cost_name, learning_rate, momentum, num_epochs):
    mlp = MLP([None], [784, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    scores = mlp.apply(x)

    batch_size = y.shape[0]
    indices = tensor.arange(y.shape[0])
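    # Build one-hot targets: a (batch_size, 10) matrix with a one at each example's true class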
    target_scores = tensor.set_subtensor(
        tensor.zeros((batch_size, 10))[indices, y.flatten()],
        1)
    score_diff = scores - target_scores

    # Logistic Regression
    if cost_name == 'lr':
        cost = Softmax().categorical_cross_entropy(y.flatten(), scores).mean()
    # MSE
    elif cost_name == 'mse':
        cost = (score_diff ** 2).mean()
    # Perceptron
    elif cost_name == 'perceptron':
        cost = (scores.max(axis=1) - scores[indices, y.flatten()]).mean()
    # TLE
    elif cost_name == 'minmin':
        cost = abs(score_diff[indices, y.flatten()]).mean()
        cost += abs(score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLEcut
    elif cost_name == 'minmin_cut':
        # Score of the ground truth should be greater than or equal to its target score
        cost = tensor.maximum(0, -score_diff[indices, y.flatten()]).mean()
        # Score of the prediction should be less than or equal to its actual score
        cost += tensor.maximum(0, score_diff[indices, scores.argmax(axis=1)]).mean()
    # TLE2
    elif cost_name == 'minmin2':
        cost = ((score_diff[tensor.arange(y.shape[0]), y.flatten()]) ** 2).mean()
        cost += ((score_diff[tensor.arange(y.shape[0]), scores.argmax(axis=1)]) ** 2).mean()
    # Direct loss minimization
    elif cost_name == 'direct':
        epsilon = 0.1
        cost = (- scores[indices, (scores + epsilon * target_scores).argmax(axis=1)]
                + scores[indices, scores.argmax(axis=1)]).mean()
        cost /= epsilon
    elif cost_name == 'svm':
        cost = (scores[indices, (scores - 1 * target_scores).argmax(axis=1)]
                - scores[indices, y.flatten()]).mean()
    else:
        raise ValueError("Unknown cost " + cost_name)

    error_rate = MisclassificationRate().apply(y.flatten(), scores)
    error_rate.name = 'error_rate'

    cg = ComputationGraph([cost])
    cost.name = 'cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    if learning_rate is None:
        learning_rate = 0.0001
    if momentum is None:
        momentum = 0.0
    rule = Momentum(learning_rate=learning_rate,
                    momentum=momentum)
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=rule)
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  # CallbackExtension(
                  #    lambda: rule.learning_rate.set_value(rule.learning_rate.get_value() * 0.9),
                  #    after_epoch=True),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm),
                       rule.learning_rate],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_cost',
                 'test_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()

    df = pandas.DataFrame.from_dict(main_loop.log, orient='index')
    res = {'cost' : cost_name,
           'learning_rate' : learning_rate,
           'momentum' : momentum,
           'train_cost' : df.train_cost.iloc[-1],
           'test_cost' : df.test_cost.iloc[-1],
           'best_test_cost' : df.test_cost.min(),
           'train_error' : df.train_error_rate.iloc[-1],
           'test_error' : df.test_error_rate.iloc[-1],
           'best_test_error' : df.test_error_rate.min()}
    res = {k: float(v) if isinstance(v, numpy.ndarray) else v for k, v in res.items()}
    json.dump(res, sys.stdout)
    sys.stdout.flush()
Exemple #49
0
def main():
    # # # # # # # # # # # 
    # Model Building    #
    # # # # # # # # # # # 
    
    # ConvOp requires input be a 4D tensor
    x = tensor.tensor4("features")

    y = tensor.ivector("targets")

    # Convolutional Layers
    # ====================
    
    # "Improving neural networks by preventing co-adaptation of feature detectors"
    # conv_layers = [
    #     # ConvolutionalLayer(activation, filter_size, num_filters, pooling_size, name)
    #       ConvolutionalLayer(Rectifier().apply, (5,5), 64, (2,2), border_mode='full', name='l1')
    #     , ConvolutionalLayer(Rectifier().apply, (5,5), 64, (2,2), border_mode='full', name='l2')
    #     , ConvolutionalLayer(Rectifier().apply, (5,5), 64, (2,2), border_mode='full', name='l3')
    #     ]

    # "VGGNet"
    conv_layers = [
          ConvolutionalActivation(Rectifier().apply, (3,3), 64, border_mode='full', name='l1')
        , ConvolutionalLayer(Rectifier().apply, (3,3), 64, (2,2), border_mode='full', name='l2')
        , ConvolutionalActivation(Rectifier().apply, (3,3), 128, border_mode='full', name='l3')
        , ConvolutionalLayer(Rectifier().apply, (3,3), 128, (2,2), border_mode='full', name='l4')
        , ConvolutionalActivation(Rectifier().apply, (3,3), 256, border_mode='full', name='l5')
        , ConvolutionalLayer(Rectifier().apply, (3,3), 256, (2,2), border_mode='full', name='l6')
        ]

    # Bake my own
    # conv_layers = [
    #     # ConvolutionalLayer(activation, filter_size, num_filters, pooling_size, name)
    #       ConvolutionalLayer(Rectifier().apply, (5,5), 64, (2,2), border_mode='full', name='l1')
    #     , ConvolutionalLayer(Rectifier().apply, (3,3), 128, (2,2), border_mode='full', name='l2')
    #     , ConvolutionalActivation(Rectifier().apply, (3,3), 256, border_mode='full', name='l3')
    #     , ConvolutionalLayer(Rectifier().apply, (3,3), 256, (2,2), border_mode='full', name='l4')
    #     ]

    
    convnet = ConvolutionalSequence(
        conv_layers, num_channels=3, image_size=(32,32),
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0)
        )
    convnet.initialize()

    output_dim = np.prod(convnet.get_dim('output'))

    # Fully Connected Layers
    # ======================
    conv_features = convnet.apply(x)
    features = Flattener().apply(conv_features)

    mlp = MLP(  activations=[Rectifier()]*2+[None]
              , dims=[output_dim, 256, 256, 10]
              , weights_init=IsotropicGaussian(0.01)
              , biases_init=Constant(0)
        )
    mlp.initialize()

    y_hat = mlp.apply(features)
    # print y_hat.shape.eval({x: np.zeros((1, 3, 32, 32), dtype=theano.config.floatX)})

    # Numerically Stable Softmax
    cost = Softmax().categorical_cross_entropy(y, y_hat).mean()
    error_rate = MisclassificationRate().apply(y, y_hat)

    cg = ComputationGraph(cost)

    weights = VariableFilter(roles=[FILTER, WEIGHT])(cg.variables)
    l2_regularization = 0.005 * sum((W**2).sum() for W in weights)

    cost = cost + l2_regularization
    cost.name = 'cost_with_regularization'

    # Print sizes to check
    print("Representation sizes:")
    for layer in convnet.layers:
        print(layer.get_dim('input_'))

    # # # # # # # # # # # 
    # Model Training    #
    # # # # # # # # # # # 

    # Figure out data source
    train = CIFAR10("train")
    test = CIFAR10("test")

    # Load Data Using Fuel
    train_stream = DataStream.default_stream(
          dataset=train
        , iteration_scheme=SequentialScheme(train.num_examples, batch_size=128))
    test_stream = DataStream.default_stream(
          dataset=test
        , iteration_scheme=SequentialScheme(test.num_examples, batch_size=1024))

    # Train
    algorithm = GradientDescent(
          cost=cost
        , params=cg.parameters
        , step_rule=Adam(learning_rate=0.0005)
        )


    main_loop = MainLoop(
          model=Model(cost)
        , data_stream=train_stream
        , algorithm=algorithm
        , extensions=[
              TrainingDataMonitoring(
                  [cost, error_rate]
                , prefix='train'
                , after_epoch=True)
            , DataStreamMonitoring(
                  [cost, error_rate]
                , test_stream,
                  prefix='test')
            , ExperimentSaver(dest_directory='...', src_directory='.')
            , Printing()
            , ProgressBar()
            ]
        )
    main_loop.run()
Exemple #50
0
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config, rec_input_len=4, output_dim=config.tgtcls.shape[0], **kwargs)
        self.classes = theano.shared(numpy.array(config.tgtcls, dtype=theano.config.floatX), name='classes')
        self.softmax = Softmax()
        self.sequences.extend(['latitude_lag', 'longitude_lag'])
        self.children.append(self.softmax)
Exemple #51
0

###################
#### Softmax
###################

from blocks.bricks import Softmax
from blocks.bricks.cost import MisclassificationRate

W2 = theano.shared(numpy.random.normal(size=(n_out, num_protos)).astype('float32'))
b = theano.shared(numpy.zeros((num_protos,)).astype('float32'))
y = tensor.ivector('y')

h = tensor.dot(h3, W2) + b
h = tensor.switch(h < 0, -h, h)  # absolute value of the scores
sm = Softmax()
pred = sm.apply(h)
misclass = MisclassificationRate().apply(y, pred)
c = sm.categorical_cross_entropy(y, h).mean()

s_params = [W2, b]
s_grad = theano.grad(c, s_params)
s_updates = [p - numpy.float32(0.05)*g for p, g in zip(s_params, s_grad)]
s_f = theano.function([h3, y], [c, misclass], updates=zip(s_params, s_updates))
s_pred = theano.function([h3], pred)

for j in range(200):
    for i in range(n_batches):
        if i == 0:
            print s_f(data[i*batch_size:(i+1)*batch_size, :], labels[i*batch_size:(i+1)*batch_size])
        else:
            s_f(data[i*batch_size:(i+1)*batch_size, :], labels[i*batch_size:(i+1)*batch_size])
# Fully connected layers
features = Flattener().apply(convnet.apply(x))

mlp = MLP(
        activations=[Rectifier(), None],
        dims=[output_dim, 100, 10],
        weights_init=IsotropicGaussian(0.01),
        biases_init=Constant(0)
        )
mlp.initialize()

y_hat = mlp.apply(features)


# numerically stable softmax
cost = Softmax().categorical_cross_entropy(y.flatten(), y_hat).mean()
cost.name = 'nll'
error_rate = MisclassificationRate().apply(y.flatten(), y_hat)
#cost = MisclassificationRate().apply(y, y_hat)
#cost.name = 'error_rate'

cg = ComputationGraph(cost)

#pdb.set_trace()
weights = VariableFilter(roles=[FILTER, WEIGHT])(cg.variables)
l2_regularization = 0.005 * sum((W**2).sum() for W in weights)

cost_l2 = cost + l2_regularization
cost_l2.name = 'cost_with_regularization'

# Print sizes to check
Exemple #53
0
pre_rnn = x_to_h1.apply(x)
if is_LSTM:
    rnn = DropLSTM(dim=h_dim, model_type=model_type,
                   update_prob=update_prob, name="rnn")
    h1, c1 = rnn.apply(pre_rnn, drops, is_for_test)
else:
    rnn = DropGRU(dim=h_dim, model_type=model_type,
                  update_prob=update_prob, name="rnn")
    h1, sd = rnn.apply(pre_rnn[:, :, :h_dim],
                       pre_rnn[:, :, h_dim:],
                       drops, is_for_test)
h1_to_o = Linear(name='h1_to_o',
                 input_dim=h_dim,
                 output_dim=y_dim)
pre_softmax = h1_to_o.apply(h1)
softmax = Softmax()
shape = pre_softmax.shape
softmax_out = softmax.apply(pre_softmax.reshape((-1, y_dim)))
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'

# comparing only last time-step
cost = CategoricalCrossEntropy().apply(y, softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y, softmax_out[-1])
error_rate.name = 'error_rate'

# Initialization
for brick in (x_to_h1, h1_to_o, rnn):
    brick.weights_init = Glorot()
    brick.biases_init = Constant(0)
def build_model_soft(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())]

    # Build the MLP
    dims = [2 * state_dim]
    activations = []
    for i in range(args.mlp_layers):
        activations.append(Rectifier())
        dims.append(state_dim)

    # Activation of the last layer of the MLP
    if args.mlp_activation == "logistic":
        activations.append(Logistic())
    elif args.mlp_activation == "rectifier":
        activations.append(Rectifier())
    elif args.mlp_activation == "hard_logistic":
        activations.append(HardLogistic())
    else:
        assert False

    # Output of MLP has dimension 1
    dims.append(1)

    for i in range(layers - 1):
        mlp = MLP(activations=activations, dims=dims,
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            SoftGatedRecurrent(dim=state_dim,
                               mlp=mlp,
                               activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # dim = layers * state_dim
    output_layer = Linear(
        input_dim=layers * state_dim,
        output_dim=vocab_size, name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs' + suffix] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # Now we have:
    # h = [state, state_1, gate_value_1, state_2, gate_value_2, state_3, ...]

    # Extract gate_values
    gate_values = h[2::2]
    new_h = [h[0]]
    new_h.extend(h[1::2])
    h = new_h

    # Now we have:
    # h = [state, state_1, state_2, ...]
    # gate_values = [gate_value_1, gate_value_2, gate_value_3]

    for i, gate_value in enumerate(gate_values):
        gate_value.name = "gate_value_" + str(i)

    # Save all the last states
    last_states = {}
    for d in range(layers):
        last_states[d] = h[d][-1, :, :]

    # Concatenate all the states
    if layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(),
        presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates, gate_values
def test_communication(path_vae_mnist,
                       path_maxout_mnist):
                       
    # load models
    vae_mnist = load(path_vae_mnist)
    # get params : to be remove from the computation graph

    # write an object maxout
    classifier = Maxout()
    # get params : to be removed from the computation graph

    # vae whose prior is a zero mean unit variance normal distribution
    activation = Rectifier()
    full_weights_init = Orthogonal()
    weights_init = full_weights_init

    # SVHN in grayscale
    layers = [32*32, 200, 200, 200, 50]
    encoder_layers = layers[:-1]
    encoder_mlp = MLP([activation] * (len(encoder_layers)-1),
              encoder_layers,
              name="MLP_SVHN_encode", biases_init=Constant(0.), weights_init=weights_init)

    enc_dim = encoder_layers[-1]
    z_dim = layers[-1]
    sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)
    decoder_layers = layers[:]  ## includes z_dim as first layer
    decoder_layers.reverse()
    decoder_mlp = MLP([activation] * (len(decoder_layers)-2) + [Rectifier()],
              decoder_layers,
              name="MLP_SVHN_decode", biases_init=Constant(0.), weights_init=weights_init)

    
    vae_svhn = VAEModel(encoder_mlp, sampler, decoder_mlp)
    vae_svhn.initialize()

    # do the connection
    
    x = T.tensor4('x') # SVHN samples preprocessed with local contrast normalization
    x_ = (T.sum(x, axis=1)).flatten(ndim=2)
    y = T.imatrix('y')
    batch_size = 512

    svhn_z, _ = vae_svhn.sampler.sample(vae_svhn.encoder_mlp.apply(x_))
    mnist_decode = vae_mnist.decoder_mlp.apply(svhn_z)
    # reshape
    shape = mnist_decode.shape
    mnist_decode = mnist_decode.reshape((shape[0], 1, 28, 28))
    prediction = classifier.apply(mnist_decode)
    y_hat = Softmax().apply(prediction)

    x_recons, kl_terms = vae_svhn.reconstruct(x_)
    recons_term = BinaryCrossEntropy().apply(x_, T.clip(x_recons, 1e-4, 1 - 1e-4))
    recons_term.name = "recons_term"

    cost_A = recons_term + kl_terms.mean()
    cost_A.name = "cost_A"

    cost_B = Softmax().categorical_cross_entropy(y.flatten(), prediction).mean()
    cost_B.name = 'cost_B'

    cost = cost_B
    cost.name = "cost"
    cg = ComputationGraph(cost) # probably discard some of the parameters
    parameters = cg.parameters
    params = []
    for t in parameters:
        if not re.match(".*mnist", t.name):
            params.append(t)

    """
    f = theano.function([x], cost_A)
    value_x = np.random.ranf((1, 3, 32, 32)).astype("float32")
    print f(value_x)
    
    return
    """
    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"
    
    # training here
    step_rule = RMSProp(0.001,0.99)

    dataset_hdf5_file="/Tmp/ducoffem/SVHN/"
    train_set = H5PYDataset(os.path.join(dataset_hdf5_file, "all.h5"), which_set='train')
    test_set = H5PYDataset(os.path.join(dataset_hdf5_file, "all.h5"), which_set='valid')
    
    data_stream = DataStream.default_stream(
        train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size))
        
    data_stream_test = DataStream.default_stream(
        test_set, iteration_scheme=SequentialScheme(2000, batch_size))


    algorithm = GradientDescent(cost=cost, params=params,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_batches=10)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="valid", every_n_batches=10)

    # drawing_samples = ImagesSamplesSave("../data_svhn", vae, (3, 32, 32), every_n_epochs=1)
    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_batches=10000),
                    Printing(every_n_batches=10)
                  ]

    main_loop = MainLoop(data_stream=data_stream,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()
rnn = Bidirectional(
  SimpleRecurrent(dim=hidden_dim, activation=Tanh(),
    weights_init=IsotropicGaussian(0.01),
    biases_init=Constant(0),
  ),
)

### Will need to reshape the rnn outputs to produce suitable input here...
gather = Linear(name='hidden_to_output', 
  input_dim=hidden_dim*2, output_dim=labels_size,
  weights_init=IsotropicGaussian(0.01),
  biases_init=Constant(0)
)

p_labels = Softmax()



## Let's initialize the variables
lookup.allocate()
#print("lookup.parameters=", lookup.parameters)                         # ('lookup.parameters=', [W])

#lookup.weights_init = FUNCTION
#lookup.initialize() 

#lookup.params[0].set_value( np.random.normal( scale = 0.1, size=(vocab_size, embedding_dim) ).astype(np.float32) )
#lookup.params[0].set_value( embedding )

# See : https://github.com/mila-udem/blocks/blob/master/tests/bricks/test_lookup.py
#lookup.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX))
def training_model_mnist(learning_rate, momentum, iteration, batch_size, epoch_end, iter_batch):

    x = T.tensor4('features')
    y = T.imatrix('targets')

    classifier = build_model_mnist()

    predict = classifier.apply(x)
    y_hat = Softmax().apply(predict)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict).mean()
    cost.name = "cost"
    cg = ComputationGraph(cost)
    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error"


    train_set = MNIST(('train', ))
    test_set = MNIST(("test",))

    if iteration == "slice":
        data_stream = DataStream.default_stream(
                train_set, iteration_scheme=SequentialScheme_slice(train_set.num_examples,
                                                            batch_size))
        data_stream_test = DataStream.default_stream(
                test_set, iteration_scheme=SequentialScheme_slice(test_set.num_examples,
                                                            batch_size))
    else:
        data_stream = DataStream.default_stream(
                train_set, iteration_scheme=SequentialScheme(train_set.num_examples,
                                                            batch_size))

        data_stream_test = DataStream.default_stream(
                test_set, iteration_scheme=SequentialScheme(test_set.num_examples,
                                                            batch_size))

    step_rule = Momentum(learning_rate=learning_rate,
                         momentum=momentum)

    start = time.clock()
    time_spent = shared_floatx(np.float32(0.), name="time_spent")
    time_extension = Time_reference(start, time_spent, every_n_batches=1)

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_epochs=iter_batch)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate, time_spent], data_stream=data_stream_test, prefix="valid",
        every_n_epochs=iter_batch)

    # add a monitor variable about the time
    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=epoch_end),
                    Printing(every_n_epochs=iter_batch),
                    time_extension
                  ]

    main_loop = MainLoop(data_stream=data_stream,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()
Exemple #58
0
class CCHLSTM(BaseRecurrent, Initializable):
    def __init__(self, io_dim, hidden_dims, cond_cert, activation=None, **kwargs):
        super(CCHLSTM, self).__init__(**kwargs)

        self.cond_cert = cond_cert

        self.io_dim = io_dim
        self.hidden_dims = hidden_dims

        self.children = []
        self.layers = []

        self.softmax = Softmax()
        self.children.append(self.softmax)

        for i, d in enumerate(hidden_dims):
            i0 = LookupTable(length=io_dim,
                             dim=4*d,
                             name='i0-%d'%i)
            self.children.append(i0)

            if i > 0:
                i1 = Linear(input_dim=hidden_dims[i-1],
                            output_dim=4*d,
                            name='i1-%d'%i)
                self.children.append(i1)
            else:
                i1 = None

            lstm = LSTM(dim=d, activation=activation,
                        name='LSTM-%d'%i)
            self.children.append(lstm)

            o = Linear(input_dim=d,
                       output_dim=io_dim,
                       name='o-%d'%i)
            self.children.append(o)

            self.layers.append((i0, i1, lstm, o))


    @recurrent(contexts=[])
    def apply(self, inputs, **kwargs):

        l0i, _, l0l, l0o = self.layers[0]
        l0iv = l0i.apply(inputs)
        new_states0, new_cells0 = l0l.apply(states=kwargs['states0'],
                                            cells=kwargs['cells0'],
                                            inputs=l0iv,
                                            iterate=False)
        l0ov = l0o.apply(new_states0)

        pos = l0ov
        ps = new_states0

        passnext = tensor.ones((inputs.shape[0],))
        out_sc = [new_states0, new_cells0, passnext]

        # Conditionally run the deeper layers: a layer only fires when the softmax
        # confidence of the shallower output is at or below its certainty threshold
        # (and the random "pass" bit for that layer is set).
        for i, (cch, (i0, i1, l, o)) in enumerate(zip(self.cond_cert, self.layers[1:])):
            pop = self.softmax.apply(pos)
            best = pop.max(axis=1)
            passnext = passnext * tensor.le(best, cch) * kwargs['pass%d'%i]

            i0v = i0.apply(inputs)
            i1v = i1.apply(ps)

            prev_states = kwargs['states%d'%i]
            prev_cells = kwargs['cells%d'%i]
            new_states, new_cells = l.apply(inputs=i0v + i1v,
                                            states=prev_states,
                                            cells=prev_cells,
                                            iterate=False)
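            # Keep the previous state and cells for sequences where this layer is skipped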
            new_states = tensor.switch(passnext[:, None], new_states, prev_states)
            new_cells = tensor.switch(passnext[:, None], new_cells, prev_cells)
            out_sc += [new_states, new_cells, passnext]

            ov = o.apply(new_states)
            pos = tensor.switch(passnext[:, None], pos + ov, pos)
            ps = new_states

        return [pos] + out_sc

    def get_dim(self, name):
        dims = {'pred': self.io_dim}
        for i, d in enumerate(self.hidden_dims):
            dims['states%d'%i] = dims['cells%d'%i] = d
        if name in dims:
            return dims[name]
        return super(CCHLSTM, self).get_dim(name)

    @apply.property('sequences')
    def apply_sequences(self):
        return ['inputs'] + ['pass%d'%i for i in range(len(self.hidden_dims)-1)]

    @apply.property('states')
    def apply_states(self):
        ret = []
        for i in range(len(self.hidden_dims)):
            ret += ['states%d'%i, 'cells%d'%i]
        return ret

    @apply.property('outputs')
    def apply_outputs(self):
        ret = ['pred']
        for i in range(len(self.hidden_dims)):
            ret += ['states%d'%i, 'cells%d'%i, 'active%d'%i]
        return ret
Exemple #59
0
    def __init__(self, config):
        inp = tensor.imatrix('bytes')

        embed = theano.shared(config.embedding_matrix.astype(theano.config.floatX),
                              name='embedding_matrix')
        in_repr = embed[inp.flatten(), :].reshape((inp.shape[0], inp.shape[1], config.repr_dim))
        in_repr.name = 'in_repr'

        bricks = []
        states = []

        # Construct predictive GRU hierarchy
        hidden = []
        costs = []
        next_target = in_repr.dimshuffle(1, 0, 2)
        for i, (hdim, cf, q) in enumerate(zip(config.hidden_dims,
                                                   config.cost_factors,
                                                   config.hidden_q)):
            init_state = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
                                       name='st0_%d'%i)

            linear = Linear(input_dim=config.repr_dim, output_dim=3*hdim,
                            name="lstm_in_%d"%i)
            lstm = GatedRecurrent(dim=hdim, activation=config.activation_function,
                        name="lstm_rec_%d"%i)
            linear2 = Linear(input_dim=hdim, output_dim=config.repr_dim, name='lstm_out_%d'%i)
            tanh = Tanh('lstm_out_tanh_%d'%i)
            bricks += [linear, lstm, linear2, tanh]
            if i > 0:
                linear1 = Linear(input_dim=config.hidden_dims[i-1], output_dim=3*hdim,
                                 name='lstm_in2_%d'%i)
                bricks += [linear1]

            next_target = tensor.cast(next_target, dtype=theano.config.floatX)
            inter = linear.apply(theano.gradient.disconnected_grad(next_target))
            if i > 0:
                inter += linear1.apply(theano.gradient.disconnected_grad(hidden[-1][:-1,:,:]))
            new_hidden = lstm.apply(inputs=inter[:,:,:hdim],
                                    gate_inputs=inter[:,:,hdim:],
                                    states=init_state)
            states.append((init_state, new_hidden[-1, :, :]))

            hidden += [tensor.concatenate([init_state[None,:,:], new_hidden],axis=0)]
            pred = tanh.apply(linear2.apply(hidden[-1][:-1,:,:]))
            costs += [numpy.float32(cf) * (-next_target * pred).sum(axis=2).mean()]
            costs += [numpy.float32(cf) * q * abs(pred).sum(axis=2).mean()]
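            # Quantize the prediction residual to {-1, 0, +1}; it becomes the next layer's target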
            diff = next_target - pred
            next_target = tensor.ge(diff, 0.5) - tensor.le(diff, -0.5)


        # Construct output from hidden states
        hidden = [s.dimshuffle(1, 0, 2) for s in hidden]

        out_parts = []
        out_dims = config.out_hidden + [config.io_dim]
        for i, (dim, state) in enumerate(zip(config.hidden_dims, hidden)):
            pred_linear = Linear(input_dim=dim, output_dim=out_dims[0],
                                name='pred_linear_%d'%i)
            bricks.append(pred_linear)
            lin = theano.gradient.disconnected_grad(state)
            out_parts.append(pred_linear.apply(lin))

        # Do prediction and calculate cost
        out = sum(out_parts)

        if len(out_dims) > 1:
            out = config.out_hidden_act[0](name='out_act0').apply(out)
            mlp = MLP(dims=out_dims,
                      activations=[x(name='out_act%d'%i) for i, x in enumerate(config.out_hidden_act[1:])]
                                 +[Identity()],
                      name='out_mlp')
            bricks.append(mlp)
            out = mlp.apply(out.reshape((inp.shape[0]*(inp.shape[1]+1),-1))
                           ).reshape((inp.shape[0],inp.shape[1]+1,-1))

        pred = out.argmax(axis=2)

        cost = Softmax().categorical_cross_entropy(inp.flatten(),
                                                   out[:,:-1,:].reshape((inp.shape[0]*inp.shape[1],
                                                                config.io_dim))).mean()
        error_rate = tensor.neq(inp.flatten(), pred[:,:-1].flatten()).mean()

        sgd_cost = cost + sum(costs)
            
        # Initialize all bricks
        for brick in bricks:
            brick.weights_init = config.weights_init
            brick.biases_init = config.biases_init
            brick.initialize()

        # apply noise
        cg = ComputationGraph([sgd_cost, cost, error_rate]+costs)
        if config.weight_noise > 0:
            noise_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, noise_vars, config.weight_noise)
        sgd_cost = cg.outputs[0]
        cost = cg.outputs[1]
        error_rate = cg.outputs[2]
        costs = cg.outputs[3:]


        # put stuff into self that is useful for training or extensions
        self.sgd_cost = sgd_cost

        sgd_cost.name = 'sgd_cost'
        for i in range(len(costs)):
            costs[i].name = 'pred_cost_%d'%i
        cost.name = 'cost'
        error_rate.name = 'error_rate'
        self.monitor_vars = [costs, [cost],
                             [error_rate]]

        self.out = out[:,1:,:]
        self.pred = pred[:,1:]

        self.states = states
Exemple #60
0
    def __init__(self):
        inp = tensor.lmatrix('bytes')

        # Make state vars
        state_vars = {}
        for i, d in enumerate(hidden_dims):
            state_vars['states%d'%i] = theano.shared(numpy.zeros((num_seqs, d))
                                                        .astype(theano.config.floatX),
                                                     name='states%d'%i)
            state_vars['cells%d'%i] = theano.shared(numpy.zeros((num_seqs, d))
                                                        .astype(theano.config.floatX),
                                                    name='cells%d'%i)
        # Construct brick
        cchlstm = CCHLSTM(io_dim=io_dim,
                          hidden_dims=hidden_dims,
                          cond_cert=cond_cert,
                          activation=activation_function)

        # Random pass
        passdict = {}
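        # Sample per-layer Bernoulli "pass" bits: layer i is randomly blocked with probability block_prob[i]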
        for i, p in enumerate(block_prob):
            passdict['pass%d'%i] = rng.binomial(size=(inp.shape[1], inp.shape[0]), p=1-p)

        # Apply it
        outs = cchlstm.apply(inputs=inp.dimshuffle(1, 0),
                             **dict(state_vars.items() + passdict.items()))
        states = []
        active_prop = []
        for i in range(len(hidden_dims)):
            states.append((state_vars['states%d'%i], outs[3*i+1][-1, :, :]))
            states.append((state_vars['cells%d'%i], outs[3*i+2][-1, :, :]))
            active_prop.append(outs[3*i+3].mean())
            active_prop[-1].name = 'active_prop_%d'%i

        out = outs[0].dimshuffle(1, 0, 2)

        # Do prediction and calculate cost
        pred = out.argmax(axis=2)

        cost = Softmax().categorical_cross_entropy(inp[:, 1:].flatten(),
                                                   out[:, :-1, :].reshape((inp.shape[0]*(inp.shape[1]-1),
                                                                           io_dim))).mean()
        error_rate = tensor.neq(inp[:, 1:].flatten(), pred[:, :-1].flatten()).mean()

        # Initialize all bricks
        for brick in [cchlstm]:
            brick.weights_init = IsotropicGaussian(0.1)
            brick.biases_init = Constant(0.)
            brick.initialize()

        # Apply noise and dropout vars
        cg = ComputationGraph([cost, error_rate])
        if w_noise_std > 0:
            noise_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, noise_vars, w_noise_std)
        [cost_reg, error_rate_reg] = cg.outputs

        self.sgd_cost = cost_reg
        self.monitor_vars = [[cost, cost_reg],
                             [error_rate, error_rate_reg],
                             active_prop]

        cost.name = 'cost'
        cost_reg.name = 'cost_reg'
        error_rate.name = 'error_rate'
        error_rate_reg.name = 'error_rate_reg'

        self.out = out
        self.pred = pred

        self.states = states