Example #1
    def create_base_model(self, x, y, input_dim, interim_dim=30):

        # Create the output of the MLP
        mlp = MLP([Tanh(), Tanh(), Tanh()], [input_dim, 60, 60, interim_dim],
                  weights_init=IsotropicGaussian(0.001),
                  biases_init=Constant(0))
        mlp.initialize()
        inter = mlp.apply(x)

        fine_tuner = MLP([Logistic()], [interim_dim, 1],
                         weights_init=IsotropicGaussian(0.001),
                         biases_init=Constant(0))
        fine_tuner.initialize()
        probs = fine_tuner.apply(inter)
        #sq_err = BinaryCrossEntropy()
        err = T.sqr(y.flatten() - probs.flatten())
        # cost = T.mean(err * y.flatten() * (1 - self.p) + err *
        #              (1 - y.flatten()) * self.p)
        cost = T.mean(err)
        #cost = sq_err.apply(probs.flatten(), y.flatten())
        # cost = T.mean(y.flatten() * T.log(probs.flatten()) +
        #              (1 - y.flatten()) * T.log(1 - probs.flatten()))
        cost.name = 'cost'
        pred_out = probs > 0.5
        mis_cost = T.sum(T.neq(y.flatten(), pred_out.flatten()))
        mis_cost.name = 'MisclassificationRate'
        return mlp, fine_tuner, cost, mis_cost
def create_vae(x=None, batch=batch_size):
    x = T.matrix('features') if x is None else x
    x = x / 255.

    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim**2, hidden_dim, 2*latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder'
    )
    encoder.initialize()
    z_param = encoder.apply(x)
    z_mean, z_log_std = z_param[:, latent_dim:], z_param[:, :latent_dim]
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim**2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder'
    )
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost
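
The Sampling brick used by create_vae is defined elsewhere. A minimal sketch of the reparameterization step it presumably performs (an assumption: z_log_std holds log standard deviations) could look like this:

import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

def sample_z(z_mean, z_log_std, batch, latent_dim, seed=1234):
    # Hypothetical stand-in for the Sampling brick above:
    # z = mu + exp(log_std) * eps, with eps ~ N(0, I).
    rng = MRG_RandomStreams(seed)
    eps = rng.normal(size=(batch, latent_dim))
    return z_mean + T.exp(z_log_std) * eps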
Example #3
def build_mlp(features_cat, features_int, labels):

    mlp_int = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[19, 50, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_interval')
    mlp_int.initialize()
    mlp_cat = MLP(activations=[Logistic()],
                  dims=[320, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_categorical')
    mlp_cat.initialize()

    mlp = MLP(activations=[Rectifier(), None],
              dims=[50, 50, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0))
    mlp.initialize()

    gated = mlp_cat.apply(features_cat) * mlp_int.apply(features_int)
    prediction = mlp.apply(gated)
    cost = MAPECost().apply(prediction, labels)

    cg = ComputationGraph(cost)
    print(cg.variables)

    cg_dropout1   = apply_dropout(cg, [VariableFilter(roles=[OUTPUT])(cg.variables)[1], VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost
class AttentionReader(Initializable):
    def __init__(self, x_dim, dec_dim, channels, height, width, N, **kwargs):
        super(AttentionReader, self).__init__(name="reader", **kwargs)

        self.img_height = height
        self.img_width = width
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        self.output_dim = 2*channels*N*N

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 5], **kwargs)

        self.children = [self.readout]

    def get_dim(self, name):
        if name == 'input':
            return self.dec_dim
        elif name == 'x_dim':
            return self.x_dim
        elif name == 'output':
            return self.output_dim
        else:
            raise ValueError
            
    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r'])
    def apply(self, x, x_hat, h_dec):
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        w     = gamma * self.zoomer.read(x    , center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)
        
        return T.concatenate([w, w_hat], axis=1)

    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r','center_y', 'center_x', 'delta'])
    def apply_detailed(self, x, x_hat, h_dec):
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        w     = gamma * self.zoomer.read(x    , center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)
        
        r = T.concatenate([w, w_hat], axis=1)
        return r, center_y, center_x, delta

    @application(inputs=['x', 'h_dec'], outputs=['r','center_y', 'center_x', 'delta'])
    def apply_simple(self, x, h_dec):
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        r     = gamma * self.zoomer.read(x    , center_y, center_x, delta, sigma)

        return r, center_y, center_x, delta
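
ZoomableAttentionWindow.nn2att is assumed above rather than shown. Example #14 below inlines the same transformation, which suggests a sketch along these lines (hypothetical, and ignoring any image-size rescaling of the centers):

import theano.tensor as T

def nn2att_sketch(l):
    # Split the 5 readout units into DRAW-style attention parameters.
    center_y = (l[:, 0] + 1.) / 2.             # grid center, y
    center_x = (l[:, 1] + 1.) / 2.             # grid center, x
    delta = T.exp(l[:, 2])                     # stride between filter centers
    sigma = T.exp(l[:, 3] / 2.)                # Gaussian filter width
    gamma = T.exp(l[:, 4]).dimshuffle(0, 'x')  # scalar intensity
    return center_y, center_x, delta, sigma, gamma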
def setupNN(NNParam):
    NNWidth = NNParam['NNWidth']
    WeightStdDev = NNParam['WeightStdDev']
    L2Weight = NNParam['L2Weight']
    DropOutProb = NNParam['DropOutProb']
    InitialLearningRate = NNParam['InitialLearningRate']
    x = theano.tensor.concatenate([x0, x1, x2, x3], axis=1)
    mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(),
                           Rectifier(), Rectifier()],
              dims=[69 * 4, NNWidth, NNWidth, NNWidth, NNWidth, 100],
              weights_init=IsotropicGaussian(WeightStdDev),
              biases_init=Constant(0))

    x_forward = mlp.apply(x)
    mlp_sm = MLP(activations=[None], dims=[100, 39],
                 weights_init=IsotropicGaussian(WeightStdDev),
                 biases_init=Constant(0))
    y_hat_b = Softmax().apply(mlp_sm.apply(x_forward))
    mlp.initialize()
    mlp_sm.initialize()
    cg = blocks.graph.ComputationGraph(y_hat_b)
    parameters = list(cg.parameters)
    weights = VariableFilter(roles=[blocks.roles.WEIGHT])(cg.variables)
    cg_dropout = blocks.graph.apply_dropout(cg, [weights[3]], DropOutProb)
    y_hat_b_do = cg_dropout.outputs[0]
    pred_b = theano.tensor.argmax(cg.outputs[0], axis=1)
    err_b = theano.tensor.mean(theano.tensor.eq(pred_b, y_b))
    cW = 0
    for W in weights:
        cW += (W**2).sum()
    cost = theano.tensor.mean(theano.tensor.nnet.categorical_crossentropy(
        y_hat_b_do, y_b)) + cW * L2Weight


    Learning_Rate_Decay = numpy.float32(0.98)
    learning_rate_theano = theano.shared(numpy.float32(InitialLearningRate), name='learning_rate')

    learning_rate_update = theano.function(
        inputs=[], outputs=learning_rate_theano,
        updates=[(learning_rate_theano,
                  learning_rate_theano * Learning_Rate_Decay)])
    update_proc = momentum_sgd(cost,parameters,0.8, learning_rate_theano)

    # train
    training_proc = theano.function(
        inputs=[shuffIdx], outputs=cost, updates=update_proc,
        givens={x0: tX[theano.tensor.flatten(shuffIdx[:, 0])],
                x1: tX[theano.tensor.flatten(shuffIdx[:, 1])],
                x2: tX[theano.tensor.flatten(shuffIdx[:, 2])],
                x3: tX[theano.tensor.flatten(shuffIdx[:, 3])],
                y_b: tYb[theano.tensor.flatten(shuffIdx[:, 1])]})
    # test
    test_on_testing_proc = theano.function(
        inputs=[shuffIdx], outputs=[err_b],
        givens={x0: vX[shuffIdx[:, 0]], x1: vX[shuffIdx[:, 1]],
                x2: vX[shuffIdx[:, 2]], x3: vX[shuffIdx[:, 3]],
                y_b: vYb[shuffIdx[:, 1]]})

    test_on_training_proc = theano.function(
        inputs=[shuffIdx], outputs=[err_b],
        givens={x0: tX[shuffIdx[:, 0]], x1: tX[shuffIdx[:, 1]],
                x2: tX[shuffIdx[:, 2]], x3: tX[shuffIdx[:, 3]],
                y_b: tYb[shuffIdx[:, 1]]})

    forward_proc = theano.function(inputs=[x0, x1, x2, x3],
                                   outputs=[x_forward])
    return (learning_rate_update, training_proc, test_on_testing_proc,test_on_training_proc,forward_proc)
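
momentum_sgd is an external helper in this snippet. A plain Theano sketch matching its call signature (an assumption, not the original implementation) would be:

import numpy
import theano
import theano.tensor

def momentum_sgd(cost, parameters, momentum, learning_rate):
    # Classic SGD with momentum: accumulate a velocity per parameter,
    # then step the parameter by the new velocity.
    updates = []
    for p in parameters:
        g = theano.tensor.grad(cost, p)
        v = theano.shared(numpy.zeros_like(p.get_value()))
        v_new = momentum * v - learning_rate * g
        updates.append((v, v_new))
        updates.append((p, p + v_new))
    return updates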
Example #6
def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)

    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but
    # not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)
    ]

    # Test getting and setting parameter values
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones(
            (10, 10), dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)
    }
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test exception is raised if parameter shapes don't match
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones(
                (11, 11), dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11, dtype=theano.config.floatX)
        }
        model3.set_parameter_values(parameter_values)

    assert_raises(ValueError, helper)

    # Test name conflict handling
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))

    assert_raises(ValueError, helper)
class MLP_conv_dense(Initializable):
    def __init__(self, n_layers_conv, n_layers_dense_lower, n_layers_dense_upper,
        n_hidden_conv, n_hidden_dense_lower, n_hidden_dense_lower_output, n_hidden_dense_upper,
        spatial_width, n_colors, n_temporal_basis):
        """
        The multilayer perceptron, that provides temporal weighting coefficients for mu and sigma
        images. This consists of a lower segment with a convolutional MLP, and optionally with a
        dense MLP in parallel. The upper segment then consists of a per-pixel dense MLP
        (convolutional MLP with 1x1 kernel).
        """
        super(MLP_conv_dense, self).__init__()

        self.n_colors = n_colors
        self.spatial_width = spatial_width
        self.n_hidden_dense_lower = n_hidden_dense_lower
        self.n_hidden_dense_lower_output = n_hidden_dense_lower_output
        self.n_hidden_conv = n_hidden_conv

        ## the lower layers
        self.mlp_conv = MultiLayerConvolution(n_layers_conv, n_hidden_conv, spatial_width, n_colors)
        self.children = [self.mlp_conv]
        if n_hidden_dense_lower > 0 and n_layers_dense_lower > 0:
            n_input = n_colors*spatial_width**2
            n_output = n_hidden_dense_lower_output*spatial_width**2
            self.mlp_dense_lower = MLP([dense_nonlinearity] * n_layers_dense_lower,
                [n_input] + [n_hidden_dense_lower] * (n_layers_dense_lower-1) + [n_output],
                name='MLP dense lower', weights_init=Orthogonal(), biases_init=Constant(0))
            self.children.append(self.mlp_dense_lower)
        else:
            n_hidden_dense_lower_output = 0

        ## the upper layers (applied to each pixel independently)
        n_output = n_colors*n_temporal_basis*2 # "*2" for both mu and sigma
        self.mlp_dense_upper = MLP([dense_nonlinearity] * (n_layers_dense_upper-1) + [Identity()],
            [n_hidden_conv+n_hidden_dense_lower_output] +
            [n_hidden_dense_upper] * (n_layers_dense_upper-1) + [n_output],
            name='MLP dense upper', weights_init=Orthogonal(), biases_init=Constant(0))
        self.children.append(self.mlp_dense_upper)

    @application
    def apply(self, X):
        """
        Take in noisy input image and output temporal coefficients for mu and sigma.
        """
        Y = self.mlp_conv.apply(X)
        Y = Y.dimshuffle(0,2,3,1)
        if self.n_hidden_dense_lower > 0:
            n_images = X.shape[0]
            X = X.reshape((n_images, self.n_colors*self.spatial_width**2))
            Y_dense = self.mlp_dense_lower.apply(X)
            Y_dense = Y_dense.reshape((n_images, self.spatial_width, self.spatial_width,
                self.n_hidden_dense_lower_output))
            Y = T.concatenate([Y/T.sqrt(self.n_hidden_conv),
                Y_dense/T.sqrt(self.n_hidden_dense_lower_output)], axis=3)
        Z = self.mlp_dense_upper.apply(Y)
        return Z
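
A shape walk-through of MLP_conv_dense.apply, as a reading of the code above: X enters as (batch, n_colors, spatial_width, spatial_width); the conv branch output Y becomes (batch, spatial_width, spatial_width, n_hidden_conv) after the dimshuffle; the optional dense branch is reshaped to (batch, spatial_width, spatial_width, n_hidden_dense_lower_output) and concatenated on axis 3 after per-branch scaling; the per-pixel upper MLP then maps the last axis to n_colors * n_temporal_basis * 2 (mu and sigma coefficients).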
Example #8
class GRUInitialStateWithInitialStateSumContext(GatedRecurrent):
    """Gated Recurrent with special initial state.

    Initial state of Gated Recurrent is set by an MLP that conditions on the
    last hidden state of the bidirectional encoder, applies an affine
    transformation followed by a tanh non-linearity to set initial state.

    """
    def __init__(self, attended_dim, context_dim, **kwargs):
        super(GRUInitialStateWithInitialStateSumContext,
              self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.context_dim = context_dim

        # two MLPs which map to the same dimension, then we sum them;
        # the motivation is to allow the network to pretrain on the normal
        # MT task, then keep some params static, and continue training on
        # the context-enhanced task
        # the state transformer
        self.initial_transformer = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='state_initializer')

        # the context transformer
        self.context_transformer = MLP(
            activations=[Tanh(), Tanh(), Tanh()],
            dims=[context_dim, 2000, 1000, self.dim],
            name='context_initializer')

        self.children.extend(
            [self.initial_transformer, self.context_transformer])

    # THINKING: how to best combine the image info with the source info?
    @application
    def initial_states(self, batch_size, *args, **kwargs):
        attended = kwargs['attended']
        context = kwargs['initial_state_context']
        attended_reverse_final_state = attended[0, :, -self.attended_dim:]
        initial_state_representation = self.initial_transformer.apply(
            attended_reverse_final_state)
        initial_context_representation = self.context_transformer.apply(
            context)
        initial_state = initial_state_representation + initial_context_representation
        return initial_state

    def _allocate(self):
        self.parameters.append(
            shared_floatx_nans((self.dim, self.dim), name='state_to_state'))
        self.parameters.append(
            shared_floatx_nans((self.dim, 2 * self.dim),
                               name='state_to_gates'))
        for i in range(2):
            if self.parameters[i]:
                add_role(self.parameters[i], WEIGHT)
Example #9
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    mlp_car = MLP(activations=[Rectifier(), Rectifier(), None],
                  dims=[8 + 185, 200, 200, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_interval_car')
    mlp_car.initialize()
    mlp_nocar = MLP(activations=[Rectifier(), Rectifier(), None],
                    dims=[5 + 135, 200, 200, 1],
                    weights_init=IsotropicGaussian(.1),
                    biases_init=Constant(0),
                    name='mlp_interval_nocar')
    mlp_nocar.initialize()

    feature_car = tensor.concatenate((features_car_cat, features_car_int),
                                     axis=1)
    feature_nocar = tensor.concatenate(
        (features_nocar_cat, features_nocar_int), axis=1)
    prediction = mlp_nocar.apply(feature_nocar)
    # gating with the last feature : does the dude own a car
    prediction += tensor.addbroadcast(features_hascar,
                                      1) * mlp_car.apply(feature_car)

    prediction_loc, _, _, _, = \
            build_mlp_onlyloc(features_car_cat, features_car_int,
                              features_nocar_cat, features_nocar_int,
                              features_cp, features_hascar,
                              means, labels)
    prediction += prediction_loc

    # add crm
    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)

    cg_dropout1 = apply_dropout(cg, [input_var[6], input_var[7]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
Example #10
def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)

    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but
    # not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)]

    # Test getting and setting parameter values
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test exception is raised if parameter shapes don't match
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones((11, 11),
                                              dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11, dtype=theano.config.floatX)}
        model3.set_parameter_values(parameter_values)
    assert_raises(ValueError, helper)

    # Test name conflict handling
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))
    assert_raises(ValueError, helper)
Example #11
class DGSRNN(BaseRecurrent, Initializable):
    def __init__(self, input_dim, state_dim, act, transition_h, tr_h_activations, **kwargs):
        super(DGSRNN, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.state_dim = state_dim

        logistic = Logistic()

        self.inter = MLP(dims=[input_dim + state_dim] + transition_h,
                         activations=tr_h_activations,
                         name='inter')
        self.reset = MLP(dims=[transition_h[-1], state_dim],
                         activations=[logistic],
                         name='reset')
        self.update = MLP(dims=[transition_h[-1], state_dim],
                          activations=[act],
                          name='update')

        self.children = [self.inter, self.reset, self.update, logistic, act] + tr_h_activations

        # init state
        self.params = [shared_floatx_zeros((state_dim,), name='init_state')]
        add_role(self.params[0], INITIAL_STATE)

    def get_dim(self, name):
        if name == 'state':
            return self.state_dim
        return super(DGSRNN, self).get_dim(name)

    @recurrent(sequences=['inputs', 'drop_updates_mask'], states=['state'],
               outputs=['state', 'reset'], contexts=[])
    def apply(self, inputs=None, drop_updates_mask=None, state=None):
        inter_v = self.inter.apply(tensor.concatenate([inputs, state], axis=1))
        reset_v = self.reset.apply(inter_v)
        update_v = self.update.apply(inter_v)

        reset_v = reset_v * drop_updates_mask

        new_state = state * (1 - reset_v) + reset_v * update_v

        return new_state, reset_v

    @application
    def initial_state(self, state_name, batch_size, *args, **kwargs):
        return tensor.repeat(self.params[0][None, :],
                             repeats=batch_size,
                             axis=0)
Example #12
class SingleSoftmax(Initializable):
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim // 2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]

    # some day: @application(...) def feedback(self, h)

    @application(inputs=['cs', 'y'], outputs=['cost'])
    def cost(self, cs, y, n_patches):
        energies = [self.mlp.apply(cs[:, t, :])
                    for t in range(n_patches)]
        cross_entropies = [self.softmax.categorical_cross_entropy(y.flatten(), energy)
                           for energy in energies]
        error_rates = [T.neq(y, energy.argmax(axis=1)).mean(axis=0)
                       for energy in energies]
        # train on final prediction
        cost = util.named(cross_entropies[-1], "cost")
        # monitor final prediction
        self.add_auxiliary_variable(cross_entropies[-1], name="cross_entropy")
        self.add_auxiliary_variable(error_rates[-1], name="error_rate")
        return cost
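
A minimal usage sketch for SingleSoftmax (shapes and dims are hypothetical; it assumes the surrounding module provides the util.named helper and the usual Blocks/Theano imports):

import theano.tensor as T

cs = T.tensor3('cs')   # (batch, n_patches, hidden_dim)
y = T.lvector('y')     # integer class labels, shape (batch,)
brick = SingleSoftmax(hidden_dim=256, n_classes=10)
brick.initialize()
cost = brick.cost(cs, y, n_patches=8)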
Example #13
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()],
              dims=[784, 100, 784],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    x = tensor.matrix('features')
    block_cost = Pylearn2Cost(cost.apply(x, mlp.apply(x)))
    block_model = Pylearn2Model(mlp)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01,
              cost=block_cost,
              batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Pylearn2Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
Example #14
class AttentionReader(Initializable):
    def __init__(self, x_dim, dec_dim, width, height, N, **kwargs):
        super(AttentionReader, self).__init__(name="reader", **kwargs)

        self.width = width
        self.height = height
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        self.output_dim = 2 * N * N

        self.zoomer = ZoomableAttentionWindow(height, width, N, normalize=True)
        self.readout = MLP(activations=[Identity()],
                           dims=[dec_dim, 5],
                           **kwargs)

        self.children = [self.readout]

    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r'])
    def apply(self, x, x_hat, h_dec):
        l = self.readout.apply(h_dec)
        center_y = (l[:, 0] + 1.) / 2.
        center_x = (l[:, 1] + 1.) / 2.
        log_delta = l[:, 2]
        log_sigma = l[:, 3] / 2.
        log_gamma = l[:, 4]

        w = self.zoomer.read(x, center_y, center_x, T.exp(log_delta),
                             T.exp(log_sigma))
        w_hat = self.zoomer.read(x_hat, center_y, center_x, T.exp(log_delta),
                                 T.exp(log_sigma))

        gamma = T.exp(log_gamma).dimshuffle(0, 'x')
        return gamma * T.concatenate([w, w_hat], axis=1)
Example #15
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
Example #16
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent,
                hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()],
               dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder,
                        output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(
            mlp1.apply(z)))  #reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
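
cross_entropy_loss is defined elsewhere in this module. A plausible sketch (an assumption, treating y_hat as Bernoulli reconstruction probabilities) is:

import theano.tensor as T

def cross_entropy_loss(y_hat, y):
    # Sum binary cross-entropy over output units, average over the batch.
    return T.nnet.binary_crossentropy(y_hat, y).sum(axis=1).mean()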
Example #17
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    features = tensor.concatenate([
        features_hascar, means['cp'][features_cp[:, 0]],
        means['dep'][features_cp[:, 1]]
    ],
                                  axis=1)

    mlp = MLP(activations=[Rectifier(), Rectifier(), None],
              dims=[5, 50, 50, 1],
              weights_init=IsotropicGaussian(.1),
              biases_init=Constant(0),
              name='mlp')
    mlp.initialize()

    prediction = mlp.apply(features)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)

    cg_dropout1 = apply_dropout(cg, [input_var[3], input_var[5]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
Example #18
class SingleSoftmax(Initializable):
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim // 2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]

    # some day: @application(...) def feedback(self, h)

    @application(inputs=['cs', 'y'], outputs=['cost'])
    def cost(self, cs, y, n_patches):
        energies = [self.mlp.apply(cs[:, t, :]) for t in range(n_patches)]
        cross_entropies = [
            self.softmax.categorical_cross_entropy(y.flatten(), energy)
            for energy in energies
        ]
        error_rates = [
            T.neq(y, energy.argmax(axis=1)).mean(axis=0) for energy in energies
        ]
        # train on final prediction
        cost = util.named(cross_entropies[-1], "cost")
        # monitor final prediction
        self.add_auxiliary_variable(cross_entropies[-1], name="cross_entropy")
        self.add_auxiliary_variable(error_rates[-1], name="error_rate")
        return cost
Example #19
class FFMLP(Initializable):
    def __init__(self, config, output_layer=None, **kwargs):
        super(FFMLP, self).__init__(**kwargs)
        self.config = config

        self.context_embedder = ContextEmbedder(config)

        output_activation = [] if output_layer is None else [output_layer()]
        output_dim = [] if output_layer is None else [config.dim_output]
        self.mlp = MLP(activations=[Rectifier() for _ in config.dim_hidden] + output_activation,
                       dims=[config.dim_input] + config.dim_hidden + output_dim)

        self.extremities = {'%s_k_%s' % (side, ['latitude', 'longitude'][axis]): axis for side in ['first', 'last'] for axis in [0, 1]}
        self.inputs = self.context_embedder.inputs + list(self.extremities.keys())
        self.children = [ self.context_embedder, self.mlp ]

    def _push_initialization_config(self):
        self.mlp.weights_init = self.config.mlp_weights_init
        self.mlp.biases_init = self.config.mlp_biases_init

    @application(outputs=['prediction'])
    def predict(self, **kwargs):
        embeddings = tuple(self.context_embedder.apply(**{k: kwargs[k] for k in self.context_embedder.inputs }))
        extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.extremities.items())

        inputs = tensor.concatenate(extremities + embeddings, axis=1)
        outputs = self.mlp.apply(inputs)

        return outputs

    @predict.property('inputs')
    def predict_inputs(self):
        return self.inputs
Example #20
class GRUInitialState(GatedRecurrent):
    """Gated Recurrent with special initial state.

    Initial state of Gated Recurrent is set by an MLP that conditions on the
    last hidden state of the bidirectional encoder, applies an affine
    transformation followed by a tanh non-linearity to set initial state.

    """
    def __init__(self, attended_dim, **kwargs):
        super(GRUInitialState, self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.initial_transformer = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='state_initializer')
        self.children.append(self.initial_transformer)


    @application
    def initial_states(self, batch_size, *args, **kwargs):
        attended = kwargs['attended']
        initial_state = self.initial_transformer.apply(
            attended[0, :, -self.attended_dim:])
        return initial_state

    def _allocate(self):
        self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                               name='state_to_state'))
        self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim),
                               name='state_to_gates'))
        for i in range(2):
            if self.parameters[i]:
                add_role(self.parameters[i], WEIGHT)
Example #21
class GRU2GO(GatedRecurrent):
    def __init__(self, attended_dim, **kwargs):
        super(GRU2GO, self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.initial_transformer = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='state_initializer')
        self.children.append(self.initial_transformer)

    @application
    def initial_states(self, batch_size, *args, **kwargs):
        attended = kwargs['attended']
        initial_state = self.initial_transformer.apply(
            attended[0, :, -self.attended_dim:])
        return initial_state

    def _allocate(self):
        self.parameters.append(
            shared_floatx_nans((self.dim, self.dim), name='state_to_state'))
        self.parameters.append(
            shared_floatx_nans((self.dim, 2 * self.dim),
                               name='state_to_gates'))
        for i in range(2):
            if self.parameters[i]:
                add_role(self.parameters[i], WEIGHT)
Example #22
def build_mlp(features_int, features_cat, labels, labels_mean):

    inputs = tensor.concatenate([features_int, features_cat], axis=1)

    mlp = MLP(activations=[Rectifier(),
                           Rectifier(),
                           Rectifier(), None],
              dims=[337, 800, 1200, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(1))
    mlp.initialize()

    prediction = mlp.apply(inputs)
    cost = MAPECost().apply(prediction, labels, labels_mean)

    cg = ComputationGraph(cost)
    #cg_dropout0   = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    cg_dropout1 = apply_dropout(cg, [
        VariableFilter(roles=[OUTPUT])(cg.variables)[1],
        VariableFilter(roles=[OUTPUT])(cg.variables)[3],
        VariableFilter(roles=[OUTPUT])(cg.variables)[5]
    ], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost  #cost, cg.parameters, cost #
Example #23
class topicalq_transformer(Initializable):

    def __init__(self, vocab_size, topical_embedding_dim, state_dim,
                 word_num, batch_size, **kwargs):
        super(topicalq_transformer, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.word_embedding_dim = topical_embedding_dim
        self.state_dim = state_dim
        self.word_num = word_num
        self.batch_size = batch_size
        self.look_up = LookupTable(name='topical_embeddings')
        self.transformer = MLP(
            activations=[Tanh()],
            dims=[self.word_embedding_dim * self.word_num, self.state_dim],
            name='topical_transformer')
        self.children = [self.look_up, self.transformer]

    def _push_allocation_config(self):
        self.look_up.length = self.vocab_size
        self.look_up.dim = self.word_embedding_dim


    # do we have to push_config? remains unclear
    @application(inputs=['source_topical_word_sequence'],
                 outputs=['topical_embedding'])
    def apply(self, source_topical_word_sequence):
        # Time as first dimension
        source_topical_word_sequence = source_topical_word_sequence.T
        word_topical_embeddings = self.look_up.apply(
            source_topical_word_sequence)
        word_topical_embeddings = word_topical_embeddings.swapaxes(0, 1)
        # requires testing
        concatenated_topical_embeddings = tensor.reshape(
            word_topical_embeddings,
            [word_topical_embeddings.shape[0],
             word_topical_embeddings.shape[1] *
             word_topical_embeddings.shape[2]])
        topical_embedding = self.transformer.apply(
            concatenated_topical_embeddings)
        return topical_embedding
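
A hypothetical usage sketch for topicalq_transformer (all dims are made up; the initialization kwargs are assumptions that Blocks pushes down to the LookupTable and MLP children):

import theano.tensor as tensor
from blocks.initialization import IsotropicGaussian, Constant

source = tensor.lmatrix('source_topical_word_sequence')  # (batch, word_num)
brick = topicalq_transformer(vocab_size=30000, topical_embedding_dim=100,
                             state_dim=1000, word_num=30, batch_size=80,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0))
brick.initialize()
topical_embedding = brick.apply(source)  # (batch, state_dim)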
Example #24
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
class GRUInitialState(GatedRecurrent):
    """Gated Recurrent with special initial state.

    Initial state of Gated Recurrent is set by an MLP that conditions on the
    first hidden state of the bidirectional encoder, applies an affine
    transformation followed by a tanh non-linearity to set initial state.

    """
    def __init__(self, attended_dim, **kwargs):
        super(GRUInitialState, self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.initial_transformer = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='state_initializer')
        self.children.append(self.initial_transformer)

    @application
    def initial_states(self, batch_size, *args, **kwargs):
        attended = kwargs['attended']
        initial_state = self.initial_transformer.apply(
            attended[0, :, -self.attended_dim:])
        return initial_state

    def _allocate(self):
        self.parameters.append(
            shared_floatx_nans((self.dim, self.dim), name='state_to_state'))
        self.parameters.append(
            shared_floatx_nans((self.dim, 2 * self.dim),
                               name='state_to_gates'))
        for i in range(2):
            if self.parameters[i]:
                add_role(self.parameters[i], WEIGHT)
Example #26
class FFMLP(Initializable):
    def __init__(self, config, output_layer=None, **kwargs):
        super(FFMLP, self).__init__(**kwargs)
        self.config = config

        self.context_embedder = ContextEmbedder(config)

        output_activation = [] if output_layer is None else [output_layer()]
        output_dim = [] if output_layer is None else [config.dim_output]
        self.mlp = MLP(activations=[Rectifier() for _ in config.dim_hidden] + output_activation,
                       dims=[config.dim_input] + config.dim_hidden + output_dim)

        self.extremities = {'%s_k_%s' % (side, ['latitude', 'longitude'][axis]): axis for side in ['first', 'last'] for axis in [0, 1]}
        self.inputs = self.context_embedder.inputs + list(self.extremities.keys())
        self.children = [ self.context_embedder, self.mlp ]

    def _push_initialization_config(self):
        self.mlp.weights_init = self.config.mlp_weights_init
        self.mlp.biases_init = self.config.mlp_biases_init

    @application(outputs=['prediction'])
    def predict(self, **kwargs):
        embeddings = tuple(self.context_embedder.apply(**{k: kwargs[k] for k in self.context_embedder.inputs }))
        extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.extremities.items())

        inputs = tensor.concatenate(extremities + embeddings, axis=1)
        outputs = self.mlp.apply(inputs)

        return outputs

    @predict.property('inputs')
    def predict_inputs(self):
        return self.inputs
Example #27
    def create_model(self, x, y, input_dim, tol=10e-5):

        # Create the output of the MLP
        mlp = MLP(
            [Rectifier(), Rectifier(), Logistic()], [input_dim, 100, 100, 1],
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0))
        mlp.initialize()
        probs = mlp.apply(x)
        y = y.dimshuffle(0, 'x')
        # Create the if-else cost function
        true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol)
        true_n = (T.sum((1 - y) * (1 - probs)) + tol) * \
            1.0 / (T.sum(1 - y) + tol)
        #p = (T.sum(y) + tol) / (y.shape[0] + tol)
        theta = (1 - self.p) / self.p
        numerator = (1 + self.beta**2) * true_p
        denominator = self.beta**2 + theta + true_p - theta * true_n

        Fscore = numerator / denominator

        cost = -1 * Fscore
        cost.name = "cost"

        return mlp, cost, probs
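
One way to read the cost above: with tol as a smoothing constant, true_p approximates the true positive rate TP / (TP + FN) and true_n the true negative rate TN / (TN + FP). With theta = (1 - p) / p built from the class prior p, the quantity (1 + beta^2) * true_p / (beta^2 + theta + true_p - theta * true_n) is a differentiable surrogate for the F-beta score, so minimizing its negative trains the classifier directly toward F-beta.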
Example #28
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(input_dim=10000,
                          dim=500,
                          mlp_hidden_dims=[2000, 500, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    model.initialize()
    h, c = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    return cost, error_rate
Example #29
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784],
              weights_init=IsotropicGaussian(), biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    x = tensor.matrix('features')
    block_cost = Pylearn2Cost(cost.apply(x, mlp.apply(x)))
    block_model = Pylearn2Model(mlp)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Pylearn2Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
Example #30
def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5("features")
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3("locs")
    # shape: B x Classes
    target = T.ivector("targets")

    model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, downn_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(
        input_, locs
    )

    model.location = location
    model.scale = scale
    model.alpha = alpha
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0)
    )
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = "CE"
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = "ER"
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs["load_pretrained"]:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open("VGG_CNN_params.npz") as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs["test_model"]:
        print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True)
        data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
        f(data[1], data[0], data[2])

        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
Example #31
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [2])

    weights = mlp.apply(r)

    final = tensor.dot(x, weights)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost, error_rate] = cg.outputs

    return cost, error_rate
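
A reading of this graph, inferred from the shapes rather than stated in the source: each row of r carries side information for one input feature, the MLP maps each row to a 2-vector of class weights, and final = dot(x, weights) mixes those per-feature class scores by the feature values in x, yielding 2-way logits for the softmax cross-entropy.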
Example #32
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    prediction, _, _, _, = \
            build_mlp_onlyloc(features_car_cat, features_car_int,
                              features_nocar_cat, features_nocar_int, features_cp, features_hascar,
                              means, labels)

    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]

    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)

    cg_dropout = apply_dropout(cg, [input_var[7], input_var[5]], .4)
    cost_dropout = cg_dropout.outputs[0]

    return prediction, cost_dropout, cg_dropout.parameters, cost
Example #33
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #34
class GMMMLP(Initializable):
    """An mlp brick that branchs out to output
    sigma and mu for GMM
    Parameters
    ----------
    mlp: MLP brick
        the main mlp to wrap around.
    dim:
        output dim
    """
    def __init__(self, mlp, dim, k, const=1e-5, **kwargs):
        super(GMMMLP, self).__init__(**kwargs)

        self.dim = dim
        self.const = const
        self.k = k
        input_dim = mlp.output_dim
        self.mu = MLP(activations=[Identity()],
                      dims=[input_dim, dim],
                      name=self.name + "_mu")
        self.sigma = MLP(activations=[SoftPlus()],
                         dims=[input_dim, dim],
                         name=self.name + "_sigma")

        self.coeff = MLP(activations=[Identity()],
                         dims=[input_dim, k],
                         name=self.name + "_coeff")

        self.coeff2 = NDimensionalSoftmax()
        self.mlp = mlp
        self.children = [
            self.mlp, self.mu, self.sigma, self.coeff, self.coeff2
        ]
        #self.children.extend(self.mlp.children)

    @application
    def apply(self, inputs):
        state = self.mlp.apply(inputs)
        mu = self.mu.apply(state)
        sigma = self.sigma.apply(state)
        coeff = self.coeff2.apply(self.coeff.apply(state),
                                  extra_ndim=state.ndim - 2) + self.const
        return mu, sigma, coeff

    @property
    def output_dim(self):
        return self.dim
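
A hypothetical usage sketch for GMMMLP (names and dims are assumptions; the wrapped MLP supplies output_dim, and the initialization kwargs are pushed down to the mu/sigma/coeff children):

import theano.tensor as T
from blocks.bricks import MLP, Rectifier
from blocks.initialization import IsotropicGaussian, Constant

base = MLP(activations=[Rectifier()], dims=[50, 128])
gmm = GMMMLP(mlp=base, dim=20, k=20,
             weights_init=IsotropicGaussian(0.01),
             biases_init=Constant(0))
gmm.initialize()
x = T.tensor3('x')               # (T, B, 50)
mu, sigma, coeff = gmm.apply(x)  # mu, sigma: (T, B, 20); coeff: softmax over k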
Example #35
class GMMMLP(Initializable):
    """An mlp brick that branchs out to output
    sigma and mu for GMM
    Parameters
    ----------
    mlp: MLP brick
        the main mlp to wrap around.
    dim:
        output dim
    """
    def __init__(self, mlp, dim, k, const=1e-5, **kwargs):
        super(GMMMLP, self).__init__(**kwargs)

        self.dim = dim
        self.const = const
        self.k = k
        input_dim = mlp.output_dim
        self.mu = MLP(activations=[Identity()],
                      dims=[input_dim, dim],
                      name=self.name + "_mu")
        self.sigma = MLP(activations=[SoftPlus()],
                         dims=[input_dim, dim],
                         name=self.name + "_sigma")

        self.coeff = MLP(activations=[Identity()],
                         dims=[input_dim, k],
                         name=self.name + "_coeff")


        self.coeff2 = NDimensionalSoftmax()
        self.mlp = mlp
        self.children = [self.mlp, self.mu, 
                         self.sigma, self.coeff, self.coeff2]
        #self.children.extend(self.mlp.children)

    @application
    def apply(self, inputs):
        state = self.mlp.apply(inputs)
        mu = self.mu.apply(state)
        sigma = self.sigma.apply(state)
        coeff = self.coeff2.apply(self.coeff.apply(state),
            extra_ndim=state.ndim - 2) + self.const
        return mu, sigma, coeff

    @property
    def output_dim(self):
        return self.dim
Example #36
class SimpleSpeechRecognizer(Initializable):
    """
    Initializable, does nothing more than combining
    class DeepBidirectional and an MLP as output
    Parameters
    ----------
    
    transition: transition of bidirectional (e.g. GatedRecurrent or LSTM)
    dims_transition: list of dims for RNN in bidirectional
    num_features: number of features or input dimensionality
    num_classes
    
    """
    def __init__(self, transition, dims_transition, num_features, num_classes,
                 **kwargs):

        super(SimpleSpeechRecognizer, self).__init__(**kwargs)
        # TODO: think about putting this into conf ?
        # One would want to use Orthogonal in the LSTM only for the recurrent
        # weights (W_states), but 1. Blocks concatenates all 4 recurrent
        # matrices into one; does Orthogonal init know this and do the
        # correct init? 2. peepholes (vectors/diagonal matrices) are
        # initialized with the same weights_init, and a vector can't be
        # initialized with Orthogonal.
        # For now, don't use Orthogonal.
        # TODO: Maybe implement LSTM by myself

        # self.rec_weights_init = Orthogonal(scale=1.0)
        self.rec_weights_init = Uniform(mean=0, width=0.01)
        self.ff_weights_init = Uniform(mean=0, width=0.01)
        self.biases_init = Constant(0.0)
        self.transition = transition

        # ************ Deep BiRNN *************
        self.dblstm = DeepBidirectional(
            transition=self.transition,
            dims_hidden=dims_transition,
            dim_input=num_features,
            rec_weights_init=self.rec_weights_init,
            ff_weights_init=self.ff_weights_init,
            biases_init=self.biases_init,
        )

        # ************ Output ***************
        self.output = MLP(
            [None],
            [2 * dims_transition[-1]] + [num_classes],
            weights_init=self.ff_weights_init,
            biases_init=self.biases_init,
            name="top",
        )

        # Remember child bricks
        self.children = [self.dblstm, self.output]

    @application(inputs=['sequence', 'mask'], outputs=['output'])
    def apply(self, sequence, mask):
        blstm_processed = self.dblstm.apply(input_=sequence, mask=mask)
        return self.output.apply(blstm_processed)
Example #37
def task_ID_layers(x, recurrent_in_size):
    mlp = MLP([Rectifier()] * (len(task_ID_FF_dims)-1), task_ID_FF_dims, name='task_ID_mlp', weights_init=Uniform(width=.2), biases_init=Constant(0))
    mlp.push_initialization_config()
    mlp.initialize()
    out_size = task_ID_FF_dims[-1] + recurrent_in_size - len(game_tasks)
    zero_padded_task_IDs = T.concatenate([x[:,:,-len(game_tasks):], T.zeros((x.shape[0], x.shape[1], task_ID_FF_dims[0] - len(game_tasks)))], axis=2)
    mlp_out = mlp.apply(zero_padded_task_IDs)
    task_ID_out = T.concatenate([x[:,:,:-len(game_tasks)]] + [mlp_out], axis=2)
    return task_ID_out, out_size
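
A reading of task_ID_layers, inferred from the indexing above: the last len(game_tasks) features along the last axis are one-hot task IDs; they are zero-padded up to the MLP's input width, passed through the task-ID MLP, and the MLP output is concatenated back onto the remaining features in place of the raw IDs, which changes the recurrent input size accordingly.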
Example #38
def test_mlp():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh(), None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)
Example #39
0
def setup_model(configs):

    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5('features')
    # shape: B x Classes
    target = T.lmatrix('targets')

    model = LSTMAttention(
        configs,
        weights_init=Glorot(),
        biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, patch, down_sampled_input,
        conved_part_1, conved_part_2, pre_lstm) = model.apply(input_)

    classifier = MLP(
        [Rectifier(), Logistic()],
        configs['classifier_dims'],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = BinaryCrossEntropy().apply(target, probabilities)
    cost.name = 'CE'
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = 'ER'
    model.cost = cost

    if configs['load_pretrained']:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open('VGG_CNN_params.npz', 'rb') as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.remove(param.name)
        print "the following parameters did not match: " + str(all_conv_params)

    if configs['test_model']:
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        data = np.random.randn(10, 40, 3, 224, 224)
        targs = np.random.randn(40, 101)
        f(data, targs)
        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
Exemple #40
0
def test_snapshot():
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    y = linear.apply(x)
    cg = ComputationGraph(y)
    snapshot = cg.get_snapshot(dict(x=numpy.zeros((1, 10), dtype=floatX)))
    assert len(snapshot) == 14
Exemple #42
0
def test_mlp_apply():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh().apply, None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)
    assert mlp.rng == mlp.linear_transformations[0].rng
Exemple #43
0
class LocatorReader(Initializable):
    def __init__(self, x_dim, dec_dim, channels, height, width, N, **kwargs):
        super(LocatorReader, self).__init__(name="reader", **kwargs)

        self.img_height = height
        self.img_width = width
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        self.output_dim = channels * N * N

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 7], **kwargs)

        self.children = [self.readout]

    def get_dim(self, name):
        if name == 'input':
            return self.dec_dim
        elif name == 'x_dim':
            return self.x_dim
        elif name == 'output':
            return self.output_dim
        else:
            raise ValueError

    @application(inputs=['x', 'h_dec'], outputs=['r', 'l'])
    def apply(self, x, h_dec):
        l = self.readout.apply(h_dec)

        center_y, center_x, deltaY, deltaX, sigmaY, sigmaX, gamma = self.zoomer.nn2att(l)

        w = gamma * self.zoomer.read(x, center_y, center_x, deltaY, deltaX, sigmaY, sigmaX)

        return w, l

    @application(inputs=['h_dec'], outputs=['center_y', 'center_x', 'deltaY', 'deltaX'])
    def apply_l(self, h_dec):
        l = self.readout.apply(h_dec)

        center_y, center_x, deltaY, deltaX = self.zoomer.nn2att_wn(l)

        return center_y, center_x, deltaY, deltaX
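
# --- Usage sketch (added; all dimensions are illustrative assumptions).
# `h_dec` would be the decoder state and `x` a flattened image batch.
reader = LocatorReader(x_dim=3 * 64 * 64, dec_dim=256, channels=3,
                       height=64, width=64, N=12,
                       weights_init=IsotropicGaussian(0.01),
                       biases_init=Constant(0))
reader.initialize()
r, l = reader.apply(x, h_dec)  # r: gamma-scaled N x N read-out, l: raw attention params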
Exemple #44
0
 def create_model(self):
     x = self.x
     input_dim = self.input_dim
     mlp = MLP([Logistic(), Logistic(), Tanh()], [input_dim, 100, 100, 1],
               weights_init=IsotropicGaussian(0.001),
               biases_init=Constant(0))
     mlp.initialize()
     self.mlp = mlp
     probs = mlp.apply(x)
     return probs
Exemple #45
0
class GaussianMLP(Initializable):
    """An mlp brick that branchs out to output
    sigma and mu for Gaussian dist
    Parameters
    ----------
    mlp: MLP brick
        the main mlp to wrap around.
    dim:
        output dim
    """

    def __init__(self, mlp, dim, const=0., **kwargs):
        super(GaussianMLP, self).__init__(**kwargs)

        self.dim = dim
        self.const = const
        input_dim = mlp.output_dim
        self.mu = MLP(activations=[Identity()],
                      dims=[input_dim, dim],
                      weights_init=self.weights_init,
                      biases_init=self.biases_init,
                      name=self.name + "_mu")
        self.sigma = MLP(activations=[SoftPlus()],
                         dims=[input_dim, dim],
                         weights_init=self.weights_init,
                         biases_init=self.biases_init,
                         name=self.name + "_sigma")

        self.mlp = mlp
        self.children = [self.mlp, self.mu, self.sigma]
        self.children.extend(self.mlp.children)

    @application
    def apply(self, inputs):
        state = self.mlp.apply(inputs)
        mu = self.mu.apply(state)
        sigma = self.sigma.apply(state) + self.const

        return mu, sigma

    @property
    def output_dim(self):
        return self.dim
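
# --- Usage sketch (added): reparameterized sampling from the two heads.
# The base MLP, its dims, and the random stream are illustrative assumptions.
from theano.sandbox.rng_mrg import MRG_RandomStreams

x = T.matrix('features')
base = MLP(activations=[Rectifier()], dims=[784, 200],
           weights_init=IsotropicGaussian(0.01),
           biases_init=Constant(0))
gauss = GaussianMLP(base, dim=50, const=1e-5,
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0))
gauss.initialize()
mu, sigma = gauss.apply(x)
rng = MRG_RandomStreams(seed=2015)
z = mu + sigma * rng.normal(size=mu.shape)  # z ~ N(mu, sigma**2), differentiable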
Exemple #46
0
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      DataStream(mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if bokeh:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
Exemple #47
0
def test_fully_layer():
	batch_size = 2
	x = T.tensor4()
	y = T.ivector()
	V = 200
	layer_conv = Convolutional(filter_size=(5,5),num_filters=V,
				name="toto",
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	# try with no bias
	activation = Rectifier()
	pool = MaxPooling(pooling_size=(2,2))

	convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15,
					image_size=(10,10),
					name="conv_section")
	convnet.push_allocation_config()
	convnet.initialize()
	output = convnet.apply(x)
	batch_size = output.shape[0]
	output_dim = np.prod(convnet.get_dim('output'))
	result_conv = output.reshape((batch_size, output_dim))
	mlp = MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
		  weights_init=IsotropicGaussian(0.01),
		  biases_init=Constant(0.0))
	mlp.initialize()
	output = mlp.apply(result_conv)
	cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
	cg = ComputationGraph(cost)
	W = VariableFilter(roles=[WEIGHT])(cg.variables)
	B = VariableFilter(roles=[BIAS])(cg.variables)
	W = W[0]
	b = B[0]

	inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
	outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
	var_input = inputs_fully[0]
	var_output = outputs_fully[0]
	
	[d_W, d_S, d_b] = T.grad(cost, [W, var_output, b])
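	# Chain-rule check: with S = A.W + b, dcost/dW = A^T . dS and
	# dcost/db = 1^T . dS, so stacking [dW; db] should equal [A, 1]^T . dS.
	# The Frobenius norm printed below should therefore be ~0.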

	d_b = d_b.dimshuffle(('x',0))
	d_p = T.concatenate([d_W, d_b], axis=0)
	x_value = 1e3*np.random.ranf((2,15, 10, 10))
	f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore')
	A, B, C = f(x_value, [5, 0])
	A = np.concatenate([A, np.ones((2,1))], axis=1)
	print 'A', A.shape
	print 'B', B.shape
	print 'C', C.shape
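
	# `lin` is assumed to be numpy.linalg (e.g. `import numpy.linalg as lin`).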

	print lin.norm(C - np.dot(np.transpose(A), B), 'fro')

	return
	
	"""
Exemple #48
0
class GaussianMLP(Initializable):
    """An mlp brick that branchs out to output
    sigma and mu for Gaussian dist
    Parameters
    ----------
    mlp: MLP brick
        the main mlp to wrap around.
    dim:
        output dim
    """
    def __init__(self, mlp, dim, const=0., **kwargs):
        super(GaussianMLP, self).__init__(**kwargs)

        self.dim = dim
        self.const = const
        input_dim = mlp.output_dim
        self.mu = MLP(activations=[Identity()],
                      dims=[input_dim, dim],
                      weights_init=self.weights_init,
                      biases_init=self.biases_init,
                      name=self.name + "_mu")
        self.sigma = MLP(activations=[SoftPlus()],
                         dims=[input_dim, dim],
                         weights_init=self.weights_init,
                         biases_init=self.biases_init,
                         name=self.name + "_sigma")

        self.mlp = mlp
        self.children = [self.mlp, self.mu, self.sigma]
        self.children.extend(self.mlp.children)

    @application
    def apply(self, inputs):
        state = self.mlp.apply(inputs)
        mu = self.mu.apply(state)
        sigma = self.sigma.apply(state) + self.const

        return mu, sigma

    @property
    def output_dim(self):
        return self.dim
Exemple #49
0
 def build_model(self, hidden_dim):
     board_input = T.vector('input')
     mlp = MLP(activations=[LeakyRectifier(0.1), LeakyRectifier(0.1)],
               dims=[9, hidden_dim,  9],
               weights_init=IsotropicGaussian(0.00001),
               biases_init=Constant(0.01))
     output = mlp.apply(board_input)
     masked_output = Softmax().apply(output * T.eq(board_input, 0) * 1000)
     mlp.initialize()
     cost, chosen = self.get_cost(masked_output)
     return board_input, mlp, cost, chosen, output
Exemple #50
0
 def apply(self, input_, target):
     mlp = MLP(self.non_lins, self.dims,
               weights_init=IsotropicGaussian(0.01),
               biases_init=Constant(0),
               name=self.name)
     mlp.initialize()
     probs = mlp.apply(T.flatten(input_, outdim=2))
     probs.name = 'probs'
     cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
     cost.name = "CE"
     self.outputs = {}
     self.outputs['probs'] = probs
     self.outputs['cost'] = cost
Exemple #51
0
class LSTM2GO(LSTM):
    def __init__(self, attended_dim, **kwargs):
        super(LSTM2GO, self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.initial_transformer_s = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='state_initializer')
        self.children.append(self.initial_transformer_s)

        self.initial_transformer_c = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='cell_initializer')
        self.children.append(self.initial_transformer_c)

    @application
    def initial_states(self, batch_size, *args, **kwargs):
        attended = kwargs['attended']
        initial_state = self.initial_transformer_s.apply(
            attended[0, :, -self.attended_dim:])
        initial_cell = self.initial_transformer_c.apply(
            attended[0, :, -self.attended_dim:])
        return [initial_state, initial_cell]

    def _allocate(self):
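        # Unlike the stock LSTM's _allocate, the initial state and cell are
        # produced by the MLPs above, so only the recurrent matrix and the
        # peephole weight vectors are allocated here.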
        self.W_state = shared_floatx_nans((self.dim, 4*self.dim),
                                          name='W_state')
        self.W_cell_to_in = shared_floatx_nans((self.dim,),
                                               name='W_cell_to_in')
        self.W_cell_to_forget = shared_floatx_nans((self.dim,),
                                                   name='W_cell_to_forget')
        self.W_cell_to_out = shared_floatx_nans((self.dim,),
                                                name='W_cell_to_out')
        add_role(self.W_state, WEIGHT)
        add_role(self.W_cell_to_in, WEIGHT)
        add_role(self.W_cell_to_forget, WEIGHT)
        add_role(self.W_cell_to_out, WEIGHT)

        self.parameters = [
            self.W_state, self.W_cell_to_in, self.W_cell_to_forget, self.W_cell_to_out]
Exemple #52
0
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder], name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu', input_dim=hu_encoder, output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma', input_dim=hu_encoder, output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
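
# --- Usage sketch (added): a reparameterized draw from the prior returned
# above, assuming `log_sigma` parameterizes the log standard deviation.
rng = MRG_RandomStreams(seed=2015)  # from theano.sandbox.rng_mrg
eps = rng.normal(size=mu.shape)
z = mu + T.exp(log_sigma) * eps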
Exemple #53
0
def construct_model(input_dim, output_dim):
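    # NOTE: `activation_functions`, `hidden_dims`, and `noise_std` are assumed
    # to be module-level configuration in the original source.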
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx x 1

    # r_rep is nx x nj x nr
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (nr + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)
    mlp_input = concat.reshape((nx * nj, nr + 1))

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim+1] + hidden_dims + [output_dim])

    activations = mlp.apply(mlp_input)

    act_sh = activations.reshape((nx, nj, output_dim))
    final = act_sh.mean(axis=1)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost_reg, error_rate_reg] = cg.outputs

    return cost_reg, error_rate_reg, cost, error_rate
Exemple #54
0
def create_model():
    """Create the deep autoencoder model with Blocks, and load MNIST."""
    mlp = MLP(activations=[Logistic(), Logistic(), Logistic(), None,
                           Logistic(), Logistic(), Logistic(), Logistic()],
              dims=[784, 1000, 500, 250, 30, 250, 500, 1000, 784],
              weights_init=Sparse(15, IsotropicGaussian()),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    x_hat = mlp.apply(tensor.flatten(x, outdim=2))
    squared_err = SquaredError().apply(tensor.flatten(x, outdim=2), x_hat)
    cost = BinaryCrossEntropy().apply(tensor.flatten(x, outdim=2), x_hat)

    return x, cost, squared_err
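
# --- Usage sketch (added; mirrors the MNIST example above): training the
# autoencoder cost with plain gradient descent.
x, cost, squared_err = create_model()
cost.name = 'reconstruction_cost'
cg = ComputationGraph([cost])
algorithm = GradientDescent(cost=cost, params=cg.parameters,
                            step_rule=Scale(learning_rate=0.1))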
Exemple #55
0
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(dim=256,
                          mlp_hidden_dims=[256, 4],
                          batch_size=100,
                          image_shape=(64, 64),
                          patch_shape=(16, 16),
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()
    h, c, location, scale = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [256 * 2, 200, 10],
                     weights_init=Glorot(),
                     biases_init=Constant(0))
    model.h = h
    model.c = c
    model.location = location
    model.scale = scale
    classifier.initialize()

    probabilities = classifier.apply(T.concatenate([h[-1], c[-1]], axis=1))
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)
    model.cost = cost

    location_x_0_avg = T.mean(location[0, :, 0])
    location_x_0_avg.name = 'location_x_0_avg'
    location_x_10_avg = T.mean(location[10, :, 0])
    location_x_10_avg.name = 'location_x_10_avg'
    location_x_20_avg = T.mean(location[-1, :, 0])
    location_x_20_avg.name = 'location_x_20_avg'

    scale_x_0_avg = T.mean(scale[0, :, 0])
    scale_x_0_avg.name = 'scale_x_0_avg'
    scale_x_10_avg = T.mean(scale[10, :, 0])
    scale_x_10_avg.name = 'scale_x_10_avg'
    scale_x_20_avg = T.mean(scale[-1, :, 0])
    scale_x_20_avg.name = 'scale_x_20_avg'

    monitorings = [error_rate,
                   location_x_0_avg, location_x_10_avg, location_x_20_avg,
                   scale_x_0_avg, scale_x_10_avg, scale_x_20_avg]
    model.monitorings = monitorings

    return model
Exemple #56
0
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(dim=500,
                          mlp_hidden_dims=[400, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()
    h, c, location, scale = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=Glorot(),
                     biases_init=Constant(0))
    model.h = h
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    location_x_avg = T.mean(location[:, 0])
    location_x_avg.name = 'location_x_avg'
    location_y_avg = T.mean(location[:, 1])
    location_y_avg.name = 'location_y_avg'
    scale_x_avg = T.mean(scale[:, 0])
    scale_x_avg.name = 'scale_x_avg'
    scale_y_avg = T.mean(scale[:, 1])
    scale_y_avg.name = 'scale_y_avg'

    location_x_std = T.std(location[:, 0])
    location_x_std.name = 'location_x_std'
    location_y_std = T.std(location[:, 1])
    location_y_std.name = 'location_y_std'
    scale_x_std = T.std(scale[:, 0])
    scale_x_std.name = 'scale_x_std'
    scale_y_std = T.std(scale[:, 1])
    scale_y_std.name = 'scale_y_std'

    monitorings = [error_rate,
                   location_x_avg, location_y_avg, scale_x_avg, scale_y_avg,
                   location_x_std, location_y_std, scale_x_std, scale_y_std]

    return cost, monitorings
Exemple #57
0
class GRUInitialState(GatedRecurrent):
    def __init__(self, attended_dim, **kwargs):
        super(GRUInitialState, self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.initial_transformer = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='state_initializer')
        self.children.append(self.initial_transformer)

    @application
    def initial_state(self, state_name, batch_size, *args, **kwargs):
        attended = kwargs['attended']
        if state_name == 'states':
            initial_state = self.initial_transformer.apply(
                attended[0, :, -self.attended_dim:])
            return initial_state
        return super(GRUInitialState, self).initial_state(
            state_name, batch_size, *args, **kwargs)