Example #1
import numpy
import theano
from numpy.testing import assert_allclose, assert_raises
from theano import tensor

from blocks.bricks import MLP, Tanh
from blocks.model import Model


def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)

    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but
    # not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)
    ]

    # Test getting and setting parameter values
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones(
            (10, 10), dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)
    }
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test that an exception is raised if parameter shapes don't match
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones(
                (11, 11), dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11, dtype=theano.config.floatX)
        }
        model3.set_parameter_values(parameter_values)

    assert_raises(ValueError, helper)

    # Test name conflict handling
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))

    assert_raises(ValueError, helper)
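The path-style keys above mirror the brick hierarchy: brick names joined by
'/', with the parameter name appended after a dot. A minimal sketch of looking
up one shared variable by such a path, reusing the Blocks API and imports from
the test above:

# Minimal sketch; reuses the imports from the test above.
x = tensor.matrix('x')
mlp = MLP([Tanh()], [10, 20], name="mlp")
model = Model(mlp.apply(x))
# Keys have the form '/<brick path>/<child>.<parameter>':
W = model.get_parameter_dict()['/mlp/linear_0.W']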
Example #2
import numpy

from blocks.bricks import MLP, Identity
# Assumption: extract_parameter_values comes from blocks.dump, its home in
# older Blocks releases; adjust the import for your version.
from blocks.dump import extract_parameter_values


def test_extract_parameter_values():
    mlp = MLP([Identity(), Identity()], [10, 20, 10])
    mlp.allocate()
    param_values = extract_parameter_values(mlp)
    assert len(param_values) == 4
    assert isinstance(param_values['/mlp/linear_0.W'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_0.b'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_1.W'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_1.b'], numpy.ndarray)
Example #3
import numpy
import theano

from blocks.bricks import MLP, Identity
# Assumption: inject_parameter_values comes from blocks.dump, as in older
# Blocks releases; adjust the import for your version.
from blocks.dump import inject_parameter_values

floatX = theano.config.floatX


def test_inject_parameter_values():
    mlp = MLP([Identity()], [10, 10])
    mlp.allocate()
    param_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10), dtype=floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=floatX)
    }
    inject_parameter_values(mlp, param_values)
    assert numpy.all(mlp.linear_transformations[0].params[0].get_value() == 2)
    assert numpy.all(mlp.linear_transformations[0].params[1].get_value() == 3)
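Together, the two helpers give a simple checkpoint round trip: extract the
name-to-array dict, serialize it, and inject it back later. A hedged sketch of
that round trip (pickle is used purely for illustration; the helper names are
assumed importable as in the tests above):

# Sketch of a save/load round trip with the helpers tested above.
# Assumes an allocated brick `mlp`, as in the tests.
import pickle

param_values = extract_parameter_values(mlp)   # dict: name -> ndarray
with open('mlp_params.pkl', 'wb') as f:
    pickle.dump(param_values, f)

with open('mlp_params.pkl', 'rb') as f:
    inject_parameter_values(mlp, pickle.load(f))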
Example #4
import theano.tensor as T

from blocks.bricks import (MLP, Tanh, Identity, Initializable, Feedforward)
from blocks.bricks.base import application, lazy
from blocks.initialization import IsotropicGaussian, Constant
from blocks.roles import add_role, WEIGHT
from blocks.utils import shared_floatx_nans
# LookUpTable and getParams are project-specific helpers assumed to be
# available from the surrounding codebase.


class Window(Initializable, Feedforward):
    @lazy(allocation=['dwin', 'n_mot', 'vect_size', 'n_hidden'])
    def __init__(self, dwin, n_mot, vect_size, n_hidden, n_out=2, **kwargs):
        super(Window, self).__init__(**kwargs)
        self.dwin = dwin
        self.n_mot = n_mot
        self.vect_size = vect_size
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.n_tables = len(self.vect_size)
        self.tables = [
            LookUpTable(self.vect_size[i],
                        self.n_mot[i],
                        weights_init=IsotropicGaussian(0.001),
                        use_bias=False) for i in range(self.n_tables)
        ]
        self.mlp = MLP(
            activations=[Tanh()] * len(self.n_hidden) + [Identity()],
            dims=([self.dwin * sum(self.vect_size)] + self.n_hidden +
                  [self.n_out]),
            weights_init=IsotropicGaussian(0.001),
            biases_init=Constant(0.))
        self.parameters = []
        self.children = self.tables + [self.mlp]

    def _initialize(self):
        for i in range(self.n_tables):
            self.tables[i].initialize()
        self.mlp.initialize()
        W = self.parameters[0]
        self.weights_init.initialize(W, self.rng)

    def _allocate(self):
        for i in range(self.n_tables):
            self.tables[i].allocate()
        self.mlp.allocate()
        W = shared_floatx_nans((sum(self.n_mot), sum(self.vect_size)),
                               name='W')
        add_role(W, WEIGHT)
        self.parameters.append(W)

    def update_transition_matrix(self):
        # Copy each lookup table's weight matrix (transposed) into the
        # corresponding diagonal block of the shared matrix W.
        W_tmp = self.parameters[0]
        params_lookup = [
            getParams(table, T.itensor3()) for table in self.tables
        ]
        index_row = 0
        index_col = 0
        for i in range(len(self.tables)):
            W_tmp_value = W_tmp.get_value()
            p_value = params_lookup[i][0].get_value()
            W_tmp_value[index_row:index_row + p_value.shape[1],
                        index_col:index_col + p_value.shape[0]] = \
                p_value.transpose()
            index_row += p_value.shape[1]
            index_col += p_value.shape[0]
            W_tmp.set_value(W_tmp_value)

    def update_lookup_weights(self):
        # Inverse of update_transition_matrix: copy each diagonal block of
        # W back into the corresponding lookup table.
        W_tmp = self.parameters[0]
        params_lookup = [
            getParams(table, T.itensor3()) for table in self.tables
        ]
        index_row = 0
        index_col = 0
        for i in range(len(self.tables)):
            W_tmp_value = W_tmp.get_value().transpose()
            p_value = params_lookup[i][0].get_value()
            params_lookup[i][0].set_value(
                W_tmp_value[index_col:index_col + p_value.shape[0],
                            index_row:index_row + p_value.shape[1]])
            index_row += p_value.shape[1]
            index_col += p_value.shape[0]

    def get_Params(self):
        params = getParams(self.mlp, T.matrix())
        self.update_transition_matrix()
        weights = []
        biases = []
        for p in params:
            if p.ndim == 1:
                biases.append(p)
            else:
                weights.append(p)
        if len(params[0].name) == 1:
            # Parameters still carry their default one-letter names
            # ('W'/'b'), i.e. they have not been renamed yet.
            if weights[0].shape[-1].eval() == self.n_out:
                weights.reverse()
                biases.reverse()
            # add the lookuptables weights
            weights = [self.parameters[0]] + weights
            assert len(weights) == len(biases) + 1
            for index, w in enumerate(weights):
                w.name = "layer_" + str(index) + "_" + w.name
            for index, b in enumerate(biases):
                b.name = "layer_" + str(index + len(weights) -
                                        len(biases)) + "_" + b.name
        else:
            weights = [self.parameters[0]] + weights
        return weights, biases

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        outputs = [
            self.tables[i].apply(input_[:, i]) for i in range(self.n_tables)
        ]  # (batch_size, vector_size[i], dwin)
        outputs = [output.dimshuffle((1, 0, 2)) for output in outputs]
        output = T.concatenate(outputs,
                               axis=0)  # (sum vector_size, batch_size, dwin)
        output = output.dimshuffle((1, 0, 2))
        shape = output.shape
        output = output.reshape((shape[0], shape[1] * shape[2]))
        return self.mlp.apply(output)

    @application(inputs=['input_'], outputs=['output'])
    def embedding(self, input_):
        input_ = input_.dimshuffle(('x', 0, 1))
        outputs = [
            self.tables[i].apply(input_[:, i]) for i in range(self.n_tables)
        ]  # (batch_size, vector_size[i], nb_words)
        outputs = [output.dimshuffle((1, 0, 2)) for output in outputs]
        output = T.concatenate(
            outputs, axis=0)  # (sum vector_size, batch_size, nb_words)
        return output.dimshuffle((1, 2, 0))

    def _push_allocation_config(self):
        for i in range(self.n_tables):
            self.tables[i]._push_allocation_config()
        self.mlp._push_allocation_config()
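The Window brick above concatenates several lookup-table embeddings over a
word window and feeds the flattened result to an MLP. A hypothetical
instantiation with illustrative sizes (everything except the Window class
itself follows the Blocks conventions used above):

# Hypothetical instantiation of the Window brick; sizes are illustrative.
window = Window(
    dwin=5,                 # window of 5 words
    n_mot=[10000, 50],      # vocabulary size of each lookup table
    vect_size=[50, 10],     # embedding width of each lookup table
    n_hidden=[300],         # hidden layer sizes of the trailing MLP
    weights_init=IsotropicGaussian(0.001))
window.initialize()         # allocates, then initializes all children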
Example #5
import theano
import theano.tensor as T

from blocks.bricks import (MLP, Identity, Rectifier, Initializable,
                           Feedforward)
from blocks.bricks.base import application, lazy
from blocks.initialization import IsotropicGaussian, Constant
from blocks.roles import add_role, BIAS, WEIGHT
from blocks.utils import shared_floatx_nans
# flat_submatrix is a project-specific helper assumed to be available from
# the surrounding codebase.


class ConvPoolNlp(Initializable, Feedforward):
    """Apply a convolution and a max-pooling (subsampling) to an input
    sentence.
    """
    @lazy(allocation=['n_out', 'dwin', 'vector_size', 'n_hidden_layer'])
    def __init__(self, n_out, dwin, vector_size, n_hidden_layer, **kwargs):
        super(ConvPoolNlp, self).__init__(**kwargs)
        self.vector_size = vector_size
        self.n_hidden_layer = n_hidden_layer
        self.dwin = dwin
        self.n_out = n_out

        self.rectifier = Rectifier()
        # Convolution brick disabled; the scan in apply() performs the
        # convolution explicitly instead:
        # self.convolution = Convolutional(
        #     filter_size=(1, self.filter_size), num_filters=self.num_filter,
        #     num_channels=1, weights_init=IsotropicGaussian(0.01),
        #     use_bias=False)

        # The second dimension has the fixed size sum(vect_size), minus the
        # filter_size borders.
        self.mlp = MLP(activations=[Rectifier()] * len(self.n_hidden_layer) +
                       [Identity()],
                       dims=[self.n_out] + self.n_hidden_layer + [2],
                       weights_init=IsotropicGaussian(0.01),
                       biases_init=Constant(0.))

        self.parameters = []
        # self.lookup and self.convolution are disabled (see above).
        self.children = [self.mlp, self.rectifier]

    def _allocate(self):
        W = shared_floatx_nans((self.n_out, self.dwin * self.vector_size),
                               name='W')
        b = shared_floatx_nans((self.n_out, ), name='b')
        add_role(b, BIAS)
        add_role(W, WEIGHT)
        self.parameters.append(W)
        self.parameters.append(b)
        self.mlp.allocate()

    @property
    def W(self):
        return self.parameters[0]

    @property
    def b(self):
        return self.parameters[1]

    def _initialize(self):
        W, b = self.parameters
        self.weights_init.initialize(W, self.rng)
        self.biases_init.initialize(b, self.rng)
        # self.convolution and self.lookup are disabled (see __init__).
        self.mlp.initialize()

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        W, b = self.parameters
        # (Lookup-table embedding of input_ is disabled; see __init__.)
        # Slide a window of width dwin over the sentence and apply the
        # affine transformation W . window + b to each flattened window.
        convolved_inputs, _ = theano.scan(
            fn=lambda i, A, W, b: T.dot(
                W, flat_submatrix(A, i, self.dwin)) + b,
            sequences=T.arange(input_.shape[0] - self.dwin),
            non_sequences=[input_, W, b])
        output = T.concatenate([convolved_inputs])
        # output = self.rectifier.apply(output)  # rectifier currently unused
        # Max-pool over all window positions, then feed the MLP.
        output = T.max(output, axis=0)
        output = output.dimshuffle(('x', 0))
        return self.mlp.apply(output)

    def _push_allocation_config(self):
        #self.convolution._push_allocation_config()
        self.mlp._push_allocation_config()
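For completeness, a hypothetical wiring of ConvPoolNlp with illustrative
sizes. The scan over input_.shape[0] suggests the input is a
(n_words, vector_size) matrix of word embeddings; flat_submatrix is assumed
to flatten one dwin-wide window of it:

# Hypothetical instantiation; sizes are illustrative only.
layer = ConvPoolNlp(
    n_out=100,             # convolution output channels
    dwin=5,                # window width in words
    vector_size=50,        # embedding width of each word
    n_hidden_layer=[200],  # hidden layer sizes of the trailing MLP
    weights_init=IsotropicGaussian(0.01),
    biases_init=Constant(0.))
layer.initialize()

sentence = T.matrix('sentence')  # (n_words, vector_size)
scores = layer.apply(sentence)   # (1, 2): max-pooled features through the MLP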