Example 1
# Likely imports for these examples, reconstructed from the Blocks/Theano
# APIs used below; getParams, flat_submatrix and LookUpTable are assumed to
# be project-specific helpers defined elsewhere in the repository.
import theano
import theano.tensor as T

from blocks.bricks import (MLP, Tanh, Identity, Rectifier, Initializable,
                           Feedforward)
from blocks.bricks.base import application, lazy
from blocks.initialization import IsotropicGaussian, Constant
from blocks.roles import add_role, WEIGHT, BIAS
from blocks.utils import shared_floatx_nans


class Window(Initializable, Feedforward):
    """Concatenates one lookup-table embedding per feature over a window of
    dwin words and classifies the result with an MLP."""
    @lazy(allocation=['dwin', 'n_mot', 'vect_size', 'n_hidden'])
    def __init__(self, dwin, n_mot, vect_size, n_hidden, n_out=2, **kwargs):
        super(Window, self).__init__(**kwargs)
        self.dwin = dwin
        self.n_mot = n_mot
        self.vect_size = vect_size
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.n_tables = len(self.vect_size)
        self.tables = [
            LookUpTable(self.vect_size[i],
                        self.n_mot[i],
                        weights_init=IsotropicGaussian(0.001),
                        use_bias=False) for i in range(self.n_tables)
        ]
        self.mlp = MLP(activations=[Tanh()] * len(self.n_hidden) +
                       [Identity()],
                       dims=[self.dwin * sum(self.vect_size)] + self.n_hidden +
                       [self.n_out],
                       weights_init=IsotropicGaussian(0.001),
                       biases_init=Constant(0.))
        self.parameters = []
        self.children = self.tables + [self.mlp]

    def _initialize(self):
        for i in range(self.n_tables):
            self.tables[i].initialize()
        self.mlp.initialize()
        W = self.parameters[0]
        self.weights_init.initialize(W, self.rng)

    def _allocate(self):
        for i in range(self.n_tables):
            self.tables[i].allocate()
        self.mlp.allocate()
        # One big matrix holding all lookup-table weights side by side
        # (filled in by update_transition_matrix).
        W = shared_floatx_nans((sum(self.n_mot), sum(self.vect_size)),
                               name='W')
        add_role(W, WEIGHT)
        self.parameters.append(W)

    def update_transition_matrix(self):
        # Copy each lookup table's weights (transposed) into its own block of
        # the concatenated matrix W stored in self.parameters[0].
        W_tmp = self.parameters[0]
        params_lookup = [
            getParams(table, T.itensor3()) for table in self.tables
        ]
        W_tmp_value = W_tmp.get_value()
        index_row = 0
        index_col = 0
        for i in range(len(self.tables)):
            p_value = params_lookup[i][0].get_value()
            W_tmp_value[index_row:index_row + p_value.shape[1],
                        index_col:index_col + p_value.shape[0]] = \
                p_value.transpose()
            index_row += p_value.shape[1]
            index_col += p_value.shape[0]
        W_tmp.set_value(W_tmp_value)

    def update_lookup_weights(self):
        # Inverse of update_transition_matrix: copy each table's block of W
        # back into that lookup table's own weight matrix.
        W_tmp = self.parameters[0]
        params_lookup = [
            getParams(table, T.itensor3()) for table in self.tables
        ]
        W_tmp_value = W_tmp.get_value().transpose()
        index_row = 0
        index_col = 0
        for i in range(len(self.tables)):
            p_value = params_lookup[i][0].get_value()
            params_lookup[i][0].set_value(
                W_tmp_value[index_col:index_col + p_value.shape[0],
                            index_row:index_row + p_value.shape[1]])
            index_row += p_value.shape[1]
            index_col += p_value.shape[0]

    def get_Params(self):
        # Gather the MLP parameters plus the concatenated lookup weights,
        # split them into weights and biases, and give them layer-indexed names.
        params = getParams(self.mlp, T.matrix())
        self.update_transition_matrix()
        weights = []
        biases = []
        for p in params:
            if p.ndim == 1:
                biases.append(p)
            else:
                weights.append(p)
        if len(params[0].name) == 1:
            # parameters have not been renamed yet
            if weights[0].shape[-1].eval() == self.n_out:
                weights.reverse()
                biases.reverse()
            # prepend the concatenated lookup-table weights
            weights = [self.parameters[0]] + weights
            assert len(weights) == len(biases) + 1
            for index, w in enumerate(weights):
                w.name = "layer_" + str(index) + "_" + w.name
            for index, b in enumerate(biases):
                b.name = "layer_" + str(index + len(weights) -
                                        len(biases)) + "_" + b.name
        else:
            weights = [self.parameters[0]] + weights
        return weights, biases

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        outputs = [
            self.tables[i].apply(input_[:, i]) for i in range(self.n_tables)
        ]  # (batch_size, vector_size[i], dwin)
        outputs = [output.dimshuffle((1, 0, 2)) for output in outputs]
        output = T.concatenate(outputs,
                               axis=0)  # (sum vector_size, batch_size, dwin)
        output = output.dimshuffle((1, 0, 2))
        shape = output.shape
        output = output.reshape((shape[0], shape[1] * shape[2]))
        return self.mlp.apply(output)

    @application(inputs=['input_'], outputs=['output'])
    def embedding(self, input_):
        input_ = input_.dimshuffle(('x', 0, 1))
        outputs = [
            self.tables[i].apply(input_[:, i]) for i in range(self.n_tables)
        ]  # (batch_size, vector_size[i], nb_words)
        outputs = [output.dimshuffle((1, 0, 2)) for output in outputs]
        output = T.concatenate(
            outputs, axis=0)  # (sum vector_size, batch_size, nb_words)
        return output.dimshuffle((1, 2, 0))

    def _push_allocation_config(self):
        for i in range(self.n_tables):
            self.tables[i]._push_allocation_config()
        self.mlp._push_allocation_config()
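
A minimal usage sketch for the Window brick follows. It is not part of the original example: the sizes, variable names, and the assumed input layout (an integer tensor of shape (batch_size, n_tables, dwin) holding word/feature indices) are illustrative assumptions.

# Hypothetical usage sketch (sizes and input layout are assumptions).
window = Window(dwin=5,
                n_mot=[10000, 100],       # assumed vocabulary size per table
                vect_size=[50, 10],       # assumed embedding size per table
                n_hidden=[300],
                n_out=2,
                weights_init=IsotropicGaussian(0.001))
window.initialize()

x = T.itensor3('features')                # assumed (batch, n_tables, dwin)
scores = window.apply(x)                  # (batch, n_out)
predict = theano.function([x], scores)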
Example 2
class ConvPoolNlp(Initializable, Feedforward):
    """
    A layer that applies a convolution and a max-pooling subsampling over an
    input sentence, then classifies the pooled features with an MLP.
    """
    @lazy(allocation=['n_out', 'dwin', 'vector_size', 'n_hidden_layer'])
    def __init__(self, n_out, dwin, vector_size, n_hidden_layer, **kwargs):
        super(ConvPoolNlp, self).__init__(**kwargs)
        self.vector_size = vector_size
        self.n_hidden_layer = n_hidden_layer
        self.dwin = dwin
        self.n_out = n_out

        self.rectifier = Rectifier()
        """
	self.convolution = Convolutional(filter_size=(1,self.filter_size),num_filters=self.num_filter,num_channels=1,
					weights_init=IsotropicGaussian(0.01), use_bias=False)
	"""
        # second dimension is of fixed size sum(vect_size) less the fiter_size borders
        self.mlp = MLP(activations=[Rectifier()] * len(self.n_hidden_layer) +
                       [Identity()],
                       dims=[self.n_out] + self.n_hidden_layer + [2],
                       weights_init=IsotropicGaussian(0.01),
                       biases_init=Constant(0.))

        self.parameters = []
        self.children = []
        #self.children.append(self.lookup)
        #self.children.append(self.convolution)
        self.children.append(self.mlp)
        self.children.append(self.rectifier)

    def _allocate(self):
        # W and b form a linear filter applied to each flattened window of
        # dwin rows (see apply below).
        W = shared_floatx_nans((self.n_out, self.dwin * self.vector_size),
                               name='W')
        b = shared_floatx_nans((self.n_out, ), name='b')
        add_role(b, BIAS)
        add_role(W, WEIGHT)
        self.parameters.append(W)
        self.parameters.append(b)
        self.mlp.allocate()

    @property
    def W(self):
        return self.parameters[0]

    @property
    def b(self):
        return self.parameters[1]

    def _initialize(self):
        W, b = self.parameters
        self.weights_init.initialize(W, self.rng)
        self.biases_init.initialize(b, self.rng)
        # self.convolution.initialize()
        # self.lookup.initialize()
        self.mlp.initialize()

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        W, b = self.parameters
        # input_ = self.lookup.embedding(input_)
        # input_ = input_.dimshuffle(('x', 0, 1, 2))
        # Slide a window of dwin rows over the sentence and apply the linear
        # filter W (plus bias b) to each flattened window.
        convolved_inputs, _ = theano.scan(
            fn=lambda i, A, W, b: T.dot(W, flat_submatrix(A, i, self.dwin)) + b,
            sequences=T.arange(input_.shape[0] - self.dwin),
            non_sequences=[input_, W, b])
        output = convolved_inputs  # (n_positions, n_out)
        # output = self.rectifier.apply(output)
        # Max pooling over window positions, then classification with the MLP.
        output = T.max(output, axis=0)
        output = output.dimshuffle(('x', 0))
        return self.mlp.apply(output)

    def _push_allocation_config(self):
        #self.convolution._push_allocation_config()
        self.mlp._push_allocation_config()
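
Similarly, a hedged usage sketch for ConvPoolNlp, assuming the input is a single sentence given as a (n_words, vector_size) matrix of embeddings; the sizes below are illustrative, not taken from the original code.

# Hypothetical usage sketch (sizes are assumptions).
conv = ConvPoolNlp(n_out=100,
                   dwin=5,
                   vector_size=50,
                   n_hidden_layer=[200],
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0.))
conv.initialize()

sentence = T.matrix('sentence')    # (n_words, vector_size) word embeddings
scores = conv.apply(sentence)      # (1, 2) after max pooling and the MLP
classify = theano.function([sentence], scores)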