Example #1
    def __init__(self, x, y, vocab_size, embed_dim, label_n):
        """
        x: theano.tensor.imatrix, (minibatch size, 3)
            the tree matrix of the minibatch
            for each row, (node id, left child id, right child id)

        y: theano.tensor.ivector, (minibatch size,)
            the labels

        vocab_size: int
            vocabulary size, including both the words and phrases
        
        embed_dim: int
            the embedding dimension

        """
        assert x.ndim == 2
        assert y.ndim == 1

        parent_ids = x[:, 0]
        children_ids = x[:, 1:]

        rng = np.random.RandomState(1234)

        self.embedding = theano.shared(
            value=rng.normal(0, 0.05, (vocab_size, embed_dim)),
            name='embedding',
            borrow=True,
        )

        self.rntn_layer = RNTNLayer(rng, embed_dim)

        # Update the embedding by
        # forwarding the embedding from bottom to up
        # and getting the vector for each node in each tree

        def update_embedding(child_indices, my_index, embedding):

            assert child_indices.ndim == 1
            assert my_index.ndim == 0

            return T.switch(
                # NOTE: not using all() because it's non-differentiable
                T.eq(child_indices[0], -1),
                embedding,  # if no child, return the word embedding
                # otherwise, compute the embedding with the RNTN layer
                T.set_subtensor(
                    embedding[my_index],
                    self.rntn_layer.output(embedding[child_indices[0]],
                                           embedding[child_indices[1]])))

        final_embedding, updates = theano.scan(
            fn=update_embedding,
            sequences=[children_ids, parent_ids],
            # we should pass the whole matrix and fill in the positions if necessary
            outputs_info=self.embedding,
        )

        self.update_embedding = theano.function(
            inputs=[x],
            updates=[(self.embedding,
                      T.set_subtensor(self.embedding[parent_ids],
                                      final_embedding[-1][parent_ids]))])

        # the logistic regression layer that predicts the label
        self.logreg_layer = LogisticRegression(
            rng,
            input=final_embedding[-1][parent_ids],
            n_in=embed_dim,
            n_out=label_n)

        cost = self.logreg_layer.nnl(y)

        params = self.logreg_layer.params + self.rntn_layer.params + [
            self.embedding
        ]
        self.params = params

        param_shapes = self.logreg_layer.param_shapes + self.rntn_layer.param_shapes + [
            (vocab_size, embed_dim)
        ]

        grads = [T.grad(cost=cost, wrt=p) for p in params]

        updates = build_adadelta_updates(params,
                                         param_shapes,
                                         grads,
                                         epsilon=0.1)

        # TODO: in this step, forward propagation is done again besides the one in `update_embedding`
        #       this extra computation should be avoided
        self.train = theano.function(inputs=[x, y], updates=updates)
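
To make the bottom-up pass explicit: the `theano.scan` over `(children_ids, parent_ids)` walks the rows of the tree matrix and fills in each parent node's embedding from its children. Below is a small, self-contained NumPy sketch of the same idea; the tree, the node ids, and the `compose` function are illustrative assumptions only, with `compose` (a plain tanh of a linear map) standing in for `RNTNLayer.output`, whose tensor composition is not shown here.

import numpy as np

rng = np.random.RandomState(1234)

vocab_size, embed_dim = 5, 4          # 3 words + 2 phrase nodes (toy sizes)
embedding = rng.normal(0, 0.05, (vocab_size, embed_dim))

# One tree over a three-word sentence:
# node ids 0..2 are words, 3..4 are phrases; -1 means "no child" (leaf row)
x = np.array([
    [0, -1, -1],   # word 0 (leaf)
    [1, -1, -1],   # word 1 (leaf)
    [2, -1, -1],   # word 2 (leaf)
    [3,  0,  1],   # phrase over words 0 and 1
    [4,  3,  2],   # root phrase over node 3 and word 2
], dtype=np.int32)

# Stand-in for RNTNLayer.output: tanh of a linear map of the concatenated
# children (the real layer also includes a bilinear tensor term).
W = rng.normal(0, 0.05, (embed_dim, 2 * embed_dim))

def compose(left, right):
    return np.tanh(W.dot(np.concatenate([left, right])))

# Bottom-up pass: what the scan does one row at a time, carrying the
# embedding matrix forward as its recurrent state.
for node_id, left_id, right_id in x:
    if left_id != -1:
        embedding[node_id] = compose(embedding[left_id], embedding[right_id])

print(embedding[4])  # vector for the root phrase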
params = [theano.shared(value = param_val,
                        name = "param-%d" %(i),
                        borrow = True)
          for i, param_val in enumerate(np_params)]


cost = T.sum(T.dot(params[0], params[1]))  # some cost function


param_shapes = [(2,2), (2,2)] 
param_grads = [T.grad(cost, param) for param in params]

assert len(np_params) == len(params) == len(param_shapes) == len(param_grads)

updates = build_adadelta_updates(params, param_shapes, param_grads, 
                                 rho = rho, epsilon = epsilon)

update = theano.function(inputs = [], 
                         outputs = params, 
                         updates = updates)


for i in range(n_iter):
    update()


#######################
# NUMPY IMPLEMENTATION
#######################

dummy_params = [T.dmatrix('dp1'), T.dmatrix('dp2')]
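
The NumPy reference section above is cut off after declaring `dummy_params`. For context, here is a minimal NumPy sketch of one AdaDelta step, assuming the standard Zeiler (2012) formulation; the `build_adadelta_updates` helper being tested may differ in details such as the default `rho` and `epsilon`, and the toy values below are illustrative only.

import numpy as np

def adadelta_step(param, grad, acc_grad, acc_delta, rho=0.95, epsilon=1e-6):
    # Running average of squared gradients.
    acc_grad[...] = rho * acc_grad + (1 - rho) * grad ** 2
    # Update scaled by the ratio of RMS(previous updates) to RMS(gradients).
    delta = -np.sqrt(acc_delta + epsilon) / np.sqrt(acc_grad + epsilon) * grad
    # Running average of squared updates.
    acc_delta[...] = rho * acc_delta + (1 - rho) * delta ** 2
    return param + delta

# Cross-check on the same toy problem: cost = sum(dot(p0, p1)),
# so d cost / d p0 = ones((2, 2)).dot(p1.T).
p0, p1 = np.ones((2, 2)), np.ones((2, 2))
acc_g, acc_d = np.zeros((2, 2)), np.zeros((2, 2))
for _ in range(10):
    g0 = np.ones((2, 2)).dot(p1.T)
    p0 = adadelta_step(p0, g0, acc_g, acc_d)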