def __init__(self, x, y, vocab_size, embed_dim, label_n):
    """
    x: theano.tensor.imatrix, (minibatch size, 3)
       the tree matrix of the minibatch;
       each row is (node id, left child id, right child id)

    y: theano.tensor.ivector, (minibatch size,)
       the labels, one per node

    vocab_size: int
       vocabulary size, including both the words and the phrases

    embed_dim: int
       the embedding dimension

    label_n: int
       the number of label classes
    """
    assert x.ndim == 2
    assert y.ndim == 1

    parent_ids = x[:, 0]
    children_ids = x[:, 1:]

    rng = np.random.RandomState(1234)

    self.embedding = theano.shared(
        value=rng.normal(0, 0.05, (vocab_size, embed_dim)),
        name='embedding',
        borrow=True,
    )

    self.rntn_layer = RNTNLayer(rng, embed_dim)

    # Update the embedding by forwarding it bottom-up,
    # computing the vector for each node in each tree.
    def update_embedding(child_indices, my_index, embedding):
        assert child_indices.ndim == 1
        assert my_index.ndim == 0

        return T.switch(
            T.eq(child_indices[0], -1),  # NOTE: not using all() because it's non-differentiable
            embedding,  # leaf node: keep the word embedding as is
            T.set_subtensor(
                embedding[my_index],  # internal node: compose the children through the RNTN layer
                self.rntn_layer.output(embedding[child_indices[0]],
                                       embedding[child_indices[1]])))

    final_embedding, updates = theano.scan(
        fn=update_embedding,
        sequences=[children_ids, parent_ids],
        outputs_info=self.embedding,  # pass the whole matrix and fill in the positions when necessary
    )

    self.update_embedding = theano.function(
        inputs=[x],
        updates=[(self.embedding,
                  T.set_subtensor(self.embedding[parent_ids],
                                  final_embedding[-1][parent_ids]))])

    # the logistic regression layer that predicts the label
    self.logreg_layer = LogisticRegression(
        rng,
        input=final_embedding[-1][parent_ids],
        n_in=embed_dim,
        n_out=label_n)

    cost = self.logreg_layer.nnl(y)

    params = self.logreg_layer.params + self.rntn_layer.params + [self.embedding]
    self.params = params

    param_shapes = (self.logreg_layer.param_shapes +
                    self.rntn_layer.param_shapes +
                    [(vocab_size, embed_dim)])

    grads = [T.grad(cost=cost, wrt=p) for p in params]

    updates = build_adadelta_updates(params, param_shapes, grads, epsilon=0.1)

    # TODO: forward propagation is done again here, in addition to the one in `update_embedding`;
    # this extra computation should be avoided
    self.train = theano.function(inputs=[x, y], updates=updates)
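# For orientation, a minimal usage sketch of the constructor above. The
# enclosing class name `RNTN` is an assumption (this excerpt only shows
# __init__), as is the convention that minibatch rows are ordered bottom-up
# so theano.scan composes children before their parents; leaves are marked
# by child id -1 and node ids index into the shared embedding matrix.
import numpy as np
import theano.tensor as T

x = T.imatrix('x')   # each row: (node id, left child id, right child id)
y = T.ivector('y')   # one label per node/row
model = RNTN(x, y, vocab_size=10000, embed_dim=50, label_n=5)  # hypothetical sizes

# A one-tree minibatch: two leaf words (ids 0 and 1) composed into phrase node 2.
tree = np.array([[0, -1, -1],
                 [1, -1, -1],
                 [2,  0,  1]], dtype=np.int32)
labels = np.array([1, 2, 3], dtype=np.int32)  # dummy labels, one per row

model.train(tree, labels)      # one AdaDelta step on all parameters
model.update_embedding(tree)   # write the composed node vectors back into the embedding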
params = [theano.shared(value=param_val, name="param-%d" % i, borrow=True)
          for i, param_val in enumerate(np_params)]

cost = T.sum(T.dot(params[0], params[1]))  # some cost function
param_shapes = [(2, 2), (2, 2)]

param_grads = [T.grad(cost, param) for param in params]

assert len(np_params) == len(params) == len(param_shapes) == len(param_grads)

updates = build_adadelta_updates(params, param_shapes, param_grads,
                                 rho=rho, epsilon=epsilon)

update = theano.function(inputs=[], outputs=params, updates=updates)

for i in range(n_iter):
    update()

#######################
# NUMPY IMPLEMENTATION
#######################

dummy_params = [T.dmatrix('dp1'), T.dmatrix('dp2')]
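# `build_adadelta_updates` is called in both snippets above but is not shown
# in this excerpt. For reference, here is a minimal sketch of what such a
# helper typically looks like, following Zeiler's AdaDelta rule. The signature
# mirrors the calls above, but the body and the default rho/epsilon values are
# assumptions, not the file's original implementation.
import numpy as np
import theano
import theano.tensor as T


def build_adadelta_updates(params, param_shapes, param_grads, rho=0.95, epsilon=1e-6):
    updates = []
    for param, shape, grad in zip(params, param_shapes, param_grads):
        # running averages of the squared gradients and squared updates
        acc_grad = theano.shared(np.zeros(shape), borrow=True)
        acc_delta = theano.shared(np.zeros(shape), borrow=True)

        new_acc_grad = rho * acc_grad + (1 - rho) * grad ** 2
        delta = -T.sqrt(acc_delta + epsilon) / T.sqrt(new_acc_grad + epsilon) * grad
        new_acc_delta = rho * acc_delta + (1 - rho) * delta ** 2

        updates.append((acc_grad, new_acc_grad))
        updates.append((acc_delta, new_acc_delta))
        updates.append((param, param + delta))

    return updates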