def __init__(self, We_initial, params):
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    if params.npc > 0:
        pc = theano.shared(np.asarray(params.pc, dtype=config.floatX))

    g1batchindices = T.imatrix()
    g1mask = T.matrix()
    scores = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_average = lasagne_average_layer([l_emb, l_mask])
    l_out = lasagne.layers.DenseLayer(l_average, params.layersize,
                                      nonlinearity=params.nonlinearity)

    embg = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})

    if params.npc <= 0:
        print("#pc <=0, do not remove pc")
    elif params.npc == 1:
        print("#pc == 1")
        proj = embg.dot(pc.transpose())
        embg = embg - theano.tensor.outer(proj, pc)
    else:
        print("#pc > 1")
        proj = embg.dot(pc.transpose())
        embg = embg - theano.tensor.dot(proj, pc)

    l_in2 = lasagne.layers.InputLayer((None, params.layersize))
    l_sigmoid = lasagne.layers.DenseLayer(l_in2, params.memsize,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)
    l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 2, nonlinearity=T.nnet.softmax)

    X = lasagne.layers.get_output(l_softmax, {l_in2: embg})
    cost = T.nnet.categorical_crossentropy(X, scores)
    prediction = T.argmax(X, axis=1)

    self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)
    self.network_params.pop(0)  # do not include the word embedding as network parameters
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)

    reg = self.getRegTerm(params, We, initial_We)
    self.trainable = self.getTrainableParams(params)
    cost = T.mean(cost) + reg

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg)
    self.scoring_function = theano.function([g1batchindices, g1mask], prediction)
    self.cost_function = theano.function([scores, g1batchindices, g1mask], cost)

    grads = theano.gradient.grad(cost, self.trainable)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, list(range(grad.ndim)))
                 for grad in grads]
    updates = params.learner(grads, self.trainable, params.eta)
    self.train_function = theano.function([scores, g1batchindices, g1mask], cost,
                                          updates=updates)
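# --- Illustration (not part of the model above) ---
# A minimal numpy sketch of the principal-component removal performed in the
# npc branches above, assuming `emb` is a (batch, dim) matrix of sentence
# embeddings and `pc` holds the npc components as an (npc, dim) matrix. The
# names `emb`, `pc`, and `remove_pc` are illustrative, not this file's API.
import numpy as np

def remove_pc(emb, pc):
    # proj[i, k] = <emb[i], pc[k]>; subtracting proj.dot(pc) removes the span
    # of the components. For npc == 1 this reduces to the outer-product form
    # used in the npc == 1 branch above.
    proj = emb.dot(pc.T)
    return emb - proj.dot(pc)

# emb = np.random.randn(32, 300).astype('float32')
# pc = np.linalg.svd(emb, full_matrices=False)[2][:1]   # top principal component
# emb_clean = remove_pc(emb, pc)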
def __init__(self, We_initial, params):
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    g1batchindices = T.imatrix(); g2batchindices = T.imatrix()
    p1batchindices = T.imatrix(); p2batchindices = T.imatrix()
    g1mask = T.matrix(); g2mask = T.matrix()
    p1mask = T.matrix(); p2mask = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_average = lasagne_average_layer([l_emb, l_mask])

    l_1 = lasagne.layers.DenseLayer(l_average, params.hiddensize,
                                    W=lasagne.init.Normal(0.1),
                                    b=lasagne.init.Constant(0.),
                                    nonlinearity=params.nonlinearity)
    l_2 = lasagne.layers.DenseLayer(l_1, params.hiddensize, nonlinearity=params.nonlinearity)
    l_3 = lasagne.layers.DenseLayer(l_2, params.hiddensize, nonlinearity=params.nonlinearity)
    l_4 = lasagne.layers.DenseLayer(l_3, params.hiddensize, nonlinearity=params.nonlinearity)

    l_end = None
    if params.numlayers == 1:
        l_end = l_1
    elif params.numlayers == 2:
        l_end = l_2
    elif params.numlayers == 3:
        l_end = l_3
    elif params.numlayers == 4:
        l_end = l_4
    else:
        raise ValueError('Only 1-4 layers are supported currently.')

    embg1 = lasagne.layers.get_output(l_end, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_end, {l_in: g2batchindices, l_mask: g2mask})
    embp1 = lasagne.layers.get_output(l_end, {l_in: p1batchindices, l_mask: p1mask})
    embp2 = lasagne.layers.get_output(l_end, {l_in: p2batchindices, l_mask: p2mask})

    g1g2 = (embg1 * embg2).sum(axis=1)
    g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    g1g2 = g1g2 / g1g2norm

    p1g1 = (embp1 * embg1).sum(axis=1)
    p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(T.sum(embg1**2, axis=1))
    p1g1 = p1g1 / p1g1norm

    p2g2 = (embp2 * embg2).sum(axis=1)
    p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    p2g2 = p2g2 / p2g2norm

    costp1g1 = params.margin - g1g2 + p1g1
    costp1g1 = costp1g1 * (costp1g1 > 0)
    costp2g2 = params.margin - g1g2 + p2g2
    costp2g2 = costp2g2 * (costp2g2 > 0)
    cost = costp1g1 + costp2g2

    network_params = lasagne.layers.get_all_params(l_end, trainable=True)
    network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_end, trainable=True)

    # regularization
    l2 = 0.5 * params.LC * sum(lasagne.regularization.l2(x) for x in network_params)
    if params.updatewords:
        word_reg = 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
        cost = T.mean(cost) + l2 + word_reg
    else:
        cost = T.mean(cost) + l2

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.cost_function = theano.function([g1batchindices, g2batchindices,
                                          p1batchindices, p2batchindices,
                                          g1mask, g2mask, p1mask, p2mask], cost)
    prediction = g1g2
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], prediction)

    self.train_function = None
    if params.updatewords:
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                     for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([g1batchindices, g2batchindices,
                                               p1batchindices, p2batchindices,
                                               g1mask, g2mask, p1mask, p2mask],
                                              cost, updates=updates)
    else:
        self.all_params = network_params
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                     for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([g1batchindices, g2batchindices,
                                               p1batchindices, p2batchindices,
                                               g1mask, g2mask, p1mask, p2mask],
                                              cost, updates=updates)
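# --- Illustration (not part of the model above) ---
# Hedged numpy sketch of the max-margin objective built above: for a gold
# paraphrase pair (g1, g2) and negatives (p1, p2), hinge losses on cosine
# similarities. Variable names are assumptions for the sketch only.
import numpy as np

def cosine(a, b):
    return (a * b).sum(axis=1) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))

def margin_cost(g1, g2, p1, p2, margin=0.4):
    g1g2 = cosine(g1, g2)
    cost = np.maximum(0.0, margin - g1g2 + cosine(p1, g1)) \
         + np.maximum(0.0, margin - g1g2 + cosine(p2, g2))
    return cost.mean()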
def __init__(self, We_initial, params):
    params.siamese = True

    ## Params
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    ## Symbolic Params
    # Input variable for a batch of sentences that look for target synonyms
    # and antonyms in the next tensor
    senBatch_indices = T.imatrix(); senMask = T.matrix()
    # Input variable for a batch of positive and negative
    # examples (so syn, neg1, neg2, ...)
    targetBatch_indices = T.itensor3(); targetMask = T.tensor3()
    targets = T.matrix()

    ## First embedding input layer
    l_in_1 = lasagne.layers.InputLayer((None, None, 1))
    l_mask_1 = lasagne.layers.InputLayer(shape=(None, None))

    # First embedding layer and Knowledge Distillation's embedding layer
    l_emb_1 = lasagne.layers.EmbeddingLayer(l_in_1, input_size=We.get_value().shape[0],
                                            output_size=We.get_value().shape[1], W=We)
    l_emb_1_reg = lasagne.layers.EmbeddingLayer(l_in_1,
                                                input_size=initial_We.get_value().shape[0],
                                                output_size=initial_We.get_value().shape[1],
                                                W=initial_We)
    l_emb_1_reg.params[l_emb_1_reg.W].remove('trainable')

    # First average layer and Knowledge Distillation's first average layer
    #l_emb_1_drop = lasagne.layers.DropoutLayer(l_emb_1, p=0.8)
    l_average_1 = lasagne_average_layer([l_emb_1, l_mask_1])
    l_average_1_reg = lasagne_average_layer([l_emb_1_reg, l_mask_1])

    in2embgs = lasagne.layers.get_output(l_emb_1, {l_in_1: senBatch_indices},
                                         deterministic=True)
    embg1 = lasagne.layers.get_output(l_average_1,
                                      {l_in_1: senBatch_indices, l_mask_1: senMask},
                                      deterministic=True)

    ## Second embedding input layer
    l_in_2 = lasagne.layers.InputLayer(shape=(None, None, None, 1))
    l_mask_2 = lasagne.layers.InputLayer(shape=(None, None, None))

    # Second embedding layer, the weights tied with the first embedding layer
    l_emb_2 = lasagne.layers.EmbeddingLayer(l_in_2, input_size=We.get_value().shape[0],
                                            output_size=We.get_value().shape[1],
                                            W=l_emb_1.W)
    l_emb_2_reg = lasagne.layers.EmbeddingLayer(l_in_2,
                                                input_size=initial_We.get_value().shape[0],
                                                output_size=initial_We.get_value().shape[1],
                                                W=l_emb_1_reg.W)
    l_emb_2_reg.params[l_emb_2_reg.W].remove('trainable')

    # Second average layer
    #l_emb_2 = lasagne.layers.DropoutLayer(l_emb_2, p=0.8)
    l_average_2 = averageLayer_matrix([l_emb_2, l_mask_2])
    l_transpose_2 = lasagne.layers.DimshuffleLayer(l_average_2, (0, 2, 1))

    # Knowledge Distillation's second average layer
    l_average_2_reg = averageLayer_matrix([l_emb_2_reg, l_mask_2])
    l_transpose_2_reg = lasagne.layers.DimshuffleLayer(l_average_2_reg, (0, 2, 1))

    ## Layer Combination
    l_cosine = cosineLayer([l_average_1, l_transpose_2], We.get_value().shape[1])
    g1g2 = lasagne.layers.get_output(l_cosine,
                                     {l_in_1: senBatch_indices, l_mask_1: senMask,
                                      l_in_2: targetBatch_indices, l_mask_2: targetMask},
                                     deterministic=True)
    g1g2 = g1g2[:, 0]

    # Knowledge Distillation's layer combination
    l_cosine_reg = cosineLayer([l_average_1_reg, l_transpose_2_reg],
                               We.get_value().shape[1])
    l_final_layer = softMaxLayer(l_cosine)
    # Knowledge Distillation's final layer
    l_final_layer_reg = softMaxLayer(l_cosine_reg)

    ## Objective Function
    prediction = lasagne.layers.get_output(l_final_layer,
                                           {l_in_1: senBatch_indices, l_mask_1: senMask,
                                            l_in_2: targetBatch_indices,
                                            l_mask_2: targetMask})
    # Knowledge Distillation's prediction
    prediction_reg = lasagne.layers.get_output(l_final_layer_reg,
                                               {l_in_1: senBatch_indices,
                                                l_mask_1: senMask,
                                                l_in_2: targetBatch_indices,
                                                l_mask_2: targetMask})

    self.all_params = lasagne.layers.get_all_params(l_final_layer, trainable=True)

    loss = lasagne.objectives.categorical_crossentropy(prediction, targets)
    # Knowledge Distillation's loss
    loss_reg = lasagne.objectives.categorical_crossentropy(prediction, prediction_reg)
    cost = params.LW * loss_reg.mean() + params.hyper_k1 * loss.mean()

    # feedforward
    self.feedforward_function = theano.function([senBatch_indices, senMask], embg1)
    self.cost_function = theano.function([senBatch_indices, senMask,
                                          targetBatch_indices, targetMask, targets], cost)
    self.cost_distillation = theano.function([senBatch_indices, senMask,
                                              targetBatch_indices, targetMask],
                                             loss_reg.mean())
    self.scoring_function = theano.function([senBatch_indices, senMask,
                                             targetBatch_indices, targetMask], g1g2)
    self.word2embeddings = theano.function([senBatch_indices], in2embgs)

    # updates
    grads = theano.gradient.grad(cost, self.all_params)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.all_params, params.eta)
    self.train_function = theano.function([senBatch_indices, senMask,
                                           targetBatch_indices, targetMask, targets],
                                          cost, updates=updates)
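# --- Illustration (not part of the model above) ---
# Numpy sketch of the distillation objective combined above: cross-entropy
# against the hard targets plus cross-entropy against the frozen
# (initial-embedding) teacher's predictions, weighted like
# params.LW * loss_reg.mean() + params.hyper_k1 * loss.mean(). Names here are
# hypothetical, for the sketch only.
import numpy as np

def distillation_cost(pred, pred_teacher, targets, lw=1.0, k1=1.0, eps=1e-7):
    ce_hard = -(targets * np.log(pred + eps)).sum(axis=1)
    ce_soft = -(pred_teacher * np.log(pred + eps)).sum(axis=1)
    return lw * ce_soft.mean() + k1 * ce_hard.mean()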
def __init__(self, We_initial, params):
    print("WordModel")
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    g1batchindices = T.imatrix()
    g2batchindices = T.imatrix()
    g1mask = T.matrix()
    g2mask = T.matrix()
    scores = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_out = lasagne_average_layer([l_emb, l_mask])

    embg1 = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_out, {l_in: g2batchindices, l_mask: g2mask})

    g1_dot_g2 = embg1 * embg2
    g1_abs_g2 = abs(embg1 - embg2)

    lin_dot = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
    lin_abs = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
    l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
    l_sigmoid = lasagne.layers.DenseLayer(l_sum, params.memsize,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)
    l_softmax = lasagne.layers.DenseLayer(l_sigmoid, params.nout,
                                          nonlinearity=T.nnet.softmax)

    X = lasagne.layers.get_output(l_softmax, {lin_dot: g1_dot_g2, lin_abs: g1_abs_g2})
    Y = T.log(X)

    # KL-style divergence between the gold score distribution and the prediction
    cost = scores * (T.log(scores) - Y)
    cost = cost.sum(axis=1) / (float(params.nout))

    # expected score under the predicted distribution
    prediction = 0.
    i = params.minval
    while i <= params.maxval:
        prediction = prediction + i * X[:, i - 1]
        i += 1

    self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)
    self.network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)

    reg = self.getRegTerm(params, We, initial_We)
    self.trainable = self.getTrainableParams(params)
    cost = T.mean(cost) + reg

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], prediction)
    self.cost_function = theano.function([scores, g1batchindices, g2batchindices,
                                          g1mask, g2mask], cost)

    grads = theano.gradient.grad(cost, self.trainable)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.trainable, params.eta)
    self.train_function = theano.function([scores, g1batchindices, g2batchindices,
                                           g1mask, g2mask], cost, updates=updates)
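# --- Illustration (not part of the model above) ---
# Numpy sketch of the similarity objective above: the gold score distribution
# over the classes minval..maxval is compared to the softmax output with a
# KL-style term, and the scalar prediction is the expectation under the
# predicted distribution (assuming minval == 1, so column i-1 holds class i).
# Illustrative names only.
import numpy as np

def kl_cost(pred, gold, nout, eps=1e-7):
    return ((gold * (np.log(gold + eps) - np.log(pred + eps))).sum(axis=1)
            / float(nout)).mean()

def expected_score(pred, minval, maxval):
    values = np.arange(minval, maxval + 1)
    return pred[:, :len(values)].dot(values)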
def __init__(self, We_initial, params):
    print("WordL1Model")
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    g1batchindices = T.imatrix()
    g2batchindices = T.imatrix()
    g1mask = T.matrix()
    g2mask = T.matrix()
    scores = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_out = lasagne_average_layer([l_emb, l_mask])

    embg1 = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_out, {l_in: g2batchindices, l_mask: g2mask})

    def L2_norm(vec):
        # normalize each row; T.sqrt (not np.sqrt) keeps the op symbolic
        return vec / T.sqrt((vec**2).sum(axis=1, keepdims=True) + 1e-4)

    embg1 = L2_norm(embg1)
    embg2 = L2_norm(embg2)

    # expected gold score from the score distribution
    gold = 0.
    i = params.minval
    while i <= params.maxval:
        gold = gold + i * scores[:, i - 1]
        i += 1

    dif = (embg1 - embg2).norm(L=1, axis=1)
    sim = T.exp(-dif)
    sim = T.clip(sim, 1e-7, 1 - 1e-7)
    gold = T.clip(gold / 5.0, 1e-7, 1 - 1e-7)

    self.network_params = lasagne.layers.get_all_params(l_out, trainable=True)
    self.network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True)

    reg = self.getRegTerm(params, We, initial_We)
    self.trainable = self.getTrainableParams(params)
    cost = T.mean((sim - gold)**2) + reg

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], sim)
    self.cost_function = theano.function([scores, g1batchindices, g2batchindices,
                                          g1mask, g2mask], cost)

    grads = theano.gradient.grad(cost, self.trainable)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.trainable, params.eta)
    self.train_function = theano.function([scores, g1batchindices, g2batchindices,
                                           g1mask, g2mask], cost, updates=updates)
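# --- Illustration (not part of the model above) ---
# Sketch of the L1-distance scoring above: similarity is exp(-||a - b||_1) on
# row-normalized embeddings, which lies in (0, 1] and is regressed toward the
# gold score divided by 5. Hedged numpy illustration; names are assumptions.
import numpy as np

def l1_similarity(a, b, eps=1e-4):
    a = a / np.sqrt((a**2).sum(axis=1, keepdims=True) + eps)
    b = b / np.sqrt((b**2).sum(axis=1, keepdims=True) + eps)
    return np.exp(-np.abs(a - b).sum(axis=1))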
def __init__(self, We_initial, params):
    p = None
    if params.traintype == "reg" or params.traintype == "rep":
        p = cPickle.load(open(params.regfile, 'rb'))
        print(p)  # contains We, W, and b
    if params.traintype == "reg":
        print("regularizing to parameters")
    if params.traintype == "rep":
        print("not updating embeddings")

    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    if params.traintype == "reg":
        initial_We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        updatewords = True
    if params.traintype == "rep":
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        updatewords = False

    g1batchindices = T.imatrix()
    g1mask = T.matrix()
    scores = T.matrix()

    if params.traintype == "reg" or params.traintype == "rep":
        W = np.asarray(p[1].get_value(), dtype=config.floatX)
        b = np.asarray(p[2].get_value(), dtype=config.floatX)

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_average = lasagne_average_layer([l_emb, l_mask])
    l_out = lasagne.layers.DenseLayer(l_average, params.layersize,
                                      nonlinearity=params.nonlinearity)

    if params.traintype == "reg" or params.traintype == "rep":
        # rebuild the projection with the loaded W and b
        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                              output_size=We.get_value().shape[1], W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_out = lasagne.layers.DenseLayer(l_average, params.layersize,
                                          nonlinearity=params.nonlinearity, W=W, b=b)

    embg = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})

    # embg has params.layersize units (the projection output, not the word dim)
    l_in2 = lasagne.layers.InputLayer((None, params.layersize))
    l_sigmoid = lasagne.layers.DenseLayer(l_in2, params.memsize,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)
    l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 2, nonlinearity=T.nnet.softmax)

    X = lasagne.layers.get_output(l_softmax, {l_in2: embg})
    cost = T.nnet.categorical_crossentropy(X, scores)
    prediction = T.argmax(X, axis=1)

    self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)
    self.network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)

    reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
    self.trainable = self.getTrainableParams(params)
    cost = T.mean(cost) + reg

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg)
    self.scoring_function = theano.function([g1batchindices, g1mask], prediction)
    self.cost_function = theano.function([scores, g1batchindices, g1mask], cost)

    grads = theano.gradient.grad(cost, self.trainable)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.trainable, params.eta)
    self.train_function = theano.function([scores, g1batchindices, g1mask], cost,
                                          updates=updates)
def __init__(self, We_initial, params):
    if params.maxval:
        self.nout = params.maxval - params.minval + 1

    # params
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    if params.npc > 0:
        pc = theano.shared(np.asarray(params.pc, dtype=config.floatX))

    g1batchindices = T.imatrix()
    g2batchindices = T.imatrix()
    g1mask = T.matrix()
    g2mask = T.matrix()
    scores = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_average = lasagne_average_layer([l_emb, l_mask])
    l_out = lasagne.layers.DenseLayer(l_average, params.layersize,
                                      nonlinearity=params.nonlinearity)

    embg1 = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_out, {l_in: g2batchindices, l_mask: g2mask})

    if params.npc <= 0:
        print("#pc <=0, do not remove pc")
    elif params.npc == 1:
        print("#pc == 1")
        proj1 = embg1.dot(pc.transpose())
        proj2 = embg2.dot(pc.transpose())
        embg1 = embg1 - theano.tensor.outer(proj1, pc)
        embg2 = embg2 - theano.tensor.outer(proj2, pc)
    else:
        print("#pc > 1")
        proj1 = embg1.dot(pc.transpose())
        proj2 = embg2.dot(pc.transpose())
        embg1 = embg1 - theano.tensor.dot(proj1, pc)
        embg2 = embg2 - theano.tensor.dot(proj2, pc)

    g1_dot_g2 = embg1 * embg2
    g1_abs_g2 = abs(embg1 - embg2)

    lin_dot = lasagne.layers.InputLayer((None, params.layersize))
    lin_abs = lasagne.layers.InputLayer((None, params.layersize))
    l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
    l_sigmoid = lasagne.layers.DenseLayer(l_sum, params.memsize,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)

    if params.task == "sim":
        l_softmax = lasagne.layers.DenseLayer(l_sigmoid, self.nout,
                                              nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {lin_dot: g1_dot_g2,
                                                  lin_abs: g1_abs_g2})
        Y = T.log(X)
        cost = scores * (T.log(scores) - Y)
        cost = cost.sum(axis=1) / (float(self.nout))
        prediction = 0.
        i = params.minval
        while i <= params.maxval:
            prediction = prediction + i * X[:, i - 1]
            i += 1
    elif params.task == "ent":
        l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 3, nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {lin_dot: g1_dot_g2,
                                                  lin_abs: g1_abs_g2})
        cost = theano.tensor.nnet.categorical_crossentropy(X, scores)
        prediction = T.argmax(X, axis=1)
    else:
        raise ValueError('Params.task not set correctly.')

    self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)
    self.network_params.pop(0)  # do not include the word embedding as network parameters
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)

    reg = self.getRegTerm(params, We, initial_We)
    self.trainable = self.getTrainableParams(params)
    cost = T.mean(cost) + reg

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], prediction)
    self.cost_function = theano.function([scores, g1batchindices, g2batchindices,
                                          g1mask, g2mask], cost)

    # updates
    grads = theano.gradient.grad(cost, self.trainable)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.trainable, params.eta)
    self.train_function = theano.function([scores, g1batchindices, g2batchindices,
                                           g1mask, g2mask], cost, updates=updates)
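# --- Illustration (not part of the model above) ---
# The `scores` matrix consumed by the "sim" branch is a distribution over the
# integer classes minval..maxval whose expectation equals the gold scalar.
# A common construction (Tai et al. 2015), sketched in numpy under the
# assumption minval == 1; this helper is not defined in this file.
import numpy as np

def score_to_distribution(y, nout):
    dist = np.zeros(nout, dtype='float32')
    floor = int(np.floor(y))
    if floor == y:
        dist[floor - 1] = 1.0
    else:
        dist[floor - 1] = floor + 1 - y   # mass on the lower class
        dist[floor] = y - floor           # remainder on the upper class
    return dist

# score_to_distribution(3.6, 5) -> [0., 0., 0.4, 0.6, 0.]; expectation 3.6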
def __init__(self, We_initial, params):
    # params
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    self.We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    # symbolic params
    g1batchindices = T.imatrix()
    g2batchindices = T.imatrix()
    g3batchindices = T.imatrix()
    p1batchindices = T.imatrix()
    g1mask = T.matrix()
    g2mask = T.matrix()
    g3mask = T.matrix()
    p1mask = T.matrix()

    # get embeddings
    l_in = lasagne.layers.InputLayer((None, None, 1))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=self.We.get_value().shape[0],
                                          output_size=self.We.get_value().shape[1],
                                          W=self.We)
    l_average = lasagne_average_layer([l_emb, l_mask])

    embg1 = lasagne.layers.get_output(l_average, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_average, {l_in: g2batchindices, l_mask: g2mask})
    embg3 = lasagne.layers.get_output(l_average, {l_in: g3batchindices, l_mask: g3mask})
    embp1 = lasagne.layers.get_output(l_average, {l_in: p1batchindices, l_mask: p1mask})

    # objective function
    crt = T.concatenate([embg1, embg2, embg3], axis=1)
    crf = T.concatenate([embg1, embg2, embp1], axis=1)

    l_in2 = lasagne.layers.InputLayer((None, self.We.get_value().shape[1] * 3))
    d1 = lasagne.layers.DenseLayer(l_in2, self.We.get_value().shape[1],
                                   nonlinearity=lasagne.nonlinearities.tanh)
    l_sigmoid = lasagne.layers.DenseLayer(d1, 1,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)

    st = lasagne.layers.get_output(l_sigmoid, {l_in2: crt})
    sf = lasagne.layers.get_output(l_sigmoid, {l_in2: crf})

    cost = params.margin - st + sf
    cost = cost * (cost > 0)

    #self.all_params = lasagne.layers.get_all_params(l_average, trainable=True) + [self.M, self.N]
    self.network_params = lasagne.layers.get_all_params(l_average, trainable=True) + \
        lasagne.layers.get_all_params(l_sigmoid, trainable=True)
    self.network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_average, trainable=True) + \
        lasagne.layers.get_all_params(l_sigmoid, trainable=True)

    l2 = 0.5 * params.LC * sum(lasagne.regularization.l2(x) for x in self.network_params)
    #word_reg = 0.5*params.LW*lasagne.regularization.l2(self.We-initial_We) + 0.5*params.LC*self.M.norm(2) + self.N.norm(2)
    word_reg = 0.5 * params.LW * lasagne.regularization.l2(self.We - initial_We)
    cost = T.mean(cost) + word_reg + l2

    # feedforward
    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.cost_function = theano.function([g1batchindices, g2batchindices,
                                          g3batchindices, p1batchindices,
                                          g1mask, g2mask, g3mask, p1mask],
                                         cost, on_unused_input='warn')
    prediction = st * 4 + 1
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g3batchindices, g1mask, g2mask, g3mask],
                                            prediction, on_unused_input='warn')

    # updates
    grads = theano.gradient.grad(cost, self.all_params)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.all_params, params.eta)
    self.train_function = theano.function([g1batchindices, g2batchindices,
                                           g3batchindices, p1batchindices,
                                           g1mask, g2mask, g3mask, p1mask],
                                          cost, updates=updates, on_unused_input='warn')
def __init__(self, We_initial, params):
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    self.dropout = params.dropout
    self.word_dropout = params.word_dropout

    g1batchindices = T.imatrix()
    g2batchindices = T.imatrix()
    p1batchindices = T.imatrix()
    p2batchindices = T.imatrix()
    g1mask = T.matrix()
    g2mask = T.matrix()
    p1mask = T.matrix()
    p2mask = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)

    if params.dropout > 0:
        l_emb = lasagne.layers.DropoutLayer(l_emb, params.dropout)
    elif params.word_dropout > 0:
        l_emb = lasagne.layers.DropoutLayer(l_emb, params.word_dropout, shared_axes=(2,))

    if params.model == "lstm":
        if params.outgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb, params.dim, peepholes=True,
                                              learn_init=False, mask_input=l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput_layer(l_emb, params.dim, peepholes=True,
                                                 learn_init=False, mask_input=l_mask)
        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)
    elif params.model == "bilstm":
        if params.outgate:
            l_rnn = lasagne.layers.LSTMLayer(l_emb, params.dim, learn_init=False,
                                             mask_input=l_mask)
            l_rnnb = lasagne.layers.LSTMLayer(l_emb, params.dim, learn_init=False,
                                              mask_input=l_mask, backwards=True)
        else:
            l_rnn = lasagne_lstm_nooutput_layer(l_emb, params.dim, learn_init=False,
                                                mask_input=l_mask)
            l_rnnb = lasagne_lstm_nooutput_layer(l_emb, params.dim, learn_init=False,
                                                 mask_input=l_mask, backwards=True)
        if not params.sumlayer:
            l_outf = lasagne.layers.SliceLayer(l_rnn, -1, 1)
            l_outb = lasagne.layers.SliceLayer(l_rnnb, -1, 1)
            l_concat = lasagne.layers.ConcatLayer([l_outf, l_outb], axis=1)
            l_out = lasagne.layers.DenseLayer(l_concat, params.dim,
                                              nonlinearity=lasagne.nonlinearities.tanh)
        else:
            l_out = lasagne_sum_layer([l_rnn, l_rnnb])
            l_out = lasagne_average_layer([l_out, l_mask], tosum=False)
    elif params.model == "lstmavg":
        if params.outgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb, params.dim, peepholes=True,
                                              learn_init=False, mask_input=l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput_layer(l_emb, params.dim, peepholes=True,
                                                 learn_init=False, mask_input=l_mask)
        l_out = lasagne_average_layer([l_lstm, l_mask], tosum=False)
    elif params.model == "bilstmavg":
        if params.outgate:
            l_rnn = lasagne.layers.LSTMLayer(l_emb, params.dim, learn_init=False,
                                             mask_input=l_mask)
            l_rnnb = lasagne.layers.LSTMLayer(l_emb, params.dim, learn_init=False,
                                              mask_input=l_mask, backwards=True)
        else:
            l_rnn = lasagne_lstm_nooutput_layer(l_emb, params.dim, learn_init=False,
                                                mask_input=l_mask)
            l_rnnb = lasagne_lstm_nooutput_layer(l_emb, params.dim, learn_init=False,
                                                 mask_input=l_mask, backwards=True)
        if not params.sumlayer:
            l_concat = lasagne.layers.ConcatLayer([l_rnn, l_rnnb], axis=2)
            l_out = lasagne.layers.DenseLayer(l_concat, params.dim, num_leading_axes=-1,
                                              nonlinearity=lasagne.nonlinearities.tanh)
            l_out = lasagne_average_layer([l_out, l_mask], tosum=False)
        else:
            l_out = lasagne_sum_layer([l_rnn, l_rnnb])
            l_out = lasagne_average_layer([l_out, l_mask], tosum=False)
    elif params.model == "gran":
        if params.outgate:
            l_lstm = lasagne_gran_layer(l_emb, params.dim, peepholes=True,
                                        learn_init=False, mask_input=l_mask,
                                        gran_type=params.gran_type)
        else:
            l_lstm = lasagne_gran_layer_nooutput_layer(l_emb, params.dim, peepholes=True,
                                                       learn_init=False, mask_input=l_mask,
                                                       gran_type=params.gran_type)
        if params.gran_type == 1 or params.gran_type == 2:
            l_out = lasagne_average_layer([l_lstm, l_mask], tosum=False)
        else:
            l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)
    elif params.model == "bigran":
        if params.outgate:
            l_lstm = lasagne_gran_layer(l_emb, params.dim, peepholes=True,
                                        learn_init=False, mask_input=l_mask,
                                        gran_type=params.gran_type)
            l_lstmb = lasagne_gran_layer(l_emb, params.dim, peepholes=True,
                                         learn_init=False, mask_input=l_mask,
                                         backwards=True)
        else:
            l_lstm = lasagne_gran_layer_nooutput_layer(l_emb, params.dim, peepholes=True,
                                                       learn_init=False, mask_input=l_mask,
                                                       gran_type=params.gran_type)
            l_lstmb = lasagne_gran_layer_nooutput_layer(l_emb, params.dim, peepholes=True,
                                                        learn_init=False, mask_input=l_mask,
                                                        backwards=True)
        if not params.sumlayer:
            l_concat = lasagne.layers.ConcatLayer([l_lstm, l_lstmb], axis=2)
            l_out = lasagne.layers.DenseLayer(l_concat, params.dim, num_leading_axes=-1,
                                              nonlinearity=lasagne.nonlinearities.tanh)
            l_out = lasagne_average_layer([l_out, l_mask], tosum=False)
        else:
            l_out = lasagne_sum_layer([l_lstm, l_lstmb])
            l_out = lasagne_average_layer([l_out, l_mask], tosum=False)
    elif params.model == "wordaverage":
        l_out = lasagne_average_layer([l_emb, l_mask], tosum=False)
    else:
        print("Invalid model specified. Exiting.")
        sys.exit(0)

    self.final_layer = l_out

    embg1 = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask},
                                      deterministic=False)
    embg2 = lasagne.layers.get_output(l_out, {l_in: g2batchindices, l_mask: g2mask},
                                      deterministic=False)
    embp1 = lasagne.layers.get_output(l_out, {l_in: p1batchindices, l_mask: p1mask},
                                      deterministic=False)
    embp2 = lasagne.layers.get_output(l_out, {l_in: p2batchindices, l_mask: p2mask},
                                      deterministic=False)

    t_embg1 = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask},
                                        deterministic=True)
    t_embg2 = lasagne.layers.get_output(l_out, {l_in: g2batchindices, l_mask: g2mask},
                                        deterministic=True)

    def fix(x):
        # floor nonpositive squared norms at a tiny epsilon so T.sqrt stays
        # differentiable when dropout zeroes out an entire embedding
        return x * (x > 0) + 1E-10 * (x <= 0)

    # objective function
    g1g2 = (embg1 * embg2).sum(axis=1)
    g1g2norm = T.sqrt(fix(T.sum(embg1**2, axis=1))) * T.sqrt(fix(T.sum(embg2**2, axis=1)))
    g1g2 = g1g2 / g1g2norm

    p1g1 = (embp1 * embg1).sum(axis=1)
    p1g1norm = T.sqrt(fix(T.sum(embp1**2, axis=1))) * T.sqrt(fix(T.sum(embg1**2, axis=1)))
    p1g1 = p1g1 / p1g1norm

    p2g2 = (embp2 * embg2).sum(axis=1)
    p2g2norm = T.sqrt(fix(T.sum(embp2**2, axis=1))) * T.sqrt(fix(T.sum(embg2**2, axis=1)))
    p2g2 = p2g2 / p2g2norm

    costp1g1 = params.margin - g1g2 + p1g1
    costp1g1 = costp1g1 * (costp1g1 > 0)
    costp2g2 = params.margin - g1g2 + p2g2
    costp2g2 = costp2g2 * (costp2g2 > 0)
    cost = costp1g1 + costp2g2

    network_params = lasagne.layers.get_all_params(l_out, trainable=True)
    network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True)
    self.layer = l_out
    print(self.all_params)

    # regularization
    l2 = 0.5 * params.LC * sum(lasagne.regularization.l2(x) for x in network_params)
    word_reg = 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
    cost = T.mean(cost) + l2 + word_reg

    # deterministic cosine scores for evaluation
    g1g2 = (t_embg1 * t_embg2).sum(axis=1)
    g1g2norm = T.sqrt(T.sum(t_embg1**2, axis=1)) * T.sqrt(T.sum(t_embg2**2, axis=1))
    g1g2 = g1g2 / g1g2norm

    self.feedforward_function = theano.function([g1batchindices, g1mask], t_embg1)
    prediction = g1g2
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], prediction)

    grads = theano.gradient.grad(cost, self.all_params)
    updates = params.learner(grads, self.all_params, params.eta)
    self.train_function = theano.function([g1batchindices, g2batchindices,
                                           p1batchindices, p2batchindices,
                                           g1mask, g2mask, p1mask, p2mask],
                                          cost, updates=updates)

    cost = costp1g1 + costp2g2
    cost = T.mean(cost)
    self.cost_function = theano.function([g1batchindices, g2batchindices,
                                          p1batchindices, p2batchindices,
                                          g1mask, g2mask, p1mask, p2mask], cost)

    print("Num Params:", lasagne.layers.count_params(self.final_layer))
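# --- Illustration (not part of the model above) ---
# Numpy sketch of the fix() guard used in the objective above: squared norms
# are floored at a tiny epsilon before the square root so the gradient stays
# finite when dropout zeroes out an entire embedding. Names are illustrative.
import numpy as np

def fix(x, eps=1e-10):
    return x * (x > 0) + eps * (x <= 0)

def safe_cosine(a, b):
    num = (a * b).sum(axis=1)
    den = np.sqrt(fix((a**2).sum(axis=1))) * np.sqrt(fix((b**2).sum(axis=1)))
    return num / den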
def __init__(self, We_initial, params):
    # params
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    # symbolic params
    g1batchindices = T.imatrix(); g2batchindices = T.imatrix()
    p1batchindices = T.imatrix(); p2batchindices = T.imatrix()
    g1mask = T.matrix(); g2mask = T.matrix()
    p1mask = T.matrix(); p2mask = T.matrix()

    # get embeddings
    l_in = lasagne.layers.InputLayer((None, None, 1))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_average = lasagne_average_layer([l_emb, l_mask])

    embg1 = lasagne.layers.get_output(l_average, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_average, {l_in: g2batchindices, l_mask: g2mask})
    embp1 = lasagne.layers.get_output(l_average, {l_in: p1batchindices, l_mask: p1mask})
    embp2 = lasagne.layers.get_output(l_average, {l_in: p2batchindices, l_mask: p2mask})

    # objective function
    g1g2 = (embg1 * embg2).sum(axis=1)
    g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    g1g2 = g1g2 / g1g2norm

    p1g1 = (embp1 * embg1).sum(axis=1)
    p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(T.sum(embg1**2, axis=1))
    p1g1 = p1g1 / p1g1norm

    p2g2 = (embp2 * embg2).sum(axis=1)
    p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    p2g2 = p2g2 / p2g2norm

    costp1g1 = params.margin - g1g2 + p1g1
    costp1g1 = costp1g1 * (costp1g1 > 0)
    costp2g2 = params.margin - g1g2 + p2g2
    costp2g2 = costp2g2 * (costp2g2 > 0)
    cost = costp1g1 + costp2g2

    self.all_params = lasagne.layers.get_all_params(l_average, trainable=True)

    word_reg = 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
    cost = T.mean(cost) + word_reg

    # feedforward
    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.cost_function = theano.function([g1batchindices, g2batchindices,
                                          p1batchindices, p2batchindices,
                                          g1mask, g2mask, p1mask, p2mask], cost)
    prediction = g1g2
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], prediction)

    # updates
    grads = theano.gradient.grad(cost, self.all_params)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.all_params, params.eta)
    self.train_function = theano.function([g1batchindices, g2batchindices,
                                           p1batchindices, p2batchindices,
                                           g1mask, g2mask, p1mask, p2mask],
                                          cost, updates=updates)
def __init__(self, We_initial, params):
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    g1batchindices = T.imatrix()
    g2batchindices = T.imatrix()
    p1batchindices = T.imatrix()
    p2batchindices = T.imatrix()
    g1mask = T.matrix()
    g2mask = T.matrix()
    p1mask = T.matrix()
    p2mask = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_average = lasagne_average_layer([l_emb, l_mask])

    l_1 = lasagne.layers.DenseLayer(l_average, params.hiddensize,
                                    W=lasagne.init.Normal(0.1),
                                    b=lasagne.init.Constant(0.),
                                    nonlinearity=params.nonlinearity)
    l_2 = lasagne.layers.DenseLayer(l_1, params.hiddensize, nonlinearity=params.nonlinearity)
    l_3 = lasagne.layers.DenseLayer(l_2, params.hiddensize, nonlinearity=params.nonlinearity)
    l_4 = lasagne.layers.DenseLayer(l_3, params.hiddensize, nonlinearity=params.nonlinearity)

    l_end = None
    if params.numlayers == 1:
        l_end = l_1
    elif params.numlayers == 2:
        l_end = l_2
    elif params.numlayers == 3:
        l_end = l_3
    elif params.numlayers == 4:
        l_end = l_4
    else:
        raise ValueError('Only 1-4 layers are supported currently.')

    embg1 = lasagne.layers.get_output(l_end, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_end, {l_in: g2batchindices, l_mask: g2mask})
    embp1 = lasagne.layers.get_output(l_end, {l_in: p1batchindices, l_mask: p1mask})
    embp2 = lasagne.layers.get_output(l_end, {l_in: p2batchindices, l_mask: p2mask})

    g1g2 = (embg1 * embg2).sum(axis=1)
    g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    g1g2 = g1g2 / g1g2norm

    p1g1 = (embp1 * embg1).sum(axis=1)
    p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(T.sum(embg1**2, axis=1))
    p1g1 = p1g1 / p1g1norm

    p2g2 = (embp2 * embg2).sum(axis=1)
    p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    p2g2 = p2g2 / p2g2norm

    costp1g1 = params.margin - g1g2 + p1g1
    costp1g1 = costp1g1 * (costp1g1 > 0)
    costp2g2 = params.margin - g1g2 + p2g2
    costp2g2 = costp2g2 * (costp2g2 > 0)
    cost = costp1g1 + costp2g2

    network_params = lasagne.layers.get_all_params(l_end, trainable=True)
    network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_end, trainable=True)

    # regularization
    l2 = 0.5 * params.LC * sum(lasagne.regularization.l2(x) for x in network_params)
    if params.updatewords:
        word_reg = 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
        cost = T.mean(cost) + l2 + word_reg
    else:
        cost = T.mean(cost) + l2

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.cost_function = theano.function([g1batchindices, g2batchindices,
                                          p1batchindices, p2batchindices,
                                          g1mask, g2mask, p1mask, p2mask], cost)
    prediction = g1g2
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], prediction)

    self.train_function = None
    if params.updatewords:
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                     for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([g1batchindices, g2batchindices,
                                               p1batchindices, p2batchindices,
                                               g1mask, g2mask, p1mask, p2mask],
                                              cost, updates=updates)
    else:
        self.all_params = network_params
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                     for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([g1batchindices, g2batchindices,
                                               p1batchindices, p2batchindices,
                                               g1mask, g2mask, p1mask, p2mask],
                                              cost, updates=updates)
def __init__(self, We_initial, regfile=None, layersize=300, num_filters=4,
             filter_size=11, margin=0.4, LC=1e-6, LW=1e-6, updatewords=True,
             clip=1.0, eta=0.025):
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

    g1batchindices = T.imatrix()
    g2batchindices = T.imatrix()
    p1batchindices = T.imatrix()
    p2batchindices = T.imatrix()
    g1mask = T.matrix()
    g2mask = T.matrix()
    p1mask = T.matrix()
    p2mask = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))

    if regfile is None:
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                              output_size=We.get_value().shape[1], W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_reshape = lasagne.layers.ReshapeLayer(l_average, ([0], 1, -1))
        l_conv = lasagne.layers.Conv1DLayer(l_reshape, num_filters=num_filters,
                                            filter_size=filter_size, stride=1,
                                            pad=filter_size // 2,
                                            nonlinearity=lasagne.nonlinearities.rectify)
        l_pooling = lasagne.layers.MaxPool1DLayer(l_conv, pool_size=2)
        # (batch_size, layersize)
        l_proj = lasagne.layers.DenseLayer(l_pooling, layersize,
                                           nonlinearity=lasagne.nonlinearities.tanh)
    else:
        logging.debug("loading regfile: {}".format(regfile))
        p = cPickle.load(open(regfile, 'rb'))
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        W_conv = np.asarray(p[1].get_value(), dtype=config.floatX)
        b_conv = np.asarray(p[2].get_value(), dtype=config.floatX)
        W_proj = np.asarray(p[3].get_value(), dtype=config.floatX)
        b_proj = np.asarray(p[4].get_value(), dtype=config.floatX)

        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                              output_size=We.get_value().shape[1], W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_reshape = lasagne.layers.ReshapeLayer(l_average, ([0], 1, -1))
        l_conv = lasagne.layers.Conv1DLayer(l_reshape, num_filters=num_filters,
                                            filter_size=filter_size, stride=1,
                                            pad=filter_size // 2,
                                            nonlinearity=lasagne.nonlinearities.rectify,
                                            W=W_conv, b=b_conv)
        l_pooling = lasagne.layers.MaxPool1DLayer(l_conv, pool_size=2)
        # (batch_size, layersize)
        l_proj = lasagne.layers.DenseLayer(l_pooling, layersize,
                                           nonlinearity=lasagne.nonlinearities.tanh,
                                           W=W_proj, b=b_proj)

    embg1 = lasagne.layers.get_output(l_proj, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_proj, {l_in: g2batchindices, l_mask: g2mask})
    embp1 = lasagne.layers.get_output(l_proj, {l_in: p1batchindices, l_mask: p1mask})
    embp2 = lasagne.layers.get_output(l_proj, {l_in: p2batchindices, l_mask: p2mask})

    g1g2 = (embg1 * embg2).sum(axis=1)
    g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    g1g2 = g1g2 / g1g2norm

    p1g1 = (embp1 * embg1).sum(axis=1)
    p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(T.sum(embg1**2, axis=1))
    p1g1 = p1g1 / p1g1norm

    p2g2 = (embp2 * embg2).sum(axis=1)
    p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    p2g2 = p2g2 / p2g2norm

    costp1g1 = margin - g1g2 + p1g1
    costp1g1 = costp1g1 * (costp1g1 > 0)
    costp2g2 = margin - g1g2 + p2g2
    costp2g2 = costp2g2 * (costp2g2 > 0)
    cost = costp1g1 + costp2g2

    network_params = lasagne.layers.get_all_params(l_proj, trainable=True)
    network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_proj, trainable=True)

    l2 = 0.5 * LC * sum(lasagne.regularization.l2(x) for x in network_params)
    if updatewords:
        word_reg = 0.5 * LW * lasagne.regularization.l2(We - initial_We)
        cost = T.mean(cost) + l2 + word_reg
    else:
        cost = T.mean(cost) + l2

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.cost_function = theano.function([g1batchindices, g2batchindices,
                                          p1batchindices, p2batchindices,
                                          g1mask, g2mask, p1mask, p2mask], cost)
    prediction = g1g2
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], prediction)

    self.train_function = None
    if updatewords:
        grads = theano.gradient.grad(cost, self.all_params)
        if clip:
            grads = [lasagne.updates.norm_constraint(grad, clip, range(grad.ndim))
                     for grad in grads]
        updates = lasagne.updates.adam(grads, self.all_params, eta)
        self.train_function = theano.function([g1batchindices, g2batchindices,
                                               p1batchindices, p2batchindices,
                                               g1mask, g2mask, p1mask, p2mask],
                                              cost, updates=updates)
    else:
        self.all_params = network_params
        grads = theano.gradient.grad(cost, self.all_params)
        if clip:
            grads = [lasagne.updates.norm_constraint(grad, clip, range(grad.ndim))
                     for grad in grads]
        updates = lasagne.updates.adam(grads, self.all_params, eta)
        self.train_function = theano.function([g1batchindices, g2batchindices,
                                               p1batchindices, p2batchindices,
                                               g1mask, g2mask, p1mask, p2mask],
                                              cost, updates=updates)
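# --- Illustration (not part of the model above) ---
# The clipping used throughout rescales each gradient tensor so its total L2
# norm (over all axes, hence range(grad.ndim)) does not exceed the clip
# threshold. Equivalent hedged numpy sketch; not this file's API.
import numpy as np

def clip_by_norm(grad, max_norm):
    norm = np.sqrt((grad**2).sum())
    if norm > max_norm:
        grad = grad * (max_norm / norm)
    return grad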
def __init__(self, We_initial, params):
    # params
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    self.We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    self.M = theano.shared(np.eye(self.We.get_value().shape[1]).astype(
        theano.config.floatX), borrow=True)
    #self.N = theano.shared(np.eye(self.We.get_value().shape[1]).astype(theano.config.floatX), borrow=True)

    # symbolic params
    g1batchindices = T.imatrix()
    g2batchindices = T.imatrix()
    g3batchindices = T.imatrix()
    p1batchindices = T.imatrix()
    g1mask = T.matrix()
    g2mask = T.matrix()
    g3mask = T.matrix()
    p1mask = T.matrix()
    max_gg = T.scalar()
    min_gg = T.scalar()

    # get embeddings
    l_in = lasagne.layers.InputLayer((None, None, 1))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=self.We.get_value().shape[0],
                                          output_size=self.We.get_value().shape[1],
                                          W=self.We)
    l_average = lasagne_average_layer([l_emb, l_mask])

    rt = lasagne.layers.get_output(l_emb, {l_in: g2batchindices, l_mask: g2mask})
    rm = lasagne.layers.get_output(l_emb, {l_in: g3batchindices, l_mask: g3mask})

    # max/min pooling over the word dimension
    pt = T.max(rt, axis=1)
    pm = T.max(rm, axis=1)
    nt = T.min(rt, axis=1)
    nm = T.min(rm, axis=1)
    wt = T.concatenate([pt, nt], axis=1)
    wm = T.concatenate([pm, nm], axis=1)

    g1g2 = (wt * wm).sum(axis=1)
    g1g2norm = T.sqrt(T.sum(wt**2, axis=1)) * T.sqrt(T.sum(wm**2, axis=1))
    g1g2 = g1g2 / g1g2norm

    # running extrema, carried across batches through max_gg / min_gg
    max_g = (max_gg >= T.max(g1g2)) * max_gg + (max_gg < T.max(g1g2)) * T.max(g1g2)
    min_g = (min_gg <= T.min(g1g2)) * min_gg + (min_gg > T.min(g1g2)) * T.min(g1g2)
    g1g2 = (g1g2 - min_g) / (max_g - min_g)

    embg1 = lasagne.layers.get_output(l_average, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_average, {l_in: g2batchindices, l_mask: g2mask})
    embg3 = lasagne.layers.get_output(l_average, {l_in: g3batchindices, l_mask: g3mask})
    embp1 = lasagne.layers.get_output(l_average, {l_in: p1batchindices, l_mask: p1mask})

    # objective function
    crt = T.nnet.sigmoid(T.sum(embg1 * T.dot(embg3, self.M), axis=1))
    #+ T.sum(embg2 * T.dot(embg3, self.N), axis=1))
    crf = T.nnet.sigmoid(T.sum(embg1 * T.dot(embp1, self.M), axis=1))
    #+ T.sum(embg2 * T.dot(embp1, self.N), axis=1))

    cost = params.margin - crt + crf
    cost = cost * (cost > 0)

    #self.all_params = lasagne.layers.get_all_params(l_average, trainable=True) + [self.M, self.N]
    self.all_params = lasagne.layers.get_all_params(l_average, trainable=True) + [self.M]

    #word_reg = 0.5*params.LW*lasagne.regularization.l2(self.We-initial_We) + 0.5*params.LC*self.M.norm(2) + self.N.norm(2)
    word_reg = 0.5 * params.LW * lasagne.regularization.l2(self.We - initial_We) + \
        0.5 * params.LC * self.M.norm(2)
    cost = T.mean(cost) + word_reg

    # feedforward
    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.cost_function = theano.function([g1batchindices, g2batchindices,
                                          g3batchindices, p1batchindices,
                                          g1mask, g2mask, g3mask, p1mask],
                                         cost, on_unused_input='warn')
    prediction = (crt + g1g2) * 2 + 1  # map the combined score to the 1-5 range
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g3batchindices, g1mask, g2mask, g3mask,
                                             max_gg, min_gg],
                                            prediction, on_unused_input='warn')

    # updates
    grads = theano.gradient.grad(cost, self.all_params)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.all_params, params.eta)
    self.train_function = theano.function([g1batchindices, g2batchindices,
                                           g3batchindices, p1batchindices,
                                           g1mask, g2mask, g3mask, p1mask,
                                           max_gg, min_gg],
                                          [cost, max_g, min_g],
                                          updates=updates, on_unused_input='warn')
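# --- Illustration (not part of the model above) ---
# Sketch of the running min/max rescaling above: cosine scores are squashed
# to [0, 1] using extrema carried across batches via max_gg/min_gg, and the
# combined score is then mapped to the 1-5 range. Hedged numpy illustration.
import numpy as np

def rescale_scores(scores, running_min, running_max):
    running_max = max(running_max, float(scores.max()))
    running_min = min(running_min, float(scores.min()))
    scaled = (scores - running_min) / (running_max - running_min)
    return scaled, running_min, running_max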
def __init__(self, We_initial, Wc_initial, params):
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    initial_Wc = theano.shared(np.asarray(Wc_initial, dtype=config.floatX))
    Wc = theano.shared(np.asarray(Wc_initial, dtype=config.floatX))

    g1batchindices = T.imatrix()
    char_g1batchindices = T.itensor3()
    g2batchindices = T.imatrix()
    char_g2batchindices = T.itensor3()
    p1batchindices = T.imatrix()
    char_p1batchindices = T.itensor3()
    p2batchindices = T.imatrix()
    char_p2batchindices = T.itensor3()
    g1mask = T.matrix()
    char_g1mask = T.tensor3()
    g2mask = T.matrix()
    char_g2mask = T.tensor3()
    p1mask = T.matrix()
    char_p1mask = T.tensor3()
    p2mask = T.matrix()
    char_p2mask = T.tensor3()

    l_in = lasagne.layers.InputLayer((None, None))
    l_char_in = lasagne.layers.InputLayer((None, None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_char_mask = lasagne.layers.InputLayer(shape=(None, None, None))

    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_char_emb = lasagne.layers.EmbeddingLayer(l_char_in,
                                               input_size=Wc.get_value().shape[0],
                                               output_size=Wc.get_value().shape[1],
                                               W=Wc)

    # e.g. 50*6*4*300: (batch, words, chars, dim)
    char_embg11 = lasagne.layers.get_output(l_char_emb, {l_char_in: char_g1batchindices})
    word_embg11 = lasagne.layers.get_output(l_emb, {l_in: g1batchindices})
    self.char_embg_function = theano.function([char_g1batchindices], char_embg11)
    self.word_embg_function = theano.function([g1batchindices], word_embg11)
    #char_embg1 = lasagne.layers.get_output(l_char_emb, {l_char_in: char_g1batchindices})
    #self.char_representation_function = theano.function([char_g1batchindices], char_embg1)

    if params.nntype == 'word_char1':
        l_word_representation = word_representation_layer1([l_emb, l_char_emb, l_char_mask])
        #lasagne.nonlinearities.tanh
    elif params.nntype == 'word_char2':
        l_word_representation = word_representation_layer4([l_emb, l_char_emb, l_char_mask])
    elif params.nntype == 'word_char3':
        l_word_representation = word_representation_layer5([l_emb, l_char_emb, l_char_mask])
    else:
        raise ValueError('something wrong in ppdb_char_word_model: '
                         'unknown params.nntype %s' % params.nntype)

    l_average = lasagne_average_layer([l_word_representation, l_mask])

    embg1 = lasagne.layers.get_output(l_average, {l_in: g1batchindices, l_mask: g1mask,
                                                  l_char_in: char_g1batchindices,
                                                  l_char_mask: char_g1mask})
    embg2 = lasagne.layers.get_output(l_average, {l_in: g2batchindices, l_mask: g2mask,
                                                  l_char_in: char_g2batchindices,
                                                  l_char_mask: char_g2mask})
    embp1 = lasagne.layers.get_output(l_average, {l_in: p1batchindices, l_mask: p1mask,
                                                  l_char_in: char_p1batchindices,
                                                  l_char_mask: char_p1mask})
    embp2 = lasagne.layers.get_output(l_average, {l_in: p2batchindices, l_mask: p2mask,
                                                  l_char_in: char_p2batchindices,
                                                  l_char_mask: char_p2mask})

    g1g2 = (embg1 * embg2).sum(axis=1)
    g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    g1g2 = g1g2 / g1g2norm

    p1g1 = (embp1 * embg1).sum(axis=1)
    p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(T.sum(embg1**2, axis=1))
    p1g1 = p1g1 / p1g1norm

    p2g2 = (embp2 * embg2).sum(axis=1)
    p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(T.sum(embg2**2, axis=1))
    p2g2 = p2g2 / p2g2norm

    costp1g1 = params.margin - g1g2 + p1g1
    costp1g1 = costp1g1 * (costp1g1 > 0)
    costp2g2 = params.margin - g1g2 + p2g2
    costp2g2 = costp2g2 * (costp2g2 > 0)
    cost = costp1g1 + costp2g2

    network_params = lasagne.layers.get_all_params(l_average, trainable=True)
    network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_average, trainable=True)

    # regularization
    l2 = 0.5 * params.LC * sum(lasagne.regularization.l2(x) for x in network_params)
    if params.updatewords:
        word_reg = 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
        char_reg = 0.5 * params.LWC * lasagne.regularization.l2(Wc - initial_Wc)
        cost = T.mean(cost) + l2 + word_reg + char_reg
    else:
        cost = T.mean(cost) + l2

    self.feedforward_function = theano.function(
        [g1batchindices, char_g1batchindices, g1mask, char_g1mask], embg1)
    self.cost_function = theano.function(
        [g1batchindices, char_g1batchindices, g2batchindices, char_g2batchindices,
         p1batchindices, char_p1batchindices, p2batchindices, char_p2batchindices,
         g1mask, char_g1mask, g2mask, char_g2mask, p1mask, char_p1mask,
         p2mask, char_p2mask], cost)
    prediction = g1g2
    self.scoring_function = theano.function(
        [g1batchindices, char_g1batchindices, g2batchindices, char_g2batchindices,
         g1mask, char_g1mask, g2mask, char_g2mask], prediction)

    self.train_function = None
    if params.updatewords:
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                     for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function(
            [g1batchindices, char_g1batchindices, g2batchindices, char_g2batchindices,
             p1batchindices, char_p1batchindices, p2batchindices, char_p2batchindices,
             g1mask, char_g1mask, g2mask, char_g2mask, p1mask, char_p1mask,
             p2mask, char_p2mask], cost, updates=updates)
    else:
        self.all_params = network_params
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                     for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function(
            [g1batchindices, char_g1batchindices, g2batchindices, char_g2batchindices,
             p1batchindices, char_p1batchindices, p2batchindices, char_p2batchindices,
             g1mask, char_g1mask, g2mask, char_g2mask, p1mask, char_p1mask,
             p2mask, char_p2mask], cost, updates=updates)
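# --- Illustration (not part of the model above) ---
# Shape sketch for the char-level inputs above: index tensors are
# (batch, num_words, num_chars) with a matching float mask, and the char
# embedding lookup yields (batch, num_words, num_chars, dim). One simple way
# to pool chars into per-word vectors, in hedged numpy form; the actual
# word_representation_layer* combinations are defined elsewhere in this repo.
import numpy as np

def masked_char_average(char_emb, char_mask):
    # char_emb: (batch, words, chars, dim); char_mask: (batch, words, chars)
    summed = (char_emb * char_mask[..., None]).sum(axis=2)
    counts = char_mask.sum(axis=2)[..., None]
    return summed / np.maximum(counts, 1.0)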
def __init__(self, We_initial, params):
    if params.maxval:
        self.nout = params.maxval - params.minval + 1

    p = None
    if params.traintype == "reg" or params.traintype == "rep":
        p = cPickle.load(open(params.regfile, 'rb'))
        print(p)  # contains We, W, and b
    if params.traintype == "reg":
        print("regularizing to parameters")
        raise NotImplementedError("Not implemented for DAN model.")
    if params.traintype == "rep":
        print("not updating embeddings")
        raise NotImplementedError("Not implemented for DAN model.")

    # params
    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    if params.traintype == "reg":
        initial_We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        updatewords = True
    if params.traintype == "rep":
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        updatewords = False

    # symbolic params
    g1batchindices = T.imatrix(); g2batchindices = T.imatrix()
    g1mask = T.matrix(); g2mask = T.matrix()
    scores = T.matrix()

    if params.traintype == "reg" or params.traintype == "rep":
        W = np.asarray(p[1].get_value(), dtype=config.floatX)
        b = np.asarray(p[2].get_value(), dtype=config.floatX)

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_average = lasagne_average_layer([l_emb, l_mask])

    # each hidden layer needs its num_units; params.hiddensize was missing
    # on l_2 through l_4 in the original
    l_1 = lasagne.layers.DenseLayer(l_average, params.hiddensize,
                                    nonlinearity=params.nonlinearity)
    l_2 = lasagne.layers.DenseLayer(l_1, params.hiddensize,
                                    nonlinearity=params.nonlinearity)
    l_3 = lasagne.layers.DenseLayer(l_2, params.hiddensize,
                                    nonlinearity=params.nonlinearity)
    l_4 = lasagne.layers.DenseLayer(l_3, params.hiddensize,
                                    nonlinearity=params.nonlinearity)

    l_out = None
    if params.numlayers == 1:
        l_out = l_1
    elif params.numlayers == 2:
        l_out = l_2
    elif params.numlayers == 3:
        l_out = l_3
    elif params.numlayers == 4:
        l_out = l_4
    else:
        raise ValueError('Only 1-4 layers are supported currently.')

    embg1 = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})
    embg2 = lasagne.layers.get_output(l_out, {l_in: g2batchindices, l_mask: g2mask})

    g1_dot_g2 = embg1 * embg2
    g1_abs_g2 = abs(embg1 - embg2)

    # embg1/embg2 have params.hiddensize units
    lin_dot = lasagne.layers.InputLayer((None, params.hiddensize))
    lin_abs = lasagne.layers.InputLayer((None, params.hiddensize))
    l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
    l_sigmoid = lasagne.layers.DenseLayer(l_sum, params.memsize,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)

    if params.task == "sim":
        l_softmax = lasagne.layers.DenseLayer(l_sigmoid, self.nout,
                                              nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {lin_dot: g1_dot_g2,
                                                  lin_abs: g1_abs_g2})
        Y = T.log(X)
        cost = scores * (T.log(scores) - Y)
        cost = cost.sum(axis=1) / (float(self.nout))
        prediction = 0.
        i = params.minval
        while i <= params.maxval:
            prediction = prediction + i * X[:, i - 1]
            i += 1
    elif params.task == "ent":
        l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 3, nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {lin_dot: g1_dot_g2,
                                                  lin_abs: g1_abs_g2})
        cost = theano.tensor.nnet.categorical_crossentropy(X, scores)
        prediction = T.argmax(X, axis=1)
    else:
        raise ValueError('Params.task not set correctly.')

    self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)
    self.network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)

    reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
    self.trainable = self.getTrainableParams(params)
    cost = T.mean(cost) + reg

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
    self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                             g1mask, g2mask], prediction)
    self.cost_function = theano.function([scores, g1batchindices, g2batchindices,
                                          g1mask, g2mask], cost)

    # updates
    grads = theano.gradient.grad(cost, self.trainable)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.trainable, params.eta)
    self.train_function = theano.function([scores, g1batchindices, g2batchindices,
                                           g1mask, g2mask], cost, updates=updates)
def __init__(self, We_initial, params):
    p = None
    if params.traintype == "reg" or params.traintype == "rep":
        p = cPickle.load(open(params.regfile, 'rb'))
        print(p)  # contains We
    if params.traintype == "reg":
        print("regularizing to parameters")
    if params.traintype == "rep":
        print("not updating embeddings")

    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    if params.traintype == "reg":
        initial_We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
    if params.traintype == "rep":
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))

    g1batchindices = T.imatrix()
    g1mask = T.matrix()
    scores = T.matrix()

    l_in = lasagne.layers.InputLayer((None, None, 1))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_out = lasagne_average_layer([l_emb, l_mask])

    embg = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})

    l_in2 = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
    l_sigmoid = lasagne.layers.DenseLayer(l_in2, params.memsize,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)
    l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 2, nonlinearity=T.nnet.softmax)

    X = lasagne.layers.get_output(l_softmax, {l_in2: embg})
    cost = T.nnet.categorical_crossentropy(X, scores)
    prediction = T.argmax(X, axis=1)

    self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)
    self.network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)

    reg = self.getRegTerm(params, We, initial_We)
    self.trainable = self.getTrainableParams(params)
    cost = T.mean(cost) + reg

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg)
    self.scoring_function = theano.function([g1batchindices, g1mask], prediction)
    self.cost_function = theano.function([scores, g1batchindices, g1mask], cost)

    grads = theano.gradient.grad(cost, self.trainable)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.trainable, params.eta)
    self.train_function = theano.function([scores, g1batchindices, g1mask], cost,
                                          updates=updates)
def __init__(self, We_initial, params):
    p = None
    if params.traintype == "reg" or params.traintype == "rep":
        p = cPickle.load(open(params.regfile, 'rb'))
        print(p)  # contains We, W, and b
    if params.traintype == "reg":
        print("regularizing to parameters")
        raise NotImplementedError("Not implemented for DAN model.")
    if params.traintype == "rep":
        print("not updating embeddings")
        raise NotImplementedError("Not implemented for DAN model.")

    initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
    if params.traintype == "reg":
        initial_We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        updatewords = True
    if params.traintype == "rep":
        We = theano.shared(np.asarray(p[0].get_value(), dtype=config.floatX))
        updatewords = False

    g1batchindices = T.imatrix()
    g1mask = T.matrix()
    scores = T.matrix()

    if params.traintype == "reg" or params.traintype == "rep":
        W = np.asarray(p[1].get_value(), dtype=config.floatX)
        b = np.asarray(p[2].get_value(), dtype=config.floatX)

    l_in = lasagne.layers.InputLayer((None, None))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                          output_size=We.get_value().shape[1], W=We)
    l_average = lasagne_average_layer([l_emb, l_mask])

    l_1 = lasagne.layers.DenseLayer(l_average, params.hiddensize,
                                    nonlinearity=params.nonlinearity)
    l_2 = lasagne.layers.DenseLayer(l_1, params.hiddensize,
                                    nonlinearity=params.nonlinearity)
    l_3 = lasagne.layers.DenseLayer(l_2, params.hiddensize,
                                    nonlinearity=params.nonlinearity)
    l_4 = lasagne.layers.DenseLayer(l_3, params.hiddensize,
                                    nonlinearity=params.nonlinearity)

    l_out = None
    if params.numlayers == 1:
        l_out = l_1
    elif params.numlayers == 2:
        l_out = l_2
    elif params.numlayers == 3:
        l_out = l_3
    elif params.numlayers == 4:
        l_out = l_4
    else:
        raise ValueError('Only 1-4 layers are supported currently.')

    embg = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})

    # embg has params.hiddensize units (the DAN output, not the word dim)
    l_in2 = lasagne.layers.InputLayer((None, params.hiddensize))
    l_sigmoid = lasagne.layers.DenseLayer(l_in2, params.memsize,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)
    l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 2, nonlinearity=T.nnet.softmax)

    X = lasagne.layers.get_output(l_softmax, {l_in2: embg})
    cost = T.nnet.categorical_crossentropy(X, scores)
    prediction = T.argmax(X, axis=1)

    self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)
    self.network_params.pop(0)
    self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
        lasagne.layers.get_all_params(l_softmax, trainable=True)

    reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
    self.trainable = self.getTrainableParams(params)
    cost = T.mean(cost) + reg

    self.feedforward_function = theano.function([g1batchindices, g1mask], embg)
    self.scoring_function = theano.function([g1batchindices, g1mask], prediction)
    self.cost_function = theano.function([scores, g1batchindices, g1mask], cost)

    grads = theano.gradient.grad(cost, self.trainable)
    if params.clip:
        grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim))
                 for grad in grads]
    updates = params.learner(grads, self.trainable, params.eta)
    self.train_function = theano.function([scores, g1batchindices, g1mask], cost,
                                          updates=updates)