def build(self, v1_input_size, v2_input_size, architecture, regul_coef=0.0, dropout=0.0, lr=1e-3, batchnorm=False, seed=77):
    np.random.seed(seed)
    V1_sym = S.csr_matrix(name='view1', dtype='float32')
    V2_sym = S.csr_matrix(name='view2', dtype='float32')
    self.batchnorm = batchnorm
    logging.info('building deepcca network with batchnorm {} regul {} lr {} layers {} cca_dim {}'.format(
        str(self.batchnorm), regul_coef, lr, str(architecture), model_args.dccasize))
    l_out_view1 = self.build_mlp(V1_sym, input_size=v1_input_size, architecture=architecture, dropout=dropout)
    l_out_view2 = self.build_mlp(V2_sym, input_size=v2_input_size, architecture=architecture, dropout=dropout)
    self.l_out_view1 = l_out_view1
    self.l_out_view2 = l_out_view2
    output_view1 = lasagne.layers.get_output(l_out_view1)
    output_view2 = lasagne.layers.get_output(l_out_view2)
    loss_cca, _ = self.cca_loss(output_view1, output_view2, cca_dim=model_args.dccasize)
    regul_loss1 = lasagne.regularization.regularize_network_params(l_out_view1, penalty=l2)
    regul_loss2 = lasagne.regularization.regularize_network_params(l_out_view2, penalty=l2)
    regul_loss = (regul_loss1 + regul_loss2) * regul_coef
    loss = loss_cca + regul_loss
    params = lasagne.layers.get_all_params(l_out_view1, trainable=True) + \
        lasagne.layers.get_all_params(self.l_out_view2, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=lr, beta1=0.9, beta2=0.999, epsilon=1e-8)
    # updates = lasagne.updates.sgd(loss, params, learning_rate=lr)
    self.f_train = theano.function([V1_sym, V2_sym], loss_cca, updates=updates)
    self.f_val = theano.function([V1_sym, V2_sym], loss_cca)
    self.f_predict = theano.function([V1_sym, V2_sym], [output_view1, output_view2, loss_cca])
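# Hedged usage sketch (an assumption, not part of the original source): the
# compiled functions take two scipy CSR matrices, one per view, with one row per
# aligned example. The names below (model, X_view1_csr, X_view2_csr) are
# hypothetical.
#
#   model.build(v1_size, v2_size, architecture=[1000, 500], lr=1e-3)
#   train_cca = model.f_train(X_view1_csr, X_view2_csr)       # one adam step
#   val_cca = model.f_val(X_view1_val_csr, X_view2_val_csr)   # no update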
def build(self):
    print("start building")
    x_sym = sparse.csr_matrix("x", dtype="float32")
    y_sym = T.imatrix("y")
    gx_sym_1 = sparse.csr_matrix("x", dtype="float32")
    gx_sym_2 = sparse.csr_matrix("x", dtype="float32")

    l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=x_sym)
    l_hid = layers.SparseLayer(l_x_in, 50)
    embedding = lasagne.layers.get_output(l_hid)
    self.emb_fn = theano.function([x_sym], embedding)
    l_y = lasagne.layers.DenseLayer(l_hid, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)

    py_sym = lasagne.layers.get_output(l_y)
    loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
    params = lasagne.layers.get_all_params(l_y, trainable=True)
    updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)
    self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)

    l_gx_1 = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_sym_1)
    l_gx_2 = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_sym_2)
    l_gy_1 = layers.SparseLayer(l_gx_1, 50, W=l_hid.W, b=l_hid.b)
    l_gy_2 = layers.SparseLayer(l_gx_2, 50, W=l_hid.W, b=l_hid.b)
    gy_sym_1 = lasagne.layers.get_output(l_gy_1)
    gy_sym_2 = lasagne.layers.get_output(l_gy_2)
    g_loss = lasagne.objectives.squared_error(gy_sym_1, gy_sym_2).mean()
    g_params = lasagne.layers.get_all_params(l_gy_1) + lasagne.layers.get_all_params(l_gy_2)
    g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate=self.g_learning_rate)
    self.g_fn = theano.function([gx_sym_1, gx_sym_2], g_loss, updates=g_updates)

    acc = T.mean(T.eq(T.argmax(py_sym, axis=1), T.argmax(y_sym, axis=1)))
    self.test_fn = theano.function([x_sym, y_sym], acc)
def __init__(self, rng, P_input, L2_input, **kwargs):
    # symbol declaration, initialization and definition
    x_1_tm1, x_t = (
        sparse.csr_matrix("x_1_tm1", dtype=theano.config.floatX),
        sparse.csr_matrix("x_t", dtype=theano.config.floatX)
    ) if P_input is None else P_input[:2]

    # elements of history
    shape = kwargs.get("shape")
    if shape is not None:
        dict_size = shape[0]
        if len(shape) <= 1:
            del kwargs["shape"]
        else:
            kwargs["shape"] = shape[1:]
    else:
        dict_size = (16, 1, 32, 32)
    D_1_tm1 = theano.shared(rng.normal(size=dict_size).astype(theano.config.floatX))
    Dx_1_tm1 = sparse.dot(x_1_tm1, D_1_tm1)  # array access = dot operation
    super(SequenceCNN, self).__init__(rng=rng, inputsymbol=Dx_1_tm1, **kwargs)  # attaches new elements into the fgraph
    self.L2_output_1_tm1 = self.L2_output

    # elements of current time
    D_t = theano.shared(rng.normal(size=dict_size).astype(theano.config.floatX))
    Dx_t = sparse.dot(x_t, D_t)  # array access = dot operation
    self.L2_output_t = theano.clone(self.L2_output_1_tm1, replace={Dx_1_tm1: Dx_t})

    # element preparation for model building
    self.P_input = (x_1_tm1, x_t)
    self.params += [D_1_tm1, D_t]
    self.L2_output = self.L2_output_1_tm1 * self.L2_output_t
def create_TrainFunc_tranPES(simfn, embeddings, marge=0.5, alpha=1., beta=1.):
    # parse the embedding data
    embedding = embeddings[0]   # D x N matrix
    lembedding = embeddings[1]

    # declare the symbolic variables for training triples
    hp = S.csr_matrix('head positive')  # N x batchsize matrix
    rp = S.csr_matrix('relation')
    tp = S.csr_matrix('tail positive')
    hn = S.csr_matrix('head negative')
    tn = S.csr_matrix('tail negative')

    lemb = T.scalar('embedding learning rate')
    lremb = T.scalar('relation learning rate')

    subtensorE = T.ivector('batch entities set')
    subtensorR = T.ivector('batch link set')

    # generate the training positive and negative triples
    hpmat = S.dot(embedding.E, hp).T  # batchsize x D dense matrix
    rpmat = S.dot(lembedding.E, rp).T
    tpmat = S.dot(embedding.E, tp).T

    hnmat = S.dot(embedding.E, hn).T
    tnmat = S.dot(embedding.E, tn).T

    # calculate the score
    pos = tranPES3(simfn, T.concatenate([hpmat, tpmat], axis=1).reshape((hpmat.shape[0], 2, hpmat.shape[1])).dimshuffle(0, 2, 1), hpmat, rpmat, tpmat)
    negh = tranPES3(simfn, T.concatenate([hnmat, tpmat], axis=1).reshape((hnmat.shape[0], 2, hnmat.shape[1])).dimshuffle(0, 2, 1), hnmat, rpmat, tpmat)
    negt = tranPES3(simfn, T.concatenate([hpmat, tnmat], axis=1).reshape((hpmat.shape[0], 2, hpmat.shape[1])).dimshuffle(0, 2, 1), hpmat, rpmat, tnmat)

    costh, outh = margeCost(pos, negh, marge)
    costt, outt = margeCost(pos, negt, marge)

    embreg = regEmb(embedding, subtensorE, alpha)
    lembreg = regLink(lembedding, subtensorR, beta)

    cost = costh + costt + embreg[0] + lembreg
    out = T.concatenate([outh, outt])
    outc = embreg[1]

    # list of inputs to the function
    list_in = [lemb, lremb, hp, rp, tp, hn, tn, subtensorE, subtensorR]

    # updating the embeddings using gradient descent
    emb_grad = T.grad(cost, embedding.E)
    New_embedding = embedding.E - lemb * emb_grad
    remb_grad = T.grad(cost, lembedding.E)
    New_rembedding = lembedding.E - lremb * remb_grad
    updates = OrderedDict({embedding.E: New_embedding, lembedding.E: New_rembedding})

    return theano.function(list_in, [cost, T.mean(out), T.mean(outc), embreg[0], lembreg],
                           updates=updates, on_unused_input='ignore')
def optimize_func(transform_matrix):
    t0 = time.time()
    M = S.csr_matrix(dtype=theano.config.floatX)
    N = S.csr_matrix(dtype=theano.config.floatX)
    ON = S.csr_matrix(dtype=theano.config.floatX)
    lr = T.scalar('learning rate', dtype=theano.config.floatX)
    # print M, N, ON, lr
    TN = S.dot(transform_matrix, N)
    D = T.sqr(M - TN)
    # PD = S.sqr(N - ON)
    # PD = T.sqrt(S.sp_sum(PD, 1))
    # TPD = T.sqr(TN - ON)
    # TPD = T.sqrt(TPD.sum(1))
    # D2 = T.sqr(PD - TPD)
    cost = T.sum(D)  # + T.sum(D2)
    list_in = [lr, M, N, ON]
    gradient = T.grad(cost, transform_matrix)
    new_transform_matrix = transform_matrix - lr * gradient
    t1 = time.time()
    print('opt func cost is ' + str(t1 - t0))
    return theano.function(list_in, cost,
                           updates=[(transform_matrix, new_transform_matrix)],
                           on_unused_input='ignore')
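# Hedged usage sketch (an assumption, not part of the original source):
# transform_matrix is expected to be a Theano shared variable, so every call to
# the compiled function performs one in-place gradient step on it. The names
# below (W, M_batch, N_batch, ON_batch) are hypothetical.
#
#   W = theano.shared(np.eye(300, dtype=theano.config.floatX))
#   step = optimize_func(W)
#   cost = step(0.01, M_batch, N_batch, ON_batch)  # batches are scipy CSR matrices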
def build(self, pre_load=False, binary_graph=True):
    """Build the model. This method should be called after self.add_data."""
    hA = sparse.csr_matrix('hA', dtype='float32')  # nh times nh
    gA = sparse.csr_matrix('gA', dtype='float32')  # ng times ng
    Y = sparse.csr_matrix('Y', dtype='float32')    # ng times nh
    x_index = T.ivector('xind')
    y_index = T.ivector('yind')

    # not sparse (due to SVD)
    hX = T.fmatrix('hX')  # nh times Fh
    gX = T.fmatrix('gX')  # ng times Fg

    # final dimensions of both sides must match
    assert self.g_hidden_list[-1] == self.h_hidden_list[-1]
    g_pred, g_net = self.build_one_side(gX, gA, self.gX, self.sym_g, self.g_hidden_list)
    h_pred, h_net = self.build_one_side(hX, hA, self.hX, self.sym_h, self.h_hidden_list)

    # final layer: g_pred * h_pred^T
    Y_pred = T.dot(g_pred, h_pred.T)  # ng times nh

    # squared error over the matrix entries
    loss_mat = lasagne.objectives.squared_error(Y_pred, Y)
    if binary_graph:
        loss = (loss_mat[x_index, y_index].sum()
                + loss_mat[self.pos_trn_x_index, self.pos_trn_y_index].sum() * self.pos_up_ratio) \
            / (x_index.shape[0] + self.pos_trn_x_index.shape[0])
    else:
        loss = loss_mat[x_index, y_index].mean()

    g_params = lasagne.layers.get_all_params(g_net)
    h_params = lasagne.layers.get_all_params(h_net)
    params = g_params + h_params
    self.l = [g_net, h_net]

    updates = lasagne.updates.adam(loss, params)
    grads = lasagne.updates.get_or_compute_grads(loss, params)
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))

    self.train_fn = theano.function([gX, hX, gA, hA, Y, x_index, y_index],
                                    [Y_pred, loss, grad_norm],
                                    updates=updates, on_unused_input='ignore',
                                    allow_input_downcast=True)
    self.test_fn = theano.function([gX, hX, gA, hA], Y_pred,
                                   on_unused_input='ignore', allow_input_downcast=True)

    # loading the parameters
    if pre_load:
        self.load_params()
def build(self):
    x_sym = sparse.csr_matrix("x", dtype="float32")
    self.x_sym = x_sym
    y_sym = T.imatrix("y")
    gx_sym = sparse.csr_matrix("gx", dtype="float32")
    gy_sym = T.ivector("gy")
    gz_sym = T.vector("gz")

    l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=x_sym)
    l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_sym)
    l_gy_in = lasagne.layers.InputLayer(shape=(None,), input_var=gy_sym)

    # l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
    W = l_x_2.W
    # l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    # l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
    # l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    l_x = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    # l_x = layers.HybridLayer([l_x_in, l_x_2], self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)

    l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W=W)
    l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size=self.num_ver, output_size=self.embedding_size)
    l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
    pgy_sym = lasagne.layers.get_output(l_gx)
    g_loss = -T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis=1) * gz_sym)).sum()
    self.l = l_gx

    py_sym = lasagne.layers.get_output(l_x)
    self.ret_y = py_sym
    loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
    # loss += lasagne.objectives.categorical_crossentropy(lasagne.layers.get_output(l_x_1), y_sym).mean()
    # loss += lasagne.objectives.categorical_crossentropy(lasagne.layers.get_output(l_x_2), y_sym).mean()

    # params = lasagne.layers.get_all_params(l_x)
    # params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b]
    if not JOINT:
        params = [l_x.W, l_x.b]
    else:
        params = lasagne.layers.get_all_params(l_x)
    # params = [l_x.W1, l_x.W2, l_x.b]
    updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)
    self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)

    g_params = lasagne.layers.get_all_params(l_gx)
    g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate=self.g_learning_rate)
    self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates=g_updates)

    acc = T.mean(T.eq(T.argmax(py_sym, axis=1), T.argmax(y_sym, axis=1)))
    self.test_fn = theano.function([x_sym, y_sym], acc)
def SimFn(fnsim, embeddings, leftop, rightop):
    """
    This function returns a Theano function to measure the similarity score
    for sparse matrices inputs.

    :param fnsim: similarity function (on Theano variables).
    :param embeddings: an Embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr = S.csr_matrix('inpr')
    inpl = S.csr_matrix('inpl')
    inpo = S.csr_matrix('inpo')

    # Graph
    # Each input is a one-hot CSR matrix, so the sparse dot with the embedding
    # matrix E selects one embedding column per example; the transpose then
    # yields one embedding row per example.
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    # leftop and rightop are LayerMat('lin', state.ndim, state.nhid) instances;
    # on call they reshape y to (batch, n_inp, n_out) and x to (batch, n_inp, 1)
    # and return self.act((rx * ry).sum(1)).
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    """
    Theano function inputs.
    :input inpl: sparse csr matrix (representing the indexes of the 'left'
                 entities), shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix (representing the indexes of the 'right'
                 entities), shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix (representing the indexes of the relation
                 member), shape=(#examples, N [Embeddings]).

    Theano function output
    :output simi: matrix of score values.
    """
    return theano.function([inpl, inpr, inpo], [simi], on_unused_input='ignore')
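# Hedged aside (an addition, not from the original source): a minimal numpy/scipy
# illustration of the one-hot CSR encoding used above, assuming 5 entities with
# 3-dimensional embeddings. All names here are hypothetical.
import numpy as np
import scipy.sparse as sp

E = np.arange(15, dtype='float32').reshape(3, 5)   # D x N embedding matrix
idx = [4, 0, 2]                                    # entity index of each example
inpl_demo = sp.csr_matrix((np.ones(len(idx), dtype='float32'),
                           (idx, range(len(idx)))), shape=(5, len(idx)))
lhs_demo = inpl_demo.T.dot(E.T)                    # same as S.dot(E, inpl).T: row i is the embedding of idx[i]
assert np.allclose(lhs_demo, E.T[idx])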
def build(self): """build the model. This method should be called after self.add_data. """ x_sym = sparse.csr_matrix('x', dtype = 'float32') y_sym = T.imatrix('y') g_sym = T.imatrix('g') gy_sym = T.vector('gy') ind_sym = T.ivector('ind') l_x_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = x_sym) l_g_in = lasagne.layers.InputLayer(shape = (None, 2), input_var = g_sym) l_ind_in = lasagne.layers.InputLayer(shape = (None, ), input_var = ind_sym) l_gy_in = lasagne.layers.InputLayer(shape = (None, ), input_var = gy_sym) num_ver = max(self.graph.keys()) + 1 l_emb_in = lasagne.layers.SliceLayer(l_g_in, indices = 0, axis = 1) l_emb_in = lasagne.layers.EmbeddingLayer(l_emb_in, input_size = num_ver, output_size = self.embedding_size) l_emb_out = lasagne.layers.SliceLayer(l_g_in, indices = 1, axis = 1) if self.neg_samp > 0: l_emb_out = lasagne.layers.EmbeddingLayer(l_emb_out, input_size = num_ver, output_size = self.embedding_size) l_emd_f = lasagne.layers.EmbeddingLayer(l_ind_in, input_size = num_ver, output_size = self.embedding_size, W = l_emb_in.W) l_x_hid = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax) if self.use_feature: l_emd_f = layers.DenseLayer(l_emd_f, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax) l_y = lasagne.layers.ConcatLayer([l_x_hid, l_emd_f], axis = 1) l_y = layers.DenseLayer(l_y, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax) else: l_y = layers.DenseLayer(l_emd_f, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax) py_sym = lasagne.layers.get_output(l_y) loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean() if self.layer_loss and self.use_feature: hid_sym = lasagne.layers.get_output(l_x_hid) loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean() emd_sym = lasagne.layers.get_output(l_emd_f) loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean() if self.neg_samp == 0: l_gy = layers.DenseLayer(l_emb_in, num_ver, nonlinearity = lasagne.nonlinearities.softmax) pgy_sym = lasagne.layers.get_output(l_gy) g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, lasagne.layers.get_output(l_emb_out)).sum() else: l_gy = lasagne.layers.ElemwiseMergeLayer([l_emb_in, l_emb_out], T.mul) pgy_sym = lasagne.layers.get_output(l_gy) g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis = 1) * gy_sym)).sum() params = [l_emd_f.W, l_emd_f.b, l_x_hid.W, l_x_hid.b, l_y.W, l_y.b] if self.use_feature else [l_y.W, l_y.b] if self.update_emb: params = lasagne.layers.get_all_params(l_y) updates = lasagne.updates.sgd(loss, params, learning_rate = self.learning_rate) self.train_fn = theano.function([x_sym, y_sym, ind_sym], loss, updates = updates, on_unused_input = 'ignore') self.test_fn = theano.function([x_sym, ind_sym], py_sym, on_unused_input = 'ignore') self.l = [l_gy, l_y] g_params = lasagne.layers.get_all_params(l_gy, trainable = True) g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate = self.g_learning_rate) self.g_fn = theano.function([g_sym, gy_sym], g_loss, updates = g_updates, on_unused_input = 'ignore')
class MultinomialTester(utt.InferShapeTester):
    p = sparse.csr_matrix()
    _p = sp.csr_matrix(
        np.asarray([[0.0, 0.5, 0.0, 0.5],
                    [0.1, 0.2, 0.3, 0.4],
                    [0.0, 1.0, 0.0, 0.0],
                    [0.3, 0.3, 0.0, 0.4]], dtype=config.floatX))

    def setUp(self):
        super(MultinomialTester, self).setUp()
        self.op_class = Multinomial

    def test_op(self):
        n = tensor.lscalar()
        f = theano.function([self.p, n], multinomial(n, self.p))

        _n = 5
        tested = f(self._p, _n)
        assert tested.shape == self._p.shape
        assert np.allclose(np.floor(tested.todense()), tested.todense())
        assert tested[2, 1] == _n

        n = tensor.lvector()
        f = theano.function([self.p, n], multinomial(n, self.p))

        _n = np.asarray([1, 2, 3, 4], dtype='int64')
        tested = f(self._p, _n)
        assert tested.shape == self._p.shape
        assert np.allclose(np.floor(tested.todense()), tested.todense())
        assert tested[2, 1] == _n[2]

    def test_infer_shape(self):
        self._compile_and_check([self.p],
                                [multinomial(5, self.p)],
                                [self._p],
                                self.op_class,
                                warn=False)
def _setup_vars(self, sparse_input):
    '''Setup Theano variables for our network.

    Parameters
    ----------
    sparse_input : bool
        If True, create an input variable that can hold a sparse matrix.
        Defaults to False, which assumes all arrays are dense.

    Returns
    -------
    vars : list of theano variables
        A list of the variables that this network requires as inputs.
    '''
    # x represents our network's input.
    self.x = TT.matrix('x')
    if sparse_input:
        self.x = SS.csr_matrix('x')

    # this variable holds the target outputs for input x.
    self.targets = TT.matrix('targets')

    # the weight array is provided to ensure that different target values
    # are taken into account with different weights during optimization.
    self.weights = TT.matrix('weights')

    if self.weighted:
        return [self.x, self.targets, self.weights]
    return [self.x, self.targets]
def setup_vars(self):
    '''Setup Theano variables required by our network.

    The default variable for a network is simply `x`, which represents the
    input to the network.

    Subclasses may override this method to specify additional variables. For
    example, a supervised model might specify an additional variable that
    represents the target output for a particular input.

    Returns
    -------
    vars : list of theano variables
        A list of the variables that this network requires as inputs.
    '''
    # x represents our network's input.
    if self.is_sparse_input:
        self.x = SS.csr_matrix('x', dtype=FLOAT)
    else:
        self.x = TT.matrix('x')

    # the weight array is provided to ensure that different target values
    # are taken into account with different weights during optimization.
    self.weights = TT.matrix('weights')

    if self.kwargs.get('weighted'):
        return [self.x, self.weights]
    return [self.x]
def bspline_basis(n, eval_points, degree=3):
    n_knots = n + degree + 1
    knots = np.linspace(0, 1, n_knots - 2 * degree)
    knots = np.r_[[0] * degree, knots, [1] * degree]
    basis_funcs = interpolate.BSpline(knots, np.eye(n), k=degree)
    Bx = basis_funcs(eval_points)
    return sparse.csr_matrix(Bx)
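# Hedged usage sketch (an assumption, not from the original source): evaluate a
# 10-function cubic basis on a grid. Each row of the returned CSR matrix holds
# the (at most degree + 1) nonzero basis values for one evaluation point, and
# within the clamped knot range the rows sum to 1 (partition of unity).
#
#   B = bspline_basis(10, np.linspace(0, 1, 100), degree=3)
#   print(B.shape)        # (100, 10)
#   print(B.sum(axis=1))  # all (close to) 1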
def _setup_vars(self, sparse_input):
    '''Setup Theano variables for our network.

    Parameters
    ----------
    sparse_input : bool
        If True, create an input variable that can hold a sparse matrix.
        Defaults to False, which assumes all arrays are dense.

    Returns
    -------
    vars : list of theano variables
        A list of the variables that this network requires as inputs.
    '''
    # x represents our network's input.
    self.x = TT.matrix('x')
    if sparse_input:
        self.x = SS.csr_matrix('x')

    # for a classifier, this specifies the correct labels for a given input.
    self.labels = TT.ivector('labels')

    # and the weights are reshaped to be just a vector.
    self.weights = TT.vector('weights')

    if self.weighted:
        return [self.x, self.labels, self.weights]
    return [self.x, self.labels]
def get_train_function(self):
    # specify the computational graph
    target = T.matrix('target')
    weight = theano.shared(np.random.randn(len(self.feature_map), len(self.label_map)), name='weight')
    feat_mat = sparse.csr_matrix(name='feat_mat')
    mask_mat = sparse.csr_matrix(name='mask_mat')
    sum_pred = sparse.dot(mask_mat, T.nnet.softmax(sparse.dot(feat_mat, weight)))
    pred = sum_pred / sum_pred.sum(axis=1).reshape((sum_pred.shape[0], 1))
    objective = T.nnet.categorical_crossentropy(pred, target).sum() \
        + self.param.l2_regularization * (weight ** 2).sum()
    grad_weight = T.grad(objective, weight)

    # print 'Compiling function ...'
    # compile the function
    train = theano.function(inputs=[feat_mat, mask_mat, target],
                            outputs=[objective, weight],
                            updates=[(weight, weight - 0.1 * grad_weight)])
    return train
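# Hedged illustration (an addition, not from the original source): mask_mat acts
# as a sparse pooling matrix -- row g selects which rows of the per-feature-vector
# softmax belong to instance g, and the division renormalizes each pooled row back
# into a distribution. The same arithmetic in plain numpy/scipy:
import numpy as np
import scipy.sparse as sp

probs = np.array([[0.7, 0.3], [0.2, 0.8], [0.5, 0.5]])        # softmax rows
mask = sp.csr_matrix(np.array([[1., 1., 0.], [0., 0., 1.]]))  # 2 instances over 3 rows
summed = mask.dot(probs)                                      # pooled rows
pred = summed / summed.sum(axis=1).reshape(-1, 1)             # renormalize
assert np.allclose(pred.sum(axis=1), 1.0)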
def __init__(self, in_dim, out_dim=None, weighted=False, sparse_input=False, output_name='out'):
    self.input = Loss.F_CONTAINERS[in_dim]('input')
    if sparse_input is True or \
       isinstance(sparse_input, str) and sparse_input.lower() == 'csr':
        assert in_dim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csr_matrix('input')
    if isinstance(sparse_input, str) and sparse_input.lower() == 'csc':
        assert in_dim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csc_matrix('input')
    self.variables = [self.input]
    self.target = None
    if out_dim:
        self.target = Loss.F_CONTAINERS[out_dim]('target')
        self.variables.append(self.target)
    self.weight = None
    if weighted:
        self.weight = Loss.F_CONTAINERS[out_dim or in_dim]('weight')
        self.variables.append(self.weight)
    self.output_name = output_name
    if ':' not in self.output_name:
        self.output_name += ':out'
def __init__(self, size, name="in", ndim=2, sparse=False): self.input = util.FLOAT_CONTAINERS[ndim](name) if sparse is True or isinstance(sparse, str) and sparse.lower() == "csr": assert ndim == 2, "Theano only supports sparse arrays with 2 dims" self.input = SS.csr_matrix("input") if isinstance(sparse, str) and sparse.lower() == "csc": assert ndim == 2, "Theano only supports sparse arrays with 2 dims" self.input = SS.csc_matrix("input") super(Input, self).__init__(size=size, name=name, inputs=0, activation="linear", ndim=ndim, sparse=sparse)
def __init__(self, sparse_coding, nb_negative, embed_dims=128, context_dims=128,
             init_embeddings=None, negprob_table=None, optimizer='adam'):
    super(NCELangModelV4, self).__init__(weighted_inputs=False)
    vocab_size = sparse_coding.shape[0]  # the extra word is for OOV
    self.nb_base = sparse_coding.shape[1] - 1
    self.vocab_size = vocab_size
    self.embed_dim = embed_dims
    self.optimizer = optimizers.get(optimizer)
    self.nb_negative = nb_negative
    self.loss = categorical_crossentropy
    self.loss_fnc = objective_fnc(self.loss)
    self.sparse_coding = sparse_coding

    if negprob_table is None:
        negprob_table_ = np.ones(shape=(vocab_size,), dtype=theano.config.floatX) / vocab_size
        negprob_table = theano.shared(negprob_table_)
        self.neg_prob_table = negprob_table_
    else:
        self.neg_prob_table = negprob_table.astype(theano.config.floatX)
        negprob_table = theano.shared(negprob_table.astype(theano.config.floatX))

    self.sampler = TableSampler(self.neg_prob_table)

    self.add_input(name='idxes', ndim=3, dtype='int32')
    idxes = self.inputs['idxes'].get_output(True)
    shape = idxes.shape[1:]
    codes = tsp.csr_matrix('sp-codes', dtype=floatX)
    nb_pos_words = shape[0] * shape[1]
    pos_codes = codes[:nb_pos_words]

    self.add_node(Identity(inputs={True: pos_codes, False: pos_codes}), name='codes_flat')
    self.add_node(Identity(inputs={True: shape, False: shape}), name='sents_shape')
    self.add_node(Identity(inputs={True: codes, False: codes}), name='sparse_codes')

    self.add_node(SparseEmbedding(self.nb_base + 1, embed_dims, weights=init_embeddings),
                  name='embedding', inputs=('codes_flat', 'sents_shape'))
    self.add_node(LangLSTMLayer(embed_dims, output_dim=context_dims), name='encoder', inputs='embedding')
    # seq.add(Dropout(0.5))
    self.add_node(PartialSoftmaxV4(input_dim=context_dims, base_size=self.nb_base + 1),
                  name='part_prob', inputs=('idxes', 'sparse_codes', 'encoder'))
    self.add_node(Dense(input_dim=context_dims, output_dim=1, activation='exponential'),
                  name='normalizer', inputs='encoder')
    self.add_node(LookupProb(negprob_table), name='lookup_prob', inputs='idxes')
    self.add_node(SharedWeightsDense(self.nodes['part_prob'].W, self.nodes['part_prob'].b,
                                     self.sparse_coding, activation='exponential'),
                  name='true_unnorm_prob', inputs='encoder')
    self.add_node(ActivationLayer(name='normalization'), name='true_prob', inputs='true_unnorm_prob')

    self.add_output('pos_prob', node='part_prob')
    self.add_output('neg_prob', node='lookup_prob')
    self.add_output('pred_prob', node='true_prob')
    self.add_output('normalizer', node='normalizer')
    self.add_output('unrm_prob', node='true_unnorm_prob')
def __init__(self, size, name='in', ndim=2, sparse=False, **kwargs):
    self.input = util.FLOAT_CONTAINERS[ndim](name)
    if sparse is True or \
       isinstance(sparse, util.basestring) and sparse.lower() == 'csr':
        assert ndim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csr_matrix('input')
    if isinstance(sparse, util.basestring) and sparse.lower() == 'csc':
        assert ndim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csc_matrix('input')
    super(Input, self).__init__(size=size, name=name, activation='linear',
                                ndim=ndim, sparse=sparse)
def build(self):
    x_sym = sparse.csr_matrix('x', dtype='float32')
    self.x_sym = x_sym
    y_sym = T.imatrix('y')
    gx_sym = sparse.csr_matrix('gx', dtype='float32')
    gy_sym = T.ivector('gy')

    l_x_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=x_sym)
    l_gx_in = lasagne.layers.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_sym)

    l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
    W = l_x_2.W
    embedding = lasagne.layers.get_output(l_x_2)
    self.emb_fn = theano.function([x_sym], embedding)
    l_x_2 = lasagne.layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)
    l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis=1)
    l_x = lasagne.layers.DenseLayer(l_x, self.y.shape[1], nonlinearity=lasagne.nonlinearities.softmax)

    self.num_ver = max(self.graph.keys()) + 1
    l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W=W)
    l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity=lasagne.nonlinearities.softmax)

    py_sym = lasagne.layers.get_output(l_x)
    self.ret_y = py_sym
    loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
    pgy_sym = lasagne.layers.get_output(l_gx)
    g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum()

    # params = lasagne.layers.get_all_params(l_x)
    params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b]
    updates = lasagne.updates.sgd(loss, params, learning_rate=self.learning_rate)
    self.train_fn = theano.function([x_sym, y_sym], loss, updates=updates)

    g_params = lasagne.layers.get_all_params(l_gx)
    g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate=self.g_learning_rate)
    self.g_fn = theano.function([gx_sym, gy_sym], g_loss, updates=g_updates)

    acc = T.mean(T.eq(T.argmax(py_sym, axis=1), T.argmax(y_sym, axis=1)))
    self.test_fn = theano.function([x_sym, y_sym], acc)
def test_sparse(self):
    mySymbolicSparseList = TypedListType(
        sparse.SparseType("csr", theano.config.floatX))()
    mySymbolicSparse = sparse.csr_matrix()

    z = Count()(mySymbolicSparseList, mySymbolicSparse)

    f = theano.function([mySymbolicSparseList, mySymbolicSparse], z)

    x = sp.csr_matrix(random_lil((10, 40), theano.config.floatX, 3))
    y = sp.csr_matrix(random_lil((10, 40), theano.config.floatX, 3))

    self.assertTrue(f([x, y, y], y) == 2)
def ForwardFn(fnsim, embeddings, leftop, rightop, marge=1.0):
    """
    This function returns a theano function to perform a forward step,
    contrasting couples of positive and negative triplets. Members are given
    as sparse matrices. For one positive triplet there is one negative
    triplet.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param marge: marge for the cost function.

    :note: this is useful for W_SABIE [Weston et al., IJCAI 2011]
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # inputs
    inpr = S.csr_matrix()
    inpl = S.csr_matrix()
    inpo = S.csr_matrix()
    inpln = S.csr_matrix()
    inprn = S.csr_matrix()
    inpon = S.csr_matrix()

    # graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T
    relln = S.dot(relationl.E, inpon).T
    relrn = S.dot(relationr.E, inpon).T
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    simin = fnsim(leftop(lhsn, relln), rightop(rhsn, relrn))
    cost, out = margincost(simi, simin, marge)
    """
    Theano function inputs.
    :input inpl: sparse csr matrix representing the indexes of the positive
                 triplet 'left' member, shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive
                 triplet 'right' member, shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive
                 triplet relation member, shape=(#examples, N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative
                  triplet 'left' member, shape=(#examples, N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative
                  triplet 'right' member, shape=(#examples, N [Embeddings]).
    :input inpon: sparse csr matrix representing the indexes of the negative
                  triplet relation member, shape=(#examples, N [Embeddings]).

    Theano function output.
    :output out: binary vector representing when the margin is violated, i.e.
                 when an update occurs.
    """
    return theano.function([inpl, inpr, inpo, inpln, inprn, inpon], [out],
                           on_unused_input='ignore')
def encode(vars, factors, fictional_factor, number_of_entities, facts):
    for var in vars:
        var.u = sparse.csr_matrix(var.label)
        # var.u = T.dmatrix(var.label)
    for rel in factors:
        if rel.label not in relation_lookup:
            # label-encode the relation label
            rel_label = label_encoder.transform([rel.label])[0]
            # get a matrix for this relation
            relation_lookup[rel.label] = create_relation_matrix(rel_label, number_of_entities, facts)
        rel.M = sparse.csr_matrix(rel.label)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEBUG
    # print "DEBUGGING ENCODER"
    # print "Vars"
    # for v in vars:
    #     print v.label
    #     print v.u, ' ', v.u.__class__
    # print "Factors"
    # for f in factors:
    #     print f.label
    #     print f.i.label, ', ', f.o.label
    #     print f.i.u, ', ', f.o.u
    # print "Factor Shared Vars"
    # for f in factors:
    #     print f.M.__class__
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return vars, factors, fictional_factor
def SimFn(fnsim, embeddings, leftop, rightop, op=''):
    """
    This function returns a Theano function to measure the similarity score
    for sparse matrices inputs.

    :param fnsim: similarity function (on Theano variables).
    :param embeddings: an Embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr, inpl, inpo = S.csr_matrix('inpr'), S.csr_matrix('inpl'), S.csr_matrix('inpo')

    # Graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    lop, rop = leftop(lhs, rell), rightop(rhs, relr)
    simi = fnsim(lop, rop)
    """
    Theano function inputs.
    :input inpl: sparse csr matrix (representing the indexes of the 'left'
                 entities), shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix (representing the indexes of the 'right'
                 entities), shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix (representing the indexes of the relation
                 member), shape=(#examples, N [Embeddings]).

    Theano function output
    :output simi: matrix of score values.
    """
    return theano.function([inpl, inpr, inpo], [simi], on_unused_input='ignore')
def get_train_function(self):
    # specify the computational graph
    weight = theano.shared(np.random.randn(len(self.feature_map), len(self.label_map)), name='weight')
    # weight = theano.shared(np.zeros((len(self.feature_map), len(self.label_map))), name='weight')
    feat_mat = sparse.csr_matrix(name='feat_mat')

    f_target = T.matrix('f_target')
    f_mask_mat = sparse.csr_matrix(name='f_mask_mat')
    f_sum_pred = sparse.dot(f_mask_mat, T.nnet.softmax(sparse.dot(feat_mat, weight)))
    f_pred = f_sum_pred / f_sum_pred.sum(axis=1).reshape((f_sum_pred.shape[0], 1))

    i_target = T.matrix('i_target')
    i_mask_mat = sparse.csr_matrix(name='l_mask_mat')
    i_pred = sparse.dot(i_mask_mat, T.nnet.softmax(sparse.dot(feat_mat, weight)))

    objective = self.param.feature_lambda * T.nnet.categorical_crossentropy(f_pred, f_target).sum() \
        + T.nnet.categorical_crossentropy(i_pred, i_target).sum() \
        + self.param.l2_lambda * (weight ** 2).sum() / 2
    grad_weight = T.grad(objective, weight)

    # print 'Compiling function ...'
    # compile the function
    train = theano.function(inputs=[feat_mat, f_mask_mat, f_target, i_mask_mat, i_target],
                            outputs=[objective, weight],
                            updates=[(weight, weight - 0.1 * grad_weight)])
    return train
def test_sparse(self):
    if not scipy_imported:
        raise SkipTest("Optional package SciPy not installed")
    mySymbolicSparseList = TypedListType(
        sparse.SparseType("csr", theano.config.floatX))()
    mySymbolicSparse = sparse.csr_matrix()

    z = Count()(mySymbolicSparseList, mySymbolicSparse)

    f = theano.function([mySymbolicSparseList, mySymbolicSparse], z)

    x = sp.csr_matrix(random_lil((10, 40), theano.config.floatX, 3))
    y = sp.csr_matrix(random_lil((10, 40), theano.config.floatX, 3))

    self.assertTrue(f([x, y, y], y) == 2)
def __init__(self, rng, P_input, L2_input=None, **kwargs):
    # 1. symbol declaration, initialization and definition
    I = sparse.csr_matrix("I") if P_input is None else P_input
    shape = kwargs.get("shape") or [(16, 1, 32, 32), (4, 16, 16, 2, 2), (4, 4, 4, 2, 2)]
    dict_size, kwargs["shape"] = shape[0], shape[1:]
    D = theano.shared(
        rng.uniform(low=-1, high=1, size=dict_size).astype(theano.config.floatX)
    )
    DI = sparse.dot(I, D)  # array access = dot operation

    # 2. attaches I and D into the fgraph
    super(SparseCNN, self).__init__(rng=rng, P_input=DI, **kwargs)
    self.params += [D]
    self.P_input = I  # take I as input for the SparseCNN
def test_sparse(self):
    sp = pytest.importorskip("scipy")
    mySymbolicSparseList = TypedListType(
        sparse.SparseType("csr", theano.config.floatX))()
    mySymbolicSparse = sparse.csr_matrix()

    z = Count()(mySymbolicSparseList, mySymbolicSparse)

    f = theano.function([mySymbolicSparseList, mySymbolicSparse], z)

    x = sp.sparse.csr_matrix(random_lil((10, 40), theano.config.floatX, 3))
    y = sp.sparse.csr_matrix(random_lil((10, 40), theano.config.floatX, 3))

    assert f([x, y, y], y) == 2
def _generate_train_model_batch_function(self):
    # s = T.matrix('s', dtype=self.floatX)
    s = S.csr_matrix('s', dtype=self.floatX)
    # u = T.vector('u', dtype=self.intX)
    i = T.vector('i', dtype=self.intX)
    y = T.vector('y', dtype=self.intX)
    # items = T.vector('items', dtype=self.intX)

    Sit = self.S
    sit = s.T
    # Uu = self.U[u]
    Iy = self.I[y]

    BSy = self.BS[y]
    # BUy = self.BU[y]
    BIy = self.BI[y]

    I1i = self.I1[i]
    I2y = self.I2[y]

    # predU = T.dot(Iy, Uu.T).T + BUy.flatten()
    se = S.dot(Sit.T, sit)
    # se = T.dot(Sit.T, sit)
    predS = T.dot(Iy, se).T + BSy.flatten()
    predI = T.dot(I1i, I2y.T) + BIy.flatten()
    pred = predS + predI  # + predU
    pred = getattr(self, self.activation)(pred)
    cost = getattr(self, self.objective)(pred, y)

    param_list = [self.S]
    fullparam_list = [self.I, self.I1, self.I2, self.BI, self.BS]  # + [self.U]
    subparam_list = [Iy, I1i, I2y, BIy, BSy]  # + [Uu]
    subparam_idx = [y, i, y, y, y]  # + [u]

    updates = self.descent(cost, param_list, fullparam_list, subparam_list, subparam_idx,
                           self.learning_rate, momentum=self.momentum)
    # updates = getattr(self, self.learn)(cost, [self.U, self.S, self.I, self.IC, self.BI, self.BS], self.learning_rate)

    # self.train_model_batch = theano.function(inputs=[s, i, u, y, items], outputs=cost, updates=updates)
    inp = [s, i, y]  # + [u]
    self.train_model_batch = theano.function(inputs=inp, outputs=cost, updates=updates)
def __init__(self, model_path, bow_path, feat_path):
    # voabulary_file = os.path.join('result', 'msr2013train_voabulary_query_bow.pkl')
    self.count_vect, self.tf_transformer = cPickle.load(open(bow_path, 'rb'))
    self.img_feats = BigFile(feat_path)
    # print model_path
    devise_model = cPickle.load(open(model_path, 'rb'))

    # words_vec = T.matrix(dtype=theano.config.floatX)
    words_vec = sparse.csr_matrix(dtype=theano.config.floatX)
    img_vec = T.matrix(dtype=theano.config.floatX)

    # compile a predictor function
    self.predict_model = theano.function(
        inputs=[words_vec, img_vec],
        outputs=devise_model.predict_score_one2many(words_vec, img_vec),
        allow_input_downcast=True)
def __init__(self, in_dim, out_dim, weighted=False, sparse_input=False, output_name="out:out"):
    self.input = Loss.F_CONTAINERS[in_dim]("input")
    if sparse_input is True or isinstance(sparse_input, str) and sparse_input.lower() == "csr":
        assert in_dim == 2, "Theano only supports sparse arrays with 2 dims"
        self.input = SS.csr_matrix("input")
    if isinstance(sparse_input, str) and sparse_input.lower() == "csc":
        assert in_dim == 2, "Theano only supports sparse arrays with 2 dims"
        self.input = SS.csc_matrix("input")
    self.target = Loss.I_CONTAINERS[out_dim]("target")
    self.variables = [self.input, self.target]
    self.weight = None
    if weighted:
        self.weight = Loss.F_CONTAINERS[out_dim]("weight")
        self.variables.append(self.weight)
    self.output_name = output_name
def __init__(self, name='in', ndim=2, sparse=False, **kwargs):
    shape = kwargs.get('shape')
    if shape:
        ndim = 1 + len(shape)
    else:
        kwargs['shape'] = (None, ) * (ndim - 2) + (kwargs.pop('size'), )
    self.input = util.FLOAT_CONTAINERS[ndim](name)
    if sparse is True or \
       isinstance(sparse, util.basestring) and sparse.lower() == 'csr':
        assert ndim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csr_matrix('input')
    if isinstance(sparse, util.basestring) and sparse.lower() == 'csc':
        assert ndim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csc_matrix('input')
    kwargs.setdefault('activation', 'linear')
    super(Input, self).__init__(name=name, **kwargs)
def __init__(self, name='in', ndim=2, sparse=False, **kwargs):
    shape = kwargs.get('shape')
    if shape:
        ndim = 1 + len(shape)
    else:
        kwargs['shape'] = (None, ) * (ndim - 2) + (kwargs.pop('size'), )
    self.input = util.FLOAT_CONTAINERS[ndim](name)
    if sparse is True or \
       isinstance(sparse, util.basestring) and sparse.lower() == 'csr':
        assert ndim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csr_matrix('input')
    if isinstance(sparse, util.basestring) and sparse.lower() == 'csc':
        assert ndim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csc_matrix('input')
    kwargs['activation'] = 'linear'
    super(Input, self).__init__(name=name, **kwargs)
def test_sparse(self):
    if not scipy_imported:
        pytest.skip("Optional package SciPy not installed")
    mySymbolicSparseList = TypedListType(
        sparse.SparseType("csr", theano.config.floatX)
    )()
    mySymbolicSparse = sparse.csr_matrix()

    z = Count()(mySymbolicSparseList, mySymbolicSparse)

    f = theano.function([mySymbolicSparseList, mySymbolicSparse], z)

    x = sp.csr_matrix(random_lil((10, 40), theano.config.floatX, 3))
    y = sp.csr_matrix(random_lil((10, 40), theano.config.floatX, 3))

    assert f([x, y, y], y) == 2
def __init__(self, in_dim, out_dim, weighted=False, sparse_input=False, output_name='out:out'):
    self.input = Loss.F_CONTAINERS[in_dim]('input')
    if sparse_input is True or \
       isinstance(sparse_input, str) and sparse_input.lower() == 'csr':
        assert in_dim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csr_matrix('input')
    if isinstance(sparse_input, str) and sparse_input.lower() == 'csc':
        assert in_dim == 2, 'Theano only supports sparse arrays with 2 dims'
        self.input = SS.csc_matrix('input')
    self.target = Loss.I_CONTAINERS[out_dim]('target')
    self.variables = [self.input, self.target]
    self.weight = None
    if weighted:
        self.weight = Loss.F_CONTAINERS[out_dim]('weight')
        self.variables.append(self.weight)
    self.output_name = output_name
def __init__(self, rng, x, topic_num=100):
    # input
    L2_input = sparse.csr_matrix("x", dtype=theano.config.floatX)

    # params
    vocab_size = x.shape[1]
    mu, sigma = x.data.mean(), x.data.var() ** 0.5
    rng = numpy.random.RandomState(numpy.random.randint(2 ** 32 - 1)) if rng is None else rng
    self.L2_w = theano.shared(
        numpy.asarray(
            rng.normal(loc=mu, scale=sigma, size=(vocab_size, topic_num)),
            dtype=theano.config.floatX
        ),
        borrow=True
    )
    self.L2_b = theano.shared(numpy.zeros(topic_num, dtype=theano.config.floatX), borrow=True)
    self.params = [self.L2_w, self.L2_b]

    # stick-breaking: sticks -> orthogonal sticks
    L2_stick = sparse.dot(L2_input, self.L2_w) + self.L2_b - \
        0.5 * (L2_input.size / vocab_size * tensor.sum(self.L2_w ** 2, 0) + self.L2_b ** 2)
    zero_space = tensor.zeros((L2_input.shape[0], 1), dtype=theano.config.floatX)
    L2_orth_stick = tensor.join(1, L2_stick, zero_space) \
        - tensor.join(1, zero_space, tensor.cumsum(L2_stick, 1))
    Pasterik_orth_stick = tensor.log(1 + tensor.exp(L2_orth_stick))

    # training model definition
    Likelihood = tensor.mean(Pasterik_orth_stick)
    grads = theano.grad(Likelihood, self.params)  # gradient w.r.t. params
    eta = tensor.scalar("eta")
    updates = [(param, param + eta * grad) for param, grad in zip(self.params, grads)]
    self._fit = theano.function(
        inputs=[L2_input, eta],
        outputs=Likelihood,
        updates=updates
    )

    # predict model definition
    self._predict = theano.function(
        inputs=[L2_input],
        outputs=tensor.argmax(L2_stick, axis=-1)
    )
    self._codec = theano.function(
        inputs=[L2_input],
        outputs=L2_stick > 0
    )
def fit(self, X, y):
    if self.select_cols is not None:
        _X = X[:, self.select_cols]
    else:
        _X = X
    self.w = theano.shared(
        value=np.random.normal(0, 0.001, (_X.shape[1], 1)),  # random initialize
        name="w",
        borrow=False
    )
    x_ = TT.matrix("X")
    y_ = TT.matrix("y")
    l_ = tsparse.csr_matrix("l")
    e = ((y_ - TT.dot(x_, self.w)) ** 2).sum()
    l1_penalty = abs(self.w).sum()
    l2_penalty = TT.sqrt((self.w * self.w).sum())
    s_sparse_penalty = theano.dot(theano.dot(self.w.T, l_), self.w)
    loss = (
        e
        + self.lambda_1 * l1_penalty
        + self.lambda_2 * l2_penalty
        + self.alpha * s_sparse_penalty
    ).sum()
    x_train, x_valid, y_train, y_valid = cv.train_test_split(_X, y)
    downhill.minimize(
        loss,
        XYLDataset(x_train, y_train, self.L, batch_size=self.batch_size),
        valid=XYLDataset(x_valid, y_valid, self.L, batch_size=x_valid.shape[0]),
        params=[self.w],
        inputs=[x_, y_, l_],
        algo="rmsprop",
        **self.downhill_args
    )
    w = self.w.get_value()
    self.coef_dist = [
        (abs(w) > x).sum() for x in [0.01, 0.001, 0.0001, 0.00001, 0.000001]
    ]
def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
    '''Instantiate an input data placeholder variable.
    '''
    if shape is None and ndim is None:
        raise Exception('Specify either a shape or ndim value.')
    if shape is not None:
        ndim = len(shape)
    else:
        shape = tuple([None for _ in range(ndim)])

    broadcast = (False,) * ndim
    if sparse:
        _assert_sparse_module()
        x = th_sparse_module.csr_matrix(name=name, dtype=dtype)
    else:
        x = T.TensorType(dtype, broadcast)(name)
    x._keras_shape = shape
    x._uses_learning_phase = False
    return x
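# Hedged usage sketch (an assumption, not from the original source): with
# sparse=True the placeholder is a Theano sparse CSR variable instead of a dense
# TensorType, so only 2-D shapes make sense for it.
#
#   x_dense = placeholder(shape=(None, 100))                 # dense matrix variable
#   x_sparse = placeholder(shape=(None, 100), sparse=True)   # sparse CSR variable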
def __init__(self, rng, x, topic_num=100):
    # input
    L2_input = sparse.csr_matrix("x", dtype=theano.config.floatX)

    # params
    vocab_size = x.shape[1]
    mu, sigma = x.data.mean(), 2.56 * x.data.var() ** 0.5
    rng = numpy.random.RandomState(numpy.random.randint(2 ** 32 - 1)) if rng is None else rng
    self.L2_w = theano.shared(
        numpy.asarray(
            mu + (mu if mu < sigma else sigma) * rng.uniform(low=-1, high=1, size=(vocab_size, topic_num)),
            dtype=theano.config.floatX
        ),
        borrow=True
    )
    self.L2_b = theano.shared(numpy.zeros(topic_num, dtype=theano.config.floatX), borrow=True)
    self.params = [self.L2_w, self.L2_b]

    # output
    L2_topic = sparse.dot(L2_input, self.L2_w) + self.L2_b

    # difference-based objective function
    Pasterik_topic = tensor.log(tensor.sum(tensor.exp(L2_topic - L2_topic.max(-1, keepdims=True)), -1))  # avoiding overflow
    d_xw_w2 = tensor.mean(Pasterik_topic) - \
        0.5 * (L2_input.size * tensor.mean(self.L2_w * self.L2_w) + tensor.dot(self.L2_b, self.L2_b))
    grads = theano.grad(d_xw_w2, self.params)  # gradient w.r.t. params
    eta = tensor.scalar("eta")
    updates = [(param, param + eta * grad) for param, grad in zip(self.params, grads)]

    # training model definition
    self._fit = theano.function(
        inputs=[L2_input, eta],
        outputs=d_xw_w2,
        updates=updates
    )

    # predict model definition
    self._predict = theano.function(
        inputs=[L2_input],
        outputs=tensor.argmax(L2_topic, axis=-1)
    )
def main_sparse():
    import numpy as np
    import scipy.sparse as sp
    from time import time
    import theano
    from theano import config
    import theano.tensor as tensor
    from theano import sparse

    A = sparse.csr_matrix(dtype=config.floatX)
    x = tensor.matrix(dtype=config.floatX)
    rval, updates = theano.scan(fn=lambda x, A: sparse.basic.structured_dot(A, x),
                                outputs_info=x, non_sequences=A,
                                name='Markv_chn', n_steps=1000)
    final_y = rval[-1]
    mark_chn = theano.function(inputs=[A, x], outputs=final_y, updates=updates)

    for s in range(7):
        S = 2 ** s
        for l in range(7):
            N = 10 ** l
            # initialize the matrix
            indices = np.empty(N * S)
            for j in range(S):
                indices[j * N:(j + 1) * N] = np.random.permutation(N)
            indptr = np.array(range(N + 1)) * S
            A = sp.csr_matrix((np.random.rand(S * N), indices, indptr),
                              shape=(N, N), dtype=config.floatX)
            # initialize the state vector
            x = np.array(np.random.rand(N, 1), dtype=config.floatX)
            t1 = time()
            # now do the repeated multiplication
            y = mark_chn(A, x)
            dt = time() - t1
            print('Time taken for S = %d and N = %d is %f seconds' % (S, N, dt))
def build_feature_layers(self, i, f_name):
    for j in range(len(self.objective.examples[i].const_list)):
        if type(self.fea_vecs[f_name]) == sp.csr_matrix:
            input_var = sparse.csr_matrix(dtype='float32')
        else:
            input_var = T.matrix()
        input_layer = lasagne.layers.InputLayer(shape=(None, self.fea_vecs[f_name].shape[1]),
                                                input_var=input_var)
        self.input_vars[(i, j, f_name)] = input_var
        for var_list in self.objective.features[f_name].var_lists:
            var_tup = tuple(var_list)
            if (f_name, var_tup, 0) not in self.parameters:
                W = lasagne.init.GlorotUniform()
                b = lasagne.init.Constant(0.)
            else:
                W = self.parameters[(f_name, var_tup, 0)]
                b = self.parameters[(f_name, var_tup, 1)]
            if type(self.fea_vecs[f_name]) == sp.csr_matrix:
                feature_layer = layers.SparseLayer(input_layer, 1,
                                                   nonlinearity=lasagne.nonlinearities.sigmoid,
                                                   W=W, b=b)
            else:
                feature_layer = lasagne.layers.DenseLayer(input_layer, 1,
                                                          nonlinearity=lasagne.nonlinearities.sigmoid,
                                                          W=W, b=b)
            self.parameters[(f_name, var_tup, 0)] = feature_layer.W
            self.parameters[(f_name, var_tup, 1)] = feature_layer.b
            self.feature_layers[(i, j, f_name, var_tup)] = feature_layer
def build_model(self, A, use_text=True, use_labels=True, seed=77):
    np.random.seed(seed)
    logging.info('Graphconv model input size {}, output size {} and hidden layers {} regul {} dropout {}.'.format(
        self.input_size, self.output_size, str(self.hid_size_list), self.regul_coef, self.drop_out))
    self.X_sym = S.csr_matrix(name='inputs', dtype=self.dtype)
    self.train_indices_sym = T.lvector()
    self.dev_indices_sym = T.lvector()
    self.test_indices_sym = T.lvector()
    self.A_sym = S.csr_matrix(name='NormalizedAdj', dtype=self.dtype)
    self.train_y_sym = T.lvector()
    self.dev_y_sym = T.lvector()

    # nonlinearity = lasagne.nonlinearities.rectify
    # Wh = lasagne.init.GlorotUniform(gain='relu')
    nonlinearity = lasagne.nonlinearities.tanh
    Wh = lasagne.init.GlorotUniform(gain=1)

    # input layer
    l_in = lasagne.layers.InputLayer(shape=(None, self.input_size), input_var=self.X_sym)
    l_hid = SparseInputDenseLayer(l_in, num_units=self.hid_size_list[0], nonlinearity=nonlinearity)
    # add hidden layers
    l_hid = lasagne.layers.dropout(l_hid, p=self.drop_out)
    num_inputs_txt = int(np.prod(l_hid.output_shape[1:]))
    Wt_txt = lasagne.init.Orthogonal()
    self.gate_layers = []
    logging.info('{} gconv layers'.format(len(self.hid_size_list)))
    if len(self.hid_size_list) > 1:
        for i, hid_size in enumerate(self.hid_size_list):
            if i == 0:
                # the first, non-convolutional hidden layer has already been added
                continue
            else:
                if self.highway:
                    l_hid, l_t_hid = highway_dense(l_hid, gconv=True, nonlinearity=nonlinearity, Wt=Wt_txt, Wh=Wh)
                    self.gate_layers.append(l_t_hid)
                else:
                    l_hid = ConvolutionDenseLayer2(l_hid, num_units=hid_size, nonlinearity=nonlinearity)
    self.l_out = ConvolutionDenseLayer3(l_hid, num_units=self.output_size,
                                        nonlinearity=lasagne.nonlinearities.softmax)

    self.output = lasagne.layers.get_output(self.l_out, {l_in: self.X_sym}, A=self.A_sym, deterministic=False)
    self.train_output = self.output[self.train_indices_sym, :]
    self.train_pred = self.train_output.argmax(-1)
    self.dev_output = self.output[self.dev_indices_sym, :]
    self.dev_pred = self.dev_output.argmax(-1)
    self.train_acc = T.mean(T.eq(self.train_pred, self.train_y_sym))
    self.dev_acc = T.mean(T.eq(self.dev_pred, self.dev_y_sym))
    self.train_loss = lasagne.objectives.categorical_crossentropy(self.train_output, self.train_y_sym).mean()
    if self.regul_coef > 0:
        # add l1 regularization
        self.train_loss += lasagne.regularization.regularize_network_params(
            self.l_out, penalty=lasagne.regularization.l1) * self.regul_coef
        # add l2 regularization
        self.train_loss += lasagne.regularization.regularize_network_params(
            self.l_out, penalty=lasagne.regularization.l2) * self.regul_coef
    self.dev_loss = lasagne.objectives.categorical_crossentropy(self.dev_output, self.dev_y_sym).mean()

    # deterministic output
    self.determ_output = lasagne.layers.get_output(self.l_out, {l_in: self.X_sym}, A=self.A_sym, deterministic=True)
    self.test_output = self.determ_output[self.test_indices_sym, :]
    self.test_pred = self.test_output.argmax(-1)

    self.gate_outputs = []
    self.f_gates = []
    for i, l in enumerate(self.gate_layers):
        self.gate_outputs.append(lasagne.layers.get_output(l, {l_in: self.X_sym}, A=self.A_sym, deterministic=True))
        self.f_gates.append(theano.function([self.X_sym, self.A_sym], self.gate_outputs[i], on_unused_input='warn'))

    parameters = lasagne.layers.get_all_params(self.l_out, trainable=True)
    updates = lasagne.updates.adam(self.train_loss, parameters, learning_rate=2e-3,
                                   beta1=0.9, beta2=0.999, epsilon=1e-8)
    self.f_train = theano.function([self.X_sym, self.train_y_sym, self.dev_y_sym, self.A_sym,
                                    self.train_indices_sym, self.dev_indices_sym],
                                   [self.train_loss, self.train_acc, self.dev_loss, self.dev_acc, self.output],
                                   updates=updates, on_unused_input='warn')
    # mode=theano.compile.MonitorMode(pre_func=inspect_inputs, post_func=inspect_outputs)
    self.f_val = theano.function([self.X_sym, self.A_sym, self.test_indices_sym],
                                 [self.test_pred, self.test_output],
                                 on_unused_input='warn')
    self.init_params = lasagne.layers.get_all_param_values(self.l_out)
    return self.l_out
def test_c_against_sparse_mat_transp_mul(self):
    # like test_c_against_mat_transp_mul, but using a sparse matrix and a
    # kernel that is smaller than the image
    if not theano.sparse.enable_sparse:
        raise SkipTest('Optional package sparse disabled')

    batchSize = self.rng.randint(1, 3)
    filterWidth = self.rng.randint(1, 8)
    filterHeight = self.rng.randint(1, 8)
    filterDur = self.rng.randint(1, 8)
    self.d.get_value(borrow=True, return_internal_type=True)[0] = \
        self.rng.randint(1, 15)
    self.d.get_value(borrow=True, return_internal_type=True)[1] = \
        self.rng.randint(1, 15)
    self.d.get_value(borrow=True, return_internal_type=True)[2] = \
        self.rng.randint(1, 15)
    dr = self.d.get_value(borrow=True)[0]
    dc = self.d.get_value(borrow=True)[1]
    dt = self.d.get_value(borrow=True)[2]

    numFilters = self.rng.randint(1, 3)

    row_steps = self.rng.randint(1, 4)
    col_steps = self.rng.randint(1, 4)
    time_steps = self.rng.randint(1, 4)
    # print (row_steps, col_steps, time_steps)

    videoDur = (time_steps - 1) * dt + filterDur + self.rng.randint(0, 3)
    videoWidth = (col_steps - 1) * dc + filterWidth + self.rng.randint(0, 3)
    videoHeight = (row_steps - 1) * dr + filterHeight + self.rng.randint(0, 3)

    inputChannels = self.rng.randint(1, 15)

    self.W.set_value(self.random_tensor(numFilters, filterHeight, filterWidth,
                                        filterDur, inputChannels), borrow=True)
    self.b.set_value(self.random_tensor(numFilters), borrow=True)

    # just needed so H_shape works
    self.V.set_value(self.random_tensor(batchSize, videoHeight, videoWidth,
                                        videoDur, inputChannels), borrow=True)
    self.rb.set_value(self.random_tensor(inputChannels), borrow=True)

    H_shape = self.H_shape_func()

    # make index maps
    h = N.zeros(H_shape[1:], dtype='int32')
    r = N.zeros(H_shape[1:], dtype='int32')
    c = N.zeros(H_shape[1:], dtype='int32')
    t = N.zeros(H_shape[1:], dtype='int32')

    for qi in range(0, H_shape[4]):
        h[:, :, :, qi] = qi
    for qi in range(0, H_shape[1]):
        r[qi, :, :, :] = qi
    for qi in range(0, H_shape[2]):
        c[:, qi, :, :] = qi
    for qi in range(0, H_shape[3]):
        t[:, :, qi, :] = qi

    hn = H_shape[1] * H_shape[2] * H_shape[3] * H_shape[4]
    h = h.reshape((hn))
    r = r.reshape((hn))
    c = c.reshape((hn))
    t = t.reshape((hn))

    Hv = self.random_tensor(*H_shape)

    Vv = self.transp_func(Hv, [videoHeight, videoWidth, videoDur])

    n = inputChannels * videoHeight * videoWidth * videoDur

    rbim = N.zeros((videoHeight, videoWidth, videoDur, inputChannels))
    for qi in range(0, inputChannels):
        rbim[:, :, :, qi] = self.rb.get_value(borrow=True)[qi]
    rbv = rbim.reshape((n))

    W_mat = N.zeros((hn, n))
    Vv_mat = N.zeros((n, batchSize))
    Hv_mat = N.zeros((hn, batchSize))
    for qi in range(0, hn):
        hi = h[qi]
        ri = r[qi]
        ci = c[qi]
        ti = t[qi]
        placed_filter = N.zeros(self.V.get_value(borrow=True).shape[1:])
        placed_filter[ri * dr:ri * dr + self.W.get_value(borrow=True).shape[1],
                      ci * dc:ci * dc + self.W.get_value(borrow=True).shape[2],
                      ti * dt:ti * dt + self.W.get_value(borrow=True).shape[3], :] = \
            self.W.get_value(borrow=True)[hi, :, :, :, :]
        W_mat[qi, :] = placed_filter.reshape((n))
        Hv_mat[qi, :] = Hv[:, ri, ci, ti, hi]

    for qi in range(0, batchSize):
        Vv_mat[:, qi] = Vv[qi, :, :, :, :].reshape((n))

    W_mat_T = sparse.csr_matrix(W_mat.transpose())
    temp = W_mat_T * Hv_mat
    V_mat = (temp.transpose() + rbv).transpose()

    if N.abs(V_mat - Vv_mat).max() > 1e-5:
        print('mul')
        print(V_mat)
        print('conv')
        print(Vv_mat)
        for i in range(0, n):
            for j in range(0, batchSize):
                if abs(V_mat[i, j] - Vv_mat[i, j]) > 1e-5:
                    print('wrong at %d,%d: %f mul versus %f conv'
                          % (i, j, V_mat[i, j], Vv_mat[i, j]))
        assert False
def TrainFn(fnsim, embeddings, leftop, rightop, marge=1.0):
    """
    This function returns a theano function to perform a training iteration,
    contrasting couples of positive and negative triplets. Members are given
    as sparse matrices. For one positive triplet there is one negative
    triplet.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param marge: margin for the cost function.
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr = S.csr_matrix()
    inpl = S.csr_matrix()
    inpo = S.csr_matrix()
    inpln = S.csr_matrix()
    inprn = S.csr_matrix()
    inpon = S.csr_matrix()
    lrparams = T.scalar('lrparams')
    lrembeddings = T.scalar('lrembeddings')

    # Graph
    # Positive triplet
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    # Negative triplet
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T
    relln = S.dot(relationl.E, inpon).T
    relrn = S.dot(relationr.E, inpon).T
    simin = fnsim(leftop(lhsn, relln), rightop(rhsn, relrn))
    cost, out = margincost(simi, simin, marge)

    # Parameters gradients
    if hasattr(fnsim, 'params'):
        # If the similarity function has some parameters, we update them too.
        gradientsparams = T.grad(cost,
                                 leftop.params + rightop.params + fnsim.params)
        updates = OrderedDict((i, i - lrparams * j) for i, j in zip(
            leftop.params + rightop.params + fnsim.params, gradientsparams))
    else:
        gradientsparams = T.grad(cost, leftop.params + rightop.params)
        updates = OrderedDict((i, i - lrparams * j) for i, j in zip(
            leftop.params + rightop.params, gradientsparams))

    # Embeddings gradients
    gradients_embedding = T.grad(cost, embedding.E)
    newE = embedding.E - lrembeddings * gradients_embedding
    updates.update({embedding.E: newE})
    if type(embeddings) == list:
        # If there are different embeddings for the relation member.
        gradients_embedding = T.grad(cost, relationl.E)
        newE = relationl.E - lrparams * gradients_embedding
        updates.update({relationl.E: newE})
        gradients_embedding = T.grad(cost, relationr.E)
        newE = relationr.E - lrparams * gradients_embedding
        updates.update({relationr.E: newE})

    """
    Theano function inputs.
    :input lrembeddings: learning rate for the embeddings.
    :input lrparams: learning rate for the parameters.
    :input inpl: sparse csr matrix representing the indexes of the positive
                 triplet 'left' member, shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive
                 triplet 'right' member, shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive
                 triplet relation member, shape=(#examples, N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative
                  triplet 'left' member, shape=(#examples, N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative
                  triplet 'right' member, shape=(#examples, N [Embeddings]).
    :input inpon: sparse csr matrix representing the indexes of the negative
                  triplet relation member, shape=(#examples, N [Embeddings]).

    Theano function output.
    :output mean(cost): average cost.
    :output mean(out): ratio of examples for which the margin is violated,
                       i.e. for which an update occurs.
    """
    return theano.function([lrembeddings, lrparams, inpl, inpr, inpo,
                            inpln, inprn, inpon],
                           [T.mean(cost), T.mean(out)],
                           updates=updates, on_unused_input='ignore')
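# Why the inputs are sparse one-hot matrices: the graph above computes
# S.dot(embedding.E, inpl).T, which is a batched embedding lookup, so each
# input needs one one-hot column per example. The NumPy/SciPy sketch below is
# a self-contained illustration of that mechanism only; the sizes are made-up
# assumptions and nothing here comes from the surrounding codebase.
import numpy as np
import scipy.sparse as sp

nent, dim, batch = 10, 4, 3                          # entities, embedding dim, examples
E_demo = np.random.randn(dim, nent).astype('float32')  # stands in for embedding.E
idxs = np.array([2, 7, 5])                           # entity index of each example
inpl_demo = sp.csr_matrix((np.ones(batch, dtype='float32'),
                           (idxs, np.arange(batch))),
                          shape=(nent, batch))       # one one-hot column per example
lhs_demo = inpl_demo.T.dot(E_demo.T)                 # == S.dot(E, inpl).T, shape (batch, dim)
assert np.allclose(lhs_demo, E_demo[:, idxs].T)

# The compiled function is then called following its input list (sketch only;
# posl, negl, etc. are hypothetical one-hot CSR matrices built as above):
#     trainfunc = TrainFn(fnsim, embeddings, leftop, rightop, marge=1.0)
#     mean_cost, violations = trainfunc(lrembeddings, lrparams,
#                                       posl, posr, poso, negl, negr, nego)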
parser.add_argument('--param_reg', help='the regularization factor of the parameters',
                    type=float, default=0.001)
parser.add_argument('--ent_reg', help='the factor of entropy regularization',
                    type=float, default=0.0)
args = parser.parse_args()

lasagne.random.set_rng(np.random)
np.random.seed(0)

features, labels, label_set = data.read_content_citeseer(args.corpus)
split = data.split_data(labels, args.seeds)
maxf = get_maxf(features)
trainx, trainy = constuct_dataset(features, labels, label_set, split[0], maxf)
testx, testy = constuct_dataset(features, labels, label_set, split[1], maxf)
allx, ally = constuct_dataset(features, labels, label_set, features.keys(), maxf)

input_var = sparse.csr_matrix(name='x', dtype='float32')
un_var = sparse.csr_matrix(name='ux', dtype='float32')
target_var = T.imatrix('targets')
ent_target = T.ivector('ent_targets')

network, l_entropy = build_model(input_var, maxf + 1, trainy.shape[1],
                                 args.ent_reg > 0, un_var)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean() + regularize_layer_params(network, l2) * args.param_reg
if args.ent_reg > 0.0:
    ent_pred = lasagne.layers.get_output(l_entropy)
    loss += lasagne.objectives.binary_crossentropy(ent_pred, ent_target).mean() * args.ent_reg
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params,
                                            learning_rate=args.learning_rate,
                                            momentum=0.9)
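# A plausible continuation of this script (a sketch, not from the source):
# compile the update step and an accuracy function, then loop over epochs.
# It assumes args.ent_reg == 0 (so un_var/ent_target are absent from the graph)
# and an `args.epochs` flag that is not shown in this excerpt.
train_fn = theano.function([input_var, target_var], loss, updates=updates)
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1),
                       T.argmax(target_var, axis=1)))
test_fn = theano.function([input_var, target_var], test_acc)
for epoch in xrange(args.epochs):
    train_loss = train_fn(trainx, trainy)
    print('epoch %d train loss %f test acc %f' %
          (epoch, float(train_loss), float(test_fn(testx, testy))))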
def ForwardFn1Member(fnsim, embeddings, leftop, rightop, marge=1.0, rel=True):
    """
    This function returns a theano function to perform a forward step,
    contrasting positive and negative triplets. Members are given as sparse
    matrices. For one positive triplet there are two or three (if rel == True)
    negative triplets. To create a negative triplet we replace only one member
    at a time.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param marge: margin for the cost function.
    :param rel: boolean, if true we also contrast w.r.t. a negative relation
                member.

    :note: this is useful for W_SABIE [Weston et al., IJCAI 2011]
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr = S.csr_matrix()
    inpl = S.csr_matrix()
    inpo = S.csr_matrix()
    inpln = S.csr_matrix()
    inprn = S.csr_matrix()

    # Graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    similn = fnsim(leftop(lhsn, rell), rightop(rhs, relr))
    simirn = fnsim(leftop(lhs, rell), rightop(rhsn, relr))
    costl, outl = margincost(simi, similn, marge)
    costr, outr = margincost(simi, simirn, marge)
    # inprn must be an input: outr depends on it through simirn.
    list_in = [inpl, inpr, inpo, inpln, inprn]
    list_out = [outl, outr]
    if rel:
        inpon = S.csr_matrix()
        relln = S.dot(relationl.E, inpon).T
        relrn = S.dot(relationr.E, inpon).T
        simion = fnsim(leftop(lhs, relln), rightop(rhs, relrn))
        costo, outo = margincost(simi, simion, marge)
        out = T.concatenate([outl, outr, outo])
        list_in += [inpon]
        list_out += [outo]

    """
    Theano function inputs.
    :input inpl: sparse csr matrix representing the indexes of the positive
                 triplet 'left' member, shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive
                 triplet 'right' member, shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive
                 triplet relation member, shape=(#examples, N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative
                  triplet 'left' member, shape=(#examples, N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative
                  triplet 'right' member, shape=(#examples, N [Embeddings]).
    :opt input inpon: sparse csr matrix representing the indexes of the
                      negative triplet relation member,
                      shape=(#examples, N [Embeddings]).

    Theano function output.
    :output outl: binary vector representing when the margin is violated, i.e.
                  when an update occurs, for the 'left' member.
    :output outr: binary vector representing when the margin is violated, i.e.
                  when an update occurs, for the 'right' member.
    :opt output outo: binary vector representing when the margin is violated,
                      i.e. when an update occurs, for the relation member.
    """
    return theano.function(list_in, list_out, on_unused_input='ignore')
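# Illustrative call pattern (a sketch; the one-hot CSR inputs follow the same
# conventions demonstrated after TrainFn above, and `fnsim`, `embeddings`,
# `leftop`, `rightop` are whatever concrete objects the codebase provides):
#     forwardfunc = ForwardFn1Member(fnsim, embeddings, leftop, rightop,
#                                    marge=1.0, rel=True)
#     outl, outr, outo = forwardfunc(posl, posr, poso, negl, negr, nego)
# outl.mean() is then the fraction of 'left'-corrupted triplets that violate
# the margin, which is the quantity W_SABIE-style sampling needs.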
def TrainFn1Member(fnsim, embeddings, leftop, rightop, marge=1.0, rel=True):
    """
    This function returns a theano function to perform a training iteration,
    contrasting positive and negative triplets. Members are given as sparse
    matrices. For one positive triplet there are two or three (if rel == True)
    negative triplets. To create a negative triplet we replace only one member
    at a time.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param marge: margin for the cost function.
    :param rel: boolean, if true we also contrast w.r.t. a negative relation
                member.
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr = S.csr_matrix()
    inpl = S.csr_matrix()
    inpo = S.csr_matrix()
    inpln = S.csr_matrix()
    inprn = S.csr_matrix()
    lrparams = T.scalar('lrparams')
    lrembeddings = T.scalar('lrembeddings')

    # Graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    # Negative 'left' member
    similn = fnsim(leftop(lhsn, rell), rightop(rhs, relr))
    # Negative 'right' member
    simirn = fnsim(leftop(lhs, rell), rightop(rhsn, relr))
    costl, outl = margincost(simi, similn, marge)
    costr, outr = margincost(simi, simirn, marge)
    cost = costl + costr
    out = T.concatenate([outl, outr])
    # List of inputs of the function
    list_in = [lrembeddings, lrparams, inpl, inpr, inpo, inpln, inprn]
    if rel:
        # If rel is True, we also consider a negative relation member
        inpon = S.csr_matrix()
        relln = S.dot(relationl.E, inpon).T
        relrn = S.dot(relationr.E, inpon).T
        simion = fnsim(leftop(lhs, relln), rightop(rhs, relrn))
        costo, outo = margincost(simi, simion, marge)
        cost += costo
        out = T.concatenate([out, outo])
        list_in += [inpon]

    if hasattr(fnsim, 'params'):
        # If the similarity function has some parameters, we update them too.
        gradientsparams = T.grad(cost,
                                 leftop.params + rightop.params + fnsim.params)
        updates = OrderedDict((i, i - lrparams * j) for i, j in zip(
            leftop.params + rightop.params + fnsim.params, gradientsparams))
    else:
        gradientsparams = T.grad(cost, leftop.params + rightop.params)
        updates = OrderedDict((i, i - lrparams * j) for i, j in zip(
            leftop.params + rightop.params, gradientsparams))
    gradients_embedding = T.grad(cost, embedding.E)
    newE = embedding.E - lrembeddings * gradients_embedding
    updates.update({embedding.E: newE})
    if type(embeddings) == list:
        # If there are different embeddings for the relation member.
        gradients_embedding = T.grad(cost, relationl.E)
        newE = relationl.E - lrparams * gradients_embedding
        updates.update({relationl.E: newE})
        gradients_embedding = T.grad(cost, relationr.E)
        newE = relationr.E - lrparams * gradients_embedding
        updates.update({relationr.E: newE})

    """
    Theano function inputs.
    :input lrembeddings: learning rate for the embeddings.
    :input lrparams: learning rate for the parameters.
    :input inpl: sparse csr matrix representing the indexes of the positive
                 triplet 'left' member, shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive
                 triplet 'right' member, shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive
                 triplet relation member, shape=(#examples, N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative
                  triplet 'left' member, shape=(#examples, N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative
                  triplet 'right' member, shape=(#examples, N [Embeddings]).
    :opt input inpon: sparse csr matrix representing the indexes of the
                      negative triplet relation member,
                      shape=(#examples, N [Embeddings]).

    Theano function output.
    :output mean(cost): average cost.
    :output mean(out): ratio of examples for which the margin is violated,
                       i.e. for which an update occurs.
    """
    return theano.function(list_in, [T.mean(cost), T.mean(out)],
                           updates=updates, on_unused_input='ignore')
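# Illustrative call (a sketch; the learning-rate values are arbitrary
# assumptions and the one-hot CSR inputs follow the conventions sketched
# after TrainFn above):
#     trainfunc = TrainFn1Member(fnsim, embeddings, leftop, rightop,
#                                marge=1.0, rel=True)
#     mean_cost, violations = trainfunc(0.01, 0.01,        # lrembeddings, lrparams
#                                       posl, posr, poso,  # positive triplets
#                                       negl, negr, nego)  # corrupted members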