def defmodel(self):
    winp, rinp, hinp = T.ivectors("winp", "rinp", "hinp")
    nwinp, nrinp, nhinp = T.ivectors("nwinp", "nrinp", "nhinp")
    dotp = self.builddot(winp, rinp, hinp, self.rnnu)
    ndotp = self.builddot(nwinp, nrinp, nhinp, self.rnnu)
    dotp = dotp.reshape((dotp.shape[0], 1))
    ndotp = ndotp.reshape((ndotp.shape[0], 1))
    return [dotp, ndotp], [rinp, winp, hinp, nrinp, nwinp, nhinp]
def defmodel(self):
    ''' Define model '''
    winp, rinp, hinp = T.ivectors("winp", "rinp", "hinp")
    nwinp, nrinp, nhinp = T.ivectors("nwinp", "nrinp", "nhinp")
    dotp = self.builddot(winp, rinp, hinp)
    ndotp = self.builddot(nwinp, nrinp, nhinp)
    dotp = dotp.reshape((dotp.shape[0], 1))
    ndotp = ndotp.reshape((ndotp.shape[0], 1))
    return [dotp, ndotp], [winp, rinp, hinp, nwinp, nrinp, nhinp]
def defmodel(self):
    '''
    Define model
    :return: ([positive dot product, negative dot product],
              [positive left index variable, positive right index variable,
               negative left index var, negative right index var])
    '''
    winp, hinp = T.ivectors("winp", "hinp")
    nwinp, nhinp = T.ivectors("nwinp", "nhinp")
    dotp = T.nnet.sigmoid(T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1))
    ndotp = T.nnet.sigmoid(T.sum(self.W[nwinp, :] * self.H[:, nhinp].T, axis=1))
    dotp = dotp.reshape((dotp.shape[0], 1))
    ndotp = ndotp.reshape((ndotp.shape[0], 1))
    return [dotp, ndotp], [winp, hinp, nwinp, nhinp]
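# --- Illustrative usage sketch (added; not from the original sources) ---
# Shows how the pair returned by defmodel() above could feed a contrastive
# hinge objective, mirroring the train() methods further below. `model` and
# `lr` are hypothetical names, and plain SGD stands in for the clipped
# updates the original code uses.
[dotp, ndotp], inputs = model.defmodel()
# hinge contrast: push each positive score above its negative by a margin of 1
err = T.sum(T.maximum(0, 1 - dotp + ndotp))
updates = [(p, p - lr * T.grad(err, p)) for p in (model.W, model.H)]
trainf = theano.function(inputs=inputs, outputs=[err], updates=updates)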
def _create_hessian_update_mechanism(self):
    # create hessian theano variables:
    self.create_shared_variables_hessian_variables()
    # symbolic examples:
    examples = []
    example_indices = []
    examples_tuples = []
    for i in range(self.batch_size):
        # names are passed directly (a list argument to T.ivectors is not
        # reliably unpacked across Theano versions)
        index, label = T.ivectors('indices', 'labels')
        examples_tuples.append((index, label))
        examples.append(index)
        example_indices.append(index)
        examples.append(label)
    indices = theano_unique(T.concatenate(example_indices))
    t0 = time.time()
    print("1/2 Calculating gradients...")
    hf_updates, costs = self.symbolic_one_step(examples_tuples, indices)
    t1 = time.time()
    print("Took %.2fs to compute gradients\n2/2 Compiling hessian updates..." % (t1 - t0))
    self.update_fun = theano.function(examples,
                                      sum(costs),
                                      updates=hf_updates,
                                      mode=self.theano_mode)
    t2 = time.time()
    print("Took %.2fs to compile hessian updates" % (t2 - t1))
def defmodel(self):
    lhs = T.ivector("lhs")
    rhs, nrhs = T.ivectors("rhs", "nrhs")
    lhsemb = self.entembs[lhs, :]
    rhsemb = self.W[rhs, :]
    nrhsemb = self.W[nrhs, :]
    pdot = T.batched_dot(lhsemb, rhsemb)
    ndot = T.batched_dot(lhsemb, nrhsemb)
    return pdot, ndot, [lhs, rhs, nrhs]
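# Illustrative note (added; not from the original source): with two 2-D
# inputs of shape (batch, dims), T.batched_dot returns one dot product per
# row, so pdot above has shape (batch,). A quick numeric check:
a = np.array([[1., 2.], [3., 4.]])
b = np.array([[5., 6.], [7., 8.]])
print(T.batched_dot(a, b).eval())  # -> [ 17.  53.]  i.e. [1*5+2*6, 3*7+4*8]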
def defmodel(self):
    '''
    define model
    :return: ([output variable, gold standard variable],
              [index variable for W, index variable for H])
    '''
    winp, hinp = T.ivectors("winp", "hinp")
    outp = self.X[winp, hinp]
    dotp = T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1)
    return [dotp, outp], [winp, hinp]
def test_rebuild_strict(self):
    # Test fix for error reported at
    # https://groups.google.com/d/topic/theano-users/BRK0UEB72XA/discussion
    w = tensor.imatrix()
    x, y = tensor.ivectors("x", "y")
    z = x * y
    f = theano.function([w, y], z, givens=[(x, w)], rebuild_strict=False)
    z_val = f(np.ones((3, 5), dtype="int32"), np.arange(5, dtype="int32"))
    assert z_val.ndim == 2
    assert np.all(z_val == np.ones((3, 5)) * np.arange(5))
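# For contrast (illustrative, not part of the test above): with the default
# rebuild_strict=True, givens may only substitute a variable of the same
# type, so replacing the ivector x with the imatrix w is rejected at
# compile time:
#   f = theano.function([w, y], z, givens=[(x, w)])  # raises a TypeError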
def predict(self, idxs):
    '''
    :param idxs: vector of tuples of integer indexes for embeddings
    :return: vector of floats of predictions
    '''
    idxs = np.asarray(idxs).astype("int32")
    winp, hinp = T.ivectors("winpp", "hinpp")
    dotp = T.nnet.sigmoid(T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1))
    pfun = theano.function(
        inputs=[winp, hinp],
        outputs=[dotp]
    )
    return pfun(*[idxs[:, i] for i in range(idxs.shape[1])])
def predict(self, idxs):
    '''
    :param idxs: vector of tuples of integer indexes for embeddings
    :return: vector of floats of predictions
    '''
    idxs = np.asarray(idxs).astype("int32")
    winp, rinp, hinp = T.ivectors("winpp", "rinpp", "hinpp")
    dotp = self.builddot(winp, rinp, hinp)
    pfun = theano.function(
        # rinp was missing from the inputs, leaving builddot's relation
        # index unbound at compile time
        inputs=[winp, rinp, hinp],
        outputs=[dotp]
    )
    return pfun(*[idxs[:, i] for i in range(idxs.shape[1])])
def defmodel(self):
    sidx, ridx, oidx = T.ivectors("sidx", "ridx", "oidx")
    outp = self.builddot(sidx, ridx, self.rnnu)  # (batsize, dims)
    Nclasses = int(math.ceil(math.sqrt(self.vocabsize)))
    Noutsperclass = int(math.ceil(math.sqrt(self.vocabsize)))
    # H-sm (disabled): parameters for a two-level hierarchical softmax
    #self.sm1w = theano.shared(np.random.random((self.dims, Nclasses)).astype("float32")*scale-offset)
    #self.sm1b = theano.shared(np.random.random((Nclasses,)).astype("float32")*scale-offset)
    #self.sm2w = theano.shared(np.random.random((Nclasses, self.dims, Noutsperclass)).astype("float32")*scale-offset)
    #self.sm2b = theano.shared(np.random.random((Nclasses, Noutsperclass)).astype("float32")*scale-offset)
    # H-sm (disabled):
    #probs = h_softmax(outp, self.batsize, self.vocabsize, Nclasses, Noutsperclass,
    #                  self.sm1w, self.sm1b, self.sm2w, self.sm2b, oidx)
    outdot = T.dot(outp, self.smlayer)
    probs = T.nnet.softmax(outdot)
    #showgraph(probs)
    return probs, [sidx, ridx], oidx  # probs: (batsize, vocabsize)
def test_adv_subtensor():
    # Test the advancedsubtensor on gpu.
    shp = (2, 3, 4)
    shared = gpuarray_shared_constructor
    xval = np.arange(np.prod(shp), dtype=theano.config.floatX).reshape(shp)
    idx1, idx2 = tensor.ivectors('idx1', 'idx2')
    idxs = [idx1, None, slice(0, 2, 1), idx2, None]
    x = shared(xval, name='x')
    expr = x[idxs]
    f = theano.function([idx1, idx2], expr, mode=mode_with_gpu)
    assert sum([isinstance(node.op, GpuAdvancedSubtensor)
                for node in f.maker.fgraph.toposort()]) == 1
    idx1_val = [0, 1]
    idx2_val = [0, 1]
    rval = f(idx1_val, idx2_val)
    rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
    assert np.allclose(rval, rep)
def train(self, X, numrows=None, numcols=None, evalinter=10): self.initvars(X, numrows=numrows, numcols=numcols) # define errors and costs winp, hinp = T.ivectors("winp", "hinp") nwinp, nhinp = T.ivectors("nwinp", "nhinp") dotp = T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1) ndotp = T.sum(self.W[nwinp, :] * self.H[:, nhinp].T, axis=1) dotp = dotp.reshape((dotp.shape[0], 1)) ndotp = ndotp.reshape((ndotp.shape[0], 1)) #embed() tErr = T.sum(T.max(T.concatenate([T.zeros_like(dotp), 1 - dotp + ndotp], axis=1), axis=1)) # hinge contrast tReg = (1./2.) * (T.sum(self.W[winp, :]**2) * self.Wreg + T.sum(self.H[:, hinp]**2) * self.Hreg) tCost = tErr + tReg #embed() # get gradients gW = T.grad(tCost, self.W) gH = T.grad(tCost, self.H) numsam = X.shape[0] batsize = int(ceil(numsam*1./self.numbats)) numbats = self.numbats # define updates and function updW = (self.W, T.clip(self.W - self.lr * numbats * gW, 0, np.infty)) updH = (self.H, T.clip(self.H - self.lr * numbats * gH, 0, np.infty)) trainf = theano.function( inputs=[winp, hinp, nwinp, nhinp], outputs=[tErr], updates=[updW, updH], profile=True ) negrate = self.negrate def batchloop(): c = 0 idxs = range(X.shape[0]) np.random.shuffle(idxs) prevperc = -1. maxc = numbats ts = 0. toterr = 0. while c < maxc-1: sliceidxs = idxs[c*batsize: min((c+1)*batsize, len(idxs))] possamples = X[sliceidxs].copy() samples = np.concatenate([possamples]*(negrate+1)) samples = np.concatenate([samples, samples], axis=1) for i in range(samples.shape[0]): corruptcolumn = np.random.choice([2, 3]) samples[i, corruptcolumn] = np.random.randint(0, numrows if corruptcolumn == 2 else numcols) #region Percentage counting perc = round(c*100./maxc) if perc > prevperc: print("iter progress %.0f" % perc + "% ", end='\r') prevperc = perc #endregion toterr += trainf(samples[:, 0].astype("int32"), samples[:, 1].astype("int32"), samples[:, 2].astype("int32"), samples[:, 3].astype("int32"))[0] c += 1 return toterr err = self.trainloop(X, batchloop, evalinter=0) return self.W.get_value(), self.H.get_value(), err
def train(self, X, numrows=None, numcols=None, evalinter=10): self.initvars(X, numrows=numrows, numcols=numcols) # define errors and costs winp, hinp = T.ivectors("winp", "hinp") outp = T.fvector("outp") dotp = T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1) # embed() tErr = (1./2.) * T.sum((outp - dotp)**2) # MSE tReg = (1./2.) * (T.sum(self.W[winp, :]**2) * self.Wreg + T.sum(self.H[:, hinp]**2) * self.Hreg) tCost = tErr + tReg # embed() # get gradients gW = T.grad(tCost, self.W) gH = T.grad(tCost, self.H) numsam = X.shape[0] batsize = int(ceil(numsam*1./self.numbats)) numbats = self.numbats # define updates and function updW = (self.W, T.clip(self.W - self.lr * numbats * gW, 0, np.infty)) updH = (self.H, T.clip(self.H - self.lr * numbats * gH, 0, np.infty)) trainf = theano.function( inputs=[winp, hinp, outp], outputs=[tErr], updates=[updW, updH], profile=True ) negrate = self.negrate def batchloop(): c = 0 idxs = range(X.shape[0]) np.random.shuffle(idxs) prevperc = -1. maxc = numbats ts = 0. toterr = 0. while c < maxc-1: sliceidxs = idxs[c*batsize: min((c+1)*batsize, len(idxs))] possamples = X[sliceidxs] posouts = np.ones((possamples.shape[0],), dtype="float32") negsamples = [] for i in range(possamples.shape[0]): for j in range(negrate): corruptdis = possamples[i, :] columntocorrupt = np.random.choice(len(corruptdis)) corruptdis[columntocorrupt] = np.random.randint(0, numrows if columntocorrupt == 0 else numcols) negsamples.append(corruptdis) negsamples = np.asarray(negsamples) negouts = np.zeros((negsamples.shape[0],), dtype="float32") if possamples.ndim != negsamples.ndim: embed() samples = np.concatenate((possamples, negsamples), axis=0) outs = np.concatenate((posouts, negouts)) #region Percentage counting perc = round(c*100./maxc) if perc > prevperc: print("iter progress %.0f" % perc + "% ", end='\r') prevperc = perc #endregion toterr += trainf(samples[:, 0].astype("int32"), samples[:, 1].astype("int32"), outs)[0] c += 1 return toterr err = self.trainloop(X, batchloop, evalinter=0) return self.W.get_value(), self.H.get_value(), err
def getpredf(self):
    # function to compute the predicted vector given entity and relation
    winp, rinp = T.ivectors("winpp", "rinpp")
    om = self.prebuilddot(winp, rinp, self.rnnu)
    return theano.function(inputs=[rinp, winp], outputs=[om])
def getpreddotf(self):
    # function to compute the score for a triple (array) given the indexes
    winp, rinp, hinp = T.ivectors("winppp", "rinppp", "hinppp")
    om = self.builddot(winp, rinp, hinp, self.rnnu)
    return theano.function(inputs=[rinp, winp, hinp], outputs=[om])
def get_rec_prob_func(self):
    # Works with NCE
    x, y = T.ivectors('x', 'y')
    rec_prob_func = theano.function([x, y], self.get_sym_rec_prob(x, y))
    return rec_prob_func
def defmodel(self):
    sidx = T.ivector("sidx")
    pathidxs = T.imatrix("pathidxs")
    zidx, nzidx = T.ivectors("zidx", "nzidx")  # rhs corruption only
    dotp, ndotp = self.definnermodel(sidx, pathidxs, zidx, nzidx)
    return dotp, ndotp, [sidx, pathidxs, zidx, nzidx]
def init_functions(self):
    '''Construct functions for the model'''
    # Construct the objective function
    # Input variables (names passed directly; a list argument to T.ivectors
    # is not reliably unpacked across Theano versions)
    u_i, y_s, y_t = T.ivectors('u_i', 'y_s', 'y_t')
    dropout = T.fscalar(name='p')

    # Intermediate variables: n_examples * n_songs
    item_scores = T.dot(self._U[u_i], self._V.T) + self._b
    # subtract off the row-wise max for numerical stability
    item_scores = item_scores - item_scores.max(axis=1, keepdims=True)
    e_scores = T.exp(item_scores)

    if T.gt(dropout, 0.0):
        # NB: `dropout` is symbolic, so this condition is always truthy at
        # graph-construction time; the mask below degenerates to all-ones
        # when dropout == 0 at run time.
        # Construct a random dropout mask
        retain_prob = 1.0 - dropout
        M = self._rng.binomial(e_scores.shape,
                               p=retain_prob,
                               dtype=theano.config.floatX)
        # Importance weight so that E[M[i,j]] = 1
        M /= retain_prob
        # The positive examples should always be sampled
        M = theano.tensor.set_subtensor(M[T.arange(y_t.shape[0]), y_t], 1.0)
        e_scores = e_scores * M

    # Edge feasibilities: n_examples * n_edges
    prev_feas = sparse_slice_rows(self.H, y_s)
    # Detect and reset initial-state transitions
    prev_feas = theano.tensor.set_subtensor(prev_feas[y_s < 0, :], 1)

    # Raw edge probabilities: n_examples * n_edges
    edge_given_prev = T.nnet.softmax(prev_feas * self._w)

    # Compute edge normalization factors: n_examples * n_edges
    # sum of score mass in each edge for each user
    edge_norms = ts.dot(e_scores, self.H)

    # Slice the edge weights according to incoming feasibilities: n_examples
    next_weight = e_scores[T.arange(y_t.shape[0]), y_t]

    # Marginalize: n_examples * n_edges
    next_feas = sparse_slice_rows(self.H, y_t)
    probs = next_weight * T.sum(next_feas * (edge_given_prev / (_EPS + edge_norms)),
                                axis=1,
                                keepdims=True)

    # Data likelihood term
    ll = T.log(probs)
    avg_ll = ll.mean()

    # Priors
    w_prior = -0.5 * self.edge_reg * (self._w**2).sum()
    b_prior = -0.5 * self.bias_reg * (self._b**2).sum()
    u_prior = -0.5 * self.user_reg * (self._U**2).sum()
    v_prior = -0.5 * self.song_reg * (self._V**2).sum()

    # negative log-MAP objective
    cost = -1.0 * (avg_ll + u_prior + v_prior + b_prior + w_prior)

    # Construct the updates
    variables = []
    if 'e' in self.params:
        variables.append(self._w)
    if 'b' in self.params:
        variables.append(self._b)
    if 'u' in self.params:
        variables.append(self._U)
    if 's' in self.params:
        variables.append(self._V)

    updates = lasagne.updates.adagrad(cost, variables)

    self._train = theano.function(inputs=[u_i, y_s, y_t, dropout],
                                  outputs=[avg_ll, cost],
                                  updates=updates)
    self._loglikelihood = theano.function(
        inputs=[u_i, y_s, y_t,
                theano.Param(dropout, default=0.0, name='p')],
        outputs=[ll])
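# Worked check of the importance weighting above (added; illustrative):
# with dropout = 0.2 we get retain_prob = 0.8, and each retained mask entry
# equals 1 / 0.8 = 1.25, so E[M[i, j]] = 0.8 * 1.25 + 0.2 * 0.0 = 1.0 and
# the expected masked score mass matches the unmasked one.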
def fit(self, X, learning_rate=1e-5, mu=0.99, activation=T.nnet.relu,
        RecurrentUnit=LSTM, normalize=True, epochs=10, show_fig=False):
    N = len(X)
    D = self.D
    V = self.V

    We = init_weight(V, D)  # embedding matrix
    self.hidden_layers = []
    Mi = D
    for Mo in self.hidden_layer_sizes:
        ru = RecurrentUnit(Mi, Mo, activation)
        self.hidden_layers.append(ru)
        Mi = Mo

    Wo = init_weight(Mi, V)
    bo = np.zeros(V)

    self.We = theano.shared(We)
    self.Wo = theano.shared(Wo)
    self.bo = theano.shared(bo)
    self.params = [self.Wo, self.bo]
    for ru in self.hidden_layers:
        self.params += ru.params

    # with a single name, T.ivectors returns one variable (same as T.ivector)
    thX = T.ivectors('X')
    thY = T.ivectors('Y')

    Z = self.We[thX]
    for ru in self.hidden_layers:
        Z = ru.output(Z)
    # py_x here is not produced by a scan function, so no extra slicing
    # (y[:, 0, :]) is needed
    py_x = T.nnet.softmax(Z.dot(self.Wo) + self.bo)
    prediction = T.argmax(py_x, axis=1)

    self.predict_op = theano.function(
        inputs=[thX],
        outputs=[py_x, prediction],
        allow_input_downcast=True,
    )

    cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
    grads = T.grad(cost, self.params)
    dparams = [theano.shared(p.get_value() * 0) for p in self.params]

    dWe = theano.shared(self.We.get_value() * 0)
    gWe = T.grad(cost, self.We)
    dWe_update = mu * dWe - learning_rate * gWe
    We_update = self.We + dWe_update
    if normalize:
        We_update /= We_update.norm(2)  # L2-normalize the updated embedding matrix

    updates = [
        (p, p + mu * dp - learning_rate * g) for p, dp, g in zip(self.params, dparams, grads)
    ] + [
        (dp, mu * dp - learning_rate * g) for dp, g in zip(dparams, grads)
    ] + [
        (self.We, We_update), (dWe, dWe_update)
    ]

    self.train_op = theano.function(
        inputs=[thX, thY],
        outputs=[cost, prediction, Z],
        updates=updates,
    )

    costs = []
    for i in range(epochs):
        t0 = datetime.now()
        X = shuffle(X)
        n_correct = 0
        n_total = 0
        cost = 0
        for j in range(N):
            # we use 0 as the start token and 1 as the end token
            if np.random.random() < 0.01 or len(X[j]) <= 1:
                input_sequence = [0] + X[j]
                output_sequence = X[j] + [1]
            else:
                input_sequence = [0] + X[j][:-1]
                output_sequence = X[j]
            n_total += len(output_sequence)

            try:
                c, p, z = self.train_op(input_sequence, output_sequence)
            except Exception as e:
                PYX, pred = self.predict_op(input_sequence)
                print("input_sequence len:", len(input_sequence))
                print("PYX.shape:", PYX.shape)
                print("pred.shape:", pred.shape)
                raise e

            cost += c
            for pj, xj in zip(p, output_sequence):
                if pj == xj:
                    n_correct += 1
            if j % 200 == 0:
                # the line below stands in for print(); both do the same thing
                sys.stdout.write("j/N: %d/%d correct rate so far: %f\r" % (j, N, float(n_correct) / n_total))
                sys.stdout.flush()
        print("i:", i, "cost:", cost,
              "correct rate:", (float(n_correct) / n_total),
              'time for epoch:', (datetime.now() - t0))
        costs.append(cost)

    if show_fig:
        plt.plot(costs)
        plt.show()
def __theano_build__(self):
    E, V, U, W, b, c, ML = self.E, self.V, self.U, self.W, self.b, self.c, self.ML
    batch_size = self.batch_size

    start = T.iscalar('start')
    batch_len = T.iscalar('batch_len')

    # T.ivectors(n) with an int argument returns a list of n unnamed int32 vectors
    bx = T.ivectors(batch_size)
    by = T.ivectors(batch_size)
    for i in np.arange(batch_size):
        bx[i] = T.cast(self.gx[start+i*batch_len:start+(i+1)*batch_len], dtype='int32')
        by[i] = T.cast(self.gy[start+i*batch_len:start+(i+1)*batch_len], dtype='int32')

    prediction = T.ivectors(batch_size)
    bce = T.dvectors(batch_size)
    bout = T.dvectors(batch_size)

    def forward_prop_step(x_t, s_t1_prev, s_t2_prev):
        # This is how we calculated the hidden state in a simple RNN. No longer!
        # s_t = T.tanh(U[:,x_t] + W.dot(s_t1_prev))

        # Word embedding layer
        x_e = E[:, x_t]
        # weight for MLE
        weight = ML[:, x_t]

        # GRU Layer 1
        z_t1 = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t1_prev) + b[0])
        r_t1 = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t1_prev) + b[1])
        c_t1 = T.tanh(U[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2])
        s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev

        # GRU Layer 2
        z_t2 = T.nnet.hard_sigmoid(U[3].dot(s_t1) + W[3].dot(s_t2_prev) + b[3])
        r_t2 = T.nnet.hard_sigmoid(U[4].dot(s_t1) + W[4].dot(s_t2_prev) + b[4])
        c_t2 = T.tanh(U[5].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5])
        s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev

        # Final output calculation
        # Theano's softmax returns a matrix with one row, we only need the row
        o_t = T.nnet.softmax(V.dot(s_t2) + c + weight)[0]

        return [o_t, s_t1, s_t2]

    for bs in np.arange(batch_size):
        # o will be the output vector for each word in vocabulary
        [bout[bs], s, s2], updates = theano.scan(
            forward_prop_step,
            sequences=bx[bs],
            truncate_gradient=self.bptt_truncate,
            outputs_info=[None,
                          dict(initial=T.zeros(self.hidden_dim)),
                          dict(initial=T.zeros(self.hidden_dim))])
        # index prediction
        prediction[bs] = T.argmax(bout[bs], axis=1)
        bce[bs] = T.sum(T.nnet.categorical_crossentropy(bout[bs], by[bs]))

    cost = T.mean(bce) + 0.01*(T.sum(E**2) + T.sum(V**2) + T.sum(U**2)
                               + T.sum(W**2) + T.sum(b**2) + T.sum(c**2))

    # Gradients
    dE = T.grad(cost, E)
    dU = T.grad(cost, U)
    dW = T.grad(cost, W)
    db = T.grad(cost, b)
    dV = T.grad(cost, V)
    dc = T.grad(cost, c)

    # for minibatch, it goes like this:
    # loop through all samples in the batch and get each sample derivative,
    # accumulate the sample derivatives to get the batch derivative,
    # then update all parameters using the batch derivative

    # Assign functions
    self.predict_prob = theano.function([start, batch_len], bout)
    self.predict_class = theano.function([start, batch_len], prediction)
    self.optimization_error = theano.function([start, batch_len], cost)
    self.cross_entropy_loss = theano.function([start, batch_len], T.mean(bce))
    self.bptt = theano.function([start, batch_len], [dE, dU, dW, db, dV, dc])

    # SGD parameters
    learning_rate = T.scalar('learning_rate')
    decay = T.scalar('decay')

    # rmsprop cache updates
    mE = decay * self.mE + (1 - decay) * dE ** 2
    mU = decay * self.mU + (1 - decay) * dU ** 2
    mW = decay * self.mW + (1 - decay) * dW ** 2
    mV = decay * self.mV + (1 - decay) * dV ** 2
    mb = decay * self.mb + (1 - decay) * db ** 2
    mc = decay * self.mc + (1 - decay) * dc ** 2

    # rmsprop
    self.batch_step = theano.function(
        [start, batch_len, learning_rate, theano.In(decay, value=0.9)],
        [],
        updates=[(E, E - learning_rate * dE / T.sqrt(mE + 1e-6)),
                 (U, U - learning_rate * dU / T.sqrt(mU + 1e-6)),
                 (W, W - learning_rate * dW / T.sqrt(mW + 1e-6)),
                 (V, V - learning_rate * dV / T.sqrt(mV + 1e-6)),
                 (b, b - learning_rate * db / T.sqrt(mb + 1e-6)),
                 (c, c - learning_rate * dc / T.sqrt(mc + 1e-6)),
                 (self.mE, mE),
                 (self.mU, mU),
                 (self.mW, mW),
                 (self.mV, mV),
                 (self.mb, mb),
                 (self.mc, mc)
                 ])

    tx = T.ivector()
    ty = T.ivector()
    [tout, _, _], _ = theano.scan(
        forward_prop_step,
        sequences=tx,
        truncate_gradient=self.bptt_truncate,
        outputs_info=[None,
                      dict(initial=T.zeros(self.hidden_dim)),
                      dict(initial=T.zeros(self.hidden_dim))])
    sce = T.sum(T.nnet.categorical_crossentropy(tout, ty))
    self.example_loss = theano.function([tx, ty], sce, on_unused_input='warn')
    self.example_prediction = theano.function([tx, ty], [tout, T.argmax(tout, axis=1), sce])
def get_posterior_func(self):
    # Works with NCE
    x, y = T.ivectors('x', 'y')
    posterior_func = theano.function([x, y], self.get_sym_posterior_num(x, y))
    return posterior_func
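# Summary of the T.ivectors calling conventions seen in the snippets above
# (added; illustrative, behavior as in the Theano versions these snippets
# target):
x, y = T.ivectors('x', 'y')  # several names -> list of named int32 vectors
batch = T.ivectors(8)        # a single int n -> list of n unnamed vectors
thX = T.ivectors('X')        # a single name -> one variable, same as T.ivector('X')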