Example #1
 def defmodel(self):
     winp, rinp, hinp = T.ivectors("winp", "rinp", "hinp")
     nwinp, nrinp, nhinp = T.ivectors("nwinp", "nrinp", "nhinp")
     dotp = self.builddot(winp, rinp, hinp, self.rnnu)
     ndotp = self.builddot(nwinp, nrinp, nhinp, self.rnnu)
     dotp = dotp.reshape((dotp.shape[0], 1))
     ndotp = ndotp.reshape((ndotp.shape[0], 1))
     return [dotp, ndotp], [rinp, winp, hinp, nrinp, nwinp, nhinp]
Example #2
 def defmodel(self):
     winp, rinp, hinp = T.ivectors("winp", "rinp", "hinp")
     nwinp, nrinp, nhinp = T.ivectors("nwinp", "nrinp", "nhinp")
     dotp = self.builddot(winp, rinp, hinp, self.rnnu)
     ndotp = self.builddot(nwinp, nrinp, nhinp, self.rnnu)
     dotp = dotp.reshape((dotp.shape[0], 1))
     ndotp = ndotp.reshape((ndotp.shape[0], 1))
     return [dotp, ndotp], [rinp, winp, hinp, nrinp, nwinp, nhinp]
Example #3
 def defmodel(self):
     '''
     Define model
     '''
     winp, rinp, hinp = T.ivectors("winp", "rinp", "hinp")
     nwinp, nrinp, nhinp = T.ivectors("nwinp", "nrinp", "nhinp")
     dotp = self.builddot(winp, rinp, hinp)
     ndotp = self.builddot(nwinp, nrinp, nhinp)
     dotp = dotp.reshape((dotp.shape[0], 1))
     ndotp = ndotp.reshape((ndotp.shape[0], 1))
     return [dotp, ndotp], [winp, rinp, hinp, nwinp, nrinp, nhinp]
Example #4
 def defmodel(self):
     '''
     Define model
     :return: ([positive dot product, negative dot product],
     [positive left index variable, positive right index variable, negative left index var, negative right index var])
     '''
     winp, hinp = T.ivectors("winp", "hinp")
     nwinp, nhinp = T.ivectors("nwinp", "nhinp")
     dotp = T.nnet.sigmoid(T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1))
     ndotp = T.nnet.sigmoid(T.sum(self.W[nwinp, :] * self.H[:, nhinp].T, axis=1))
     dotp = dotp.reshape((dotp.shape[0], 1))
     ndotp = ndotp.reshape((ndotp.shape[0], 1))
     return [dotp, ndotp], [winp, hinp, nwinp, nhinp]
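A note on the pattern above: Examples #1 through #4 all declare the positive and negative index vectors with a single T.ivectors call, build a score for each, and return the symbolic outputs together with their input variables so a caller can compile them with theano.function. Below is a minimal, self-contained sketch of that pattern; the shapes and the shared matrices W and H are illustrative assumptions, not taken from the projects above.

    import numpy as np
    import theano
    import theano.tensor as T

    # Assumed toy embedding matrices; any float32 shared variables would do.
    W = theano.shared(np.random.rand(100, 8).astype("float32"))  # row embeddings
    H = theano.shared(np.random.rand(8, 50).astype("float32"))   # column embeddings

    # One call declares several named int32 vectors, as in the examples above.
    winp, hinp = T.ivectors("winp", "hinp")

    # Row-wise dot product of the selected embeddings, squashed into (0, 1).
    dotp = T.nnet.sigmoid(T.sum(W[winp, :] * H[:, hinp].T, axis=1))

    score = theano.function(inputs=[winp, hinp], outputs=dotp)

    # Score three (row, column) index pairs.
    print(score(np.array([0, 1, 2], dtype="int32"),
                np.array([3, 4, 5], dtype="int32")))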
Example #5
	def _create_hessian_update_mechanism(self):

		# create hessian theano variables:
		self.create_shared_variables_hessian_variables()

		# symbolic examples:
		examples = []
		example_indices = []
		examples_tuples = []
		for i in range(self.batch_size):
			index, label = T.ivectors(['indices', 'labels'])
			examples_tuples.append((index, label))
			examples.append(index)
			example_indices.append(index)
			examples.append(label)

		indices = theano_unique(T.concatenate(example_indices))

		t0 = time.time()

		print("1/2 Calculating gradients...")

		hf_updates, costs = self.symbolic_one_step(examples_tuples, indices)

		t1 = time.time()
		print("Took %.2fs to compute gradients\n2/2 Compiling hessian updates..." % (t1 - t0))

		self.update_fun = theano.function(examples, sum(costs), updates = hf_updates, mode = self.theano_mode)

		t2 = time.time()
		print("Took %.2fs to compile hessian updates" % (t2 - t1))
Example #6
 def defmodel(self):
     lhs = T.ivector("lhs")
     rhs, nrhs = T.ivectors("rhs","nrhs")
     lhsemb = self.entembs[lhs, :]
     rhsemb = self.W[rhs, :]
     nrhsemb = self.W[nrhs, :]
     pdot = T.batched_dot(lhsemb, rhsemb)
     ndot = T.batched_dot(lhsemb, nrhsemb)
     return pdot, ndot, [lhs, rhs, nrhs]
Example #7
 def defmodel(self):
     '''
     define model
     :return: ([output variable, gold standard variable], [index variable for W, index variable for H])
     '''
     winp, hinp = T.ivectors("winp", "hinp")
     outp = self.X[winp, hinp]
     dotp = T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1)
     return [dotp, outp], [winp, hinp]
Example #8
 def test_rebuild_strict(self):
     # Test fix for error reported at
     # https://groups.google.com/d/topic/theano-users/BRK0UEB72XA/discussion
     w = tensor.imatrix()
     x, y = tensor.ivectors("x", "y")
     z = x * y
     f = theano.function([w, y], z, givens=[(x, w)], rebuild_strict=False)
     z_val = f(np.ones((3, 5), dtype="int32"), np.arange(5, dtype="int32"))
     assert z_val.ndim == 2
     assert np.all(z_val == np.ones((3, 5)) * np.arange(5))
Example #9
 def test1(self):
     # Test fix for error reported at
     # https://groups.google.com/d/topic/theano-users/BRK0UEB72XA/discussion
     w = tensor.imatrix()
     x, y = tensor.ivectors('x', 'y')
     z = x * y
     f = theano.function([w, y], z, givens=[(x, w)], rebuild_strict=False)
     z_val = f(numpy.ones((3, 5), dtype='int32'), numpy.arange(5, dtype='int32'))
     assert z_val.ndim == 2
     assert numpy.all(z_val == numpy.ones((3, 5)) * numpy.arange(5))
Example #10
 def predict(self, idxs):
     '''
     :param idxs: vector of tuples of integer indexes for the embeddings
     :return: vector of float predictions
     '''
     idxs = np.asarray(idxs).astype("int32")
     print([idxs[:, i] for i in range(idxs.shape[1])])
     winp, hinp = T.ivectors("winpp", "hinpp")
     dotp = T.nnet.sigmoid(T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1))
     pfun = theano.function(
         inputs=[winp, hinp],
         outputs=[dotp]
     )
     return pfun(*[idxs[:, i] for i in range(idxs.shape[1])])
Example #11
 def predict(self, idxs):
     '''
     :param idxs: vector of tuples of integer indexes for the embeddings
     :return: vector of float predictions
     '''
     idxs = np.asarray(idxs).astype("int32")
     print([idxs[:, i] for i in range(idxs.shape[1])])
     winp, rinp, hinp = T.ivectors("winpp", "rinpp", "hinpp")
     dotp = self.builddot(winp, rinp, hinp)
     pfun = theano.function(
         inputs=[winp, rinp, hinp],
         outputs=[dotp]
     )
     return pfun(*[idxs[:, i] for i in range(idxs.shape[1])])
Example #12
 def defmodel(self):
     sidx, ridx, oidx = T.ivectors("sidx", "ridx", "oidx")
     outp = self.builddot(sidx, ridx, self.rnnu) # (batsize, dims)
     Nclasses = int(math.ceil(math.sqrt(self.vocabsize)))
     Noutsperclass = int(math.ceil(math.sqrt(self.vocabsize)))
     ''' H-sm
     self.sm1w = theano.shared(np.random.random((self.dims, Nclasses)).astype("float32")*scale-offset)
     self.sm1b = theano.shared(np.random.random((Nclasses,)).astype("float32")*scale-offset)
     self.sm2w = theano.shared(np.random.random((Nclasses, self.dims, Noutsperclass)).astype("float32")*scale-offset)
     self.sm2b = theano.shared(np.random.random((Nclasses, Noutsperclass)).astype("float32")*scale-offset)'''
     ''' H-sm
     probs = h_softmax(outp, self.batsize, self.vocabsize, Nclasses, Noutsperclass, self.sm1w, self.sm1b, self.sm2w, self.sm2b, oidx)'''
     outdot = T.dot(outp, self.smlayer)
     probs = T.nnet.softmax(outdot)
     #showgraph(probs)
     return probs, [sidx, ridx], oidx # probs: (batsize, vocabsize)
Example #13
 def defmodel(self):
     sidx, ridx, oidx = T.ivectors("sidx", "ridx", "oidx")
     outp = self.builddot(sidx, ridx, self.rnnu)  # (batsize, dims)
     Nclasses = int(math.ceil(math.sqrt(self.vocabsize)))
     Noutsperclass = int(math.ceil(math.sqrt(self.vocabsize)))
     ''' H-sm
     self.sm1w = theano.shared(np.random.random((self.dims, Nclasses)).astype("float32")*scale-offset)
     self.sm1b = theano.shared(np.random.random((Nclasses,)).astype("float32")*scale-offset)
     self.sm2w = theano.shared(np.random.random((Nclasses, self.dims, Noutsperclass)).astype("float32")*scale-offset)
     self.sm2b = theano.shared(np.random.random((Nclasses, Noutsperclass)).astype("float32")*scale-offset)'''
     ''' H-sm
     probs = h_softmax(outp, self.batsize, self.vocabsize, Nclasses, Noutsperclass, self.sm1w, self.sm1b, self.sm2w, self.sm2b, oidx)'''
     outdot = T.dot(outp, self.smlayer)
     probs = T.nnet.softmax(outdot)
     #showgraph(probs)
     return probs, [sidx, ridx], oidx  # probs: (batsize, vocabsize)
Example #14
def test_adv_subtensor():
    # Test the advancedsubtensor on gpu.
    shp = (2, 3, 4)
    shared = gpuarray_shared_constructor
    xval = np.arange(np.prod(shp), dtype=theano.config.floatX).reshape(shp)
    idx1, idx2 = tensor.ivectors('idx1', 'idx2')
    idxs = [idx1, None, slice(0, 2, 1), idx2, None]
    x = shared(xval, name='x')
    expr = x[idxs]
    f = theano.function([idx1, idx2], expr, mode=mode_with_gpu)
    assert sum([isinstance(node.op, GpuAdvancedSubtensor)
               for node in f.maker.fgraph.toposort()]) == 1
    idx1_val = [0, 1]
    idx2_val = [0, 1]
    rval = f(idx1_val, idx2_val)
    rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
    assert np.allclose(rval, rep)
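The index expression in this test mixes integer index vectors with None (new axes) and a basic slice, which is what routes the graph through GpuAdvancedSubtensor; the reference value rep is plain NumPy advanced indexing. A small NumPy-only sketch of the indexing rule being checked, with the same toy values as the test:

    import numpy as np

    shp = (2, 3, 4)
    xval = np.arange(np.prod(shp)).reshape(shp)

    idx1_val = [0, 1]
    idx2_val = [0, 1]

    # The two integer index lists broadcast together; because they are separated
    # by a slice/newaxis, their broadcast dimension moves to the front of the result.
    rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
    print(rep.shape)  # (2, 1, 2, 1)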
Example #15
    def train(self, X, numrows=None, numcols=None, evalinter=10):
        self.initvars(X, numrows=numrows, numcols=numcols)
        # define errors and costs
        winp, hinp = T.ivectors("winp", "hinp")
        nwinp, nhinp = T.ivectors("nwinp", "nhinp")
        dotp = T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1)
        ndotp = T.sum(self.W[nwinp, :] * self.H[:, nhinp].T, axis=1)
        dotp = dotp.reshape((dotp.shape[0], 1))
        ndotp = ndotp.reshape((ndotp.shape[0], 1))

        #embed()

        tErr = T.sum(T.max(T.concatenate([T.zeros_like(dotp), 1 - dotp + ndotp], axis=1), axis=1)) # hinge contrast
        tReg = (1./2.) * (T.sum(self.W[winp, :]**2) * self.Wreg + T.sum(self.H[:, hinp]**2) * self.Hreg)
        tCost = tErr + tReg
        #embed()
        # get gradients
        gW = T.grad(tCost, self.W)
        gH = T.grad(tCost, self.H)

        numsam = X.shape[0]
        batsize = int(ceil(numsam*1./self.numbats))
        numbats = self.numbats

        # define updates and function
        updW = (self.W, T.clip(self.W - self.lr * numbats * gW, 0, np.infty))
        updH = (self.H, T.clip(self.H - self.lr * numbats * gH, 0, np.infty))
        trainf = theano.function(
            inputs=[winp, hinp, nwinp, nhinp],
            outputs=[tErr],
            updates=[updW, updH],
            profile=True
        )

        negrate = self.negrate

        def batchloop():
            c = 0
            idxs = list(range(X.shape[0]))
            np.random.shuffle(idxs)
            prevperc = -1.
            maxc = numbats
            ts = 0.
            toterr = 0.
            while c < maxc-1:
                sliceidxs = idxs[c*batsize: min((c+1)*batsize, len(idxs))]
                possamples = X[sliceidxs].copy()
                samples = np.concatenate([possamples]*(negrate+1))
                samples = np.concatenate([samples, samples], axis=1)
                for i in range(samples.shape[0]):
                    corruptcolumn = np.random.choice([2, 3])
                    samples[i, corruptcolumn] = np.random.randint(0, numrows if corruptcolumn == 2 else numcols)
                #region Percentage counting
                perc = round(c*100./maxc)
                if perc > prevperc:
                    print("iter progress %.0f" % perc + "% ", end='\r')
                    prevperc = perc
                #endregion

                toterr += trainf(samples[:, 0].astype("int32"), samples[:, 1].astype("int32"),
                                 samples[:, 2].astype("int32"), samples[:, 3].astype("int32"))[0]
                c += 1
            return toterr
        err = self.trainloop(X, batchloop, evalinter=0)

        return self.W.get_value(), self.H.get_value(), err
Example #16
    def train(self, X, numrows=None, numcols=None, evalinter=10):
        self.initvars(X, numrows=numrows, numcols=numcols)
        # define errors and costs
        winp, hinp = T.ivectors("winp", "hinp")
        outp = T.fvector("outp")
        dotp = T.sum(self.W[winp, :] * self.H[:, hinp].T, axis=1)
        # embed()
        tErr = (1./2.) * T.sum((outp - dotp)**2) # MSE
        tReg = (1./2.) * (T.sum(self.W[winp, :]**2) * self.Wreg + T.sum(self.H[:, hinp]**2) * self.Hreg)
        tCost = tErr + tReg
        # embed()
        # get gradients
        gW = T.grad(tCost, self.W)
        gH = T.grad(tCost, self.H)

        numsam = X.shape[0]
        batsize = int(ceil(numsam*1./self.numbats))
        numbats = self.numbats

        # define updates and function
        updW = (self.W, T.clip(self.W - self.lr * numbats * gW, 0, np.infty))
        updH = (self.H, T.clip(self.H - self.lr * numbats * gH, 0, np.infty))
        trainf = theano.function(
            inputs=[winp, hinp, outp],
            outputs=[tErr],
            updates=[updW, updH],
            profile=True
        )

        negrate = self.negrate

        def batchloop():
            c = 0
            idxs = list(range(X.shape[0]))
            np.random.shuffle(idxs)
            prevperc = -1.
            maxc = numbats
            ts = 0.
            toterr = 0.
            while c < maxc-1:
                sliceidxs = idxs[c*batsize: min((c+1)*batsize, len(idxs))]
                possamples = X[sliceidxs]
                posouts = np.ones((possamples.shape[0],), dtype="float32")
                negsamples = []
                for i in range(possamples.shape[0]):
                    for j in range(negrate):
                        corruptdis = possamples[i, :].copy()  # copy so the corruption does not overwrite the positive sample
                        columntocorrupt = np.random.choice(len(corruptdis))
                        corruptdis[columntocorrupt] = np.random.randint(0, numrows if columntocorrupt == 0 else numcols)
                        negsamples.append(corruptdis)
                negsamples = np.asarray(negsamples)
                negouts = np.zeros((negsamples.shape[0],), dtype="float32")
                if possamples.ndim != negsamples.ndim:
                    embed()
                samples = np.concatenate((possamples, negsamples), axis=0)
                outs = np.concatenate((posouts, negouts))
                #region Percentage counting
                perc = round(c*100./maxc)
                if perc > prevperc:
                    print("iter progress %.0f" % perc + "% ", end='\r')
                    prevperc = perc
                #endregion

                toterr += trainf(samples[:, 0].astype("int32"), samples[:, 1].astype("int32"), outs)[0]
                c += 1
            return toterr
        err = self.trainloop(X, batchloop, evalinter=0)

        return self.W.get_value(), self.H.get_value(), err
Example #17
 def getpredf(
     self
 ):  # function to compute the predicted vector given entity and relation
     winp, rinp = T.ivectors("winpp", "rinpp")
     om = self.prebuilddot(winp, rinp, self.rnnu)
     return theano.function(inputs=[rinp, winp], outputs=[om])
Example #18
 def getpreddotf(
     self
 ):  # function to compute the score for a triple (array) given the indexes
     winp, rinp, hinp = T.ivectors("winppp", "rinppp", "hinppp")
     om = self.builddot(winp, rinp, hinp, self.rnnu)
     return theano.function(inputs=[rinp, winp, hinp], outputs=[om])
Example #19
 def get_rec_prob_func(self):
   # Works with NCE
   x, y = T.ivectors('x', 'y')
   rec_prob_func = theano.function([x, y], self.get_sym_rec_prob(x, y))
   return rec_prob_func
Example #20
 def defmodel(self):
     sidx = T.ivector("sidx")
     pathidxs = T.imatrix("pathidxs")
     zidx, nzidx = T.ivectors("zidx", "nzidx")  # rhs corruption only
     dotp, ndotp = self.definnermodel(sidx, pathidxs, zidx, nzidx)
     return dotp, ndotp, [sidx, pathidxs, zidx, nzidx]
Example #21
 def getpredf(self):             # function to compute the predicted vector given entity and relation
     winp, rinp = T.ivectors("winpp", "rinpp")
     om = self.prebuilddot(winp, rinp, self.rnnu)
     return theano.function(inputs=[rinp, winp], outputs=[om])
Example #22
    def init_functions(self):
        '''Construct functions for the model'''

        # Construct the objective function

        #   Input variables
        u_i, y_s, y_t = T.ivectors(['u_i', 'y_s', 'y_t'])

        dropout = T.fscalar(name='p')

        #   Intermediate variables: n_examples * n_songs
        item_scores = T.dot(self._U[u_i], self._V.T) + self._b

        # subtract off the row-wise max for numerical stability
        item_scores = item_scores - item_scores.max(axis=1, keepdims=True)

        e_scores = T.exp(item_scores)

        if T.gt(dropout, 0.0):
            # Construct a random dropout mask
            retain_prob = 1.0 - dropout
            M = self._rng.binomial(e_scores.shape,
                                   p=retain_prob,
                                   dtype=theano.config.floatX)

            # Importance weight so that E[M[i,j]] = 1
            M /= retain_prob

            # The positive examples should always be sampled
            M = theano.tensor.set_subtensor(M[T.arange(y_t.shape[0]), y_t],
                                            1.0)

            e_scores = e_scores * M

        #   Edge feasibilities: n_examples * n_edges
        prev_feas = sparse_slice_rows(self.H, y_s)
        #   Detect and reset initial-state transitions
        prev_feas = theano.tensor.set_subtensor(prev_feas[y_s < 0, :], 1)

        #   Raw edge probabilities: n_examples * n_edges
        edge_given_prev = T.nnet.softmax(prev_feas * self._w)

        #   Compute edge normalization factors: n_examples * n_edges
        #     sum of score mass in each edge for each user
        edge_norms = ts.dot(e_scores, self.H)

        #   Slice the edge weights according to incoming feasibilities: n_examples
        next_weight = e_scores[T.arange(y_t.shape[0]), y_t]

        #   Marginalize: n_examples * n_edges
        next_feas = sparse_slice_rows(self.H, y_t)

        probs = next_weight * T.sum(next_feas * (edge_given_prev / (_EPS + edge_norms)),
                                    axis=1,
                                    keepdims=True)

        # Data likelihood term
        ll = T.log(probs)
        avg_ll = ll.mean()

        # Priors
        w_prior = -0.5 * self.edge_reg * (self._w**2).sum()
        b_prior = -0.5 * self.bias_reg * (self._b**2).sum()
        u_prior = -0.5 * self.user_reg * (self._U**2).sum()
        v_prior = -0.5 * self.song_reg * (self._V**2).sum()

        # negative log-MAP objective
        cost = -1.0 * (avg_ll + u_prior + v_prior + b_prior + w_prior)

        # Construct the updates
        variables = []
        if 'e' in self.params:
            variables.append(self._w)
        if 'b' in self.params:
            variables.append(self._b)
        if 'u' in self.params:
            variables.append(self._U)
        if 's' in self.params:
            variables.append(self._V)

        updates = lasagne.updates.adagrad(cost, variables)

        self._train = theano.function(inputs=[u_i, y_s, y_t, dropout],
                                      outputs=[avg_ll, cost],
                                      updates=updates)

        self._loglikelihood = theano.function(inputs=[u_i, y_s, y_t,
                                                      theano.Param(dropout,
                                                                   default=0.0,
                                                                   name='p')],
                                              outputs=[ll])
Example #23
 def defmodel(self):
     sidx = T.ivector("sidx")
     pathidxs = T.imatrix("pathidxs")
     zidx, nzidx = T.ivectors("zidx", "nzidx") # rhs corruption only
     dotp, ndotp = self.definnermodel(sidx, pathidxs, zidx, nzidx)
     return dotp, ndotp, [sidx, pathidxs, zidx, nzidx]
Example #24
    def fit(self,
            X,
            learning_rate=1e-5,
            mu=0.99,
            activation=T.nnet.relu,
            RecurrentUnit=LSTM,
            normalize=True,
            epochs=10,
            show_fig=False):
        N = len(X)
        D = self.D
        V = self.V

        We = init_weight(V, D)  # embedding matrix
        self.hidden_layers = []
        Mi = D
        for Mo in self.hidden_layer_sizes:
            ru = RecurrentUnit(Mi, Mo, activation)
            self.hidden_layers.append(ru)
            Mi = Mo

        Wo = init_weight(Mi, V)
        bo = np.zeros(V)

        self.We = theano.shared(We)
        self.Wo = theano.shared(Wo)
        self.bo = theano.shared(bo)
        self.params = [self.Wo, self.bo]
        for ru in self.hidden_layers:
            self.params += ru.params

        thX = T.ivectors('X')
        thY = T.ivectors('Y')

        Z = self.We[thX]
        for ru in self.hidden_layers:
            Z = ru.output(Z)
        py_x = T.nnet.softmax(
            Z.dot(self.Wo) +
            self.bo)  # py_x here is not produced by a scan function, so no extra slicing (y[:, 0, :]) is needed

        prediction = T.argmax(py_x, axis=1)
        self.predict_op = theano.function(inputs=[thX],
                                          outputs=[py_x, prediction],
                                          allow_input_downcast=True)

        cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
        grads = T.grad(cost, self.params)
        dparams = [theano.shared(p.get_value() * 0) for p in self.params]

        dWe = theano.shared(self.We.get_value() * 0)
        gWe = T.grad(cost, self.We)
        dWe_update = mu * dWe - learning_rate * gWe
        We_update = self.We + dWe_update
        if normalize:
            We_update /= We_update.norm(2)  # apply norm(2) to the theano.shared variable here

        updates = [(p, p + mu * dp - learning_rate * g)
                   for p, dp, g in zip(self.params, dparams, grads)] + [
                       (dp, mu * dp - learning_rate * g)
                       for dp, g in zip(dparams, grads)
                   ] + [(self.We, We_update), (dWe, dWe_update)]

        self.train_op = theano.function(inputs=[thX, thY],
                                        outputs=[cost, prediction, Z],
                                        updates=updates)

        costs = []
        for i in range(epochs):
            t0 = datetime.now()
            X = shuffle(X)
            n_correct = 0
            n_total = 0
            cost = 0
            for j in range(N):
                if np.random.random() < 0.01 or len(X[j]) <= 1:
                    input_sequence = [0] + X[j]
                    output_sequence = X[j] + [1]
                else:
                    input_sequence = [0] + X[j][:-1]
                    output_sequence = X[j]
                n_total += len(output_sequence)

                # test:
                try:
                    # we set 0 to start and 1 to end
                    c, p, z = self.train_op(input_sequence, output_sequence)
                    # print(z)
                except Exception as e:
                    PYX, pred = self.predict_op(input_sequence)
                    print("input_sequence len:", len(input_sequence))
                    print("PYX.shape:", PYX.shape)
                    print("pred.shape:", pred.shape)
                    raise e
                # print('p:', p)
                cost += c
                for pj, xj in zip(p, output_sequence):
                    if pj == xj:
                        n_correct += 1
                if j % 200 == 0:
                    # the line below replaces print(); both do the same thing
                    sys.stdout.write("j/N: %d/%d correct rate so far: %f\r" %
                                     (j, N, float(n_correct) / n_total))
                    sys.stdout.flush()
            print("i:", i, "cost:", cost, "correct rate:",
                  (float(n_correct) / n_total), 'time for epoch:',
                  (datetime.now() - t0))
            costs.append(cost)

        if show_fig:
            plt.plot(costs)
            plt.show()
Example #25
 def getpreddotf(self):          # function to compute the score for a triple (array) given the indexes
     winp, rinp, hinp = T.ivectors("winppp", "rinppp", "hinppp")
     om = self.builddot(winp, rinp, hinp, self.rnnu)
     return theano.function(inputs=[rinp, winp, hinp], outputs=[om])
Example #26
    def __theano_build__(self):
        E, V, U, W, b, c, ML = self.E, self.V, self.U, self.W, self.b, self.c, self.ML
        batch_size = self.batch_size

#        mx = T.imatrix('mx')
#        my = T.imatrix('my')

        start = T.iscalar('start')
        batch_len = T.iscalar('batch_len')

#        x = T.ivector('x')
#        y = T.ivector('y')

        bx = T.ivectors(batch_size)
        by = T.ivectors(batch_size)

        for i in np.arange(batch_size):
            bx[i] = T.cast(self.gx[start+i*batch_len:start+(i+1)*batch_len], dtype='int32')
            by[i] = T.cast(self.gy[start+i*batch_len:start+(i+1)*batch_len], dtype='int32')

        prediction = T.ivectors(batch_size)
        bce = T.dvectors(batch_size)
        bout = T.dvectors(batch_size)

        def forward_prop_step(x_t, s_t1_prev, s_t2_prev):
            # This is how we calculated the hidden state in a simple RNN. No longer!
            # s_t = T.tanh(U[:,x_t] + W.dot(s_t1_prev))

#            print "are we here?"
            # Word embedding layer
#            print type(x_t)
            x_e = E[:,x_t]
#            print "are we here?"
            # weight for MLE
            weight = ML[:,x_t]

            # GRU Layer 1
            z_t1 = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t1_prev) + b[0])
            r_t1 = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t1_prev) + b[1])
            c_t1 = T.tanh(U[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2])
            s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev

            # GRU Layer 2
            z_t2 = T.nnet.hard_sigmoid(U[3].dot(s_t1) + W[3].dot(s_t2_prev) + b[3])
            r_t2 = T.nnet.hard_sigmoid(U[4].dot(s_t1) + W[4].dot(s_t2_prev) + b[4])
            c_t2 = T.tanh(U[5].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5])
            s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev

            # Final output calculation
            # Theano's softmax returns a matrix with one row, we only need the row
            o_t = T.nnet.softmax(V.dot(s_t2) + c + weight)[0]

            return [o_t, s_t1, s_t2]

        for bs in np.arange(batch_size):
            # o will be the output vector for each word in vocabulary
            [bout[bs], s, s2], updates = theano.scan(
                forward_prop_step,
                sequences=bx[bs],
                truncate_gradient=self.bptt_truncate,
                outputs_info=[None,
                              dict(initial=T.zeros(self.hidden_dim)),
                              dict(initial=T.zeros(self.hidden_dim))])
            #index prediction
            prediction[bs] = T.argmax(bout[bs], axis=1)
            bce[bs] = T.sum(T.nnet.categorical_crossentropy(bout[bs], by[bs]))

        cost = T.mean(bce) + 0.01*(T.sum(E**2) + T.sum(V**2) + T.sum(U**2) + T.sum(W**2) + T.sum(b**2) + T.sum(c**2))

        # Gradients
        dE = T.grad(cost, E)
        dU = T.grad(cost, U)
        dW = T.grad(cost, W)
        db = T.grad(cost, b)
        dV = T.grad(cost, V)
        dc = T.grad(cost, c)

        # for minibatch, it goes like this:
        # loop through all samples in batch and get sample derivative
        # accumulative all sample derivative to get batch derivative
        # update all parameters using batch derivative

        # Assign functions
        self.predict_prob = theano.function([start,batch_len], bout)
        self.predict_class = theano.function([start,batch_len], prediction)
        self.optimization_error = theano.function([start,batch_len],cost)
        self.cross_entropy_loss = theano.function([start,batch_len], T.mean(bce))
        self.bptt = theano.function([start,batch_len], [dE, dU, dW, db, dV, dc])

        # SGD parameters
        learning_rate = T.scalar('learning_rate')
        decay = T.scalar('decay')

        # rmsprop cache updates
        mE = decay * self.mE + (1 - decay) * dE ** 2
        mU = decay * self.mU + (1 - decay) * dU ** 2
        mW = decay * self.mW + (1 - decay) * dW ** 2
        mV = decay * self.mV + (1 - decay) * dV ** 2
        mb = decay * self.mb + (1 - decay) * db ** 2
        mc = decay * self.mc + (1 - decay) * dc ** 2

        #rmsprop
        self.batch_step = theano.function(
            [start,batch_len,learning_rate, theano.In(decay, value=0.9)],
            [],
            updates=[(E, E - learning_rate * dE / T.sqrt(mE + 1e-6)),
                     (U, U - learning_rate * dU / T.sqrt(mU + 1e-6)),
                     (W, W - learning_rate * dW / T.sqrt(mW + 1e-6)),
                     (V, V - learning_rate * dV / T.sqrt(mV + 1e-6)),
                     (b, b - learning_rate * db / T.sqrt(mb + 1e-6)),
                     (c, c - learning_rate * dc / T.sqrt(mc + 1e-6)),
                     (self.mE, mE),
                     (self.mU, mU),
                     (self.mW, mW),
                     (self.mV, mV),
                     (self.mb, mb),
                     (self.mc, mc)
                    ])

        tx = T.ivector()
        ty = T.ivector()
        [tout, _, _], _ = theano.scan(forward_prop_step,
                                    sequences=tx,
                                    truncate_gradient=self.bptt_truncate,
                                    outputs_info=[None,
                                                  dict(initial=T.zeros(self.hidden_dim)),
                                                  dict(initial=T.zeros(self.hidden_dim))
                                                  ])

        sce = T.sum(T.nnet.categorical_crossentropy(tout, ty))
        self.example_loss = theano.function([tx,ty], sce, on_unused_input='warn')
        self.example_prediction = theano.function([tx,ty],[tout, T.argmax(tout, axis=1), sce])
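Note that this example calls T.ivectors with an integer instead of variable names: the Theano multi-constructors accept either names or a count, returning one named variable per name, or a plain Python list of anonymous variables for a count. A minimal sketch of both calling conventions (batch_size is an assumed illustrative value):

    import theano.tensor as T

    batch_size = 4  # illustrative value

    # Named form: one int32 vector per name.
    x, y = T.ivectors("x", "y")

    # Count form: a Python list of batch_size anonymous int32 vectors,
    # whose elements can then be reassigned one by one, as the example above does.
    bx = T.ivectors(batch_size)
    print(len(bx), bx[0].dtype)  # 4 int32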
Example #27
 def get_posterior_func(self):
   # Works with NCE
   x, y = T.ivectors('x', 'y')
   posterior_func = theano.function([x, y], self.get_sym_posterior_num(x, y))
   return posterior_func