Example #1
 def _build_BPF(self):
     print('start building the Bayesian probabilistic model')
     self.x_u = theano.shared(self.train_u)
     self.x_i = theano.shared(self.train_i)
     self.y_r = theano.shared(self.train_r)
     self.y_r_ui = theano.shared(np.array(self.nn_r_ui))
     assert (len(self.y_r.get_value()) == len(self.y_r_ui.get_value()))
     with pm.Model() as self.bncf:  #define the prior and likelihood
         b_u = pm.Normal('b_u', 0, sd=1, shape=self.shape[0])
         b_i = pm.Normal('b_i', 0, sd=1, shape=self.shape[1])
         u = pm.Normal('u', 0, sd=1)
         tY = pm.Deterministic(
             'tY',
             tt.add(
                 tt.add(tt.add(b_u[self.x_u], b_i[self.x_i]), self.y_r_ui),
                 u))
         #tY = pm.Deterministic('tY', ((b_u[self.x_u]+b_i[self.x_i])+self.y_r_ui)+u)#b_u+b_i+u+nn_r_ui
         nY = pm.Deterministic('nY', pm.math.sigmoid(tY))
         # likelihood of observed data
         Y = pm.Bernoulli(
             'Y', nY,
             observed=self.y_r)  #total_size=self.y_r.get_value().shape[0]
     with self.bncf:  #inference
         approx = pm.fit(n=1000, method=pm.ADVI())
         self.trace = approx.sample(draws=500)
     with self.bncf:  #posterior prediction
         ppc = pm.sample_posterior_predictive(self.trace, progressbar=True)
         self.by_r_ui = ppc['Y'].mean(axis=0)
     print('done building the Bayesian probabilistic model')
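The nested tt.add calls building tY above are just repeated elementwise addition, and theano.tensor.add also accepts more than two arguments (later examples call it with three), so the same Deterministic could be written as one node. A sketch only, reusing the names from the example above:

    # illustrative rewrite, not from the source
    tY = pm.Deterministic(
        'tY', tt.add(b_u[self.x_u], b_i[self.x_i], self.y_r_ui, u))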
Example #2
    def mcmc(ll, *frvs):
        full_observations = dict(observations)
        full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, frvs)]))
        
        loglik = -full_log_likelihood(full_observations)

        proposals = free_RVs_prop
        H = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + loglik

# -- this should be an inner loop
        g = []
        g.append(tensor.grad(loglik, frvs))
        
        proposals = [(p - epsilon*gg[0]/2.) for p, gg in zip(proposals, g)]

        rvsp = [(rvs + epsilon*rvp) for rvs,rvp in zip(frvs, proposals)]
        
        full_observations = dict(observations)
        full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, rvsp)]))
        new_loglik = -full_log_likelihood(full_observations)
        
        gnew = []
        gnew.append(tensor.grad(new_loglik, rvsp))
        proposals = [(p - epsilon*gn[0]/2.) for p, gn in zip(proposals, gnew)]
# --
        
        Hnew = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + new_loglik

        dH = Hnew - H
        accept = tensor.or_(dH < 0., U < tensor.exp(-dH))

        return [tensor.switch(accept, -new_loglik, ll)] + \
            [tensor.switch(accept, p, f) for p, f in zip(rvsp, frvs)], \
            {}, theano.scan_module.until(accept)
Example #3
def vgd_kernel_tensor(X0):

    XY = T.batched_dot(X0, X0.transpose(0,2,1))
    x2 = T.reshape(T.sum(T.square(X0),axis=2), (X0.shape[0], X0.shape[1], 1))
    X2e = T.repeat(x2, X0.shape[1], axis=2)
    H = T.sub(T.add(X2e, X2e.transpose(0,2,1)), 2 * XY)

    V = H.flatten(2)

    # median distance
    h = T.switch(T.eq((V.shape[1] % 2), 0),
            # if even vector
            T.mean(T.sort(V)[:, ((V.shape[1] // 2) - 1): ((V.shape[1] // 2) + 1)], axis=1),
             # if odd vector
            T.sort(V)[:, V.shape[1] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[1].astype('float32') + 1.0))
    # h = T.maximum(h, T.zeros_like(h) + 1e-4)

    # h = h / 2
    Kxy = T.exp(-H / T.tile(h.dimshuffle(0, 'x', 'x'), (1, X0.shape[1], X0.shape[1])) ** 2 / 2.0)

    dxkxy = - T.batched_dot(Kxy, X0)
    sumkxy = T.sum(Kxy, axis=2).dimshuffle(0, 1, 'x')
    dxkxy = T.add(dxkxy, T.mul(X0, sumkxy)) / (T.tile(h.dimshuffle(0, 'x', 'x'), (1, X0.shape[1], X0.shape[2])) ** 2)

    return (Kxy, dxkxy, h)
def vgd_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()

    # median distance
    h = T.switch(
        T.eq((V.shape[0] % 2), 0),
        # if even vector
        T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
        # if odd vector
        T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h**2 / 2.0)

    dxkxy = -T.dot(Kxy, X0)
    sumkxy = T.sum(Kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X0, sumkxy)) / (h**2)

    return (Kxy, dxkxy, h)
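Both kernels above assemble H, the matrix of pairwise squared Euclidean distances, via the identity ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 * x_i . x_j: x2/X2e broadcast the squared norms and XY holds the inner products. A small NumPy sanity check of that construction (illustrative, not part of the source):

    import numpy as np
    X = np.random.randn(5, 3).astype('float32')
    sq = (X ** 2).sum(axis=1)
    H = sq[:, None] + sq[None, :] - 2.0 * X.dot(X.T)              # same construction as above
    H_ref = ((X[:, None, :] - X[None, :, :]) ** 2).sum(axis=-1)   # direct pairwise distances
    assert np.allclose(H, H_ref, atol=1e-4)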
Example #5
 def __init__(self, rng, input3, initial_hidden, n_in, n_hidden):
     self.input3 = input3
     self.initial_hidden = initial_hidden
                     
     matrix1 = numpy.asarray( rng.uniform(
             low  = - numpy.sqrt(6./(n_in + n_hidden)),
             high = numpy.sqrt(6./(n_in + n_hidden)),
             size = (n_in, n_hidden)), dtype = 'float32')
     
     self.W1 = theano.shared(value = matrix1, name = 'W1')
     
     matrix2 = numpy.asarray( rng.uniform(
             low  = - numpy.sqrt(6./(n_hidden + n_hidden)),
             high = numpy.sqrt(6./(n_hidden + n_hidden)),
             size = (n_hidden, n_hidden)), dtype = 'float32')
     
     self.W2 = theano.shared(value = matrix2, name = 'W2')
     
     b_values = numpy.zeros((n_hidden,), dtype= 'float32')
     
     self.b = theano.shared(value = b_values, name ='b')
     
     #self.intial_hidden = theano.shared(numpy.zeros(n_hidden, ), dtype = 'float32', name = 'intial_hidden')
     
     self.output = T.tanh( T.add(T.add(T.dot(self.input3, self.W1), T.dot(self.initial_hidden, self.W2)), self.b))
     
     self.params = [self.W2, self.b, self.W1]
Example #6
    def __init__(self, n_in, n_out, input_data_list, activation_fn=tanh):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.w = theano.shared(
            np.asarray(
                np.random.uniform(
                    low=-np.sqrt(6.0/(n_in+n_out)), high=np.sqrt(6.0/(n_in+n_out)), size=(n_in, n_out)),
                    dtype=theano.config.floatX),
            name='w', borrow=True)
        # self.b = theano.shared(
        #     np.asarray(np.zeros((n_out)), dtype=theano.config.floatX),
        #     name='b', borrow=True)
        # self.params = [self.w, self.b]
        # self.params = [self.w]

        self.b = theano.shared(
                    np.asarray(np.random.normal(loc=0.0, scale=1.0/(n_in+n_out), size=(n_out,)),
                    dtype=theano.config.floatX),
                    name='b', borrow=True)
        self.params = [self.w, self.b]

        # self.w = T._shared(
        #     np.asarray(
        #         np.random.uniform(
        #             low=-np.sqrt(6.0/(n_in+n_out)), high=np.sqrt(6.0/(n_in+n_out)), size=(n_in, n_out)),
        #             dtype=theano.config.floatX),
        #     name='w', borrow=True)
        # self.b = T._shared(
        #     np.asarray(np.zeros((n_out)), dtype=theano.config.floatX),
        #     name='b', borrow=True)

        self.q, self.d = input_data_list
        self.output = [self.activation_fn(T.add(TS.basic.structured_dot(self.q, self.w), self.b)), \
                self.activation_fn(T.add(TS.basic.structured_dot(self.d, self.w), self.b))]
Example #7
    def logp(self, value):
        """
        Calculate log-probability of AR distribution at specified value.

        Parameters
        ----------
        value: numeric
            Value for which log-probability is calculated.

        Returns
        -------
        TensorVariable
        """
        if self.constant:
            x = tt.add(*[self.rho[i + 1] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)])
            eps = value[self.p:] - self.rho[0] - x
        else:
            if self.p == 1:
                x = self.rho * value[:-1]
            else:
                x = tt.add(*[self.rho[i] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)])
            eps = value[self.p:] - x

        innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps)
        init_like = self.init.logp(value[:self.p])

        return tt.sum(innov_like) + tt.sum(init_like)
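As a concrete check of the non-constant branch above (not from the source): for p = 2 the comprehension expands to

    x = rho[0] * value[1:-1] + rho[1] * value[0:-2]

so eps[t] = value[t] - rho[0] * value[t-1] - rho[1] * value[t-2] for t >= 2, which is the usual AR(2) innovation.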
Example #8
    def output(self, train):
        X = self.get_input(train) # shape: (nb_samples, time (padded with zeros at the end), input_dim)
        # new shape: (time, nb_samples, input_dim) -> because theano.scan iterates over main dimension
        X = X.dimshuffle((1, 0, 2))

        xf = self.activation(T.dot(X, self.W_if) + self.b_if)
        xb = self.activation(T.dot(X, self.W_ib) + self.b_ib)
        b_o=self.b_o
        b_on= T.repeat(T.repeat(b_o.reshape((1,self.output_dim)),X.shape[0],axis=0).reshape((1,X.shape[0],self.output_dim)),X.shape[1],axis=0)
        # Iterate forward over the first dimension of the x array (=time).
        outputs_f, updates_f = theano.scan(
            self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
            sequences=xf,  # tensors to iterate over, inputs to _step
            # initialization of the output. Input to _step with default tap=-1.
            outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
            non_sequences=[self.W_ff,self.b_f],  # static inputs to _step
            truncate_gradient=self.truncate_gradient
        )
        # Iterate backward over the first dimension of the x array (=time).
        outputs_b, updates_b = theano.scan(
            self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
            sequences=xb,  # tensors to iterate over, inputs to _step
            # initialization of the output. Input to _step with default tap=-1.
            outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
            non_sequences=[self.W_bb,self.b_b],  # static inputs to _step
            truncate_gradient=self.truncate_gradient,
            go_backwards=True  # Iterate backwards through time
        )
        #return outputs_f.dimshuffle((1, 0, 2))
        if self.return_sequences:
            return T.add(T.tensordot(T.add(outputs_f.dimshuffle((1, 0, 2)), outputs_b[::-1].dimshuffle((1,0,2))),self.W_o,[[2],[0]]),b_on)
        return T.concatenate((outputs_f[-1], outputs_b[0]))
Example #9
    def __init__(self, rng, input3, initial_hidden, n_in, n_hidden):
        self.input3 = input3
        self.initial_hidden = initial_hidden

        matrix1 = numpy.asarray(rng.uniform(
            low=-numpy.sqrt(6. / (n_in + n_hidden)),
            high=numpy.sqrt(6. / (n_in + n_hidden)),
            size=(n_in, n_hidden)),
                                dtype='float32')

        self.W1 = theano.shared(value=matrix1, name='W1')

        matrix2 = numpy.asarray(rng.uniform(
            low=-numpy.sqrt(6. / (n_hidden + n_hidden)),
            high=numpy.sqrt(6. / (n_hidden + n_hidden)),
            size=(n_hidden, n_hidden)),
                                dtype='float32')

        self.W2 = theano.shared(value=matrix2, name='W2')

        b_values = numpy.zeros((n_hidden, ), dtype='float32')

        self.b = theano.shared(value=b_values, name='b')

        #self.intial_hidden = theano.shared(numpy.zeros(n_hidden, ), dtype = 'float32', name = 'intial_hidden')

        self.output = T.tanh(
            T.add(
                T.add(T.dot(self.input3, self.W1),
                      T.dot(self.initial_hidden, self.W2)), self.b))

        self.params = [self.W2, self.b, self.W1]
    def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3,
                       t_n_out):

        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

        pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
        pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

        preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)



        inner_act = self.activation # T.nnet.hard_sigmoid #T.tanh # T.nnet.hard_sigmoid T.tanh
        gate_act = self.sigmoid()  # T.nnet.hard_sigmoid #T.nnet.sigmoid

        # Input Gate
        ig_t1 = gate_act(preact[:, 0:t_n_out])
        # Forget Gate
        fg_t1 = gate_act(preact[:, 1 * t_n_out:2 * t_n_out])
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(preact[:, 2 * t_n_out:3 * t_n_out])))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig

        cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)
        # Output Gate
        og_t1 = gate_act(preact[:, 3 * t_n_out:4 * t_n_out])
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1]
Example #11
 def calc_output(self,in_tens):
     if in_tens.ndim == 1:
         prod = T.dot(self.W,in_tens)
         return T.add(prod,self.b)
     elif in_tens.ndim == 2:
         #batched inputs
         prod = T.dot(self.W,in_tens)
         return T.add(self.b[:,None],prod)
Example #12
    def output(self, train):
        X = self.get_input(train)
        X = X.dimshuffle((1,0,2))


        if self.is_entity:
            Entity = X[-1:].dimshuffle(1,0,2)
            X = X[:-1]

        b_y = self.b_y
        b_yn = T.repeat(T.repeat(b_y.reshape((1,self.output_dim)),X.shape[0],axis=0).reshape((1,X.shape[0],self.output_dim)), X.shape[1], axis=0)

        xif = T.dot(X, self.W_if) + self.b_if
        xib = T.dot(X, self.W_ib) + self.b_ib

        xff = T.dot(X, self.W_ff) + self.b_ff
        xfb = T.dot(X, self.W_fb) + self.b_fb

        xcf = T.dot(X, self.W_cf) + self.b_cf
        xcb = T.dot(X, self.W_cb) + self.b_cb

        xof = T.dot(X, self.W_of) + self.b_of
        xob = T.dot(X, self.W_ob) + self.b_ob

        [outputs_f, memories_f], updates_f = theano.scan(
            self._step,
            sequences=[xif, xff, xof, xcf],
            outputs_info=[
                alloc_zeros_matrix(X.shape[1], self.output_dim),
                alloc_zeros_matrix(X.shape[1], self.output_dim)
            ],
            non_sequences=[self.U_if, self.U_ff, self.U_of, self.U_cf],
            truncate_gradient=self.truncate_gradient
        )
        [outputs_b, memories_b], updates_b = theano.scan(
            self._step,
            sequences=[xib, xfb, xob, xcb],
            outputs_info=[
                alloc_zeros_matrix(X.shape[1], self.output_dim),
                alloc_zeros_matrix(X.shape[1], self.output_dim)
            ],
            non_sequences=[self.U_ib, self.U_fb, self.U_ob, self.U_cb],
            truncate_gradient=self.truncate_gradient
        )
        if self.return_sequences:
            y = T.add(T.add(
                    T.tensordot(outputs_f.dimshuffle((1,0,2)), self.W_yf, [[2],[0]]),
                    T.tensordot(outputs_b[::-1].dimshuffle((1,0,2)), self.W_yb, [[2],[0]])),
                b_yn)
            # y = T.add(T.tensordot(
            #     T.add(outputs_f.dimshuffle((1, 0, 2)),
            #           outputs_b[::-1].dimshuffle((1,0,2))),
            #     self.W_y,[[2],[0]]),b_yn)
            if self.is_entity:
                return T.concatenate([y, Entity], axis=1)
            else:
                return y
        return T.concatenate((outputs_f[-1], outputs_b[0]))
 def f1_score(self, y):
     n_total = y.shape[0]
     n_relevant_documents_predicted = T.sum(T.eq(T.ones(self.y_pred.shape), self.y_pred))  # number of predicted positives
     two_vector = T.add(T.ones(self.y_pred.shape), T.ones(self.y_pred.shape))  # vector of 2s
     n_relevant_predicted_correctly = T.sum(T.eq(T.add(self.y_pred, y), two_vector))  # true positives: prediction and label both 1
     precision = T.true_div(n_relevant_predicted_correctly, n_relevant_documents_predicted)
     recall = T.true_div(n_relevant_predicted_correctly, n_total)
     f1_score = T.mul(2.0, T.true_div(T.mul(precision, recall), T.add(precision, recall)))  # harmonic mean of precision and recall
     return [f1_score, precision, recall]
Example #14
    def _generate_pred_model_function(self):
        u = T.iscalar('u')
        i = T.iscalar('i')
        pred = T.add(T.dot(self.W[u], self.H.T), self.B)
        self.get_user_res = theano.function(inputs=[u], outputs=pred)

        pred2A = T.dot(self.W[u], self.H[i].T)
        pred2 = T.add(pred2A, self.B[i])
        self.get_user_item_res = theano.function(inputs=[u, i], outputs=pred2)
Example #15
    def __call__(self,M,*inputs):
        summands = [Xi.dot(Wiz) for (Xi,Wiz) in zip(inputs,self.Wizs)] + [M.dot(self.Wmz),self.bz]
        z = TT.nnet.sigmoid(TT.add(*summands))

        summands = [Xi.dot(Wir) for (Xi,Wir) in zip(inputs,self.Wirs)] + [M.dot(self.Wmr),self.br]
        r = TT.nnet.sigmoid(TT.add(*summands))

        summands = [Xi.dot(Wim) for (Xi,Wim) in zip(inputs,self.Wims)] + [(r*M).dot(self.Wmm),self.bm]
        Mtarg = TT.tanh(TT.add(*summands)) #pylint: disable=E1111

        Mnew = (1-z)*M + z*Mtarg
        return Mnew
    def scan_y(cur_step):
        # Compute pairwise affinities
        sum_y = tensor.sum(tensor.square(y_arg), 1)
        num = 1 / (1 + tensor.add(tensor.add(-2 * tensor.dot(y_arg, y_arg.T), sum_y).T, sum_y))
        num = tensor.set_subtensor(num[range(n),range(n)], 0)

        Q = num / tensor.sum(num)
        Q = tensor.maximum(Q, 1e-12)

        PQ = p_arg - Q

        def inner(pq_i, num_i, y_arg_i):
            return tensor.sum(tensor.tile(pq_i * num_i, (no_dims, 1)).T * (y_arg_i - y_arg), 0)
        dy_arg, _ = theano.scan(inner,
                outputs_info = None,
                sequences = [PQ, num, y_arg])
        dy_arg = tensor.cast(dy_arg,FLOATX)
        # dy_arg = y_arg

        momentum = ifelse(tensor.lt(cur_step, 20), 
                initial_momentum_f, 
                final_momentum_f)

        indexsa = tensor.neq((dy_arg>0), (iy_arg>0)).nonzero()
        indexsb = tensor.eq((dy_arg>0), (iy_arg>0)).nonzero()
        resulta = tensor.set_subtensor(gains_arg[indexsa], gains_arg[indexsa]+0.2)
        resultb = tensor.set_subtensor(resulta[indexsb], resulta[indexsb]*0.8)

        indexs_min = (resultb<min_gain_f).nonzero()
        new_gains_arg = tensor.set_subtensor(resultb[indexs_min], min_gain_f)

        # last step in simple version of SNE
        new_iy_arg = momentum * iy_arg - eta * (new_gains_arg * dy_arg)
        new_y_arg = y_arg + new_iy_arg
        new_y_arg = new_y_arg - tensor.tile(tensor.mean(new_y_arg, 0), (n, 1))

        # # Compute current value of cost function
        # if (cur_step + 1) % 10 == 0:
        #     C = tensor.sum(p_arg * tensor.log(p_arg / Q))
        #     print "Iteration ", (cur_step + 1), ": error is ", C

        # Stop lying about P-values

        # new_p_arg = p_arg
        # if cur_step == 2:
        #     new_p_arg = p_arg / 4
            # p_arg = p_arg / 4
            # p_arg.set_value(p_arg.get_value / 4)
        new_p_arg = ifelse(tensor.eq(cur_step, 100), 
                p_arg / 4, 
                p_arg)
        return [(y_arg,new_y_arg),(iy_arg,new_iy_arg), (gains_arg,new_gains_arg),(p_arg,new_p_arg)]
Example #17
    def __call__(self,M,*inputs):
        assert len(inputs) == len(self.Wizs)
        summands = [Xi.dot(Wiz) for (Xi,Wiz) in zip(inputs,self.Wizs)] + [M.dot(self.Wmz),self.bz]
        z = TT.nnet.sigmoid(TT.add(*summands))

        summands = [Xi.dot(Wir) for (Xi,Wir) in zip(inputs,self.Wirs)] + [M.dot(self.Wmr),self.br]
        r = TT.nnet.sigmoid(TT.add(*summands))

        summands = [Xi.dot(Wim) for (Xi,Wim) in zip(inputs,self.Wims)] + [(r*M).dot(self.Wmm),self.bm]
        Mtarg = TT.tanh(TT.add(*summands)) #pylint: disable=E1111

        Mnew = (1-z)*M + z*Mtarg
        return Mnew
Example #18
    def output(self, train):
        X = self.get_input(
            train
        )  # shape: (nb_samples, time (padded with zeros at the end), input_dim)
        # new shape: (time, nb_samples, input_dim) -> because theano.scan iterates over main dimension
        X = X.dimshuffle((1, 0, 2))
        lenX = X.shape[0]
        Entity = X[lenX - 1:].dimshuffle(1, 0, 2)

        X = X[:lenX - 1]
        b_o = self.b_o
        b_on = T.repeat(T.repeat(b_o.reshape((1, self.output_dim)),
                                 X.shape[0],
                                 axis=0).reshape(
                                     (1, X.shape[0], self.output_dim)),
                        X.shape[1],
                        axis=0)
        xf = self.activation(T.dot(X, self.W_if) + self.b_if)
        xb = self.activation(T.dot(X, self.W_ib) + self.b_ib)

        # Iterate forward over the first dimension of the x array (=time).
        outputs_f, updates_f = theano.scan(
            self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
            sequences=xf,  # tensors to iterate over, inputs to _step
            # initialization of the output. Input to _step with default tap=-1.
            outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
            non_sequences=[self.W_ff, self.b_f],  # static inputs to _step
            truncate_gradient=self.truncate_gradient)
        # Iterate backward over the first dimension of the x array (=time).
        outputs_b, updates_b = theano.scan(
            self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
            sequences=xb,  # tensors to iterate over, inputs to _step
            # initialization of the output. Input to _step with default tap=-1.
            outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
            non_sequences=[self.W_bb, self.b_b],  # static inputs to _step
            truncate_gradient=self.truncate_gradient,
            go_backwards=True  # Iterate backwards through time
        )
        #return outputs_f.dimshuffle((1, 0, 2))
        if self.return_sequences:
            return T.concatenate([
                T.add(
                    T.tensordot(
                        T.add(outputs_f.dimshuffle(
                            (1, 0, 2)), outputs_b[::-1].dimshuffle((1, 0, 2))),
                        self.W_o, [[2], [0]]), b_on), Entity
            ],
                                 axis=1)
        return T.concatenate((outputs_f[-1], outputs_b[0]))
    def logp(self, value):
        if self.constant:
            x = tt.add(*[self.rho[i + 1] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)])
            eps = value[self.p:] - self.rho[0] - x
        else:
            if self.p == 1:
                x = self.rho * value[:-1]
            else:
                x = tt.add(*[self.rho[i] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)])
            eps = value[self.p:] - x

        innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps)
        init_like = self.init.logp(value[:self.p])

        return tt.sum(innov_like) + tt.sum(init_like)
Example #20
    def logp(self, value):
        if self.constant:
            x = tt.add(*[self.rho[i + 1] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)])
            eps = value[self.p:] - self.rho[0] - x
        else:
            if self.p == 1:
                x = self.rho * value[:-1]
            else:
                x = tt.add(*[self.rho[i] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)])
            eps = value[self.p:] - x

        innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps)
        init_like = self.init.logp(value[:self.p])

        return tt.sum(innov_like) + tt.sum(init_like)
Example #21
    def __init__(self, **kwargs):
        super(ResNet, self).__init__(**kwargs)

        assert self.status[1] == 2, "Only accept 2 sources!"
        assert self.status[0], "Only accept cnn layers!"

        x = self.sources[0]
        f_x = self.sources[1]

        time = x.output.shape[0]
        batch = x.output.shape[1]

        self.input = T.add(x.Output, f_x.Output)
        self.Output = T.nnet.relu(self.input)

        if self.attrs['batch_norm']:
            self.Output = self.batch_norm(
                h=self.Output.reshape(
                    (self.Output.shape[0], self.Output.shape[1] *
                     self.Output.shape[2] * self.Output.shape[3])),
                dim=self.attrs['n_out'],
                force_sample=self.force_sample).reshape(self.Output.shape)

        output2 = self.Output.dimshuffle(
            0, 2, 3, 1)  # (time*batch, out-row, out-col, nb feature maps)
        self.output = output2.reshape(
            (time, batch, output2.shape[1] * output2.shape[2] *
             output2.shape[3]))  # (time, batch, out-dim)
def splittings(omega, x, l):

    vals = []
    for n in range(1, n2 + 1):  # 0 to 35?

        area = 0
        kern = np.loadtxt("kerns/l.{l:.0f}_n.{n:.0f}".format(l=l, n=n),
                          skiprows=1)
        # This is bad:
        if x.size < 4800:
            v = int(x.size / n2)
            kern = kern[0::v]

        # Shouldn't this just be a dot product?
        for j in range(1, x.size):
            area = tt.add(area, (x[j] - x[j - 1]) * tt.dot(omega[j], kern[j]))

        beta_mask = (beta[:, 0] == l) * (beta[:, 1] == n)
        delta = tt.dot(beta[beta_mask, 2], area)
        vals.append(delta)

    vals = tt.as_tensor_variable(vals)
    vals = tt.squeeze(vals)
    print("vals")
    print(vals.tag.test_value)
    return vals
def test_meta_classes():
    vec_tt = tt.vector('vec')
    vec_m = MetaSymbol.from_obj(vec_tt)
    assert vec_m.obj == vec_tt
    assert type(vec_m) == MetaTensorVariable

    # This should invalidate the underlying base object.
    vec_m.index = 0
    assert vec_m.obj is None
    assert vec_m.reify().type == vec_tt.type
    assert vec_m.reify().name == vec_tt.name

    vec_type_m = vec_m.type
    assert type(vec_type_m) == MetaTensorType
    assert vec_type_m.dtype == vec_tt.dtype
    assert vec_type_m.broadcastable == vec_tt.type.broadcastable
    assert vec_type_m.name == vec_tt.type.name

    assert graph_equal(tt.add(1, 2), mt.add(1, 2).reify())

    meta_var = mt.add(1, var()).reify()
    assert isinstance(meta_var, MetaTensorVariable)
    assert isinstance(meta_var.owner.op.obj, theano.Op)
    assert isinstance(meta_var.owner.inputs[0].obj, tt.TensorConstant)

    test_vals = [1, 2.4]
    meta_vars = MetaSymbol.from_obj(test_vals)
    assert meta_vars == [MetaSymbol.from_obj(x) for x in test_vals]
Example #24
 def th_logp(self, prior=False, noise=False):
     if prior:
         random_vars = self.model.free_RVs
     else:
         random_vars = self.model.basic_RVs
     factors = [var.logpt for var in random_vars] + self.model.potentials
     return tt.add(*map(tt.sum, factors))
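The one-liner in Example #24, tt.add(*map(tt.sum, factors)), first collapses every factor to a scalar with tt.sum and then adds all the scalars in one variadic node; Examples #36 and #43 below use the same idiom. A minimal standalone sketch with illustrative variable names (not from the source):

    import theano.tensor as tt
    a = tt.vector('a')
    b = tt.matrix('b')
    total_logp = tt.add(*map(tt.sum, [a, b]))  # equivalent to tt.sum(a) + tt.sum(b)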
def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,
                target_acceptance_rate, stepsize_inc, stepsize_dec,
                stepsize_min, stepsize_max, avg_acceptance_slowness):
    
  
    # broadcast `accept` scalar to tensor with the same dimensions as final_pos.
    accept_matrix = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1)))

    # if accept is True, update to `final_pos` else stay put
    new_positions = TT.switch(accept_matrix, final_pos, positions)

   
    ## STEPSIZE UPDATES ##
    # if acceptance rate is too low, our sampler is too "noisy" and we reduce
    # the stepsize. If it is too high, our sampler is too conservative, we can
    # get away with a larger stepsize (resulting in better mixing).
    _new_stepsize = TT.switch(avg_acceptance_rate > target_acceptance_rate,
                              stepsize * stepsize_inc, stepsize * stepsize_dec)
                              
    # maintain stepsize in [stepsize_min, stepsize_max]
    new_stepsize = TT.clip(_new_stepsize, stepsize_min, stepsize_max)

   
    # perform exponential moving average
    mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype)
    new_acceptance_rate = TT.add(
        avg_acceptance_slowness * avg_acceptance_rate,
        (1.0 - avg_acceptance_slowness) * accept.mean(dtype=mean_dtype))

    return [(positions, new_positions), (stepsize, new_stepsize), (avg_acceptance_rate, new_acceptance_rate)]
Example #26
 def sum_logdets(self):
     dets = [self.logdet]
     current = self
     while not current.isroot:
         current = current.parent
         dets.append(current.logdet)
     return tt.add(*dets)
Example #27
    def __call__(self, X):
        XY = X.dot(X.T)
        x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, 'x')
        X2e = tt.repeat(x2, X.shape[0], axis=1)
        H = X2e + X2e.T - 2. * XY

        V = tt.sort(H.flatten())
        length = V.shape[0]
        # median distance
        m = tt.switch(tt.eq((length % 2), 0),
                      # if even vector
                      tt.mean(V[((length // 2) - 1):((length // 2) + 1)]),
                      # if odd vector
                      V[length // 2])

        h = .5 * m / tt.log(floatX(H.shape[0]) + floatX(1))

        #  RBF
        Kxy = tt.exp(-H / h / 2.0)

        # Derivative
        dxkxy = -tt.dot(Kxy, X)
        sumkxy = tt.sum(Kxy, axis=1).dimshuffle(0, 'x')
        dxkxy = tt.add(dxkxy, tt.mul(X, sumkxy)) / h

        return Kxy, dxkxy
Example #28
 def sum_logdets(self):
     dets = [self.logdet]
     current = self
     while not current.isroot:
         current = current.parent
         dets.append(current.logdet)
     return tt.add(*dets)
Example #29
    def __init__(self, net, mixfrac=1.0, maxiter=25):
        #print( 'the mixfrac=', mixfrac) ################################### 0.1

        #mixfrac=1.0
        EzPickle.__init__(self, net, mixfrac, maxiter)
        self.net = net
        self.mixfrac = mixfrac

        self.ez_for_net = EzFlat(self.net.trainable_weights)

        x_nx = net.input
        self.predict = theano.function([x_nx], net.output, **FNOPTS)

        ypred_ny = net.output
        ytarg_ny = T.matrix("ytarg")
        var_list = net.trainable_weights  # trainable parameters of the value-function net
        l2 = 1e-3 * T.add(*[T.square(v).sum() for v in var_list])
        N = x_nx.shape[0]
        mse = T.sum(T.square(ytarg_ny - ypred_ny)) / N
        symb_args = [x_nx, ytarg_ny]
        loss = mse + l2
        self.opt = LbfgsOptimizer(loss,
                                  var_list,
                                  symb_args,
                                  maxiter=maxiter,
                                  extra_losses={
                                      "mse": mse,
                                      "l2": l2
                                  })
def ctc_loss(y_true, y_pred):
	
	def path_probs(predict, y_sym):
		pred_y = predict[:, y_sym]
		rr = recurrence_relation(y_sym.shape[0])

		def step(p_curr, p_prev,rr):
			return p_curr * T.dot(p_prev, rr)

		probabilities, _ = theano.scan(
			step,
			sequences=[pred_y],
			outputs_info=[T.eye(y_sym.shape[0])[0]],
			non_sequences=[rr]
			)
		return probabilities
	
	y_sym_a=T.argmax(y_true,axis=-1)
	n=T.cast(T.add(T.mul(2, y_true.shape[0] - T.sum(y_true[:,-1])),1),'int16')
	y_sym=T.cast(y_sym_a[:n],'int16')
	y_pred = T.clip(y_pred, epsilon, 1.0-epsilon)
	
	forward_probs = path_probs(y_pred, y_sym)
	backward_probs = path_probs(y_pred[::-1], y_sym[::-1])[::-1, ::-1]
	probs = forward_probs * backward_probs / y_pred[:, y_sym]
	total_probs = T.sum(probs)
	#total_probs=T.sum(forward_probs[-1,-2:])
	return -T.log(total_probs)
Example #31
    def __call__(self, X):
        XY = X.dot(X.T)
        x2 = tt.sum(X**2, axis=1).dimshuffle(0, 'x')
        X2e = tt.repeat(x2, X.shape[0], axis=1)
        H = X2e + X2e.T - 2. * XY

        V = tt.sort(H.flatten())
        length = V.shape[0]
        # median distance
        m = tt.switch(
            tt.eq((length % 2), 0),
            # if even vector
            tt.mean(V[((length // 2) - 1):((length // 2) + 1)]),
            # if odd vector
            V[length // 2])

        h = .5 * m / tt.log(floatX(H.shape[0]) + floatX(1))

        #  RBF
        Kxy = tt.exp(-H / h / 2.0)

        # Derivative
        dxkxy = -tt.dot(Kxy, X)
        sumkxy = tt.sum(Kxy, axis=-1, keepdims=True)
        dxkxy = tt.add(dxkxy, tt.mul(X, sumkxy)) / h

        return Kxy, dxkxy
Example #32
    def test_softmax_optimizations_w_bias2(self):
        x = tensor.matrix('x')
        b = tensor.vector('b')
        c = tensor.vector('c')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot

        env = gof.Env(
                [x, b, c, one_of_n],
                [op(softmax(T.add(x,b,c)), one_of_n)])
        assert env.outputs[0].owner.op == op

        print('BEFORE')
        for node in env.toposort():
            print(node.op)
        print('----')

        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)

        print('AFTER')
        for node in env.toposort():
            print(node.op)
        print('====')
        assert len(env.toposort()) == 3

        assert str(env.outputs[0].owner.op) == 'OutputGuard'
        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
Example #33
    def logp(self, z):
        factors = ([tt.sum(var.logpt)for var in self.model.basic_RVs] +
                   [tt.sum(var) for var in self.model.potentials])

        p = self.approx.to_flat_input(tt.add(*factors))
        p = theano.clone(p, {self.input: z})
        return p
Example #34
 def _lmul(self, x, T):
     if T:
         if len(self.col_shape())>1:
             x2 = x.flatten(2)
         else:
             x2 = x
         n_rows = x2.shape[0]
         offset = 0
         xWlist = []
         assert len(self._col_sizes) == len(self._Wlist)
         for size, W in zip(self._col_sizes, self._Wlist):
             # split the output rows into pieces
             x_s = x2[:,offset:offset+size]
             # multiply each piece by one transform
             xWlist.append(
                     W.lmul(
                         x_s.reshape(
                             (n_rows,)+W.col_shape()),
                         T))
             offset += size
         # sum the results
         rval = tensor.add(*xWlist)
     else:
         # multiply the input by each transform
         xWlist = [W.lmul(x,T).flatten(2) for W in self._Wlist]
         # join the resuls
         rval = tensor.join(1, *xWlist)
     return rval
def run_MCMC_ARp(x, y, draws, p, resmdl):
    phi_means = resmdl.params[:p]
    phi_sd = resmdl.bse[:p]

    with Model() as model8:
        alpha = Normal('alpha', mu=0, sd=10)
        beta = Normal('beta', mu=0, sd=10)
        sd = HalfNormal('sd', sd=10)
        phi = Normal('phi', mu=phi_means, sd=phi_sd, shape=p)
        y = tt.as_tensor(y)
        x = tt.as_tensor(x)
        y_r = y[p:]
        x_r = x[p:]
        resids = y - beta * x - alpha

        u = tt.add(*[phi[i] * resids[p - (i + 1):-(i + 1)] for i in range(p)])
        mu = alpha + beta * x_r + u
        data = Normal('y_r', mu=mu, sd=sd, observed=y_r)

    with model8:
        if p == 1:
            step = None
        else:
            step = Metropolis([phi])
        tune = int(draws / 5)
        trace = sample(draws, tune=tune, step=step, progressbar=False)

    print(summary(trace, varnames=['alpha', 'beta', 'sd', 'phi']))
    #plt.show(forestplot(trace, varnames=['alpha', 'beta', 'sd', 'phi']))
    #traceplot(trace, varnames=['alpha', 'beta', 'sd', 'phi'])
    return trace
Example #36
 def logpt(self):
     """Theano scalar of log-probability of the model"""
     with self:
         factors = [var.logpt for var in self.basic_RVs] + self.potentials
         logp = tt.add(*map(tt.sum, factors))
         logp.name = '__logp'
         return logp
Example #37
    def __init__(self, net, mixfrac=1.0, maxiter=25):
        EzPickle.__init__(self, net, mixfrac, maxiter)
        self.net = net
        self.mixfrac = mixfrac

        x_nx = net.input  # input layer of the keras net
        self.predict = theano.function([x_nx], net.output,
                                       **FNOPTS)  # compiled theano func

        ypred_ny = net.output  # input layer of the keras net
        ytarg_ny = T.matrix("ytarg")
        var_list = net.trainable_weights
        # l2 regularization with reg coeff of 1e-3
        l2 = 1e-3 * T.add(*[T.square(v).sum() for v in var_list])
        N = x_nx.shape[0]
        mse = T.sum(T.square(ytarg_ny - ypred_ny)) / N  # mean squared error
        symb_args = [x_nx, ytarg_ny]
        loss = mse + l2  # loss = mse + l2 reg
        # set the optimizer as the manually coded Lfbgs Optimizer
        self.opt = LbfgsOptimizer(loss,
                                  var_list,
                                  symb_args,
                                  maxiter=maxiter,
                                  extra_losses={
                                      "mse": mse,
                                      "l2": l2
                                  })
Example #38
  def __init__(self, **kwargs):
    super(ResNet, self).__init__(**kwargs)

    assert self.status[1] == 2, "Only accept 2 sources!"
    assert self.status[0], "Only accept cnn layers!"

    x = self.sources[0]
    f_x = self.sources[1]

    time = x.output.shape[0]
    batch = x.output.shape[1]

    self.input = T.add(x.Output, f_x.Output)
    self.Output = T.nnet.relu(self.input)

    if self.attrs['batch_norm']:
      self.Output = self.batch_norm(
        h=self.Output.reshape(
          (self.Output.shape[0],
           self.Output.shape[1] * self.Output.shape[2] * self.Output.shape[3])
        ),
        dim=self.attrs['n_out'],
        force_sample=self.force_sample
      ).reshape(self.Output.shape)

    output2 = self.Output.dimshuffle(0, 2, 3, 1)  # (time*batch, out-row, out-col, nb feature maps)
    self.output = output2.reshape((time, batch, output2.shape[1] * output2.shape[2] * output2.shape[3]))  # (time, batch, out-dim)
Example #39
def rbf_kernel(X):

    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e +  X2e.T - 2. * XY

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # if even vector
        T.mean(T.sort(V)[ ((V.shape[0] // 2) - 1) : ((V.shape[0] // 2) + 1) ]),
        # if odd vector
        T.sort(V)[V.shape[0] // 2])

    h = .5 * h / T.log(T.cast(H.shape[0] + 1., theano.config.floatX))

    # compute the rbf kernel
    kxy = T.exp(-H / h / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / h

    return kxy, dxkxy
Example #40
 def _mean_h_given_v(self, v):
     alpha = self.usable_alpha()
     return tensor.add(
                 self.b,
                 -0.5 * ldot(v * v, self.Lambda) if self.Lambda else 0,
                 self.mu * ldot(v, self.W),
                 0.5 * tensor.sqr(ldot(v, self.W))/alpha)
Example #41
def add_merge_MultiBatchBeamGradAddOp(node):
  if node.op != T.add: return False
  if len(node.inputs) < 2: return False
  grad_op_idx = None
  grad_op_v = None
  grad_op = None
  for i, input in enumerate(node.inputs):
    if input.owner and isinstance(input.owner.op, MultiBatchBeamGradAddOp):
      grad_op = input.owner.op
      if not grad_op.inplace:  # we cannot merge when we operate inplace on it
        grad_op_v = input
        grad_op_idx = i
        break
  if grad_op_idx is None: return False
  sum_inputs = [node.inputs[i] for i in range(len(node.inputs)) if i != grad_op_idx]
  if grad_op.zero_with_shape:
    # Make new grad_op without zero_with_shape.
    kwargs = {k: getattr(grad_op, k) for k in grad_op.__props__}
    kwargs["zero_with_shape"] = False
    grad_op = grad_op.__class__(**kwargs)
  else:
    old_grad_op_input0 = grad_op_v.owner.inputs[0]
    sum_inputs = [old_grad_op_input0] + sum_inputs
  assert len(sum_inputs) > 0
  if len(sum_inputs) == 1:
    new_grad_op_input0 = sum_inputs[0]
  else:
    new_grad_op_input0 = T.add(*sum_inputs)
  new_grad_op_inputs = [new_grad_op_input0] + grad_op_v.owner.inputs[1:]
  new_v = grad_op(*new_grad_op_inputs)
  return [new_v]
Example #42
 def logp_norm(self, z):
     t = self.approx.normalizing_constant
     factors = ([tt.sum(var.logpt) / t for var in self.model.basic_RVs] +
                [tt.sum(var) / t for var in self.model.potentials])
     logpt = tt.add(*factors)
     p = self.approx.to_flat_input(logpt)
     p = theano.clone(p, {self.input: z})
     return p
Example #43
def variational_gradient_estimate(
    vars, model, minibatch_RVs=[], minibatch_tensors=[], total_size=None, 
    n_mcsamples=1, random_seed=20090425):
    """Calculate approximate ELBO and its (stochastic) gradient. 
    """

    theano.config.compute_test_value = 'ignore'
    shared = make_shared_replacements(vars, model)

    # Correction sample size 
    r = 1 if total_size is None else \
        float(total_size) / minibatch_tensors[0].shape[0]

    other_RVs = set(model.basic_RVs) - set(minibatch_RVs)
    factors = [r * var.logpt for var in minibatch_RVs] + \
              [var.logpt for var in other_RVs] + model.potentials
    logpt = tt.add(*map(tt.sum, factors))
    
    [logp], inarray = join_nonshared_inputs([logpt], vars, shared)

    uw = dvector('uw')
    uw.tag.test_value = np.concatenate([inarray.tag.test_value,
                                        inarray.tag.test_value])

    elbo = elbo_t(logp, uw, inarray, n_mcsamples=n_mcsamples, random_seed=random_seed)

    # Gradient
    grad = gradient(elbo, [uw])

    return grad, elbo, shared, uw
Example #44
    def test_softmax_optimizations_w_bias2(self):
        x = tensor.matrix('x')
        b = tensor.vector('b')
        c = tensor.vector('c')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot

        env = gof.Env([x, b, c, one_of_n],
                      [op(softmax(T.add(x, b, c)), one_of_n)])
        assert env.outputs[0].owner.op == op

        print('BEFORE')
        for node in env.toposort():
            print(node.op)
        print('----')

        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(env)

        print('AFTER')
        for node in env.toposort():
            print(node.op)
        print('====')
        assert len(env.toposort()) == 3

        assert str(env.outputs[0].owner.op) == 'OutputGuard'
        assert env.outputs[0].owner.inputs[
            0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
Example #45
    def logp(self, z):
        factors = ([tt.sum(var.logpt) for var in self.model.basic_RVs] +
                   [tt.sum(var) for var in self.model.potentials])

        p = self.approx.to_flat_input(tt.add(*factors))
        p = theano.clone(p, {self.input: z})
        return p
    def create_weight_update_with_momentum_functions(self):
        weight_updates_with_momentum = []
        for i in range(len(self.weights)):
            weight_updates_with_momentum.append(
                (self.weights[i], g(T.add(self.weights[i], self.H.L.momentum_weights[i])))
            )

        self.weight_updates_with_momentum_function = function(inputs=[], updates=weight_updates_with_momentum)
Example #47
    def mcmc(ll, *frvs):
        proposals = [s_rng.local_proposal(v, rvs) for v, rvs in zip(free_RVs, frvs)]
        proposals_rev = [s_rng.local_proposal(v, rvs) for v, rvs in zip(free_RVs, proposals)]

        full_observations = dict(observations)
        full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, proposals)]))
        new_log_likelihood = full_log_likelihood(full_observations)

        logratio = new_log_likelihood - ll \
            + tensor.add(*[tensor.sum(lpdf(p, r)) for p, r in zip(proposals_rev, frvs)]) \
            - tensor.add(*[tensor.sum(lpdf(p, r)) for p, r in zip(proposals, proposals)])
                   
        accept = tensor.gt(logratio, tensor.log(U))
        
        return [tensor.switch(accept, new_log_likelihood, ll)] + \
               [tensor.switch(accept, p, f) for p, f in zip(proposals, frvs)], \
               {}, theano.scan_module.until(accept)
Example #48
 def logp_norm(self, z):
     t = self.approx.normalizing_constant
     factors = ([tt.sum(var.logpt) / t for var in self.model.basic_RVs] +
                [tt.sum(var) / t for var in self.model.potentials])
     logpt = tt.add(*factors)
     p = self.approx.to_flat_input(logpt)
     p = theano.clone(p, {self.input: z})
     return p
    def train(self, train_inputs, train_targets, optimizer=lgn.updates.adagrad,
              minibatch_size=None, n_epochs=1000, optimizer_kwargs=None,
              objective=lgn.objectives.squared_error):
        print("training network")

        # loss function
        lgn_outputs = lgn.layers.get_output(
            [lgn.layers.ReshapeLayer(self.params[o].output,
                                     (self.batch_size, self.seq_len,
                                      self.params[o].output.num_units))
             for o in train_targets],
            deterministic=False)
        target_vars = [T.ftensor3("%s_targets" % o)
                       for o in train_targets]
        losses = [objective(o, t).mean()
                  for o, t in zip(lgn_outputs, target_vars)]

        # sum the losses for all the outputs to get the overall objective
        if len(losses) == 1:
            loss = losses[0]
        else:
            loss = T.add(*losses)

        # compile training update function
        params = lgn.layers.get_all_params([self.params[o].output
                                            for o in train_targets],
                                           trainable=True)

        if optimizer_kwargs is None:
            optimizer_kwargs = {}
        updates = optimizer(loss, params, **optimizer_kwargs)
        self.train_func = theano.function(
            [self.params[x].input.input_var for x in train_inputs] +
            target_vars, loss, updates=updates)

        # print("layers", lgn.layers.get_all_layers([self.params[o].output
        #                                     for o in train_targets]))
        # print("params", lgn.layers.get_all_params([self.params[o].output
        #                                     for o in train_targets]))

        # run training epochs
        with ProgressTracker(n_epochs, TerminalProgressBar()) as progress:
            n_inputs = len(list(train_inputs.values())[0])
            minibatch_size = minibatch_size or n_inputs
            for _ in range(n_epochs):
                indices = np.random.permutation(n_inputs)
                for start in range(0, n_inputs - minibatch_size + 1,
                                   minibatch_size):
                    minibatch = indices[start:start + minibatch_size]

                    self.train_func(*(
                        [train_inputs[x][minibatch] for x in train_inputs] +
                        [train_targets[x][minibatch] for x in train_targets]))

                progress.step()

        print("training complete")
    def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, w_hidden_hidden, b_act):

        pre_w_sig = T.dot(pre_out_sig, w_hidden_hidden)
        inner_act = self.activation
        out_sig = inner_act(T.add(cur_w_in_sig, pre_w_sig, b_act))

        mask = T.addbroadcast(mask, 1)
        out_sig_m = mask * out_sig + (1. - mask) * pre_out_sig
        return [out_sig_m]
Example #51
 def mean_conv_v_given_s_h(self, s, h, With_fast):
     """Return the mean of binary-valued visible units v, given h and s
     """
     W = self.get_filters(With_fast)
     conv_v_bias = self.get_conv_v_bias(With_fast)
     shW = self.convdot(s*h, W)        
     rval = nnet.sigmoid(
             tensor.add(shW, conv_v_bias))
     return rval
Example #52
    def __init__(self, n_in, n_out, input_data_list, activation_fn=tanh):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.w = theano.shared(
            np.asarray(
                np.random.uniform(
                    low=-np.sqrt(6.0 / (n_in + n_out)), high=np.sqrt(6.0 / (n_in + n_out)), size=(n_in, n_out)
                ),
                dtype=theano.config.floatX,
            ),
            name="w",
            borrow=True,
        )
        # self.b = theano.shared(
        #     np.asarray(np.zeros((n_out)), dtype=theano.config.floatX),
        #     name='b', borrow=True)
        # self.params = [self.w, self.b]
        # self.params = [self.w]

        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0.0, scale=1.0 / (n_in + n_out), size=(n_out,)), dtype=theano.config.floatX
            ),
            name="b",
            borrow=True,
        )
        self.params = [self.w, self.b]

        # self.w = T._shared(
        #     np.asarray(
        #         np.random.uniform(
        #             low=-np.sqrt(6.0/(n_in+n_out)), high=np.sqrt(6.0/(n_in+n_out)), size=(n_in, n_out)),
        #             dtype=theano.config.floatX),
        #     name='w', borrow=True)
        # self.b = T._shared(
        #     np.asarray(np.zeros((n_out)), dtype=theano.config.floatX),
        #     name='b', borrow=True)

        self.q, self.d = input_data_list
        self.output = [
            self.activation_fn(T.add(TS.basic.structured_dot(self.q, self.w), self.b)),
            self.activation_fn(T.add(TS.basic.structured_dot(self.d, self.w), self.b)),
        ]
def add_matrix(matrix1, matrix2):
    if len(matrix1) != len(matrix2) or len(matrix1[0]) != len(matrix2[0]):
        raise Exception('Matrices are not aligned')
    x = shared(np.asmatrix(matrix1, 'float32'))
    y = shared(np.asmatrix(matrix2, 'float32'))

    z = T.add(x, y)
    f = function([], sandbox.cuda.basic_ops.gpu_from_host(z))

    return f()
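A hypothetical call to the add_matrix function above; it relies on the legacy theano.sandbox.cuda backend imported by the surrounding module, so treat it purely as a sketch:

    summed = add_matrix([[1.0, 2.0], [3.0, 4.0]],
                        [[5.0, 6.0], [7.0, 8.0]])  # elementwise sum computed on the GPU
    print(summed)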
Example #54
 def free_energy_given_v(self, v):
     sigmoid_arg = self._mean_h_given_v(v)
     hterm = tensor.sum(
             tensor.nnet.softplus(sigmoid_arg),
             axis=range(1,sigmoid_arg.ndim))
     return tensor.add(
             0.5 * tensor.sum(
                 self.usable_beta() * (v**2),
                 axis=range(1,v.ndim)),
             -hterm)
Example #55
    def free_energy_given_v(self, v):
        """
        .. todo::

            WRITEME
        """
        sigmoid_arg = self.input_to_h_from_v(v)
        return tensor.add(
                0.5 * (self.B * (v ** 2)).sum(axis=1),
                -tensor.nnet.softplus(sigmoid_arg).sum(axis=1))