Example #1
def get_training_model(Ws_s, bs_s, dropout=False, lambd=10.0, kappa=1.0):
    # Build three parallel networks: one for the position after the real move,
    # one for a fake random move, and one for the parent position
    # Train on the negative log likelihood of classifying the right move

    xc_s, xc_p = get_model(Ws_s, bs_s, dropout=dropout)
    xr_s, xr_p = get_model(Ws_s, bs_s, dropout=dropout)
    xp_s, xp_p = get_model(Ws_s, bs_s, dropout=dropout)

    #loss = -T.log(sigmoid(xc_p + xp_p)).mean() # negative log likelihood
    #loss += -T.log(sigmoid(-xp_p - xr_p)).mean() # negative log likelihood

    cr_diff = xc_p - xr_p
    loss_a = -T.log(sigmoid(cr_diff)).mean()

    cp_diff = kappa * (xc_p + xp_p)
    loss_b = -T.log(sigmoid( cp_diff)).mean()
    loss_c = -T.log(sigmoid(-cp_diff)).mean()

    # Add regularization terms
    reg = 0
    for x in Ws_s + bs_s:
        reg += lambd * (x ** 2).mean()

    loss = loss_a + loss_b + loss_c
    return xc_s, xr_s, xp_s, loss, reg, loss_a, loss_b, loss_c
Example #2
def sample_gradient():
    print "微分"
    x, y = T.dscalars("x", "y")
    z = (x+2*y)**2
    # dz/dx
    gx = T.grad(z, x)
    fgx = theano.function([x,y], gx)
    print fgx(1.0, 1.0)
    # dz/dy
    gy = T.grad(z, y)
    fgy = theano.function([x,y], gy)
    print fgy(1.0, 1.0)
    # d{sigmoid(x)}/dx
    x = T.dscalar("x")
    sig = sigmoid(x)
    dsig = T.grad(sig, x)
    f = theano.function([x], dsig)
    print f(0.0)
    print f(1.0)
    # d{sigmoid(<x,w>)}/dx
    w = T.dscalar("w")
    sig = sigmoid(T.dot(x,w))
    dsig = T.grad(sig, x)
    f = theano.function([x, w], dsig)
    print f(1.0, 2.0)
    print f(3.0, 4.0)
    print
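For reference (not part of the original snippet), the printed values can be checked by hand: dz/dx = 2(x + 2y) and dz/dy = 4(x + 2y), so the two calls with (1.0, 1.0) print 6.0 and 12.0; and since d sigmoid(x)/dx = sigmoid(x)(1 - sigmoid(x)), the derivative is 0.25 at x = 0 and roughly 0.1966 at x = 1.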
Example #3
    def lstm_output(self, y_prev, ch_prev):
        """calculates info to pass to next time step.
        ch_prev is a vector of size 2*hdim"""

        c_prev = ch_prev[:self.hdim]#T.vector('c_prev')
        h_prev = ch_prev[self.hdim:]#T.vector('h_prev')

        # gates (input, forget, output)
        i_t = sigmoid(T.dot(self.Ui, h_prev))
        f_t = sigmoid(T.dot(self.Uf, h_prev))
        o_t = sigmoid(T.dot(self.Uo, h_prev))
        # new memory cell
        c_new_t = T.tanh(T.dot(self.Uc, h_prev))
        # final memory cell
        c_t = f_t * c_prev + i_t * c_new_t
        # final hidden state
        h_t = o_t * T.tanh(c_t)

        # Input vector for softmax
        theta_t = T.dot(self.U, h_t) + self.b
        # Softmax prob vector
        y_hat_t = softmax(theta_t.T).T
        # softmax returns a 2-d row rather than a 1-d vector, hence the
        # transposes above
        # y_hat_t = y_hat_t[0]
        # Predicted output label
        out_label = T.argmax(y_hat_t)

        # final joint state
        ch_t = T.concatenate([c_t, h_t])

        return (out_label, ch_t), scan_module.until(T.eq(out_label, self.out_end))
Example #4
 def forward(self, data, h):
     z = NNET.sigmoid(THT.dot(data, self.Wz) + THT.dot(h, self.Uz) + self.bz)
     r = NNET.sigmoid(THT.dot(data, self.Wr) + THT.dot(h, self.Ur) + self.br)
     c = THT.tanh(THT.dot(data, self.Wg) + THT.dot(r * h, self.Ug) + self.bg)
     out = (1 - z) * h + z * c
     
     return out
Example #5
    def make_ann(self, hidden_layers, lr):
        self.W = [
            theano.shared(
                rng.uniform(-0.1, 0.1, size=(784, hidden_layers[0])))
        ]
        self.B = [theano.shared(rng.uniform(-0.1, 0.1, size=(hidden_layers[0])))]  # one bias per hidden unit
        innput = T.vector('innput')
        self.X = [Tann.sigmoid(T.dot(innput, self.W[0]) + self.B[0])]
        params = [self.W[0], self.B[0]]
        for n in range(1, len(hidden_layers)):
            #Finding number of inputs
            n_in = hidden_layers[n - 1]
            n_out = hidden_layers[n]
            #making Bias and weights for a layer
            self.W.append(
                theano.shared(rng.uniform(-0.1, 0.1, size=(n_in, n_out))))
            # one bias per unit in this layer
            self.B.append(theano.shared(rng.uniform(-0.1, 0.1, size=(n_out))))
            # each layer reads the previous layer's activations
            self.X.append(
                Tann.sigmoid(T.dot(self.X[n - 1], self.W[n]) + self.B[n]))
            params.append(self.W[n])
            params.append(self.B[n])
        # reconstruction error between the input and the last layer's output
        error = T.sum((innput - self.X[-1])**2)
        print(error)
        print(params)
        #

        gradients = T.grad(error, params)

        backprop_acts = [(p, p - self.lrate * g)
                         for p, g in zip(params, gradients)]
        self.predictor = theano.function([innput], self.X)  # outputs: flat list of layer activations
        self.trainer = theano.function([innput], error, updates=backprop_acts)
Example #6
def __step(img, prev_bbox, prev_att, state):
    cx = (prev_bbox[:, 2] + prev_bbox[:, 0]) / 2.
    cy = (prev_bbox[:, 3] + prev_bbox[:, 1]) / 2.
    sigma = TT.exp(prev_att[:, 0]) * (max(img_col, img_row) / 2)
    fract = TT.exp(prev_att[:, 1])
    amplifier = TT.exp(prev_att[:, 2])

    eps = 1e-8

    abs_cx = (cx + 1) / 2. * (img_col - 1)
    abs_cy = (cy + 1) / 2. * (img_row - 1)
    abs_stride = (fract * (max(img_col, img_row) - 1)) * ((1. / (NUM_N - 1.)) if NUM_N > 1 else 0)

    FX, FY = __filterbank(abs_cx, abs_cy, abs_stride, sigma)
    unnormalized_mask = (FX.dimshuffle(0, 'x', 1, 'x', 2) * FY.dimshuffle(0, 1, 'x', 2, 'x')).sum(axis=2).sum(axis=1)
    mask = unnormalized_mask  # / (unnormalized_mask.sum(axis=2).sum(axis=1) + eps).dimshuffle(0, 'x', 'x')
    masked_img = (mask.dimshuffle(0, 'x', 1, 2) * img) * amplifier.dimshuffle(0, 'x', 'x', 'x')

    conv1 = conv2d(masked_img, conv1_filters, subsample=(conv1_stride, conv1_stride))
    act1 = TT.tanh(conv1)
    flat1 = TT.reshape(act1, (batch_size, conv1_output_dim))
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)
    att = TT.dot(gru_h, W_fc3) + b_fc3

    return bbox, att, gru_h, mask
Example #7
    def test_local_sigm_times_exp(self):
        """
        Test the `local_sigm_times_exp` optimization.
        exp(x) * sigm(-x) -> sigm(x)
        exp(-x) * sigm(x) -> sigm(-x)
        """
        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops
        m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
        x, y = tensor.vectors('x', 'y')

        f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
        match(f, [sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid, tensor.neg])
        # assert check_stack_trace(f, ops_to_check=sigmoid)

        f = theano.function(
            [x, y],
            (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
                tensor.exp(x * y) * tensor.exp(y)), mode=m)
        topo = f.maker.fgraph.toposort()
        for op, nb in [(sigmoid, 2), (tensor.mul, 2),
                       (tensor.neg, 1), (tensor.exp, 1)]:
            assert sum([n.op == op for n in topo]) == nb
Example #8
    def test_local_sigm_times_exp(self):
        """
        Test the `local_sigm_times_exp` optimization.
        exp(x) * sigm(-x) -> sigm(x)
        exp(-x) * sigm(x) -> sigm(-x)
        """
        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops
        m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
        x, y = tensor.vectors('x', 'y')

        f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
        match(f, [sigmoid])

        f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid])

        f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid, tensor.neg])

        f = theano.function(
                [x, y],
                (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
                 tensor.exp(x * y) * tensor.exp(y)),
                mode=m)
        match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp, sigmoid,
                  tensor.mul])
Example #9
    def test_log1msigm_to_softplus(self):
        x = T.matrix()

        out = T.log(1 - sigmoid(x))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        assert isinstance(topo[0].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[1].op.scalar_op, theano.scalar.Neg)
        f(numpy.random.rand(54, 11).astype(config.floatX))

        # Same test with a flatten
        out = T.log(1 - T.flatten(sigmoid(x)))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
        assert isinstance(topo[0].op, T.Flatten)
        assert isinstance(topo[1].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
        f(numpy.random.rand(54, 11).astype(config.floatX))

        # Same test with a reshape
        out = T.log(1 - sigmoid(x).reshape([x.size]))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        #assert len(topo) == 3
        assert any(isinstance(node.op, T.Reshape) for node in topo)
        assert any(isinstance(getattr(node.op, 'scalar_op', None),
                              theano.tensor.nnet.sigm.ScalarSoftplus)
                   for node in topo)
        f(numpy.random.rand(54, 11).astype(config.floatX))
Example #10
    def build_custom_ann(self, layer_list, ann_type = "rlu", nb = 784):
        '''
        Build a feed-forward net with the given hidden layer sizes and
        activation type ("rlu", "sigmoid" or "ht"), followed by a
        10-unit rectified-linear output layer.
        '''
        layer_list = [nb] + layer_list
        input = T.dvector('input')
        target = T.wvector('target')
        w_list = []
        x_list = []
        w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[0],layer_list[1]))))
        if ann_type == "rlu":
            x_list.append(T.switch(T.dot(input,w_list[0]) > 0, T.dot(input,w_list[0]), 0))
        elif ann_type == "sigmoid":
            x_list.append(Tann.sigmoid(T.dot(input, w_list[0])))
        elif ann_type == "ht":
            x_list.append(T.tanh(T.dot(input, w_list[0])))

        for count in range(0, len(layer_list) - 2):
            w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[count + 1],layer_list[count + 2]))))
            if ann_type=="rlu":
                x_list.append(T.switch(T.dot(x_list[count],w_list[count + 1]) > 0, T.dot(x_list[count], w_list[count + 1]), 0))
            elif ann_type == "sigmoid":
                x_list.append(Tann.sigmoid(T.dot(x_list[count],w_list[count + 1])))
            elif ann_type == "ht":
                x_list.append(T.tanh(T.dot(x_list[count],w_list[count + 1])))
        w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[-1], 10))))
        x_list.append(T.switch(T.dot(x_list[-1],w_list[-1]) > 0, T.dot(x_list[-1],w_list[-1]), 0))

        error = T.sum(pow((target - x_list[-1]), 2))
        params = w_list
        gradients = T.grad(error, params) 
        backprops = [(p, p - self.lrate*g) for p,g in zip(params,gradients)]

        self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input], outputs=x_list[-1], allow_input_downcast=True)
Example #11
        def scan_function(input, inter_output, W, U, Wz, Uz, Wr, Ur, buw, bz, br):
            rj = nnet.sigmoid(T.dot(input, Wr) + T.dot(inter_output, Ur) + br)
            zj = nnet.sigmoid(T.dot(input, Wz) + T.dot(inter_output, Uz) + bz)
            htilde = T.tanh(T.dot(input, W) + rj * T.dot(inter_output, U) + buw)
            inter_output = zj * inter_output + (1 - zj) * htilde

            return inter_output
Example #12
 def fp(self, x, _):
   relu = lambda x: T.maximum(x, 0)  # elementwise max with 0; T.max(x, 0) would reduce over axis 0
   h = self.model.hiddens["h_%d" % self.hidden_id]['val']
   c = self.model.hiddens["c_%d" % self.hidden_id]['val']
   it = sigmoid(T.dot(x, self.Wxi) + T.dot(h, self.Whi) + T.dot(c, self.Wci) + self.Bi)
   ft = sigmoid(T.dot(x, self.Wxf) + T.dot(h, self.Whf) + T.dot(c, self.Wcf) + self.Bf)
   self.ct = ft * c + it * T.tanh(T.dot(x, self.Wxc) + T.dot(h, self.Whc) + self.Bc)
   ot = sigmoid(T.dot(x, self.Wxo) + T.dot(h, self.Who) + T.dot(self.ct, self.Wco) + self.Bo)
   self.output = ot * T.tanh(self.ct)
Example #13
    def gru_timestep(self, x_t, h_prev):

        Lx_t = self.L[:,x_t]
        # gates (update, reset)
        z_t = sigmoid(T.dot(self.Wz, Lx_t) + T.dot(self.Uz, h_prev))
        r_t = sigmoid(T.dot(self.Wr, Lx_t) + T.dot(self.Ur, h_prev))
        # combine them
        h_new_t = T.tanh(T.dot(self.Wh, Lx_t) + r_t * T.dot(self.Uh, h_prev))
        h_t = z_t * h_prev + (1 - z_t) * h_new_t
        return h_t
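For context, a step function like gru_timestep is normally threaded over a sequence with theano.scan. The sketch below is illustrative only: the standalone step, the dimensions, and the shared variables (L, Wz, Uz, Wr, Ur, Wh, Uh) are assumptions standing in for the attributes referenced above, not part of the original class.

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import sigmoid

hdim, vocab = 64, 1000
rng = np.random.RandomState(0)
share = lambda shape: theano.shared(rng.uniform(-0.1, 0.1, size=shape))
L = share((hdim, vocab))
Wz, Uz = share((hdim, hdim)), share((hdim, hdim))
Wr, Ur = share((hdim, hdim)), share((hdim, hdim))
Wh, Uh = share((hdim, hdim)), share((hdim, hdim))

def step(x_t, h_prev):
    # same update/reset-gate structure as gru_timestep above
    Lx_t = L[:, x_t]
    z_t = sigmoid(T.dot(Wz, Lx_t) + T.dot(Uz, h_prev))
    r_t = sigmoid(T.dot(Wr, Lx_t) + T.dot(Ur, h_prev))
    h_new_t = T.tanh(T.dot(Wh, Lx_t) + r_t * T.dot(Uh, h_prev))
    return z_t * h_prev + (1 - z_t) * h_new_t

xs = T.ivector('xs')                      # a sequence of word indices
hs, _ = theano.scan(step, sequences=xs, outputs_info=T.zeros((hdim,)))
encode = theano.function([xs], hs[-1])    # final hidden state of the sequence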
Example #14
def rbm_ais_gibbs_for_v(rbmA_params, rbmB_params, beta, v_sample, seed=23098):
    """
    Parameters:
    -----------
    rbmA_params: list
        Parameters of the baserate model (usually infinite temperature). List
        should be of length 3 and contain numpy.ndarrays corresponding to model
        parameters (weights, visbias, hidbias).

    rbmB_params: list
        similar to rbmA_params, but for model at temperature 1.

    beta: theano.shared
        scalar, represents inverse temperature at which we wish to sample from.

    v_sample: theano.shared
        matrix of shape (n_runs, nvis), state of current particles.

    seed: int
        optional seed parameter for sampling from binomial units.
    """

    (weights_a, visbias_a, hidbias_a) = rbmA_params
    (weights_b, visbias_b, hidbias_b) = rbmB_params

    theano_rng = RandomStreams(seed)

    # equation 15 (Salakhutdinov & Murray 2008)
    ph_a = nnet.sigmoid(
        (1 - beta) * (tensor.dot(v_sample, weights_a) + hidbias_a))
    ha_sample = theano_rng.binomial(
        size=(v_sample.shape[0], len(hidbias_a)),
        n=1,
        p=ph_a,
        dtype=config.floatX)

    # equation 16 (Salakhutdinov & Murray 2008)
    ph_b = nnet.sigmoid(beta * (tensor.dot(v_sample, weights_b) + hidbias_b))
    hb_sample = theano_rng.binomial(
        size=(v_sample.shape[0], len(hidbias_b)),
        n=1,
        p=ph_b,
        dtype=config.floatX)

    # equation 17 (Salakhutdinov & Murray 2008)
    pv_act = (1 - beta) * (tensor.dot(ha_sample, weights_a.T) + visbias_a) + \
                beta * (tensor.dot(hb_sample, weights_b.T) + visbias_b)
    pv = nnet.sigmoid(pv_act)
    new_v_sample = theano_rng.binomial(
        size=(v_sample.shape[0], len(visbias_b)),
        n=1,
        p=pv,
        dtype=config.floatX)

    return new_v_sample
Example #15
 def get_reconstruction_cost(self, updates, pre_nv):
     '''
     Approximation to the reconstruction error
     '''
     cross_entropy = T.mean(
         T.sum(self.inputs * T.log(sigmoid(pre_nv)) + 
               (1-self.inputs) * T.log(1 - sigmoid(pre_nv)),
               axis=1
         )
     )
     return cross_entropy
Example #16
 def new_output(self, y_prev, h_prev):
     # gates (update, reset)
     z_t = sigmoid(T.dot(self.Uz, h_prev))
     r_t = sigmoid(T.dot(self.Ur, h_prev))
     # combine them
     h_new_t = T.tanh(r_t * T.dot(self.Uh, h_prev))
     h_t = z_t * h_prev + (1 - z_t) * h_new_t
     # compute new out_label
     y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
     out_label = T.argmax(y_hat_t)
     
     return (out_label, h_t), scan_module.until(T.eq(out_label, self.out_end))
Example #17
    def _step(x_, h_, c_):
        preact = tensor.dot(tensor.concatenate((h_, input_layer(x_, h_))), W)
        preact += b

        i = nnet.sigmoid(_slice(preact, 0, n_hidden))
        f = nnet.sigmoid(_slice(preact, 1, n_hidden))
        o = nnet.sigmoid(_slice(preact, 2, n_hidden))
        c = nnet.sigmoid(_slice(preact, 3, n_hidden))

        c = f * c_ + i * c
        h = o * tensor.tanh(c)
        return h, c
Example #18
def _step(img, prev_bbox, state):
	# of (batch_size, nr_filters, some_rows, some_cols)
	conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride))
	act1 = TT.tanh(conv1)
	flat1 = TT.reshape(act1, (batch_size, conv1_output_dim))
	gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
	gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
	gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
	gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
	gru_h = (1-gru_z) * state + gru_z * gru_h_
	bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)
	return bbox, gru_h
Example #19
    def _step(x_, h_, c_):
        preact = tensor.dot(h_, U)
        preact += x_

        i = nnet.sigmoid(_slice(preact, 0, n_hidden))
        f = nnet.sigmoid(_slice(preact, 1, n_hidden))
        o = nnet.sigmoid(_slice(preact, 2, n_hidden))
        c = tensor.tanh(_slice(preact, 3, n_hidden))

        c = f * c_ + i * c
        h = o * tensor.tanh(c)
        return h, c
Example #20
	def build_ann(self, nb = 784, nh = 2, learning_rate = 0.1):
		w1 = theano.shared(np.random.uniform(-.1,.1,size=(nb,nh)))
		w2 = theano.shared(np.random.uniform(-.1,.1,size=(nh,nb)))
		input = T.dvector('input')
		b1 = theano.shared(np.random.uniform(-.1,.1,size=nh))
		b2 = theano.shared(np.random.uniform(-.1,.1,size=nb))
		x1 = Tann.sigmoid(T.dot(input,w1) + b1)
		x2 = Tann.sigmoid(T.dot(x1,w2) + b2)
		error = T.sum((input - x2)**2)
		params = [w1,b1,w2,b2]
		gradients = T.grad(error,params)
		backprop_acts = [(p, p - learning_rate*g) for p,g in zip(params,gradients)]
		self.predictor = theano.function([input],[x2,x1])
		self.trainer = theano.function([input],error,updates=backprop_acts)
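A minimal sketch of how a network built by build_ann might be trained and queried; the instance name, data, and epoch count below are illustrative assumptions, not part of the original class.

import numpy as np

# assuming `net` is an instance of the class above, after net.build_ann()
cases = np.random.rand(100, 784)          # stand-in for flattened images
for epoch in range(10):
    total = 0.0
    for c in cases:
        total += net.trainer(c)           # one gradient step per example
    print(total / len(cases))             # mean reconstruction error
reconstruction, hidden = net.predictor(cases[0])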
Example #21
    def dgru_output(self, x_t, old_label, h_prev):

        Lx_t = self.L[:,x_t]
        # gates (update, reset)
        z_t = sigmoid(T.dot(self.Wz, Lx_t) + T.dot(self.Uz, h_prev))
        r_t = sigmoid(T.dot(self.Wr, Lx_t) + T.dot(self.Ur, h_prev))
        # combine them
        h_new_t = T.tanh(T.dot(self.Wh, Lx_t) + r_t * T.dot(self.Uh, h_prev))
        h_t = z_t * h_prev + (1 - z_t) * h_new_t

        y_hat_t = softmax(T.dot(self.U, h_t) + self.b)[0]
        out_label = T.argmax(y_hat_t)

        return out_label, h_t
Example #22
    def _step(x_, m_, h_, c_):
        preact = tensor.dot(h_, U)
        preact += x_

        i = nnet.sigmoid(_slice(preact, 0, n_hidden))
        f = nnet.sigmoid(_slice(preact, 1, n_hidden))
        o = nnet.sigmoid(_slice(preact, 2, n_hidden))
        c = tensor.tanh(_slice(preact, 3, n_hidden))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_
        
        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_
        return h, c
Example #23
    def build_ann(self,nb,nh):
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, 10)))
        input = T.fmatrix()
        target = T.fmatrix()
        x1 = Tann.sigmoid(T.dot(input,w1))
        x2 = Tann.sigmoid(T.dot(x1,w2))
        error = T.sum(pow((target - x2), 2))
        params = [w1, w2]
        gradients = T.grad(error, params)
        backprops = self.backprop_acts(params, gradients)

        self.get_x1 = theano.function(inputs=[input, target], outputs=error, allow_input_downcast=True)
        self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input], outputs=x2, allow_input_downcast=True)
Example #24
    def dgru_timestep(self, x_t, old_cost, h_prev, ys):

        Lx_t = self.L[:,x_t]
        # gates (update, reset)
        z_t = sigmoid(T.dot(self.Wz, Lx_t) + T.dot(self.Uz, h_prev))
        r_t = sigmoid(T.dot(self.Wr, Lx_t) + T.dot(self.Ur, h_prev))
        # combine them
        h_new_t = T.tanh(T.dot(self.Wh, Lx_t) + r_t * T.dot(self.Uh, h_prev))
        h_t = z_t * h_prev + (1 - z_t) * h_new_t
        y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
        cost = T.sum(-T.log(y_hat_t[ys, T.arange(ys.shape[0])]))
        # We don't divide yet by batch size
        new_cost = old_cost + cost
        
        return new_cost, h_t  # pass the accumulated cost on to the next step
Example #25
    def buildann(self, nb, nh, nob, lr):
        x = []

        #weights with initial random values between -0.1 and 0.1
        for i in range(len(nh)):
            if i == 0:
                self.w.append(theano.shared(np.random.uniform(-.1, .1, size = (nb, nh[i]))))
            if i != 0:
                self.w.append(theano.shared(np.random.uniform(-.1, .1, size = (nh[i - 1], nh[i]))))
            if i == len(nh) - 1:
                self.w.append(theano.shared(np.random.uniform(-.1, .1, size = (nh[i], nob))))

        #input is the image; label is the target vector over the possible answers (0 to 9)
        input = T.dvector ('input')
        label = T.dvector ('label')

        #biases with initial random values between -0.1 and 0.1
        for i in range(len(nh)):
            self.b.append(theano.shared(np.random.uniform(-.1, .1, size = nh[i])))
            if i == len(nh) - 1:
                self.b.append(theano.shared(np.random.uniform(-.1, .1, size = nob)))

        #activation functions
        for i in range(len(nh)):
            if i == 0:
                x.append(Tann.sigmoid(T.dot(input, self.w[i]) + self.b[i]))
            x.append(Tann.sigmoid(T.dot(x[i], self.w[i + 1]) + self.b[i + 1]))

        #error calculation; the squared error is smallest for correct guesses
        error = T.sum((x[len(nh)] - label)**2)

        #parameters needed for the gradient search
        params = []
        for i in range(len(self.w)):
            params.append(self.w[i])
            params.append(self.b[i])

        #gradient search
        gradients = T.grad(error, params)

        #backpropagation updates for the weights and biases
        backprop_acts = [(p, p - self.lrate * g) for p,g in zip(params, gradients)]

        #testing function
        self.predictor = theano.function([input], x[len(nh)])

        #training function
        self.trainer = theano.function([input, label], [x[len(nh)], error], updates = backprop_acts)
Example #26
def __step(img, prev_bbox, state, timestep):
    conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride), border_mode='half')
    act1 = NN.relu(conv1)
    flat1 = TT.reshape(act1, (-1, conv1_output_dim))
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)

    bbox_cx = ((bbox[:, 2] + bbox[:, 0]) / 2 + 1) / 2 * img_row
    bbox_cy = ((bbox[:, 3] + bbox[:, 1]) / 2 + 1) / 2 * img_col
    bbox_w = TT.abs_(bbox[:, 2] - bbox[:, 0]) / 2 * img_row
    bbox_h = TT.abs_(bbox[:, 3] - bbox[:, 1]) / 2 * img_col
    x = TT.arange(img_row, dtype=T.config.floatX)
    y = TT.arange(img_col, dtype=T.config.floatX)
    mx = TT.maximum(TT.minimum(-TT.abs_(x.dimshuffle('x', 0) - bbox_cx.dimshuffle(0, 'x')) + bbox_w.dimshuffle(0, 'x') / 2., 1), 1e-4)
    my = TT.maximum(TT.minimum(-TT.abs_(y.dimshuffle('x', 0) - bbox_cy.dimshuffle(0, 'x')) + bbox_h.dimshuffle(0, 'x') / 2., 1), 1e-4)
    bbox_mask = mx.dimshuffle(0, 1, 'x') * my.dimshuffle(0, 'x', 1)

    new_cls1_f = cls_f
    new_cls1_b = cls_b

    mask = act1 * bbox_mask.dimshuffle(0, 'x', 1, 2)

    new_featmaps = TG.disconnected_grad(TT.set_subtensor(featmaps[:, timestep], mask))
    new_featmaps.name = 'new_featmaps'
    new_probmaps = TG.disconnected_grad(TT.set_subtensor(probmaps[:, timestep], bbox_mask))
    new_probmaps.name = 'new_probmaps'

    train_featmaps = TG.disconnected_grad(new_featmaps[:, :timestep+1].reshape(((timestep + 1) * batch_size, conv1_nr_filters, img_row, img_col)))
    train_featmaps.name = 'train_featmaps'
    train_probmaps = TG.disconnected_grad(new_probmaps[:, :timestep+1])
    train_probmaps.name = 'train_probmaps'

    for _ in range(0, 5):
        train_convmaps = conv2d(train_featmaps, new_cls1_f, subsample=(cls1_stride, cls1_stride), border_mode='half').reshape((batch_size, timestep + 1, batch_size, img_row, img_col))
        train_convmaps.name = 'train_convmaps'
        train_convmaps_selected = train_convmaps[TT.arange(batch_size).repeat(timestep+1), TT.tile(TT.arange(timestep+1), batch_size), TT.arange(batch_size).repeat(timestep+1)].reshape((batch_size, timestep+1, img_row, img_col))
        train_convmaps_selected.name = 'train_convmaps_selected'
        train_predmaps = NN.sigmoid(train_convmaps_selected + new_cls1_b.dimshuffle(0, 'x', 'x', 'x'))
        train_loss = NN.binary_crossentropy(train_predmaps, train_probmaps).mean()
        train_grad_cls1_f, train_grad_cls1_b = T.grad(train_loss, [new_cls1_f, new_cls1_b])
        new_cls1_f -= train_grad_cls1_f * 0.1
        new_cls1_b -= train_grad_cls1_b * 0.1

    return (bbox, gru_h, timestep + 1, mask, bbox_mask), {cls_f: TG.disconnected_grad(new_cls1_f), cls_b: TG.disconnected_grad(new_cls1_b), featmaps: TG.disconnected_grad(new_featmaps), probmaps: TG.disconnected_grad(new_probmaps)}
Example #27
    def __init__(self,
                 input=tensor.dvector('input'),
                 target=tensor.dvector('target'),
                 n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw):
        super(NNet, self).__init__(**kw)

        self.input = input
        self.target = target
        self.lr = shared(lr, 'learning_rate')
        self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
        self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
        # print self.lr.type

        self.hidden = sigmoid(tensor.dot(self.w1, self.input))
        self.output = tensor.dot(self.w2, self.hidden)
        self.cost = tensor.sum((self.output - self.target)**2)

        self.sgd_updates = {
            self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
            self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2)}

        self.sgd_step = pfunc(
            params=[self.input, self.target],
            outputs=[self.output, self.cost],
            updates=self.sgd_updates)

        self.compute_output = pfunc([self.input], self.output)

        self.output_from_hidden = pfunc([self.hidden], self.output)
Example #28
def get_model(Ws, bs, dropout=False):
    v = T.matrix('input')
    m = T.matrix('missing')
    q = T.matrix('target')
    k = T.vector('normalization factor')

    # Set all missing/target values to 0.5
    keep_mask = (1-m) * (1-q)
    h = keep_mask * (v * 2 - 1) # Convert to +1, -1
    
    # Normalize layer 0
    h *= k.dimshuffle(0, 'x')

    for l in xrange(len(Ws)):
        h = T.dot(h, Ws[l]) + bs[l]

        if l < len(Ws) - 1:
            h = h * (h > 0) # relu
            if dropout:
                mask = srng.binomial(n=1, p=0.5, size=h.shape)
                h = h * mask * 2

    output = sigmoid(h)
    LL = v * T.log(output) + (1 - v) * T.log(1 - output)
    # loss = -(q * LL).sum() / q.sum()
    loss = -((1 - m) * LL).sum() / (1 - m).sum()

    return v, m, q, k, output, loss
Example #29
def makelayer(X, input_size, output_size):
    w = np.random.randn(input_size + 1, output_size)
    W = theano.shared(np.asarray(w, dtype=theano.config.floatX))
    bias = np.asarray(np.random.randn(1), dtype=theano.config.floatX)
    B = theano.shared(bias)
    new_X = T.concatenate([X, B])
    return nnet.sigmoid(T.dot(W.T, new_X)), W, B
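A minimal sketch of how makelayer might be chained into a small two-layer network and trained; the layer sizes, learning rate, and training case are illustrative assumptions (and the update rule assumes the default floatX=float64), not part of the original snippet.

import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nnet

x = T.dvector('x')
y = T.dscalar('y')

h, W1, B1 = makelayer(x, 2, 3)       # 2 inputs -> 3 hidden units
out, W2, B2 = makelayer(h, 3, 1)     # 3 hidden units -> 1 output

cost = T.sum((out - y) ** 2)
params = [W1, B1, W2, B2]
updates = [(p, p - 0.1 * T.grad(cost, p)) for p in params]

train = theano.function([x, y], cost, updates=updates)
predict = theano.function([x], out)

train(np.array([0., 1.]), 1.)        # one gradient step on an XOR-style case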
Example #30
def get_update(Ws_s, bs_s):
    x, fx = train.get_model(Ws_s, bs_s)

    # Ground truth (who won)
    y = T.vector('y')

    # Compute loss (just log likelihood of a sigmoid fit)
    y_pred = sigmoid(fx)
    loss = -( y * T.log(y_pred) + (1 - y) * T.log(1 - y_pred)).mean()

    # Metrics on the number of correctly predicted ones
    frac_correct = ((fx > 0) * y + (fx < 0) * (1 - y)).mean()

    # Updates
    learning_rate_s = T.scalar(dtype=theano.config.floatX)
    momentum_s = T.scalar(dtype=theano.config.floatX)
    updates = train.nesterov_updates(loss, Ws_s + bs_s, learning_rate_s, momentum_s)
    
    f_update = theano.function(
        inputs=[x, y, learning_rate_s, momentum_s],
        outputs=[loss, frac_correct],
        updates=updates,
        )

    return f_update
Example #31
import numpy as np

from theano import shared, function
import theano.tensor as T
from theano.tensor.nnet import sigmoid

# Refer to ex02 for more on Theano.

# Model:

x = T.matrix()

W = shared(0.01 * np.random.randn(784, 10))
b = shared(np.zeros(10))

y = sigmoid(T.dot(x, W) + b)

# cost

target = T.matrix()

cost = T.mean((y - target)**2)

# Alternatively, you can use the following
# which adds some regularization
cost = T.mean((y - target)**2) + 0.0001 * T.sum(W**2)

# Functions to use model:

feedforward = function([x], y)
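The snippet stops at the forward pass; a gradient-descent training function would follow the same pattern. A minimal sketch, assuming plain SGD with an illustrative learning rate (neither is prescribed by the original example):

grad_W, grad_b = T.grad(cost, [W, b])
train = function([x, target], cost,
                 updates=[(W, W - 0.5 * grad_W),
                          (b, b - 0.5 * grad_b)])

# usage: call train(batch_images, batch_targets) repeatedly, where
# batch_images has shape (n, 784) and batch_targets has shape (n, 10)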
Example #32
def layer(x, w):
    b = np.array([1], dtype=theano.config.floatX)
    new_x = T.concatenate([x, b])
    m = T.dot(w.T, new_x)  # e.g. theta1 (3x3) . x (3x1) -> (3x1); theta2 (1x4) . (4x1) -> (1x1)
    h = nnet.sigmoid(m)
    return h
Example #33
    def __init__(self, rng, input, filter_shape, poolsize=(2,2), stride=None, if_pool=False, act=None, share_with=None,
                 tied=None, border_mode='valid'):
        self.input = input

        if share_with:
            self.W = share_with.W
            self.b = share_with.b

            self.W_delta = share_with.W_delta
            self.b_delta = share_with.b_delta

        elif tied:
            self.W = tied.W.dimshuffle(1,0,2,3)
            self.b = tied.b

            self.W_delta = tied.W_delta.dimshuffle(1,0,2,3)
            self.b_delta = tied.b_delta

        else:
            fan_in = np.prod(filter_shape[1:])
            poolsize_size = np.prod(poolsize) if poolsize else 1
            fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) / poolsize_size)
            W_bound = np.sqrt(6. / (fan_in + fan_out))
            self.W = theano.shared(
                np.asarray(
                    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
            b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True)

            self.W_delta = theano.shared(
                np.zeros(filter_shape, dtype=theano.config.floatX),
                borrow=True
            )

            self.b_delta = theano.shared(value=b_values, borrow=True)

        conv_out = nnet.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            border_mode=border_mode)

        #if poolsize:
        if if_pool:
            pooled_out = downsample.max_pool_2d(
                input=conv_out,
                ds=poolsize,
                st=stride,
                ignore_border=True)
            tmp = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
        else:
            tmp = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

        if act == ConvolutionLayer.ACT_TANH:
            self.output = T.tanh(tmp)
        elif act == ConvolutionLayer.ACT_SIGMOID:
            self.output = nnet.sigmoid(tmp)
        elif act == ConvolutionLayer.ACT_ReLu:
            self.output = tmp * (tmp>0)
        elif act == ConvolutionLayer.ACT_SoftPlus:
            self.output = T.log2(1+T.exp(tmp))
        else:
            self.output = tmp

        # store parameters of this layer
        self.params = [self.W, self.b]
        self.deltas = [self.W_delta, self.b_delta]
Example #34
 def set_output(self):
     self._output = sigmoid(self._prev_layer.output)
Example #35
    def __theano_train__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """

        uidx = T.iscalar()
        msk = T.imatrix()
        dist_pos = T.fmatrix()
        dist_neg = T.fmatrix()

        seq_n, seq_len = msk.shape  # 315 x 315
        tu = self.t[uidx]  # (20, )
        xpidxs = self.tra_buys_masks[uidx]  # (1264, )
        xqidxs = self.tra_buys_neg_masks[uidx]  # (1264, )
        gps = self.g[xpidxs[:seq_len]]  # (315, 20)
        hps = self.h[xpidxs[1:seq_len + 1]]  # (315, 20)
        hqs = self.h[xqidxs[1:seq_len + 1]]  # (315, 20)
        zps, zqs = self.z[xpidxs[1:seq_len + 1]], self.z[xqidxs[1:seq_len + 1]]

        guiq_pqs = Unique(False, False, False)(xpidxs)
        uiq_g = self.g[guiq_pqs]

        pqs = T.concatenate((xpidxs, xqidxs))
        uiq_pqs = Unique(False, False, False)(pqs)
        uiq_h = self.h[uiq_pqs]
        uiq_z = self.z[uiq_pqs]

        t_z = T.sum(tu * zps, 1)  # (315, )
        n_h = T.sum(msk, 1)  # (315, )
        expand_g = gps.reshape((1, seq_len, n_hidden)) * msk.reshape(
            (seq_n, seq_len, 1))  # (315, 315, 20)
        sp = T.sum(
            T.sum(expand_g * hps.reshape(
                (seq_n, 1, n_hidden)), 2) * self.f_d(dist_pos), 1
        ) / n_h + t_z  # [(315, 315) * (315, 315)] -> (315, ) / (315, ) + (315, )
        sq = T.sum(
            T.sum(expand_g * hqs.reshape(
                (seq_n, 1, n_hidden)), 2) * self.f_d(dist_neg), 1) / n_h + t_z

        # sp = T.sum(T.sum(expand_g * hps.reshape((seq_n, 1, n_hidden)), 2), 1) / n_h + t_z
        # sq = T.sum(T.sum(expand_g * hqs.reshape((seq_n, 1, n_hidden)), 2), 1) / n_h + t_z

        loss = T.sum(T.log(sigmoid(sp - sq)))
        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = T.sum([T.sum(par**2) for par in [gps, hps, hqs, zps, zqs]])
        seq_costs = (-loss + 0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        update_g = T.set_subtensor(
            uiq_g, uiq_g - lr * T.grad(seq_costs, self.g)[guiq_pqs])
        update_h = T.set_subtensor(
            uiq_h, uiq_h - lr * T.grad(seq_costs, self.h)[uiq_pqs])
        update_t = T.set_subtensor(tu,
                                   tu - lr * T.grad(seq_costs, self.t)[uidx])
        update_z = T.set_subtensor(
            uiq_z, uiq_z - lr * T.grad(seq_costs, self.z)[uiq_pqs])
        seq_updates.append((self.g, update_g))
        seq_updates.append((self.h, update_h))
        seq_updates.append((self.t, update_t))
        seq_updates.append((self.z, update_z))
        # ----------------------------------------------------------------------------

        # Given the positive/negative sample sequences and the other inputs, update the variables and return the loss.
        self.seq_train = theano.function(
            inputs=[uidx, dist_pos, dist_neg, msk],
            outputs=loss,
            updates=seq_updates)
Example #36
    def test_perform_sigm_times_exp(self):
        """
        Test the core function doing the `sigm_times_exp` optimization.

        It is easier to test different graph scenarios this way than by
        compiling a theano function.
        """
        x, y, z, t = tensor.vectors('x', 'y', 'z', 't')
        exp = tensor.exp

        def ok(expr1, expr2):
            trees = [parse_mul_tree(e) for e in (expr1, expr2)]
            perform_sigm_times_exp(trees[0])
            trees[0] = simplify_mul(trees[0])
            good = theano.gof.graph.is_same_graph(
                    compute_mul(trees[0]),
                    compute_mul(trees[1]))
            if not good:
                print(trees[0])
                print(trees[1])
                print('***')
                theano.printing.debugprint(compute_mul(trees[0]))
                print('***')
                theano.printing.debugprint(compute_mul(trees[1]))
            assert good
        ok(sigmoid(x) * exp(-x), sigmoid(-x))
        ok(-x * sigmoid(x) * (y * (-1 * z) * exp(-x)),
           -x * sigmoid(-x) * (y * (-1 * z)))
        ok(-sigmoid(-x) *
           (exp(y) * (-exp(-z) * 3 * -exp(x)) *
            (y * 2 * (-sigmoid(-y) * (z + t) * exp(z)) * sigmoid(z))) *
           -sigmoid(x),
           sigmoid(x) *
           (-sigmoid(y) * (-sigmoid(-z) * 3) * (y * 2 * ((z + t) * exp(z)))) *
           -sigmoid(x))
        ok(exp(-x) * -exp(-x) * (-sigmoid(x) * -sigmoid(x)),
           -sigmoid(-x) * sigmoid(-x))
        ok(-exp(x) * -sigmoid(-x) * -exp(-x),
           -sigmoid(-x))
Example #37
 def forward(self, x):
     return nnet.sigmoid(x)
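A usage sketch (the tensor name and test values are illustrative): the activation only becomes computable once it is wrapped in a compiled Theano function.

import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nnet

x = T.matrix('x')
f = theano.function([x], nnet.sigmoid(x))
print(f(np.array([[0.0, 2.0]])))   # approximately [[0.5, 0.881]]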