Example #1
    def test_mixed_dtype(self):
        x1 = tensor.vector('x1', dtype='int32')
        x2 = tensor.vector('x2', dtype=self.dtype)
        y1 = tensor.vector('y1', dtype='int32')
        y2 = tensor.vector('y2', dtype=self.dtype)
        c = tensor.iscalar('c')
        f = theano.function([c, x1, x2, y1, y2],
                            ifelse(c, (x1, x2), (y1, y2)), mode=self.mode)
        self.assertFunctionContains1(f, self.get_ifelse(2))
        rng = numpy.random.RandomState(utt.fetch_seed())

        xlen = rng.randint(200)
        ylen = rng.randint(200)

        vx1 = numpy.asarray(rng.uniform(size=(xlen,)) * 3, 'int32')
        vx2 = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
        vy1 = numpy.asarray(rng.uniform(size=(ylen,)) * 3, 'int32')
        vy2 = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)

        o1, o2 = f(1, vx1, vx2, vy1, vy2)
        assert numpy.allclose(vx1, o1)
        assert numpy.allclose(vx2, o2)

        o1, o2 = f(0, vx1, vx2, vy1, vy2)
        assert numpy.allclose(vy1, o1)
        assert numpy.allclose(vy2, o2)
Example #2
    def test_multiple_out(self):
        x1 = tensor.vector('x1', dtype=self.dtype)
        x2 = tensor.vector('x2', dtype=self.dtype)
        y1 = tensor.vector('y1', dtype=self.dtype)
        y2 = tensor.vector('y2', dtype=self.dtype)
        c = tensor.iscalar('c')
        z = ifelse(c, (x1, x2), (y1, y2))
        f = theano.function([c, x1, x2, y1, y2], z, mode=self.mode)
        self.assertFunctionContains1(f, self.get_ifelse(2))

        ifnode = [x for x in f.maker.fgraph.toposort()
                  if isinstance(x.op, IfElse)][0]
        assert len(ifnode.outputs) == 2

        rng = numpy.random.RandomState(utt.fetch_seed())

        x1len = rng.randint(200)
        x2len = rng.randint(200)
        y1len = rng.randint(200)
        y2len = rng.randint(200)

        vx1 = numpy.asarray(rng.uniform(size=(x1len,)), self.dtype)
        vx2 = numpy.asarray(rng.uniform(size=(x2len,)), self.dtype)
        vy1 = numpy.asarray(rng.uniform(size=(y1len,)), self.dtype)
        vy2 = numpy.asarray(rng.uniform(size=(y2len,)), self.dtype)

        ovx1, ovx2 = f(1, vx1, vx2, vy1, vy2)
        ovy1, ovy2 = f(0, vx1, vx2, vy1, vy2)
        assert numpy.allclose(vx1, ovx1)
        assert numpy.allclose(vy1, ovy1)
        assert numpy.allclose(vx2, ovx2)
        assert numpy.allclose(vy2, ovy2)
Example #3
    def test_multiple_out_grad(self):
        # Tests that we can compute the gradients through lazy if
        x1 = tensor.vector('x1')
        x2 = tensor.vector('x2')
        y1 = tensor.vector('y1')
        y2 = tensor.vector('y2')
        c = tensor.iscalar('c')
        z = ifelse(c, (x1, x2), (y1, y2))
        grads = tensor.grad(z[0].sum() + z[1].sum(),
                            [x1, x2, y1, y2])

        f = theano.function([c, x1, x2, y1, y2], grads)
        rng = numpy.random.RandomState(utt.fetch_seed())

        lens = [rng.randint(200) for i in range(4)]
        values = [numpy.asarray(rng.uniform(size=(l,)), theano.config.floatX)
                  for l in lens]
        outs_1 = f(1, *values)
        assert all([x.shape[0] == y for x, y in zip(outs_1, lens)])
        assert numpy.all(outs_1[0] == 1.)
        assert numpy.all(outs_1[1] == 1.)
        assert numpy.all(outs_1[2] == 0.)
        assert numpy.all(outs_1[3] == 0.)

        outs_0 = f(0, *values)
        assert all([x.shape[0] == y for x, y in zip(outs_0, lens)])
        assert numpy.all(outs_0[0] == 0.)
        assert numpy.all(outs_0[1] == 0.)
        assert numpy.all(outs_0[2] == 1.)
        assert numpy.all(outs_0[3] == 1.)
Example #4
def calculate(w1, w2, data, display):
    x = T.vector('x')
    w = T.vector('w')

    s = 1 / (1 + T.exp(-T.dot(x, w)))
    logistic = theano.function([x, w], s)

    if display:
        print("With: w1 = %f and w2 = %f" % (w1, w2))

    sum_error = 0
    sum_error_square = 0
    if isinstance(data, str) or not len(data):
        # read the data points from file when a filename (or nothing) is given
        filename = data if isinstance(data, str) and data else 'Data.txt'
        data = []
        with open('dataFiles/' + filename) as fp:
            reader = csv.reader(fp, delimiter=',')
            for line in reader:
                data.append([int(line[0]), float(line[1]), float(line[2])])
    if display:
        print('y\t\tf(x)\t\tE\t\tE^2')
    for i in range(0, len(data)):
        x1 = data[i][1]
        x2 = data[i][2]
        f = logistic([x1, x2], [w1, w2])
        e = data[i][0] - f
        e2 = e ** 2
        sum_error += e
        sum_error_square += e2
        if display:
            print('%f\t%f\t%f\t%f' % (data[i][0], f, e, e2))
    if display:
        print("\nSum:\t\t\t\t%f\t%f" % (sum_error, sum_error_square))
    return sum_error_square
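
A minimal usage sketch for calculate(), assuming a dataFiles/Data.txt file with comma-separated rows of the form label,x1,x2 (a hypothetical file matching what the parser above expects):

if __name__ == '__main__':
    # Passing an empty list makes calculate() fall back to reading
    # dataFiles/Data.txt; display=True prints the per-row errors.
    sse = calculate(0.5, -0.3, [], display=True)
    print('Sum of squared errors: %f' % sse)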
Example #5
    def test_grad_lazy_if(self):
        # Tests that we can compute the gradients through lazy if
        x = tensor.vector('x', dtype=self.dtype)
        y = tensor.vector('y', dtype=self.dtype)
        c = tensor.iscalar('c')
        z = ifelse(c, x, y)
        gx, gy = tensor.grad(z.sum(), [x, y])

        f = theano.function([c, x, y], [self.cast_output(gx),
                                        self.cast_output(gy)],
                            mode=self.mode)
        # Only 2 of the 3 ifelse get moved to the GPU.
        # The one that stays on the CPU is for the shape.
        self.assertFunctionContains(f, self.get_ifelse(1), min=2, max=3)
        rng = numpy.random.RandomState(utt.fetch_seed())

        xlen = rng.randint(200)
        ylen = rng.randint(200)

        vx = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
        vy = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)
        gx0, gy0 = f(1, vx, vy)
        assert numpy.allclose(gx0.shape, vx.shape)
        assert numpy.allclose(gy0.shape, vy.shape)
        assert numpy.all(numpy.asarray(gx0) == 1.)
        assert numpy.all(numpy.asarray(gy0) == 0.)

        gx0, gy0 = f(0, vx, vy)
        assert numpy.allclose(gx0.shape, vx.shape)
        assert numpy.allclose(gy0.shape, vy.shape)
        assert numpy.all(numpy.asarray(gx0) == 0.)
        assert numpy.all(numpy.asarray(gy0) == 1.)
Example #6
    def test_bincountOp(self):
        w = T.vector('w')
        for dtype in ('int8', 'int16', 'int32', 'int64',
                      'uint8', 'uint16', 'uint32', 'uint64'):
            # uint64 always fails
            # int64 and uint32 also fail if python int are 32-bit
            int_bitwidth = theano.gof.python_int_bitwidth()
            if int_bitwidth == 64:
                numpy_unsupported_dtypes = ('uint64',)
            if int_bitwidth == 32:
                numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')

            x = T.vector('x', dtype=dtype)

            if dtype in numpy_unsupported_dtypes:
                self.assertRaises(TypeError, bincount, x)

            else:
                a = np.random.random_integers(50, size=(25)).astype(dtype)
                weights = np.random.random((25,)).astype(config.floatX)

                f1 = theano.function([x], bincount(x))
                f2 = theano.function([x, w], bincount(x, weights=w))

                assert (np.bincount(a) == f1(a)).all()
                assert np.allclose(np.bincount(a, weights=weights),
                                   f2(a, weights))
                if not numpy_16:
                    continue
                f3 = theano.function([x], bincount(x, minlength=23))
                f4 = theano.function([x], bincount(x, minlength=5))
                assert (np.bincount(a, minlength=23) == f3(a)).all()
                assert (np.bincount(a, minlength=5) == f4(a)).all()
Example #7
    def test_softmax_optimizations_w_bias2(self):
        x = tensor.matrix('x')
        b = tensor.vector('b')
        c = tensor.vector('c')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot

        env = gof.Env(
                [x, b, c, one_of_n],
                [op(softmax(T.add(x,b,c)), one_of_n)])
        assert env.outputs[0].owner.op == op

        print 'BEFORE'
        for node in env.toposort():
            print node.op
        print '----'

        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)

        print 'AFTER'
        for node in env.toposort():
            print node.op
        print '===='
        assert len(env.toposort()) == 3

        assert str(env.outputs[0].owner.op) == 'OutputGuard'
        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
Example #8
 def __init__(self, vocab_size, dim, lr=0.5):
     W = np.asarray(np.random.rand(vocab_size, dim),
                    dtype=theano.config.floatX) / float(dim)
     W1 = np.asarray((np.random.rand(vocab_size, dim)),
                     dtype=theano.config.floatX) / float(dim)
     self.W = theano.shared(W, name='W', borrow=True)
     self.W1 = theano.shared(W1, name='W1', borrow=True)
     gW = np.asarray(np.ones((vocab_size, dim)), dtype=theano.config.floatX)
     gW1 = np.asarray(
         np.ones((vocab_size, dim)), dtype=theano.config.floatX)
     self.gW = theano.shared(gW, name='gW', borrow=True)
     self.gW1 = theano.shared(gW1, name='gW1', borrow=True)
     X = T.vector()
     fX = T.vector()
     ind_W = T.ivector()
     ind_W1 = T.ivector()
     w = self.W[ind_W, :]
     w1 = self.W1[ind_W1, :]
     cost = T.sum(fX * ((T.sum(w * w1, axis=1) - X) ** 2))
     grad = T.clip(T.grad(cost, [w, w1]), -5.0, 5.0)
     updates1 = [(self.gW, T.inc_subtensor(self.gW[ind_W, :],
                                           grad[0] ** 2))]
     updates2 = [(self.gW1, T.inc_subtensor(self.gW1[ind_W1, :],
                                            grad[1] ** 2))]
     updates3 = [(self.W, T.inc_subtensor(self.W[ind_W, :],
                                          - (lr / T.sqrt(self.gW[ind_W, :])) *
                                          grad[0]))]
     updates4 = [(self.W1, T.inc_subtensor(self.W1[ind_W1, :],
                                           - (lr / T.sqrt(self.gW1[ind_W1, :])) *
                                           grad[1]))]
     updates = updates1 + updates2 + updates3 + updates4
     self.cost_fn = theano.function(
         inputs=[ind_W, ind_W1, X, fX], outputs=cost, updates=updates)
Example #9
def _compile_func():
    beta = T.vector('beta')
    b = T.scalar('b')
    X = T.matrix('X')
    y = T.vector('y')
    C = T.scalar('C')
    params = [beta, b, X, y, C]
    cost = 0.5 * (T.dot(beta, beta) + b * b) + C * T.sum(
        T.nnet.softplus(
            -T.dot(T.diag(y), T.dot(X, beta) + b)
        )
    )
    # Function computing in one go the cost, its gradient
    # with regard to beta and with regard to the bias.
    cost_grad = theano.function(params,[
        cost,
        T.grad(cost, beta),
        T.grad(cost, b)
    ])

    # Function for computing element-wise sigmoid, used for
    # prediction.
    log_predict = theano.function(
        [beta, b, X],
        T.nnet.sigmoid(b + T.dot(X, beta)),
        on_unused_input='warn'
    )

    return (cost_grad, log_predict)
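
A minimal usage sketch for the two compiled functions, assuming numpy is imported as np alongside theano (the snippet above only shows the graph construction):

cost_grad, log_predict = _compile_func()

beta0 = np.zeros(3, dtype=theano.config.floatX)                # initial weights
b0 = np.asarray(0.0, dtype=theano.config.floatX)               # initial bias
C0 = np.asarray(1.0, dtype=theano.config.floatX)               # regularization trade-off
X0 = np.random.rand(5, 3).astype(theano.config.floatX)         # 5 samples, 3 features
y0 = np.array([1, -1, 1, -1, 1], dtype=theano.config.floatX)   # labels in {-1, +1}

# cost, gradient w.r.t. beta and gradient w.r.t. the bias, in one call
cost_val, g_beta, g_b = cost_grad(beta0, b0, X0, y0, C0)
probabilities = log_predict(beta0, b0, X0)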
Example #10
 def test_wrong_coefficient_matrix(self):
     x = tensor.vector()
     y = tensor.vector()
     z = tensor.scalar()
     b = theano.tensor.nlinalg.lstsq()(x, y, z)
     f = function([x, y, z], b)
     self.assertRaises(np.linalg.linalg.LinAlgError, f, [2, 1], [2, 1], 1)
Example #11
 def test_wrong_rcond_dimension(self):
     x = tensor.vector()
     y = tensor.vector()
     z = tensor.vector()
     b = theano.tensor.nlinalg.lstsq()(x, y, z)
     f = function([x, y, z], b)
     self.assertRaises(np.linalg.LinAlgError, f, [2, 1], [2, 1], [2, 1])
Example #12
    def _compile_bp(self):
        '''
        compile backpropagation for each of the DQNs.
        '''
        self.bprop_by_goal = {}
        for (goal, dqn) in self.dqn_by_goal.items():
            states = dqn.states
            action_values = dqn.action_values
            params = dqn.params
            targets = T.vector('target')
            shared_values = T.vector('shared_values')
            last_actions = T.lvector('action')

            # loss function.
            mse = layers.MSE(action_values[T.arange(action_values.shape[0]),
                                last_actions], targets) \
                    + T.mean(abs(action_values[T.arange(action_values.shape[0]),
                                    last_actions] - shared_values))
            # l2 penalty.
            l2_penalty = 0.
            for param in params:
                l2_penalty += (param ** 2).sum()

            cost = mse + self.l2_reg * l2_penalty

            # back propagation.
            updates = optimizers.Adam(cost, params, alpha=self.lr)

            td_errors = T.sqrt(mse)
            self.bprop_by_goal[goal] = theano.function(inputs=[states, last_actions, targets, shared_values],
                                        outputs=td_errors, updates=updates)
Example #13
def test_tagging():
    brick = TestBrick(0)
    x = tensor.vector('x')
    y = tensor.vector('y')
    z = tensor.vector('z')

    def check_output_variable(o):
        assert get_application_call(o).application.brick is brick
        assert (get_application_call(o.owner.inputs[0]).application.brick
                is brick)

    # Case 1: both positional arguments are provided.
    u, v = brick.apply(x, y)
    for o in [u, v]:
        check_output_variable(o)

    # Case 2: `b` is given as a keyword argument.
    u, v = brick.apply(x, y=y)
    for o in [u, v]:
        check_output_variable(o)

    # Case 3: two positional and one keyword argument.
    u, v, w = brick.apply(x, y, z=z)
    for o in [u, v, w]:
        check_output_variable(o)

    # Case 4: one positional argument.
    u, v = brick.apply(x)
    check_output_variable(u)
    assert v == 1

    # Case 5: variable was wrapped in a list. We can not handle that.
    u, v = brick.apply([x])
    assert_raises(AttributeError, check_output_variable, u)
Example #14
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
Example #15
    def test_infer_shape(self):
        for ndim in [1, 3]:
            x = T.TensorType(config.floatX, [False] * ndim)()
            shp = (np.arange(ndim) + 1) * 3
            a = np.random.random(shp).astype(config.floatX)

            for axis in self._possible_axis(ndim):
                for dtype in ["int8", "uint8", "uint64"]:
                    r_var = T.scalar(dtype=dtype)
                    r = np.asarray(3, dtype=dtype)
                    if dtype in self.numpy_unsupported_dtypes:
                        r_var = T.vector(dtype=dtype)
                        self.assertRaises(TypeError, repeat, x, r_var)
                    else:
                        self._compile_and_check([x, r_var],
                                                [RepeatOp(axis=axis)(x, r_var)],
                                                [a, r],
                                                self.op_class)

                        r_var = T.vector(dtype=dtype)
                        if axis is None:
                            r = np.random.randint(
                                1, 6, size=a.size).astype(dtype)
                        elif a.size > 0:
                            r = np.random.randint(
                                1, 6, size=a.shape[axis]).astype(dtype)
                        else:
                            r = np.random.randint(
                                1, 6, size=(10,)).astype(dtype)

                        self._compile_and_check(
                            [x, r_var],
                            [RepeatOp(axis=axis)(x, r_var)],
                            [a, r],
                            self.op_class)
Example #16
def test_check_theano_variable():
    check_theano_variable(None, 3, 'float')
    check_theano_variable([[1, 2]], 2, 'int')
    assert_raises(ValueError, check_theano_variable,
                  tensor.vector(), 2, 'float')
    assert_raises(ValueError, check_theano_variable,
                  tensor.vector(), 1, 'int')
Example #17
    def test_softmax_optimizations_w_bias_vector(self):
        x = tensor.vector('x')
        b = tensor.vector('b')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot
        fgraph = gof.FunctionGraph(
                [x, b, one_of_n],
                [op(softmax(x + b), one_of_n)])
        assert fgraph.outputs[0].owner.op == op
        #print 'BEFORE'
        #for node in fgraph.toposort():
        #    print node.op
        #print printing.pprint(node.outputs[0])
        #print '----'

        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
        #print 'AFTER'
        #for node in fgraph.toposort():
        #    print node.op
        #print '===='
        assert len(fgraph.toposort()) == 3
        assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
        assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
                crossentropy_softmax_argmax_1hot_with_bias)
Example #18
    def test_swap_SharedVariable_with_given(self):
        # A special test case for logistic_sgd.py in the Deep Learning Tutorial.
        # This test asserts that SharedVariables used by different functions share the same storage.

        train_x = theano.shared(value=np.random.rand(10, 10).astype(config.floatX))
        test_x = theano.shared(value=np.random.rand(10, 10).astype(config.floatX))

        train_y = theano.shared(value=np.random.rand(10, 1).astype(config.floatX))
        test_y = theano.shared(value=np.random.rand(10, 1).astype(config.floatX))

        i = T.iscalar('index')
        x = T.vector('x')
        y = T.vector('y')
        # this formula is meaningless; it exists only for the test
        out = (T.sum(x) - y) ** 2
        train = theano.function([i], out,
                                givens={x: train_x[i], y: train_y[i]},
                                updates={train_x: train_x + 0.1})

        test_def = theano.function([i], out, givens={x: test_x[i], y: test_y[i]})
        test_cpy = train.copy(swap={train_x: test_x, train_y: test_y},
                              delete_updates=True)

        for in1, in2 in zip(test_def.maker.inputs, test_cpy.maker.inputs):
            assert in1.value is in2.value
Example #19
    def test_multiple_outputs(self):
        m = tensor.matrix('m')
        v = tensor.vector('v')
        m_ = tensor.matrix('m_')
        v_ = tensor.vector('v_')

        mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        vval = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
        m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        v_val = self.rng.uniform(size=(7,)).astype(theano.config.floatX)

        rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
        assert isinstance(rop_out1, list)
        assert len(rop_out1) == 3
        rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(rop_out2, tuple)
        assert len(rop_out2) == 3
        lop_out1 = tensor.Lop([m, v, m + v], (m, v), [m_, v_])
        assert isinstance(lop_out1, tuple)
        assert len(lop_out1) == 2
        lop_out2 = tensor.Lop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(lop_out2, list)
        assert len(lop_out2) == 2

        all_outs = []
        for o in rop_out1, rop_out2, lop_out1, lop_out2:
            all_outs.extend(o)
        f = theano.function([m, v, m_, v_], all_outs)
        f(mval, vval, m_val, v_val)
Example #20
    def test_normal_vector(self):
        rng_R = random_state_type()
        avg = tensor.vector()
        std = tensor.vector()
        post_r, out = normal(rng_R, avg=avg, std=std)
        assert out.ndim == 1
        f = compile.function([rng_R, avg, std], [post_r, out], accept_inplace=True)

        def as_floatX(thing):
            return numpy.asarray(thing, dtype=theano.config.floatX)

        avg_val = [1, 2, 3]
        std_val = as_floatX([0.1, 0.2, 0.3])
        rng = numpy.random.RandomState(utt.fetch_seed())
        numpy_rng = numpy.random.RandomState(utt.fetch_seed())

        # Arguments of size (3,)
        rng0, val0 = f(rng, avg_val, std_val)
        numpy_val0 = as_floatX(numpy_rng.normal(loc=as_floatX(avg_val), scale=as_floatX(std_val)))
        assert numpy.all(val0 == numpy_val0)

        # arguments of size (2,)
        rng1, val1 = f(rng0, avg_val[:-1], std_val[:-1])
        numpy_val1 = numpy.asarray(numpy_rng.normal(loc=avg_val[:-1], scale=std_val[:-1]), dtype=theano.config.floatX)
        assert numpy.all(val1 == numpy_val1)

        # Specifying the size explicitly
        g = compile.function([rng_R, avg, std], normal(rng_R, avg=avg, std=std, size=(3,)), accept_inplace=True)
        rng2, val2 = g(rng1, avg_val, std_val)
        numpy_val2 = numpy.asarray(numpy_rng.normal(loc=avg_val, scale=std_val, size=(3,)), dtype=theano.config.floatX)
        assert numpy.all(val2 == numpy_val2)
        self.assertRaises(ValueError, g, rng2, avg_val[:-1], std_val[:-1])
Example #21
def test_theano():
    import numpy as np
    import theano
    import theano.tensor as tt
    import matplotlib.pyplot as plt

    lif_params = dict(tau_rc=0.02, tau_ref=0.002, gain=1, bias=1, amp=1. / 63.04)
    softlif_params = dict(lif_params)
    softlif_params['sigma'] = 0.01

    x = np.linspace(-1, 1)

    lif = get_theano_fn('lif', lif_params)
    sx = tt.vector()
    lif = theano.function([sx], lif(sx))

    softlif = get_theano_fn('softlif', softlif_params)
    sx = tt.vector()
    softlif = theano.function([sx], softlif(sx))

    y_lif = lif(x)
    y_softlif = softlif(x)

    plt.figure()
    plt.plot(x, y_lif)
    plt.plot(x, y_softlif)
    plt.show()
Example #22
def build_model(reg_constant=0.1, var1_name='var1', var2_name='var2'):
    """
    Build MF model in theano
    :param reg_constant: Regularization constant
    :param var1_name: Name of first variable (e.g. users)
    :param var2_name: Name of second variable (e.g. items)
    :return: theano function implementing MF model
    """

    ratings = T.vector('ratings')
    var1_vector = T.vector('{}_vector'.format(var1_name))
    var2_matrix = T.matrix('{}_matrix'.format(var2_name))

    predictions = T.dot(var2_matrix[:, 1:], var1_vector[1:]) + var2_matrix[:, 0] + var1_vector[0]

    prediction_error = ((ratings - predictions) ** 2).sum()
    l2_penalty = (var1_vector ** 2).sum() + (var2_matrix ** 2).sum().sum()

    cost = prediction_error + reg_constant * l2_penalty

    var1_grad, var2_grad = T.grad(cost, [var1_vector, var2_matrix])
    var1_grad /= var2_matrix.shape[0]

    f = theano.function(inputs=[ratings, var1_vector, var2_matrix], outputs=[cost, var1_grad, var2_grad])

    return f
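
A minimal usage sketch for the returned function, assuming numpy is imported as np (the example above only shows the model construction; the data below is made up):

mf = build_model(reg_constant=0.1)

ratings = np.asarray([4.0, 3.0], dtype=theano.config.floatX)
# index 0 of the vector and column 0 of the matrix act as bias terms in the prediction above
var1_vector = np.asarray([0.1, 0.2, 0.3], dtype=theano.config.floatX)
var2_matrix = np.asarray([[0.0, 1.0, 0.5],
                          [0.1, 0.4, 0.2]], dtype=theano.config.floatX)

cost, var1_grad, var2_grad = mf(ratings, var1_vector, var2_matrix)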
Example #23
    def test_grad_lazy_if(self):
        # Tests that we can compute the gradients through lazy if
        x = tensor.vector('x')
        y = tensor.vector('y')
        c = tensor.iscalar('c')
        z = ifelse(c, x, y)
        gx, gy = tensor.grad(z.sum(), [x, y])

        f = theano.function([c, x, y], [gx, gy])
        rng = numpy.random.RandomState(utt.fetch_seed())

        xlen = rng.randint(200)
        ylen = rng.randint(200)

        vx = numpy.asarray(rng.uniform(size=(xlen,)), theano.config.floatX)
        vy = numpy.asarray(rng.uniform(size=(ylen,)), theano.config.floatX)
        gx0, gy0 = f(1, vx, vy)
        assert numpy.allclose(gx0.shape, vx.shape)
        assert numpy.allclose(gy0.shape, vy.shape)
        assert numpy.all(gx0 == 1.)
        assert numpy.all(gy0 == 0.)

        gx0, gy0 = f(0, vx, vy)
        assert numpy.allclose(gx0.shape, vx.shape)
        assert numpy.allclose(gy0.shape, vy.shape)
        assert numpy.all(gx0 == 0.)
        assert numpy.all(gy0 == 1.)
Example #24
    def __init__(self,dic_size,window,unit_id,tag_num,net_size,weight_decay,word_dim = 50, learning_rate = 0.1):
        def f_softplus(x): return T.log(T.exp(x) + 1)# - np.log(2)
        def f_rectlin(x): return x*(x>0)
        def f_rectlin2(x): return x*(x>0) + 0.01 * x
        nonlinear = {'tanh': T.tanh, 'sigmoid': T.nnet.sigmoid, 'softplus': f_softplus, 'rectlin': f_rectlin, 'rectlin2': f_rectlin2}
        self.non_unit = nonlinear[unit_id]
        self.weight_decay = weight_decay
        self.tag_num = tag_num
        self.window_size = window
        self.learning_rate = learning_rate
        self.worddim = word_dim
        self.w, self.b, self.A = self.init_w(net_size,tag_num)
        self.w2vtable = self.init_wtable(word_dim,dic_size)#table of word vectors
        x = T.vector('x')
        w = []
        b = []
        for i in range(len(self.w)):
            w.append(T.matrix())
            b.append(T.vector())

        output = self.network(x,w,b)
        og = []
        for j in range(self.tag_num):
            og.extend(T.grad(output[j],w+b+[x]))

        self.outfunction = theano.function([x]+w+b, output)
        self.goutfunction = theano.function([x]+w+b,[output]+og)
Example #25
def test_logpy():
    x = tensor.vector()
    y = tensor.vector()
    z = tensor.inc_subtensor(x[1:3], y)
    node = z.owner

    # otherwise theano chokes on var attributes when nose tries to print a traceback
    # XXX this should be un-monkey-patched after the test runs by e.g. a
    # context manager decorator
    theano.gof.Apply.__repr__ = object.__repr__
    theano.gof.Apply.__str__ = object.__str__

    w = dict((name, var(name)) for name in [
        'start', 'stop', 'step', 'set_instead_of_inc', 'inputs', 'outputs',
        'inplace', 'whole_op', 'dta',
        ])

    pattern = raw_init(theano.Apply,
        op=raw_init(theano.tensor.IncSubtensor,
            idx_list=[slice(w['start'], w['stop'], w['step'])],
            inplace=w['inplace'],
            set_instead_of_inc=w['set_instead_of_inc'],
            destroyhandler_tolerate_aliased=w['dta']),
        inputs=w['inputs'],
        outputs=w['outputs'])

    match, = run(0, w, (eq, node, pattern))

    assert match['stop'] == 3
    assert match['inputs'] == [x, y]
Example #26
def test_rmsprop_0():
    # input
    x = TT.vector(name='x')
    B = theano.shared(floatX(np.ones((3, 5))), name='B')
    c = theano.shared(floatX(np.ones(3)), name='c')
    params = [B, c]
    # output
    y_pred = TT.nnet.softmax(TT.dot(B, x.T).T + c)
    y_gold = TT.vector(name="y_gold")
    # cost and grads
    cost = TT.sum((y_pred - y_gold)**2)
    grads = TT.grad(cost, wrt=params)
    # funcs
    cost_func, update_func, rms_params = rmsprop(params, grads,
                                                 [x], y_gold, cost)
    # check return values
    assert len(rms_params) == 4
    assert isinstance(rms_params[0][0], TT.sharedvar.TensorSharedVariable)
    assert not np.any(rms_params[0][0].get_value())
    # check convergence
    X = [floatX(np.random.rand(5)) for _ in xrange(N)]
    Y = [floatX(np.random.rand(3)) for _ in xrange(N)]
    icost = init_cost = end_cost = 0.
    for i in xrange(MAX_I):
        icost = 0.
        for x, y in zip(X, Y):
            icost += cost_func(x, y)
            update_func()
        if i == 0:
            init_cost = icost
        elif i == MAX_I - 1:
            end_cost = icost
    assert end_cost < init_cost
Example #27
 def __init__(self, C, D):
     self.W = theano.shared(np.ones((C,D), dtype='float32'))
     t_M = T.matrix('M', dtype='float32')
     t_vM = T.vector('M', dtype='float32')
     t_Y = T.vector('Y', dtype='float32')
     t_I = T.vector('I', dtype='float32')
     t_s = T.vector('s', dtype='float32')
     t_eps = T.scalar('epsilon', dtype='float32')
     self.input_integration = theano.function(
         [t_Y],
         T.dot(T.log(self.W),t_Y),
         allow_input_downcast=True
         )
     self.M_summation = theano.function(
         [t_M],
         T.sum(t_M, axis=0),
         allow_input_downcast=True
         )
     self.recurrent_softmax = theano.function(
         [t_I,t_vM],
         t_vM*T.exp(t_I)/T.sum(t_vM*T.exp(t_I)),
         allow_input_downcast=True
         )
     self.weight_update = theano.function(
         [t_Y,t_s,t_eps],
         self.W,
         updates={
             self.W:
             self.W + t_eps*(T.outer(t_s,t_Y) - t_s[:,np.newaxis]*self.W)
             },
         allow_input_downcast=True
         )
     self.epsilon = None
     self._Y = None
     self._s = None
Example #28
    def test_lop_override(self, cls_ofg):
        x = T.vector()
        y = 1. / (1. + T.exp(-x))

        def lop_ov(inps, outs, grads):
            y_, = outs
            dedy_, = grads
            return [2. * y_ * (1. - y_) * dedy_]

        y_, dedy = T.vector(), T.vector()
        op_lop_ov = cls_ofg([x, y_, dedy], [2. * y_ * (1. - y_) * dedy])

        xx = T.vector()
        yy1 = T.sum(T.nnet.sigmoid(xx))
        gyy1 = 2. * T.grad(yy1, xx)

        for ov in [lop_ov, op_lop_ov]:
            op = cls_ofg([x], [y], lop_overrides=ov)
            yy2 = T.sum(op(xx))
            gyy2 = T.grad(yy2, xx)
            fn = function([xx], [gyy1, gyy2])

            xval = np.random.rand(32).astype(config.floatX)
            y1val, y2val = fn(xval)
            assert np.allclose(y1val, y2val)
Example #29
 def setup_decoder_step(self):
   """Advance the decoder by one step.  Used at test time."""
   y_t = T.lscalar('y_t_for_dec')
   c_prev = T.vector('c_prev_for_dec')
   h_prev = T.vector('h_prev_for_dec')
   h_t = self.spec.f_dec(y_t, c_prev, h_prev)
   self._decoder_step = theano.function(inputs=[y_t, c_prev, h_prev], outputs=h_t)
Example #30
    def test_uniform_vector(self):
        rng_R = random_state_type()
        low = tensor.vector()
        high = tensor.vector()
        post_r, out = uniform(rng_R, low=low, high=high)
        assert out.ndim == 1
        f = compile.function([rng_R, low, high], [post_r, out], accept_inplace=True)

        def as_floatX(thing):
            return numpy.asarray(thing, dtype=theano.config.floatX)

        low_val = as_floatX([0.1, 0.2, 0.3])
        high_val = as_floatX([1.1, 2.2, 3.3])
        rng = numpy.random.RandomState(utt.fetch_seed())
        numpy_rng = numpy.random.RandomState(utt.fetch_seed())

        # Arguments of size (3,)
        rng0, val0 = f(rng, low_val, high_val)
        numpy_val0 = as_floatX(numpy_rng.uniform(low=low_val, high=high_val))
        assert numpy.all(val0 == numpy_val0)

        # arguments of size (2,)
        rng1, val1 = f(rng0, low_val[:-1], high_val[:-1])
        numpy_val1 = as_floatX(numpy_rng.uniform(low=low_val[:-1], high=high_val[:-1]))
        assert numpy.all(val1 == numpy_val1)

        # Specifying the size explicitly
        g = compile.function([rng_R, low, high], uniform(rng_R, low=low, high=high, size=(3,)), accept_inplace=True)
        rng2, val2 = g(rng1, low_val, high_val)
        numpy_val2 = as_floatX(numpy_rng.uniform(low=low_val, high=high_val, size=(3,)))
        assert numpy.all(val2 == numpy_val2)
        self.assertRaises(ValueError, g, rng2, low_val[:-1], high_val[:-1])
Example #31
def test_gru_init_val_error():
    # check that an error is raised when hid_init is not a matrix TensorVariable
    vector = T.vector()
    with pytest.raises(ValueError):
        l_rec = GRULayer(InputLayer((2, 2, 3)), 5, hid_init=vector)
Example #32
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import theano
import theano.tensor as T

# Define shared variables
W = theano.shared(np.array([[1, 2, 3], [4, 5, 6]], dtype=theano.config.floatX), name='W', borrow=True)
b = theano.shared(np.array([1, 1], dtype=theano.config.floatX), name='b', borrow=True)

# Read back the values of the shared variables
print W.get_value()
print b.get_value()

# Create a symbolic input variable
x = T.vector('x')

# Build the expression from the symbol and the shared variables
y = T.dot(W, x) + b
print type(y)

# Define and compile the function
f = theano.function(inputs=[x], outputs=y)

print f([1, 1, 1])
Example #33
    def __init__(self,
                 rng,
                 n_in,
                 n_out,
                 n_h,
                 dropout=0,
                 sigma_g=sigmoid,
                 sigma_c=hyperbolic_tangent,
                 sigma_h=hyperbolic_tangent,
                 sigma_y=softmax,
                 dropout_rate=0,
                 obj='c'):
        '''
        :param rng: Numpy RandomState
        :param n_in: Input dimension (int)
        :param n_out: Output dimension (int)
        :param n_h: Hidden dimension (int)
        :param sigma_g, sigma_c, sigma_h, sigma_y: activation functions
        :param dropout_rate: dropout rate (float)
        :param obj: objective type, 'c' for classification with cross entropy loss, 'r' for regression with MSE loss. (['c','r'])
        '''

        Wf_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                          np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        Uf_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                          np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
        bf_ = np.zeros(n_h)

        Wi_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                          np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        Ui_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                          np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
        bi_ = np.zeros(n_h)

        Wo_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                          np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        Uo_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                          np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
        bo_ = np.zeros(n_h)

        Wc_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                          np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        Uc_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                          np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
        bc_ = np.zeros(n_h)

        Wy_ = rng.uniform(-np.sqrt(6. / (n_out + n_h)),
                          np.sqrt(6. / (n_out + n_h)), (n_out, n_h))
        by_ = np.zeros(n_out)

        h0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)), np.sqrt(3. / (2. * n_h)),
                          n_h)
        c0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)), np.sqrt(3. / (2. * n_h)),
                          n_h)

        # Theano: Created shared variables
        Wf = theano.shared(name='Wf', value=Wf_.astype(theano.config.floatX))
        Uf = theano.shared(name='Uf', value=Uf_.astype(theano.config.floatX))
        bf = theano.shared(name='bf', value=bf_.astype(theano.config.floatX))

        Wi = theano.shared(name='Wi', value=Wi_.astype(theano.config.floatX))
        Ui = theano.shared(name='Ui', value=Ui_.astype(theano.config.floatX))
        bi = theano.shared(name='bi', value=bi_.astype(theano.config.floatX))

        Wo = theano.shared(name='Wo', value=Wo_.astype(theano.config.floatX))
        Uo = theano.shared(name='Uo', value=Uo_.astype(theano.config.floatX))
        bo = theano.shared(name='bo', value=bo_.astype(theano.config.floatX))

        Wc = theano.shared(name='Wc', value=Wc_.astype(theano.config.floatX))
        Uc = theano.shared(name='Uc', value=Uc_.astype(theano.config.floatX))
        bc = theano.shared(name='bc', value=bc_.astype(theano.config.floatX))

        Wy = theano.shared(name='Wy', value=Wy_.astype(theano.config.floatX))
        by = theano.shared(name='by', value=by_.astype(theano.config.floatX))

        h0 = theano.shared(name='h0', value=h0_.astype(theano.config.floatX))
        c0 = theano.shared(name='c0', value=c0_.astype(theano.config.floatX))

        self.p = [
            Wf, Uf, bf, Wi, Ui, bi, Wo, Uo, bo, Wc, Uc, bc, Wy, by, c0, h0
        ]

        seq_len = T.iscalar('seq_len')
        self.seq_len = seq_len

        self.x = T.vector()
        x_scan = T.reshape(self.x, [seq_len, n_in], ndim=2)

        if dropout_rate > 0:
            np.random.seed(int(time.time()))

            # for training
            def masked_forward_prop_step(x_t, h_t_prev, c_t_prev):
                f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
                i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
                o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
                c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
                c_t += c_t_prev * f_t
                h_t = o_t * sigma_h(c_t)
                y_t = Wy.dot(h_t) + by
                mask = np.random.binomial(np.ones(n_h, dtype=int),
                                          1.0 - dropout_rate)
                masked_h_t = h_t * T.cast(mask, theano.config.floatX)

                return [y_t, masked_h_t, c_t]

            # for testing
            def forward_prop_step(x_t, h_t_prev, c_t_prev):
                f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
                i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
                o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
                c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
                c_t += c_t_prev * f_t
                h_t = o_t * sigma_h(c_t)
                h_t = (1.0 - dropout_rate) * h_t
                y_t = Wy.dot(h_t) + by

                return [y_t, h_t, c_t]

            [o_train, _, _], _ = theano.scan(masked_forward_prop_step,
                                             sequences=[x_scan],
                                             outputs_info=[None, h0, c0],
                                             n_steps=seq_len)

            [o_test, _, _], _ = theano.scan(forward_prop_step,
                                            sequences=[x_scan],
                                            outputs_info=[None, h0, c0],
                                            n_steps=seq_len)

        else:

            def forward_prop_step(x_t, h_t_prev, c_t_prev):
                f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
                i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
                o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
                c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
                c_t += c_t_prev * f_t
                h_t = o_t * sigma_h(c_t)
                y_t = Wy.dot(h_t) + by

                return [y_t, h_t, c_t]

            [o_train, _, _], _ = theano.scan(forward_prop_step,
                                             sequences=[x_scan],
                                             outputs_info=[None, h0, c0],
                                             n_steps=seq_len)
            o_test = o_train

        if obj == 'c':  # classification task
            self.y = T.bscalar('y')
            self.o_train = sigma_y(o_train[-1])
            self.o_test = sigma_y(o_test[-1])
            # objective used to compute gradients (uses the dropout forward pass)
            self.cost = T.nnet.categorical_crossentropy(
                self.o_train,
                T.eye(n_out)[self.y])
            # accuracy uses the test-time forward pass (hidden units scaled by 1 - dropout_rate)
            self.accuracy = T.switch(T.eq(T.argmax(self.o_test), self.y), 1.,
                                     0.)
            self.prediction = T.argmax(self.o_test)
        elif obj == 'r':  # regression task
            self.y = T.dscalar('y')
            self.o_train = o_train[-1]
            self.o_test = o_test[-1]
            # objective used to compute gradients (uses the dropout forward pass)
            self.cost = (self.o_train[0] - self.y)**2
            # accuracy uses the test-time forward pass (hidden units scaled by 1 - dropout_rate)
            self.accuracy = (self.o_test[0] - self.y)**2
            self.prediction = self.o_test[0]

        self.optimiser = sgd_optimizer(self, 'LSTM')
Example #34
import numpy as np
import theano
import theano.tensor as tt

rng = np.random

N = 400
feats = 784
D = (
    rng.randn(N, feats).astype(theano.config.floatX),
    rng.randint(size=N, low=0, high=2).astype(theano.config.floatX),
)
training_steps = 10000

# Declare Theano symbolic variables
x = tt.matrix("x")
y = tt.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(np.asarray(0.0, dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
# print "Initial model:"
# print w.get_value(), b.get_value()


# Construct Theano expression graph
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b))  # Probability of having a one
prediction = p_1 > 0.5  # The prediction that is done: 0 or 1
xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1)  # Cross-entropy
cost = xent.mean() + 0.01 * (w ** 2).sum()  # The cost to optimize
gw, gb = tt.grad(cost, [w, b])
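
A hedged continuation sketch for the snippet above: compiling a training function whose updates move the shared w and b along the gradients (the 0.1 step size and the loop are assumptions, since the excerpt stops at the gradient computation):

train = theano.function(
    inputs=[x, y],
    outputs=[prediction, xent],
    updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)),
)
predict = theano.function(inputs=[x], outputs=prediction)

for _ in range(training_steps):
    # each call updates w and b in place via the shared-variable updates
    pred, err = train(D[0], D[1])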
Example #35
File: reps.py Project: zhmz90/rllab
    def init_opt(self):
        is_recurrent = int(self.policy.recurrent)

        # Init dual param values
        self.param_eta = 15.
        # Adjust for linear feature vector.
        self.param_v = np.random.rand(self.env.observation_space.flat_dim * 2 +
                                      4)

        # Theano vars
        obs_var = self.env.observation_space.new_tensor_variable(
            'obs',
            extra_dims=1 + is_recurrent,
        )
        action_var = self.env.action_space.new_tensor_variable(
            'action',
            extra_dims=1 + is_recurrent,
        )
        rewards = ext.new_tensor(
            'rewards',
            ndim=1 + is_recurrent,
            dtype=theano.config.floatX,
        )
        # Feature difference variable representing the difference in feature
        # value of the next observation and the current observation \phi(s') -
        # \phi(s).
        feat_diff = ext.new_tensor('feat_diff',
                                   ndim=2 + is_recurrent,
                                   dtype=theano.config.floatX)
        param_v = TT.vector('param_v')
        param_eta = TT.scalar('eta')

        valid_var = TT.matrix('valid')

        state_info_vars = {
            k: ext.new_tensor(k,
                              ndim=2 + is_recurrent,
                              dtype=theano.config.floatX)
            for k in self.policy.state_info_keys
        }
        state_info_vars_list = [
            state_info_vars[k] for k in self.policy.state_info_keys
        ]

        # Policy-related symbolics
        dist_info_vars = self.policy.dist_info_sym(obs_var, state_info_vars)
        dist = self.policy.distribution
        # log of the policy dist
        logli = dist.log_likelihood_sym(action_var, dist_info_vars)

        # Symbolic sample Bellman error
        delta_v = rewards + TT.dot(feat_diff, param_v)

        # Policy loss (negative because we minimize)
        if is_recurrent:
            loss = -TT.sum(logli * TT.exp(delta_v / param_eta -
                                          TT.max(delta_v / param_eta)) *
                           valid_var) / TT.sum(valid_var)
        else:
            loss = -TT.mean(logli * TT.exp(delta_v / param_eta -
                                           TT.max(delta_v / param_eta)))

        # Add regularization to loss.
        reg_params = self.policy.get_params(regularizable=True)
        loss += self.L2_reg_loss * TT.sum(
            [TT.mean(TT.square(param))
             for param in reg_params]) / len(reg_params)

        # Policy loss gradient.
        loss_grad = TT.grad(loss, self.policy.get_params(trainable=True))

        if is_recurrent:
            recurrent_vars = [valid_var]
        else:
            recurrent_vars = []

        input = [
            rewards, obs_var, feat_diff, action_var
        ] + state_info_vars_list + recurrent_vars + [param_eta, param_v]
        # if is_recurrent:
        #     input +=
        f_loss = ext.compile_function(
            inputs=input,
            outputs=loss,
        )
        f_loss_grad = ext.compile_function(
            inputs=input,
            outputs=loss_grad,
        )

        # Debug prints
        old_dist_info_vars = {
            k: ext.new_tensor('old_%s' % k,
                              ndim=2 + is_recurrent,
                              dtype=theano.config.floatX)
            for k in dist.dist_info_keys
        }
        old_dist_info_vars_list = [
            old_dist_info_vars[k] for k in dist.dist_info_keys
        ]

        if is_recurrent:
            mean_kl = TT.sum(
                dist.kl_sym(old_dist_info_vars, dist_info_vars) *
                valid_var) / TT.sum(valid_var)
        else:
            mean_kl = TT.mean(dist.kl_sym(old_dist_info_vars, dist_info_vars))

        f_kl = ext.compile_function(
            inputs=[obs_var, action_var] + state_info_vars_list +
            old_dist_info_vars_list + recurrent_vars,
            outputs=mean_kl,
        )

        # Dual-related symbolics
        # Symbolic dual
        if is_recurrent:
            dual = param_eta * self.epsilon + \
                   param_eta * TT.log(
                       TT.sum(
                           TT.exp(
                               delta_v / param_eta - TT.max(delta_v / param_eta)
                           ) * valid_var
                       ) / TT.sum(valid_var)
                   ) + param_eta * TT.max(delta_v / param_eta)
        else:
            dual = param_eta * self.epsilon + \
                   param_eta * TT.log(
                       TT.mean(
                           TT.exp(
                               delta_v / param_eta - TT.max(delta_v / param_eta)
                           )
                       )
                   ) + param_eta * TT.max(delta_v / param_eta)
        # Add L2 regularization.
        dual += self.L2_reg_dual * \
                (TT.square(param_eta) + TT.square(1 / param_eta))

        # Symbolic dual gradient
        dual_grad = TT.grad(cost=dual, wrt=[param_eta, param_v])

        # Eval functions.
        f_dual = ext.compile_function(inputs=[rewards, feat_diff] +
                                      state_info_vars_list + recurrent_vars +
                                      [param_eta, param_v],
                                      outputs=dual)
        f_dual_grad = ext.compile_function(
            inputs=[rewards, feat_diff] + state_info_vars_list +
            recurrent_vars + [param_eta, param_v],
            outputs=dual_grad)

        self.opt_info = dict(f_loss_grad=f_loss_grad,
                             f_loss=f_loss,
                             f_dual=f_dual,
                             f_dual_grad=f_dual_grad,
                             f_kl=f_kl)
Example #36
import numpy
import theano
import theano.tensor as T
rng = numpy.random

N = 400                                   # training sample size
feats = 784                               # number of input variables

# generate a dataset: D = (input_values, target_class)
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000

# Declare Theano symbolic variables
x = T.matrix("x")
y = T.vector("y")

# initialize the weight vector w randomly
#
# this and the following bias variable b
# are shared so they keep their values
# between training iterations (updates)
w = theano.shared(rng.randn(feats), name="w")

# initialize the bias term
b = theano.shared(0., name="b")

print("Initial model:")
print(w.get_value())
print(b.get_value())
Example #37
def test_recurrent_init_val_error():
    # check that an error is raised when hid_init is not a matrix TensorVariable
    hid_init = T.vector()
    with pytest.raises(ValueError):
        l_rec = RecurrentLayer(InputLayer((2, 2, 3)), 5, hid_init=hid_init)
Example #38
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, initial_momentum = 0.5,
             datasets="datasets", train_batch_size=20,
             n_hidden=[500,200,100], p=0.5, dropout=False, input_p=None, drug_name=None, OUT_FOLDER="OUT_FOLDER"):

    #Demonstrate stochastic gradient descent optimization for a multilayer
    #perceptron

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    #erlo_x, erlo_y = datasets[3] #MODIFIED

    valid_batch_size = valid_set_x.get_value(borrow=True).shape[0]
    test_batch_size= test_set_x.get_value(borrow=True).shape[0]
    N_IN=valid_set_x.get_value(borrow=True).shape[1]
    train_samples = train_set_x.get_value(borrow=True).shape[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / train_batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / valid_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size


    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar("i") # index to a [mini]batch
    vector = T.vector("v", dtype='int32')
    x = T.matrix('x')
    y = T.vector('y')

    is_train = T.iscalar('is_train') # pseudo boolean for switching between training and prediction

    rng = np.random.RandomState(1234)

    # construct the MLP class
    N_HIDDEN = ".".join([str(NN) for NN in n_hidden])
    classifier = MLP(
        rng=rng,
        is_train = is_train,
        input=x,
        n_in=N_IN,   #FIXED !!!!!!
        n_hidden=n_hidden,
        n_out=2,
        p=p,
        dropout=dropout,
        input_p=input_p #, batch_size=batch_size
    )

    #classifier.negative_log_likelihood(y)
    cost = (
        classifier.errors(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y), #negative_log_likelihood(y)
        givens={
            x: valid_set_x[index * valid_batch_size:(index + 1) * valid_batch_size],
            y: valid_set_y[index * valid_batch_size:(index + 1) * valid_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    test_cor = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            y: test_set_y[index * test_batch_size:(index + 1) * test_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    test_nrmse = theano.function(
        inputs=[index],
        outputs=classifier.NRMSE(y),
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            y: test_set_y[index * test_batch_size:(index + 1) * test_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    test_pred = theano.function(
        inputs=[index],
        outputs=classifier.pred(y),
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            y: test_set_y[index * test_batch_size:(index + 1) * test_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )
    ###################################

    #learning rate to shared
    learning_rate = theano.shared(np.cast[theano.config.floatX](learning_rate) )

    # momentum implementation stolen from
    # http://nbviewer.ipython.org/github/craffel/theano-tutorial/blob/master/Theano%20Tutorial.ipynb
    assert initial_momentum >= 0. and initial_momentum < 1.
    momentum =theano.shared(np.cast[theano.config.floatX](initial_momentum), name='momentum', borrow=True)

    # List of update steps for each parameter
    updates = []
    #Just gradient descent on cost
    for param in classifier.params:
        # For each parameter, we'll create a param_update shared variable.
        # This variable will keep track of the parameter's update step across iterations.
        # We initialize it to 0
        param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable, borrow=True)
        # Each parameter is updated by taking a step in the direction of the gradient.
        # However, we also "mix in" the previous step according to the given momentum value.
        # Note that when updating param_update, we are using its old value and also the new gradient step.
        updates.append((param, param - learning_rate*param_update))
        # Note that we don't need to derive backpropagation to compute updates - just use T.grad!
        updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)/(2*train_batch_size) ))

    """
    gparams = [T.grad(cost, param) for param in classifier.params]

    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]
    """
    train_model = theano.function(
        inputs=[vector],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[vector,],
            y: train_set_y[vector,],
            is_train: np.cast['int32'](1)
        },
        on_unused_input='warn',
    )

    train_error = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: train_set_x[index * train_batch_size:(index + 1) * train_batch_size],
            y: train_set_y[index * train_batch_size:(index + 1) * train_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 18000000 # look at this many examples regardless

    patience_increase = 2 # wait this much longer when a new best is found
    improvement_threshold = 0.995 # a relative improvement of this much is considered significant (default = 0.995)
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    test_loss = 1
    test_pear = 0
    LR_COUNT = 1

    # STORE_FILE="_LR"+str(learning_rate)+"_EPOCHS"+str(n_epochs) + "_BATCH_SIZE"+str(train_batch_size) + \
    # "_N_HIDDEN"+str(N_HIDDEN)+"_DROPOUT"+str(dropout)+"_P"+str(p)+"_IP"+str(input_p)
    #
    # STORE_RESULTS=open(OUT_FOLDER +"/"+ drug_name + STORE_FILE, "w")
    # STORE_RESULTS.write("LR"+"\t"+"EPOCHS"+"\t"+"BATCH_SIZE"+"\t"+
    #                     "L1"+"\t"+"L2"+"\t"+"N_HIDDEN"+"\t"+"P_HIDDEN"+"\t"+"DROPOUT"+"\t"+ "INPUT_DROPOUT"+"\t"+
    #                     "EPOCH_N"+"\t"+"BATCH_TYPE" + "\t" +"LOSS")

    FILE_OUT =  open(OUT_FOLDER + "/combined_D." + drug_name + ".txt", "w")
    FILE_OUT.write("EPOCH" + "\t" + "TRAIN"+ "\t"+"VALID.ERROR" + "\t" + "TEST.COR" + "\t" + "TEST.NRMSE")
    FILE_OUT.close()

    FILE_OUT_val = open(OUT_FOLDER + "/combined_D_values." + drug_name + ".txt", "w")
    FILE_OUT_val.write("EPOCH" +"\t" + "ACTUAL" +"\t"+"PREDICTED")
    FILE_OUT_val.close()

    with open(OUT_FOLDER + "/log." + drug_name + ".txt", "w") as logfile:
        logfile.write("")

    EPOCH_SIZE = n_train_batches
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        # print "momentum: ", momentum.get_value()
        # print "learning rate: ", learning_rate.get_value()

        log = "momentum: " + str(momentum.get_value()) + "; learning_rate: " + str(learning_rate.get_value())
        with open(OUT_FOLDER + "/log." + drug_name + ".txt", "a") as logfile:
            logfile.write(log + "\n")

        # if LR_COUNT==1000:
        #     new_learning_rate = learning_rate.get_value() * 0.2
        #     print new_learning_rate
        #     learning_rate.set_value(np.cast[theano.config.floatX](new_learning_rate))

        #for minibatch_index in xrange(n_train_batches):
        for minibatch_index in xrange(EPOCH_SIZE):

            ran_index = list(np.random.randint(low=0, high=train_samples-1, size=train_batch_size))
            minibatch_avg_cost = train_model(ran_index)

            rescale_weights(classifier.param_to_scale, 15.)

            # iteration number
            #iter = (epoch - 1) * n_train_batches + minibatch_index

            #if (iter + 1) % validation_frequency == 0:
            if (minibatch_index + 1) % EPOCH_SIZE == 0:
                # compute the loss on the validation set

                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                this_train_error = [train_error(i) for i in xrange(n_train_batches)]
                this_train_error = np.mean(this_train_error)


                log = ('epoch %i, minibatch %i/%i, train error %f, validation error %f' %
                    (
                        epoch,
                        minibatch_index + 1,
                        EPOCH_SIZE,
                        this_train_error,
                        this_validation_loss
                    ))
                # print(log)
                with open(OUT_FOLDER + "/log." + drug_name + ".txt", "a") as logfile:
                    logfile.write(log + "\n")

                with open(OUT_FOLDER + "/combined_D." + drug_name + ".txt", "a") as FILE_OUT:
                    FILE_OUT.write("\n"+ str(epoch) + "\t" + str(this_train_error) + "\t"+ str(this_validation_loss) \
                                   +"\t" +str(test_pear) + "\t" + str(test_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    LR_COUNT = 0

                    #improve patience if loss improvement is good enough
                    # if (
                    #     this_validation_loss < best_validation_loss *
                    #     improvement_threshold
                    # ):
                    #     patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_nrmse(i) for i in xrange(n_test_batches)]
                    test_loss = np.mean(test_losses)

                    test_pears = [test_cor(i) for i in xrange(n_test_batches)]
                    test_pear = np.mean(test_pears)

                    log = (('epoch %i, minibatch %i/%i, test nrmse %f, '
                        'test pearson %f for the best model so far') %
                        (epoch, minibatch_index + 1, EPOCH_SIZE, test_loss, test_pear))
                    # print(log)
                    with open(OUT_FOLDER + "/log." + drug_name + ".txt", "a") as logfile:
                        logfile.write(log + "\n")

                    #ONLY SAVE MODEL if validation improves
                    MODEL = [classifier.linearRegressionLayer]
                    for e in xrange(len(n_hidden)):
                        MODEL = MODEL + [getattr(classifier, "layer_" + str(e))]
                    MODEL = MODEL + [rng]
                    with open(OUT_FOLDER + "/" + drug_name + ".pkl", "wb") as f:
                        cPickle.dump(MODEL, f)

                    #Only write if validation improvement
                    ACTUAL = test_set_y.get_value()
                    PREDICTED = [test_pred(i) for i in xrange(n_test_batches)][0]

                    with open(OUT_FOLDER + "/combined_D_values." + drug_name + ".txt", "a") as FILE_OUT_val:
                        for l in xrange(len(ACTUAL)):
                            FILE_OUT_val.write("\n" + str(epoch) + "\t" + str(ACTUAL[l]) + "\t" + str(PREDICTED[l]))
                else:
                    LR_COUNT = LR_COUNT+1

            # if patience <= iter:
            #     done_looping = True
            #     break
            # if LR_COUNT==100:
            #     done_looping = True
            #     break

        # adaptation of momentum
        if momentum.get_value() < 0.99:
            new_momentum = 1. - (1. - momentum.get_value()) * 0.999
            momentum.set_value(np.cast[theano.config.floatX](new_momentum))
        # adaptation of learning rate
        new_learning_rate = learning_rate.get_value() * 0.998
        learning_rate.set_value(np.cast[theano.config.floatX](new_learning_rate))

        # if epoch%500 == 0:
        #     new_learning_rate = learning_rate.get_value() * 0.1
        #     learning_rate.set_value(np.cast[theano.config.floatX](new_learning_rate))

    end_time = timeit.default_timer()

    print(('Optimization complete. Best validation score of %f '
            'obtained at iteration %i, with test pearson %f') %
            (best_validation_loss, best_iter, test_pear))

    print >> sys.stderr, ('The code for file ' +
                                os.path.split("__file__")[1] +
                                ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #39
0
import theano
import theano.tensor as T
import numpy as np
import random

x = T.vector()
w1 = theano.shared(np.array([1.,1.]))
w2 = theano.shared(np.array([1.,1.]))
b1 = theano.shared(1.)
b2 = theano.shared(1.)
z1 = T.dot(w1,x) + b1
z2 = T.dot(w2,x) + b2

a1 = 1/(1 + T.exp(-z1))
a2 = 1/(1 + T.exp(-z2))
w3 = theano.shared(np.array([1.,1.]))
b3 = theano.shared(1.)
z3 = T.dot(w3,[a1,a2]) + b3

y = 1/(1 + T.exp(-z3))
y_hat = T.scalar()
neuron = theano.function(inputs = [x],outputs = y)
cost = T.sum((y-y_hat)**2)
cost_function = theano.function(inputs = [y,y_hat],outputs = cost)

dw1,db1,dw2,db2,dw3,db3 = T.grad(cost,[w1,b1,w2,b2,w3,b3])

gradient = theano.function(
        inputs = [x,y_hat],
        updates = [(w1,w1-1*dw1),(b1,b1-1*db1),
                   (w2,w2-1*dw2),(b2,b2-1*db2),
                   (w3,w3-1*dw3),(b3,b3-1*db3)])
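
# Assumed continuation (the snippet is truncated at this point): one way to use
# the compiled functions is to train this tiny 2-2-1 network on XOR-style data.
for _ in range(10000):
    x_in = [random.randint(0, 1), random.randint(0, 1)]
    target = float(x_in[0] ^ x_in[1])
    gradient(x_in, target)
print(neuron([0., 1.]), neuron([1., 1.]))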
Example #40
0
import numpy as np
import theano.tensor as T
import theano as th
th.config.openmp_elemwise_minsize = 1000
th.config.openmp = True

x = T.vector('x')
y = T.vector('y')

hit_test = x**2 + y**2 < 1
hits = hit_test.sum()
misses = x.shape[0]
pi_est = 4 * hits / misses
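# note: despite its name, `misses` holds the total number of sample points, so
# the estimator is 4 * (points inside the unit circle) / (total points)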

calculate_pi = th.function([x, y], pi_est)

x_val = np.random.uniform(-1, 1, 30000)
y_val = np.random.uniform(-1, 1, 30000)

import timeit
res = timeit.timeit("calculate_pi(x_val, y_val)",
                    "from __main__ import x_val, y_val, calculate_pi",
                    number=100000)
print(res)
Example #41
0
    mnist = MNIST()
    # create the basic layer
    layer1 = Dense(inputs=((None, 28 * 28), matrix("x")),
                   outputs=1000,
                   activation='linear')
    layer1_act = Activation(inputs=((None, 1000), layer1.get_outputs()),
                            activation='relu')
    # create the softmax classifier
    layer2 = Softmax(inputs=((None, 1000), layer1_act.get_outputs()),
                     outputs=10,
                     out_as_probs=True)
    # create the mlp from the two layers
    mlp = Prototype(layers=[layer1, layer1_act, layer2])
    # define the loss function
    loss = Neg_LL(inputs=mlp.get_outputs(),
                  targets=vector("y", dtype="int64"),
                  one_hot=False)

    #plot the loss
    if BOKEH_AVAILABLE:
        plot = Plot("mlp_mnist",
                    monitor_channels=Monitor("loss", loss.get_loss()),
                    open_browser=True)
    else:
        plot = None

    # make an optimizer to train it (AdaDelta is a good default)
    # optimizer = AdaDelta(model=mlp, dataset=mnist, n_epoch=20)
    optimizer = AdaDelta(dataset=mnist, loss=loss, epochs=20)
    # perform training!
    # optimizer.train()
Example #42
0
import numpy as np
import theano
import theano.tensor as T

from sklearn.datasets import make_classification
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report

rng = np.random.RandomState(1999)
X, y = make_classification(n_samples=400,
                           n_features=25,
                           n_informative=10,
                           n_classes=2,
                           n_clusters_per_class=2,
                           random_state=1999)

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.8)
n_samples, n_features = X_train.shape
x = T.matrix('x')
y = T.vector('y')
w = theano.shared(rng.randn(n_features), name='w')
b = theano.shared(0., name='b')

print("Initial model")
print(w.get_value(), b.get_value())

learning_rate = 0.01
reg = .1
n_iter = 10000
prob = 1 / (1 + T.exp(-T.dot(x, w) - b))
pred = prob > 0.5
loss = -y * T.log(prob) - (1 - y) * T.log(1 - prob)

# l2
# penalty = reg * (w ** 2).sum()
Example #43
0
def convert_variable(x):
    if x.ndim == 1:
        return T.vector(x.name, dtype=x.dtype)
    else:
        return T.matrix(x.name, dtype=x.dtype)
Example #44
0
  def __init__(self, D, K, hidden_layer_sizes, gamma, max_experiences=10000, min_experiences=100, batch_sz=32):
    self.K = K
    lr = 10e-3
    mu = 0.
    decay = 0.99

    # create the graph
    self.layers = []
    M1 = D
    for M2 in hidden_layer_sizes:
      layer = HiddenLayer(M1, M2)
      self.layers.append(layer)
      M1 = M2

    # final layer
    layer = HiddenLayer(M1, K, lambda x: x)
    self.layers.append(layer)

    # collect params for copy
    self.params = []
    for layer in self.layers:
      self.params += layer.params
    caches = [theano.shared(np.ones_like(p.get_value())*0.1) for p in self.params]
    velocities = [theano.shared(p.get_value()*0) for p in self.params]

    # inputs and targets
    X = T.matrix('X')
    G = T.vector('G')
    actions = T.ivector('actions')

    # calculate output and cost
    Z = X
    for layer in self.layers:
      Z = layer.forward(Z)
    Y_hat = Z

    selected_action_values = Y_hat[T.arange(actions.shape[0]), actions]
    cost = T.sum((G - selected_action_values)**2) 

    # create train function
    grads = T.grad(cost, self.params)
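    # The three update lists below implement an RMSprop-style rule with momentum:
    # `caches` keeps an exponential moving average of squared gradients,
    # `velocities` takes a momentum step scaled by the RMS-normalised gradient,
    # and each parameter then moves by its velocity. Theano applies all updates
    # simultaneously, using the pre-update values of the shared variables.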
    g_update = [(p, p + v) for p, v, g in zip(self.params, velocities, grads)]
    c_update = [(c, decay*c + (1 - decay)*g*g) for c, g in zip(caches, grads)]
    v_update = [(v, mu*v - lr*g / T.sqrt(c)) for v, c, g in zip(velocities, caches, grads)]
    # v_update = [(v, mu*v - lr*g) for v, g in zip(velocities, grads)]
    # c_update = []
    updates = c_update + g_update + v_update

    # compile functions
    self.train_op = theano.function(
      inputs=[X, G, actions],
      updates=updates,
      allow_input_downcast=True
    )
    self.predict_op = theano.function(
      inputs=[X],
      outputs=Y_hat,
      allow_input_downcast=True
    )

    # create replay memory
    self.experience = {'s': [], 'a': [], 'r': [], 's2': [], 'done': []}
    self.max_experiences = max_experiences
    self.min_experiences = min_experiences
    self.batch_sz = batch_sz
    self.gamma = gamma
Example #45
0
def augment_system(ode_func, n_states, n_theta):
    """
    Function to create augmented system.

    Take a function which specifies a set of differential equations and return
    a compiled function which allows for computation of gradients of the
    differential equation's solution with respect to the parameters.

    Uses float64 even if floatX=float32, because the scipy integrator always uses float64.

    Parameters
    ----------
    ode_func : function
        Differential equation.  Returns array-like.
    n_states : int
        Number of rows of the sensitivity matrix. (n_states)
    n_theta : int
        Number of ODE parameters

    Returns
    -------
    system : function
        Augmented system of differential equations.
    """

    # Present state of the system
    t_y = tt.vector("y", dtype='float64')
    t_y.tag.test_value = np.ones((n_states, ), dtype='float64')
    # Parameter(s).  Should be vector to allow for generalization to multiparameter
    # systems of ODEs.  Is m dimensional because it includes all initial conditions as well as ode parameters
    t_p = tt.vector("p", dtype='float64')
    t_p.tag.test_value = np.ones((n_states + n_theta, ), dtype='float64')
    # Time.  Allow for non-autonomous systems of ODEs to be analyzed
    t_t = tt.scalar("t", dtype='float64')
    t_t.tag.test_value = 2.459

    # Present state of the gradients:
    # Will always be 0 unless the parameter is the initial condition
    # Entry i,j is the partial of y[i] with respect to p[j]
    dydp_vec = tt.vector("dydp", dtype='float64')
    dydp_vec.tag.test_value = make_sens_ic(n_states, n_theta, 'float64')

    dydp = dydp_vec.reshape((n_states, n_states + n_theta))

    # Get symbolic representation of the ODEs by passing tensors for y, t and theta
    yhat = ode_func(t_y, t_t, t_p[n_states:])
    # Stack the results of the ode_func into a single tensor variable
    if not isinstance(yhat, (list, tuple)):
        yhat = (yhat, )
    t_yhat = tt.stack(yhat, axis=0)

    # Now compute gradients
    J = tt.jacobian(t_yhat, t_y)

    Jdfdy = tt.dot(J, dydp)

    grad_f = tt.jacobian(t_yhat, t_p)

    # This is the time derivative of dydp
    ddt_dydp = (Jdfdy + grad_f).flatten()

    system = theano.function(inputs=[t_y, t_t, t_p, dydp_vec],
                             outputs=[t_yhat, ddt_dydp],
                             on_unused_input="ignore")

    return system
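
# A minimal usage sketch (an assumption, not part of the original source):
# exponential decay dy/dt = -theta * y with one state and one parameter.
# The compiled `system` evaluates the ODE right-hand side together with the
# time derivative of the sensitivities; a wrapper would typically pack and
# unpack the augmented state before handing it to scipy.integrate.odeint.
import numpy as np

def decay(y, t, p):
    return -p[0] * y[0]

system = augment_system(decay, n_states=1, n_theta=1)
# y = [1.0], t = 0.0, p = [y0, theta] = [1.0, 0.5],
# flattened sensitivities dydp = [dy/dy0, dy/dtheta] = [1.0, 0.0]
yhat, ddt_dydp = system(np.array([1.0]), 0.0, np.array([1.0, 0.5]),
                        np.array([1.0, 0.0]))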
Example #46
0
    def build(self):
        E, V, U, W, b, c = self.E, self.V, self.U, self.W, self.b, self.c

        x = T.ivector('x')
        y = T.ivector('y')

        # negy is the negative sampling for blackout
        # shape (len(y),k)
        negy = T.imatrix('negy')
        q_w = T.vector('q_w')

        def _recurrence(x_t, y_t, neg_y_t, s_t1_prev, s_t2_prev, q_w):

            # Word embedding layer
            # E hidden word_dim/vocab_dim
            x_e = E[:, x_t]

            # GRU Layer 1
            z_t1 = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t1_prev) +
                                       b[0])
            r_t1 = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t1_prev) +
                                       b[1])
            c_t1 = T.tanh(U[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2])
            s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev

            # GRU Layer 2
            z_t2 = T.nnet.hard_sigmoid(U[3].dot(s_t1) + W[3].dot(s_t2_prev) +
                                       b[3])
            r_t2 = T.nnet.hard_sigmoid(U[4].dot(s_t1) + W[4].dot(s_t2_prev) +
                                       b[4])
            c_t2 = T.tanh(U[5].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5])
            s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev

            # probability of output o_t
            # o_t = T.nnet.softmax(V.dot(s_t2) + c)[0]

            # blackout version output probability
            # correct word probability (1,1)
            c_o_t = T.exp(V[y_t].dot(s_t2) + c[y_t])

            # negative word probability (k,1)
            n_o_t = T.exp(V[neg_y_t].dot(s_t2) + c[neg_y_t])

            # sample set probability
            t_o = (q_w[y_t] * c_o_t) + T.sum(q_w[neg_y_t] * n_o_t)

            # positive probability
            c_o_p = q_w[y_t] * c_o_t / t_o

            # negative probability (k,1)
            n_o_p = q_w[neg_y_t] * n_o_t / t_o

            # cost for each y in blackout
            J_dis = -(T.log(c_o_p) + T.sum(T.log(T.ones_like(n_o_p) - n_o_p)))

            # blackout version discriminative objective function
            return [J_dis, s_t1, s_t2]

        [J, _,
         _], updates = theano.scan(fn=_recurrence,
                                   sequences=[x, y, negy],
                                   truncate_gradient=self.bptt_truncate,
                                   outputs_info=[
                                       None,
                                       dict(initial=T.zeros(self.hidden_dim)),
                                       dict(initial=T.zeros(self.hidden_dim))
                                   ],
                                   non_sequences=q_w)

        cost = T.sum(J)

        lr = T.scalar("lr")
        gparams = [T.clip(T.grad(cost, p), -10, 10) for p in self.params]
        updates = sgd(self.params, gparams, lr)

        self.train = theano.function(inputs=[x, y, negy, q_w, lr],
                                     outputs=cost,
                                     updates=updates)
Example #47
0
    def build_model(self, tparams, optionsInp):
        trng = RandomStreams(1234)
        options = copy(optionsInp)
        if 'en_aux_inp' in options:
            options.pop('en_aux_inp')
        # Used for dropout.
        self.use_noise = theano.shared(numpy_floatX(0.))

        xW = T.matrix('xW', dtype='int64')
        mask = T.vector('mask', dtype='int64')

        n_Rwords = xW.shape[0]
        n_samples = xW.shape[1]

        embW = tparams['Wemb'][xW.flatten()].reshape(
            [n_Rwords, n_samples, options['word_encoding_size']])
        xI = T.matrix('xI', dtype=config.floatX)

        if options.get('multimodal_lstm', 0) == 1:
            embImg = T.dot(xI, tparams['WIemb']) + tparams['b_Img']
            embImg = T.shape_padleft(T.extra_ops.repeat(embImg,
                                                        n_samples,
                                                        axis=0),
                                     n_ones=1)
            emb = T.concatenate([embImg, embW], axis=0)
        else:
            emb = embW

        # This implements input dropout
        if options['use_dropout']:
            emb = dropout_layer(emb,
                                self.use_noise,
                                trng,
                                options['drop_prob_encoder'],
                                shp=emb.shape)

        # This implements core lstm
        rval, updatesLSTM = basic_lstm_layer(tparams,
                                             emb, [],
                                             self.use_noise,
                                             options,
                                             prefix='lstm')

        if options['use_dropout']:
            p = dropout_layer(
                sliceT(
                    rval[0][mask + options.get('multimodal_lstm', 0),
                            T.arange(mask.shape[0]), :],
                    options.get('hidden_depth', 1) - 1,
                    options['hidden_size']), self.use_noise, trng,
                options['drop_prob_decoder'],
                (n_samples, options['hidden_size']))
        else:
            p = sliceT(
                rval[0][mask + options.get('multimodal_lstm', 0),
                        T.arange(mask.shape[0]), :],
                options.get('hidden_depth', 1) - 1, options['hidden_size'])

        if options.get('multimodal_lstm', 0) == 0:
            sent_emb = (T.dot(p, tparams['Wd']) + tparams['bd'])
            probMatch, sim_score = multimodal_cosine_sim_softmax(
                xI, sent_emb, tparams, options.get('sim_smooth_factor', 1.0))
        else:
            sent_emb = T.sum(p, axis=1).T  #(T.dot(p,tparams['Wd'])).T
            sim_score = sent_emb  #T.maximum(0.0, sent_emb) #T.tanh(sent_emb)
            smooth_factor = T.as_tensor_variable(numpy_floatX(
                options.get('sim_smooth_factor', 1.0)),
                                                 name='sm_f')
            probMatch = T.nnet.softmax(sim_score * smooth_factor)

        inp_list = [xW, mask, xI]

        if options.get('mode', 'batchtrain') == 'batchtrain':
            # In train mode we compare a batch of images against each other's captions.
            batch_size = options['batch_size']
            cost = -(T.log(probMatch.diagonal()).sum()) / batch_size
        else:
            # In predict mode we compare multiple captions against a single image
            posSamp = T.ivector('posSamp')
            batch_size = posSamp.shape[0]
            cost = -(T.log(probMatch[0, posSamp]).sum()) / batch_size
            inp_list.append(posSamp)

        f_pred_sim_prob = theano.function(inp_list[:3],
                                          probMatch,
                                          name='f_pred_sim_prob')
        f_pred_sim_scr = theano.function(inp_list[:3],
                                         sim_score,
                                         name='f_pred_sim_scr')
        if options.get('multimodal_lstm', 0) == 1:
            f_sent_emb = theano.function([inp_list[0], inp_list[2]],
                                         [rval[0], emb],
                                         name='f_sent_emb')
        else:
            f_sent_emb = theano.function([inp_list[0]], [rval[0], emb],
                                         name='f_sent_emb')

        return self.use_noise, inp_list, [
            f_pred_sim_prob, f_pred_sim_scr, f_sent_emb, updatesLSTM
        ], cost, sim_score, tparams
Example #48
0
latent_size = 20
nhidden = 512
lr = 0.001
num_epochs = 20  #50
model_filename_read = "mnist_ae"
classifier_filename_read = "mnist_classifier"
nonlin = lasagne.nonlinearities.rectify

np.random.seed(1234)  # reproducibility

#SYMBOLIC VARS
sym_x = T.matrix()
sym_lr = T.scalar('lr')
sym_z = T.matrix()
sym_y = T.matrix()
sym_target = T.vector()

### LOAD DATA
print("Using MNIST dataset")

#load adversarial examples
'''
adv_train_x = []
orig_train_x = []
adv_img_num_train = 5000
for img_num in range(0,adv_img_num_train):
    fadv = os.path.join('dataset/train/adversarial_images',"img_"+str(img_num)+".png")
    forig = os.path.join('dataset/train/original_images',"img_"+str(img_num)+".png")
    adv_train = Image.open(fadv)
    orig_train = Image.open(forig)
    adv_train.load()
Example #49
0
import theano
import theano.tensor as T
import numpy

coefficients = T.vector("coefficients")
x = T.scalar("x")

max_coefficients_supported = 10000

result, updates = theano.scan(
    # the lambda's arguments are ordered as: sequences, prior result(s), non-sequences;
    # if any of these is a collection, it is unpacked into separate arguments
    fn=lambda coefficient, power, prior_sum, free_variable: [
        coefficient * (free_variable**power), prior_sum + coefficient *
        (free_variable**power)
    ],
    outputs_info=[None, T.zeros_like(x)
                  ],  # the first output is not accumulated and is not fed back into scan; the second output needs accumulation
    sequences=[coefficients,
               T.arange(max_coefficients_supported)
               ],  # scan iterates over the shortest of these sequences and truncates automatically
    non_sequences=x)
final_result = [result[0].sum(), result[1][-1]]

# Compile a function
calculate_polynomial = theano.function(inputs=[coefficients, x],
                                       outputs=final_result,
                                       updates=updates)

# Test
test_coefficients = numpy.array([1, 0, 2])
test_value = 3
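
# Quick check (assumed continuation, not part of the original snippet): for
# coefficients [1, 0, 2] at x = 3 the polynomial is 1*3**0 + 0*3**1 + 2*3**2 = 19,
# so both outputs (sum of terms, final accumulated value) should equal 19.
print(calculate_polynomial(test_coefficients.astype(theano.config.floatX),
                           float(test_value)))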
Example #50
0
def OptimalNode(x_train,
                y_train,
                Regression=True,
                Classification=False,
                bias=False,
                n_iter=5,
                alpha=0.01,
                minibatch=False):
    '''
    inputs
        x_train: training features
        y_train: response variable
        n_iter: # of iterations for SGD
        alpha: strength of L2 penalty (default penalty for now)
    outputs
        Node: dictionary with Node parameters and a predict method
    '''

    rng = numpy.random

    feats = len(x_train[0, :])
    D = [x_train, y_train]
    training_steps = n_iter
    #print "training steps: ", training_steps
    #print "penalty strength: ", alpha
    #print "Uses bias: ", bias

    # Declare Theano symbolic variables
    x = T.matrix("x")
    y = T.vector("y")
    w = theano.shared(rng.uniform(low=-0.25, high=0.25, size=feats), name="w")
    b = theano.shared(rng.randn(1)[0], name="b")
    a = theano.shared(abs(rng.randn(1)[0]), name="a")
    #print "Initialize node as:"
    #print w.get_value(), b.get_value(), a.get_value()

    # Construct Theano expression graph
    if bias:
        p_1 = -0.5 + a / (1 + T.exp(-T.dot(x, w) - b))
    else:
        p_1 = a / (1 + T.exp(-T.dot(x, w)))
    prediction = p_1 > 0.5
    if Classification:
        xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)  # Cross-entropy loss
    elif Regression:
        xent = 0.5 * (y - p_1)**2
    if alpha == 0:
        cost = xent.mean()  # The cost to minimize
    else:
        cost = xent.mean() + alpha * ((w**2).sum())
    if bias:
        gw, gb, ga = T.grad(cost, [w, b, a])
    else:
        gw, ga = T.grad(cost, [w, a])  # Compute the gradient of the cost

    # Compile
    Node = {}
    Node['Path'] = {}
    NodePath = Node['Path']
    if bias:
        train = theano.function(inputs=[x, y],
                                outputs=[prediction, xent],
                                updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb),
                                         (a, a - 0.1 * ga)))
    else:
        train = theano.function(inputs=[x, y],
                                outputs=[prediction, xent],
                                updates=((w, w - 0.1 * gw), (a, a - 0.1 * ga)))

    predict = theano.function(inputs=[x], outputs=p_1)

    # Train
    for i in range(training_steps):
        if minibatch:
            batch_split = train_test_split(x_train, y_train, test_size=0.2)
            _, D[0], _, D[1] = batch_split
            pred, err = train(D[0], D[1])
        elif not minibatch:
            pred, err = train(D[0], D[1])
        NodePath[str(i)] = {}
        NodePath[str(i)]['w'] = w.get_value()
        NodePath[str(i)]['b'] = b.get_value()
        NodePath[str(i)]['a'] = a.get_value()

    Node['w'] = w.get_value()
    Node['b'] = b.get_value()
    Node['a'] = a.get_value()
    Node['predict'] = predict

    return Node
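
# A minimal usage sketch (an assumption, not part of the original source): fit a
# single node on toy regression data and use the returned compiled predictor.
import numpy

x_toy = numpy.random.rand(200, 5)
y_toy = 1.0 / (1.0 + numpy.exp(-x_toy.sum(axis=1)))  # smooth targets in (0, 1)
node = OptimalNode(x_toy, y_toy, Regression=True, n_iter=50, alpha=0.01)
preds = node['predict'](x_toy)  # compiled Theano predictor p_1
print(preds[:5])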
Example #51
0
# http://lazyprogrammer.me
# theano scan example - low pass filter

import numpy as np
import matplotlib.pyplot as plt
import theano
import theano.tensor as T


X = 2*np.random.randn(300) + np.sin(np.linspace(0, 3*np.pi, 300))
plt.plot(X)
plt.title("original")
plt.show()

decay = T.scalar('decay')
sequence = T.vector('sequence')

def recurrence(x, last, decay):
	return (1-decay)*x + decay*last

outputs, _ = theano.scan(
	fn=recurrence,
	sequences=sequence,
	n_steps=sequence.shape[0],
	outputs_info=[np.float64(0)],
	non_sequences=[decay]
)

lpf = theano.function(
	inputs=[sequence, decay],
	outputs=outputs,
)
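
# Assumed usage (the original snippet is truncated here): filter the noisy
# signal with a decay of 0.99 and plot the result.
Y = lpf(X, 0.99)
plt.plot(Y)
plt.title("filtered")
plt.show()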
Example #52
0
#!/usr/bin/env python
# Theano tutorial
# Solution to Exercise in section 'Loop'

import numpy as np

import theano
import theano.tensor as tt
from six.moves import xrange

# 1. First example

theano.config.warn.subtensor_merge_bug = False

k = tt.iscalar("k")
A = tt.vector("A")


def inner_fct(prior_result, A):
    return prior_result * A

# Symbolic description of the result
result, updates = theano.scan(fn=inner_fct,
                              outputs_info=tt.ones_like(A),
                              non_sequences=A, n_steps=k)

# Scan has provided us with A ** 1 through A ** k.  Keep only the last
# value. Scan notices this and does not waste memory saving them.
final_result = result[-1]

power = theano.function(inputs=[A, k], outputs=final_result,
                        updates=updates)
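
# Assumed usage, mirroring the Theano 'Loop' tutorial exercise this snippet follows:
print(power(range(10), 2))
print(power(range(10), 4))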
Example #53
0
learning_rate = 0.00005
#perc estimate
perc_est = 0.6
#tot trajectories
s_tot = 10000

partition = 3

porz = np.int(perc_est * N)

observations_var = env.observation_space.new_tensor_variable(
    'observations',
    # It should have 1 extra dimension since we want to represent a list of observations
    extra_dims=1)
actions_var = env.action_space.new_tensor_variable('actions', extra_dims=1)
d_rewards_var = TT.vector('d_rewards')
importance_weights_var = TT.vector('importance_weight')

# policy.dist_info_sym returns a dictionary, whose values are symbolic expressions for quantities related to the
# distribution of the actions. For a Gaussian policy, it contains the mean and (log) standard deviation.
dist_info_vars = policy.dist_info_sym(observations_var)
snap_dist_info_vars = snap_policy.dist_info_sym(observations_var)

surr = TT.sum(
    -dist.log_likelihood_sym_1traj_GPOMDP(actions_var, dist_info_vars) *
    d_rewards_var)

params = policy.get_params(trainable=True)
snap_params = snap_policy.get_params(trainable=True)

importance_weights = dist.likelihood_ratio_sym_1traj_GPOMDP(
Example #54
0
def main():
    args = build_parser().parse_args()

    assert args.num_individuals >= 1, (
        'Must have at least one member in ensemble')
    assert args.max_epochs >= 1, ('Must have at least 1 epoch.')

    assert args.base_power >= 0, ('Cannot have fractional filters!')

    np.random.seed(args.seed)
    import lasagne
    lasagne.random.set_rng(np.random.RandomState(args.seed))
    experiment_timestamp = str(time.time()).replace('.', '-')
    experiment_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   'experiments', experiment_timestamp)
    if os.path.exists(experiment_path):
        print('Experiment directory exists!')
        sys.exit(1)
    else:
        os.makedirs(experiment_path)

    # Save the commit hash used for these experiments.
    commit_hash = str(subprocess.check_output(['git', 'rev-parse', 'HEAD']),
                      'utf-8')
    commit_file_path = os.path.join(experiment_path, 'exp_commit.txt')
    with open(commit_file_path, 'w') as fd:
        fd.write(commit_hash)

    args_file_path = os.path.join(experiment_path, 'provided_args.json')
    with open(args_file_path, 'w') as fd:
        json.dump(vars(args), fd, indent=4)

    # Initial dataset setup
    dataset_mean = load_mean(args.mean_path)
    X, y = load_data(args.dataset_directory,
                     dataset_mean,
                     mean_normalise=True,
                     four_dim=True)

    train_X, train_y, val_X, val_y = train_val_split(X, y)

    print('Train X shape: {}\tTrain y shape: {}\n'
          'Val X shape: {}\tVal y shape: {}'
          ''.format(*(mat.shape for mat in (train_X, train_y, val_X, val_y))))

    # Network setup
    input_var = T.tensor4('input', dtype=theano.config.floatX)
    target = T.vector('target', dtype='int32')

    network_kwargs = {'input_var': input_var, 'base_power': args.base_power}
    model = MiniVGG(**network_kwargs)
    model.pretty_print_network()

    network = model.network
    prediction = get_output(network['output'])
    loss = categorical_crossentropy(prediction, target).mean()
    accuracy = np.array(100., dtype=theano.config.floatX) * (
        categorical_accuracy(prediction, target).mean())

    params = get_all_params(network['output'], trainable=True)
    updates = adam(loss, params)

    print('Starting theano function compilation')
    train_function = theano.function([input_var, target],
                                     loss,
                                     updates=updates)
    loss_function = theano.function([input_var, target], loss)
    accuracy_function = theano.function([input_var, target], accuracy)
    pred_function = theano.function([input_var], prediction)
    print('Finished theano function compilation')
    ensemble_prediction = make_ens_predictor(network, pred_function, val_X,
                                             val_y)
    train_network = make_training_function(train_function, loss_function,
                                           accuracy_function, network, val_X,
                                           val_y, args.max_epochs,
                                           args.early_stopping_epochs)

    # Setup bootstraps
    initialisations = get_k_network_initialisations(args.num_individuals,
                                                    input_var=input_var,
                                                    base_power=args.base_power)
    bootstraps = [
        get_bootstrap(train_X, train_y) for _ in range(args.num_individuals)
    ]
    ensembles = zip(initialisations, bootstraps)

    # Train models
    trained_parameters = []
    for index, (initialisation, bootstrap) in enumerate(ensembles):
        (best_params, training_losses, validation_losses,
         validation_accuracies) = train_network(*bootstrap, initialisation,
                                                True, False)
        trained_parameters.append(best_params)

        max_accuracy = validation_accuracies[np.argmin(validation_losses)]
        ensemble_accuracy = ensemble_prediction(trained_parameters)

        print('New member at {:.2f}% validation accuracy'.format(max_accuracy))
        print('Ensemble at {:.2f}% with {} members'
              ''.format(ensemble_accuracy, len(trained_parameters)))
        print()
        sys.stdout.flush()

        member_path = os.path.join(experiment_path, 'model_{}'.format(index))
        os.makedirs(member_path)
        stats = {
            'training_losses': training_losses,
            'validation_losses': validation_losses,
            'validation_accuracies': validation_accuracies
        }
        with open(os.path.join(member_path, 'train_stats.json'), 'w') as fd:
            json.dump(stats, fd, indent=4)
        model_save_path = os.path.join(member_path, 'model.npz')
        np.savez(model_save_path, *get_all_param_values(model.final_layer))
        model_hash = md5(model_save_path)
        model_hash_path = os.path.join(member_path, 'model_hash.txt')
        with open(model_hash_path, 'w') as fd:
            fd.write(model_hash + '\n')

    ensemble_accuracies = {}
    for num_models in range(1, args.num_individuals + 1):
        parameter_combinations = combinations(trained_parameters, num_models)
        validation_accuracies = [
            ensemble_prediction(parameter_combination)
            for parameter_combination in parameter_combinations
        ]
        ensemble_accuracies[num_models] = {
            'mean': np.mean(validation_accuracies),
            'std': np.std(validation_accuracies),
            'raw': validation_accuracies
        }
    results_path = os.path.join(experiment_path, 'results.json')
    with open(results_path, 'w') as fd:
        json.dump(ensemble_accuracies, fd, indent=4)
Example #55
0
File: nn.py Project: liocsm/dlef
def build_model(tparams, options):

    trng = RandomStreams(SEED)

    #
    x0 = tensor.matrix('x0', dtype='int32')  #
    x1 = tensor.matrix('x1', dtype='int32')  #

    mask0 = tensor.matrix('mask0', dtype=config.floatX)
    mask1 = tensor.matrix('mask1', dtype=config.floatX)

    y0 = tensor.vector('y0', dtype='int32')

    #sent level
    xs0 = tensor.matrix('xs0', dtype='int32')
    xs1 = tensor.matrix('xs1', dtype='int32')

    mask_xs0 = tensor.matrix('mask_xs0', dtype=config.floatX)
    mask_xs1 = tensor.matrix('mask_xs1', dtype=config.floatX)

    ys0 = tensor.vector('ys0', dtype='int32')

    #dropout_ratio = tensor.scalar(name='dropout_ratio')
    #dropout_decay_ratio = tensor.scalar(name='dropout_decay_ratio')

    #####################################
    #
    p_0 = lstm_layer_0(tparams,
                       input_state=tparams['Wemb'][x0],
                       mask=mask0,
                       options=options)
    p_1 = lstm_layer_1(tparams,
                       input_state=tparams['Wemb'][x1],
                       mask=mask1,
                       options=options)

    #p_0 = tensor.max(p_0, axis=0)
    #p_1 = tensor.max(p_1, axis=0)
    p_0 = attention_layer_2D_0(tparams, input_state=p_0, options=options)
    p_1 = attention_layer_2D_1(tparams, input_state=p_1, options=options)

    proj_0 = tensor.concatenate((p_0, p_1), axis=0)
    #proj_0 = proj_0 * dropout_mask_1D(proj_0, 1, dropout_ratio, trng) * dropout_decay_ratio

    pred_0 = tensor.nnet.softmax(
        tensor.dot(proj_0, tparams['Ws']) + tparams['bs'])
    pred_0 = pred_0.flatten()

    f_pred_prob = theano.function(inputs=[x0, x1, mask0, mask1],
                                  outputs=pred_0.max(axis=0),
                                  name='f_pred_prob')

    f_pred = theano.function(inputs=[x0, x1, mask0, mask1],
                             outputs=pred_0.argmax(axis=0),
                             name='f_pred')

    #off = 1e-6
    d_cost = -tensor.mean(tensor.log(pred_0[y0[1]] + 1e-6))

    #####
    p_s0 = lstm_layer_0(tparams,
                        input_state=tparams['Wemb'][xs0],
                        mask=mask_xs0,
                        options=options)
    p_s1 = lstm_layer_0(tparams,
                        input_state=tparams['Wemb'][xs1],
                        mask=mask_xs1,
                        options=options)

    proj_s0 = tensor.concatenate((p_s0, p_s1), axis=1)
    pred_s0 = tensor.nnet.softmax(
        tensor.dot(proj_s0, tparams['Ws']) + tparams['bs'])

    f_s_pred_prob = theano.function(inputs=[xs0, xs1, mask_xs0, mask_xs1],
                                    outputs=pred_s0.max(axis=1),
                                    name='f_s_pred_prob')

    f_s_pred = theano.function(inputs=[xs0, xs1, mask_xs0, mask_xs1],
                               outputs=pred_s0.argmax(axis=1),
                               name='f_s_pred')

    s_cost = -tensor.mean(
        tensor.log(pred_s0[tensor.arange(ys0.shape[0]), ys0] + 1e-6))

    #####################################

    adv_p_0 = lstm_layer_0(tparams,
                           input_state=tparams['p_Wemb'][x0],
                           mask=mask0,
                           options=options)
    adv_p_1 = lstm_layer_1(tparams,
                           input_state=tparams['p_Wemb'][x1],
                           mask=mask1,
                           options=options)

    adv_p_0 = attention_layer_2D_0(tparams,
                                   input_state=adv_p_0,
                                   options=options)
    adv_p_1 = attention_layer_2D_1(tparams,
                                   input_state=adv_p_1,
                                   options=options)

    adv_proj_0 = tensor.concatenate((adv_p_0, adv_p_1), axis=0)

    adv_pred_0 = tensor.nnet.softmax(
        tensor.dot(adv_proj_0, tparams['Ws']) + tparams['bs']).flatten()

    f_adv_pred_prob = theano.function(inputs=[x0, x1, mask0, mask1],
                                      outputs=adv_pred_0.max(axis=0),
                                      name='f_adv_pred_prob')

    f_adv_pred = theano.function(inputs=[x0, x1, mask0, mask1],
                                 outputs=adv_pred_0.argmax(axis=0),
                                 name='f_adv_pred')

    adv_d_cost = -tensor.mean(tensor.log(adv_pred_0[0] + 1e-6))

    d_cost_2 = 0.2 * d_cost + 0.8 * adv_d_cost

    ###
    adv_p_s0 = lstm_layer_0(tparams,
                            input_state=tparams['p_Wemb'][xs0],
                            mask=mask_xs0,
                            options=options)
    adv_p_s1 = lstm_layer_0(tparams,
                            input_state=tparams['p_Wemb'][xs1],
                            mask=mask_xs1,
                            options=options)

    proj_adv_s0 = tensor.concatenate((adv_p_s0, adv_p_s1), axis=1)
    pred_adv_s0 = tensor.nnet.softmax(
        tensor.dot(proj_adv_s0, tparams['Ws']) + tparams['bs'])

    f_s_adv_pred_prob = theano.function(inputs=[xs0, xs1, mask_xs0, mask_xs1],
                                        outputs=pred_adv_s0.max(axis=1),
                                        name='f_s_adv_pred_prob')

    f_s_adv_pred = theano.function(inputs=[xs0, xs1, mask_xs0, mask_xs1],
                                   outputs=pred_adv_s0.argmax(axis=1),
                                   name='f_s_adv_pred')

    adv_s_cost = -tensor.mean(
        tensor.log(pred_adv_s0[tensor.arange(ys0.shape[0]), ys0] + 1e-6))

    s_cost_2 = 0.2 * s_cost + 0.8 * adv_s_cost
    #####################################
    _e = 0.6
    mycost = _e * d_cost_2 + (1 - _e) * s_cost_2


    return [x0,x1,xs0,xs1], [mask0, mask1, mask_xs0, mask_xs1], [y0,ys0], \
           f_pred_prob, f_pred, f_s_pred_prob, f_s_pred, \
           f_adv_pred_prob, f_adv_pred, f_s_adv_pred_prob, f_s_adv_pred, \
           d_cost, s_cost, adv_d_cost, adv_s_cost, \
           d_cost_2, s_cost_2, mycost
Example #56
0
    def compile(self, options):
        '''Configure the learning process.
        '''
        # input of model
        self.X = T.tensor3(name='input_frames', dtype='float32')
        self.H = T.matrix(name='H', dtype='float32')
        self.idx = T.vector(name='idx',dtype='int32')

        netlu = self.nets[0]
        netru = self.nets[1]
        netrv = self.nets[2]
        netrm = self.nets[3]
        net_hiera = self.nets[4]
        net_ru_high = self.nets[5]
        net_rv_high = self.nets[6]
        net_rm_high = self.nets[7]

        self.init_state()

        netlu.set_input([self.init_h,self.init_m])
        net_hiera.set_input([self.init_h_high,self.init_m_high])
        netru.set_input([self.init_h_ru, self.init_m_ru])
        netrv.set_input([self.init_h_rv, self.init_m_rv])
        netrm.set_input([self.init_h_rm, self.init_m_rm])

        #set the image feature as input
        idx = 0
        for l in netlu.layers:
            if hasattr(l, 'has_input_frame'):
                if l.has_input_frame:
                    l.input_frame = self.X[:,idx,:]
                    idx += 1
        assert  idx == options['v_length']
        print "start loading SS matrix..."
        time1 = time.time()
        print options['SS_path']
        SS = pkl.load(open(options['SS_path'])).astype(np.int8)
        print 'SS.shape: ',SS.shape
        # for debug
        # SS = np.zeros((train_data.data_size_,train_data.data_size_)).astype(np.float32)
        # SS[:,1] = np.ones((1,train_data.data_size_))

        SS_shared = theano.shared(value=SS, name='SS_shared')
        time2 = time.time()
        print "load SS matrix costs: ", time2 - time1

        #H_shared = theano.shared(value=T.zeros(shape=[]))

        def comp_(train):
            netlu.set_out(train=train)
            #nethiera.set_out(train=train)
            idx_hie = 0
            for i in net_hiera.layers:
                if hasattr(i,'has_input_frame'):
                    if i.has_input_frame:
                        i.input_frame = netlu.layers[(idx_hie+1)*options['hiera_step']-1].get_output(train=train)[0]
                        idx_hie += 1
            net_hiera.set_out(train=train)

            net_ru_high.set_out(train=train)
            idx_ru = 0
            for i in netru.layers:
                if hasattr(i,'has_input_frame'):
                    if i.has_input_frame:
                        if netru.layers.index(i)%options['hiera_step']==0:
                            i.input_frame = net_ru_high.layers[idx_ru].get_output(train=train)[0]
                            idx_ru += 1
                        else:
                            i.input_frame = net_ru_high.layers[idx_ru-1].get_output(train=train)[0]

            net_rv_high.set_out(train=train)
            idx_rv = 0
            for i in netrv.layers:
                if hasattr(i, 'has_input_frame'):
                    if i.has_input_frame:
                        if netrv.layers.index(i)%options['hiera_step']==0:
                            i.input_frame = net_rv_high.layers[idx_rv].get_output(train=train)[0]
                            idx_rv += 1
                        else:
                            i.input_frame = net_rv_high.layers[idx_rv-1].get_output(train=train)[0]

            net_rm_high.set_out(train=train)
            netrm.layers[0].input_frame= net_rm_high.layers[0].get_output(train=train)[0]

            if not train:
                [my_H, my_M] = net_hiera.get_out_idx(-2)
                print 'compile encoder...'
                self._encoder = theano.function([self.X,
                                                 self.init_h,self.init_m,
                                                 self.init_h_high,self.init_m_high], my_H)
            #construct pairwise loss
            lamb = options['lamb']
            [my_H, my_M] = net_hiera.get_out_idx(-2)
            #get binary code from network

            my_B = T.sgn(my_H)

            #self.H_: batch_size * nbits
            self.H_ = T.set_subtensor(self.H[self.idx,:],my_H[:self.idx.shape[0],:])  #add the hidden state into H

            #self.SS_ = self.SS[self.idx]
            if self.idx.shape[0] == options['batch_size']:
                self.SS_ = SS_shared[self.idx]    # SS_: batch_size * train_size
            else:
                # SS_: batch_size * train_size
                self.SS_ = T.set_subtensor(T.zeros((options['batch_size'],SS_shared.shape[1]))[:self.idx.shape[0]],SS_shared[self.idx])


            loss_pairwise = T.sum(T.square(T.dot(my_H,self.H_.transpose())/options['dim_proj']-self.SS_))

            loss_pairwise +=lamb*(T.sum(T.square(my_H-my_B)))

            self.y_pred = netru.get_out(train=train)
            assert len(self.y_pred) == options['v_length']
            loss_backward = T.sum(T.sqr(self.X[:,-1,:] - self.y_pred[0]))
            for i in xrange(1,options['v_length']):
                loss_backward += T.sum(T.sqr(self.X[:,-1-i,:] - self.y_pred[i]))

            self.y_pred2 = netrv.get_out(train=train)
            assert len(self.y_pred2) == options['v_length']
            loss_forward = T.sum(T.sqr(self.X[:,0,:] - self.y_pred2[0]))
            for i in xrange(1,options['v_length']):
                loss_forward += T.sum(T.sqr(self.X[:,i,:] - self.y_pred2[i]))
            
            self.y_mean = netrm.get_out(train=train)
            assert len(self.y_mean) == 1
            loss_mean= options['v_length'] * T.sum(T.sqr(T.mean(self.X, axis=1) - self.y_mean[0]))
            whts = options['weights']

            reconstruction_loss = whts[0]*loss_backward + whts[1]*loss_forward + whts[2]*loss_mean
            #add pairwise loss
            loss = loss_pairwise*options['pairwise_weight']+reconstruction_loss*(1-options['pairwise_weight'])
            #loss = loss_pairwise + reconstruction_loss

            for r in self.regularizers:
                loss = r(loss)
            
            if train:
                self.optimizer = eval('optimizer.'+ options['optimizer'])(self.params, lr=options['lrate'])
                updates = self.optimizer.get_updates(self.params, loss)
                updates += self.updates
                print 'compile train...'
                start_time = time.time()
                self._train = theano.function([self.X, self.idx, self.H,
                                               self.init_h, self.init_m,
                                               self.init_h_high, self.init_m_high,
                                               self.init_h_ru, self.init_m_ru,
                                               self.init_h_rv, self.init_m_rv,
                                               self.init_h_rm, self.init_m_rm],
                                              [self.H_ ,loss, loss_pairwise,reconstruction_loss], updates=updates)
                end_time = time.time()
                print 'spent %f seconds'  % (end_time-start_time)
            else:
                print 'compile test...'
                start_time = time.time()
                self._test = theano.function([self.X,self.idx,self.H,
                                              self.init_h, self.init_m,
                                              self.init_h_high, self.init_m_high,
                                              self.init_h_ru, self.init_m_ru,
                                              self.init_h_rv, self.init_m_rv,
                                              self.init_h_rm, self.init_m_rm], loss)
                end_time = time.time()
                print 'spent %f seconds'  % (end_time-start_time)

        comp_(train=True)
        comp_(train=False)
        print "Compile Done!"
Example #57
0
    def train(self, data1, data2, similarities, miniBatchSize=20, epochs=200):
        self.miniBatchSize = miniBatchSize
        nrMiniBatches = len(data1) / miniBatchSize
        miniBatchIndex = T.lscalar()
        momentum = T.fscalar()
        learningRate = T.fscalar()

        learningRateMiniBatch = np.float32(self.learningRate / miniBatchSize)
        print "learningRateMiniBatch in similarity net"
        print learningRateMiniBatch

        net = self._trainRBM(data1, data2)

        data1 = theano.shared(np.asarray(data1, dtype=theanoFloat))
        data2 = theano.shared(np.asarray(data2, dtype=theanoFloat))
        similarities = theano.shared(
            np.asarray(similarities, dtype=theanoFloat))

        # The mini-batch data is a matrix
        x = T.matrix('x', dtype=theanoFloat)
        y = T.matrix('y', dtype=theanoFloat)
        self.x = x
        self.y = y

        z = T.vector('z', dtype=theanoFloat)

        trainer = Trainer(x, y, net)
        self.trainer = trainer

        # error = T.sum(T.sqr(trainer.output-z))
        error = T.sum(T.nnet.binary_crossentropy(trainer.output, z))

        updates = self.buildUpdates(trainer, error, learningRate, momentum)

        # Now you have to define the theano function
        discriminativeTraining = theano.function(
            inputs=[miniBatchIndex, learningRate, momentum],
            outputs=[trainer.output, trainer.cos, error],
            updates=updates,
            givens={
                x:
                data1[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) *
                      miniBatchSize],
                y:
                data2[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) *
                      miniBatchSize],
                z:
                similarities[miniBatchIndex *
                             miniBatchSize:(miniBatchIndex + 1) *
                             miniBatchSize],
            })

        try:
            for epoch in xrange(epochs):
                print "epoch", epoch
                momentum = np.float32(
                    min(
                        np.float32(0.5) + epoch * np.float32(0.05),
                        np.float32(self.maxMomentum)))

                for miniBatch in xrange(nrMiniBatches):
                    output, cos, error = discriminativeTraining(
                        miniBatch, learningRateMiniBatch, momentum)

                print error / self.miniBatchSize

        except KeyboardInterrupt:
            print "you have decided to interrupt training"
            print "we continue testing"

        print trainer.w.get_value()
        print trainer.b.get_value()
Example #58
0
def train_conv_net(datasets,
                   U,
                   word_idx_map,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=11,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True,
                   pi_params=[1., 0],
                   C=1.0,
                   patience=20):
    """
    Train a convnet through iterative distillation
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes    
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper [Kim, 2014]
    lr_decay = adadelta decay parameter
    pi_params = update strategy of imitation parameter \pi
    C = regularization strength
    patience = number of iterations without performance improvement before stopping
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0]) - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch), ("pi_params", pi_params),
                  ("C", C)]
    print parameters

    #define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    #build the feature of BUT-rule
    f_but = T.fmatrix('f_but')
    f_but_ind = T.fmatrix('f_ind')  # indicators
    f_but_layer0_input = Words[T.cast(f_but.flatten(), dtype="int32")].reshape(
        (f_but.shape[0], 1, f_but.shape[1], Words.shape[1]))
    f_but_pred_layers = []
    for conv_layer in conv_layers:
        f_but_layer0_output = conv_layer.predict(f_but_layer0_input,
                                                 batch_size)
        f_but_pred_layers.append(f_but_layer0_output.flatten(2))
    f_but_layer1_input = T.concatenate(f_but_pred_layers, 1)
    f_but_y_pred_p = classifier.predict_p(f_but_layer1_input)
    f_but_full = T.concatenate([f_but_ind, f_but_y_pred_p],
                               axis=1)  # batch_size x 1 + batch_size x K
    f_but_full = theano.gradient.disconnected_grad(f_but_full)

    #add logic layer
    nclasses = 2
    rules = [FOL_But(nclasses, x, f_but_full)]
    rule_lambda = [1]
    new_pi = get_pi(cur_iter=0, params=pi_params)
    logic_nn = LogicNN(rng,
                       input=x,
                       network=classifier,
                       rules=rules,
                       rule_lambda=rule_lambda,
                       pi=new_pi,
                       C=C)

    #define parameters of the model and update functions using adadelta
    params_p = logic_nn.params_p
    for conv_layer in conv_layers:
        params_p += conv_layer.params
    if non_static:
        #if word vectors are allowed to change, add them as model parameters
        params_p += [Words]
    cost_p = logic_nn.negative_log_likelihood(y)
    dropout_cost_p = logic_nn.dropout_negative_log_likelihood(y)
    grad_updates_p = sgd_updates_adadelta(params_p, dropout_cost_p, lr_decay,
                                          1e-6, sqr_norm_lim)

    # shuffle the dataset and assign it to mini-batches. If the dataset size is
    # not a multiple of the batch size, replicate extra data (chosen at random)
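    # e.g. with 130 training examples and batch_size = 50, 130 % 50 = 30, so
    # 20 randomly chosen examples are appended to pad the set to 150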
    np.random.seed(3435)
    # training data
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        # shuffle both train data and features
        permutation_order = np.random.permutation(datasets[0].shape[0])
        train_set = datasets[0][permutation_order]
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
        new_fea = {}
        train_fea = datasets[3]
        for k in train_fea.keys():
            train_fea_k = train_fea[k][permutation_order]
            extra_fea = train_fea_k[:extra_data_num]
            new_fea[k] = np.append(train_fea[k], extra_fea, axis=0)
        train_text = datasets[6][permutation_order]
        extra_text = train_text[:extra_data_num]
        new_text = np.append(datasets[6], extra_text, axis=0)
    else:
        new_data = datasets[0]
        new_fea = datasets[3]
        new_text = datasets[6]
    # shuffle both training data and features
    permutation_order = np.random.permutation(new_data.shape[0])
    new_data = new_data[permutation_order]
    for k in new_fea.keys():
        new_fea[k] = new_fea[k][permutation_order]
    new_text = new_text[permutation_order]
    n_batches = new_data.shape[0] // batch_size  # exact: data was padded to a multiple of batch_size above
    n_train_batches = n_batches
    train_set = new_data
    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :img_h], train_set[:, -1]))
    train_fea = new_fea
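    # shared_fea (like shared_dataset) is expected to wrap the feature arrays in
    # Theano shared variables so they can be sliced inside the givens below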
    train_fea_but_ind = train_fea['but_ind'].reshape(
        [train_fea['but_ind'].shape[0], 1])
    train_fea_but_ind = shared_fea(train_fea_but_ind)
    for k in new_fea.keys():
        if k != 'but_text':
            train_fea[k] = shared_fea(new_fea[k])

    # val data
    if datasets[1].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[1].shape[0] % batch_size
        # shuffle both val data and features
        permutation_order = np.random.permutation(datasets[1].shape[0])
        val_set = datasets[1][permutation_order]
        extra_data = val_set[:extra_data_num]
        new_val_data = np.append(datasets[1], extra_data, axis=0)
        new_val_fea = {}
        val_fea = datasets[4]
        for k in val_fea.keys():
            val_fea_k = val_fea[k][permutation_order]
            extra_fea = val_fea_k[:extra_data_num]
            new_val_fea[k] = np.append(val_fea[k], extra_fea, axis=0)
        val_text = datasets[7][permutation_order]
        extra_text = val_text[:extra_data_num]
        new_val_text = np.append(datasets[7], extra_text, axis=0)
    else:
        new_val_data = datasets[1]
        new_val_fea = datasets[4]
        new_val_text = datasets[7]
    val_set = new_val_data
    val_set_x, val_set_y = shared_dataset((val_set[:, :img_h], val_set[:, -1]))
    n_batches = new_val_data.shape[0] // batch_size
    n_val_batches = n_batches
    val_fea = new_val_fea
    val_fea_but_ind = val_fea['but_ind'].reshape(
        [val_fea['but_ind'].shape[0], 1])
    val_fea_but_ind = shared_fea(val_fea_but_ind)
    for k in val_fea.keys():
        if k != 'but_text':
            val_fea[k] = shared_fea(val_fea[k])

    # test data
    test_set_x = datasets[2][:, :img_h]
    test_set_y = np.asarray(datasets[2][:, -1], "int32")
    test_fea = datasets[5]
    test_fea_but_ind = test_fea['but_ind']
    test_fea_but_ind = test_fea_but_ind.reshape([test_fea_but_ind.shape[0], 1])
    test_text = datasets[8]

    ### compile theano functions for error monitoring and the training step
    val_model = theano.function(
        [index],
        logic_nn.errors(y),
        givens={
            x:
            val_set_x[index * batch_size:(index + 1) * batch_size],
            y:
            val_set_y[index * batch_size:(index + 1) * batch_size],
            f_but:
            val_fea['but'][index * batch_size:(index + 1) * batch_size],
            f_but_ind:
            val_fea_but_ind[index * batch_size:(index + 1) * batch_size, :]
        },
        allow_input_downcast=True,
        on_unused_input='warn')

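    # note: despite its name, test_model evaluates errors on training-set mini-batches;
    # the held-out test set is scored by test_model_all further below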
    test_model = theano.function(
        [index],
        logic_nn.errors(y),
        givens={
            x:
            train_set_x[index * batch_size:(index + 1) * batch_size],
            y:
            train_set_y[index * batch_size:(index + 1) * batch_size],
            f_but:
            train_fea['but'][index * batch_size:(index + 1) * batch_size],
            f_but_ind:
            train_fea_but_ind[index * batch_size:(index + 1) * batch_size, :]
        },
        allow_input_downcast=True,
        on_unused_input='warn')

    train_model = theano.function(
        [index],
        cost_p,
        updates=grad_updates_p,
        givens={
            x:
            train_set_x[index * batch_size:(index + 1) * batch_size],
            y:
            train_set_y[index * batch_size:(index + 1) * batch_size],
            f_but:
            train_fea['but'][index * batch_size:(index + 1) * batch_size],
            f_but_ind:
            train_fea_but_ind[index * batch_size:(index + 1) * batch_size, :]
        },
        allow_input_downcast=True,
        on_unused_input='warn')

    ### set up evaluation on the held-out test set
    test_size = test_set_x.shape[0]
    print 'test size ', test_size
    test_pred_layers = []
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    f_but_test_pred_layers = []
    f_but_test_layer0_input = Words[T.cast(f_but.flatten(),
                                           dtype="int32")].reshape(
                                               (test_size, 1, img_h,
                                                Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
        f_but_test_layer0_output = conv_layer.predict(f_but_test_layer0_input,
                                                      test_size)
        f_but_test_pred_layers.append(f_but_test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    f_but_test_layer1_input = T.concatenate(f_but_test_pred_layers, 1)
    f_but_test_y_pred_p = classifier.predict_p(f_but_test_layer1_input)
    f_but_test_full = T.concatenate([f_but_ind, f_but_test_y_pred_p],
                                    axis=1)  # (test_size x 1) and (test_size x K) -> test_size x (1 + K)

    # transform to shared variables
    test_set_x_shr, test_set_y_shr = shared_dataset((test_set_x, test_set_y))

    test_q_y_pred, test_p_y_pred = logic_nn.predict(test_layer1_input,
                                                    test_set_x_shr,
                                                    [f_but_test_full])
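    # q: predictions of the rule-constrained (teacher) network,
    # p: predictions of the base (student) network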
    test_q_error = T.mean(T.neq(test_q_y_pred, y))
    test_p_error = T.mean(T.neq(test_p_y_pred, y))
    test_model_all = theano.function([x, y, f_but, f_but_ind],
                                     [test_q_error, test_p_error],
                                     allow_input_downcast=True,
                                     on_unused_input='warn')
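    # the whole test set is evaluated in a single call (batch size = test_size),
    # so test_model_all takes the raw arrays rather than a mini-batch index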

    ### start training over mini-batches
    print '... training'
    epoch = 0
    batch = 0
    best_val_q_perf = 0
    ret_test_perf = 0  # ensure a return value even if validation never improves
    val_p_perf = 0
    val_q_perf = 0
    cost_epoch = 0
    stop_count = 0
    while (epoch < n_epochs):
        start_time = time.time()
        epoch = epoch + 1
        # train
        if shuffle_batch:
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                batch = batch + 1
                new_pi = get_pi(cur_iter=batch * 1. / n_train_batches,
                                params=pi_params)
                logic_nn.set_pi(new_pi)
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                batch = batch + 1
                new_pi = get_pi(cur_iter=batch * 1. / n_train_batches,
                                params=pi_params)
                logic_nn.set_pi(new_pi)
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        # eval
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_losses = np.array(train_losses)
        train_q_perf = 1 - np.mean(train_losses[:, 0])
        train_p_perf = 1 - np.mean(train_losses[:, 1])
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_losses = np.array(val_losses)
        val_q_perf = 1 - np.mean(val_losses[:, 0])
        val_p_perf = 1 - np.mean(val_losses[:, 1])
        print('epoch: %i, training time: %.2f secs; (q): train perf: %.4f %%, val perf: %.4f %%; (p): train perf: %.4f %%, val perf: %.4f %%' % \
               (epoch, time.time()-start_time, train_q_perf * 100., val_q_perf*100., train_p_perf * 100., val_p_perf*100.))
        test_loss = test_model_all(test_set_x, test_set_y, test_fea['but'],
                                   test_fea_but_ind)
        test_loss = np.array(test_loss)
        test_perf = 1 - test_loss
        print 'test perf: q %.4f %%, p %.4f %%' % (test_perf[0] * 100.,
                                                   test_perf[1] * 100.)
        if val_q_perf > best_val_q_perf:
            best_val_q_perf = val_q_perf
            ret_test_perf = test_perf
            stop_count = 0
        else:
            stop_count += 1
        if stop_count == patience:
            break
    return ret_test_perf
'''
Theano basics.
For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
'''

import numpy as np
import theano.tensor as T
import theano


# just some different types of variables
c = T.scalar('c')
v = T.vector('v')
A = T.matrix('A')


# we can define a matrix-vector multiplication
w = A.dot(v)


# how do these variables actually take on values?
matrix_times_vector = theano.function(inputs=[A,v], outputs=w)

# we can create real arrays
A_val = np.array([[1,3],[3,4]])
v_val = np.array([5,6])
w_val = matrix_times_vector(A_val, v_val)
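# the compiled function evaluates the graph on the supplied NumPy values;
# for these inputs the result is [[1,3],[3,4]] . [5,6] = [23., 39.]
print(w_val)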
    train_word_pos_vec3D = train_word_pos_vec3D[indices]
    train_sen_length = train_sen_length[indices]
    train_label_1hot = train_label_1hot[indices]
    """
    new model
    """
    model = Network()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor3('inputs')
    target_var = T.imatrix('targets')
    mask_var = T.imatrix('mask_layer')
    # Pi model variables:
    if model.network_type == "pi":
        input_b_var = T.tensor3('inputs_b')
        mask_train = T.vector('mask_train')
        unsup_weight_var = T.scalar('unsup_weight')
    elif model.network_type == "tempens":
        # tempens model variables:
        z_target_var = T.matrix('z_targets')
        mask_train = T.vector('mask_train')
        unsup_weight_var = T.scalar('unsup_weight')
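        # for temporal ensembling, z_target_var presumably carries the accumulated
        # (ensembled) predictions from earlier epochs, used as unsupervised targets
        # weighted by unsup_weight_var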

    learning_rate_var = T.scalar('learning_rate')
    adam_beta1_var = T.scalar('adam_beta1')

    #    #Left sdp length
    #    left_sdp_length=T.imatrix('left_sdp_length')
    #    #Sentences length
    #    sen_length=T.imatrix('sen_length')