Example #1
 def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
     self.inpt = inpt.reshape((mini_batch_size, self.n_in))
     self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
     self.y_out = T.argmax(self.output, axis=1)
     self.inpt_dropout = dropout_layer(
         inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
     self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
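
The dropout_layer helper is referenced above but not defined in this snippet. As a hedged sketch of how such a helper is commonly written with Theano's shared random streams (an assumption for context, not code taken from this example):

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

def dropout_layer(layer, p_dropout):
    # Zero each unit with probability p_dropout using a Bernoulli mask.
    srng = RandomStreams(np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1 - p_dropout, size=layer.shape)
    return layer * T.cast(mask, theano.config.floatX)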
Example #2
 def recurrence(xp_t, xp_t1, xq_t1, h_t_pre1, cx, ch):
     # context_x
     # Fixed-length matrix: append xp_t at the bottom and drop the first row, so the shape stays the same.
     cx = T.concatenate((cx[1:], xp_t.reshape(
         (1, n_in))))  # shape=(winx, 20)
     ex = T.dot(tanh(T.dot(cx, qx)), rx)  # shape=(winx, 1)
     ax = softmax(ex.T)  # shape=(1, winx)
     xc = (T.dot(cx.T, ax.T)).reshape((n_in, ))  # shape=(20, )
     # gru_unit
     z_r = sigmoid(
         T.dot(ui[:2], xp_t) + T.dot(vc[:2], xc) +
         T.dot(wh[:2], h_t_pre1) + bi[:2])
     z, r = z_r[0], z_r[1]
     c = tanh(
         T.dot(ui[2], xp_t) + T.dot(vc[2], xc) +
         T.dot(wh[2], (r * h_t_pre1)) + bi[2])
     h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c  # shape=(20, )
     # context_h
     # Fixed-length matrix: append h_t at the bottom and drop the first row, so the shape stays the same.
     ch = T.concatenate((ch[1:], h_t.reshape((1, n_hidden))))  # the 5 most recent hidden states
     eh = T.dot(tanh(T.dot(ch, qh)), rh)  # shape=(winh, 1)
     ah = softmax(eh.T)  # shape=(1, winh)
     hc = (T.dot(ch.T, ah.T)).reshape((n_hidden, ))
     hw = tanh(T.dot(e, h_t) + T.dot(f, hc))
     # loss
     upq_t = T.dot(hw, xp_t1 - xq_t1)  # positive/negative sample training: h(t) * (xp(t+1) - xq(t+1))
     loss_t = T.log(sigmoid(upq_t))
     return [h_t, cx, ch, loss_t]
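
This step function is presumably driven by theano.scan. A hedged sketch of the wiring, where the sequence and initial-state names (xps, xps_next, xqs_next, h0, cx0, ch0) are assumptions about the enclosing code rather than names from this example:

import theano
import theano.tensor as T

[h_seq, cx_seq, ch_seq, loss_seq], _ = theano.scan(
    fn=recurrence,
    sequences=[xps, xps_next, xqs_next],  # supply xp_t, xp_t1, xq_t1 at each step
    outputs_info=[h0, cx0, ch0, None])    # h_t, cx, ch recur; loss_t is only collected
cost = -T.sum(loss_seq)  # maximizing log(sigmoid(.)) is minimizing its negative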
Example #3
    def test_optimize_xent_vector2(self):
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(5)
        b_val = rng.randn(5)
        y_val = numpy.asarray([2])

        x = T.dvector('x')
        b = T.dvector('b')
        y = T.lvector('y')

        def print_graph(func):
            for i, node in enumerate(func.maker.fgraph.toposort()):
                print i, node
            # Last node should be the output
            print i, printing.pprint(node.outputs[0])
            print

        ## Test that a biased softmax is optimized correctly
        bias_expressions = [
                T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
                -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
                -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
                T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

        for expr in bias_expressions:
            f = theano.function([x, b, y], expr, mode=mode)
            if verbose:
                print_graph(f)
            try:
                prev, last = f.maker.fgraph.toposort()[-2:]
                assert len(f.maker.fgraph.toposort()) == 3
                # [big_op, sum, dim_shuffle]
                f(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

            backup = config.warn.sum_div_dimshuffle_bug
            config.warn.sum_div_dimshuffle_bug = False
            try:
                g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
            finally:
                config.warn.sum_div_dimshuffle_bug = backup

            if verbose:
                print_graph(g)
            try:
                ops = [node.op for node in g.maker.fgraph.toposort()]
                assert len(ops) <= 6
                assert crossentropy_softmax_1hot_with_bias_dx in ops
                assert softmax_with_bias in ops
                assert softmax_grad not in ops
                g(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(g)
                raise
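
For context, the four bias_expressions above all encode the same quantity: the categorical negative log-likelihood of a biased softmax. A stand-alone version of that expression, shown only for illustration and independent of the optimization being tested:

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import softmax

x = T.dvector('x')
b = T.dvector('b')
y = T.lvector('y')
# Negative log-likelihood of the target indices y under softmax(x + b).
nll = T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y]))
f = theano.function([x, b, y], nll)
print(f(numpy.random.randn(5), numpy.random.randn(5), numpy.asarray([2])))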
Example #4
 def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
     self.inpt = inpt.reshape((mini_batch_size, self.n_in))
     self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
     self.y_out = T.argmax(self.output, axis=1)
     self.inpt_dropout = dropout_layer(
         inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
     self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
Example #5
    def test_optimize_xent_vector3(self):
        # Same as test_optimize_xent_vector2, but y is the result of
        # a "flatten", and it used to make the constant-folding
        # of arange(y.shape[0]) happen before the xent optimization
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(5).astype(config.floatX)
        b_val = rng.randn(5).astype(config.floatX)
        y_val = numpy.asarray([2])

        x = T.vector('x')
        b = T.vector('b')
        y_ = T.lvector('y_')
        y = y_.flatten()

        ## Test that a biased softmax is optimized correctly
        bias_expressions = [
                T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
                -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
                -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
                T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

        for expr in bias_expressions:
            f = theano.function([x, b, y_], expr, mode=mode)
            if verbose:
                printing.debugprint(f)
            try:
                ops = [node.op for node in f.maker.fgraph.toposort()]
                # [big_op, sum, dim_shuffle, flatten]
                assert len(ops) <= 4
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
                assert not [1 for o in ops
                            if isinstance(o, T.AdvancedSubtensor)]
                f(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

            backup = config.warn.sum_div_dimshuffle_bug
            config.warn.sum_div_dimshuffle_bug = False
            try:
                g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
            finally:
                config.warn.sum_div_dimshuffle_bug = backup

            if verbose:
                printing.debugprint(g)
            try:
                ops = [node.op for node in g.maker.fgraph.toposort()]
                assert len(ops) <= 6
                assert crossentropy_softmax_1hot_with_bias_dx in ops
                assert softmax_with_bias in ops
                assert softmax_grad not in ops
                g(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(g)
                raise
Example #6
def bench_ConvLarge(batchsize, variant=True):
    name = "ConvLarge_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker

    # Image shape 256x256
    GlobalBenchReporter.batch_size = batchsize
    data_x.set_value(randn(n_examples, 1, 256, 256))
    w0 = shared(rand(6, 1, 7, 7) * numpy.sqrt(6 / (25.)))
    b0 = shared(zeros(6))
    w1 = shared(rand(16, 6, 7, 7) * numpy.sqrt(6 / (25.)))
    b1 = shared(zeros(16))
    vv = shared(rand(16 * 11 * 11, 120) * numpy.sqrt(6.0 / 16. / 25))
    cc = shared(zeros(120))
    v = shared(zeros(120, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, v, c, vv, cc]

    c0 = tanh(conv2d(sx, w0, image_shape=(batchsize, 1, 256, 256),
                     filter_shape=(6, 1, 7, 7)) + b0.dimshuffle(0, 'x', 'x'))
    # this is not the correct leNet5 model, but it's closer to
    s0 = tanh(max_pool_2d(c0, (5, 5)))

    c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 50, 50),
                     filter_shape=(16, 6, 7, 7)) + b1.dimshuffle(0, 'x', 'x'))
    s1 = tanh(max_pool_2d(c1, (4, 4)))

    p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()

    gparams = grad(cost, params)

    train = function([si, nsi], cost,
                     updates=[(p, p - lr * gp) for p, gp in zip(params, gparams)],
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Versions with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)
    c0 = tanh(conv2d(ssx, w0, image_shape=(batchsize, 1, 256, 256),
                     filter_shape=(6, 1, 7, 7)) + b0.dimshuffle(0, 'x', 'x'))
    # this is not the correct leNet5 model, but it's closer to
    s0 = tanh(max_pool_2d(c0, (5, 5)))

    c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 50, 50),
                     filter_shape=(16, 6, 7, 7)) + b1.dimshuffle(0, 'x', 'x'))
    s1 = tanh(max_pool_2d(c1, (4, 4)))

    p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
    nll = -log(p_y_given_x)[arange(ssy.shape[0]), ssy]
    cost = nll.mean()

    gparams = grad(cost, params)

    train2 = function([], cost,
                      updates=[(p, p - lr * gp) for p, gp in zip(params, gparams)] + [(ssi, ssi + snsi)],
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
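
As an aside, the 16 * 11 * 11 input size of vv follows from the valid 7x7 convolutions and non-overlapping poolings above; a quick arithmetic check (not part of the benchmark):

conv1 = 256 - 7 + 1    # 250 after the first 7x7 convolution
pool1 = conv1 // 5     # 50 after 5x5 max pooling
conv2 = pool1 - 7 + 1  # 44 after the second 7x7 convolution
pool2 = conv2 // 4     # 11 after 4x4 max pooling
assert 16 * pool2 * pool2 == 16 * 11 * 11  # the flattened input size of vv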
Example #7
    def test_optimize_xent_vector2(self):
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(5)
        b_val = rng.randn(5)
        y_val = numpy.asarray([2])

        x = T.dvector('x')
        b = T.dvector('b')
        y = T.lvector('y')

        def print_graph(func):
            for i, node in enumerate(func.maker.env.toposort()):
                print i, node
            # Last node should be the output
            print i, printing.pprint(node.outputs[0])
            print

        ## Test that a biased softmax is optimized correctly
        bias_expressions = [
            T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
            -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
            -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
            T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])
        ]

        for expr in bias_expressions:
            f = theano.function([x, b, y], expr, mode=mode)
            if verbose: print_graph(f)
            try:
                prev, last = f.maker.env.toposort()[-2:]
                assert len(
                    f.maker.env.toposort()) == 3  # [big_op, sum, dim_shuffle]
                f(x_val, b_val, y_val)
            except:
                theano.printing.debugprint(f)
                raise

            backup = config.warn.sum_div_dimshuffle_bug
            config.warn.sum_div_dimshuffle_bug = False
            try:
                g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
            finally:
                config.warn.sum_div_dimshuffle_bug = backup

            print_graph(g)
            try:
                ops = [node.op for node in g.maker.env.toposort()]
                assert len(ops) <= 6
                assert crossentropy_softmax_1hot_with_bias_dx in ops
                assert softmax_with_bias in ops
                assert softmax_grad not in ops
                g(x_val, b_val, y_val)
            except:
                theano.printing.debugprint(g)
                raise
Example #8
    def setInputOutput(self, inp, inpDropout, mbSize):
        self.inp = inp.reshape((mbSize, self.nInp))
        self.out = softmax( (1-self.pDropout)*T.dot(self.inp,self.w) + self.b )
        self.yOut = T.argmax(self.out, axis=1)

        self.inpDropout = dropoutLayer( inpDropout.reshape((mbSize, self.nInp)),
                                         self.pDropout )
        self.outDropout = softmax( T.dot(self.inpDropout,self.w)+self.b)
Example #9
def bench_mlp_500(variant=True):
    name = "mlp_784_500_10_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker
    HUs = 500
    w = shared(rand(HUs, inputs) * numpy.sqrt(6 / (inputs + HUs)), name='w')
    b = shared(zeros(HUs), name='b')
    v = shared(zeros(outputs, HUs), name='v')
    c = shared(zeros(outputs), name='c')
    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    p_y_given_x = softmax(dot(tanh(dot(sx_, w.T) + b), v.T) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()

    gw, gb, gv, gc = grad(cost, [w, b, v, c])

    train = function([si, nsi],
                     cost,
                     updates={
                         w: w - lr * gw,
                         b: b - lr * gb,
                         v: v - lr * gv,
                         c: c - lr * gc
                     },
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)
    p_y_given_x = softmax(dot(tanh(dot(ssx_, w.T) + b), v.T) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()

    gw, gb, gv, gc = grad(cost, [w, b, v, c])

    train2 = function(
        [],
        cost,
        updates={
            w: w - lr * gw,
            b: b - lr * gb,
            v: v - lr * gv,
            c: c - lr * gc,
            ssi: ssi + snsi
        },
        name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
Example #10
    def test_optimize_xent_vector2(self):
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(5).astype(config.floatX)
        b_val = rng.randn(5).astype(config.floatX)
        y_val = numpy.asarray([2])

        x = T.vector('x')
        b = T.vector('b')
        y = T.lvector('y')

        ## Test that a biased softmax is optimized correctly
        bias_expressions = [
            T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
            -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
            -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
            T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])
        ]

        for expr in bias_expressions:
            f = theano.function([x, b, y], expr, mode=mode)
            if verbose:
                printing.debugprint(f)
            try:
                ops = [node.op for node in f.maker.fgraph.toposort()]
                # [big_op, sum, dim_shuffle]
                assert len(ops) == 3
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
                assert not [
                    1 for o in ops if isinstance(o, T.AdvancedSubtensor)
                ]
                f(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

            backup = config.warn.sum_div_dimshuffle_bug
            config.warn.sum_div_dimshuffle_bug = False
            try:
                g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
            finally:
                config.warn.sum_div_dimshuffle_bug = backup

            if verbose:
                printing.debugprint(g)
            try:
                ops = [node.op for node in g.maker.fgraph.toposort()]
                assert len(ops) <= 6
                assert crossentropy_softmax_1hot_with_bias_dx in ops
                assert softmax_with_bias in ops
                assert softmax_grad not in ops
                g(x_val, b_val, y_val)
            except Exception:
                theano.printing.debugprint(g)
                raise
Example #11
def bench_deep1000(variant=True):
    name = "mlp_784_1000_1000_1000_10_b" + str(GlobalBenchReporter.batch_size)
    name +=  "_" + config.linker
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6 / (inputs + 1000)), name='w0')
    b0 = shared(zeros(1000), name='b0')
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)), name='w1')
    b1 = shared(zeros(1000), name='b1')
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)), name='w2')
    b2 = shared(zeros(1000), name='b2')
    v = shared(zeros(1000, outputs), name='v')
    c = shared(zeros(outputs), name='c')
    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy
    params = [w0, b0, w1, b1, w2, b2, v, c]

    h0 = tanh(dot(sx_, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)

    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()

    gparams = grad(cost, params)

    train = function([si, nsi], cost,
                     updates=[(p, p - lr * gp)
                              for p, gp in zip(params, gparams)],
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    h0 = tanh(dot(ssx_, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)

    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()

    gparams = grad(cost, params)

    train2 = function([], cost,
                      updates=[(p, p - lr * gp)
                               for p, gp in zip(params, gparams)] + [(ssi, ssi + snsi)],
                      name=name)
    snsi.set_value(GlobalBenchReporter.batch_size)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
Example #12
def bench_logreg(variant=True):
    name = "mlp_784_10_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker
    v = shared(zeros(outputs, inputs), name='v')
    c = shared(zeros(outputs), name='c')
    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    #
    # Note on the transposed-ness of v: for some reason, this data
    # layout is faster than the non-transposed orientation.
    # The change doesn't make much difference in the deeper models,
    # but in this case it was more than twice as fast.
    #
    p_y_given_x = softmax(dot(sx_, v.T) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()

    gv, gc = grad(cost, [v, c])

    #theano.printing.debugprint(grad(cost, [v, c]), file=open('foo', 'wb'))
    train = function([si, nsi], [],
                     updates={
                         v: v - lr * gv,
                         c: c - lr * gc
                     },
                     name=name)
    #    theano.printing.debugprint(train, print_type=True)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)

    p_y_given_x = softmax(dot(ssx_, v.T) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()

    gv, gc = grad(cost, [v, c])

    train2 = function([], [],
                      updates={
                          v: v - lr * gv,
                          c: c - lr * gc,
                          ssi: ssi + snsi
                      },
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
Example #13
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        print type(self.inpt), type(self.w), type(self.output)
        candidates = theano.shared(np.asarray(xrange(0,2), dtype=theano.config.floatX), borrow=True)
#        self.y_out = T.argmax(self.output, axis=1)
        self.y_out = T.dot(self.output, candidates)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
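
A small NumPy aside (not part of the layer) showing what the expected-value decoding above computes: with candidates = [0, 1], the dot product is simply the probability assigned to class 1.

import numpy as np

out = np.array([[0.3, 0.7],
                [0.9, 0.1]])          # two softmax outputs over classes {0, 1}
print(out.dot(np.array([0.0, 1.0])))  # -> [0.7  0.1], i.e. P(class == 1) per row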
Example #14
 def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
     """ Set input """
     self.inpt = inpt.reshape((mini_batch_size, self.n_in))
     self.output = nnet.softmax((1 - self.p_dropout) *
                                tensor.dot(self.inpt, self.weights) +
                                self.biases)
     self.y_out = tensor.argmax(self.output, axis=1)
     self.inpt_dropout = dropout_layer(
         inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
     self.output_dropout = nnet.softmax(
         tensor.dot(self.inpt_dropout, self.weights) + self.biases)
Example #15
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))

        # Output is masked by 1 - the probability of the dropout layer
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)

        # There is dropout in the output
        self.inpt_dropout = CNN.core_layers.DropoutLayer.dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
Example #16
	def output(self, inpt, inpt_dropout, mini_batch_size):
		""" Generate output from a particular inpt, given the weights and biases
		An observation: inpt (w/o dropout) is used to feedforward to get the result.
		On the other hand, inpt_dropout is mainly used for training """

		self.inpt = inpt.reshape((mini_batch_size, self.n_in))
		self.output = softmax((1-self.dropout)*T.dot(self.inpt, self.W) + self.b)
		self.y_out = T.argmax(self.output, axis=1)
		self.inpt_dropout = dropout_layer(
			inpt_dropout.reshape((mini_batch_size, self.n_in)), self.dropout)
		self.output_dropout = softmax(T.dot(self.inpt_dropout, self.W) + self.b)
Example #17
    def set_connection(self, inpt, inpt_dropout, mini_batch_size):
        # from input to output
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax(
            (1 - self.p_dropout) * (T.dot(self.inpt, self.w) + self.b))
        self.y_out = T.argmax(self.output, axis=1)

        w = self.w * np.random.binomial(1, 1 - self.p_dropout,
                                        self.w.get_value().shape)
        self.inpt_dropout = inpt_dropout.reshape((mini_batch_size, self.n_in))
        self.output_dropout = softmax(T.dot(self.inpt_dropout, w) + self.b)
        self.y_out_dropout = T.argmax(self.output_dropout, axis=1)
Example #18
def bench_logreg(variant=True):
    name = "mlp_784_10_b" + str(GlobalBenchReporter.batch_size)
    name +=  "_" + config.linker
    v = shared(zeros(outputs, inputs), name='v')
    c = shared(zeros(outputs), name='c')
    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    #
    # Note on the transposed-ness of v: for some reason, this data
    # layout is faster than the non-transposed orientation.
    # The change doesn't make much difference in the deeper models,
    # but in this case it was more than twice as fast.
    #
    p_y_given_x = softmax(dot(sx_, v.T) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()

    gv, gc = grad(cost, [v, c])

    #theano.printing.debugprint(grad(cost, [v, c]), file=open('foo', 'wb'))
    train = function([si, nsi], [],
                     updates={v: v - lr * gv, c: c - lr * gc},
                     name=name)
#    theano.printing.debugprint(train, print_type=True)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)

    p_y_given_x = softmax(dot(ssx_, v.T) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()

    gv, gc = grad(cost, [v, c])

    train2 = function([], [],
                      updates={v: v - lr * gv, c: c - lr * gc,
                               ssi: ssi + snsi},
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
Example #19
def bench_mlp_500(variant=True):
    name = "mlp_784_500_10_b" + str(GlobalBenchReporter.batch_size)
    name +=  "_" + config.linker
    HUs = 500
    w = shared(rand(HUs, inputs) * numpy.sqrt(6 / (inputs + HUs)), name='w')
    b = shared(zeros(HUs), name='b')
    v = shared(zeros(outputs, HUs), name='v')
    c = shared(zeros(outputs), name='c')
    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    p_y_given_x = softmax(dot(tanh(dot(sx_, w.T) + b), v.T) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()

    gw, gb, gv, gc = grad(cost, [w, b, v, c])

    train = function([si, nsi], cost,
                     updates={w: w - lr * gw,
                              b: b - lr * gb,
                              v: v - lr * gv,
                              c: c - lr * gc},
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)
    p_y_given_x = softmax(dot(tanh(dot(ssx_, w.T) + b), v.T) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()

    gw, gb, gv, gc = grad(cost, [w, b, v, c])

    train2 = function([], cost,
                     updates={w: w - lr * gw,
                              b: b - lr * gb,
                              v: v - lr * gv,
                              c: c - lr * gc,
                              ssi: ssi + snsi},
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
Example #20
    def test_optimize_xent_vector(self):
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(5).astype(config.floatX)
        y_val = numpy.asarray([2])

        x = T.vector('x')
        y = T.lvector('y')

        def print_graph(func):
            for i, node in enumerate(func.maker.fgraph.toposort()):
                print i, node
            # Last node should be the output
            print i, printing.pprint(node.outputs[0])
            print

        ## Test that a biased softmax is optimized correctly
        bias_expressions = [
                T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
                -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]

        for expr in bias_expressions:
            f = theano.function([x, y], expr, mode=mode)
            if verbose:
                print_graph(f)
            try:
                ops = [node.op for node in f.maker.fgraph.toposort()]
                assert len(ops) == 5
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
                assert not [1 for o in ops
                            if isinstance(o, T.AdvancedSubtensor)]
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise
            g = theano.function([x, y], T.grad(expr, x), mode=mode)
            if verbose:
                print_graph(g)
            try:
                ops = [node.op for node in g.maker.fgraph.toposort()]
                assert len(ops) == 4
                assert crossentropy_softmax_1hot_with_bias_dx in ops
                assert softmax in ops
                assert softmax_grad not in ops
                g(x_val, y_val)
            except Exception:
                theano.printing.debugprint(g)
                raise
Example #21
 def recurrence(xp_t, xp_t1, xq_t1, mask_t, h_t_pre1, cxs, chs):
     # Both the features and the hidden states are shaped (batch_size, n_hidden) = (n, 20)
     # (n, winx, 20) = T.concatenate((((n, winx-1, 20)), ((n, 1, 20))), axis=1)
     # context_x
     # Fixed-length matrix: append xp_t at the bottom and drop the first row, so the shape stays the same.
     cxs = T.concatenate(
         (
             cxs[:, 1:, :],  # shape=(n, winx-1, 20)
             xp_t.dimshuffle(0, 'x', 1)),  # shape=(n, 1, 20)
         axis=1)  # shape=(n, winx, 20)
     exs = T.dot(tanh(T.dot(cxs, qx)), rx)  # shape=(n, winx, 1)
     exs = T.Rebroadcast((2, True))(exs)  # make axis=2 broadcastable so it can be dropped
     axs0 = softmax(exs.dimshuffle(0, 1))  # shape=(n, winx); drop a dim, since softmax works row-wise
     axs = axs0.dimshuffle(0, 1, 'x')  # shape=(n, winx, 1); add the dim back
     axs = T.Rebroadcast((2, True))(axs)  # make axis=2 broadcastable so it can multiply cxs
     # (n, 20) = T.sum((n, winx, 20) * (n, winx, 1), axis=1)
     xc = T.sum(cxs * axs, axis=1)  # shape=(n, 20)
     # gru unit
     z_r = sigmoid(
         T.dot(ui[:2], xp_t.T) + T.dot(vc[:2], xc.T) +
         T.dot(wh[:2], h_t_pre1.T) + bi[:2])
     z, r = z_r[0].T, z_r[1].T  # shape=(n, 20)
     c = tanh(
         T.dot(ui[2], xp_t.T) + T.dot(vc[2], xc.T) +
         T.dot(wh[2], (r * h_t_pre1).T) + bi[2])
     h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T  # shape=(n, 20)
     # context_h
     # Fixed-length matrix: append h_t at the bottom and drop the first row, so the shape stays the same.
     chs = T.concatenate(
         (
             chs[:, 1:, :],  # shape=(n, winh-1, 20)
             h_t.dimshuffle(0, 'x', 1)),  # shape=(n, 1, 20)
         axis=1)  # shape=(n, winh, 20)
     ehs = T.dot(tanh(T.dot(chs, qh)), rh)  # shape=(n, winh, 1)
     ehs = T.Rebroadcast((2, True))(ehs)  # make axis=2 broadcastable so it can be dropped
     ahs0 = softmax(ehs.dimshuffle(0, 1))  # shape=(n, winh); drop a dim, since softmax works row-wise
     ahs = ahs0.dimshuffle(0, 1, 'x')  # shape=(n, winh, 1); add the dim back
     ahs = T.Rebroadcast((2, True))(ahs)  # make axis=2 broadcastable so it can multiply chs
     hcs = T.sum(chs * ahs, axis=1)  # shape=(n, 20)
     # overall representation hws: fuse the current h_t with the context hcs
     hws = tanh(T.dot(h_t, e.T) + T.dot(hcs, f.T))  # shape=(n, 20)
     # loss
     upq_t = T.sum(
         hws * (xp_t1 - xq_t1),
         axis=1)  # shape=(n, ), h(t) * (xp(t+1) - xq(t+1)), positive/negative sample training
     loss_t = T.log(sigmoid(upq_t))
     loss_t *= mask_t  # multiplying by the 0/1 mask only here, on the loss, is enough
     return [h_t, cxs, chs, loss_t]
Example #22
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))

        # self.output uses the softmax function and returns the classifier scores as probabilities
        self.output = softmax((1 - self.p_dropout) * T.dot(self.inpt, self.w) +
                              self.b)

        self.y_out = T.argmax(self.output, axis=1)

        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)

        self.output_dropout = softmax(
            T.dot(self.inpt_dropout, self.w) + self.b)
Example #23
    def test_optimize_xent_vector(self):
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(5).astype(config.floatX)
        y_val = numpy.asarray([2])

        x = T.vector('x')
        y = T.lvector('y')

        def print_graph(func):
            for i, node in enumerate(func.maker.fgraph.toposort()):
                print i, node
            # Last node should be the output
            print i, printing.pprint(node.outputs[0])
            print

        ## Test that a biased softmax is optimized correctly
        bias_expressions = [
            T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
            -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))
        ]

        for expr in bias_expressions:
            f = theano.function([x, y], expr, mode=mode)
            if verbose:
                print_graph(f)
            try:
                prev, last = f.maker.fgraph.toposort()[-2:]
                assert len(f.maker.fgraph.toposort()) == 5
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise
            g = theano.function([x, y], T.grad(expr, x), mode=mode)
            if verbose:
                print_graph(g)
            try:
                ops = [node.op for node in g.maker.fgraph.toposort()]
                assert len(ops) == 4
                assert crossentropy_softmax_1hot_with_bias_dx in ops
                assert softmax in ops
                assert softmax_grad not in ops
                g(x_val, y_val)
            except Exception:
                theano.printing.debugprint(g)
                raise
Example #24
    def test_xent_thing_int32(self):
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(3, 5).astype(config.floatX)
        y_val = numpy.asarray([2, 4, 1], dtype='int64')
        x = T.matrix('x')
        y = T.lvector('y')
        yi = T.cast(y, 'int32')
        expressions = [
            T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
            -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
            -T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
            T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi])
        ]

        for expr in expressions:
            # Verify the optimizer worked on the expressions
            f = theano.function([x, y], expr, mode=mode)
            if verbose:
                theano.printing.debugprint(f)
            try:
                ops = [node.op for node in f.maker.fgraph.toposort()]
                assert len(ops) == 5
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
                assert not [
                    1 for o in ops if isinstance(o, T.AdvancedSubtensor)
                ]
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

            # Also verify the gradient wrt x
            g = theano.function([x, y], T.grad(expr, x), mode=mode)
            if verbose:
                theano.printing.debugprint(g)
            try:
                ops = [node.op for node in g.maker.fgraph.toposort()]
                assert len(ops) == 5
                assert crossentropy_softmax_1hot_with_bias_dx in ops
                assert softmax in ops
                assert softmax_grad not in ops
                g(x_val, y_val)
            except Exception:
                theano.printing.debugprint(g)
                raise
Example #25
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Construct the graph to compute the softmax layer output.

        Args:
            inpt: The input var.
            inpt_dropout: The dropouted input var.
            mini_batch_size: The mini batch size.
        """
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax(T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout,
                                            self.w) + self.b)
Example #26
    def inner(mean, var):
        # Generate samples of the distribution.
        samples = rng.normal(size=mean.shape)
        std = T.sqrt(var)
        samples = samples * std + mean

        if axis == 1:
            result = softmax(samples) # XXX
            result.name = 'susp1'
        if axis == 2:
            samples_flat = samples.reshape((samples.shape[0] * samples.shape[1], samples.shape[2]))
            result_flat = softmax(samples_flat)
            result = result_flat.reshape(samples.shape)

        return result, T.zeros_like(var)
Example #27
    def test_xent_thing_int32(self):
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(3, 5).astype(config.floatX)
        y_val = numpy.asarray([2, 4, 1], dtype='int64')
        x = T.matrix('x')
        y = T.lvector('y')
        yi = T.cast(y, 'int32')
        expressions = [
                T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
                -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
                -T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
                T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi])
                ]

        for expr in expressions:
            # Verify the optimizer worked on the expressions
            f = theano.function([x, y], expr, mode=mode)
            if verbose:
                theano.printing.debugprint(f)
            try:
                ops = [node.op for node in f.maker.fgraph.toposort()]
                assert len(ops) == 5
                assert crossentropy_softmax_argmax_1hot_with_bias in ops
                assert not [1 for o in ops
                            if isinstance(o, T.AdvancedSubtensor)]
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

            # Also verify the gradient wrt x
            g = theano.function([x, y], T.grad(expr, x), mode=mode)
            if verbose:
                theano.printing.debugprint(g)
            try:
                ops = [node.op for node in g.maker.fgraph.toposort()]
                assert len(ops) == 5
                assert crossentropy_softmax_1hot_with_bias_dx in ops
                assert softmax in ops
                assert softmax_grad not in ops
                g(x_val, y_val)
            except Exception:
                theano.printing.debugprint(g)
                raise
Example #28
    def _get_output(self, layer_input):
        """Return layer's output.

        :param layer_input: Input in the format (n_batches, n_neurons).
        :return: Layer output.
        """
        return softmax(layer_input)
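
A quick illustration, assuming softmax here is theano.tensor.nnet.softmax, that each row of the (n_batches, n_neurons) input comes back as a probability distribution:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
f = theano.function([x], T.nnet.softmax(x))
probs = f(np.random.randn(3, 4).astype(theano.config.floatX))
assert np.allclose(probs.sum(axis=1), 1.0)  # one distribution per batch row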
Example #29
    def __init__(self, input, n_in, n_out, activation, rng=RandomState(1234)
        , layer_name="LogReg", W=None, b=None, borrow=True):

        # Weight matrix W
        if W != None: self.W = shared(W, name=layer_name+"_W", borrow=borrow)
        elif activation in (relu,softplus): 
            W_val = _asarray(rng.normal(loc=0, scale=0.01, 
                size=(n_in, n_out)), dtype=floatX)
            self.W = shared(W_val, name=layer_name+"_W", borrow=borrow)
        else:
            self.W = shared(zeros((n_in, n_out), dtype=floatX), 
                name=layer_name+"_W",
                borrow=borrow)

        # Bias vector
        if b!=None: self.b = shared(b, name=layer_name+"_b", borrow=borrow)
        elif activation in (relu,softplus): 
            b_val = ones((n_out,), dtype=floatX)
            self.b = shared(value=b_val, borrow=True)
        else:
            self.b = shared(zeros((n_out,), dtype=floatX),
                name=layer_name+"_b",
                borrow=borrow)

        # T.flatten(input, ndim=2)
        # Vector of prediction probabilities
        self.p_y_given_x = softmax(T.dot(input, self.W) + self.b)
        # Prediction
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # Parameters of the model
        self.params = [self.W, self.b]
Example #30
	def __init__(self,input,n_in,n_out):
		'''
		>>>type input: T.TensorType
		>>>para input: input data

		>>>type n_in: int
		>>>para n_in: num of input neurons

		>>>type n_out: int
		>>>para n_out: num of output neurons
		'''
		self.w=theano.shared(
			value=np.zeros((n_in,n_out),dtype=theano.config.floatX),
			name='w',
			borrow=True
			)
		#self.b=theano.shared(
		#	value=np.zeros((n_out,),dtype=theano.config.floatX),
		#	name='b',
		#	borrow=True
		#	)
		self.param=[self.w]

		self.output=softmax(T.dot(input,self.w))
		self.predict=T.argmax(self.output,axis=1)
Example #31
def bench_ConvMed(batchsize):
    data_x.value = randn(n_examples, 1, 96, 96)
    w0 = shared(rand(6, 1, 7, 7) * numpy.sqrt(6 / (25.)))
    b0 = shared(zeros(6))
    w1 = shared(rand(16, 6, 7, 7) * numpy.sqrt(6 / (25.)))
    b1 = shared(zeros(16))
    vv = shared(rand(16*8*8, 120) * numpy.sqrt(6.0/16./25))
    cc = shared(zeros(120))
    v = shared(zeros(120, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, v, c, vv, cc]

    c0 = tanh(conv2d(sx, w0, image_shape=(batchsize, 1, 96, 96), filter_shape=(6,1,7,7)) + b0.dimshuffle(0, 'x', 'x'))
    s0 = tanh(max_pool_2d(c0, (3,3))) # this is not the correct leNet5 model, but it's closer to

    c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 30, 30), filter_shape=(16,6,7,7)) + b1.dimshuffle(0, 'x', 'x'))
    s1 = tanh(max_pool_2d(c1, (3,3)))

    p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv)+cc), v)+c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()

    gparams = grad(cost, params)

    train = function([si, nsi], cost,
            updates=[(p,p-lr*gp) for p,gp in zip(params, gparams)])
    eval_and_report(train, "ConvMed", [batchsize], N=120)
Example #32
def bench_deep1000():
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6 / (inputs + 1000)))
    b0 = shared(zeros(1000))
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)))
    b1 = shared(zeros(1000))
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)))
    b2 = shared(zeros(1000))
    v = shared(zeros(1000, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, w2, b2, v, c]

    h0 = tanh(dot(sx, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)

    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()

    gparams = grad(cost, params)

    train = function([si, nsi],
                     cost,
                     updates=[(p, p - lr * gp)
                              for p, gp in zip(params, gparams)])
    eval_and_report(train, "mlp_784_1000_1000_1000_10")
Example #33
    def __init__(self,
                 input,
                 n_in,
                 n_out,
                 rng,
                 layer_name="LogReg",
                 W=None,
                 b=None,
                 borrow=True,
                 b_scale=0.1,
                 W_scale=0.01):

        # Weight matrix W
        if W != None: self.W = shared(W, name=layer_name + "_W", borrow=borrow)
        else:
            self.W = shared(zeros((n_in, n_out), dtype=floatX),
                            name=layer_name + "_W",
                            borrow=borrow)

        # Bias vector
        if b != None: self.b = shared(b, name=layer_name + "_b", borrow=borrow)
        else:
            self.b = shared(zeros((n_out, ), dtype=floatX),
                            name=layer_name + "_b",
                            borrow=borrow)

        # Vector of prediction probabilities
        self.p_y_given_x = softmax(T.dot(input, self.W) + self.b)
        # Prediction
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # Parameters of the model
        self.params = [self.W, self.b]
Example #34
def AttMemLayer(incomings,
                params,
                linear=0,
                w_name=None,
                w=None,
                w_initializer=init.HeUniform()):
    '''
  incomings = (u, u_shape, A, A_shape, C, C_shape)
  '''
    ((u, u_shape), (A, A_shape), (C, C_shape)) = incomings
    u_repeat = T.extra_ops.repeat(u.reshape((-1, 1, u_shape[-1])), C_shape[1],
                                  1)
    Au = T.concatenate((A, u_repeat), axis=2)

    w_name = w_name or 'AttMem_%d' % len(params)
    w_name = add_param((C_shape[-1] + u_shape[-1], 1), params, w_name, w,
                       w_initializer)
    #Aup = T.tensordot(Au, params[w_name], axes=[len(C_shape)-1, 0])
    #Aup = Aup.reshape((-1, C_shape[1]))
    #p = nnet.softmax(Aup)
    p = nnet.softmax(
        T.tensordot(Au, params[w_name], axes=[len(C_shape) - 1, 0]).reshape(
            (-1, C_shape[1])))

    p_shape = A_shape[:2]
    O = (C * p[:, :, None]).sum(axis=1)

    return ((O, u_shape), (p, p_shape))
Example #35
    def build_2048_ann(self, nb, nh, nh2):
        '''
        nb: number of input nodes
        nh: size of the first hidden layer
        nh2: size of the second hidden layer
        '''
        print("building")
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, nh2)))
        w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 4)))
        input = T.dvector('input')
        target = T.wvector('target')
        x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
        x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
        x3 = Tann.softmax(T.dot(x2, w3))
        error = T.sum(pow((target - x3), 2))
        params = [w1, w2, w3]
        gradients = T.grad(error, params)
        backprops = [(p, p - self.lrate * g)
                     for p, g in zip(params, gradients)]

        self.trainer = theano.function(inputs=[input, target],
                                       outputs=error,
                                       updates=backprops,
                                       allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input],
                                         outputs=x3,
                                         allow_input_downcast=True)
        print("Built")
Example #36
    def build_rectified_linear2_ann(self, nb, nh, nh2):
        #784
        #620
        '''
        Builds a neural network, using rectified linear units 2 as the activation function.
        '''
        print("Building rectified linear ann")
        w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
        w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, nh2)))
        w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 10)))
        input = T.dvector('input')
        target = T.wvector('target')
        x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
        x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
        x3 = Tann.softmax(T.dot(x2, w3))
        error = T.sum(pow((target - x3), 2))
        params = [w1, w2, w3]
        gradients = T.grad(error, params)
        backprops = [(p, p - self.lrate * g)
                     for p, g in zip(params, gradients)]

        self.trainer = theano.function(inputs=[input, target],
                                       outputs=error,
                                       updates=backprops,
                                       allow_input_downcast=True)
        self.predictor = theano.function(inputs=[input],
                                         outputs=x3,
                                         allow_input_downcast=True)
Example #37
    def test_softmax_optimizations_w_bias(self):
        x = tensor.matrix('x')
        b = tensor.vector('b')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot
        xe = op(x, one_of_n)

        fgraph = gof.FunctionGraph([x, b, one_of_n],
                                   [op(softmax(x + b), one_of_n)])
        assert fgraph.outputs[0].owner.op == op

        #print 'BEFORE'
        #for node in fgraph.toposort():
        #    print node.op
        #print printing.pprint(node.outputs[0])
        #print '----'

        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

        #print 'AFTER'
        #for node in fgraph.toposort():
        #    print node.op
        #print printing.pprint(node.outputs[0])
        #print '===='
        assert len(fgraph.toposort()) == 2

        assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
        assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
                crossentropy_softmax_argmax_1hot_with_bias)
Example #38
    def lstm_output(self, y_prev, ch_prev):
        """calculates info to pass to next time step.
        ch_prev is a vector of size 2*hdim"""

        c_prev = ch_prev[:self.hdim]#T.vector('c_prev')
        h_prev = ch_prev[self.hdim:]#T.vector('h_prev')

        # gates (input, forget, output)
        i_t = sigmoid(T.dot(self.Ui, h_prev))
        f_t = sigmoid(T.dot(self.Uf, h_prev))
        o_t = sigmoid(T.dot(self.Uo, h_prev))
        # new memory cell
        c_new_t = T.tanh(T.dot(self.Uc, h_prev))
        # final memory cell
        c_t = f_t * c_prev + i_t * c_new_t
        # final hidden state
        h_t = o_t * T.tanh(c_t)

        # Input vector for softmax
        theta_t = T.dot(self.U, h_t) + self.b
        # Softmax prob vector
        y_hat_t = softmax(theta_t.T).T
        # Softmax wraps output in another list, why??
        # (specifically it outputs a 2-d row, not a 1-d column)
        # y_hat_t = y_hat_t[0]
        # Compute new cost
        out_label = T.argmax(y_hat_t)

        # final joint state
        ch_t = T.concatenate([c_t, h_t])

        return (out_label, ch_t), scan_module.until(T.eq(out_label, self.out_end))
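
On the "wraps output in another list" comment above: Theano's softmax always returns a 2-d result, so a 1-d input comes back as a 1 x N row, which is why this code transposes around the call (and why other snippets here reshape back to the input shape). A small check of that behaviour, assuming softmax is theano.tensor.nnet.softmax:

import numpy as np
import theano
import theano.tensor as T

a = T.dvector('a')
f = theano.function([a], [T.nnet.softmax(a), T.nnet.softmax(a.T).T])
row, col = f(np.random.randn(4))
print(row.shape, col.shape)  # (1, 4) and (4, 1): still 2-d either way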
Example #39
 def rnn_output(self, y_prev, h_prev):
     h_t = T.tanh(T.dot(self.Wh, h_prev))
     # compute new out_label
     y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
     out_label = T.argmax(y_hat_t)
     
     return (out_label, h_t), scan_module.until(T.eq(out_label, self.out_end))
Example #40
def MultAttMemLayer(incomings, params, num_hid, linear=0, w_name=None, w=None, w_initializer=None):
  '''
  num_hid should be a tuple with length=len(w_name)-1
  incomings = (u, u_shape, A, A_shape, C, C_shape)
  '''
  if not w_name:
    _w_name = [None for _ in range(len(num_hid) + 1)]
  else:
    _w_name = [wn for wn in w_name]
  if not w:
    w = [None for _ in range(len(num_hid) + 1)]
  if not w_initializer:
    w_initializer = [init.HeUniform() for _ in range(len(num_hid) + 1)]
  ((u, u_shape), (A, A_shape), (C, C_shape)) = incomings
  u_repeat = T.extra_ops.repeat(u.reshape((-1, 1, u_shape[-1])), C_shape[1], 1)
  Au = T.concatenate((A, u_repeat), axis=2)
  
  _num_hid = (C_shape[-1] + u_shape[-1],) + num_hid + (1,)
  for i, nh in enumerate(_num_hid[:-1]):
    _w_name[i] = _w_name[i] or 'AttMem_%d' % len(params)
    _w_name[i] = add_param((nh, _num_hid[i+1]), params, _w_name[i], w[i], w_initializer[i])
    Au = T.tensordot(Au, params[_w_name[i]], axes=[len(C_shape)-1, 0])
  p = nnet.softmax(Au.reshape((-1, C_shape[1])))

  p_shape = A_shape[:2]
  O = (C * p[:, :, None]).sum(axis = 1)

  return ((O, u_shape), (p, p_shape))
Example #41
def bench_ConvSmall(batchsize):
    data_x.set_value(randn(n_examples, 1, 32, 32))
    w0 = shared(rand(6, 1, 5, 5) * numpy.sqrt(6 / (25.)))
    b0 = shared(zeros(6))
    w1 = shared(rand(16, 6, 5, 5) * numpy.sqrt(6 / (25.)))
    b1 = shared(zeros(16))
    vv = shared(rand(16 * 5 * 5, 120) * numpy.sqrt(6.0 / 16. / 25))
    cc = shared(zeros(120))
    v = shared(zeros(120, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, v, c, vv, cc]

    c0 = tanh(conv2d(sx, w0, image_shape=(batchsize, 1, 32, 32),
                     filter_shape=(6, 1, 5, 5)) + b0.dimshuffle(0, 'x', 'x'))
    # this is not the correct leNet5 model, but it's closer to
    s0 = tanh(max_pool_2d(c0, (2, 2)))

    c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 14, 14),
                     filter_shape=(16, 6, 5, 5)) +
              b1.dimshuffle(0, 'x', 'x'))
    s1 = tanh(max_pool_2d(c1, (2, 2)))

    p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()

    gparams = grad(cost, params)

    train = function([si, nsi], cost,
            updates=[(p, p - lr * gp) for p, gp  in zip(params, gparams)])

    eval_and_report(train, "ConvSmall", [batchsize], N=600)
Example #42
def bench_deep1000():
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6 / (inputs + 1000)))
    b0 = shared(zeros(1000))
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)))
    b1 = shared(zeros(1000))
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)))
    b2 = shared(zeros(1000))
    v = shared(zeros(1000, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, w2, b2, v, c]

    h0 = tanh(dot(sx, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)

    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()

    gparams = grad(cost, params)

    train = function(
        [si, nsi],
        cost,
        updates=[(p, p - lr * gp) for p, gp in zip(params, gparams)])
    eval_and_report(train, "mlp_784_1000_1000_1000_10")
Example #43
 def _step(h_tm1, y_tm1):
     h_t = sigmoid(
         T.dot(self.Whh[1], h_tm1) + T.dot(self.Whx2, y_tm1) +
         self.bh[1])
     a = T.dot(self.Why, h_t) + self.b
     y_t = T.reshape(softmax(a), a.shape)
     return [h_t, y_t]
Example #44
    def test_softmax_optimizations_w_bias_vector(self):
        x = tensor.vector('x')
        b = tensor.vector('b')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot
        fgraph = gof.FunctionGraph(
                [x, b, one_of_n],
                [op(softmax(x + b), one_of_n)])
        assert fgraph.outputs[0].owner.op == op
        #print 'BEFORE'
        #for node in fgraph.toposort():
        #    print node.op
        #print printing.pprint(node.outputs[0])
        #print '----'

        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
        #print 'AFTER'
        #for node in fgraph.toposort():
        #    print node.op
        #print '===='
        assert len(fgraph.toposort()) == 3
        assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
        assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
                crossentropy_softmax_argmax_1hot_with_bias)
Example #45
 def recurrence(xp_t, h_t_pre1, cxs):
     # Both the features and the hidden states are shaped (batch_size, n_hidden) = (n, 20)
     # (n, winx, 20) = T.concatenate((((n, winx-1, 20)), ((n, 1, 20))), axis=1)
     # context_x
     # Fixed-length matrix: append xp_t at the bottom and drop the first row, so the shape stays the same.
     cxs = T.concatenate(
         (
             cxs[:, 1:, :],  # shape=(n, winx-1, 20)
             xp_t.dimshuffle(0, 'x', 1)),  # shape=(n, 1, 20)
         axis=1)  # shape=(n, winx, 20)
     exs = T.dot(tanh(T.dot(cxs, qx)), rx)  # shape=(n, winx, 1)
     exs = T.Rebroadcast((2, True))(exs)  # make axis=2 broadcastable so it can be dropped
     axs0 = softmax(exs.dimshuffle(0, 1))  # shape=(n, winx); drop a dim, since softmax works row-wise
     axs = axs0.dimshuffle(0, 1, 'x')  # shape=(n, winx, 1); add the dim back
     axs = T.Rebroadcast((2, True))(axs)  # make axis=2 broadcastable so it can multiply cxs
     # (n, 20) = T.sum((n, winx, 20) * (n, winx, 1), axis=1)
     xc = T.sum(cxs * axs, axis=1)  # shape=(n, 20)
     # gru unit
     z_r = sigmoid(
         T.dot(ui[:2], xp_t.T) + T.dot(vc[:2], xc.T) +
         T.dot(wh[:2], h_t_pre1.T) + bi[:2])
     z, r = z_r[0].T, z_r[1].T  # shape=(n, 20)
     c = tanh(
         T.dot(ui[2], xp_t.T) + T.dot(vc[2], xc.T) +
         T.dot(wh[2], (r * h_t_pre1).T) + bi[2])
     h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T  # shape=(n, 20)
     return [h_t, cxs, axs0]  # also return the attention weight at each position, shape=(n, winx)
Example #46
    def __init__(self, input, n_in, n_out, activation, rng, layer_name="LogReg", 
        W=None, b=None, borrow=True):

        # Weight matrix W
        if W is not None: self.W = shared(W, name=layer_name+"_W", borrow=borrow)
        elif activation in (relu,softplus): 
            W_val = _asarray(rng.normal(loc=0, scale=0.01, 
                size=(n_in, n_out)), dtype=floatX)
            self.W = shared(W_val, name=layer_name+"_W", borrow=borrow)
        else:
            self.W = shared(zeros((n_in, n_out), dtype=floatX), 
                name=layer_name+"_W",
                borrow=borrow)

        # Bias vector
        if b is not None: self.b = shared(b, name=layer_name+"_b", borrow=borrow)
        elif activation in (relu,softplus): 
            b_val = ones((n_out,), dtype=floatX)
            self.b = shared(value=b_val, borrow=True)
        else:
            self.b = shared(zeros((n_out,), dtype=floatX),
                name=layer_name+"_b",
                borrow=borrow)
            

        # Vector of prediction probabilities
        self.p_y_given_x = softmax(T.dot(input, self.W) + self.b)
        # Prediction
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # Parameters of the model
        self.params = [self.W, self.b]
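The relu/softplus branch above pairs small-scale random weights with a bias of ones; with a zero bias and weights of scale 0.01, roughly half of the rectified units would start out inactive. A quick numpy illustration of that effect (the shapes are arbitrary):

import numpy

relu = lambda z: numpy.maximum(0.0, z)
rng = numpy.random.RandomState(0)
x = rng.randn(100, 50)
W = rng.normal(loc=0, scale=0.01, size=(50, 20))

print((relu(x.dot(W) + 0.0) == 0).mean())      # about half the units are dead
print((relu(x.dot(W) + 1.0) == 0).mean())      # essentially none with bias = 1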
Ejemplo n.º 47
0
    def test_softmax_optimizations_w_bias2(self):
        x = tensor.matrix('x')
        b = tensor.vector('b')
        c = tensor.vector('c')
        one_of_n = tensor.lvector('one_of_n')
        op = crossentropy_categorical_1hot

        env = gof.Env(
                [x, b, c, one_of_n],
                [op(softmax(T.add(x,b,c)), one_of_n)])
        assert env.outputs[0].owner.op == op

        print 'BEFORE'
        for node in env.toposort():
            print node.op
        print '----'

        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)

        print 'AFTER'
        for node in env.toposort():
            print node.op
        print '===='
        assert len(env.toposort()) == 3

        assert str(env.outputs[0].owner.op) == 'OutputGuard'
        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
    def lstm_output(self, y_prev, ch_prev):
        """calculates info to pass to next time step.
        ch_prev is a vector of size 2*hdim"""

        c_prev = ch_prev[:self.hdim]  #T.vector('c_prev')
        h_prev = ch_prev[self.hdim:]  #T.vector('h_prev')

        # gates (input, forget, output)
        i_t = sigmoid(T.dot(self.Ui, h_prev))
        f_t = sigmoid(T.dot(self.Uf, h_prev))
        o_t = sigmoid(T.dot(self.Uo, h_prev))
        # new memory cell
        c_new_t = T.tanh(T.dot(self.Uc, h_prev))
        # final memory cell
        c_t = f_t * c_prev + i_t * c_new_t
        # final hidden state
        h_t = o_t * T.tanh(c_t)

        # Input vector for softmax
        theta_t = T.dot(self.U, h_t) + self.b
        # Softmax prob vector
        y_hat_t = softmax(theta_t.T).T
        # Softmax wraps output in another list, why??
        # (specifically it outputs a 2-d row, not a 1-d column)
        # y_hat_t = y_hat_t[0]
        # Compute new cost
        out_label = T.argmax(y_hat_t)

        # final joint state
        ch_t = T.concatenate([c_t, h_t])

        return (out_label,
                ch_t), scan_module.until(T.eq(out_label, self.out_end))
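For reference, this decoder step packs the cell and hidden state into one vector and drives all gates from h_prev alone, with no input term. A minimal numpy sketch of one such step under assumed sizes (hdim=3, vocab=5; the weights here are random stand-ins):

import numpy

def sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

hdim, vocab = 3, 5
rng = numpy.random.RandomState(0)
Ui, Uf, Uo, Uc = (rng.randn(hdim, hdim) for _ in range(4))
U, b = rng.randn(vocab, hdim), numpy.zeros(vocab)

ch_prev = rng.randn(2 * hdim)                  # [c_prev ; h_prev] packed together
c_prev, h_prev = ch_prev[:hdim], ch_prev[hdim:]

i_t = sigmoid(Ui.dot(h_prev))                  # input gate
f_t = sigmoid(Uf.dot(h_prev))                  # forget gate
o_t = sigmoid(Uo.dot(h_prev))                  # output gate
c_t = f_t * c_prev + i_t * numpy.tanh(Uc.dot(h_prev))
h_t = o_t * numpy.tanh(c_t)
theta = U.dot(h_t) + b
y_hat = numpy.exp(theta) / numpy.exp(theta).sum()
print(int(numpy.argmax(y_hat)), numpy.concatenate([c_t, h_t]).shape)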
Ejemplo n.º 50
0
    def rnn_output(self, y_prev, h_prev):
        h_t = T.tanh(T.dot(self.Wh, h_prev))
        # compute new out_label
        y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
        out_label = T.argmax(y_hat_t)

        return (out_label,
                h_t), scan_module.until(T.eq(out_label, self.out_end))
Ejemplo n.º 51
0
def test_argmax_pushdown():
    x = tensor.dmatrix()

    #test that the max_and_argmax is pushed down if the max is not used
    out = tensor.max_and_argmax(
            softmax(tensor.exp(tensor.tanh(sigmoid(x)))),
            axis=-1)[1]
    env = gof.Env(
            [x],
            [out])

    theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(env)

    #print 'AFTER'
    #for node in env.toposort():
        #print node.op
    assert len(env.toposort()) == 2 # an output_guard is second
    assert env.toposort()[0].op == tensor.basic._max_and_argmax
    assert str(env.toposort()[1].op) == 'OutputGuard'
    x = tensor.dmatrix()
    #test that the max_and_argmax is not pushed down if the max is used
    out = tensor.max_and_argmax(
            softmax(tensor.exp(tensor.tanh(sigmoid(x)))),
            axis=-1)[0]
    env = gof.Env(
            [x],
            [out])

    backup = config.warn.argmax_pushdown_bug
    config.warn.argmax_pushdown_bug = False
    try:
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
    finally:
        config.warn.argmax_pushdown_bug = backup

    #print 'AFTER'
    #for node in env.toposort():
        #print node.op
    assert len(env.toposort()) == 4  # the output_guard is last
    assert isinstance(env.toposort()[0].op, tensor.Elemwise)
    assert isinstance(env.toposort()[1].op, Softmax)
    assert isinstance(env.toposort()[2].op, tensor.CAReduce)
    assert isinstance(env.toposort()[2].op.scalar_op, theano.scalar.Maximum)
    assert str(env.toposort()[3].op) == 'OutputGuard'
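The pushdown in the first case is valid because softmax is strictly increasing within each row, so argmax(softmax(f(x))) == argmax(f(x)); in the second case the max value itself is needed, so the softmax has to stay. A quick numpy check of the invariance (illustrative only):

import numpy

rng = numpy.random.RandomState(42)
a = rng.randn(4, 6)
s = numpy.exp(a) / numpy.exp(a).sum(axis=1, keepdims=True)
assert (s.argmax(axis=1) == a.argmax(axis=1)).all()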
Ejemplo n.º 52
0
    def drnn_timestep(self, x_t, old_cost, h_prev, ys):

        Lx_t = self.L[:, x_t]
        # hidden-state update: plain tanh RNN over the embedded input and previous state
        h_t = T.tanh(T.dot(self.Wx, Lx_t) + T.dot(self.Wh, h_prev))
        y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
        cost = T.sum(-T.log(y_hat_t[ys, T.arange(ys.shape[0])]))
        return cost, h_t
 def set_input(self, inpt, input_dropout, mini_batch_size):
     '''
     Sets the input for the Softmax layer by reshaping it to a matrix of size
     'mini_batch_size' x 'n_in', then sets the output by a forward pass through
     the softmax. 'input_dropout' and 'output_dropout' are set using the
     dropout layer defined earlier.
     '''
     self.inpt = inpt.reshape((mini_batch_size, self.n_in))
     self.output = softmax((1 - self.p_dropout) *
                           Tensor.dot(self.inpt, self.weights) +
                           self.biases)
     self.y_out = Tensor.argmax(self.output, axis=1)
     self.input_dropout = dropout_layer(
         input_dropout.reshape((mini_batch_size, self.n_in)),
         self.p_dropout)
     self.output_dropout = softmax(
         Tensor.dot(self.input_dropout, self.weights) + self.biases)
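The (1 - p_dropout) factor on the clean path is the usual test-time rescaling: during training each input unit survives with probability 1 - p_dropout, so at test time, when every unit is present, the summed input is scaled down to match the training-time expectation. A tiny numpy illustration with made-up sizes:

import numpy

rng = numpy.random.RandomState(0)
p_dropout = 0.5
x = rng.randn(1000)
w = rng.randn(1000)

# training-time path: random mask, no scaling (as in output_dropout above)
masked = [(x * rng.binomial(1, 1 - p_dropout, x.shape)).dot(w) for _ in range(2000)]
# test-time path: no mask, activation scaled by (1 - p_dropout) (as in output above)
scaled = (1 - p_dropout) * x.dot(w)

print(scaled, numpy.mean(masked))              # the two agree in expectation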
Ejemplo n.º 55
0
 def forward_propagation_NAG(self, inpt, velocity, alph):
     z = T.dot(self.weights + alph*velocity, inpt) + self.biases.dimshuffle(0,'x')
     if not self.last_flag:
         active = ReLU(z)
     else:
         active = softmax(z.T)            
     return active
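Evaluating the layer at self.weights + alph*velocity is the Nesterov "lookahead": the gradient is taken at the point momentum is about to carry the weights to, rather than at the current weights. A toy sketch of the surrounding update on a 1-D quadratic (the names here are illustrative, not this class's attributes):

# toy cost 0.5 * w**2, so the gradient at any point is just that point
w, velocity, alph, lr = 5.0, 0.0, 0.9, 0.1
for _ in range(100):
    lookahead = w + alph * velocity        # same trick as weights + alph*velocity
    grad = lookahead                       # gradient evaluated at the lookahead point
    velocity = alph * velocity - lr * grad
    w += velocity
print(w)                                   # heads toward the minimum at 0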
     
Ejemplo n.º 56
0
    def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False):
        super(Module_Nclass, self).__init__() #boilerplate

        #self.x = module.Member(x) if x is not None else T.matrix('input')
        if x is not None:
            self.x = (x)
        else: self.x = T.matrix('input')
        #self.targ = module.Member(targ) if targ is not None else T.lvector()
        if targ is not None:
            self.targ = (targ)
        else: self.targ = T.lvector()

        #self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
        if w is not None:
            self.w = (w)
        else: self.w = (T.dmatrix())
        #self.b = module.Member(b) if b is not None else module.Member(T.dvector())
        if b is not None:
            self.b = (b)
        else: self.b = (T.dvector())
        #self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
        if lr is not None:
            self.lr = (lr)
        else: self.lr = (T.dscalar())

        self.params = [p for p in [self.w, self.b] if p.owner is None]

        linear_output = T.dot(self.x, self.w) + self.b

        (xent, softmax, max_pr, argmax) = NN.crossentropy_softmax_max_and_argmax_1hot(
                linear_output, self.targ)
        sum_xent = T.sum(xent)

        self.softmax = softmax
        self.argmax = argmax
        self.max_pr = max_pr
        self.sum_xent = sum_xent

        # Softmax being computed directly.
        softmax_unsupervised = NN.softmax(linear_output)
        self.softmax_unsupervised = softmax_unsupervised

        #compatibility with current implementation of stacker/daa or something
        #TODO: remove this, make a wrapper
        self.cost = self.sum_xent
        self.input = self.x
        # TODO: I want to make output = linear_output.
        self.output = self.softmax_unsupervised

        #define the apply method
        self.pred = T.argmax(linear_output, axis=1)
        #self.apply = module.Method([self.input], self.pred)

        #self.validate = module.Method([self.input, self.targ], [self.cost, self.argmax, self.max_pr])
        #self.softmax_output = module.Method([self.input], self.softmax_unsupervised)

        if self.params:
            gparams = T.grad(sum_xent, self.params)
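The snippet ends right after gparams is computed; in this style of module the gradients would normally feed a plain SGD update on the shared parameters. A self-contained sketch of that usual follow-up, in the same spirit but not this class's code:

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
targ = T.lvector('targ')
w = theano.shared(numpy.zeros((5, 3)), name='w')
b = theano.shared(numpy.zeros(3), name='b')

p_y = T.nnet.softmax(T.dot(x, w) + b)
cost = -T.mean(T.log(p_y)[T.arange(targ.shape[0]), targ])
gparams = T.grad(cost, [w, b])
updates = [(p, p - 0.1 * gp) for p, gp in zip([w, b], gparams)]
train = theano.function([x, targ], cost, updates=updates)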
Ejemplo n.º 57
0
 def __init__(self, input, w, b, params=[]):
     self.output = nnet.softmax(theano.dot(input, w) + b)
     self.l1 = abs(w).sum()
     self.l2_sqr = (w ** 2).sum()
     self.argmax = theano.tensor.argmax(theano.dot(input, w) + b,
                                        axis=input.ndim - 1)
     self.input = input
     self.w = w
     self.b = b
     self.params = params
Ejemplo n.º 58
0
	def predictInstance(self,data):
		'''
		>>>classify newly arrived data

		>>>type data: T.tensor4
		>>>para data: new data to be predicted
		'''
		p=softmax(T.dot(data,self.w))
		return T.argmax(p,axis=1)
Ejemplo n.º 59
0
    def drnn_output(self, x_t, old_label, h_prev):

        Lx_t = self.L[:,x_t]
        h_t = T.tanh(T.dot(self.Wx, Lx_t) + T.dot(self.Wh, h_prev))
        print h_t.type
        y_hat_t = softmax(T.dot(self.U, h_t) + self.b)[0]
        out_label = T.argmax(y_hat_t)

        return out_label, h_t
Ejemplo n.º 60
0
    def test_xent_thing_int32(self):
        verbose = 0
        mode = theano.compile.mode.get_default_mode()
        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
        x_val = rng.randn(3,5)
        b_val = rng.randn(5)
        y_val = numpy.asarray([2,4,1], dtype='int64')

        x = T.dmatrix('x')
        b = T.dvector('b')
        y = T.lvector('y')
        yi = T.cast(y, 'int32')

        expressions = [
                T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
                -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
                -T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
                T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi])
                ]

        for expr in expressions:
            # Verify the optimizer worked on the expressions
            f = theano.function([x,y], expr, mode=mode)
            if verbose:
                theano.printing.debugprint(f)
            try:
                assert len(f.maker.env.toposort()) == 5
                f(x_val, y_val)
            except Exception:
                theano.printing.debugprint(f)
                raise

            # Also verify the gradient wrt x
            g = theano.function([x,y], T.grad(expr, x), mode=mode)
            if verbose:
                theano.printing.debugprint(g)
            try:
                assert len(g.maker.env.toposort()) == 5
                g(x_val, y_val)
            except Exception:
                theano.printing.debugprint(g)
                raise
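All four expressions above compute the same scalar, the summed cross-entropy of the true classes, which is why a single fused graph can serve for each of them; a quick numpy check of that equivalence:

import numpy

rng = numpy.random.RandomState(0)
x_val = rng.randn(3, 5)
y_val = numpy.array([2, 4, 1])

sm = numpy.exp(x_val) / numpy.exp(x_val).sum(axis=1, keepdims=True)
a = numpy.sum(-numpy.log(sm[numpy.arange(3), y_val]))   # index first, then sum
b = -numpy.sum(numpy.log(sm)[numpy.arange(3), y_val])   # log the whole matrix, then index
assert numpy.allclose(a, b)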