def test_specify_shape_inplace(self):
            # test that specify_shape doesn't break inserting an inplace op

            dtype = self.dtype
            if dtype is None:
                dtype = theano.config.floatX

            rng = numpy.random.RandomState(utt.fetch_seed())
            a = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
            a = self.cast_value(a)
            a_shared = self.shared_constructor(a)
            b = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
            b = self.cast_value(b)
            b_shared = self.shared_constructor(b)
            s = numpy.zeros((40, 40), dtype=dtype)
            s = self.cast_value(s)
            s_shared = self.shared_constructor(s)
            f = theano.function([], updates={s_shared: theano.dot(a_shared, b_shared) + s_shared})
            topo = f.maker.env.toposort()
            f()
            # [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
            if theano.config.mode != "FAST_COMPILE":
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(
                    node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm)
                )
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
            # There is no inplace gemm for sparse
            # assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "StructuredDot")
            s_shared_specify = tensor.specify_shape(s_shared, s_shared.get_value(borrow=True).shape)

            # now test with the specify shape op in the output
            f = theano.function(
                [], s_shared.shape, updates={s_shared: theano.dot(a_shared, b_shared) + s_shared_specify}
            )
            topo = f.maker.env.toposort()
            shp = f()
            assert numpy.all(shp == (40, 40))
            if theano.config.mode != "FAST_COMPILE":
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(
                    node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm)
                )
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
            # now test with the specify shape op in the inputs and outputs
            a_shared = tensor.specify_shape(a_shared, a_shared.get_value(borrow=True).shape)
            b_shared = tensor.specify_shape(b_shared, b_shared.get_value(borrow=True).shape)

            f = theano.function(
                [], s_shared.shape, updates={s_shared: theano.dot(a_shared, b_shared) + s_shared_specify}
            )
            topo = f.maker.env.toposort()
            shp = f()
            assert numpy.all(shp == (40, 40))
            if theano.config.mode != "FAST_COMPILE":
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(
                    node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm)
                )
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
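A minimal standalone sketch of the pattern the test above exercises (assuming only numpy and theano; the names here are illustrative, not taken from the test harness): specify_shape only asserts the runtime shape, so the optimizer is still free to rewrite dot(a, b) + x into an in-place Gemm on the shared storage.

import numpy
import theano
import theano.tensor as tensor

x = theano.shared(numpy.zeros((4, 4), dtype=theano.config.floatX))
a = theano.shared(numpy.ones((4, 4), dtype=theano.config.floatX))
b = theano.shared(numpy.ones((4, 4), dtype=theano.config.floatX))
# Assert the shape of the shared variable without blocking the inplace rewrite.
x_spec = tensor.specify_shape(x, x.get_value(borrow=True).shape)
f = theano.function([], [], updates=[(x, theano.dot(a, b) + x_spec)])
f()  # x now holds dot(a, b) + x, computed by an (in-place) Gemm when optimized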
Example #2
def encoder(infomatf, infomatb, htm1matf, ctm1matf, htm1matb, ctm1matb, Eenf, Eenb, Wenf, Wenb, benf, benb):
    # infomatf / infomatb are matrices of shape (n_batch, D)
    dim = Eenf.shape[1]
    #
    xtmatf = theano.dot(infomatf, Eenf)
    xtmatb = theano.dot(infomatb, Eenb)
    #
    pretranf = T.concatenate([xtmatf, htm1matf], axis=1)
    pretranb = T.concatenate([xtmatb, htm1matb], axis=1)
    #
    posttranf = theano.dot(pretranf, Wenf) + benf
    posttranb = theano.dot(pretranb, Wenb) + benb
    #
    itmatf = T.nnet.sigmoid(posttranf[:, 0:dim])
    ftmatf = T.nnet.sigmoid(posttranf[:, dim : (2 * dim)])
    gtmatf = T.tanh(posttranf[:, (2 * dim) : (3 * dim)])
    otmatf = T.nnet.sigmoid(posttranf[:, (3 * dim) :])
    ctmatf = ftmatf * ctm1matf + itmatf * gtmatf
    #
    htmatf = otmatf * T.tanh(ctmatf)
    #
    itmatb = T.nnet.sigmoid(posttranb[:, 0:dim])
    ftmatb = T.nnet.sigmoid(posttranb[:, dim : (2 * dim)])
    gtmatb = T.tanh(posttranb[:, (2 * dim) : (3 * dim)])
    otmatb = T.nnet.sigmoid(posttranb[:, (3 * dim) :])
    ctmatb = ftmatb * ctm1matb + itmatb * gtmatb
    #
    htmatb = otmatb * T.tanh(ctmatb)
    #
    return htmatf, ctmatf, htmatb, ctmatb
Example #3
 def decoder(self, lang, h_tm1_dec, c_tm1_dec):
     x_t_lang = theano.dot(lang, self.Emb_dec)
     #
     beta1 = tensor.tensordot(self.scope_att, self.U_att, (2, 0))
     beta2 = theano.dot(h_tm1_dec, self.W_att)
     beta3 = tensor.tanh(beta1 + beta2)
     beta4 = tensor.tensordot(beta3, self.b_att,
                              (2, 0))  #  |->  # lines * # batch
     pre_alpha = tensor.nnet.softmax(tensor.transpose(beta4, axes=(1, 0)))
     #
     pre_alpha *= self.weights_pre_sel  # Alpha
     alpha = pre_alpha / pre_alpha.sum(axis=1, keepdims=True)
     #
     z_t = tensor.sum(alpha[:, :, None] *
                      tensor.transpose(self.scope_att, axes=(1, 0, 2)),
                      axis=1)
     #
     pre_tran = tensor.concatenate([x_t_lang, h_tm1_dec, z_t], axis=1)
     post_tran = theano.dot(pre_tran, self.W_dec) + self.b_dec
     #
     i_t = tensor.nnet.sigmoid(post_tran[:, :self.dim_model])
     f_t = tensor.nnet.sigmoid(post_tran[:,
                                         self.dim_model:2 * self.dim_model])
     g_t = tensor.tanh(post_tran[:, 2 * self.dim_model:3 * self.dim_model])
     o_t = tensor.nnet.sigmoid(post_tran[:, 3 * self.dim_model:])
     c_t_dec = f_t * c_tm1_dec + i_t * g_t
     h_t_dec = o_t * tensor.tanh(c_t_dec)
     #
     pre_y = tensor.concatenate([h_t_dec, z_t], axis=1)
     y_t_0 = theano.dot((x_t_lang + theano.dot(pre_y, self.L)), self.L_0)
     y_t = tensor.nnet.softmax(y_t_0)
     log_y_t = tensor.log(y_t + numpy.float32(1e-8))
     return h_t_dec, c_t_dec, y_t, log_y_t
Example #4
    def compute_output(self):

        label_results = self.process_label_results(
            self.semantic_prediction)  #tensor.round(self.semantic_prediction)
        print(label_results)
        print(tensor.round(self.semantic_prediction))

        label_specific_Ws = tensor.tensordot(label_results,
                                             self.Ws,
                                             axes=[1, 0])

        label_specific_Vs = tensor.tensordot(label_results,
                                             self.Vs,
                                             axes=[1, 0])

        label_specific_W = th.dot(label_specific_Ws, self.W)

        label_specific_V = th.dot(label_specific_Vs, self.V)

        # compute output
        self.output = getFunction('softmax')(
            tensor.batched_dot(self.input, label_specific_W) +
            tensor.batched_dot(self.extra_input, label_specific_V) + self.b)

        for i in range(len(self.semantic_label_map.keys()) + 1):
            ho = self.get_output(i)
            self.output_hybrids.append(ho)
Example #5
def OneStep(vsample) :
    hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
    hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
    vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
    print(hmean)
    return trng.binomial(size=vsample.shape, n=1, p=vmean,
                         dtype=theano.config.floatX)
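OneStep above is a single Gibbs step of an RBM; a hedged sketch of how such a step is typically chained with theano.scan (assuming v0 = T.matrix('v0') and that W, bhid, bvis, trng are defined as in the snippet):

v0 = T.matrix('v0')
# Run 10 Gibbs steps; `updates` carries the random-stream state and must be
# passed to theano.function so the sampler advances between calls.
samples, updates = theano.scan(OneStep, outputs_info=[v0], n_steps=10)
gibbs10 = theano.function([v0], samples[-1], updates=updates)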
Example #6
 def one_step(self, x_t, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
     h_t = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
     y_t = theano.dot(h_t, W_ho) + b_o
     y_t = sigmoid(y_t)
     if self.ignore_zero:
         return [h_t, y_t], theano.scan_module.until(T.eq(T.sum(abs(x_t)), 0))
     return [h_t, y_t]
    def _step2(self, x_t, h_tm1, c_tm1, x_w, h_w, c_w, W_co, b_i, b_f, b_c,
               b_o):

        sigma = lasagne.nonlinearities.sigmoid

        # for the other activation function we use the tanh
        act = T.tanh

        # sequences: x_t
        # prior results: h_tm1, c_tm1
        # non-sequences: W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xy, W_hy, W_cy, b_y
        x_prod = theano.dot(x_t, x_w)
        h_prod = theano.dot(h_tm1, h_w)
        c_prod = theano.dot(c_tm1, c_w)


        i_t = sigma(self._slice(x_prod, 0, self.dim_proj)
                    + self._slice(h_prod, 0, self.dim_proj)
                    + self._slice(c_prod, 0, self.dim_proj)
                    + b_i.dimshuffle(('x', 0)))

        f_t = sigma(self._slice(x_prod, 1, self.dim_proj)
                    + self._slice(h_prod, 1, self.dim_proj)
                    + self._slice(c_prod, 1, self.dim_proj)
                    + b_f.dimshuffle(('x', 0)))

        c_t = f_t * c_tm1 + i_t * act(self._slice(x_prod, 2, self.dim_proj)
                                      + self._slice(h_prod, 2, self.dim_proj)
                                      + b_c.dimshuffle(('x', 0)))

        o_t = sigma(self._slice(x_prod, 3, self.dim_proj)
                    + self._slice(h_prod, 3, self.dim_proj)
                    + theano.dot(c_t, W_co)
                    + b_o.dimshuffle(('x', 0)))

        h_t = o_t * act(c_t)

        return [h_t, c_t]
Example #8
    def state_with_attend(self, h1, attended, x_m=None):

        # attended: (src_sent_len, batch_size, src_nhids*2)
        _az = theano.dot(attended, self.W_cz) + self.b_z2
        _hz = theano.dot(h1, self.W_hz2)
        if self.ln is not False:
            _az = ln(_az, self.g1, self.b1)
            _hz = ln(_hz, self.g2, self.b2)
        z = T.nnet.sigmoid(_az + _hz)
        # z: (batch_size, trg_nhids)

        _ar = theano.dot(attended, self.W_cr) + self.b_r2
        _hr = theano.dot(h1, self.W_hr2)
        if self.ln is not False:
            _ar = ln(_ar, self.g1, self.b1)
            _hr = ln(_hr, self.g2, self.b2)
        r = T.nnet.sigmoid(_ar + _hr)
        # r: (batch_size, trg_nhids)

        # _ah: (batch_size, trg_nhids)
        _ah = theano.dot(attended, self.W_ch)
        _hh = T.dot(h1, self.W_hh2) + self.b_h2
        if self.ln is not False:
            _ah = ln(_ah, self.g3, self.b3)
            _hh = ln(_hh, self.g4, self.b4)

        h2 = T.tanh(_ah + _hh * r)
        h2 = z * h1 + (1. - z) * h2

        if x_m is not None:
            h2 = x_m[:, None] * h2 + (1. - x_m)[:, None] * h1
        # h2: (batch_size, trg_nhids)
        return h2
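In equation form, the update computed above (ignoring the optional layer normalization; a is the attended context and ⊙ the elementwise product):

z = \sigma(a W_{cz} + b_{z2} + h_1 W_{hz2}), \qquad r = \sigma(a W_{cr} + b_{r2} + h_1 W_{hr2})
\tilde{h}_2 = \tanh\!\big(a W_{ch} + r \odot (h_1 W_{hh2} + b_{h2})\big), \qquad h_2 = z \odot h_1 + (1 - z) \odot \tilde{h}_2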
def test_dtw():
    W = theano.shared(numpy.eye(4, dtype=theano.config.floatX), name='W')
    theano.config.compute_test_value = 'raise'
    x1 = tt.matrix('x1')
    x2 = tt.matrix('x2')
    x1.tag.test_value = numpy.array([[0.1] * 4, [-0.1] * 4],
                                    dtype=theano.config.floatX)
    x2.tag.test_value = numpy.array(
        [[0.1] * 4, [0.1] * 1 + [-0.1] * 3, [-0.1] * 4],
        dtype=theano.config.floatX)
    e1 = theano.dot(x1, W)
    e2 = theano.dot(x2, W)
    y = theano_batch_dtw.dtw.theano_symbolic_dtw(e1,
                                                 e2,
                                                 tt.constant(2, dtype='int64'),
                                                 tt.constant(3, dtype='int64'),
                                                 normalize=False)
    theano.printing.debugprint(y)
    g = theano.grad(y, W)
    theano.printing.debugprint(g)
    print('y', y.dtype, y.tag.test_value.shape, '\n', y.tag.test_value)
    print('g', g.dtype, g.tag.test_value.shape, '\n', g.tag.test_value)
    path, cost = speech_dtw._dtw.multivariate_dtw(e1.tag.test_value,
                                                  e2.tag.test_value)
    print(cost, list(reversed(path)))
Example #10
def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out):
    x_t = T.tanh(theano.dot(x_tm1, W) +
                 theano.dot(u_t, W_in_1) +
                 theano.dot(u_tm4, W_in_2) +
                 theano.dot(y_tm1, W_feedback))
    y_t = theano.dot(x_tm3, W_out)
    return [x_t, y_t]
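A sketch of the theano.scan call a multi-tap step like this pairs with (u, x0, y0 are assumed input-sequence and initial-state variables; the tap offsets must line up with the argument order u_tm4, u_t, x_tm3, x_tm1, y_tm1):

# u supplies taps -4 and 0; x0 must contain enough initial timesteps to cover
# taps -3 and -1; y0 covers the default output tap -1.
([x_vals, y_vals], updates) = theano.scan(
    fn=oneStep,
    sequences=dict(input=u, taps=[-4, -0]),
    outputs_info=[dict(initial=x0, taps=[-3, -1]), y0],
    non_sequences=[W, W_in_1, W_in_2, W_feedback, W_out])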
Example #11
    def apply(self, state_below, mask_below, context, c_mask):
        hiddens, attended = self._forward(state_below, mask_below, context,
                                          c_mask)

        # state_below: shape(trg_sent_len-1, batch_size, trgw_embsz)
        # hiddens:     shape(trg_sent_len-1, batch_size, trg_nhids)
        # attended:    shape(trg_sent_len-1, batch_size, src_nhids*2)
        # note: the scan function will remember all previous states
        combine = T.concatenate([state_below, hiddens, attended], axis=2)
        # combine:  shape(trg_sent_len-1, batch_size, trgw_embsz+trg_nhids+src_nhids*2)

        # self.W_m: shape(trgw_embsz + trg_nhids + c_hids, n_out*2)
        # self.b_m: shape(n_out*2,)
        if self.max_out:
            merge_out = theano.dot(combine, self.W_m) + self.b_m
            # merge_out: shape(trg_sent_len-1, batch_size, n_out*2)
            merge_out = merge_out.reshape(
                (merge_out.shape[0], merge_out.shape[1],
                 merge_out.shape[2] // 2, 2),
                ndim=4).max(axis=3)

        else:
            merge_out = T.tanh(theano.dot(combine, self.W_m) + self.b_m)
        '''
    such as:  (1, 2, 6)               ->        (1, 2, 3, 2)                  ->      (1, 2, 3)
        [[[ 1, 2, 3, 4, 5, 6],              [[[[1, 2], [3, 4], [5, 6]],         [[[ 2, 4, 6],
          [ 2, 3, 4, 5, 6, 7]]]     ->        [[2, 3], [4, 5], [6, 7]]]]    ->    [ 3, 5, 7]]]
        '''
        # mask_below[:, :, None] -> shape(trg_sent_len-1, batch_size, 1)
        return merge_out * mask_below[:, :, None]
Example #12
            def unfold():
                smearbckg = 1.
                if nbckg > 0:
                    bckgnormerr = [(-1. + nuis) / nuis if berr < 0. else berr
                                   for berr, nuis in zip(
                                       backgroundnormsysts, bckgnuisances)]
                    bckgnormerr = mc.math.stack(bckgnormerr)

                    smearedbackgrounds = backgrounds
                    if nobjsyst > 0:
                        smearbckg = smearbckg + theano.dot(
                            objnuisances, backgroundobjsysts)
                        smearedbackgrounds = backgrounds * smearbckg

                    bckg = theano.dot(1. + bckgnuisances * bckgnormerr,
                                      smearedbackgrounds)

                tresmat = array(resmat)
                reco = theano.dot(truth, tresmat)
                out = reco
                if nobjsyst > 0:
                    smear = 1. + theano.dot(objnuisances, signalobjsysts)
                    out = reco * smear
                if nbckg > 0:
                    out = bckg + out
                return out
def test_gemv1():
    ''' test vector1+dot(matrix,vector2) '''
    v1 = theano.tensor._shared(
        numpy.array(numpy.random.rand(2), dtype='float32'))
    v2 = theano.tensor._shared(
        numpy.array(numpy.random.rand(5), dtype='float32'))
    m = theano.tensor._shared(
        numpy.array(numpy.random.rand(5, 2), dtype='float32'))

    no_gpu_f = theano.function([],
                               v2 + theano.dot(m, v1),
                               mode=mode_without_gpu)
    gpu_f = theano.function([], v2 + theano.dot(m, v1), mode=mode_with_gpu)
    #gpu_f2 is needed to test the case when the input is not on the gpu
    #but the output is moved to the gpu.
    gpu_f2 = theano.function([],
                             cuda.gpu_from_host(v2 + theano.dot(m, v1)),
                             mode=mode_with_gpu)

    # Assert they produce the same output
    assert numpy.allclose(no_gpu_f(), gpu_f(), atol=atol)
    assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=atol)
    # Assert that the gpu version actually uses gpu
    assert sum([
        node.op is cuda.blas.gpu_gemm_inplace
        for node in gpu_f2.maker.env.toposort()
    ]) == 1
    assert sum([
        node.op is cuda.blas.gpu_gemm_inplace
        for node in gpu_f.maker.env.toposort()
    ]) == 1
Example #14
    def _step_forward_with_attention(self, x_t, x_m, h_tm1, c, c_mask, c_x):
        '''
        x_t: input at time t
        x_m: mask of x_t
        h_tm1: previous state
        c_x: context of the rnn
        '''
        # attended = self.attention_layer.apply(c, c_mask, h_tm1)
        # c_z = theano.dot(attended, self.W_cz)
        # c_r = theano.dot(attended, self.W_cr)
        # c_h = theano.dot(attended, self.W_ch)

        # return [self._step_forward_with_context(x_t, x_m, h_tm1, c_z, c_r, c_h), attended]

        #### new arc
        h1 = self._step_forward(x_t, x_m, h_tm1)
        attended = self.attention_layer.apply(c, c_mask, c_x,  h1 )
        z = T.nnet.sigmoid(theano.dot(attended, self.W_cz)
                           + theano.dot(h1, self.W_hz2) + self.b_z2)
        r = T.nnet.sigmoid(theano.dot(attended, self.W_cr)
                           + theano.dot(h1, self.W_hr2) + self.b_r2)
        c_h = theano.dot(attended, self.W_ch)
        h2 = T.tanh((T.dot(h1, self.W_hh2) + self.b_h2) * r + c_h)
        h2 = h1 * z + (1. - z) * h2
        if x_m is not None:
            h2 = x_m[:, None] * h2 + (1. - x_m)[:, None] * h1
        return h2, attended
Example #15
    def __init__(self, rng, input, n_in, n_out, diffusion, W=None,
                 activation=T.nnet.relu):

        self.input = input

        if W is None:
            W_values = np.asarray(
                rng.uniform(
                    low=-np.sqrt(6. / (n_in + n_out)),
                    high=np.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        self.W = W
        self.D = diffusion

        lin_output = theano.dot(theano.dot(diffusion, input), self.W)
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        self.params = [self.W]
Example #16
    def apply(self, state_below, mask_below, init_state=None, context=None):
        if state_below.ndim == 3:
            batch_size = state_below.shape[1]
            n_steps = state_below.shape[0]
        else:
            raise NotImplementedError


        if self.with_contex:
            if init_state is None:
                init_state = T.tanh(theano.dot(context, self.W_c_init))
            c_z = theano.dot(context, self.W_cz)
            c_r = theano.dot(context, self.W_cr)
            c_h = theano.dot(context, self.W_ch)
            non_sequences = [c_z, c_r, c_h]
            rval, updates = theano.scan(self._step_forward_with_context,
                                        sequences=[state_below, mask_below],
                                        outputs_info=[init_state],
                                        non_sequences=non_sequences,
                                        n_steps=n_steps
                                        )

        else:
            if init_state is None:
                init_state = T.alloc(numpy.float32(0.), batch_size, self.n_hids)
            rval, updates = theano.scan(self._step_forward,
                                        sequences=[state_below, mask_below],
                                        outputs_info=[init_state],
                                        n_steps=n_steps
                                        )
        self.output = rval
        return self.output
        def recurrence(x_t, feat_t, h_tm1):
            # i_t = sigma(theano.dot(x_t, self.W_xi) + theano.dot(h_tm1, self.W_hi) + theano.dot(c_tm1, self.W_ci) + self.b_i)
            # f_t = sigma(theano.dot(x_t, self.W_xf) + theano.dot(h_tm1, self.W_hf) + theano.dot(c_tm1, self.W_cf) + self.b_f)
            # c_t = f_t * c_tm1 + i_t * T.tanh(theano.dot(x_t, self.W_xc) + theano.dot(h_tm1, self.W_hc) + self.b_c)
            # o_t = sigma(theano.dot(x_t, self.W_xo)+ theano.dot(h_tm1, self.W_ho) + theano.dot(c_t, self.W_co)  + self.b_o)
            # h_t = o_t * T.tanh(c_t)

            z_t = sigma(theano.dot(x_t, self.W_xz) + self.b_z)
            ###### THIS IS DIFFERENT
            r_t = sigma(
                theano.dot(x_t, self.W_xr) + theano.dot(h_tm1, self.W_hr) +
                self.b_r)
            h_t = (T.tanh(
                theano.dot(h_tm1 * r_t, self.W_hh) + T.tanh(x_t[50:100]) +
                self.b_h) * z_t) + h_tm1 * (T.ones_like(z_t) - z_t)
            # h_t = T.tanh(h_tm1)

            if self.featdim > 0:
                all_t = T.concatenate([h_t, feat_t])
            else:
                all_t = h_t

            # print "all_t", type(all_t), T.shape(all_t)
            s_t = softmax(theano.dot(all_t, self.W_hy) + self.b_y)
            # print T.shape(h_t), T.shape(c_t), T.shape(s_t)
            return [h_t, s_t]
Example #18
 def one_step_no_output(self, x_t, h_tm1, W_xc, W_hc, b_c, W_ih, W_hh, W_ho, b_o, b_h):
     C = sigmoid(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) + b_c)
     h_t_hat = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
     h_t = (1 - C) * h_t_hat + C * x_t
     if self.ignore_zero:
         return [h_t, h_t], theano.scan_module.until(T.eq(T.sum(abs(x_t)), 0))
     return [h_t, h_t]
Example #19
    def _input_to_hidden(self, x):
        x = x.dimshuffle((1, 0, 2))

        r = T.dot(x, self.W_r) + self.b_r
        z = T.dot(x, self.W_z) + self.b_z
        h = T.dot(x, self.W_h) + self.b_h

        return r, z, h
Example #20
 def one_step_no_output(self, x_t, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
     """
     step function that does not calculate the output data
     """
     h_t = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
     if self.ignore_zero:
         return [h_t, h_t], theano.scan_module.until(T.eq(T.sum(abs(x_t)), 0))
     return [h_t, h_t]
Example #21
def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback,
            W_out):
    x_t = T.tanh(theano.dot(x_tm1, W) + \
                     theano.dot(u_t,   W_in_1) + \
                     theano.dot(u_tm4, W_in_2) + \
                     theano.dot(y_tm1, W_feedback))
    y_t = theano.dot(x_tm3, W_out)
    return [x_t, y_t]
Example #22
    def __init__(self, name, inp):
        eqvars = self.arrdict[name]
        w_hidden, b_hidden, w_output, b_output = eqvars

        hidden = T.dot(w_hidden.T, inp) + b_hidden
        hidden_act = M.tanh(hidden)
        output = (T.dot(w_output.T, hidden_act) + b_output)
        self.proj = output.sum()
Example #23
def step(x_t, h_t_1, W_h, W_x, W_y):
    # Add breakpoint

    h = t.tanh(theano.dot(W_h, h_t_1) + theano.dot(W_x, x_t) + b_h)
    y = (theano.dot(W_y, h) + b_y)
    e_y = t.exp(y - y.max())
    smax_y = e_y / e_y.sum()
    return h, smax_y
Example #24
 def setL(x, name1="w", name2="b", name3="b_", act="sigmoid"):
     w = self.seg.params[name1]
     b = self.seg.params[name2]
     b_ = self.seg.params[name3]
     activate = self.getfunc(act)
     y = activate(theano.dot(x, w) + b)
     z = activate(theano.dot(y, w.T) + b_)
     return zip([w, b, b_], theano.grad(self.lossfunc(x, z), [w, b, b_]))
Example #25
    def __init__(self, name, inp):
        eqvars = self.arrdict[name]
        w_hidden, b_hidden, w_output, b_output = eqvars

        hidden = T.dot(w_hidden.T, inp) + b_hidden
        hidden_act = M.tanh(hidden)
        output = (T.dot(w_output.T, hidden_act) + b_output)
        self.proj = output.sum()
Example #26
        def test_specify_shape_inplace(self):
            # test that specify_shape doesn't break inserting an inplace op

            dtype = self.dtype
            if dtype is None:
                dtype = theano.config.floatX

            rng = numpy.random.RandomState(utt.fetch_seed())
            a = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
            a = self.cast_value(a)
            a_shared = self.shared_constructor(a)
            b = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
            b = self.cast_value(b)
            b_shared = self.shared_constructor(b)
            s = numpy.zeros((40, 40), dtype=dtype)
            s = self.cast_value(s)
            s_shared = self.shared_constructor(s)
            f = theano.function([],
                                updates=[(s_shared, theano.dot(a_shared, b_shared)
                                         + s_shared)])
            topo = f.maker.fgraph.toposort()
            f()
            #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
            if theano.config.mode != 'FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
            # There is no inplace gemm for sparse
            #assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "StructuredDot")
            s_shared_specify = tensor.specify_shape(s_shared, s_shared.get_value(borrow=True).shape)

            # now test with the specify shape op in the output
            f = theano.function([], s_shared.shape,
                                updates=[(s_shared, theano.dot(a_shared, b_shared)
                                         + s_shared_specify)])
            topo = f.maker.fgraph.toposort()
            shp = f()
            assert numpy.all(shp == (40, 40))
            if theano.config.mode != 'FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
            # now test with the specify shape op in the inputs and outputs
            a_shared = tensor.specify_shape(a_shared,
                    a_shared.get_value(borrow=True).shape)
            b_shared = tensor.specify_shape(b_shared,
                    b_shared.get_value(borrow=True).shape)

            f = theano.function([], s_shared.shape,
                                updates=[(s_shared, theano.dot(a_shared, b_shared)
                                         + s_shared_specify)])
            topo = f.maker.fgraph.toposort()
            shp = f()
            assert numpy.all(shp == (40, 40))
            if theano.config.mode != 'FAST_COMPILE':
                assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"] for node in topo]) == 1
                assert all(node.op == tensor.blas.gemm_inplace for node in topo if isinstance(node.op, tensor.blas.Gemm))
                assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "GpuGemm")
Example #27
    def func_dec(self, xt, htm1, ctm1):
        # xt -- embedded world representations
        current_att_weight = self.softmax(
            theano.dot(
                tensor.tanh(
                    theano.dot(
                        htm1, self.W_att_target
                    ) + self.scope_att_times_W
                ),
                self.b_att
            )
        )
        #
        zt = theano.dot(current_att_weight, self.scope_att)
        #
        post_transform = self.b_dec + theano.dot(
            tensor.concatenate(
                [xt, htm1, zt], axis=0
            ),
            self.W_dec
        )
        gate_input = tensor.nnet.sigmoid(
            post_transform[:self.dim_model]
        )
        gate_forget = tensor.nnet.sigmoid(
            post_transform[self.dim_model:2*self.dim_model]
        )
        gate_output = tensor.nnet.sigmoid(
            post_transform[2*self.dim_model:3*self.dim_model]
        )
        gate_pre_c = tensor.tanh(
            post_transform[3*self.dim_model:]
        )
        ct = gate_forget * ctm1 + gate_input * gate_pre_c
        ht = gate_output * tensor.tanh(ct)

        '''
        Add drop out here, by cha chen
        '''

        # Set up a random number generator
        srng = RandomStreams(seed=0)

        # Set up the dropout
        windows = srng.uniform((self.dim_model,)) < 0.9
        getwins = theano.function([],windows)

        winst = getwins()
        ht_dropout = ht * winst

        # return the dropout version
        return ht, ht_dropout, ct, zt

        #
        # return ht, ct, zt

Example #28
    def _input_to_hidden(self, x):
        # (time_steps, batch_size, input_size)
        x = x.dimshuffle((1, 0, 2))

        xi = T.dot(x, self.W_i) + self.b_i
        xf = T.dot(x, self.W_f) + self.b_f
        xc = T.dot(x, self.W_c) + self.b_c
        xo = T.dot(x, self.W_o) + self.b_o
        return xi, xf, xc, xo
    def hidden_cov_units_preactivation_given_v(self, v, small=0.5):
        """Return argument to the sigmoid that would give mean of covariance hid units

        See the math at the top of this file for what 'adjusted' means.

        return b - 0.5 * dot(adjusted(v), U)**2
        """
        unit_v = v / (TT.sqrt(TT.mean(v**2, axis=1)+small)).dimshuffle(0,'x') # adjust row norm
        return self.b + 0.5 * dot(dot(unit_v, self.U)**2, self.P)
Example #30
 def __init__(self, input, w, b, params=[]):
     self.output=nnet.softmax(theano.dot(input, w)+b)
     self.l1=abs(w).sum()
     self.l2_sqr = (w**2).sum()
     self.argmax=theano.tensor.argmax(theano.dot(input, w)+b, axis=input.ndim-1)
     self.input = input
     self.w = w
     self.b = b
     self.params = params
Example #31
    def _forward(self,
                 state_below,
                 mask_below=None,
                 init_state=None,
                 context=None):
        if state_below.ndim == 3:  # state_below is a 3-d matrix
            batch_size = state_below.shape[1]
            n_steps = state_below.shape[0]
        else:
            raise NotImplementedError

# state_below:(src_sent_len,batch_size,embsize),
# mask_below:(src_sent_len,batch_size) 0-1 matrix (padding)
        if mask_below is not None:
            inps = [state_below, mask_below]
            if self.with_contex:
                fn = self._step_forward_with_context
            else:
                fn = self._step_forward
        else:
            inps = [state_below]
            if self.with_contex:
                fn = lambda x1, x2, x3, x4, x5: self._step_forward_with_context(
                    x1, None, x2, x3, x4, x5)
            else:
                fn = lambda x1, x2: self._step_forward(x1, None, x2)

        if self.with_contex:
            if init_state is None:
                init_state = T.tanh(
                    theano.dot(context, self.W_c_init) + self.b_init)
            c_z = theano.dot(context, self.W_cz)
            c_r = theano.dot(context, self.W_cr)
            c_h = theano.dot(context, self.W_ch)
            if self.ln:
                c_z = ln(c_z, self.gcz + self.bcz)
                c_r = ln(c_r, self.gcr + self.bcr)
                c_h = ln(c_h, self.gch + self.bch)
            non_sequences = [c_z, c_r, c_h]
            rval, updates = theano.scan(fn,
                                        sequences=inps,
                                        outputs_info=[init_state],
                                        non_sequences=non_sequences,
                                        n_steps=n_steps)

        else:
            if init_state is None:
                init_state = T.alloc(numpy.float32(0.), batch_size,
                                     self.n_hids)
                # init_state = T.unbroadcast(T.alloc(0., batch_size, self.n_hids), 0)
            rval, updates = theano.scan(fn,
                                        sequences=inps,
                                        outputs_info=[init_state],
                                        n_steps=n_steps)
        self.output = rval
        # if changed like this, it only returns the hidden state of the last word in the sentence
        return self.output
Example #32
    def hidden_cov_units_preactivation_given_v(self, v, small=0.5):
        """Return argument to the sigmoid that would give mean of covariance hid units

        See the math at the top of this file for what 'adjusted' means.

        return b - 0.5 * dot(adjusted(v), U)**2
        """
        unit_v = v / (TT.sqrt(TT.mean(v**2, axis=1) + small)).dimshuffle(
            0, 'x')  # adjust row norm
        return self.b + 0.5 * dot(dot(unit_v, self.U)**2, self.P)
Example #33
    def get_output_for(self, input, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.dot(input, self.Ws[0]*self.share_mask_W) + T.dot(input, self.Ws[1]*self.split_mask_W)
        activation = activation + (self.bs[0]* self.share_mask_b).dimshuffle('x', 0) + (self.bs[1]* self.split_mask_b).dimshuffle('x', 0)

        return self.nonlinearity(activation)
def build_mdn_predict(proj, x, tparams):
    x_diff_squared_avg = tensor.mean((x[:,1:] - x[:,:-1])**2,axis=1)
    invsigma_given_x = tensor.maximum(
        tensor.nnet.sigmoid(theano.dot(proj, tparams['U_sigma']) + tparams['b_sigma']),
        1e-8) / x_diff_squared_avg[:, None]
    mu = theano.dot(proj, tparams['U_mu']) + tparams['b_mu']
    p_mix_given_x = tensor.maximum(tensor.minimum(tensor.nnet.softmax(
        tensor.dot(proj, tparams['U_mix']) + tparams['b_mix']), 1e-6), 1-1e-6)
    p_mix_given_x = tensor.log(p_mix_given_x / (tensor.sum(p_mix_given_x, axis=1)[:, None] + 10 * EPS) + EPS)
    return invsigma_given_x, mu, p_mix_given_x
Example #35
def ln_linear(inputs, size, bias, concat=False, dtype=None, scope=None):
    if not isinstance(size, (list, tuple)):
        raise ValueError("size argument must be (input_size, output_size)")

    input_size, output_size = size

    if not isinstance(input_size, (list, tuple)):
        input_size = [input_size]

    if not isinstance(inputs, (list, tuple)):
        inputs = [inputs]

    if len(inputs) != len(input_size):
        raise RuntimeError("unmatched elements found: inputs and input_size")

    results = []

    with variable_scope(scope):
        if concat:
            input_size = sum(input_size)
            inputs = theano.tensor.concatenate(inputs, -1)

            shape = [input_size, output_size]
            matrix = get_variable("matrix", shape, dtype=dtype)
            res = theano.dot(inputs, matrix)
            with variable_scope("layer_norm"):
                alpha = get_variable("gains", shape=(output_size,), dtype=dtype, initializer=ones_initializer)
                beta = get_variable("biases", shape=(output_size,), dtype=dtype, initializer=zeros_initializer)

            res = layer_normalize(res, alpha, beta)
            results.append(res)
        else:
            for i in range(len(input_size)):
                shape = [input_size[i], output_size]
                name = "matrix_%d" % i
                matrix = get_variable(name, shape, dtype=dtype)
                res = theano.dot(inputs[i], matrix)
                with variable_scope("layer_norm"):
                    alpha = get_variable("gains_%d" % i, shape=(output_size,), dtype=dtype,
                                         initializer=ones_initializer())
                    beta = get_variable("biases_%d" % i, shape=(output_size,), dtype=dtype,
                                        initializer=zeros_initializer())

                res = layer_normalize(res, alpha, beta)
                results.append(res)

        if bias:
            shape = [output_size]
            bias = get_variable("bias", shape, dtype=dtype)
            results.append(bias)

    if len(results) == 1:
        return results[0]

    return reduce(theano.tensor.add, results)
 def pool_one(self, R):
     """
     Attention-based pooling
     :param R: sentence representation, shape=[n, nb_filter]
     :return W_max: shape=[class_embbed_dim,]
     """
     G = theano.dot(theano.dot(R, self.U), self.WL)  # shape=[n, nb_classes]
     A = T.nnet.softmax(G.transpose()).transpose()  # shape=[n, nb_classes]
     WO = T.dot(R.transpose(), A)  # shape=[nb_filter, nb_classes]
     W_max = T.max(WO, axis=1)  # shape=[nb_filter,]
     return T.tanh(W_max)
Example #37
    def test_input_aliasing_affecting_inplace_operations(self):

        # Note: to trigger this bug with theano rev 4586:2bc6fc7f218b,
        #        you need to make the inputs mutable (so that inplace
        #        operations are used) and to break the elemwise composition
        #        with some non-elemwise op (here dot)
        x = theano.tensor.dvector()
        y = theano.tensor.dvector()
        m1 = theano.tensor.dmatrix()
        m2 = theano.tensor.dmatrix()
        f = theano.function(
            [
                theano.In(x, mutable=True),
                theano.In(y, mutable=True),
                theano.In(m1, mutable=True),
                theano.In(m2, mutable=True),
            ],
            theano.dot((x * 2), m1) + theano.dot((y * 3), m2),
        )
        # Test 1. If the same variable is given twice

        # Compute bogus values
        v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
        m = np.asarray(
            [
                [1, 0, 0, 0, 0],
                [0, 1, 0, 0, 0],
                [0, 0, 1, 0, 0],
                [0, 0, 0, 1, 0],
                [0, 0, 0, 0, 1],
            ],
            dtype="float64",
        )
        bogus_vals = f(v, v, m, m)
        # Since we used inplace operation v and m may be corrupted
        # so we need to recreate them

        v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
        m = np.asarray(
            [
                [1, 0, 0, 0, 0],
                [0, 1, 0, 0, 0],
                [0, 0, 1, 0, 0],
                [0, 0, 0, 1, 0],
                [0, 0, 0, 0, 1],
            ],
            dtype="float64",
        )
        m_copy = m.copy()
        v_copy = v.copy()
        vals = f(v, v_copy, m, m_copy)

        assert np.allclose(vals, bogus_vals)
Example #38
 def gru_aspect(a_i, rm1, pb):
     g_i = sigma(
         T.dot(a_i, dropout(self.Wxa_1, self.ms[7], pb)) +
         T.dot(rm1, self.Wha_1))
     f_i = sigma(
         T.dot(a_i, dropout(self.Wxa_2, self.ms[8], pb)) +
         T.dot(rm1, self.Wha_2))
     c_i = T.tanh(
         theano.dot(a_i, dropout(self.Wxa_3, self.ms[9], pb)) +
         theano.dot(rm1 * f_i, self.Wha_3))
     r_i = (T.ones_like(g_i) - g_i) * rm1 + g_i * c_i
     return r_i
Example #39
 def gru_opinion(a_i, rm1, pb):
     g_i = sigma(
         T.dot(a_i, dropout(self.Wxo_1, self.ms[10], pb)) +
         T.dot(rm1, self.Who_1))
     f_i = sigma(
         T.dot(a_i, dropout(self.Wxo_2, self.ms[11], pb)) +
         T.dot(rm1, self.Who_2))
     c_i = T.tanh(
         theano.dot(a_i, dropout(self.Wxo_3, self.ms[12], pb)) +
         theano.dot(rm1 * f_i, self.Who_3))
     r_i = (T.ones_like(g_i) - g_i) * rm1 + g_i * c_i
     return r_i
 def one_step(x_t, h_tminus1, c_tminus1):
     i_t = sigmoid(theano.dot(x_t, self.W_xi) + theano.dot(h_tminus1, self.W_hi) + self.b_i)
     f_t = sigmoid(theano.dot(x_t, self.W_xf) + theano.dot(h_tminus1, self.W_hf) + self.b_f)
     o_t = sigmoid(theano.dot(x_t, self.W_xo) + theano.dot(h_tminus1, self.W_ho) + self.b_o)
     g_t = self.activation_fun(theano.dot(x_t, self.W_xg) + theano.dot(h_tminus1, self.W_hg) + self.b_g)
     c_t = f_t * c_tminus1 + i_t * g_t
     h_t = o_t * self.activation_fun(c_t)
     y_t = sigmoid(theano.dot(h_t, self.W_hy) + self.b_y)
     return [h_t, c_t, y_t]
Example #41
def rand_rotate_matrix_symbol(angle=90, ss=0.5):
    srs = T.shared_randomstreams.RandomStreams()
    # np.pi / 180 *
    agx = (srs.uniform() * (2 * angle) - angle) * np.pi / 180
    agy = (srs.uniform() * (2 * angle) - angle) * np.pi / 180
    s = srs.uniform() + ss
    Rx = T.stack(1, 0, 0, 0, T.cos(agx), T.sin(agx), 0, -T.sin(agx),
                 T.cos(agx)).reshape((3, 3))
    Ry = T.stack(T.cos(agy), 0, -T.sin(agy), 0, 1, 0, T.sin(agy), 0,
                 T.cos(agy)).reshape((3, 3))
    Ss = T.stack(s, 0, 0, 0, s, 0, 0, 0, s).reshape((3, 3))
    value = theano.dot(Ry, theano.dot(Rx, Ss))
    return value
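A hedged usage sketch: the value returned above is symbolic, so it is compiled once and every call of the compiled function draws a fresh random rotation-and-scale matrix (the RandomStreams state updates are attached automatically).

R = rand_rotate_matrix_symbol(angle=45, ss=0.5)
sample_transform = theano.function([], R)
print(sample_transform())  # a new random 3x3 transform on each call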
Example #42
    def step(self, x_t, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):

        # h_t = g(W_ih x_t + W_hh h_tm1 + bh)

        ### Does not work on recurrent layer, see http://arxiv.org/pdf/1311.0701v7.pdf
        h_t = self.g(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)

        # y_t = act(W_ho h_t + b_o)

        ### y_t = self.act(theano.dot(h_t, W_ho) + b_o)
        y_t = self.act(theano.dot(h_t, W_ho) + b_o)

        return [h_t, y_t]
Example #43
    def get_output_for(self, input, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.dot(input[:, 0:-self.split_num], self.Ws[0]*self.share_mask_W)
        activation = activation + (self.bs[0]* self.share_mask_b).dimshuffle('x', 0)
        for i in range(0, len(self.Ws)):
            activation_mask = T.stack([input[:, -self.split_num + i]] * self.num_units).T
            activation += (T.dot(input[:, 0:-self.split_num], self.Ws[i]*self.split_mask_W) + (self.bs[i]* self.split_mask_b).dimshuffle('x', 0)) * activation_mask

        return self.nonlinearity(activation)
Example #44
    def test_partial_input_aliasing_affecting_inplace_operations(self):

        # Note: to trigger this bug with theano rev 4586:2bc6fc7f218b,
        #        you need to make the inputs mutable (so that inplace
        #        operations are used) and to break the elemwise composition
        #        with some non-elemwise op (here dot)
        x = theano.tensor.dvector()
        y = theano.tensor.dvector()
        z = theano.tensor.dvector()
        m1 = theano.tensor.dmatrix()
        m2 = theano.tensor.dmatrix()
        m3 = theano.tensor.dmatrix()

        # Test 2. If variables only partial overlap
        #   more exactly we care about the case when we have a,b,c
        #   and a shares memory with b, b shares memory with c, but
        #   c does not share memory with a

        f = theano.function(
            [
                theano.In(x, mutable=True),
                theano.In(y, mutable=True),
                theano.In(z, mutable=True),
                theano.In(m1, mutable=True),
                theano.In(m2, mutable=True),
                theano.In(m3, mutable=True),
            ],
            (
                theano.dot((x * 2), m1)
                + theano.dot((y * 3), m2)
                + theano.dot((z * 4), m3)
            ),
        )

        # Compute bogus values
        v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
        m = np.asarray([[1, 0], [0, 1]], dtype="float64")
        bogus_vals = f(v[:2], v[1:3], v[2:4], m, m, m)
        # Since we used inplace operation v and m may be corrupted
        # so we need to recreate them

        v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
        m = np.asarray([[1, 0], [0, 1]], dtype="float64")
        m_copy1 = m.copy()
        v_copy1 = v.copy()
        m_copy2 = m.copy()
        v_copy2 = v.copy()
        vals = f(v[:2], v_copy1[1:3], v_copy2[2:4], m, m_copy1, m_copy2)

        assert np.allclose(vals, bogus_vals)
Example #45
    def mk_training_fn(self):
        """The Constant Stochastic Gradient Step Fn with Optimal Preconditioning Matrix"""
        q_size = self.q_size
        avg_C = self.avg_C
        t = self.t
        updates = self.updates
        # Trying to stick to variable names as given in the publication
        # https://arxiv.org/pdf/1704.04289v1.pdf
        S = self.batch_size
        N = self.total_size

        # inputs
        random = self.random
        inarray = self.inarray

        # gradient of log likelihood
        gt = -1 * (1. / S) * (self.dlogp_elemwise.sum(axis=0) +
                              (S / N) * self.dlog_prior)

        # update moving average of Noise Covariance
        gt_diff = (self.dlogp_elemwise - self.dlogp_elemwise.mean(axis=0))
        V = (1. / (S - 1)) * theano.dot(gt_diff.T, gt_diff)
        C_t = (1. - 1. / t) * avg_C + (1. / t) * V
        # BB^T = C
        B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))
        # Optimal Preconditioning Matrix
        H = (2. * S / N) * tt.nlinalg.matrix_inverse(C_t)
        # step value on the log likelihood gradient preconditioned with H
        step = -1 * theano.dot(H, gt.dimshuffle([0, 'x']))

        # sample gaussian noise dW
        dW = random.normal((q_size, 1),
                           dtype=theano.config.floatX,
                           avg=0.0,
                           std=1.0)
        # noise term is inversely proportional to batch size
        noise_term = (1. / np.sqrt(S)) * theano.dot(H, theano.dot(B, dW))
        # step + noise term
        dq = (step + noise_term).flatten()

        # update time and avg_C
        updates.update({avg_C: C_t, t: t + 1})

        f = theano.function(outputs=dq,
                            inputs=inarray,
                            updates=updates,
                            allow_input_downcast=True)

        return f
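Reading the update off the code above (g_t is the gradient term computed in the function, C_t the running noise-covariance estimate with Cholesky factor B, S the batch size, N the dataset size):

\Delta q = H\left(-g_t + \tfrac{1}{\sqrt{S}}\, B\, \mathrm{d}W\right), \qquad H = \frac{2S}{N}\, C_t^{-1}, \qquad \mathrm{d}W \sim \mathcal{N}(0, I)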
Example #46
def image_step_val(Imat, htm1mat, ctm1mat, 
                   Wcnn, Wxi, Whi, bi, Wxf, Whf, bf, 
                   Wxc, Whc, bc, Wxo, Who, bo, Why, by, forbatch):
    xtmat = theano.dot(Imat, Wcnn)
    itmat = sigma(theano.dot(xtmat,Wxi) + theano.dot(htm1mat,Whi) + T.outer(forbatch,bi) )
    ftmat = sigma(theano.dot(xtmat,Wxf) + theano.dot(htm1mat,Whf) + T.outer(forbatch,bf) )
    ctmat = ftmat * ctm1mat + itmat*act(theano.dot(xtmat,Wxc)+theano.dot(htm1mat,Whc)+T.outer(forbatch,bc) )
    otmat = sigma(theano.dot(xtmat,Wxo) + theano.dot(htm1mat,Who) + T.outer(forbatch,bo) )
    htmat = otmat * act(ctmat)
#    yt = T.concatenate([addzero,tempyt],axis=0)
    return htmat, ctmat    
def encoder(wordt, htm1, ctm1, 
            Een, Wxien, Whien, bien, Wxfen, Whfen, bfen, 
            Wxcen, Whcen, bcen, Wxoen, Whoen, boen):
    xt = theano.dot(wordt, Een)
    it = sigma(theano.dot(xt,Wxien) + theano.dot(htm1,Whien) + bien )
    ft = sigma(theano.dot(xt,Wxfen) + theano.dot(htm1,Whfen) + bfen )
    ct = ft * ctm1 + it*act(theano.dot(xt,Wxcen)+theano.dot(htm1,Whcen)+bcen )
    ot = sigma(theano.dot(xt,Wxoen) + theano.dot(htm1,Whoen) + boen )
    ht = ot * act(ct)
#    yt = T.concatenate([addzero,tempyt],axis=0)
    return ht, ct
Example #48
 def one_lstm_step(x_t, h_tm1, c_tm1, W_xi, W_hi, W_xf, W_hf, W_xc, W_hc, W_xo, W_ho
                    ):
     i_t = T.nnet.sigmoid(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi) )
     f_t = T.nnet.sigmoid(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf) )
     c_t = f_t * c_tm1 + i_t * T.tanh(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) ) 
     o_t = T.nnet.sigmoid(theano.dot(x_t, W_xo)+ theano.dot(h_tm1, W_ho) ) 
     h_t = o_t * T.tanh(c_t)
     return [h_t, c_t]
Example #49
    def mk_training_fn(self):
        """The Constant Stochastic Gradient Step Fn with Optimal Preconditioning Matrix"""
        q_size = self.q_size
        avg_C = self.avg_C
        t = self.t
        updates = self.updates
        # Trying to stick to variable names as given in the publication
        # https://arxiv.org/pdf/1704.04289v1.pdf
        S = self.batch_size
        N = self.total_size

        # inputs
        random = self.random
        inarray = self.inarray

        # gradient of log likelihood
        gt = -1 * (1. / S) * (self.dlogp_elemwise.sum(axis=0) +
                              (S / N) * self.dlog_prior)

        # update moving average of Noise Covariance
        gt_diff = (self.dlogp_elemwise - self.dlogp_elemwise.mean(axis=0))
        V = (1. / (S - 1)) * theano.dot(gt_diff.T, gt_diff)
        C_t = (1. - 1. / t) * avg_C + (1. / t) * V
        # BB^T = C 
        B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))
        # Optimal Preconditioning Matrix
        H = (2. * S / N) * tt.nlinalg.matrix_inverse(C_t)
        # step value on the log likelihood gradient preconditioned with H
        step = -1 * theano.dot(H, gt.dimshuffle([0, 'x']))

        # sample gaussian noise dW
        dW = random.normal(
            (q_size, 1), dtype=theano.config.floatX, avg=0.0, std=1.0)
        # noise term is inversely proportional to batch size
        noise_term = (1. / np.sqrt(S)) * theano.dot(H, theano.dot(B, dW))
        # step + noise term
        dq = (step + noise_term).flatten()

        # update time and avg_C 
        updates.update({avg_C: C_t, t: t + 1})

        f = theano.function(
            outputs=dq,
            inputs=inarray,
            updates=updates,
            allow_input_downcast=True)

        return f
    def __init__(self, x, y, n_dim, k_classes):
        self.weights = theano.shared(value=numpy.zeros(
            (n_dim, k_classes),
            dtype=theano.config.floatX
        ), name="weights")

        self.bias = theano.shared(
            value=numpy.zeros((k_classes,), dtype=theano.config.floatX),
            name='bias')

        self.n_dim = n_dim
        self.classes = k_classes

        self.x = x
        self.y = y

        self.probability_d_in_k = tensor.nnet.softmax(theano.dot(self.x, self.weights) + self.bias)
        self.classification = tensor.argmax(self.probability_d_in_k, axis=1)

        self.template = [(self.n_dim, self.classes), (self.classes,)]

        self.loss_gradient = theano.function(
            inputs=[self.x, self.y],
            outputs=[tensor.grad(self.log_loss(), self.weights), tensor.grad(self.log_loss(), self.bias)]
        )
        self.loss_overall = theano.function(
            inputs=[self.x, self.y],
            outputs=self.log_loss(),
        )
Example #51
    def audcc_from_power(self, power, n_bands=None, n_audcc=None, dct_unitary=None,
            noise_level=None):
        """
        :type power: ndarray or NdArrayResult with ndim=2

        :param power: a power spectrogram with each frame in a row.  A frequency-scaled
        spectrogram makes sense here too.

        :type n_bands: int
        :param n_bands:  number of critical bands of power

        :type n_audcc: int
        :param n_audcc:  number of cepstral coefficients to calculate

        :type dct_unitary: Bool
        :param dct_unitary: True means apply different scaling to first coef.

        """
        n_audcc = self.n_audcc if n_audcc is None else n_audcc
        dct_unitary = self.dct_unitary if dct_unitary is None else dct_unitary
        n_bands = self.n_bands if n_bands is None else n_bands
        noise_level = self.noise_level if noise_level is None else noise_level

        dct = fourier.dct_matrix(n_audcc, n_bands, unitary=dct_unitary)

        dct = theano.tensor.as_tensor_variable(dct, name="AudioFeatures.dct<%i>"%id(dct))
        return theano.dot(theano.tensor.log(power + noise_level), dct.T)
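Compactly, with D the n_audcc x n_bands DCT matrix built above, the returned cepstral coefficients are:

\mathrm{audcc} = \log(\mathrm{power} + \mathrm{noise\_level})\, D^{\top}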
Example #52
    def test_csr_correct_output_faster_than_scipy(self):

        # contrast with test_grad, we put csr in float32, csc in float64

        sparse_dtype = "float32"
        dense_dtype = "float32"

        a = SparseType("csr", dtype=sparse_dtype)()
        b = tensor.matrix(dtype=dense_dtype)
        d = theano.dot(a, b)
        f = theano.function([a, b], d)

        for M, N, K, nnz in [(4, 3, 2, 3), (40, 30, 20, 3), (40, 30, 20, 30), (400, 3000, 200, 6000)]:
            spmat = sp.csr_matrix(random_lil((M, N), sparse_dtype, nnz))
            mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)
            t0 = time.time()
            theano_result = f(spmat, mat)
            t1 = time.time()
            scipy_result = spmat * mat
            t2 = time.time()

            theano_time = t1 - t0
            scipy_time = t2 - t1
            # print theano_result
            # print scipy_result
            print "theano took", theano_time,
            print "scipy took", scipy_time
            overhead_tol = 0.002  # seconds
            overhead_rtol = 1.1  # times as long
            self.assertTrue(numpy.allclose(theano_result, scipy_result))
            if not theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
                self.assertFalse(theano_time > overhead_rtol * scipy_time + overhead_tol)
Example #53
    def apply(self, state_below, mask_below, context, c_mask):
        hiddens, attended = self._forward(state_below, mask_below, context, c_mask)

        combine = T.concatenate([state_below, hiddens, attended], axis=2)

        if self.max_out:
            merge_out = theano.dot(combine, self.W_m) + self.b_m
            merge_out = merge_out.reshape((merge_out.shape[0],
                                           merge_out.shape[1],
                                           merge_out.shape[2] // 2,
                                           2), ndim=4).max(axis=3)

        else:
            merge_out = T.tanh(theano.dot(combine, self.W_m) + self.b_m)

        return merge_out * mask_below[:, :, None]
Example #54
    def one_step(i_t, h_tm1, o_tm1, h_bias, W_in, W_out, W_rec):
      """Perform one step of a simple recurrent network returning the current
      hidden activations and the output.

      `i_t` is the input at the current timestep, `h_tm1` and `o_tm1` are the
      hidden values and outputs of the previous timestep. `h_bias` is the bias
      for the hidden units. `W_in`, `W_out` and `W_rec` are the weight matrices.

      Transfer functions can be specified via `hiddenfunc` and `outfunc` for the
      hidden and the output layer."""
      hidden_in = theano.dot(W_in, i_t)
      hidden_in += theano.dot(W_rec, h_tm1)
      hidden_in += h_bias
      h_t = hiddenfunc(hidden_in)
      o_t = outfunc(theano.dot(W_out, h_t))
      return [h_t, o_t]
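In equation form (f_h and f_o stand for the hiddenfunc and outfunc transfer functions named in the docstring):

h_t = f_h(W_{in}\, i_t + W_{rec}\, h_{t-1} + b_h), \qquad o_t = f_o(W_{out}\, h_t)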
Example #55
    def t_gemv1(self, m_shp):
        ''' test vector2 + dot(matrix, vector1) '''
        rng = numpy.random.RandomState(unittest_tools.fetch_seed())
        v1 = theano.shared(numpy.array(rng.uniform(size=(m_shp[1],)), dtype='float32'))
        v2_orig = numpy.array(rng.uniform(size=(m_shp[0],)), dtype='float32')
        v2 = theano.shared(v2_orig)
        m  = theano.shared(numpy.array(rng.uniform(size=m_shp), dtype='float32'))

        f = theano.function([], v2 + tensor.dot(m, v1),
                mode=self.mode)

        # Assert they produce the same output
        assert numpy.allclose(f(),
                numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
        topo = [n.op for n in f.maker.env.toposort()]
        assert topo == [CGemv(inplace=False)], topo

        #test the inplace version
        f = theano.function([], [],
                updates={v2:v2+theano.dot(m,v1)},
                mode=self.mode)

        # Assert they produce the same output
        f()
        assert numpy.allclose(v2.get_value(),
                numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
        topo = [n.op for n in f.maker.env.toposort()]
        assert topo == [CGemv(inplace=True)]
Example #56
 def __init__(self, X, n_in, n_out):
   # Initialize network parameters.
   W = theano.shared(np.random.randn(n_in, n_out))
   b = theano.shared(np.zeros(n_out))
   self.params = [W, b]
   # Compute layer activations
   self.output = T.nnet.sigmoid(theano.dot(X, W) + b)
Example #57
    def t_gemv1(self, m_shp):
        """ test vector2 + dot(matrix, vector1) """
        rng = numpy.random.RandomState(unittest_tools.fetch_seed())
        v1 = theano.shared(numpy.array(rng.uniform(size=(m_shp[1],)), dtype="float32"))
        v2_orig = numpy.array(rng.uniform(size=(m_shp[0],)), dtype="float32")
        v2 = theano.shared(v2_orig)
        m = theano.shared(numpy.array(rng.uniform(size=m_shp), dtype="float32"))

        f = theano.function([], v2 + tensor.dot(m, v1), mode=self.mode)

        # Assert they produce the same output
        assert numpy.allclose(f(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
        topo = [n.op for n in f.maker.fgraph.toposort()]
        assert topo == [CGemv(inplace=False)], topo

        # test the inplace version
        g = theano.function([], [], updates=[(v2, v2 + theano.dot(m, v1))], mode=self.mode)

        # Assert they produce the same output
        g()
        assert numpy.allclose(v2.get_value(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
        topo = [n.op for n in g.maker.fgraph.toposort()]
        assert topo == [CGemv(inplace=True)]

        # Do the same tests with a matrix with strides in both dimensions
        m.set_value(m.get_value(borrow=True)[::-1, ::-1], borrow=True)
        v2.set_value(v2_orig)
        assert numpy.allclose(f(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
        g()
        assert numpy.allclose(v2.get_value(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
Example #58
    def step(self, u_t, *args):
            """
                step function to calculate BPTT

                type u_t: T.matrix()
                param u_t: input sequence of the network

                type * args: python parameter list
                param * args: this is needed to implement a more general model of the step function,
                             see theano-users: http://groups.google.com/group/theano-users/browse_thread/thread/2fa44792c9cdd0d5

            """

            # get the recurrent activations
            r_act_vals = [args[u] for u in range(self.len_output_taps)]

            # get the recurrent weights
            r_weights = [args[u] for u in range(self.len_output_taps, (self.len_output_taps) * 2)]

            # get the input/output weights
            b_h = args[self.len_output_taps * 2]
            W_in = args[self.len_output_taps * 2 + 1]
            b_in = args[self.len_output_taps * 2 + 2]

            # sum up the recurrent activations
            act = theano.dot(r_act_vals[0], r_weights[0]) + b_h
            for u in range(1, self.len_output_taps):
                act += T.dot(r_act_vals[u], r_weights[u]) + b_h

            # compute the new recurrent activation
            h_t = T.tanh(T.dot(u_t, W_in) + b_in + act)

            return h_t
Example #59
def pred(p, X):
    '''
    '''
    w = p['w'].value
    b = p['b'].value
    P = TT.nnet.softmax(TT.dot(X, w) + b)
    return TT.argmax(P, 1)