Example #1
0
        w_t = ss6.sum(axis=1)

        attinp_h2, attgate_h2 = att_to_h2.proj(w_t)
        attinp_h3, attgate_h3 = att_to_h3.proj(w_t)

        h2_t = cell2.step(xinp_h2_t + h1inp_h2 + attinp_h2,
                          xgate_h2_t + h1gate_h2 + attgate_h2, h2_tm1)

        h2inp_h3, h2gate_h3 = h2_to_h3.proj(h2_t)

        h3_t = cell3.step(xinp_h3_t + h1inp_h3 + h2inp_h3 + attinp_h3,
                          xgate_h3_t + h1gate_h3 + h2gate_h3 + attgate_h3,
                          h3_tm1)
        return h1_t, h2_t, h3_t, k_t, w_t

    # Zero-filled buffer built from the shape (minibatch_size, n_out) and
    # passed through `as_shared`.
    # NOTE(review): presumably a Theano shared variable used as the initial
    # input when sampling -- confirm against `as_shared` and the use site.
    init_x = as_shared(np_zeros((minibatch_size, n_out)))
    # Random stream constructed with the fixed seed 1999.
    srng = RandomStreams(1999)

    def _slice_outs(outs):
        """Split a flat output tensor into ``(mu, sigma, coeff)``.

        ``outs`` is reshaped to ``(-1, n_density)`` and cut column-wise into
        three consecutive pieces:

        * ``mu``: the first ``(n_out // 2) * n_components`` columns, reshaped
          to ``(-1, n_out // 2, n_components)``.
        * ``sigma``: the next block of the same width and shape, mapped
          through ``exp(. - bias_sym)`` and offset by ``1E-6``.
        * ``coeff``: every remaining column, mapped through a softmax of
          ``coeff * (1. + bias_sym)`` and offset by ``1E-6``.

        Reads ``n_components``, ``n_out``, ``n_density`` and ``bias_sym``
        from the enclosing scope.
        """
        n_mix = n_components
        n_dims = n_out // 2
        # Width, in columns, of each of the mu and sigma blocks.
        block_width = n_dims * n_mix
        block_shape = (-1, n_dims, n_mix)

        flat = outs.reshape((-1, n_density))
        mu = flat[:, 0:block_width].reshape(block_shape)
        raw_sigma = flat[:, block_width:2 * block_width].reshape(block_shape)
        raw_coeff = flat[:, 2 * block_width:]

        # The small additive constants keep both outputs strictly positive.
        sigma = tensor.exp(raw_sigma - bias_sym) + 1E-6
        coeff = tensor.nnet.softmax(raw_coeff * (1. + bias_sym)) + 1E-6
        return mu, sigma, coeff

    # Used to calculate the stopping heuristic from section 5.3.
    # Builds a vector of length c_sym.shape[0] whose every entry equals
    # c_sym.shape[0] (0 * arange(...) gives zeros of the right length).
    u_max = 0. * tensor.arange(c_sym.shape[0]) + c_sym.shape[0]
    # Permute the four axes of outs_deconv into the order (2, 0, 3, 1).
    outs_deconv = outs_deconv.dimshuffle(2, 0, 3, 1)
    # Truncate along the new leading axis to target's leading length.
    outs_deconv = outs_deconv[:target.shape[0]]
    # NOTE(review): the result of each Print(...)(...) call below is
    # discarded. A Theano Print op presumably only fires when its returned
    # variable is part of a compiled graph, so these lines likely print
    # nothing -- confirm, then either wire the results in or remove them.
    theano.printing.Print("outs_deconv.shape")(outs_deconv.shape)
    # Add the softmax bias, then normalise with `softmax`.
    preds = softmax(outs_deconv + b_softmax)
    theano.printing.Print("preds.shape")(preds.shape)
    theano.printing.Print("target.shape")(target.shape)
    # Rebind target to its one-hot encoding; `r=n_bins` is passed to the
    # helper (presumably the number of classes -- confirm in theano_one_hot).
    target = theano_one_hot(target, r=n_bins)
    theano.printing.Print("target.shape")(target.shape)
    cost = categorical_crossentropy(preds, target)
    theano.printing.Print("cost.shape")(cost.shape)
    theano.printing.Print("mask.shape")(mask.shape)
    # Append a broadcastable trailing axis to the 2-D mask before
    # multiplying it into the cost.
    cost = cost * mask.dimshuffle(0, 1, 'x')
    # Sum the masked cost and divide by the product of target's first two
    # dimensions. `target` here is the one-hot version from above.
    # NOTE(review): this divides by the full leading-dims size rather than
    # by mask.sum(), so masked-out positions still count in the
    # denominator -- confirm this is intended.
    cost = cost.sum() / (target.shape[0] * target.shape[1])
    # Symbolic gradients of the scalar cost with respect to params.
    grads = tensor.grad(cost, params)

    # Zero-filled buffer built from the shape (minibatch_size, n_out) and
    # passed through `as_shared`.
    init_x = as_shared(np_zeros((minibatch_size, n_out)))
    # Random stream constructed with the fixed seed 1999.
    srng = RandomStreams(1999)

    """
    # Used to calculate stopping heuristic from sections 5.3
    u_max = 0. * tensor.arange(c_sym.shape[0]) + c_sym.shape[0]
    u_max = u_max.dimshuffle('x', 'x', 0)
    u_max = tensor.cast(u_max, theano.config.floatX)

    def _slice_outs(outs):
        k = n_components
        if outs.ndim == 4:
            def _r(i):
                i = i.dimshuffle(0, 2, 1, 3)
                return i.reshape((-1, i.shape[2], i.shape[3]))
            mu = _r(outs[:, :, :, 0:k])
Example #3
0
        w_t = ss6.sum(axis=1)

        attinp_h2, attgate_h2 = att_to_h2.proj(w_t)
        attinp_h3, attgate_h3 = att_to_h3.proj(w_t)

        h2_t = cell2.step(xinp_h2_t + h1inp_h2 + attinp_h2,
                          xgate_h2_t + h1gate_h2 + attgate_h2, h2_tm1)

        h2inp_h3, h2gate_h3 = h2_to_h3.proj(h2_t)

        h3_t = cell3.step(xinp_h3_t + h1inp_h3 + h2inp_h3 + attinp_h3,
                          xgate_h3_t + h1gate_h3 + h2gate_h3 + attgate_h3,
                          h3_tm1)
        return h1_t, h2_t, h3_t, k_t, w_t

    # Zero-filled buffer built from the shape (minibatch_size, n_feats) and
    # passed through `as_shared`.
    # NOTE(review): presumably a Theano shared variable used as the initial
    # input when sampling -- confirm against `as_shared` and the use site.
    init_x = as_shared(np_zeros((minibatch_size, n_feats)))
    # Random stream constructed with the fixed seed 1999.
    srng = RandomStreams(1999)

    # Used to calculate the stopping heuristic from section 5.3.
    # Builds a vector of length c_sym.shape[0] whose every entry equals
    # c_sym.shape[0] (0 * arange(...) gives zeros of the right length).
    u_max = 0. * tensor.arange(c_sym.shape[0]) + c_sym.shape[0]
    # Prepend two broadcastable axes so the vector becomes 3-D with its
    # data on the last axis.
    u_max = u_max.dimshuffle('x', 'x', 0)
    # Cast to Theano's configured float type.
    u_max = tensor.cast(u_max, theano.config.floatX)

    def sample_step(x_tm1, h1_tm1, h2_tm1, h3_tm1, k_tm1, w_tm1, ctx):
        theano.printing.Print("x_tm1.shape")(x_tm1.shape)
        pt1 = theano_one_hot(x_tm1[:, 0], n_classes=n_softmax1)
        theano.printing.Print("pt1.shape")(pt1.shape)
        pt2 = theano_one_hot(x_tm1[:, 1], n_classes=n_softmax2)
        theano.printing.Print("pt2.shape")(pt2.shape)
        x_tm1 = tensor.concatenate((pt1, pt2), axis=-1)
        theano.printing.Print("x_tm1.shape")(x_tm1.shape)