def sample_step(x_tm1, h1_tm1, h2_tm1, h3_tm1, k_tm1, w_tm1, ctx):
    """Build the symbolic graph for a single sampling step.

    The previous sample ``x_tm1`` is projected into each of the three
    recurrent cells, ``cell1`` additionally receives a projection of
    ``w_tm1``, and the new ``h1_t`` drives three exponentiated projections
    that are fed to ``calc_phi``.  The resulting weights are multiplied
    with ``ctx`` and summed over axis 1 to form ``w_t``, which conditions
    ``cell2`` and ``cell3``.  The summed output projections go through
    three relu layers and a softmax, from which a sample is drawn.

    Args:
        x_tm1: Previous sample, fed to the ``inp_to_h*`` projections.
        h1_tm1, h2_tm1, h3_tm1: Previous states of ``cell1``..``cell3``.
        k_tm1: Previous value the exponentiated ``h1_to_att_k`` projection
            is added to.
        w_tm1: Previous weighted context, projected into ``cell1``.
        ctx: 3-D context tensor; its first two axes are swapped before
            being weighted (presumably stored time-major - TODO confirm).

    Returns:
        Tuple ``(x_t, h1_t, h2_t, h3_t, k_t, w_t, ss_t, sh_t)`` where
        ``ss_t`` is ``calc_phi`` evaluated at ``u`` and ``sh_t`` is
        ``calc_phi`` evaluated at ``u_max`` (the stopping criterion).

    NOTE(review): the ``theano.printing.Print`` results are discarded, so
    they presumably only emit output when test values are computed while
    the graph is being built - confirm against the Theano configuration.
    """
    # Project the previous sample into every recurrent layer.
    in1, gate1 = inp_to_h1.proj(x_tm1)
    in2, gate2 = inp_to_h2.proj(x_tm1)
    in3, gate3 = inp_to_h3.proj(x_tm1)

    # Only the first layer sees the previous weighted context.
    prev_w_in1, prev_w_gate1 = att_to_h1.proj(w_tm1)
    h1_t = cell1.step(in1 + prev_w_in1, gate1 + prev_w_gate1, h1_tm1)

    h1_in2, h1_gate2 = h1_to_h2.proj(h1_t)
    h1_in3, h1_gate3 = h1_to_h3.proj(h1_t)

    # exp() keeps all three quantities positive; k_t is accumulated on top
    # of its previous value, so it can only grow.
    alpha_t = tensor.exp(h1_t.dot(h1_to_att_a))
    beta_t = tensor.exp(h1_t.dot(h1_to_att_b))
    k_t = k_tm1 + tensor.exp(h1_t.dot(h1_to_att_k))

    ss_t = calc_phi(k_t, alpha_t, beta_t, u)
    # Same function evaluated at u_max: returned as the stopping criterion.
    sh_t = calc_phi(k_t, alpha_t, beta_t, u_max)

    # Weight the context by ss_t and collapse axis 1.
    weighted_ctx = ss_t.dimshuffle(0, 1, 'x') * ctx.dimshuffle(1, 0, 2)
    w_t = weighted_ctx.sum(axis=1)

    w_in2, w_gate2 = att_to_h2.proj(w_t)
    w_in3, w_gate3 = att_to_h3.proj(w_t)

    h2_t = cell2.step(in2 + h1_in2 + w_in2,
                      gate2 + h1_gate2 + w_gate2,
                      h2_tm1)

    h2_in3, h2_gate3 = h2_to_h3.proj(h2_t)

    h3_t = cell3.step(in3 + h1_in3 + h2_in3 + w_in3,
                      gate3 + h1_gate3 + h2_gate3 + w_gate3,
                      h3_tm1)

    out_t = (h1_t.dot(h1_to_outs) + h2_t.dot(h2_to_outs)
             + h3_t.dot(h3_to_outs))
    theano.printing.Print("out_t.shape")(out_t.shape)

    # Three relu layers applied in sequence.
    hidden_t = out_t
    for weights, bias in ((l1_proj, b_l1_proj),
                          (l2_proj, b_l2_proj),
                          (l3_proj, b_l3_proj)):
        hidden_t = relu(hidden_t.dot(weights) + bias)
    theano.printing.Print("l3_t.shape")(hidden_t.shape)

    logits_t = hidden_t.dot(softmax_proj) + b_softmax_proj
    theano.printing.Print("pred_t.shape")(logits_t.shape)
    logits_t = logits_t.reshape((-1, n_features, n_softmax))
    probs_t = softmax(logits_t * (1. + softmax_bias_sym))
    theano.printing.Print("pred_t.shape")(probs_t.shape)

    # Flatten to 2-D for sampling, then restore the two leading axes.
    probs_shape = probs_t.shape
    flat_probs = probs_t.reshape((-1, probs_shape[-1]))
    x_t = sample_softmax(flat_probs, srng).reshape(
        (probs_shape[0], probs_shape[1]))
    theano.printing.Print("samp_t.shape")(x_t.shape)
    theano.printing.Print("x_t.shape")(x_t.shape)
    return x_t, h1_t, h2_t, h3_t, k_t, w_t, ss_t, sh_t
Example #2
0
    def sample_step(x_tm1, h1_tm1, h2_tm1, h3_tm1):
        """Build the symbolic graph for one frame-by-frame sampling step.

        The three recurrent cells are advanced from ``x_tm1`` and their
        previous states.  Then ``n_frame`` values are sampled one after the
        other: iteration ``i`` feeds an MLP with the ``i``-th ``n_hid``-wide
        column slice of ``out_t`` concatenated with the flattened one-hot
        history ``prev_t[:, i:]``, samples from the resulting softmax and
        appends the one-hot sample to the history along axis 1.

        Args:
            x_tm1: Previous output; one-hot encoded with ``n_bins`` classes
                to seed the history.
            h1_tm1, h2_tm1, h3_tm1: Previous states of ``cell1``..``cell3``.

        Returns:
            Tuple ``(x_t, h1_t, h2_t, h3_t)``.  ``x_t`` is the argmax over
            the last axis of the history entries from index ``n_frame``
            onward on axis 1, cast to ``theano.config.floatX``.

        NOTE(review): the ``theano.printing.Print`` results are discarded,
        so they presumably only emit output when test values are computed
        while the graph is being built - confirm.
        """
        xinp_h1_t, xgate_h1_t = inp_to_h1.proj(x_tm1)
        xinp_h2_t, xgate_h2_t = inp_to_h2.proj(x_tm1)
        xinp_h3_t, xgate_h3_t = inp_to_h3.proj(x_tm1)

        h1_t = cell1.step(xinp_h1_t, xgate_h1_t, h1_tm1)
        h1inp_h2, h1gate_h2 = h1_to_h2.proj(h1_t)
        h1inp_h3, h1gate_h3 = h1_to_h3.proj(h1_t)

        h2_t = cell2.step(xinp_h2_t + h1inp_h2,
                          xgate_h2_t + h1gate_h2, h2_tm1)

        h2inp_h3, h2gate_h3 = h2_to_h3.proj(h2_t)

        h3_t = cell3.step(xinp_h3_t + h1inp_h3 + h2inp_h3,
                          xgate_h3_t + h1gate_h3 + h2gate_h3,
                          h3_tm1)
        out_t = h1_t.dot(h1_to_outs) + h2_t.dot(h2_to_outs) + h3_t.dot(
            h3_to_outs) + b_to_outs

        theano.printing.Print("x_tm1.shape")(x_tm1.shape)

        theano.printing.Print("out_t.shape")(out_t.shape)
        inpt_oh = theano_one_hot(x_tm1, n_classes=n_bins)
        theano.printing.Print("inpt_oh.shape")(inpt_oh.shape)
        # History of one-hot entries; grows by one entry on axis 1 per
        # iteration of the loop below.
        prev_t = inpt_oh
        for i in range(n_frame):
            # Slice of the recurrent output dedicated to frame i.
            partial_out_t = out_t[:, i * n_hid: (i + 1) * n_hid]
            theano.printing.Print("partial_out_t.shape")(partial_out_t.shape)
            theano.printing.Print("prev_t.shape")(prev_t.shape)
            shp = prev_t.shape
            # Drop the i oldest history entries and flatten the rest per row.
            prev_ti = prev_t[:, i:].reshape((shp[0], -1))
            theano.printing.Print("prev_ti.shape")(prev_ti.shape)
            features_t = tensor.concatenate((partial_out_t, prev_ti),
                                            axis=1)
            theano.printing.Print("features_t.shape")(features_t.shape)
            mlp1_t = relu(features_t.dot(mlp1_w) + mlp1_b)
            mlp2_t = relu(mlp1_t.dot(mlp2_w) + mlp2_b)
            mlp3_t = relu(mlp2_t.dot(mlp3_w) + mlp3_b)
            pred_t = softmax(mlp3_t.dot(pred_w) + pred_b)
            theano.printing.Print("pred_t.shape")(pred_t.shape)
            samp_t = sample_softmax(pred_t, srng)
            theano.printing.Print("samp_t.shape")(samp_t.shape)
            samp_t_oh = theano_one_hot(samp_t, n_classes=n_bins)
            # Insert a length-1 axis so the sample can be appended on axis 1.
            samp_t_oh = samp_t_oh.dimshuffle(0, 'x', 1)
            theano.printing.Print("samp_t_oh.shape")(samp_t_oh.shape)
            prev_t = tensor.concatenate((prev_t, samp_t_oh), axis=1)
            theano.printing.Print("prev_t.shape")(prev_t.shape)
        # Keep only the entries appended by the loop and convert them back
        # from one-hot to class indices.
        pred_t = prev_t[:, n_frame:].argmax(axis=-1)
        x_t = tensor.cast(pred_t, theano.config.floatX)
        return x_t, h1_t, h2_t, h3_t
    # Targets and their mask skip the first entry along axis 0
    # (presumably next-step prediction targets - TODO confirm against how
    # inpt is built outside this view).
    target = X_sym[1:]
    mask = X_mask_sym[1:]
    # Apply the 2-D context mask to c_sym by broadcasting it over a new
    # trailing axis.
    context = c_sym * c_mask_sym.dimshuffle(0, 1, 'x')
    # NOTE(review): the Print results below are discarded, so they presumably
    # only emit output when test values are computed at graph-build time.
    theano.printing.Print("inpt.shape")(inpt.shape)
    theano.printing.Print("target.shape")(target.shape)

    # Swap the first two axes of the 3-D input and insert a broadcastable
    # axis at position 1 to obtain the 4-D tensor handed to conv2d.
    inpt = inpt.dimshuffle(1, 'x', 0, 2)

    # First convolution: padding only on the first spatial axis
    # (conv_size1 - 1), subsample factor 2 on that axis and 1 on the other.
    border_mode = (conv_size1 - 1, 0)
    conv1 = conv2d(inpt, w_conv1, subsample=(2, 1), border_mode=border_mode)
    # Bias is broadcast over every axis except axis 1.
    # NOTE(review): no relu here, unlike conv2 below - confirm intentional.
    conv1 = conv1 + b_conv1.dimshuffle('x', 0, 'x', 'x')
    theano.printing.Print("conv1.shape")(conv1.shape)

    # Second convolution: same scheme with conv_size2, followed by relu.
    border_mode = (conv_size2 - 1, 0)
    conv2 = conv2d(conv1, w_conv2, subsample=(2, 1), border_mode=border_mode)
    conv2 = relu(conv2 + b_conv2.dimshuffle('x', 0, 'x', 'x'))
    theano.printing.Print("conv2.shape")(conv2.shape)

    # Last axis is 1
    # Index the last axis away, then move axis 2 to the front.
    conv_out = conv2[:, :, :, 0].dimshuffle(2, 0, 1)
    theano.printing.Print("conv_out.shape")(conv_out.shape)

    # Project the convolutional features into (input, gate) pairs for the
    # first two recurrent layers.
    conv_h1, convgate_h1 = conv_to_h1.proj(conv_out)
    conv_h2, convgate_h2 = conv_to_h2.proj(conv_out)

    # Positions 0 .. c_sym.shape[0] - 1 laid out on the last of three axes
    # (the first two are broadcastable), cast to floatX.
    u = tensor.arange(c_sym.shape[0]).dimshuffle('x', 'x', 0)
    u = tensor.cast(u, theano.config.floatX)

    def calc_phi(k_t, a_t, b_t, u_c):
        a_t = a_t.dimshuffle(0, 1, 'x')
        b_t = b_t.dimshuffle(0, 1, 'x')
Example #4
0
    # Targets and their mask skip the first entry along axis 0
    # (presumably next-step prediction targets - TODO confirm against how
    # inpt is built outside this view).
    target = X_sym[1:]
    mask = X_mask_sym[1:]
    # Apply the 2-D context mask to c_sym by broadcasting it over a new
    # trailing axis.
    context = c_sym * c_mask_sym.dimshuffle(0, 1, 'x')
    # NOTE(review): the Print results below are discarded, so they presumably
    # only emit output when test values are computed at graph-build time.
    theano.printing.Print("inpt.shape")(inpt.shape)
    theano.printing.Print("target.shape")(target.shape)

    # Swap the first two axes of the 3-D input and insert a broadcastable
    # axis at position 1 to obtain the 4-D tensor handed to conv2d.
    inpt = inpt.dimshuffle(1, 'x', 0, 2)

    # First convolution: padding only on the first spatial axis
    # (conv_size1 - 1), subsample factor 2 on that axis and 1 on the other.
    border_mode = (conv_size1 - 1, 0)
    conv1 = conv2d(inpt, w_conv1, subsample=(2, 1), border_mode=border_mode)
    # Bias is broadcast over every axis except axis 1.
    # NOTE(review): no relu here, unlike conv2 below - confirm intentional.
    conv1 = conv1 + b_conv1.dimshuffle('x', 0, 'x', 'x')
    theano.printing.Print("conv1.shape")(conv1.shape)

    # Second convolution: same scheme with conv_size2, followed by relu.
    border_mode = (conv_size2 - 1, 0)
    conv2 = conv2d(conv1, w_conv2, subsample=(2, 1), border_mode=border_mode)
    conv2 = relu(conv2 + b_conv2.dimshuffle('x', 0, 'x', 'x'))
    theano.printing.Print("conv2.shape")(conv2.shape)

    # Last axis is 1
    # Index the last axis away, then move axis 2 to the front.
    conv_out = conv2[:, :, :, 0].dimshuffle(2, 0, 1)
    theano.printing.Print("conv_out.shape")(conv_out.shape)

    # Project the convolutional features into (input, gate) pairs for the
    # first two recurrent layers.
    conv_h1, convgate_h1 = conv_to_h1.proj(conv_out)
    conv_h2, convgate_h2 = conv_to_h2.proj(conv_out)

    # Positions 0 .. c_sym.shape[0] - 1 laid out on the last of three axes
    # (the first two are broadcastable), cast to floatX.
    u = tensor.arange(c_sym.shape[0]).dimshuffle('x', 'x', 0)
    u = tensor.cast(u, theano.config.floatX)

    def calc_phi(k_t, a_t, b_t, u_c):
        a_t = a_t.dimshuffle(0, 1, 'x')
        b_t = b_t.dimshuffle(0, 1, 'x')
Example #5
0
 # Build one softmax prediction per frame position and stack them, then
 # compute the categorical cross-entropy against the one-hot targets.
 # NOTE(review): the Print results are discarded, so they presumably only
 # emit output when test values are computed at graph-build time.
 pred_i = []
 # The concatenation of the two one-hot tensors does not depend on the loop
 # index, so it is built once instead of once per frame.
 joint = tensor.concatenate((inpt_oh, next_oh), axis=2)
 for i in range(n_frame):
     # Slice of the last axis of outs dedicated to frame i.
     partial_outs = outs[:, :, i * n_hid: (i + 1) * n_hid]
     # Window of n_frame entries along axis 2, starting at offset i.
     sliced_context = joint[:, :, i:i + n_frame]
     theano.printing.Print("sliced_context.shape")(sliced_context.shape)
     shp = sliced_context.shape
     # Collapse everything after the first two axes into one feature axis.
     sliced_context = sliced_context.reshape((shp[0], shp[1], -1))
     features = tensor.concatenate((partial_outs, sliced_context), axis=2)
     theano.printing.Print("partial_outs.shape")(partial_outs.shape)
     theano.printing.Print("joint.shape")(joint.shape)
     theano.printing.Print("sliced_context.shape")(sliced_context.shape)
     theano.printing.Print("features.shape")(features.shape)
     shp = features.shape
     # Flatten the two leading axes so the MLP sees a 2-D matrix.
     mlp_inpt = features.reshape((-1, shp[-1]))
     mlp1 = relu(mlp_inpt.dot(mlp1_w) + mlp1_b)
     mlp2 = relu(mlp1.dot(mlp2_w) + mlp2_b)
     mlp3 = relu(mlp2.dot(mlp3_w) + mlp3_b)
     pred = softmax(mlp3.dot(pred_w) + pred_b)
     theano.printing.Print("pred.shape")(pred.shape)
     # Restore the two leading axes that were flattened for the MLP.
     pred = pred.reshape((shp[0], shp[1], -1))
     theano.printing.Print("pred.shape")(pred.shape)
     # Add a trailing axis so the per-frame predictions can be stacked.
     pred_i.append(pred.dimshuffle(0, 1, 2, 'x'))
 # Stack along the new last axis, then swap the last two axes so the frame
 # index comes before the softmax axis.
 pred = tensor.concatenate(pred_i, axis=-1).dimshuffle(0, 1, 3, 2)
 theano.printing.Print("pred.shape")(pred.shape)
 theano.printing.Print("target.shape")(target.shape)
 target = theano_one_hot(target, n_classes=n_bins)
 theano.printing.Print("target.shape")(target.shape)
 # pred and target are passed to the loss as they are; no axis reordering
 # happens at this point.
 cost = categorical_crossentropy(pred, target, eps=1E-9)
 theano.printing.Print("cost.shape")(cost.shape)
        theano.printing.Print("x_t.shape")(x_t.shape)
        return x_t, h1_t, h2_t, h3_t, k_t, w_t, ss_t, sh_t

    # Build the one-step sampling graph from the initial states; the call
    # yields eight symbolic outputs.
    # NOTE(review): sample_step receives c_sym here while the scan below
    # receives `context` - confirm the difference is intended.
    (sampled, h1_s, h2_s, h3_s, k_s, w_s, stop_s,
     stop_h) = sample_step(init_x, init_h1, init_h2, init_h3, init_kappa,
                           init_w, c_sym)
    # NOTE(review): the Print results in this section are discarded, so they
    # presumably only emit output when test values are computed at
    # graph-build time.
    theano.printing.Print("sampled.shape")(sampled.shape)

    # Training recurrence: `step` (defined outside this view) is scanned over
    # the six projected input sequences, carrying five recurrent outputs and
    # receiving `context` unchanged at every step.
    (h1, h2, h3, kappa, w), updates = theano.scan(
        fn=step,
        sequences=[inp_h1, inpgate_h1, inp_h2, inpgate_h2, inp_h3, inpgate_h3],
        outputs_info=[init_h1, init_h2, init_h3, init_kappa, init_w],
        non_sequences=[context])

    # Sum the output projections of all three hidden layers, then apply
    # three relu layers.
    outs = h1.dot(h1_to_outs) + h2.dot(h2_to_outs) + h3.dot(h3_to_outs)
    l1 = relu(outs.dot(l1_proj) + b_l1_proj)
    l2 = relu(l1.dot(l2_proj) + b_l2_proj)
    l3 = relu(l2.dot(l3_proj) + b_l3_proj)
    # Remember the shape so the two leading axes can be restored below.
    shp = l3.shape

    # Flatten to 2-D for the softmax projection, then restore the leading
    # axes and split the projection into (n_features, n_softmax).
    l3 = l3.reshape((-1, shp[-1]))
    preds = l3.dot(softmax_proj) + b_softmax_proj
    preds = preds.reshape((shp[0], shp[1], n_features, n_softmax))
    # Pre-softmax activations are scaled by (1 + softmax_bias_sym).
    preds = softmax(preds * (1. + softmax_bias_sym))
    theano.printing.Print("preds.shape")(preds.shape)
    theano.printing.Print("target.shape")(target.shape)
    # One-hot encode the targets with n_softmax classes before the loss.
    target = theano_one_hot(target, n_softmax)
    theano.printing.Print("target.shape")(target.shape)
    cost = categorical_crossentropy(preds, target, eps=1E-9)
    theano.printing.Print("cost.shape")(cost.shape)