Example 1
    def __call__(self, Y, U):
        if Y.ndim > (self.axis + 1):
            Y = Y.reshape(Y.shape[:self.axis] +
                          [cgt.mul_multi(Y.shape[self.axis:])])

        outer_YU = cgt.broadcast(
            '*', Y.dimshuffle(range(Y.ndim) + ['x']),
            U.dimshuffle([0] + ['x'] * self.axis + [1]),
            ''.join(['x'] * Y.ndim + ['1', ',', 'x'] + ['1'] * self.axis +
                    ['x']))
        bilinear = cgt.dot(
            outer_YU.reshape(
                (outer_YU.shape[0], cgt.mul_multi(outer_YU.shape[1:]))),
            self.M.reshape((self.y_dim, self.y_dim * self.u_dim)).T)
        if self.axis > 1:
            bilinear = bilinear.reshape((-1, ) + self.y_shape[:self.axis - 1] +
                                        (self.y_dim, ))
        linear = cgt.dot(U, self.N.T)
        if self.axis > 1:
            linear = linear.dimshuffle([0] + ['x'] * (self.axis - 1) + [1])
        activation = bilinear + linear
        if self.b is not None:
            activation += cgt.broadcast(
                '+', activation, self.b.dimshuffle(['x'] * self.axis + [0]),
                ''.join(['x'] * activation.ndim + [','] + ['1'] *
                        (activation.ndim - 1) + ['x']))
        activation = activation.reshape((-1, ) + self.y_shape)
        return activation
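For reference, the reshape/broadcast gymnastics above compute, per sample, a bilinear term (the outer product of Y and U contracted with M) plus a linear term N u and a bias. A minimal NumPy sketch of the same arithmetic for a single flat sample (the array names and sizes are illustrative assumptions, not part of the layer):

import numpy as np

# Hypothetical sizes for illustration only.
y_dim, u_dim = 4, 3
y = np.random.randn(y_dim)                 # current feature vector
u = np.random.randn(u_dim)                 # control/input vector
M = np.random.randn(y_dim, y_dim, u_dim)   # bilinear tensor (self.M)
N = np.random.randn(y_dim, u_dim)          # linear map on u (self.N)
b = np.random.randn(y_dim)                 # bias (self.b)

# Bilinear term: contract M with the outer product of y and u, which is
# what the reshape + dot of outer_YU against M computes batch-wise.
bilinear = np.einsum('ijk,j,k->i', M, y, u)
activation = bilinear + N.dot(u) + b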
Example 2
File: rrnn.py Project: zobot/rrnn
def make_deep_rrnn(size_input, size_mem, n_layers, size_output, size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers+1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer+1] # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer==0 else outputs[i_layer-1]
        size_x = size_input if i_layer==0 else size_mem
        size_batch = prev_h.shape[0]

        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True
        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
        r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
        r_norm = cgt.norm(r_non, axis=2, keepdims=True)
        r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")
        prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
        inters_in = [prev_h_3]

        colon = slice(None, None, None)

        for i in xrange(2 * k_in):
            inter_in = inters_in[-1]
            r_cur = cgt.subtensor(r, [colon, i, colon])
            r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
            r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
            ref_cur = cgt.batched_matmul(r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
            inter_out = inter_in - 2 * ref_cur
            inters_in.append(inter_out)

        h_in_rot = cgt.reshape(inters_in[-1], (size_batch, size_mem))
        inters_h = [h_in_rot]

        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)


    category_activations = nn.Affine(size_mem, size_output,name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
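Each pass through the inner loops applies a Householder-style reflection with a unit vector r: the state is updated as h <- h - 2 r (r^T h), which is what the batched_matmul / dot expressions compute. A standalone NumPy sketch of one such reflection (illustrative only, not code from the repo):

import numpy as np

size_mem = 8
h = np.random.randn(size_mem)

r = np.random.randn(size_mem)
r /= np.linalg.norm(r)              # unit reflection vector, like the rows of xform_h

h_reflected = h - 2 * r * r.dot(h)  # Householder reflection

# Reflections are orthogonal, so the norm of the hidden state is preserved.
assert np.isclose(np.linalg.norm(h), np.linalg.norm(h_reflected))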
Example 3
def dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
    X = nn.dropout(X, p_drop_input)
    h = nn.rectify(cgt.dot(X, w_h))

    h = nn.dropout(h, p_drop_hidden)
    h2 = nn.rectify(cgt.dot(h, w_h2))

    h2 = nn.dropout(h2, p_drop_hidden)
    py_x = nn.softmax(cgt.dot(h2, w_o))
    return py_x
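A minimal sketch of how a model function like this is typically wired up with CGT symbolic inputs and shared weights; the layer sizes and variable names below are assumptions for illustration, not taken from the original script:

import numpy as np
import cgt
from cgt import nn

X = cgt.matrix("X")  # e.g. (batch, 784) for MNIST
w_h = cgt.shared(np.random.randn(784, 256) * 0.01, name="w_h")
w_h2 = cgt.shared(np.random.randn(256, 256) * 0.01, name="w_h2")
w_o = cgt.shared(np.random.randn(256, 10) * 0.01, name="w_o")

# Dropout probabilities of 0 give a deterministic forward pass for prediction.
py_x = dense_model(X, w_h, w_h2, w_o, 0.0, 0.0)
predict = cgt.function([X], py_x)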
Example 4
    def take_one_step(self, nn_input_bf, hid_out=None):
        # Sometimes you don't want to unroll all t-steps of a recurrence but rather just one forward step.
        num_batch = nn_input_bf.shape[0]

        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, cell_previous, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):

            input_n = cgt.broadcast("+", cgt.dot(input_n, W_in_stacked), b_stacked, "xx,1x")

            # Calculate gates pre-activations and slice
            gates = input_n + cgt.dot(hid_previous, W_hid_stacked)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new cell value
            cell = forgetgate*cell_previous + ingate*cell_input
            # Compute new hidden unit activation
            hid = outgate*self.nonlinearity(cell)
            return [cell, hid]

        if self.cell_prev is None:
            ones = cgt.ones((num_batch, 1))
            self.cell_prev = cgt.dot(ones, self.cell_init)
            self.hid_prev = cgt.dot(ones, self.hid_init)

        if hid_out is None:
            if self.hid_prev is None:
                self.hid_prev = cgt.dot(cgt.ones((num_batch, 1)), self.hid_init)
            hid_out = self.hid_prev

        # step expects (input_n, cell_previous, hid_previous, ...), so pass the
        # previous cell state before the previous hidden state.
        one_step_out = step(nn_input_bf, self.cell_prev, hid_out,
                            self.W_hid_stacked, self.W_in_stacked, self.b_stacked)
        self.cell_prev = one_step_out[0]
        self.hid_prev = one_step_out[1]

        return self.hid_prev
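The nested step function is a standard LSTM update. As a plain NumPy reference for one time step (a sketch with assumed sigmoid/tanh gate nonlinearities and the same [ingate, forgetgate, cell, outgate] slicing, not code from the class):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step_np(x, c_prev, h_prev, W_in, W_hid, b, num_units):
    # Gates are laid out as [ingate, forgetgate, cell_input, outgate], like slice_w.
    gates = x.dot(W_in) + h_prev.dot(W_hid) + b
    i = sigmoid(gates[:, 0 * num_units:1 * num_units])
    f = sigmoid(gates[:, 1 * num_units:2 * num_units])
    g = np.tanh(gates[:, 2 * num_units:3 * num_units])
    o = sigmoid(gates[:, 3 * num_units:4 * num_units])
    c = f * c_prev + i * g      # new cell value
    h = o * np.tanh(c)          # new hidden activation
    return c, h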
Example 5
def dense_model3(X, w_h, w_h2, w_h3, w_o, p_drop_input, p_drop_hidden):
    X = nn.dropout(X, p_drop_input)
    h = nn.rectify(cgt.dot(X, w_h))

    h = nn.dropout(h, p_drop_hidden[0])
    h2 = nn.rectify(cgt.dot(h, w_h2))

    h2 = nn.dropout(h2, p_drop_hidden[1])
    h3 = nn.rectify(cgt.dot(h2, w_h3))

    h3 = nn.dropout(h3, p_drop_hidden[2])
    py_x = nn.softmax(cgt.dot(h3, w_o))
    return py_x
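Note that, unlike dense_model above, dense_model3 indexes p_drop_hidden, so it expects one dropout probability per hidden layer. A hedged usage sketch (the weight variables are assumed to be defined elsewhere):

py_x = dense_model3(X, w_h, w_h2, w_h3, w_o,
                    p_drop_input=0.2, p_drop_hidden=(0.5, 0.5, 0.5))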
Example 6
File: rrnn.py Project: zoemcc/rrnn
def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output,
                            size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[
            i_layer +
            1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True

        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)

        prev_h_scaled = nn.scale_mag(prev_h)

        h_in_added = prev_h_scaled + add_in_relu
        inters_h = [h_in_added]

        colon = slice(None, None, None)

        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            #r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output,
                                     name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
Example 7
def test_flatvec():
    cgt.reset_config()
    cgt.set_precision('double')
    cgt.core.update_config(backend="python")  # XXX

    N = 10
    K = 3

    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)

    pars = [w_k, b]
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err, cgt.flatcat(g)])
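setup_contiguous_storage packs the parameters into one flat array and cgt.flatcat concatenates the gradients to match, so a basic gradient-descent loop can operate directly on the flat view. A sketch continuing the test (assuming, as in CGT's examples, that in-place updates to flatx write through to w_k and b):

    stepsize = 1e-3
    for _ in xrange(100):
        err_val, grad_flat = f()
        flatx -= stepsize * grad_flat  # in-place update of the flattened parameters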
Example 8
def test_devices():
    N = 10
    K = 3

    compile_info = cgt.compilation.get_compile_info()
    cuda_enabled = compile_info["CGT_ENABLE_CUDA"]
    if not cuda_enabled:
        raise SkipTest("cuda disabled")

    Xval = np.random.randn(N,K).astype(cgt.floatX)
    wval = np.random.randn(K).astype(cgt.floatX)
    bval = np.asarray(np.random.randn()).astype(cgt.floatX)
    yval = np.random.randn(N).astype(cgt.floatX)

    with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")):

        X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
        y_n = cgt.shared(yval, "y")
        w_k = cgt.shared(wval, "w")
        b = cgt.shared(bval, name="b")

        print "bval",bval

        ypred = cgt.dot(cgt.square(X_nk), w_k) + b

        err = cgt.sum(cgt.sin(ypred - y_n))
        g = cgt.grad(err, [w_k, b])
        outputs = [err]+g
        f = cgt.function([], [err]+g)
        results = f()
        print results
        assert np.allclose(results[0] , np.sin(np.square(Xval).dot(wval)+bval-yval).sum())
Example 9
    def take_one_step(self, input_bf, hid_out=None):
        num_batch = input_bf.shape[0]

        def step(input_bh, hid_previous_bh):
            hid_pre_bh = self.hid_to_hid(hid_previous_bh)
            hid_pre_bh += self.in_to_hid(input_bh)
            return self.activation(hid_pre_bh)

        if self.prev_out is None:
            self.prev_out = cgt.dot(cgt.ones((num_batch, 1)), self.hid_init)

        if hid_out is None:
            ones = cgt.ones((num_batch, 1))
            self.prev_out = cgt.dot(ones, self.hid_init)
            hid_out = self.prev_out

        self.prev_out = step(input_bf, hid_out)
        return self.prev_out
Example 10
def convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):
    l1a = nn.rectify(nn.conv2d(X, w, kernelshape=(3,3), pad=(1,1)))
    l1 = nn.max_pool_2d(l1a, kernelshape=(2, 2), stride=(2,2))
    l1 = nn.dropout(l1, p_drop_conv)

    l2a = nn.rectify(nn.conv2d(l1, w2, kernelshape=(3,3), pad=(1,1)))
    l2 = nn.max_pool_2d(l2a, kernelshape=(2, 2), stride=(2,2))
    l2 = nn.dropout(l2, p_drop_conv)

    l3a = nn.rectify(nn.conv2d(l2, w3, kernelshape=(3,3), pad=(1,1)))
    l3b = nn.max_pool_2d(l3a, kernelshape=(2, 2), stride=(2,2))
    batchsize,channels,rows,cols = l3b.shape
    l3 = cgt.reshape(l3b, [batchsize, channels*rows*cols])
    l3 = nn.dropout(l3, p_drop_conv)

    l4 = nn.rectify(cgt.dot(l3, w4))
    l4 = nn.dropout(l4, p_drop_hidden)
    
    pyx = nn.softmax(cgt.dot(l4, w_o))
    return pyx
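A sketch of plausible shapes for calling convnet_model on 1-channel 28x28 inputs; the filter layout (out_channels, in_channels, rows, cols) and all sizes below are assumptions for illustration, not taken from the original script:

import numpy as np
import cgt
from cgt import nn

X = cgt.tensor4("X")  # assumed (batch, 1, 28, 28)
w   = cgt.shared(np.random.randn(32, 1, 3, 3).astype(cgt.floatX) * 0.01)
w2  = cgt.shared(np.random.randn(64, 32, 3, 3).astype(cgt.floatX) * 0.01)
w3  = cgt.shared(np.random.randn(128, 64, 3, 3).astype(cgt.floatX) * 0.01)
# After three 2x2 poolings a 28x28 map is 3x3 (assuming odd sizes are floored),
# so the flattened l3 has 128 * 3 * 3 features.
w4  = cgt.shared(np.random.randn(128 * 3 * 3, 625).astype(cgt.floatX) * 0.01)
w_o = cgt.shared(np.random.randn(625, 10).astype(cgt.floatX) * 0.01)

pyx = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv=0.2, p_drop_hidden=0.5)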
Example 11
        def step(input_n, cell_previous, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):

            input_n = cgt.broadcast("+", cgt.dot(input_n, W_in_stacked), b_stacked, "xx,1x")

            # Calculate gates pre-activations and slice
            gates = input_n + cgt.dot(hid_previous, W_hid_stacked)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new cell value
            cell = forgetgate*cell_previous + ingate*cell_input
            # Compute new hidden unit activation
            hid = outgate*self.nonlinearity(cell)
            return [cell, hid]
Example 12
def test_linreg():
    cgt.reset_config()
    cgt.set_precision('double')
    N = 10
    K = 3

    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple, an, _ = cgt.core.simplify_and_analyze(g)

    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(
        g_simple,
        nodefn=lambda node, o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk: Xval, w_k: wval, b: bval, y_n: yval}

    np.testing.assert_allclose(cgt.numeric_eval(err, d),
                               np.linalg.norm(Xval.dot(wval) + bval - yval)**2)
    np.testing.assert_allclose(cgt.numeric_eval(g[0], d),
                               2 * Xval.T.dot(Xval.dot(wval) + bval - yval))
    np.testing.assert_allclose(cgt.numeric_eval(g[1], d),
                               2 * np.sum(Xval.dot(wval) + bval - yval, 0))
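For reference, the expected values in these assertions follow from differentiating the loss by hand: with err = ||Xw + b - y||^2 and residual r = Xw + b - y, the gradients are d err/dw = 2 X^T r and d err/db = 2 sum_i r_i, which are exactly the NumPy expressions that cgt.numeric_eval is compared against.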
Example 13
        def step(input_n, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input = cgt.dot(hid_previous, W_hid_stacked)

            # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
            input_n = cgt.broadcast("+", input_n.dot(W_in_stacked), b_stacked, "xx,1x")

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate*hidden_update_hid

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate)*hid_previous + updategate*hidden_update
            return self.nonlinearity_hid(hid)  # adding this non-linearity seems to help stability.
Example 14
def test_linreg():
    N = 10
    K = 3

    Xval = np.random.randn(N,K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple,an,_ = cgt.core.simplify_and_analyze(g)


    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(g_simple, nodefn=lambda node,o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk : Xval, w_k : wval, b : bval, y_n : yval}

    np.testing.assert_allclose(cgt.numeric_eval(err,d), np.linalg.norm(Xval.dot(wval) + bval - yval)**2,
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
    np.testing.assert_allclose(cgt.numeric_eval(g[0],d), 2 * Xval.T.dot(Xval.dot(wval) + bval - yval),
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
    np.testing.assert_allclose(cgt.numeric_eval(g[1],d), 2 *  np.sum(Xval.dot(wval) + bval - yval, 0),
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
Example 15
    def __call__(self, x):
        input_btf = x
        input_tbf = cgt.dimshuffle(input_btf, [1, 0, 2])
        seq_len, num_batch = input_tbf.shape[0], input_tbf.shape[1]

        def step(input_bh, hid_previous_bh):
            hid_pre_bh = self.hid_to_hid(hid_previous_bh)
            hid_pre_bh += self.in_to_hid(input_bh)
            return self.activation(hid_pre_bh)

        hid_init_bh = cgt.dot(cgt.ones((num_batch, 1)), self.hid_init)

        hid_out_tbf = unroll_recurrence(
            step_function=step,
            input_to_unroll_tbf=input_tbf,
            hid_init=[hid_init_bh],
            go_backwards=self.backwards,
            n_steps=self.timesteps)

        hid_out_btf = cgt.dimshuffle(hid_out_tbf, [1, 0, 2])
        if self.backwards:
            hid_out_btf = cgt.flip(hid_out_btf, [1])
        return hid_out_btf
Example 16
    def __init__(self, input, n_in, n_out, W=None, b=None,
                 activation=cgt.tanh, prefix=""):
        self.n_in = n_in
        self.n_out = n_out

        if W is None:
            # XXX replace with nn init
            W_values = np.asarray(
                rng.uniform(
                    low=-np.sqrt(6. / (n_in + n_out)),
                    high=np.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=cgt.floatX
            )
            if activation == cgt.sigmoid:
                W_values *= 4

            W = cgt.shared(W_values, name=prefix+"_W")

        if b is None:
            b_values = np.zeros((n_out,), dtype=cgt.floatX)
            b = cgt.shared(b_values, name=prefix+"_b")

        self.W = W
        self.b = b

        # XXX broadcast api may change
        lin_output = cgt.broadcast("+", cgt.dot(input, self.W),
                cgt.dimshuffle(self.b, ["x", 0]), "xx,1x")
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # parameters of the model
        self.params = [self.W, self.b]
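A hedged usage sketch: the enclosing class name (HiddenLayer below) is a hypothetical stand-in, since the snippet only shows an __init__, and rng is assumed to be a module-level numpy RandomState, as the call to rng.uniform suggests:

import numpy as np
import cgt

rng = np.random.RandomState(0)       # assumed module-level RNG used by __init__

X = cgt.matrix("X")
layer = HiddenLayer(X, n_in=784, n_out=256, activation=cgt.tanh, prefix="h1")  # hypothetical class name
hidden = layer.output                # symbolic activations, shape (batch, 256)
params = layer.params                # [W, b], e.g. for cgt.grad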
Example 17
def dot(x, y):
    return cgt.dot(x, y)
Example 18
    def take_one_step(self, nn_input_bf, hid_out):

        #PROBABLY BUGGED. SHOULD BE REWRITTEN.

        self.num_batches = cgt.infer_shape(nn_input_bf)[0]

        # (n_time_steps, n_batch, n_features)
        #input_bf = cgt.dimshuffle(nn_input_bf, [1, 0, 2])

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        W_in_stacked = cgt.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = cgt.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases into a (3*num_units) vector
        b_stacked = cgt.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=1)


        # At each loop, input_n will be (n_time_steps, 3*num_units).
        # We define a slicing function that extract the input to each GRU gate
        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input = cgt.dot(hid_previous, W_hid_stacked)

            # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
            input_n = cgt.broadcast("+", input_n.dot(W_in_stacked), b_stacked, "xx,1x")

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate*hidden_update_hid

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate)*hid_previous + updategate*hidden_update
            return self.nonlinearity_hid(hid)  # adding this non-linearity seems to help stability.
            #return hid

        if hid_out is None:
            if self.hid_out is None:
                self.hid_out = cgt.dot(cgt.ones((self.num_batches, 1)), self.hid_init)
            hid_out = self.hid_out



        # Retrieve the dimensionality of the incoming layer
        hid_out = step(nn_input_bf, hid_out, W_hid_stacked, W_in_stacked, b_stacked)

        # dimshuffle back to (n_batch, n_time_steps, n_features))
        # self.hid_out = cgt.dimshuffle(self.hid_out, [1, 0, 2])

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = cgt.flip(hid_out, [1])

        self.hid_out = hid_out

        return hid_out
Example 19
    def __call__(self, input_btf):

        # (n_time_steps, n_batch, n_features)
        input_tbf = cgt.dimshuffle(input_btf, [1, 0, 2])
        self.num_batches = cgt.infer_shape(input_tbf)[1]

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        W_in_stacked = cgt.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = cgt.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases into a (3*num_units) vector
        b_stacked = cgt.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=1)


        # At each loop, input_n will be (n_time_steps, 3*num_units).
        # We define a slicing function that extract the input to each GRU gate
        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input = cgt.dot(hid_previous, W_hid_stacked)

            # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
            input_n = cgt.broadcast("+", input_n.dot(W_in_stacked), b_stacked, "xx,1x")

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate*hidden_update_hid

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate)*hid_previous + updategate*hidden_update
            return hid

        sequences = [input_tbf]
        step_fun = step
        hid_init = cgt.dot(cgt.ones((self.num_batches, 1)), self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [W_hid_stacked]
        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        non_seqs += [W_in_stacked, b_stacked]
        # theano.scan only allows for positional arguments, so when
        # self.precompute_input is True, we need to supply fake placeholder
        # arguments for the input weights and biases.

        # Retrieve the dimensionality of the incoming layer
        hid_out = unroll_lstm(
            fn=step_fun,
            sequences=sequences,
            outputs_info=[hid_init],
            go_backwards=self.backwards,
            non_sequences=non_seqs,
            n_steps=self.timesteps)[0]

        # dimshuffle back to (n_batch, n_time_steps, n_features))
        hid_out = cgt.dimshuffle(hid_out, [1, 0, 2])

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = cgt.flip(hid_out, [1])

        return hid_out
Example 20
    def __call__(self, nn_input_btf):

        # Because scan iterates over the first dimension we dimshuffle to
        # (n_time_steps, n_batch, n_features)
        nn_input_tbf = cgt.dimshuffle(nn_input_btf, [1, 0, 2])
        seq_len, num_batch = nn_input_tbf.shape[0], nn_input_tbf.shape[1]

        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, cell_previous, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):

            input_n = cgt.broadcast("+", cgt.dot(input_n, W_in_stacked), b_stacked, "xx,1x")

            # Calculate gates pre-activations and slice
            gates = input_n + cgt.dot(hid_previous, W_hid_stacked)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new cell value
            cell = forgetgate*cell_previous + ingate*cell_input
            # Compute new hidden unit activation
            hid = outgate*self.nonlinearity(cell)
            return [cell, hid]

        sequences = nn_input_tbf
        step_fun = step

        ones = cgt.ones((num_batch, 1))
        cell_init = cgt.dot(ones, self.cell_init)
        hid_init = cgt.dot(ones, self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [self.W_hid_stacked]
        non_seqs += [self.W_in_stacked, self.b_stacked]
        cell_out, hid_out = unroll_lstm(
            fn=step_fun,
            sequences=sequences,
            outputs_info=[cell_init, hid_init],
            go_backwards=self.backwards,
            non_sequences=non_seqs,
            n_steps=self.timesteps)

        # dimshuffle back to (n_batch, n_time_steps, n_features))
        hid_out = cgt.dimshuffle(hid_out, [1, 0, 2])

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = cgt.flip(hid_out, [1])

        return hid_out
Example 21
# hyperparams
#
# Be careful when setting alpha! If it's too large
# here the cost will blow up.
alpha = 1e-7
epochs = 100

# Linear regression model
np.random.seed(0)
X = cgt.matrix("X", fixed_shape=(None, nfeats))
Y = cgt.vector("Y")
w = cgt.shared(np.random.randn(nfeats) * 0.01)

# prediction
ypred = cgt.dot(X, w)

# cost
cost = cgt.square(Y - ypred).mean()

# derivative with respect to w
dw = cgt.grad(cost=cost, wrt=w)
updates = [(w, w - dw * alpha)]

# training function
trainf = cgt.function(inputs=[X, Y], outputs=[], updates=updates)
# cost function, no updates
costf = cgt.function(inputs=[X, Y], outputs=cost)

for i in xrange(epochs):
    trainf(X_train, Y_train)
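To watch the cost go down, costf can be evaluated alongside the update; a sketch of the same loop with periodic reporting (X_train, Y_train, and nfeats come from earlier in the original script and are assumed here):

for i in xrange(epochs):
    trainf(X_train, Y_train)
    if i % 10 == 0:
        # costf returns the mean squared error under the current weights
        print "epoch %i, cost %f" % (i, costf(X_train, Y_train))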