Example #1
File: test_einsum.py Project: zxie/cgt
def test_einsum():
    x = cgt.tensor3()
    y = cgt.tensor3()

    sizes = {'i': 2, 'j': 3, 'k': 5, 'l': 7}
    xaxes = 'ijk'
    yaxes = 'ikl'
    zaxes = 'ijl'
    for i in xrange(10):
        xperm = xaxes
        (yperm, zperm) = permaxes = [[chars[i] for i in np.random.permutation(3)]
                                     for chars in [yaxes, zaxes]]
        desc = "%s,%s->%s" % tuple("".join(chars) for chars in [xperm] + permaxes)
        z = cgt.einsum(desc, x, y)
        xval = nr.randn(*(sizes[c] for c in xperm))
        yval = nr.randn(*(sizes[c] for c in yperm))
        np.testing.assert_allclose(
            cgt.numeric_eval(z, {x: xval, y: yval}),
            np.einsum(desc, xval, yval),
            atol={"single": 1e-3, "double": 1e-6}[cgt.get_precision()])
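For reference, a minimal standalone sketch of the contraction this test exercises (the batched matrix product "ijk,ikl->ijl"), using only the calls that appear above; the axis sizes are the ones from the test's sizes dict, and atol=1e-6 assumes double precision:

import cgt
import numpy as np
import numpy.random as nr

x = cgt.tensor3()
y = cgt.tensor3()
z = cgt.einsum("ijk,ikl->ijl", x, y)          # batched matrix product
xval, yval = nr.randn(2, 3, 5), nr.randn(2, 5, 7)
np.testing.assert_allclose(
    cgt.numeric_eval(z, {x: xval, y: yval}),  # evaluate the symbolic graph
    np.einsum("ijk,ikl->ijl", xval, yval),    # numpy reference
    atol=1e-6)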
Example #2
def make_ntm(opt):
    Mprev_bnm = cgt.tensor3("M", fixed_shape=(opt.b, opt.n, opt.m))
    X_bk = cgt.matrix("X", fixed_shape=(opt.b, opt.k))
    wprev_bHn = cgt.tensor3("w", fixed_shape=(opt.b, opt.h*2, opt.n))
    rprev_bhm = cgt.tensor3("r", fixed_shape=(opt.b, opt.h, opt.m))
    controller = make_ff_controller(opt)
    M_bnm, w_bHn, r_bhm, y_bp = ntm_step(opt, Mprev_bnm, X_bk, wprev_bHn, rprev_bhm, controller)
    # in this form it looks like a standard seq-to-seq model
    # external input and output are first elements
    ntm = nn.Module([X_bk, Mprev_bnm, wprev_bHn, rprev_bhm], [y_bp, M_bnm, w_bHn, r_bhm])
    return ntm
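The returned nn.Module behaves as a callable from its input list to its output list, so a single NTM step can be applied as below; a hedged sketch assuming opt and the make_ntm_initial_states helper (used in Examples #14 and #16) are in scope:

ntm = make_ntm(opt)
x_bk = cgt.matrix("x", fixed_shape=(opt.b, opt.k))
states = make_ntm_initial_states(opt)  # assumed helper: [M, w, r] initial state arrays
y_bp, M_bnm, w_bHn, r_bhm = ntm([x_bk] + states)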
Example #3
File: tests.py Project: bstadie/nnbuilder
def test_get_train_objective():
    batch_size = 32
    feat_t_steps = 5
    feat_num_features = 256
    max_label_length = 5
    num_out_classes = 27
    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes))
    seq2seq = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes)
    train_objective = seq2seq.get_train_objective(max_label_length=max_label_length,
                                                  ground_labels_basis_btc=ground_labels_basis)
    train_shape = cgt.infer_shape(train_objective)
    assert train_shape == ()
    nn.get_parameters(train_objective)
Example #4
File: tests.py Project: bstadie/nnbuilder
def test_get_character_distribution():
    batch_size = 32
    feat_t_steps = 20
    feat_num_features = 42
    num_out_classes = 28  # This is the index of the start token.
    num_out_classes_true = num_out_classes + 2  # Add start and end tokens automatically.
    decoder_size = 50

    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features))
    tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_num_features), (batch_size, feat_num_features))
    tau3 = np.reshape(np.random.normal(0.1, 0.2, batch_size*decoder_size), (batch_size, decoder_size))

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes,
                          decoder_size=decoder_size, feature_size=feat_num_features)

    context_bf = cgt.matrix(fixed_shape=(batch_size, feat_num_features))
    state_bf = cgt.matrix(fixed_shape=(batch_size, decoder_size))

    m_out = s.get_character_distribution(state_bf, context_bf)

    out_fun = cgt.function([feats, context_bf, state_bf], [m_out])
    m = out_fun(tau, tau2, tau3)[0]

    assert m.shape == (batch_size, num_out_classes_true)
Example #5
def make_ff_controller(opt):

    b, h, m, p, k = opt.b, opt.h, opt.m, opt.p, opt.k

    H = 2*h
    in_size = k + h*m
    out_size = H*m + H + H + H*3 + H + h*m + h*m + p

    # Previous reads
    r_bhm = cgt.tensor3("r", fixed_shape = (b,h,m))
    # External inputs
    X_bk = cgt.matrix("x", fixed_shape = (b,k))
    r_b_hm = r_bhm.reshape([r_bhm.shape[0], r_bhm.shape[1]*r_bhm.shape[2]])
    # Input to controller
    inp_bq = cgt.concatenate([X_bk, r_b_hm], axis=1)

    hid_sizes = opt.ff_hid_sizes
    activation = cgt.tanh

    layer_out_sizes = [in_size] + hid_sizes + [out_size]
    last_out = inp_bq
    # feedforward part. we could simplify a bit by using nn.Affine
    for i in xrange(len(layer_out_sizes)-1):
        indim = layer_out_sizes[i]
        outdim = layer_out_sizes[i+1]        
        W = cgt.shared(.02*nr.randn(indim, outdim), name="W%i"%i, fixed_shape_mask="all")
        bias = cgt.shared(.02*nr.randn(1, outdim), name="b%i"%i, fixed_shape_mask="all")
        last_out = cgt.broadcast("+",last_out.dot(W),bias,"xx,1x")
        # Don't apply nonlinearity at the last layer
        if i != len(layer_out_sizes)-2: last_out = activation(last_out)
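    # (Per the comment above, each iteration is just an affine map followed by
    # an activation; a hypothetical shorthand would be
    #     last_out = nn.Affine(indim, outdim)(last_out)
    # though the exact nn.Affine constructor signature is assumed here, not
    # taken from this project.)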

    idx = 0
    k_bHm = last_out[:,idx:idx+H*m];      idx += H*m;         k_bHm = k_bHm.reshape([b,H,m])
    beta_bH = last_out[:,idx:idx+H];      idx += H
    g_bH = last_out[:,idx:idx+H];         idx += H
    s_bH3 = last_out[:,idx:idx+3*H];      idx += 3*H;         s_bH3 = s_bH3.reshape([b,H,3])
    gamma_bH = last_out[:,idx:idx+H];     idx += H
    e_bhm = last_out[:,idx:idx+h*m];      idx += h*m;         e_bhm = e_bhm.reshape([b,h,m])
    a_bhm = last_out[:,idx:idx+h*m];      idx += h*m;         a_bhm = a_bhm.reshape([b,h,m])
    y_bp = last_out[:,idx:idx+p];         idx += p

    k_bHm = cgt.tanh(k_bHm)
    beta_bH = nn.softplus(beta_bH)
    g_bH = cgt.sigmoid(g_bH)
    s_bH3 = sum_normalize2(cgt.exp(s_bH3))
    gamma_bH = cgt.sigmoid(gamma_bH)+1
    e_bhm = cgt.sigmoid(e_bhm)
    a_bhm = cgt.tanh(a_bhm)
    # y_bp = y_bp

    assert infer_shape(k_bHm) == (b,H,m)
    assert infer_shape(beta_bH) == (b,H)
    assert infer_shape(g_bH) == (b,H)
    assert infer_shape(s_bH3) == (b,H,3)
    assert infer_shape(gamma_bH) == (b,H)
    assert infer_shape(e_bhm) == (b,h,m)
    assert infer_shape(a_bhm) == (b,h,m)
    assert infer_shape(y_bp) == (b,p)

    return nn.Module([r_bhm, X_bk], [k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp])
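Note on the H = 2*h convention above: the addressing outputs (k, beta, g, s, gamma) are produced for all 2h heads, while the erase/add vectors (e, a) and the reads (r) cover only h heads each; presumably the controller drives h read heads and h write heads at once.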
Example #6
File: sfnn.py Project: TZ2016/snn
def make_funcs(config, dbg_out={}):
    net_in, net_out = hybrid_network(config['num_inputs'], config['num_outputs'],
                                     config['num_units'], config['num_sto'],
                                     dbg_out=dbg_out)
    if not config['dbg_out_full']: dbg_out = {}
    # def f_sample(_inputs, num_samples=1, flatten=False):
    #     _mean, _var = f_step(_inputs)
    #     _samples = []
    #     for _m, _v in zip(_mean, _var):
    #         _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples)
    #         if flatten: _samples.extend(_s)
    #         else: _samples.append(_s)
    #     return np.array(_samples)
    Y_gt = cgt.matrix("Y")
    Y_prec = cgt.tensor3('V', fixed_shape=(None, config['num_inputs'], config['num_inputs']))
    params = nn.get_parameters(net_out)
    size_batch, size_out = net_out.shape
    inputs, outputs = [net_in], [net_out]
    if config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec)
    if config['weight_decay'] > 0.:
        print "Applying penalty on parameter norm"
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2)
        loss_vec -= loss_param # / size_batch
    loss = cgt.sum(loss_vec) / size_batch

    # TODO_TZ f_step seems not to fail if X has wrong dim
    f_step = cgt.function(inputs, outputs)
    f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt], outputs,
                                [loss_vec], params, _dbg_out=dbg_out)

    return params, f_step, None, None, None, f_surr
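The three None slots appear to pad the return value out to the six-tuple convention (params, f_step, f_loss, f_grad, f_init, f_surr) used by the rnn.py variant of make_funcs in Examples #18 and #20 below.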
Example #7
File: tests.py Project: bstadie/nnbuilder
def test_get_decoder_state():
    batch_size = 32
    feat_t_steps = 20
    feat_num_features = 42
    num_out_classes = 28
    num_out_classes_true = num_out_classes + 2  # Start and end tokens are added
    decoder_size = 50

    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features))
    tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_num_features), (batch_size, feat_num_features))
    tau3 = np.reshape(np.random.normal(0.1, 0.2, batch_size*num_out_classes_true), (batch_size, num_out_classes_true))

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes,
                          decoder_size=decoder_size, feature_size=feat_num_features)

    context_bf = cgt.matrix(fixed_shape=(batch_size, feat_num_features))
    prev_out_bc = cgt.matrix(fixed_shape=(batch_size, num_out_classes_true))
    state_i_bf = nn.parameter(nn.init_array(nn.IIDGaussian(0.1), (batch_size, decoder_size)), name="decoder_init")
    decoder_out = s.get_decoder_state(context_bf, prev_out_bc, state_i_bf)
    decode_fun = cgt.function([feats, context_bf, prev_out_bc], [decoder_out])

    m = decode_fun(tau, tau2, tau3)[0]
    assert m.shape == (batch_size, decoder_size)
    assert np.mean(m) < 1.0
Example #8
File: rrnn.py Project: zoemcc/rrnn
def make_loss_and_grad_and_step(arch, size_input, size_output, size_mem,
                                size_batch, n_layers, n_unroll, k_in, k_h):
    # symbolic variables

    x_tnk = cgt.tensor3()
    targ_tnk = cgt.tensor3()
    #make_network = make_deep_lstm if arch=="lstm" else make_deep_gru
    make_network = make_deep_rrnn_rot_relu
    network = make_network(size_input, size_mem, n_layers, size_output,
                           size_batch, k_in, k_h)
    init_hiddens = [
        cgt.matrix() for _ in xrange(get_num_hiddens(arch, n_layers))
    ]
    # TODO fixed sizes

    cur_hiddens = init_hiddens
    loss = 0
    for t in xrange(n_unroll):
        outputs = network([x_tnk[t]] + cur_hiddens)
        cur_hiddens, prediction_logprobs = outputs[:-1], outputs[-1]
        # loss = loss + nn.categorical_negloglik(prediction_probs, targ_tnk[t]).sum()
        loss = loss - (prediction_logprobs * targ_tnk[t]).sum()
        cur_hiddens = outputs[:-1]

    final_hiddens = cur_hiddens

    loss = loss / (n_unroll * size_batch)

    params = network.get_parameters()
    gradloss = cgt.grad(loss, params)

    flatgrad = flatcat(gradloss)

    with utils.Message("compiling loss+grad"):
        f_loss_and_grad = cgt.function([x_tnk, targ_tnk] + init_hiddens,
                                       [loss, flatgrad] + final_hiddens)
    f_loss = cgt.function([x_tnk, targ_tnk] + init_hiddens, loss)

    assert len(init_hiddens) == len(final_hiddens)

    x_nk = cgt.matrix('x')
    outputs = network([x_nk] + init_hiddens)

    f_step = cgt.function([x_nk] + init_hiddens, outputs)

    # print "node count", cgt.count_nodes(flatgrad)
    return network, f_loss, f_loss_and_grad, f_step
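A hedged usage sketch for the returned functions, assuming the arguments above are in scope; it also assumes every hidden state has width size_mem, which may not hold for this rotation-RNN architecture:

network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(
    "rrnn", size_input, size_output, size_mem, size_batch, n_layers, n_unroll, k_in, k_h)
x = np.zeros((n_unroll, size_batch, size_input), cgt.floatX)    # (time, batch, features)
targ = np.zeros((n_unroll, size_batch, size_output), cgt.floatX)
hiddens = [np.zeros((size_batch, size_mem), cgt.floatX)
           for _ in xrange(get_num_hiddens("rrnn", n_layers))]  # assumed uniform width
out = f_loss_and_grad(x, targ, *hiddens)
loss, flatgrad, final_hiddens = out[0], out[1], out[2:]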
Example #9
File: tests.py Project: bstadie/nnbuilder
def test_seq2seq_init():
    batch_size = 32
    feat_t_steps = 5
    feat_num_features = 256
    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=28)
    #assert type(s.recurrent_decoder_one) == nnbuilder.GRULayer
    assert s.get_features_fun == s.get_features_bengio
Example #10
File: tests.py Project: bstadie/nnbuilder
def test_make_prediction():
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 20  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription; includes the start character.
    num_out_classes = 27
    num_out_classes_true = 27 + 2

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print 'initializing seq2seq'
    seq2seq = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes)
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    last_time = time.time()
    print 'making prediction objective'
    pred = seq2seq.make_prediction(ground_labels_basis_btc=ground_labels_basis, max_label_length=feat_t_steps)
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    last_time = time.time()
    print 'compiling pred function'
    pred_fun = cgt.function([feats, ground_labels_basis], [pred])
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    data_mean = np.mean(test_data)
    data_sd = np.std(test_data)

    print 'now predicting'
    last_time = time.time()
    batch_iter = 0
    batch = test_data[batch_iter, :, 0:feat_t_steps, :]
    batch = batch - data_mean
    batch = batch / data_sd
    labels = test_labels[batch_iter, :, 0:feat_t_steps]
    labels_basis = ind_to_basis(num_out_classes_true, labels)

    prediction_final = pred_fun(batch, labels_basis)[0]
    one_pred = prediction_final[0]
    print 'that took ' + str(time.time() - last_time) + ' seconds'
Example #11
File: rrnn.py Project: zobot/rrnn
def make_loss_and_grad_and_step(arch, size_input, size_output, size_mem, size_batch, n_layers, n_unroll, k_in, k_h):
    # symbolic variables

    x_tnk = cgt.tensor3()
    targ_tnk = cgt.tensor3()
    #make_network = make_deep_lstm if arch=="lstm" else make_deep_gru
    make_network = make_deep_rrnn_rot_relu
    network = make_network(size_input, size_mem, n_layers, size_output, size_batch, k_in, k_h)
    init_hiddens = [cgt.matrix() for _ in xrange(get_num_hiddens(arch, n_layers))]
    # TODO fixed sizes

    cur_hiddens = init_hiddens
    loss = 0
    for t in xrange(n_unroll):
        outputs = network([x_tnk[t]] + cur_hiddens)
        cur_hiddens, prediction_logprobs = outputs[:-1], outputs[-1]
        # loss = loss + nn.categorical_negloglik(prediction_probs, targ_tnk[t]).sum()
        loss = loss - (prediction_logprobs*targ_tnk[t]).sum()
        cur_hiddens = outputs[:-1]

    final_hiddens = cur_hiddens

    loss = loss / (n_unroll * size_batch)

    params = network.get_parameters()
    gradloss = cgt.grad(loss, params)

    flatgrad = flatcat(gradloss)

    with utils.Message("compiling loss+grad"):
        f_loss_and_grad = cgt.function([x_tnk, targ_tnk] + init_hiddens, [loss, flatgrad] + final_hiddens)
    f_loss = cgt.function([x_tnk, targ_tnk] + init_hiddens, loss)

    assert len(init_hiddens) == len(final_hiddens)

    x_nk = cgt.matrix('x')
    outputs = network([x_nk] + init_hiddens)

    f_step = cgt.function([x_nk]+init_hiddens, outputs)

    # print "node count", cgt.count_nodes(flatgrad)
    return network, f_loss, f_loss_and_grad, f_step
Example #12
File: test_einsum.py Project: xindaya/cgt
def test_einsum():
    x = cgt.tensor3()
    y = cgt.tensor3()

    sizes = {"i": 2, "j": 3, "k": 5, "l": 7}
    xaxes = "ijk"
    yaxes = "ikl"
    zaxes = "ijl"
    for i in xrange(10):
        xperm = xaxes
        (yperm, zperm) = permaxes = [[chars[i] for i in np.random.permutation(3)] for chars in [yaxes, zaxes]]
        desc = "%s,%s->%s" % tuple("".join(chars) for chars in [xperm] + permaxes)
        z = cgt.einsum(desc, x, y)
        xval = nr.randn(*(sizes[c] for c in xperm))
        yval = nr.randn(*(sizes[c] for c in yperm))
        np.testing.assert_allclose(
            cgt.numeric_eval(z, {x: xval, y: yval}),
            np.einsum(desc, xval, yval),
            atol={"single": 1e-3, "double": 1e-6}[cgt.get_precision()],
        )
Example #13
File: test_einsum.py Project: ketranm/cgt
def test_einsum():
    cgt.reset_config()
    cgt.set_precision("double")
    x = cgt.tensor3()
    y = cgt.tensor3()

    sizes = {'i':2,'j':3,'k':5,'l':7}
    xaxes = 'ijk'
    yaxes = 'ikl'
    zaxes = 'ijl'
    for i in xrange(10):
        xperm = xaxes
        (yperm,zperm) = permaxes = [[chars[i] for i in np.random.permutation(3)] for chars in [yaxes,zaxes]]
        desc = "%s,%s->%s"%tuple("".join(chars) for chars in [xperm] + permaxes)
        z = cgt.einsum(desc, x, y)
        xval = nr.randn(*(sizes[c] for c in xperm))
        yval = nr.randn(*(sizes[c] for c in yperm))
        np.testing.assert_allclose(
            cgt.numeric_eval(z, {x : xval, y : yval}),
            np.einsum(desc, xval, yval))
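Unlike Examples #1 and #12, this variant pins the precision up front with cgt.reset_config() and cgt.set_precision("double"), so assert_allclose can rely on its default tolerance instead of selecting atol from cgt.get_precision().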
Example #14
def make_funcs(opt, ntm, total_time, loss_timesteps):
    x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k))
    y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p))
    loss_timesteps = set(loss_timesteps)

    initial_states = make_ntm_initial_states(opt)
    params = ntm.get_parameters() + get_parameters(initial_states)
    # params = ntm.get_parameters()

    lossCE = 0
    loss01 = 0

    state_arrs = initial_states
    for t in xrange(total_time):
        tmp = ntm([x_tbk[t]] + state_arrs)
        raw_pred = tmp[0]
        state_arrs = tmp[1:4]

        if t in loss_timesteps:
            p_pred = cgt.sigmoid(raw_pred)
            ce = bernoulli_crossentropy(y_tbp[t], p_pred).sum()  # cross-entropy of Bernoulli distribution
            lossCE = lossCE + ce
            loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)), cgt.floatX).sum()

    lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2)
    loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b)
    gradloss = cgt.grad(lossCE, params)

    flatgrad = flatcat(gradloss)

    f_loss = cgt.function([x_tbk, y_tbp], lossCE)
    f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad])

    print "number of nodes in computation graph:", core.count_nodes(
        [lossCE, loss01, flatgrad])

    return f_loss, f_loss_and_grad, params
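A hedged usage sketch for the compiled functions; array shapes follow the t,b,k / t,b,p naming above, and opt, total_time, and loss_timesteps are assumed in scope:

ntm = make_ntm(opt)  # cf. Example #2
f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, total_time, loss_timesteps)
x = np.zeros((total_time, opt.b, opt.k), cgt.floatX)  # input sequence
y = np.zeros((total_time, opt.b, opt.p), cgt.floatX)  # target sequence
lossCE, loss01, flatgrad = f_loss_and_grad(x, y)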
Example #15
File: tests.py Project: bstadie/nnbuilder
def test_get_features_default():
    batch_size = 32
    feat_t_steps = 20
    feat_num_features = 42
    num_out_classes = 28
    num_units = 20
    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes)
    out = s.get_features_fun(feats, num_units=num_units)
    out_fun = cgt.function([feats], [out])
    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features))
    m = out_fun(tau)[0]
    assert m.shape == (batch_size, feat_t_steps, num_units)
    assert np.mean(m) < 1
Example #16
def make_funcs(opt, ntm, total_time, loss_timesteps):    
    x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k))
    y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p))
    loss_timesteps = set(loss_timesteps)

    initial_states = make_ntm_initial_states(opt)
    params = ntm.get_parameters() + get_parameters(initial_states)
    # params = ntm.get_parameters()

    lossCE = 0
    loss01 = 0

    state_arrs = initial_states
    for t in xrange(total_time):
        tmp = ntm([x_tbk[t]] + state_arrs)
        raw_pred = tmp[0]
        state_arrs = tmp[1:4]

        if t in loss_timesteps:
            p_pred = cgt.sigmoid(raw_pred)
            ce = bernoulli_crossentropy(y_tbp[t], p_pred).sum()  # cross-entropy of Bernoulli distribution
            lossCE = lossCE + ce
            loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)),cgt.floatX).sum()


    lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2)
    loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b)
    gradloss = cgt.grad(lossCE, params)

    flatgrad = flatcat(gradloss)

    f_loss = cgt.function([x_tbk, y_tbp], lossCE)
    f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad])

    print "number of nodes in computation graph:", core.count_nodes([lossCE, loss01, flatgrad])

    return f_loss, f_loss_and_grad, params
Example #17
def main(num_epochs=NUM_EPOCHS):
    #cgt.set_precision('half')
    print("Building network ...")
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    X = cgt.tensor3(name='X', fixed_shape=(N_BATCH, MAX_LENGTH, 2))
    l_forward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN)
    l_backward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN, backwards=True)
    #l_forward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid)
    #l_backward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid, backwards=True)
    #l_forward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify)
    #l_backward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify, backwards=True)
    l_forward_slice = l_forward[:, MAX_LENGTH-1, :]  # Take the last timestep of the forward pass
    l_backward_slice = l_backward[:, 0, :]  # And the first timestep of the backward pass
    l_sum = cgt.concatenate([l_forward_slice, l_backward_slice], axis=1)
    l_out = nnbuilder.denseLayer(l_sum, num_units=1, activation=cgt.tanh)
    target_values = cgt.vector('target_output')
    predicted_values = l_out[:, 0]  # For this task we only need the last value
    cost = cgt.mean((predicted_values - target_values)**2)
    # Compute SGD updates for training
    print("Computing updates ...")
    updates = nn.rmsprop(cost, nn.get_parameters(l_out), LEARNING_RATE)
    #updates = nn.nesterov_momentum(cost, nn.get_parameters(l_out), 0.05)
    # cgt functions for training and computing cost
    print("Compiling functions ...")
    train = cgt.function([X, target_values], cost, updates=updates)
    compute_cost = cgt.function([X, target_values], cost)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val = gen_data()

    print("Training ...")
    time_start = time.time()
    try:
        for epoch in range(num_epochs):
            for _ in range(EPOCH_SIZE):
                X, y, m = gen_data()
                train(X, y)
            cost_val = compute_cost(X_val, y_val)
            print("Epoch {} validation cost = {}".format(epoch+1, cost_val))
            print ('Epoch took ' + str(time.time() - time_start))
            time_start = time.time()
    except KeyboardInterrupt:
        pass
Example #18
def make_funcs(config, dbg_out=None):
    params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1 = lstm_network(
        config['rnn_steps'], config['num_inputs'], config['num_outputs'],
        config['num_units'], config['num_mems'])

    # basic
    size_batch = Xs[0].shape[0]
    dY = Ys[0].shape[-1]
    Ys_gt = [
        cgt.matrix(fixed_shape=(size_batch, dY), name='Y%d' % t)
        for t in range(len(Ys))
    ]
    Ys_var = [cgt.tensor3(fixed_shape=(size_batch, dY, dY)) for _ in Ys]
    net_inputs, net_outputs = Xs + C_0 + H_0 + Ys_var, Ys + C_T + H_T

    # calculate loss
    loss_vec = []
    for i in range(len(Ys)):
        #     if i == 0: continue
        _l = dist.gaussian.logprob(Ys_gt[i], Ys[i], Ys_var[i])
        loss_vec.append(_l)
    loss_vec = cgt.add_multi(loss_vec)
    if config['weight_decay'] > 0.:
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat**2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / config['rnn_steps'] / size_batch
    grad = cgt.grad(loss, params)

    # functions
    def f_init(size_batch):
        c_0, h_0 = [], []
        for _n_m in config['num_mems']:
            if _n_m > 0:
                c_0.append(np.zeros((size_batch, _n_m)))
                h_0.append(np.zeros((size_batch, _n_m)))
        return c_0, h_0

    f_step = cgt.function([Xs[0]] + C_0 + H_0, [Ys[0]] + C_1 + H_1)
    f_loss = cgt.function(net_inputs + Ys_gt, loss)
    f_grad = cgt.function(net_inputs + Ys_gt, grad)
    f_surr = cgt.function(net_inputs + Ys_gt, [loss] + net_outputs + grad)
    return params, f_step, f_loss, f_grad, f_init, f_surr
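A hedged sketch pairing f_init with f_step for a single recurrent step; x0 is a hypothetical numpy batch of shape (size_batch, config['num_inputs']):

c0, h0 = f_init(size_batch)        # zero-filled memory and hidden states
step_out = f_step(x0, *(c0 + h0))  # one step: [Y0] + C_1 + H_1
y0 = step_out[0]
c1, h1 = step_out[1:1 + len(c0)], step_out[1 + len(c0):]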
Example #19
File: tests.py Project: bstadie/nnbuilder
def test_get_context():
    batch_size = 32
    feat_t_steps = 3
    feat_num_features = 30
    state_num_features = 20
    num_out_classes = 28
    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    prev_out = cgt.matrix(fixed_shape=(batch_size, state_num_features))
    sigmoided = cgt.sigmoid(prev_out)
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes, feature_size=feat_num_features, decoder_size=state_num_features)
    mm = cgt.infer_shape(s.features_post_mlp_btf)
    assert mm == (batch_size, feat_t_steps, feat_num_features)
    context_out = s.get_context(sigmoided)
    out_fun = cgt.function([feats, prev_out], [context_out])
    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features))
    tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*state_num_features), (batch_size, state_num_features))
    m = out_fun(tau, tau2)[0]
    assert m.shape == (batch_size, feat_num_features)
    assert np.mean(m) < 1
Example #20
File: rnn.py Project: TZ2016/snn
def make_funcs(config, dbg_out=None):
    params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1 = lstm_network(
        config['rnn_steps'], config['num_inputs'], config['num_outputs'],
        config['num_units'], config['num_mems']
    )

    # basic
    size_batch = Xs[0].shape[0]
    dY = Ys[0].shape[-1]
    Ys_gt = [cgt.matrix(fixed_shape=(size_batch, dY), name='Y%d'%t)
             for t in range(len(Ys))]
    Ys_var = [cgt.tensor3(fixed_shape=(size_batch, dY, dY)) for _ in Ys]
    net_inputs, net_outputs = Xs + C_0 + H_0 + Ys_var, Ys + C_T + H_T

    # calculate loss
    loss_vec = []
    for i in range(len(Ys)):
        #     if i == 0: continue
        _l = dist.gaussian.logprob(Ys_gt[i], Ys[i], Ys_var[i])
        loss_vec.append(_l)
    loss_vec = cgt.add_multi(loss_vec)
    if config['weight_decay'] > 0.:
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / config['rnn_steps'] / size_batch
    grad = cgt.grad(loss, params)

    # functions
    def f_init(size_batch):
        c_0, h_0 = [], []
        for _n_m in config['num_mems']:
            if _n_m > 0:
                c_0.append(np.zeros((size_batch, _n_m)))
                h_0.append(np.zeros((size_batch, _n_m)))
        return c_0, h_0
    f_step = cgt.function([Xs[0]] + C_0 + H_0, [Ys[0]] + C_1 + H_1)
    f_loss = cgt.function(net_inputs + Ys_gt, loss)
    f_grad = cgt.function(net_inputs + Ys_gt, grad)
    f_surr = cgt.function(net_inputs + Ys_gt, [loss] + net_outputs + grad)
    return params, f_step, f_loss, f_grad, f_init, f_surr
Example #21
File: sfnn.py Project: TZ2016/snn
def make_funcs(config, dbg_out={}):
    net_in, net_out = hybrid_network(config['num_inputs'],
                                     config['num_outputs'],
                                     config['num_units'],
                                     config['num_sto'],
                                     dbg_out=dbg_out)
    if not config['dbg_out_full']: dbg_out = {}
    # def f_sample(_inputs, num_samples=1, flatten=False):
    #     _mean, _var = f_step(_inputs)
    #     _samples = []
    #     for _m, _v in zip(_mean, _var):
    #         _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples)
    #         if flatten: _samples.extend(_s)
    #         else: _samples.append(_s)
    #     return np.array(_samples)
    Y_gt = cgt.matrix("Y")
    Y_prec = cgt.tensor3('V',
                         fixed_shape=(None, config['num_inputs'],
                                      config['num_inputs']))
    params = nn.get_parameters(net_out)
    size_batch, size_out = net_out.shape
    inputs, outputs = [net_in], [net_out]
    if config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec)
    if config['weight_decay'] > 0.:
        print "Applying penalty on parameter norm"
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat**2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / size_batch

    # TODO_TZ f_step seems not to fail if X has wrong dim
    f_step = cgt.function(inputs, outputs)
    f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt],
                                outputs, [loss_vec],
                                params,
                                _dbg_out=dbg_out)

    return params, f_step, None, None, None, f_surr
Example #22
File: cgt_gru.py Project: zxie/cgt
from gru import GRUCell
import time
from cgt.utils import Message
import numpy as np

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--horizon", type=int)
    args = parser.parse_args()
    horizon = args.horizon
    assert horizon is not None
    size = 128
    batchsize = 64
    cell = GRUCell([size], size)
    X = cgt.tensor3()
    init = cgt.matrix()

    prev_h = init
    for i in xrange(horizon):
        prev_h = cell(X[i], prev_h)
    loss = prev_h.sum()

    with Message("compiling"):
        f = cgt.function([X, init], cgt.grad(loss, cell.params()))
    with Message("running"):
        xval = np.zeros((horizon, batchsize, size), cgt.floatX)
        initval = np.zeros((batchsize, size), cgt.floatX)
        for i in xrange(100):
            f(xval, initval)
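For reference, the script is driven entirely by the flag parsed above; a typical invocation would be python cgt_gru.py --horizon 64, where the horizon sets how many GRU steps are unrolled before compiling the gradient (the value 64 is arbitrary).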
Example #23
File: seq_model.py Project: zxie/cgt
import gru, cgt, numpy as np
import sys
from time import time

elapsed = []
horizons = 2**np.arange(2, 10)

for horizon in horizons:
    print "HORIZON", horizon
    tstart = time()

    batch_size = 6
    dim_x = 16
    mem_size = 10

    X_tnk = cgt.tensor3("X")

    cell = gru.GRUCell([dim_x], mem_size)

    Minit_nk = cgt.zeros((X_tnk.shape[0], X_tnk.shape[1]), cgt.floatX)
    M = Minit_nk

    for t in xrange(horizon):
        M = cell(M, X_tnk[t])

    # cgt.print_tree(M)
    print "simplifying..."
    M_simp = cgt.simplify([M])
    print "done"
    # cgt.print_tree(M_simp)
    print "fn before:", cgt.count_nodes(M)
Example #24
File: seq_model.py Project: EdsterG/cgt
import gru,cgt, numpy as np
import sys
from time import time

elapsed = []
horizons = 2**np.arange(2, 10)

for horizon in horizons:
    print "HORIZON",horizon
    tstart = time()

    batch_size = 6
    dim_x = 16
    mem_size = 10

    X_tnk = cgt.tensor3("X")

    cell = gru.GRUCell([dim_x], mem_size)

    Minit_nk = cgt.zeros((X_tnk.shape[0], X_tnk.shape[1]),cgt.floatX)
    M = Minit_nk

    for t in xrange(horizon):
        M = cell(M, X_tnk[t])

    # cgt.print_tree(M)
    print "simplifying..."
    M_simp = cgt.simplify([M])
    print "done"
    # cgt.print_tree(M_simp)
    print "fn before:",cgt.count_nodes(M)
Example #25
File: tests.py Project: bstadie/nnbuilder
def test_seq_2_seq():
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 3  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription.
    num_out_classes = 27  # 26 letters and space.
    num_out_classes_true = 27 + 2  # Start and end tokens are added.
    num_batches = 512  # 1032
    num_epochs = 40

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print 'initializing seq2seq'
    seq2seq = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes)
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    last_time = time.time()
    print 'making train objective'
    train_objective = seq2seq.get_train_objective(max_label_length=max_label_length,
                                                  ground_labels_basis_btc=ground_labels_basis)
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    last_time = time.time()
    print 'making updates'
    updates = nn.rmsprop(train_objective, nn.get_parameters(train_objective), learning_rate=0.0001)
    #updates = nn.nesterov_momentum(train_objective, nn.get_parameters(train_objective), learning_rate=0.0001, mu=0.4)
    #updates = nn.momentum(train_objective, nn.get_parameters(train_objective), learning_rate=0.00001, mu=0.4)
    #updates = nn.adadelta(train_objective, nn.get_parameters(train_objective), learning_rate=0.0001, rho=0.95)
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    last_time = time.time()
    print 'compiling train function, test function, and prediction output function'
    train_function = cgt.function([feats, ground_labels_basis], [], updates=updates)
    test_function = cgt.function([feats, ground_labels_basis], [train_objective])
    pred = seq2seq.make_prediction(ground_labels_basis_btc=ground_labels_basis, max_label_length=feat_t_steps)
    pred_fun = cgt.function([feats, ground_labels_basis], [pred])
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    data_mean = np.mean(test_data)
    data_sd = np.std(test_data)

    print 'now training'
    last_time = time.time()
    for one_epoch in range(0, num_epochs):
        tested = 0
        print 'starting epoch ' + str(one_epoch)
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            train_function(batch, labels_basis)
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            tested += test_function(batch, labels_basis)[0]

        tested = tested / num_batches
        print 'mean train objective is ' + str(tested)

        print 'an actual prediction is '
        print pred_fun(batch, labels_basis)[0]
        print 'the truth is'
        print test_labels[batch_iter, :, 0:feat_t_steps]

        print 'that took ' + str(time.time() - last_time) + ' seconds'
        last_time = time.time()


    prediction_final = pred_fun(batch, labels_basis)[0]
    print prediction_final
Example #26
File: cgt_gru.py Project: EdsterG/cgt
from gru import GRUCell
import time
from cgt.utils import Message
import numpy as np

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--horizon",type=int)
    args = parser.parse_args()
    horizon = args.horizon
    assert horizon is not None    
    size=128
    batchsize=64
    cell = GRUCell([size],size)
    X = cgt.tensor3()
    init = cgt.matrix()

    prev_h = init
    for i in xrange(horizon):
        prev_h = cell(X[i], prev_h)
    loss = prev_h.sum()

    with Message("compiling"):
        f = cgt.function([X, init],cgt.grad(loss, cell.params()))
    with Message("running"):
        xval = np.zeros((horizon,batchsize,size),cgt.floatX)
        initval = np.zeros((batchsize, size), cgt.floatX)
        for i in xrange(100): 
            f(xval, initval)
Example #27
File: tests.py Project: bstadie/nnbuilder
def test_the_test_problem():
    # Works
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 20  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription; includes the start character.
    num_out_classes = 27
    num_out_classes_true = num_out_classes + 2
    num_batches = 756
    num_epochs = 30

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print 'initializing temporal dense layer'
    d1 = nnbuilder.temporalDenseLayer(feats, num_units=128, activation=cgt.sigmoid)
    #d2 = nnbuilder.temporalDenseLayer(d1, num_units=128, activation=cgt.sigmoid)
    d3 = nnbuilder.temporalDenseLayer(d1, num_units=num_out_classes_true, activation=nnbuilder.linear)
    out = nn.three_d_softmax(d3, axis=2)

    log_probs = None
    for iter_step in range(0, max_label_length):
        this_character_dist_bc = out[:, iter_step, :]
        prev_out_bc = ground_labels_basis[:, iter_step, :]
        log_probs_pre = prev_out_bc * this_character_dist_bc
        log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
        if log_probs is None:
            log_probs = cgt.sum(log_probs_pre)
        else:
            log_probs += cgt.sum(log_probs_pre)

    log_probs = -log_probs

    print 'that took ' + str(time.time() - last_time) + ' seconds'

    last_time = time.time()
    print 'compiling objective function'
    updates = nn.rmsprop(log_probs, nn.get_parameters(log_probs), learning_rate=0.01)
    pred_train = cgt.function([feats, ground_labels_basis], [], updates=updates)
    pred_fun = cgt.function([feats, ground_labels_basis], [log_probs])
    most_likely_chars = cgt.argmax(out, axis=2)  # most likely class at each timestep
    actual_predictions = cgt.function([feats, ground_labels_basis], [most_likely_chars])
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    data_mean = np.mean(test_data)
    data_sd = np.std(test_data)

    print 'now training'
    for one_epoch in range(0, num_epochs):
        trained = 0
        last_time = time.time()
        print 'starting epoch ' + str(one_epoch)
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            pred_train(batch, labels_basis)

        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            trained += pred_fun(batch, labels_basis)[0]

        trained = trained / num_batches
        print 'mean train loss is ' + str(trained)
        print 'that took ' + str(time.time() - last_time) + ' seconds'

        act_pred = actual_predictions(batch, labels_basis)[0]
        print 'an actual prediction is '
        print act_pred