Example #1
0
def test_get_decoder_state():
    """Check shape and mean of the array produced by ``Seq2Seq.get_decoder_state``."""
    n_batch, n_steps, n_feats = 32, 20, 42
    n_classes = 28
    n_classes_full = n_classes + 2  # start and end tokens are added
    n_decoder = 50

    # Random numeric inputs, drawn directly in their final shapes.
    feats_val = np.random.normal(0.1, 0.2, (n_batch, n_steps, n_feats))
    context_val = np.random.normal(0.1, 0.2, (n_batch, n_feats))
    prev_out_val = np.random.normal(0.1, 0.2, (n_batch, n_classes_full))

    feats = cgt.tensor3(fixed_shape=(n_batch, n_steps, n_feats))
    model = nnbuilder.Seq2Seq(nn_input_btf=feats,
                              num_out_classes=n_classes,
                              decoder_size=n_decoder,
                              feature_size=n_feats)

    context_bf = cgt.matrix(fixed_shape=(n_batch, n_feats))
    prev_out_bc = cgt.matrix(fixed_shape=(n_batch, n_classes_full))
    init_state = nn.parameter(
        nn.init_array(nn.IIDGaussian(0.1), (n_batch, n_decoder)),
        name="decoder_init")
    decoder_state = model.get_decoder_state(context_bf, prev_out_bc, init_state)
    decode = cgt.function([feats, context_bf, prev_out_bc], [decoder_state])

    result = decode(feats_val, context_val, prev_out_val)[0]
    assert result.shape == (n_batch, n_decoder)
    assert np.mean(result) < 1.0
Example #2
0
def test_matmuls():
    with cgt.scoped_update_config(parallel=True):

        m = 8
        d = 1000

        # build graph

        X = cgt.matrix("X")
        Y = cgt.matrix("Y")
        loss = 0
        for k in xrange(m):
            # loss = loss+cgt.sin(X*Y+k).sum()
            loss = loss + (X.dot(Y + k)).sum()

            f = cgt.function([X, Y], loss)

        # test things out!

        seed(0)

        X_val = randn(d, d)
        Y_val = randn(d, d)
        vals = [X_val, Y_val]

        tic = time.time()
        out = f(*vals)
        toc = time.time()

        print toc - tic
Example #3
0
def test_get_character_distribution():
    """Check the output shape of ``Seq2Seq.get_character_distribution``."""
    n_batch, n_steps, n_feats = 32, 20, 42
    n_classes = 28  # This is the index of the start token.
    n_classes_full = n_classes + 2  # Start and end tokens are added automatically.
    n_decoder = 50

    # Random numeric inputs, drawn directly in their final shapes.
    feats_val = np.random.normal(0.1, 0.2, (n_batch, n_steps, n_feats))
    context_val = np.random.normal(0.1, 0.2, (n_batch, n_feats))
    state_val = np.random.normal(0.1, 0.2, (n_batch, n_decoder))

    feats = cgt.tensor3(fixed_shape=(n_batch, n_steps, n_feats))
    model = nnbuilder.Seq2Seq(nn_input_btf=feats,
                              num_out_classes=n_classes,
                              decoder_size=n_decoder,
                              feature_size=n_feats)

    context_bf = cgt.matrix(fixed_shape=(n_batch, n_feats))
    state_bf = cgt.matrix(fixed_shape=(n_batch, n_decoder))

    char_dist = model.get_character_distribution(state_bf, context_bf)

    evaluate = cgt.function([feats, context_bf, state_bf], [char_dist])
    result = evaluate(feats_val, context_val, state_val)[0]

    assert result.shape == (n_batch, n_classes_full)
Example #4
0
def test_matmuls():
    with cgt.scoped_update_config(parallel = True, backend="native"):

        m = 8
        d = 1000

        # build graph

        X = cgt.matrix("X")
        Y = cgt.matrix("Y")
        loss=0
        for k in xrange(m):
            # loss = loss+cgt.sin(X*Y+k).sum()
            loss = loss+(X.dot(Y+k)).sum()

            f = cgt.function([X,Y], loss)

        # test things out!

        seed(0)

        X_val = randn(d, d)
        Y_val = randn(d, d)
        vals = [X_val, Y_val]

        tic = time.time()
        out = f(*vals)
        toc = time.time()

        print toc-tic
Example #5
0
def make_deep_lstm(size_input, size_mem, n_layers, size_output, size_batch):
    """Build one time step of a stacked LSTM as an ``nn.Module``.

    Module inputs:  ``[x, c_0, h_0, c_1, h_1, ...]`` -- the external input
    followed by one (cell, hidden) pair per layer.
    Module outputs: ``[next_c_0, next_h_0, ..., logprobs]`` -- the updated
    pairs in the same order, followed by the log-softmax of an affine map of
    the top layer's hidden state.

    Args:
        size_input: width of the external input matrix.
        size_mem: width of every cell / hidden state.
        n_layers: number of stacked LSTM layers.
        size_output: number of output categories.
        size_batch: fixed batch dimension of every input matrix.
    """
    inputs = [cgt.matrix(fixed_shape=(size_batch, size_input))]
    for _ in xrange(2 * n_layers):
        inputs.append(cgt.matrix(fixed_shape=(size_batch, size_mem)))
    outputs = []
    for i_layer in xrange(n_layers):
        # inputs[0] is the external input, so layer i's (c, h) pair sits at
        # 2*i+1 and 2*i+2 -- the same order in which outputs are appended.
        prev_c = inputs[i_layer * 2 + 1]
        prev_h = inputs[i_layer * 2 + 2]
        if i_layer == 0:
            x = inputs[0]
            size_x = size_input
        else:
            # Feed the hidden state (not the cell) of the layer below.
            x = outputs[(i_layer - 1) * 2 + 1]
            size_x = size_mem
        # prev_h always has width size_mem, independent of size_x.
        input_sums = nn.Affine(size_x, 4 * size_mem)(x) + nn.Affine(
            size_mem, 4 * size_mem)(prev_h)
        sigmoid_chunk = cgt.sigmoid(input_sums[:, 0:3 * size_mem])
        in_gate = sigmoid_chunk[:, 0:size_mem]
        forget_gate = sigmoid_chunk[:, size_mem:2 * size_mem]
        out_gate = sigmoid_chunk[:, 2 * size_mem:3 * size_mem]
        in_transform = cgt.tanh(input_sums[:, 3 * size_mem:4 * size_mem])
        next_c = forget_gate * prev_c + in_gate * in_transform
        next_h = out_gate * cgt.tanh(next_c)
        outputs.append(next_c)
        outputs.append(next_h)

    # outputs[-1] is the hidden state of the top layer.
    category_activations = nn.Affine(size_mem, size_output)(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    return nn.Module(inputs, outputs)
    def __init__(self, n_actions):
        """Build the symbolic graph and compile the functions of this policy.

        A one-hidden-layer tanh network maps observations to a softmax over
        ``n_actions``. Compiled functions for the action probabilities, the
        surrogate objective, its KL term and the gradients are stored on
        ``self``; the parameters are wrapped in ``self.pc``.
        """
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        nhid = 64

        # Symbolic inputs.
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        # Network: rescale observations, one tanh layer, softmax output.
        scaled_obs = (o_no - 128.0)/128.0
        hidden_layer = nn.Affine(n_in, nhid, weight_init=nn.IIDGaussian(std=.1))
        hidden = cgt.tanh(hidden_layer(scaled_obs))
        output_layer = nn.Affine(nhid, n_actions, weight_init=nn.IIDGaussian(std=0.01))
        probs_na = nn.softmax(output_layer(hidden))
        logprobs_na = cgt.log(probs_na)

        # Log-probability of each taken action, weighted by q_n.
        n_rows = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(n_rows), a_n]
        surr = (logps_n*q_n).mean()
        kl_terms = oldpdist_np * cgt.log(oldpdist_np/probs_na)
        kl = kl_terms.sum(axis=1).mean()

        params = nn.get_parameters(surr)
        flatgrad = cgt.concatenate([g.flatten() for g in cgt.grad(surr, params)])

        # Penalized objective and its flattened gradient.
        lam = cgt.scalar()
        penobj = surr - lam * kl
        pen_flatgrad = cgt.concatenate(
            [g.flatten() for g in cgt.grad(penobj, params)])
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n], pen_flatgrad)
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example #7
0
File: rrnn.py Project: zobot/rrnn
def make_deep_lstm(size_input, size_mem, n_layers, size_output, size_batch):
    """Build one time step of a stacked LSTM as an ``nn.Module``.

    Module inputs:  ``[x, c_0, h_0, c_1, h_1, ...]`` -- the external input
    followed by one (cell, hidden) pair per layer.
    Module outputs: ``[next_c_0, next_h_0, ..., logprobs]`` -- the updated
    pairs in the same order, followed by the log-softmax of an affine map of
    the top layer's hidden state.

    Args:
        size_input: width of the external input matrix.
        size_mem: width of every cell / hidden state.
        n_layers: number of stacked LSTM layers.
        size_output: number of output categories.
        size_batch: fixed batch dimension of every input matrix.
    """
    inputs = [cgt.matrix(fixed_shape=(size_batch, size_input))]
    for _ in xrange(2*n_layers):
        inputs.append(cgt.matrix(fixed_shape=(size_batch, size_mem)))
    outputs = []
    for i_layer in xrange(n_layers):
        # inputs[0] is the external input, so layer i's (c, h) pair sits at
        # 2*i+1 and 2*i+2 -- the same order in which outputs are appended.
        prev_c = inputs[i_layer*2+1]
        prev_h = inputs[i_layer*2+2]
        if i_layer==0:
            x = inputs[0]
            size_x = size_input
        else:
            # Feed the hidden state (not the cell) of the layer below.
            x = outputs[(i_layer-1)*2+1]
            size_x = size_mem
        # prev_h always has width size_mem, independent of size_x.
        input_sums = nn.Affine(size_x, 4*size_mem)(x) + nn.Affine(size_mem, 4*size_mem)(prev_h)
        sigmoid_chunk = cgt.sigmoid(input_sums[:,0:3*size_mem])
        in_gate = sigmoid_chunk[:,0:size_mem]
        forget_gate = sigmoid_chunk[:,size_mem:2*size_mem]
        out_gate = sigmoid_chunk[:,2*size_mem:3*size_mem]
        in_transform = cgt.tanh(input_sums[:,3*size_mem:4*size_mem])
        next_c = forget_gate*prev_c + in_gate * in_transform
        next_h = out_gate*cgt.tanh(next_c)
        outputs.append(next_c)
        outputs.append(next_h)

    # outputs[-1] is the hidden state of the top layer.
    category_activations = nn.Affine(size_mem, size_output)(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    return nn.Module(inputs, outputs)
Example #8
0
 def s_func_lstm(_in, _s_in, _s_out, name=''):
     """Create an LSTM block on ``_in`` and record its state nodes.

     Two fresh symbolic matrices stand for the previous cell and hidden
     state; they and the block's new cell / hidden outputs are appended to
     the enclosing ``net_*`` lists. Returns the new hidden output.
     """
     state_shape = (None, _s_out)
     prev_cell = cgt.matrix(fixed_shape=state_shape)
     prev_hidden = cgt.matrix(fixed_shape=state_shape)
     new_cell, new_hidden = lstm_block(prev_hidden, prev_cell, _in,
                                       _s_in, _s_out, name)
     recorded = (
         (net_c_prev, prev_cell),
         (net_h_prev, prev_hidden),
         (net_c_curr, new_cell),
         (net_h_curr, new_hidden),
     )
     for registry, node in recorded:
         registry.append(node)
     return new_hidden
Example #9
0
File: rnn.py Project: TZ2016/snn
 def s_func_lstm(_in, _s_in, _s_out, name=''):
     """Create an LSTM block on ``_in`` and record its state nodes.

     Two fresh symbolic matrices stand for the previous cell and hidden
     state; they and the block's new cell / hidden outputs are appended to
     the enclosing ``net_*`` lists. Returns the new hidden output.
     """
     state_shape = (None, _s_out)
     prev_cell = cgt.matrix(fixed_shape=state_shape)
     prev_hidden = cgt.matrix(fixed_shape=state_shape)
     new_cell, new_hidden = lstm_block(prev_hidden, prev_cell, _in,
                                       _s_in, _s_out, name)
     recorded = (
         (net_c_prev, prev_cell),
         (net_h_prev, prev_hidden),
         (net_c_curr, new_cell),
         (net_h_curr, new_hidden),
     )
     for registry, node in recorded:
         registry.append(node)
     return new_hidden
Example #10
0
    def __init__(self, obs_dim, ctrl_dim):
        """Build the symbolic graph and compile the functions of this policy.

        A two-hidden-layer tanh network maps observations to a mean of width
        ``ctrl_dim``; a separate parameter holds the log standard deviation.
        Compiled functions for the surrogate objective, the KL term, the
        penalized gradient and the (mean, std) output are stored on ``self``;
        the parameters are wrapped in ``self.pc``.
        """
        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        # Symbolic inputs.
        o_no = cgt.matrix("o_no",fixed_shape=(None,obs_dim))
        a_na = cgt.matrix("a_na",fixed_shape = (None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))

        logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
        self.logstd = logstd_1a
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network: two tanh layers, linear output.
        nhid = 32
        hidden = o_no
        for n_from in (obs_dim, nhid):
            layer = nn.Affine(n_from, nhid, weight_init=nn.IIDGaussian(std=0.1))
            hidden = cgt.tanh(layer(hidden))
        mean_na = nn.Affine(nhid, ctrl_dim, weight_init=nn.IIDGaussian(std=0.01))(hidden)

        # Repeat the single std row once per observation row.
        n_rows = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, n_rows, axis=0)

        # The old distribution is packed as [mean | std] along axis 1.
        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]

        z_new = (a_na - mean_na) / std_na
        z_old = (a_na - oldmean_na) / oldstd_na
        logp_n = (-.5) * cgt.square(z_new).sum(axis=1) - logstd_1a.sum()
        oldlogp_n = (-.5) * cgt.square(z_old).sum(axis=1) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)
        surr = (ratio_n*adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl_terms = (cgt.log(std_na / oldstd_na)
                    + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na)
                    - .5)
        kl = kl_terms.sum(axis=1).mean()

        # Penalized objective and its flattened gradient.
        lam = cgt.scalar()
        penobj = surr - lam * kl
        pen_flatgrad = cgt.concatenate(
            [g.flatten() for g in cgt.grad(penobj, params)])

        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
        self._compute_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_na, adv_n], pen_flatgrad)
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])

        self.pc = ParamCollection(params)
Example #11
0
    def __init__(self, xdim, args, dec="bernoulli"):
        """Build the encoder/decoder graph and compile train/test/encode functions.

        Args:
            xdim: input width, passed to the encoder and decoder MLPs.
            args: options object; ``hdim``, ``zdim``, ``lmbda``, ``nlayers``,
                ``batch_size`` and ``lr`` are read from it.
            dec: decoder type, ``"bernoulli"`` or ``"gaussian"``.

        Raises:
            RuntimeError: if ``dec`` is not one of the supported decoder types.
        """
        self.xdim = xdim
        self.hdim = args.hdim
        self.zdim = args.zdim
        self.lmbda = args.lmbda  # weight decay coefficient * 2
        self.x = cgt.matrix("x", dtype=cgt.floatX)
        self.eps = cgt.matrix("eps", dtype=cgt.floatX)

        self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim, nlayers=args.nlayers, eps=self.eps)
        if dec == "bernoulli":
            # log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
            self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
        elif dec == "gaussian":
            self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
        else:
            # "%s" is required: a bare "%" is an incomplete format and made
            # this line raise ValueError instead of the intended RuntimeError.
            raise RuntimeError("unrecognized decoder %s" % dec)

        self.cost = (-cgt.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var)) + self.dec_mlp.cost) / args.batch_size
        self.params = self.enc_mlp.params + self.dec_mlp.params
        # L2 regularization
        self.gparams = [cgt.grad(self.cost, [p])[0] + self.lmbda * p for p in self.params]
        # One squared-gradient accumulator per parameter, same shape as the parameter.
        self.gaccums = [cgt.shared(np.zeros(p.op.get_value().shape, dtype=cgt.floatX)) for p in self.params]

        # XXX replace w/ adagrad update from nn
        ADAGRAD_EPS = 1e-10  # for stability
        self.updates = [
            (param, param - args.lr * gparam / cgt.sqrt(gaccum + cgt.square(gparam) + ADAGRAD_EPS))
            for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
        ]
        self.updates += [
            (gaccum, gaccum + cgt.square(gparam))
            for gaccum, gparam in zip(self.gaccums, self.gparams)
        ]

        self.train = cgt.function(
            [self.x, self.eps],
            self.cost,
            updates=self.updates
        )
        # Same cost as train, but without applying any updates.
        self.test = cgt.function(
            [self.x, self.eps],
            self.cost,
            updates=None
        )
        # can be used for semi-supervised learning for example
        self.encode = cgt.function(
            [self.x, self.eps],
            self.enc_mlp.out
        )
Example #12
0
File: sfnn.py Project: TZ2016/snn
def make_funcs(config, dbg_out={}):
    net_in, net_out = hybrid_network(config['num_inputs'], config['num_outputs'],
                                     config['num_units'], config['num_sto'],
                                     dbg_out=dbg_out)
    if not config['dbg_out_full']: dbg_out = {}
    # def f_sample(_inputs, num_samples=1, flatten=False):
    #     _mean, _var = f_step(_inputs)
    #     _samples = []
    #     for _m, _v in zip(_mean, _var):
    #         _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples)
    #         if flatten: _samples.extend(_s)
    #         else: _samples.append(_s)
    #     return np.array(_samples)
    Y_gt = cgt.matrix("Y")
    Y_prec = cgt.tensor3('V', fixed_shape=(None, config['num_inputs'], config['num_inputs']))
    params = nn.get_parameters(net_out)
    size_batch, size_out = net_out.shape
    inputs, outputs = [net_in], [net_out]
    if config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec)
    if config['weight_decay'] > 0.:
        print "Applying penalty on parameter norm"
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2)
        loss_vec -= loss_param # / size_batch
    loss = cgt.sum(loss_vec) / size_batch

    # TODO_TZ f_step seems not to fail if X has wrong dim
    f_step = cgt.function(inputs, outputs)
    f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt], outputs,
                                [loss_vec], params, _dbg_out=dbg_out)

    return params, f_step, None, None, None, f_surr
Example #13
0
def make_ff_controller(opt):
    """Build a feedforward controller as an ``nn.Module``.

    The module takes ``[r_bhm, X_bk]`` (previous reads and external input),
    runs their concatenation through a stack of affine layers with tanh in
    between, and splits the final layer into eight outputs:
    ``[k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp]``.
    Sizes come from ``opt.b, opt.h, opt.m, opt.p, opt.k`` and
    ``opt.ff_hid_sizes``.
    """
    b, h, m, p, k = opt.b, opt.h, opt.m, opt.p, opt.k

    H = 2*h
    in_size = k + h*m
    # Width of each output slice, in the order they are cut out below.
    widths = [H*m, H, H, H*3, H, h*m, h*m, p]
    out_size = H*m + H + H + H*3 + H + h*m + h*m + p

    # Previous reads
    r_bhm = cgt.tensor3("r", fixed_shape = (b,h,m))
    # External inputs
    X_bk = cgt.matrix("x", fixed_shape = (b,k))
    r_b_hm = r_bhm.reshape([r_bhm.shape[0], r_bhm.shape[1]*r_bhm.shape[2]])
    # Input to controller
    inp_bq = cgt.concatenate([X_bk, r_b_hm], axis=1)

    sizes = [in_size] + opt.ff_hid_sizes + [out_size]
    n_affine = len(sizes) - 1
    act = inp_bq
    # feedforward part. we could simplify a bit by using nn.Affine
    for i, (indim, outdim) in enumerate(zip(sizes[:-1], sizes[1:])):
        W = cgt.shared(.02*nr.randn(indim, outdim), name="W%i"%i, fixed_shape_mask="all")
        bias = cgt.shared(.02*nr.randn(1, outdim), name="b%i"%i, fixed_shape_mask="all")
        act = cgt.broadcast("+", act.dot(W), bias, "xx,1x")
        # Don't apply nonlinearity at the last layer
        if i < n_affine - 1:
            act = cgt.tanh(act)

    # Cut the final layer into consecutive column slices.
    pieces = []
    start = 0
    for width in widths:
        pieces.append(act[:, start:start+width])
        start += width
    k_flat, beta_raw, g_raw, s_flat, gamma_raw, e_flat, a_flat, y_bp = pieces

    k_bHm = cgt.tanh(k_flat.reshape([b,H,m]))
    beta_bH = nn.softplus(beta_raw)
    g_bH = cgt.sigmoid(g_raw)
    s_bH3 = sum_normalize2(cgt.exp(s_flat.reshape([b,H,3])))
    gamma_bH = cgt.sigmoid(gamma_raw)+1
    e_bhm = cgt.sigmoid(e_flat.reshape([b,h,m]))
    a_bhm = cgt.tanh(a_flat.reshape([b,h,m]))
    # y_bp is passed through unchanged

    assert infer_shape(k_bHm) == (b,H,m)
    assert infer_shape(beta_bH) == (b,H)
    assert infer_shape(g_bH) == (b,H)
    assert infer_shape(s_bH3) == (b,H,3)
    assert infer_shape(gamma_bH) == (b,H)
    assert infer_shape(e_bhm) == (b,h,m)
    assert infer_shape(a_bhm) == (b,h,m)
    assert infer_shape(y_bp) == (b,p)

    return nn.Module([r_bhm, X_bk], [k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp])
Example #14
0
def make_deep_gru(size_input, size_mem, n_layers, size_output, size_batch):
    """Build one time step of a stacked gated recurrent network as an ``nn.Module``.

    Module inputs:  ``[x, h_0, ..., h_{n_layers-1}]``.
    Module outputs: ``[next_h_0, ..., next_h_{n_layers-1}, logprobs]``, where
    ``logprobs`` is the log-softmax of an affine map of the top hidden state.
    ``size_batch`` is accepted but not used here.
    """
    inputs = [cgt.matrix() for _ in xrange(n_layers + 1)]
    outputs = []
    for i_layer in xrange(n_layers):
        # inputs[0] is the external input, so hidden state i lives at i + 1
        prev_h = inputs[i_layer + 1]
        if i_layer == 0:
            x, size_x = inputs[0], size_input
        else:
            x, size_x = outputs[i_layer - 1], size_mem

        update_pre = (nn.Affine(size_x, size_mem, name="i2u")(x) +
                      nn.Affine(size_mem, size_mem, name="h2u")(prev_h))
        update_gate = cgt.sigmoid(update_pre)
        reset_pre = (nn.Affine(size_x, size_mem, name="i2r")(x) +
                     nn.Affine(size_mem, size_mem, name="h2r")(prev_h))
        reset_gate = cgt.sigmoid(reset_pre)

        # Candidate state from the input and the reset-gated previous state.
        hidden_part = nn.Affine(size_mem, size_mem)(reset_gate * prev_h)
        input_part = nn.Affine(size_x, size_mem)(x)
        candidate = cgt.tanh(input_part + hidden_part)

        # Blend old state and candidate according to the update gate.
        next_h = (1.0 - update_gate) * prev_h + update_gate * candidate
        outputs.append(next_h)

    top_hidden = outputs[-1]
    category_activations = nn.Affine(size_mem, size_output, name="pred")(top_hidden)
    outputs.append(nn.logsoftmax(category_activations))

    return nn.Module(inputs, outputs)
Example #15
0
def make_ff_controller(opt):
    """Build a feedforward controller as an ``nn.Module``.

    The module takes ``[r_bhm, X_bk]`` (previous reads and external input),
    runs their concatenation through a stack of affine layers with tanh in
    between, and splits the final layer into eight outputs:
    ``[k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp]``.
    Sizes come from ``opt.b, opt.h, opt.m, opt.p, opt.k`` and
    ``opt.ff_hid_sizes``.
    """
    b, h, m, p, k = opt.b, opt.h, opt.m, opt.p, opt.k

    H = 2*h
    in_size = k + h*m
    # Width of each output slice, in the order they are cut out below.
    widths = [H*m, H, H, H*3, H, h*m, h*m, p]
    out_size = H*m + H + H + H*3 + H + h*m + h*m + p

    # Previous reads
    r_bhm = cgt.tensor3("r", fixed_shape = (b,h,m))
    # External inputs
    X_bk = cgt.matrix("x", fixed_shape = (b,k))
    r_b_hm = r_bhm.reshape([r_bhm.shape[0], r_bhm.shape[1]*r_bhm.shape[2]])
    # Input to controller
    inp_bq = cgt.concatenate([X_bk, r_b_hm], axis=1)

    sizes = [in_size] + opt.ff_hid_sizes + [out_size]
    n_affine = len(sizes) - 1
    act = inp_bq
    # feedforward part. we could simplify a bit by using nn.Affine
    for i, (indim, outdim) in enumerate(zip(sizes[:-1], sizes[1:])):
        W = cgt.shared(.02*nr.randn(indim, outdim), name="W%i"%i, fixed_shape_mask="all")
        bias = cgt.shared(.02*nr.randn(1, outdim), name="b%i"%i, fixed_shape_mask="all")
        act = cgt.broadcast("+", act.dot(W), bias, "xx,1x")
        # Don't apply nonlinearity at the last layer
        if i < n_affine - 1:
            act = cgt.tanh(act)

    # Cut the final layer into consecutive column slices.
    pieces = []
    start = 0
    for width in widths:
        pieces.append(act[:, start:start+width])
        start += width
    k_flat, beta_raw, g_raw, s_flat, gamma_raw, e_flat, a_flat, y_bp = pieces

    k_bHm = cgt.tanh(k_flat.reshape([b,H,m]))
    beta_bH = nn.softplus(beta_raw)
    g_bH = cgt.sigmoid(g_raw)
    s_bH3 = sum_normalize2(cgt.exp(s_flat.reshape([b,H,3])))
    gamma_bH = cgt.sigmoid(gamma_raw)+1
    e_bhm = cgt.sigmoid(e_flat.reshape([b,h,m]))
    a_bhm = cgt.tanh(a_flat.reshape([b,h,m]))
    # y_bp is passed through unchanged

    assert infer_shape(k_bHm) == (b,H,m)
    assert infer_shape(beta_bH) == (b,H)
    assert infer_shape(g_bH) == (b,H)
    assert infer_shape(s_bH3) == (b,H,3)
    assert infer_shape(gamma_bH) == (b,H)
    assert infer_shape(e_bhm) == (b,h,m)
    assert infer_shape(a_bhm) == (b,h,m)
    assert infer_shape(y_bp) == (b,p)

    return nn.Module([r_bhm, X_bk], [k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp])
Example #16
0
File: sfnn.py Project: TZ2016/snn
def hybrid_network(size_in, size_out, num_units, num_stos, dbg_out=None):
    """Stack ``combo_layer`` layers and finish with an affine output layer.

    Args:
        size_in: width of the input matrix.
        size_out: width of the final affine output.
        num_units: number of units in each layer.
        num_stos: per-layer count passed to ``combo_layer`` as ``(count,)``;
            must have the same length as ``num_units``.
        dbg_out: optional dict that receives the input node, every layer's
            output and the network output under string keys. A fresh dict is
            created when omitted (the previous ``{}`` default was shared and
            mutated across calls).

    Returns:
        ``(net_in, net_out)`` symbolic nodes.
    """
    if dbg_out is None:
        dbg_out = {}
    assert len(num_units) == len(num_stos)
    net_in = cgt.matrix("X", fixed_shape=(None, size_in))
    prev_num_units, prev_out = size_in, net_in
    dbg_out['NET~in'] = net_in
    curr_layer = 1
    for (curr_num_units, curr_num_sto) in zip(num_units, num_stos):
        assert curr_num_units >= curr_num_sto >= 0
        prev_out = combo_layer(
            prev_out,
            prev_num_units,
            curr_num_units, (curr_num_sto, ),
            s_funcs=s_func_ip,
            o_funcs=(lambda x: cgt.bernoulli(cgt.sigmoid(x)), cgt.nn.rectify),
            name=str(curr_layer),
            dbg_out=dbg_out)
        dbg_out['L%d~out' % curr_layer] = prev_out
        prev_num_units = curr_num_units
        curr_layer += 1
    net_out = nn.Affine(prev_num_units,
                        size_out,
                        name="InnerProd(%d->%d)" %
                        (prev_num_units, size_out))(prev_out)
    dbg_out['NET~out'] = net_out
    return net_in, net_out
Example #17
0
def lstm_network_t(size_in, size_out, num_units, num_mems, dbg_out=None):
    """Build one time step of a layered network that mixes LSTM and plain units.

    Args:
        size_in: width of the input matrix.
        size_out: width of the final affine output.
        num_units: number of units in each layer.
        num_mems: per-layer count passed to ``combo_layer`` as ``(count,)``;
            must have the same length as ``num_units``.
        dbg_out: optional dict that receives each layer's output and the
            network output under string keys. A fresh dict is created when
            omitted (the previous ``{}`` default was shared and mutated
            across calls).

    Returns:
        ``(net_in, net_out, net_c_prev, net_h_prev, net_c_curr, net_h_curr)``;
        the four lists hold the state nodes recorded by ``s_func_lstm``.
    """
    def s_func_lstm(_in, _s_in, _s_out, name=''):
        # Create an LSTM block and record its previous/current state nodes.
        c_prev = cgt.matrix(fixed_shape=(None, _s_out))
        h_prev = cgt.matrix(fixed_shape=(None, _s_out))
        c_cur, h_cur = lstm_block(h_prev, c_prev, _in, _s_in, _s_out, name)
        net_c_prev.append(c_prev)
        net_h_prev.append(h_prev)
        net_c_curr.append(c_cur)
        net_h_curr.append(h_cur)
        return h_cur

    if dbg_out is None:
        dbg_out = {}
    assert len(num_units) == len(num_mems)
    net_c_prev, net_h_prev, net_c_curr, net_h_curr = [], [], [], []
    net_in = cgt.matrix(fixed_shape=(None, size_in))
    prev_num_units, prev_out = size_in, net_in
    curr_layer = 1
    for curr_num_units, curr_num_mem in zip(num_units, num_mems):
        assert curr_num_units >= curr_num_mem >= 0
        prev_out = combo_layer(prev_out,
                               prev_num_units,
                               curr_num_units, (curr_num_mem, ),
                               s_funcs=(s_func_lstm, s_func_ip),
                               o_funcs=(None, cgt.sigmoid),
                               name=str(curr_layer),
                               dbg_out=dbg_out)
        dbg_out['L%d~out' % curr_layer] = prev_out
        prev_num_units = curr_num_units
        curr_layer += 1
    net_out = nn.Affine(prev_num_units, size_out, name="Out")(prev_out)
    dbg_out['NET~out'] = net_out
    return net_in, net_out, net_c_prev, net_h_prev, net_c_curr, net_h_curr
Example #18
0
def hybrid_network(size_in, size_out, num_units, num_stos, dbg_out=None):
    """Stack ``hybrid_layer`` layers and finish with an affine output layer.

    Args:
        size_in: width of the input matrix.
        size_out: width of the final affine output.
        num_units: number of units in each layer.
        num_stos: per-layer count passed to ``hybrid_layer``; must have the
            same length as ``num_units``.
        dbg_out: optional list that receives, in order, the input node, each
            layer's own debug nodes followed by its output, and the network
            output. A fresh list is created when omitted (the previous ``[]``
            default was shared and kept growing across calls).

    Returns:
        ``(X, net_out)`` symbolic nodes.
    """
    if dbg_out is None:
        dbg_out = []
    assert len(num_units) == len(num_stos)
    X = cgt.matrix("X", fixed_shape=(None, size_in))
    prev_num_units, prev_out = size_in, X
    dbg_out.append(X)
    for (curr_num_units, curr_num_sto) in zip(num_units, num_stos):
        _layer_dbg_out = []
        prev_out = hybrid_layer(prev_out,
                                prev_num_units,
                                curr_num_units,
                                curr_num_sto,
                                dbg_out=_layer_dbg_out)
        prev_num_units = curr_num_units
        dbg_out.extend(_layer_dbg_out)
        dbg_out.append(prev_out)
    # TODO_TZ bigger problem! param cannot deterministically influence cost
    #         otherwise the surrogate cost is not complete log likelihood
    net_out = nn.Affine(prev_num_units,
                        size_out,
                        name="InnerProd(%d->%d)" %
                        (prev_num_units, size_out))(prev_out)
    dbg_out.append(net_out)
    # assert prev_num_units == size_out
    # net_out = prev_out
    return X, net_out
Example #19
0
File: rnn.py Project: TZ2016/snn
def lstm_network_t(size_in, size_out, num_units, num_mems, dbg_out=None):
    """Build one time step of a layered network that mixes LSTM and plain units.

    Args:
        size_in: width of the input matrix.
        size_out: width of the final affine output.
        num_units: number of units in each layer.
        num_mems: per-layer count passed to ``combo_layer`` as ``(count,)``;
            must have the same length as ``num_units``.
        dbg_out: optional dict that receives each layer's output and the
            network output under string keys. A fresh dict is created when
            omitted (the previous ``{}`` default was shared and mutated
            across calls).

    Returns:
        ``(net_in, net_out, net_c_prev, net_h_prev, net_c_curr, net_h_curr)``;
        the four lists hold the state nodes recorded by ``s_func_lstm``.
    """
    def s_func_lstm(_in, _s_in, _s_out, name=''):
        # Create an LSTM block and record its previous/current state nodes.
        c_prev = cgt.matrix(fixed_shape=(None, _s_out))
        h_prev = cgt.matrix(fixed_shape=(None, _s_out))
        c_cur, h_cur = lstm_block(h_prev, c_prev, _in, _s_in, _s_out, name)
        net_c_prev.append(c_prev)
        net_h_prev.append(h_prev)
        net_c_curr.append(c_cur)
        net_h_curr.append(h_cur)
        return h_cur
    if dbg_out is None:
        dbg_out = {}
    assert len(num_units) == len(num_mems)
    net_c_prev, net_h_prev, net_c_curr, net_h_curr = [], [], [], []
    net_in = cgt.matrix(fixed_shape=(None, size_in))
    prev_num_units, prev_out = size_in, net_in
    curr_layer = 1
    for curr_num_units, curr_num_mem in zip(num_units, num_mems):
        assert curr_num_units >= curr_num_mem >= 0
        prev_out = combo_layer(
            prev_out, prev_num_units, curr_num_units,
            (curr_num_mem,),
            s_funcs=(s_func_lstm, s_func_ip),
            o_funcs=(None, cgt.sigmoid),
            name=str(curr_layer), dbg_out=dbg_out
        )
        dbg_out['L%d~out' % curr_layer] = prev_out
        prev_num_units = curr_num_units
        curr_layer += 1
    net_out = nn.Affine(prev_num_units, size_out,
                        name="Out")(prev_out)
    dbg_out['NET~out'] = net_out
    return net_in, net_out, net_c_prev, net_h_prev, net_c_curr, net_h_curr
Example #20
0
def test_setting_weights():
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    model = build_model(X, 0.0)
    nnbuilder.set_all_weights(model, 'mnist.p')
    y = cgt.vector("y", dtype='i8')
    cost = -cgt.mean(categorical.loglik(y, model))
    selected_number = cgt.argmax(model, axis=1)
    err_nodrop = cgt.cast(cgt.not_equal(selected_number, y), cgt.floatX).mean()
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost])

    Xdata, ydata = load_data()

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(3):
        tstart = time.time()
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
Example #21
0
File: rrnn.py Project: zoemcc/rrnn
def make_loss_and_grad_and_step(arch, size_input, size_output, size_mem,
                                size_batch, n_layers, n_unroll, k_in, k_h):
    """Unroll the network for ``n_unroll`` steps and compile its functions.

    Returns ``(network, f_loss, f_loss_and_grad, f_step)``:
    ``f_loss`` gives the averaged loss, ``f_loss_and_grad`` additionally
    returns the flattened gradient and the final hidden states, and
    ``f_step`` runs the network for a single input matrix.
    """
    # symbolic variables
    x_tnk = cgt.tensor3()
    targ_tnk = cgt.tensor3()
    #make_network = make_deep_lstm if arch=="lstm" else make_deep_gru
    network = make_deep_rrnn_rot_relu(size_input, size_mem, n_layers,
                                      size_output, size_batch, k_in, k_h)
    n_hiddens = get_num_hiddens(arch, n_layers)
    init_hiddens = [cgt.matrix() for _ in xrange(n_hiddens)]
    # TODO fixed sizes

    # Unroll in time, feeding each step's hidden outputs into the next step.
    cur_hiddens = init_hiddens
    loss = 0
    for t in xrange(n_unroll):
        step_outputs = network([x_tnk[t]] + cur_hiddens)
        prediction_logprobs = step_outputs[-1]
        cur_hiddens = step_outputs[:-1]
        # loss = loss + nn.categorical_negloglik(prediction_probs, targ_tnk[t]).sum()
        loss = loss - (prediction_logprobs * targ_tnk[t]).sum()

    final_hiddens = cur_hiddens

    # Average over time steps and batch entries.
    loss = loss / (n_unroll * size_batch)

    params = network.get_parameters()
    flatgrad = flatcat(cgt.grad(loss, params))

    train_inputs = [x_tnk, targ_tnk] + init_hiddens
    with utils.Message("compiling loss+grad"):
        f_loss_and_grad = cgt.function(train_inputs,
                                       [loss, flatgrad] + final_hiddens)
    f_loss = cgt.function([x_tnk, targ_tnk] + init_hiddens, loss)

    assert len(init_hiddens) == len(final_hiddens)

    # Single-step function on a fresh input matrix.
    x_nk = cgt.matrix('x')
    step_inputs = [x_nk] + init_hiddens
    f_step = cgt.function(step_inputs, network([x_nk] + init_hiddens))

    # print "node count", cgt.count_nodes(flatgrad)
    return network, f_loss, f_loss_and_grad, f_step
Example #22
0
 def make_updater_fc():
     """Compile a gradient-descent step for the loss from ``build_fc_return_loss``.

     The returned function takes ``(X, y, stepsize)``, returns the loss and
     moves every parameter by ``-stepsize * gradient``.
     """
     X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
     y = cgt.vector("y", dtype='i8')
     stepsize = cgt.scalar("stepsize")
     loss = build_fc_return_loss(X, y)
     params = nn.get_parameters(loss)
     updates = []
     for param, grad in zip(params, cgt.grad(loss, params)):
         updates.append((param, param - stepsize * grad))
     return cgt.function([X, y, stepsize], loss, updates=updates)
Example #23
0
File: rrnn.py Project: zobot/rrnn
def make_deep_rrnn(size_input, size_mem, n_layers, size_output, size_batch_in, k_in, k_h):
    """Build one time step of a stacked recurrent network made of reflections.

    For every layer the previous hidden state is transformed by
    ``2 * k_in`` reflections whose unit vectors are computed from the layer
    input through an affine map, followed by ``2 * k_h`` reflections whose
    unit vectors are the normalized rows of a learned parameter.

    Module inputs:  ``[x, h_0, ..., h_{n_layers-1}]``.
    Module outputs: ``[next_h_0, ..., next_h_{n_layers-1}, logprobs]``.

    ``size_batch_in`` is not used; the batch size is read symbolically from
    the hidden-state input.
    """
    inputs = [cgt.matrix() for i_layer in xrange(n_layers+1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer+1] # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer==0 else outputs[i_layer-1]
        size_x = size_input if i_layer==0 else size_mem
        size_batch = prev_h.shape[0]

        # Learned reflection vectors: 2*k_h rows, each normalized to unit length.
        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True
        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        # Input-dependent reflection vectors: 2*k_in unit vectors per batch row.
        r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
        r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
        r_norm = cgt.norm(r_non, axis=2, keepdims=True)
        r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")
        # Hidden state as a batch of column vectors for batched_matmul.
        prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
        inters_in = [prev_h_3]

        colon = slice(None, None, None)

        # Apply h <- h - 2 * r * (r^T h) for each input-dependent vector r.
        for i in xrange(2 * k_in):
            inter_in = inters_in[-1]
            r_cur = cgt.subtensor(r, [colon, i, colon])
            r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
            r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
            ref_cur = cgt.batched_matmul(r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
            inter_out = inter_in - 2 * ref_cur
            inters_in.append(inter_out)

        # Back to a (batch, size_mem) matrix for the shared reflections.
        h_in_rot = cgt.reshape(inters_in[-1], (size_batch, size_mem))
        inters_h = [h_in_rot]

        # Apply the same kind of reflection with each learned row vector.
        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)


    # Prediction head on the top layer's new hidden state.
    category_activations = nn.Affine(size_mem, size_output,name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
 def make_updater_fc():
     """Compile a gradient-descent step for the loss from ``build_fc_return_loss``.

     The returned function takes ``(X, y, stepsize)``, returns the loss and
     moves every parameter by ``-stepsize * gradient``.
     """
     X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
     y = cgt.vector("y", dtype="i8")
     stepsize = cgt.scalar("stepsize")
     loss = build_fc_return_loss(X, y)
     params = nn.get_parameters(loss)
     updates = []
     for param, grad in zip(params, cgt.grad(loss, params)):
         updates.append((param, param - stepsize * grad))
     return cgt.function([X, y, stepsize], loss, updates=updates)
Example #25
0
def test_stack():
    """Verify the result shape of ``cgt.stack`` for scalar, vector and matrix inputs."""
    # Three scalars stacked along axis 0 evaluate to a length-3 array.
    x, y, z = cgt.scalar(), cgt.scalar(), cgt.scalar()
    stacked = cgt.stack([x, y, z], axis=0)
    assert cgt.numeric_eval(stacked, {x: 1, y: 2, z: 3}).shape == (3, )

    # Three length-2 vectors: (axis, value fed for y, expected shape).
    x, y, z = cgt.vector(), cgt.vector(), cgt.vector()
    vector_cases = (
        (0, np.zeros(2), (3, 2)),
        (1, np.ones(2), (2, 3)),
    )
    for axis, y_value, expected_shape in vector_cases:
        stacked = cgt.stack([x, y, z], axis=axis)
        feed = {x: np.zeros(2), y: y_value, z: np.zeros(2)}
        assert cgt.numeric_eval(stacked, feed).shape == expected_shape

    # Three 2x4 matrices: the extent-3 axis lands at the requested position.
    x, y, z = cgt.matrix(), cgt.matrix(), cgt.matrix()
    matrix_shapes = ((3, 2, 4), (2, 3, 4), (2, 4, 3))
    for axis, expected_shape in enumerate(matrix_shapes):
        stacked = cgt.stack([x, y, z], axis=axis)
        feed = {
            x: np.zeros((2, 4)),
            y: np.zeros((2, 4)),
            z: np.zeros((2, 4)),
        }
        assert cgt.numeric_eval(stacked, feed).shape == expected_shape
Example #26
0
def test_multi_output():
    """Two-output ops must match NumPy's sin and cos via both evaluation paths."""
    symbols = (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x'))
    for sym in symbols:
        # Input of ones with extent 3 along every axis of the symbol.
        ones = np.ones((3,) * sym.ndim, cgt.floatX)
        expected = (np.sin(ones), np.cos(ones))
        for op_cls in (SinCos, SinCos2):
            first, second = core.unpack(core.Result(op_cls(), [sym]))
            # Path 1: direct numeric evaluation of the graph.
            evaluated = cgt.numeric_eval([first, second], {sym: ones})
            np.testing.assert_allclose(evaluated, expected)
            # Path 2: compiled function.
            compiled = cgt.function([sym], [first, second])
            np.testing.assert_allclose(compiled(ones), expected)
Example #27
0
File: rnn.py Project: TZ2016/snn
def lstm_network(T, size_in, size_out, num_units, num_mems, dbg_out=None):
    """Unroll the one-step LSTM built by ``lstm_network_t`` over ``T`` steps.

    The one-step graph is wrapped in a single ``nn.Module`` and that same
    module is applied once per time step.

    Args:
        T: number of time steps to unroll; must be positive.
        size_in, size_out, num_units, num_mems: forwarded unchanged to
            ``lstm_network_t``.
        dbg_out: optional dict forwarded to ``lstm_network_t``.  A fresh dict
            is created when omitted so that separate calls never share state
            through the default argument.

    Returns:
        ``(params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1)``: the module
        parameters, per-step input placeholders and outputs, the initial
        state placeholders, the states after the last step and the states
        after the first step.
    """
    assert T > 0
    if dbg_out is None:
        dbg_out = {}
    x, y, c_in, h_in, c_out, h_out = lstm_network_t(
        size_in, size_out, num_units, num_mems, dbg_out
    )
    f_lstm_t = nn.Module([x] + c_in + h_in, [y] + c_out + h_out)
    Xs = [cgt.matrix(fixed_shape=x.get_fixed_shape(), name="X%d"%t)
          for t in range(T)]
    C_0 = [cgt.matrix(fixed_shape=_c.get_fixed_shape()) for _c in c_in]
    H_0 = [cgt.matrix(fixed_shape=_h.get_fixed_shape()) for _h in h_in]
    C_t, H_t, Ys = C_0, H_0, []
    for t, x_t in enumerate(Xs):
        _out = f_lstm_t([x_t] + C_t + H_t)
        # Module output layout: [y] + cell states + hidden states.
        n_cells = len(C_t)
        y_t, C_t, H_t = _out[0], _out[1:n_cells + 1], _out[1 + n_cells:]
        Ys.append(y_t)
        if t == 0:
            # Keep the states after the first step for single-step use.
            C_1, H_1 = C_t, H_t
    C_T, H_T = C_t, H_t
    params = f_lstm_t.get_parameters()
    return params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1
Example #28
0
def test_multi_output():
    """Check two-output ops against NumPy's sin and cos for several input ranks."""

    def check(op_cls, sym):
        # All-ones input with extent 3 along each axis of `sym`.
        value = np.ones((3, ) * sym.ndim, cgt.floatX)
        reference = (np.sin(value), np.cos(value))
        first, second = core.unpack(core.Result(op_cls(), [sym]))
        np.testing.assert_allclose(
            cgt.numeric_eval([first, second], {sym: value}), reference)
        np.testing.assert_allclose(
            cgt.function([sym], [first, second])(value), reference)

    for sym in (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x')):
        for op_cls in (SinCos, SinCos2):
            check(op_cls, sym)
Example #29
0
File: rrnn.py Project: zobot/rrnn
def make_loss_and_grad_and_step(arch, size_input, size_output, size_mem, size_batch, n_layers, n_unroll, k_in, k_h):
    """Build the unrolled network and compile its loss, loss+grad and step functions.

    The network is always created by ``make_deep_rrnn_rot_relu``; ``arch`` is
    only used to ask ``get_num_hiddens`` how many hidden-state inputs exist.

    Returns:
        ``(network, f_loss, f_loss_and_grad, f_step)``.
    """
    # Symbolic inputs and targets, indexed by time step along the first axis.
    x_tnk = cgt.tensor3()
    targ_tnk = cgt.tensor3()
    network = make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output, size_batch, k_in, k_h)
    n_hiddens = get_num_hiddens(arch, n_layers)
    init_hiddens = [cgt.matrix() for _ in xrange(n_hiddens)]
    # TODO fixed sizes

    hiddens = init_hiddens
    loss = 0
    for t in xrange(n_unroll):
        step_out = network([x_tnk[t]] + hiddens)
        # The last output is the log-probabilities; the rest are the new hiddens.
        hiddens = step_out[:-1]
        logprobs_t = step_out[-1]
        loss = loss - (logprobs_t * targ_tnk[t]).sum()
    final_hiddens = hiddens

    # Normalise by the number of unrolled steps times the batch size.
    loss = loss / (n_unroll * size_batch)

    params = network.get_parameters()
    flatgrad = flatcat(cgt.grad(loss, params))

    graph_inputs = [x_tnk, targ_tnk] + init_hiddens
    with utils.Message("compiling loss+grad"):
        f_loss_and_grad = cgt.function(graph_inputs, [loss, flatgrad] + final_hiddens)
    f_loss = cgt.function(graph_inputs, loss)

    assert len(init_hiddens) == len(final_hiddens)

    # Single-step function: one input matrix plus hiddens -> all network outputs.
    x_nk = cgt.matrix('x')
    step_outputs = network([x_nk] + init_hiddens)
    f_step = cgt.function([x_nk] + init_hiddens, step_outputs)

    return network, f_loss, f_loss_and_grad, f_step
Example #30
0
def test_take_one_step_lstm():
    """One LSTM step must give different mean activations for different input rows."""
    n_rows, n_features = 20, 64
    nn_input = cgt.matrix(fixed_shape=(n_rows, n_features))
    lstm = nnbuilder.LSTM(num_units=128, input_time_size=None, input_feature_size=n_features)
    step_fn = cgt.function([nn_input], [lstm.take_one_step(nn_input)])

    # Row 0 has its first 40 features set to one; every other row stays zero.
    batch = np.zeros(shape=(n_rows, n_features))
    batch[0, 0:40] = 1
    activations = step_fn(batch)[0]
    first_row_mean = np.mean(activations[0])
    second_row_mean = np.mean(activations[1])
    assert first_row_mean != second_row_mean
Example #31
0
def make_ntm(opt):
    """Wrap a single ``ntm_step`` as an ``nn.Module``.

    Module inputs are ``[X, M, w, r]`` and outputs ``[y, M', w', r']``: the
    external input and output come first, followed by the carried state.
    """
    b, h, k, m, n = opt.b, opt.h, opt.k, opt.m, opt.n
    Mprev_bnm = cgt.tensor3("M", fixed_shape=(b, n, m))
    X_bk = cgt.matrix("X", fixed_shape=(b, k))
    wprev_bHn = cgt.tensor3("w", fixed_shape=(b, h*2, n))
    rprev_bhm = cgt.tensor3("r", fixed_shape=(b, h, m))
    controller = make_ff_controller(opt)
    step_result = ntm_step(opt, Mprev_bnm, X_bk, wprev_bHn, rprev_bhm, controller)
    M_bnm, w_bHn, r_bhm, y_bp = step_result
    module_inputs = [X_bk, Mprev_bnm, wprev_bHn, rprev_bhm]
    module_outputs = [y_bp, M_bnm, w_bHn, r_bhm]
    return nn.Module(module_inputs, module_outputs)
Example #32
0
def make_ntm(opt):
    """Build the module for one ``ntm_step`` with placeholders sized from ``opt``.

    The returned ``nn.Module`` maps ``[X, M, w, r]`` to ``[y, M', w', r']``,
    i.e. external input/output first and the carried state after it.
    """
    memory_shape = (opt.b, opt.n, opt.m)
    input_shape = (opt.b, opt.k)
    weight_shape = (opt.b, opt.h*2, opt.n)
    read_shape = (opt.b, opt.h, opt.m)

    Mprev_bnm = cgt.tensor3("M", fixed_shape=memory_shape)
    X_bk = cgt.matrix("X", fixed_shape=input_shape)
    wprev_bHn = cgt.tensor3("w", fixed_shape=weight_shape)
    rprev_bhm = cgt.tensor3("r", fixed_shape=read_shape)
    prev_state = [Mprev_bnm, wprev_bHn, rprev_bhm]

    controller = make_ff_controller(opt)
    M_bnm, w_bHn, r_bhm, y_bp = ntm_step(
        opt, Mprev_bnm, X_bk, wprev_bHn, rprev_bhm, controller)
    return nn.Module([X_bk] + prev_state, [y_bp, M_bnm, w_bHn, r_bhm])
Example #33
0
def lstm_network(T, size_in, size_out, num_units, num_mems, dbg_out=None):
    """Unroll the one-step LSTM from ``lstm_network_t`` for ``T`` time steps.

    A single ``nn.Module`` wraps the one-step graph and is re-applied at each
    step, feeding the states it produced into the next application.

    Args:
        T: number of unrolled steps; must be positive.
        size_in, size_out, num_units, num_mems: passed straight through to
            ``lstm_network_t``.
        dbg_out: optional dict passed to ``lstm_network_t``.  When omitted a
            new dict is created per call instead of sharing one default
            object across calls.

    Returns:
        ``(params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1)``: module
        parameters, per-step inputs and outputs, initial state placeholders,
        final states and the states after the first step.
    """
    assert T > 0
    if dbg_out is None:
        dbg_out = {}
    x, y, c_in, h_in, c_out, h_out = lstm_network_t(size_in, size_out,
                                                    num_units, num_mems,
                                                    dbg_out)
    f_lstm_t = nn.Module([x] + c_in + h_in, [y] + c_out + h_out)
    Xs = [
        cgt.matrix(fixed_shape=x.get_fixed_shape(), name="X%d" % t)
        for t in range(T)
    ]
    C_0 = [cgt.matrix(fixed_shape=_c.get_fixed_shape()) for _c in c_in]
    H_0 = [cgt.matrix(fixed_shape=_h.get_fixed_shape()) for _h in h_in]
    C_t, H_t, Ys = C_0, H_0, []
    for t, x_t in enumerate(Xs):
        _out = f_lstm_t([x_t] + C_t + H_t)
        # Outputs are laid out as [y] + cell states + hidden states.
        n_cells = len(C_t)
        y_t, C_t, H_t = _out[0], _out[1:n_cells + 1], _out[1 + n_cells:]
        Ys.append(y_t)
        if t == 0:
            # States after the first step, returned separately.
            C_1, H_1 = C_t, H_t
    C_T, H_T = C_t, H_t
    params = f_lstm_t.get_parameters()
    return params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1
Example #34
0
    def __init__(self, n_actions):
        """Build a two-layer softmax policy graph and compile its functions.

        Compiles ``f_pdist``/``f_probs`` (action probabilities),
        ``f_surr_kl`` (surrogate and KL), ``f_gradlogp`` (flat surrogate
        gradient) and ``_f_grad_lagrangian`` (flat gradient of
        ``surr - lam * kl``), then stores the parameters in ``self.pc``.
        """
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        # Shift and scale the raw observation: (o - 128) / 128.
        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        hidden_layer = nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))
        h1 = cgt.tanh(hidden_layer(h0))
        output_layer = nn.Affine(nhid, n_actions,
                                 weight_init=nn.IIDGaussian(std=0.01))
        probs_na = nn.softmax(output_layer(h1))
        logprobs_na = cgt.log(probs_na)

        # Log-probability of the action taken in each row.
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl_per_row = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1)
        kl = kl_per_row.mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([g.flatten() for g in gradsurr])

        # Penalised objective: surrogate minus lam times the KL term.
        lam = cgt.scalar()
        penobj = surr - lam * kl
        lagrangian_inputs = [lam, oldpdist_np, o_no, a_n, q_n]
        flat_pen_grad = cgt.concatenate(
            [g.flatten() for g in cgt.grad(penobj, params)])
        self._f_grad_lagrangian = cgt.function(lagrangian_inputs, flat_pen_grad)
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        batch_inputs = [oldpdist_np, o_no, a_n, q_n]
        self.f_surr_kl = cgt.function(batch_inputs, [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example #35
0
def make_loss_and_grad(net):
    """Compile a function returning ``[loss, flat gradient, logprobs]``.

    NOTE(review): ``net`` is not used; the body reads ``inps``, ``logprobs``,
    ``b_size`` and ``param_list`` from the enclosing scope - confirm that is
    intended.
    """
    net_input = inps[0]
    targets = cgt.matrix(dtype='i4')

    # Cross-entropy against the targets, divided by b_size.
    scaled_loss = nn.crossent(logprobs, targets) / b_size

    # XXX use flatcat function
    flat_pieces = []
    for param_grad in cgt.grad(scaled_loss, param_list):
        flat_pieces.append(param_grad.flatten())
    flat_grad = cgt.concatenate(flat_pieces)
    return cgt.make_function([net_input, targets],
                             [scaled_loss, flat_grad, logprobs])
Example #36
0
File: rrnn.py Project: zoemcc/rrnn
def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output,
                            size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[
            i_layer +
            1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True

        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)

        prev_h_scaled = nn.scale_mag(prev_h)

        h_in_added = prev_h_scaled + add_in_relu
        inters_h = [h_in_added]

        colon = slice(None, None, None)

        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            #r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output,
                                     name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
Example #37
0
def test_stack():
    """Exercise ``cgt.stack`` on rank-0, rank-1 and rank-2 inputs and check shapes."""

    def stacked_shape(nodes, values, axis):
        # Stack `nodes` along `axis`, evaluate with `values` bound, return the shape.
        graph = cgt.stack(nodes, axis=axis)
        return cgt.numeric_eval(graph, dict(zip(nodes, values))).shape

    scalars = [cgt.scalar(), cgt.scalar(), cgt.scalar()]
    assert stacked_shape(scalars, [1, 2, 3], 0) == (3,)

    vectors = [cgt.vector(), cgt.vector(), cgt.vector()]
    assert stacked_shape(vectors, [np.zeros(2), np.zeros(2), np.zeros(2)], 0) == (3, 2)
    assert stacked_shape(vectors, [np.zeros(2), np.ones(2), np.zeros(2)], 1) == (2, 3)

    matrices = [cgt.matrix(), cgt.matrix(), cgt.matrix()]
    assert stacked_shape(matrices, [np.zeros((2, 4)) for _ in matrices], 0) == (3, 2, 4)
    assert stacked_shape(matrices, [np.zeros((2, 4)) for _ in matrices], 1) == (2, 3, 4)
    assert stacked_shape(matrices, [np.zeros((2, 4)) for _ in matrices], 2) == (2, 4, 3)
    def make_updater_fc_parallel():
        """Compile an SGD updater whose loss is averaged over batch chunks.

        The loss graph is wrapped in an ``nn.Module`` and re-applied to
        consecutive slices of ``batch_size // 4`` rows (``batch_size`` comes
        from the enclosing scope).  The chunk losses are averaged over the
        number of chunks actually built, so a ``batch_size`` that is not a
        multiple of 4 (which yields a fifth, shorter chunk) is still scaled
        correctly.

        Returns:
            Compiled function of ``(X, y, stepsize)`` returning the averaged
            loss and updating each parameter by ``-stepsize * gradient``.

        Raises:
            ValueError: if ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be positive, got %r" % (batch_size,))

        X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
        y = cgt.vector("y", dtype="i8")
        stepsize = cgt.scalar("stepsize")

        loss = build_fc_return_loss(X, y)
        params = nn.get_parameters(loss)
        m = nn.Module([X, y], [loss])
        # max(1, ...) keeps the stride non-zero when batch_size < 4.
        chunk = max(1, batch_size // 4)
        split_loss = 0
        n_chunks = 0
        for start in xrange(0, batch_size, chunk):
            sli = slice(start, start + chunk)
            split_loss += m([X[sli], y[sli]])[0]
            n_chunks += 1
        # Divide by the real chunk count rather than a hard-coded 4.
        split_loss /= n_chunks
        gparams = cgt.grad(split_loss, params)
        updates2 = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], split_loss, updates=updates2)
Example #39
0
def test_incsubtensor0():
    """``inc_subtensor`` with an index vector on axis 0 must add into the chosen rows."""
    W = cgt.shared(np.zeros((5, 3)), name="W")
    inc = cgt.matrix()  # symbolic increment applied to the selected rows
    incval = np.arange(9).reshape(3, 3)

    inds = cgt.vector(dtype='i8')
    apply_inc = cgt.function([inds, inc], [],
                             updates={W: cgt.inc_subtensor(W, inds, inc)})
    rows = [1, 2, 4]
    apply_inc(rows, incval)

    # W started at zero, so only the indexed rows hold the increment.
    expected = np.zeros((5, 3))
    expected[rows] = incval
    assert np.allclose(W.op.get_value(), expected)
Example #40
0
    def make_updater_fc_parallel():
        """Compile an SGD updater whose loss is the mean of per-chunk losses.

        An ``nn.Module`` built from the loss graph is applied to consecutive
        slices of ``batch_size // 4`` rows (``batch_size`` is read from the
        enclosing scope).  The sum of chunk losses is divided by the number
        of chunks actually created, which stays correct when ``batch_size``
        is not a multiple of 4 and the loop produces an extra short chunk.

        Returns:
            Compiled function of ``(X, y, stepsize)`` that returns the
            averaged loss and applies ``p <- p - stepsize * grad``.

        Raises:
            ValueError: if ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be positive, got %r" % (batch_size,))

        X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
        y = cgt.vector("y", dtype='i8')
        stepsize = cgt.scalar("stepsize")

        loss = build_fc_return_loss(X, y)
        params = nn.get_parameters(loss)
        m = nn.Module([X, y], [loss])
        # Guard against a zero stride when batch_size < 4.
        chunk = max(1, batch_size // 4)
        split_loss = 0
        n_chunks = 0
        for start in xrange(0, batch_size, chunk):
            sli = slice(start, start + chunk)
            split_loss += m([X[sli], y[sli]])[0]
            n_chunks += 1
        # Average over the chunks that were really built, not a fixed 4.
        split_loss /= n_chunks
        gparams = cgt.grad(split_loss, params)
        updates2 = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], split_loss, updates=updates2)
Example #41
0
def test_noncontiguous_matrix():
    """The compiled function must give the same result for every memory layout of the input."""
    x = np.arange(1,7).reshape(2,3).astype(cgt.floatX)
    result = np.log(x.sum(axis=0)).sum()

    xvar = cgt.matrix()
    f = cgt.function([xvar],cgt.log(xvar.sum(axis=0)).sum())

    # Same values embedded in a larger array and read back through a strided view.
    padded = np.zeros((4,6))
    padded[::2,::2] = x

    layouts = [
        np.asarray(x, order='C'),
        np.asarray(x, order='C', dtype='int64'),
        np.asarray(x, order='F'),
        padded[::2,::2],
    ]
    for arr in layouts:
        assert np.allclose(f(arr), result)
Example #42
0
def main():
    print("Loading data...")
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    y = cgt.vector("y", dtype='i8')

    model = build_model(X, 0.0)
    loss = -cgt.mean(categorical.loglik(y, model))

    updates = nn.rmsprop(loss, nn.get_parameters(loss), 0.01)
    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)

    y_nodrop = cgt.argmax(model, axis=1)

    cost_nodrop = -cgt.mean(categorical.loglik(y, model))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])


    batch_size=128
    Xdata, ydata = load_data()

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(3):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])

    nnbuilder.save_weights(model, 'mnist')
Example #43
0
def test_linreg():
    cgt.reset_config()
    cgt.set_precision('double')
    N = 10
    K = 3

    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple, an, _ = cgt.core.simplify_and_analyze(g)

    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(
        g_simple,
        nodefn=lambda node, o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk: Xval, w_k: wval, b: bval, y_n: yval}

    np.testing.assert_allclose(cgt.numeric_eval(err, d),
                               np.linalg.norm(Xval.dot(wval) + bval - yval)**2)
    np.testing.assert_allclose(cgt.numeric_eval(g[0], d),
                               2 * Xval.T.dot(Xval.dot(wval) + bval - yval))
    np.testing.assert_allclose(cgt.numeric_eval(g[1], d),
                               2 * np.sum(Xval.dot(wval) + bval - yval, 0))
Example #44
0
def test_incsubtensor1():
    """``inc_subtensor`` with a symbolic slice must add into rows ``start:stop``."""
    W = cgt.shared(np.zeros((5,3)), name="W")
    inc = cgt.matrix() # symbolic increment for the sliced rows
    incval = np.arange(9).reshape(3,3)

    start = cgt.scalar(dtype='i8')
    stop = cgt.scalar(dtype='i8')
    updates = {W : cgt.inc_subtensor(W, slice(start, stop), inc)}
    f = cgt.function([start,stop,inc],[],updates=updates)
    f(0,3,incval)

    # W started at zero, so rows 0..2 equal the increment and the rest stay zero.
    expected = np.zeros((5,3))
    expected[0:3] = incval
    assert np.allclose(W.op.get_value(), expected)
Example #45
0
def test_incsubtensor1():
    """Incrementing a symbolic ``start:stop`` row slice of a shared matrix."""
    n_rows, n_cols, n_inc = 5, 3, 3
    W = cgt.shared(np.zeros((n_rows, n_cols)), name="W")
    inc = cgt.matrix()  # matrix added into the sliced rows
    incval = np.arange(9).reshape(n_inc, n_cols)

    start = cgt.scalar(dtype='i8')
    stop = cgt.scalar(dtype='i8')
    incremented = cgt.inc_subtensor(W, slice(start, stop), inc)
    f = cgt.function([start, stop, inc], [], updates={W: incremented})
    f(0, n_inc, incval)

    # Expected: the increment in the first three rows, zeros below.
    expected = np.vstack([incval, np.zeros((n_rows - n_inc, n_cols))])
    assert np.allclose(W.op.get_value(), expected)
Example #46
0
def make_funcs(config, dbg_out=None):
    """Build the unrolled LSTM from ``config`` and compile its functions.

    Returns:
        ``(params, f_step, f_loss, f_grad, f_init, f_surr)``.  ``f_init`` is
        a plain Python helper producing zero initial states; the others are
        compiled ``cgt`` functions.  ``dbg_out`` is accepted but not used.
    """
    params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1 = lstm_network(
        config['rnn_steps'], config['num_inputs'], config['num_outputs'],
        config['num_units'], config['num_mems'])

    # Symbolic batch size and output width, taken from the first step.
    size_batch = Xs[0].shape[0]
    dY = Ys[0].shape[-1]
    Ys_gt = []
    for t, _ in enumerate(Ys):
        Ys_gt.append(cgt.matrix(fixed_shape=(size_batch, dY), name='Y%d' % t))
    Ys_var = [cgt.tensor3(fixed_shape=(size_batch, dY, dY)) for _ in Ys]
    net_inputs = Xs + C_0 + H_0 + Ys_var
    net_outputs = Ys + C_T + H_T

    # Sum of the per-step Gaussian log-probabilities.
    loss_vec = cgt.add_multi([
        dist.gaussian.logprob(y_gt, y_pred, y_var)
        for y_gt, y_pred, y_var in zip(Ys_gt, Ys, Ys_var)
    ])
    if config['weight_decay'] > 0.:
        # Subtract the weighted squared norm of all parameters.
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat**2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / config['rnn_steps'] / size_batch
    grad = cgt.grad(loss, params)

    def f_init(size_batch):
        # One zero cell/hidden state per layer with a positive memory size.
        mem_sizes = [n for n in config['num_mems'] if n > 0]
        c_0 = [np.zeros((size_batch, n)) for n in mem_sizes]
        h_0 = [np.zeros((size_batch, n)) for n in mem_sizes]
        return c_0, h_0

    loss_inputs = net_inputs + Ys_gt
    f_step = cgt.function([Xs[0]] + C_0 + H_0, [Ys[0]] + C_1 + H_1)
    f_loss = cgt.function(loss_inputs, loss)
    f_grad = cgt.function(loss_inputs, grad)
    f_surr = cgt.function(loss_inputs, [loss] + net_outputs + grad)
    return params, f_step, f_loss, f_grad, f_init, f_surr
Example #47
0
 def __init__(self, num_features=None, num_hidden=100):
     """Build a one-hidden-layer regressor and compile predictor/updater.

     ``self.predictor`` maps ``X`` to the network output; ``self.updater``
     takes ``(X, y)``, returns the mean absolute deviation and applies one
     gradient step with a fixed step size of 0.01.
     """
     stepsize = 0.01

     def dense(n_in, n_out):
         # Affine layer with Gaussian(std=.1) weights and biases set to 1.
         return nn.Affine(n_in, n_out, weight_init=nn.IIDGaussian(std=.1), bias_init=nn.Constant(1))

     # X has shape (1, num_features): a single row per call.
     X = cgt.matrix("X", fixed_shape=(1, num_features))
     # y: the scalar target, declared as float64.
     y = cgt.scalar("y", dtype='float64')

     hid1 = nn.rectify(dense(num_features, num_hidden)(X))
     # Final fully-connected layer with a linear (identity) output.
     output = dense(num_hidden, 1)(hid1)
     abs_deviation = cgt.abs(output - y).mean()
     params = nn.get_parameters(abs_deviation)

     updates = []
     for p, gp in zip(params, cgt.grad(abs_deviation, params)):
         updates.append((p, p-stepsize*gp))
     self.predictor = cgt.function([X], output)
     self.updater = cgt.function([X, y], abs_deviation, updates=updates)
Example #48
0
def test_get_context():
    """``Seq2Seq.get_context`` must return a (batch, features) array with mean below 1."""
    batch_size = 32
    feat_t_steps = 3
    feat_num_features = 30
    state_num_features = 20
    num_out_classes = 28
    feat_shape = (batch_size, feat_t_steps, feat_num_features)
    state_shape = (batch_size, state_num_features)

    feats = cgt.tensor3(fixed_shape=feat_shape)
    prev_out = cgt.matrix(fixed_shape=state_shape)
    sigmoided = cgt.sigmoid(prev_out)
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes, feature_size=feat_num_features, decoder_size=state_num_features)

    # The post-MLP features are expected to keep the input's shape.
    assert cgt.infer_shape(s.features_post_mlp_btf) == feat_shape

    out_fun = cgt.function([feats, prev_out], [s.get_context(sigmoided)])

    # Random inputs drawn from N(0.1, 0.2) and reshaped to the placeholder shapes.
    tau = np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features).reshape(feat_shape)
    tau2 = np.random.normal(0.1, 0.2, batch_size*state_num_features).reshape(state_shape)
    m = out_fun(tau, tau2)[0]
    assert m.shape == (batch_size, feat_num_features)
    assert np.mean(m) < 1
Example #49
0
def test_incsubtensor0():
    """Fancy-index increment along axis 0 of a shared matrix."""
    W = cgt.shared(np.zeros((5,3)), name="W")
    inc = cgt.matrix() # matrix added into the indexed rows
    incval = np.arange(9).reshape(3,3)

    inds = cgt.vector(dtype='i8')
    updates = {W : cgt.inc_subtensor(W, inds, inc)}
    f = cgt.function([inds,inc],[],updates=updates)
    target_rows = [1,2,4]
    f(target_rows,incval)

    # Build the expectation row by row: indexed rows get the matching
    # increment row, all others remain zero.
    expected = np.zeros((5,3))
    for src, dst in enumerate(target_rows):
        expected[dst] = incval[src]
    assert np.allclose(W.op.get_value(), expected)
Example #50
0
def build_bilinear_net(input_shapes, **kwargs):
    """Build a bilinear next-state predictor and its squared-error loss.

    Args:
        input_shapes: pair ``(x_shape, u_shape)`` of per-sample shapes for
            the 4-D input ``X`` and the matrix input ``U``.
        **kwargs: accepted but not used.

    Returns:
        ``(net_name, input_vars, pred_vars, loss)`` where ``input_vars`` and
        ``pred_vars`` are ``OrderedDict`` objects keyed by variable name.
    """
    x_shape, u_shape = input_shapes

    def flatten_rows(tensor):
        # Keep the leading axis and collapse all remaining axes into one.
        return tensor.reshape((tensor.shape[0], cgt.mul_multi(tensor.shape[1:])))

    X = cgt.tensor4('X', fixed_shape=(None, ) + x_shape)
    U = cgt.matrix('U', fixed_shape=(None, ) + u_shape)

    # Predicted change of X, and the predicted next X.
    bilinear = Bilinear(input_shapes, b=None, name='bilinear')
    X_diff_pred = bilinear(X, U)
    X_next_pred = X + X_diff_pred
    Y = flatten_rows(X)
    Y_diff_pred = flatten_rows(X_diff_pred)

    # Target change, supplied as an input, and the resulting target next X.
    X_diff = cgt.tensor4('X_diff', fixed_shape=(None, ) + x_shape)
    X_next = X + X_diff
    squared_error = (X_next - X_next_pred)**2
    loss = squared_error.mean(axis=0).sum() / 2.

    input_vars = OrderedDict()
    for var in [X, U, X_diff]:
        input_vars[var.name] = var
    pred_vars = OrderedDict()
    pred_vars['Y_diff_pred'] = Y_diff_pred
    pred_vars['Y'] = Y
    pred_vars['X_next_pred'] = X_next_pred
    return 'BilinearNet', input_vars, pred_vars, loss
Example #51
0
File: sfnn.py Project: TZ2016/snn
def make_funcs(config, dbg_out={}):
    net_in, net_out = hybrid_network(config['num_inputs'],
                                     config['num_outputs'],
                                     config['num_units'],
                                     config['num_sto'],
                                     dbg_out=dbg_out)
    if not config['dbg_out_full']: dbg_out = {}
    # def f_sample(_inputs, num_samples=1, flatten=False):
    #     _mean, _var = f_step(_inputs)
    #     _samples = []
    #     for _m, _v in zip(_mean, _var):
    #         _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples)
    #         if flatten: _samples.extend(_s)
    #         else: _samples.append(_s)
    #     return np.array(_samples)
    Y_gt = cgt.matrix("Y")
    Y_prec = cgt.tensor3('V',
                         fixed_shape=(None, config['num_inputs'],
                                      config['num_inputs']))
    params = nn.get_parameters(net_out)
    size_batch, size_out = net_out.shape
    inputs, outputs = [net_in], [net_out]
    if config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec)
    if config['weight_decay'] > 0.:
        print "Applying penalty on parameter norm"
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat**2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / size_batch

    # TODO_TZ f_step seems not to fail if X has wrong dim
    f_step = cgt.function(inputs, outputs)
    f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt],
                                outputs, [loss_vec],
                                params,
                                _dbg_out=dbg_out)

    return params, f_step, None, None, None, f_surr
Example #52
0
File: rnn.py Project: TZ2016/snn
def make_funcs(config, dbg_out=None):
    """Create the unrolled LSTM described by ``config`` and compile its functions.

    Returns:
        ``(params, f_step, f_loss, f_grad, f_init, f_surr)``; ``f_init``
        builds zero initial states in NumPy, the rest are compiled ``cgt``
        functions.  ``dbg_out`` is accepted but not used.
    """
    network = lstm_network(
        config['rnn_steps'], config['num_inputs'], config['num_outputs'],
        config['num_units'], config['num_mems']
    )
    params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1 = network

    # Symbolic batch size and output width from the first time step.
    size_batch = Xs[0].shape[0]
    dY = Ys[0].shape[-1]
    n_steps = len(Ys)
    Ys_gt = [cgt.matrix(fixed_shape=(size_batch, dY), name='Y%d'%t)
             for t in range(n_steps)]
    Ys_var = [cgt.tensor3(fixed_shape=(size_batch, dY, dY)) for _ in Ys]
    net_inputs = Xs + C_0 + H_0 + Ys_var
    net_outputs = Ys + C_T + H_T

    # Accumulate the Gaussian log-probability of every step, then add them.
    step_logprobs = []
    for target, prediction, variance in zip(Ys_gt, Ys, Ys_var):
        step_logprobs.append(dist.gaussian.logprob(target, prediction, variance))
    loss_vec = cgt.add_multi(step_logprobs)
    if config['weight_decay'] > 0.:
        # Penalise the squared norm of the flattened parameters.
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / config['rnn_steps'] / size_batch
    grad = cgt.grad(loss, params)

    def f_init(size_batch):
        # Zero cell and hidden states for every positive entry of num_mems.
        c_0, h_0 = [], []
        for n_mem in config['num_mems']:
            if n_mem <= 0:
                continue
            c_0.append(np.zeros((size_batch, n_mem)))
            h_0.append(np.zeros((size_batch, n_mem)))
        return c_0, h_0

    supervised_inputs = net_inputs + Ys_gt
    f_step = cgt.function([Xs[0]] + C_0 + H_0, [Ys[0]] + C_1 + H_1)
    f_loss = cgt.function(supervised_inputs, loss)
    f_grad = cgt.function(supervised_inputs, grad)
    f_surr = cgt.function(supervised_inputs, [loss] + net_outputs + grad)
    return params, f_step, f_loss, f_grad, f_init, f_surr
Example #53
0
File: sfnn.py Project: TZ2016/snn
def hybrid_network(size_in, size_out, num_units, num_stos, dbg_out=None):
    """Stack ``combo_layer`` layers and finish with an affine output layer.

    Args:
        size_in: feature size of the input matrix ``X``.
        size_out: feature size of the final ``nn.Affine`` output.
        num_units: total unit count of each layer.
        num_stos: per-layer count passed to ``combo_layer`` together with a
            Bernoulli-of-sigmoid function and ``rectify`` - presumably the
            number of stochastic units; confirm against ``combo_layer``.
            Must be the same length as ``num_units``, with
            ``0 <= num_stos[i] <= num_units[i]``.
        dbg_out: optional dict that receives the intermediate nodes under
            the keys ``'NET~in'``, ``'L<k>~out'`` and ``'NET~out'``.  A fresh
            dict is used when omitted so calls never share one default
            object.

    Returns:
        ``(net_in, net_out)``: the input placeholder and the output node.
    """
    assert len(num_units) == len(num_stos)
    if dbg_out is None:
        dbg_out = {}
    net_in = cgt.matrix("X", fixed_shape=(None, size_in))
    prev_num_units, prev_out = size_in, net_in
    dbg_out['NET~in'] = net_in
    # Layers are numbered from 1 in names and debug keys.
    layer_specs = enumerate(zip(num_units, num_stos), 1)
    for curr_layer, (curr_num_units, curr_num_sto) in layer_specs:
        assert curr_num_units >= curr_num_sto >= 0
        prev_out = combo_layer(prev_out, prev_num_units, curr_num_units,
                               (curr_num_sto,),
                               s_funcs=s_func_ip,
                               o_funcs=(lambda x: cgt.bernoulli(cgt.sigmoid(x)), cgt.nn.rectify),
                               name=str(curr_layer), dbg_out=dbg_out)
        dbg_out['L%d~out' % curr_layer] = prev_out
        prev_num_units = curr_num_units
    net_out = nn.Affine(prev_num_units, size_out,
                        name="InnerProd(%d->%d)" % (prev_num_units, size_out)
                        )(prev_out)
    dbg_out['NET~out'] = net_out
    return net_in, net_out
Example #54
0
    def __init__(self, num_features=None, num_hidden=100):
        """Create a small regression network with compiled predict/update functions.

        ``self.predictor`` evaluates the network on ``X``; ``self.updater``
        takes ``(X, y)``, returns the mean absolute deviation and performs a
        gradient step with step size 0.01.
        """
        stepsize = 0.01
        # X is a single-row matrix of shape (1, num_features).
        X = cgt.matrix("X", fixed_shape=(1, num_features))
        # y is the scalar regression target (float64).
        y = cgt.scalar("y", dtype='float64')

        # Hidden layer: affine map followed by rectification.
        hidden_affine = nn.Affine(num_features,
                                  num_hidden,
                                  weight_init=nn.IIDGaussian(std=.1),
                                  bias_init=nn.Constant(1))
        hid1 = nn.rectify(hidden_affine(X))
        # Output layer: affine map to a single value, no nonlinearity.
        output_affine = nn.Affine(num_hidden,
                                  1,
                                  weight_init=nn.IIDGaussian(std=.1),
                                  bias_init=nn.Constant(1))
        output = output_affine(hid1)

        abs_deviation = cgt.abs(output - y).mean()
        params = nn.get_parameters(abs_deviation)
        gparams = cgt.grad(abs_deviation, params)

        updates = []
        for param, grad in zip(params, gparams):
            updates.append((param, param - stepsize * grad))
        self.predictor = cgt.function([X], output)
        self.updater = cgt.function([X, y], abs_deviation, updates=updates)
Example #55
0
def make_funcs(net_in, net_out, config, dbg_out=None):
    def f_grad(*x):
        out = f_surr(*x)
        return out['loss'], out['surr_loss'], out['surr_grad']

    Y = cgt.matrix("Y")
    params = nn.get_parameters(net_out)
    if 'no_bias' in config and config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    size_out, size_batch = Y.shape[1], net_in.shape[0]
    f_step = cgt.function([net_in], [net_out])
    # loss_raw of shape (size_batch, 1); loss should be a scalar
    # sum-of-squares loss
    sigma = 0.1
    loss_raw = -cgt.sum((net_out - Y)**2, axis=1, keepdims=True) / sigma
    # negative log-likelihood
    # out_sigma = cgt.exp(net_out[:, size_out:]) + 1.e-6  # positive sigma
    # loss_raw = -gaussian_diagonal.logprob(
    #     Y, net_out,
    # out_sigma
    # cgt.fill(.01, [size_batch, size_out])
    # )
    if 'param_penal_wt' in config:
        print "Applying penalty on parameter norm"
        assert config['param_penal_wt'] > 0
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = cgt.fill(cgt.sum(params_flat**2), [size_batch, 1])
        loss_param *= config['param_penal_wt']
        loss_raw += loss_param
    loss = cgt.sum(loss_raw) / size_batch
    # end of loss definition
    f_loss = cgt.function([net_in, Y], [net_out, loss])
    f_surr = get_surrogate_func([net_in, Y], [net_out] + dbg_out, [loss_raw],
                                params)
    return params, f_step, f_loss, f_grad, f_surr
Example #56
0
def main():
    """Train and evaluate an MNIST classifier from the command line.

    Flags choose the architecture (``--model dense|conv``), dropout,
    RMSProp step size and epoch count. ``--grad_check`` runs a numerical
    gradient check on the first parameter and exits; ``--unittest`` exits
    after a single training batch; ``--profile`` enables the cgt profiler.
    One formatted row of train/test loss and error is printed per epoch.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--epochs", type=int, default=10)
    cli.add_argument("--profile", action="store_true")
    cli.add_argument("--dropout", action="store_true")
    cli.add_argument("--stepsize", type=float, default=.001)
    cli.add_argument("--model", choices=["dense", "conv"], default="dense")
    cli.add_argument("--unittest", action="store_true")
    cli.add_argument("--grad_check", action="store_true")
    args = cli.parse_args()

    if args.grad_check:
        cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    Xdata = (mnist["X"] / 255.).astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    if args.model == "conv":
        # The convnet consumes (batch, channel, height, width) images.
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    # First 60000 rows are the (shuffled) training set, next 10000 the test.
    n_train, n_total = 60000, 70000
    shuffle = np.random.permutation(n_train)
    Xtrain = Xdata[:n_train][shuffle]
    ytrain = ydata[:n_train][shuffle]
    Xtest = Xdata[n_train:n_total]
    ytest = ydata[n_train:n_total]

    if args.model == "conv":
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))
    else:
        X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')

    # Each model is built twice over shared weights: once with dropout for
    # training and once without for evaluation.
    drop_rates = (0.2, 0.5) if args.dropout else (0, 0)
    if args.model == "dense":
        p_drop_input, p_drop_hidden = drop_rates
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        params = [w_h, w_h2, w_o]
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
    elif args.model == "conv":
        p_drop_conv, p_drop_hidden = drop_rates
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        params = [w, w2, w3, w4, w_o]
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv,
                                  p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
    else:
        raise RuntimeError("Unreachable")

    # Training objective: mean negative log-likelihood with dropout.
    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    # Evaluation metrics use the dropout-free network.
    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    mistakes = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX)
    err_nodrop = mistakes.mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y],
                               outputs=[err_nodrop, cost_nodrop])

    batch_size = 128

    from cgt.tests import gradcheck_model
    if args.grad_check:
        # Bind the inputs to a single training example before checking.
        single_example = {X: Xtrain[:1], y: ytrain[:1]}
        cost_nodrop = cgt.core.clone(cost_nodrop, single_example)
        print("doing gradient check...")
        print("------------------------------------")
        gradcheck_model(cost_nodrop, params[:1])
        print("success!")
        return

    if args.profile:
        cgt.profiler.start()

    header = ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err",
              "Epoch Time"]
    print(fmt_row(10, header))
    # Training metrics are measured on a slice as large as the test set.
    n_eval = len(Xtest)
    for i_epoch in xrange(args.epochs):
        epoch_t0 = time.time()
        for lo in xrange(0, Xtrain.shape[0], batch_size):
            hi = lo + batch_size
            train(Xtrain[lo:hi], ytrain[lo:hi])
            if args.unittest:
                return
        elapsed = time.time() - epoch_t0
        trainerr, trainloss = computeloss(Xtrain[:n_eval], ytrain[:n_eval])
        testerr, testloss = computeloss(Xtest, ytest)
        row = [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]
        print(fmt_row(10, row))
    if args.profile:
        cgt.execution.profiler.print_stats()
Example #57
0
import cgt
from cgt import nn, utils
import numpy as np, numpy.random as nr
from numpy.linalg import norm
from param_collection import ParamCollection

# Builds a symbolic graph that maps an input x and a previous state prev_h
# to a new state h by successively subtracting, for each of 2 * k_in
# direction vectors computed from x, the term r (r^T h) from the state.

# Problem sizes: 2 * k_in direction vectors are produced per example.
k_in = 1
size_x = 3
size_mem = 4
size_batch = 4

x = cgt.matrix(fixed_shape=(size_batch, size_x))
prev_h = cgt.matrix(fixed_shape=(size_batch, size_mem))
# Affine map of x to 2 * k_in * size_mem values per example, then split
# into 2 * k_in vectors of length size_mem.
r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
# Divide each vector by its norm along the last axis (kept as a size-1
# axis). Presumably "xxx,xx1" marks that axis of r_norm as the broadcast
# one -- TODO confirm against cgt.broadcast.
r_norm = cgt.norm(r_non, axis=2, keepdims=True)
r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")
# State as a batch of column vectors so it can go through batched_matmul.
prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
# inters[j] holds the state after j update steps.
inters = [prev_h_3]

for i in xrange(k_in * 2):
    inter_in = inters[-1]
    r_cur = r[:, i, :]
    # Row-vector and column-vector views of the i-th direction.
    r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
    r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
    # ref_cur = r (r^T h), computed per batch element.
    ref_cur = cgt.batched_matmul(
        r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
    # NOTE(review): this subtracts r (r^T h) once; a Householder reflection
    # would subtract twice that term -- confirm which is intended.
    inter_out = inter_in - ref_cur
    inters.append(inter_out)
# Final state after all 2 * k_in steps.
h = inters[-1]