Example #1
def main():
    train_images, train_labels, test_images, test_labels = load_mnist()
    X = normalize(train_images)
    label_size = len(np.unique(train_labels))
    y = one_hot_vector(train_labels, label_size)

    print("Total training example:", X.shape[0])

    nn = NN(epoch=20, batch_size=256)

    nn.add_layer(Layer(784))
    nn.add_layer(Layer(200, activation_fn=relu))
    nn.add_layer(Layer(100, activation_fn=relu))
    nn.add_layer(Layer(10, activation_fn=softmax))

    nn.fit(X, y)


    print("Train Accuracy is:", nn.accuracy(X, y))

    X_test = normalize(test_images)
    Y_test = one_hot_vector(test_labels, label_size)
    print("Test Accuracy is:", nn.accuracy(X_test, Y_test))

    nn.plot_learning_curve()
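Example #1 leans on a few helpers (`load_mnist`, `normalize`, `one_hot_vector`) that are not part of the snippet. A minimal NumPy sketch of what the two preprocessing helpers might look like (hypothetical implementations, not the project's own code):

import numpy as np

def normalize(images):
    # Hypothetical helper: flatten each image and scale pixel values to [0, 1].
    return images.reshape(images.shape[0], -1).astype(np.float32) / 255.0

def one_hot_vector(labels, label_size):
    # Hypothetical helper: turn integer class labels into one-hot rows.
    one_hot = np.zeros((labels.shape[0], label_size), dtype=np.float32)
    one_hot[np.arange(labels.shape[0]), labels] = 1.0
    return one_hot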
Example #2
    def ready(self):
        args = self.args
        index = self.index = T.lscalar()
        x = self.x = T.fmatrix()
        y = self.y = T.ivector()

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype("float32"))

        n_d = args.hidden_dim
        layers = self.layers = []
        for i in xrange(args.depth):
            l = Layer(n_in=28 * 28 if i == 0 else n_d,
                      n_out=n_d,
                      activation=ReLU)
            layers.append(l)

        output_layer = self.output_layer = Layer(n_in=n_d,
                                                 n_out=10,
                                                 activation=softmax)

        h = x
        for l in layers:
            h = l.forward(h)
            h = apply_dropout(h, dropout)

        self.h_final = h

        # batch * 10
        probs = self.probs = output_layer.forward(h)

        # batch
        preds = self.preds = T.argmax(probs, axis=1)
        err = self.err = T.mean(T.cast(T.neq(preds, y), dtype="float32"))

        #
        loss = self.loss = -T.mean(T.log(probs[T.arange(y.shape[0]), y]))
        #loss = self.loss = T.mean( T.nnet.categorical_crossentropy(
        #                            probs,
        #                            y
        #                    ))

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost += T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg

        self.l2_cost = l2_cost
        self.cost = loss + l2_cost
        print "cost.dtype", self.cost.dtype
Example #3
    def ready(self, args, train):
        # len * batch
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=theano.config.floatX)

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(n_d=self.n_d,
                                         vocab=set(w for w in train))
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

        activation = get_activation_by_name(args["activation"])

        rnn_layer = LSTM(n_in=self.n_d, n_out=self.n_d, activation=activation)

        output_layer = Layer(
            n_in=self.n_d,
            n_out=self.n_V,
            activation=T.nnet.softmax,
        )

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        x = x.reshape((self.idxs.shape[0], self.idxs.shape[1], self.n_d))

        # len * batch * (n_d+n_d)
        h = rnn_layer.forward_all(x, self.init_state, return_c=True)

        self.last_state = h[-1]
        h = h[:, :, self.n_d:]
        h = apply_dropout(h, self.dropout)

        self.p_y_given_x = output_layer.forward(h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = -T.log(self.p_y_given_x[T.arange(idys.shape[0]), idys])
        #self.nll = T.nnet.categorical_crossentropy(
        #                self.p_y_given_x,
        #                idys
        #            )

        self.layers = [embedding_layer, rnn_layer, output_layer]
        #self.params = [ x_flat ] + rnn_layer.params + output_layer.params
        self.params = embedding_layer.params + rnn_layer.params + output_layer.params
        self.num_params = sum(
            len(x.get_value(borrow=True).ravel()) for l in self.layers
            for x in l.params)
        say("# of params in total: {}\n".format(self.num_params))
Example #4
    def build_model(self):
        args = self.args
        weights = self.weights

        meta_emb = self.meta_emb = self.embs[0]
        golden_embs = self.embs[1:]

        n_m_d = meta_emb.n_d
        dropout = self.dropout = theano.shared(
            np.float64(args.dropout_rate).astype(theano.config.floatX))

        batch_ids = self.batch_ids = T.ivector('batch_d_char')
        batch_masks = self.batch_masks = T.fmatrix('batch_d_char_mask')

        layers = self.layers = [meta_emb]

        slices_embs = meta_emb.forward(batch_ids.ravel())
        slices_embs = slices_embs.reshape((batch_ids.shape[0], n_m_d))
        prev_output = apply_dropout(slices_embs, dropout, v2=True)

        self.all_loss = 0.0
        for i in range(len(weights)):
            mask, weight, golden_emb = batch_masks[i], weights[i], golden_embs[i]
            n_o_d = golden_emb.n_d
            layer = Layer(n_m_d, n_o_d, linear)
            layers.append(layer)
            mapped_output = layer.forward(prev_output)

            slices_embs = golden_emb.forward(batch_ids.ravel())
            slices_embs = slices_embs.reshape((batch_ids.shape[0], n_o_d))
            self.all_loss += weight * T.sum(
                T.sum((mapped_output - slices_embs) *
                      (mapped_output - slices_embs),
                      axis=1) * mask) / (1e-8 + T.sum(mask))

        for i, l in enumerate(layers[1:]):
            say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))

        self.l2_sqr = None
        self.params = []

        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        self.all_loss += self.l2_sqr
        n_params = sum(
            len(x.get_value(borrow=True).ravel()) for x in self.params)
        say("total # parameters: {}\n".format(n_params))
Example #5
    def __init__(self,
                 n_in,
                 n_out,
                 activation,
                 highway=True,
                 dropout=None,
                 combine_c=True):
        self.n_in, self.n_out = n_in, n_out
        self.highway = highway
        self.activation = activation
        self.dropout = dropout
        self.combine_c = combine_c

        self.lambda_gate = RecurrentLayer(n_in, n_out, sigmoid)
        self.input_layer_1 = Layer(n_in, n_out, linear, has_bias=False)
        self.input_layer_2 = Layer(n_in, n_out, linear, has_bias=False)
        if highway:
            self.highway_layer = HighwayLayer(n_out)
Example #6
class HighwayLayer(object):
    def __init__(self, n_d):
        self.n_d = n_d
        self.gate = Layer(n_d, n_d, sigmoid)

    def forward(self, x, h):
        t = self.gate.forward(x)
        return h * t + x * (1 - t)

    @property
    def params(self):
        return self.gate.params

    @params.setter
    def params(self, param_list):
        self.gate.params = param_list
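The highway gate in Example #6 interpolates between the transformed features `h` and the raw input `x`: with `t = sigmoid(gate(x))`, the output is `h * t + x * (1 - t)`, so a gate near 0 copies `x` through untouched. A small standalone NumPy illustration of that blend (independent of the Theano `Layer` class used above):

import numpy as np

def highway_blend(x, h, gate_logits):
    # t near 0 -> keep the raw input x; t near 1 -> use the transformed h.
    t = 1.0 / (1.0 + np.exp(-gate_logits))  # sigmoid gate
    return h * t + x * (1.0 - t)

x = np.array([1.0, 2.0])
h = np.array([0.5, -1.0])
print(highway_blend(x, h, gate_logits=np.array([-10.0, 10.0])))
# ~[1.0, -1.0]: the first unit keeps x, the second takes h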
Example #7
    def ready(self):
        args = self.args
        index = self.index = T.lscalar()
        x = self.x = T.fmatrix()
        y = self.y = T.ivector()

        layer = self.layer = Layer(
                    n_in = 28*28,
                    n_out = 10,
                    activation = softmax
                )

        # batch * 10
        probs = self.probs = layer.forward(x)

        # batch
        preds = self.preds = T.argmax(probs, axis=1)
        err = self.err = T.mean(T.cast(T.neq(preds, y), dtype="float32"))

        #
        loss = self.loss = - T.mean( T.log(probs[T.arange(y.shape[0]),y]) )
        #loss = self.loss = T.mean( T.nnet.categorical_crossentropy(
        #                            probs,
        #                            y
        #                    ))

        self.params = layer.params

        l2_cost = None
        for p in self.params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost += T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg

        self.l2_cost = l2_cost
        self.cost = loss + l2_cost
        print "cost.dtype", self.cost.dtype
Example #8
# import matplotlib.pyplot as plt
import numpy as np

def col(a):
    """Make sure the array is a column."""
    return np.atleast_2d(a).T

N = 100_000     # Create N points inside the square [-2,2]×[-2,2]
data = np.random.uniform(low=-2, high=2, size=(2, N))

ts = np.zeros(shape=(2, N))
ts[0, data[0, :] * data[1, :] > 0] = 1
ts[1, :] = 1 - ts[0, :]

net = NeuralNetwork([
    Layer(2, 3, LeakyReLU()),
    Layer(3, 2, LeakyReLU()),
], MSELoss(), 0.05)

def assess(net, data, ts):
    correct = 0
    cs = []
    for i in range(data.shape[1]):
        out = net.forward_pass(col(data[:, i]))
        guess = np.argmax(np.ndarray.flatten(out))
        if ts[guess, i]:
            correct += 1
        cs.append(guess)
    # fig = plt.figure()
    # plt.scatter(data[0, :1000], data[1, :1000], c=cs)
    # fig.show()
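Example #8 sets up the quadrant-classification data and the network but stops before training; a plausible loop (assuming `NeuralNetwork.train(x, t)` takes a column input and a column target, which is how the sibling examples from this project call it) might be:

# Hypothetical training loop; the train() signature is assumed, not shown above.
for epoch in range(10):
    for i in range(data.shape[1]):
        net.train(col(data[:, i]), col(ts[:, i]))
assess(net, data, ts)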
Example #9
    def ready(self):
        args = self.args
        #n_domain = 2
        accum_dict = self.accum_dict = {}
        
        # len(sent) * len(doc) * batch
        s_idxs = self.s_idxs = T.itensor3()
        t_idxs = self.t_idxs = T.itensor3()

        # batch
        s_idys = self.s_idys = T.ivector()
        t_idys = self.t_idys = T.ivector()
        
        # batch
        s_dom_ids = self.s_dom_ids = T.ivector()
        t_dom_ids = self.t_dom_ids = T.ivector()
        
        # len(doc) * batch, 0: negative, 1: positive, -1: REL_UNK, -2: REL_PAD
        s_gold_rels = self.s_gold_rels = T.imatrix() 
        t_gold_rels = self.t_gold_rels = T.imatrix() 
        
        # has label flag, 0: no, 1: yes
        s_has_lab = self.s_has_lab = T.iscalar()
        t_has_lab = self.t_has_lab = T.iscalar()
        
        self.dropout = theano.shared(np.float64(args.dropout).astype(
                            theano.config.floatX))

        embedding_layer = self.embedding_layer
        if not embedding_layer.fix_init_embs:
            accum_dict[embedding_layer] = self.create_accumulators(embedding_layer)

        activation = get_activation_by_name(args.activation)
        n_d = self.n_d = args.hidden_dim
        n_e = self.n_e = embedding_layer.n_d
        n_c = self.nclasses
        self.rho = theano.shared(np.float64(0.0).astype(theano.config.floatX))

        self.source_k = 2

        # CNN to encode sentence into embedding
        cnn_layer = self.cnn_layer = LeCNN(
                n_in = n_e,
                n_out = n_d,
                activation=activation,
                order = args.cnn_window_size,
                BN = True,
            )
        accum_dict[cnn_layer] = self.create_accumulators(cnn_layer)
        
        # softmax layer to predict the label of the document
        self.lab_hid_layer = lab_hid_layer = Layer(
                n_in = n_d,
                n_out = n_d,
                activation = activation,
            )
        accum_dict[lab_hid_layer] = self.create_accumulators(lab_hid_layer)
        self.lab_out_layer = lab_out_layer = Layer(
                n_in = n_d,
                n_out = n_c,
                activation = logsoftmax,
            )
        accum_dict[lab_out_layer] = self.create_accumulators(lab_out_layer)
        
        # hidden layer to predict the domain of the document
        dom_hid_layer = self.dom_hid_layer = Layer(
                n_in = n_d,
                n_out = n_d,
                activation = activation,
            )
        accum_dict[dom_hid_layer] = self.create_accumulators(dom_hid_layer)

        # softmax layer to predict the domain of the document
        dom_out_layer = self.dom_out_layer = Layer(
                n_in = n_d,
                n_out = 2,
                activation = logsoftmax,
            )
        accum_dict[dom_out_layer] = self.create_accumulators(dom_out_layer)

        # for each domain, a vector parameter to compute the relevance score
        rel_hid_layer = self.rel_hid_layer =  Layer(
                n_in = n_d,
                n_out = n_d,
                activation = activation,
            )
        accum_dict[rel_hid_layer] = self.create_accumulators(rel_hid_layer)
        s_rel_out_layer = self.s_rel_out_layer =  Layer(
                n_in = n_d,
                n_out = 1,
                activation = sigmoid,
            )
        accum_dict[s_rel_out_layer] = self.create_accumulators(s_rel_out_layer)
        t_rel_out_layer = self.t_rel_out_layer =  Layer(
                n_in = n_d,
                n_out = 1,
                activation = sigmoid,
            )
        accum_dict[t_rel_out_layer] = self.create_accumulators(t_rel_out_layer)
        
        # transformation to domain independent layer
        trans_layer = self.trans_layer = Layer(
                n_in = n_d,
                n_out = n_d,
                activation = activation,
                has_bias=False,
                init_zero=True,
            )
        accum_dict[trans_layer] = self.create_accumulators(trans_layer)
        val = np.eye(n_d, dtype=theano.config.floatX)
        identity_mat = theano.shared(val)
        trans_layer.W.set_value(val)
        
        # reconstruction layer
        recon_layer = self.recon_layer = Layer(
                n_in = n_d,
                n_out = n_e,
                activation = tanh,
            )
        accum_dict[recon_layer] = self.create_accumulators(recon_layer)
        
        # construct network
        s_lab_loss, s_rel_loss, s_dom_loss, s_adv_loss, s_lab_prob, s_recon_loss = self.ready_one_domain(
                         s_idxs, s_idys, s_dom_ids, s_gold_rels, \
                         cnn_layer, rel_hid_layer, s_rel_out_layer, trans_layer, \
                         dom_hid_layer, dom_out_layer, lab_hid_layer, lab_out_layer)
        self.s_lab_loss, self.s_rel_loss, self.s_dom_loss, self.s_adv_loss, self.s_lab_prob, self.s_recon_loss = \
                        s_lab_loss, s_rel_loss, s_dom_loss, s_adv_loss, s_lab_prob, s_recon_loss
        
        t_lab_loss, t_rel_loss, t_dom_loss, t_adv_loss, t_lab_prob, t_recon_loss = self.ready_one_domain(
                         t_idxs, t_idys, t_dom_ids, t_gold_rels, \
                         cnn_layer, rel_hid_layer, t_rel_out_layer, trans_layer, \
                         dom_hid_layer, dom_out_layer, lab_hid_layer, lab_out_layer)
        self.t_lab_loss, self.t_rel_loss, self.t_dom_loss, self.t_adv_loss, self.t_lab_prob, self.t_recon_loss = \
                        t_lab_loss, t_rel_loss, t_dom_loss, t_adv_loss, t_lab_prob, t_recon_loss
        
        # transformation regularization
        trans_reg = self.trans_reg = args.trans_reg * T.sum((trans_layer.W - identity_mat) ** 2)
        
        # domain cost
        layers = [ dom_out_layer, dom_hid_layer ]
        self.dom_params = self.get_params(layers)
        self.dom_accums = self.get_accumulators(layers, accum_dict)
        self.dom_cost = s_dom_loss + t_dom_loss + args.l2_reg * self.get_l2_cost(self.dom_params)
        
        # label cost
        lab_layers = [ lab_out_layer, lab_hid_layer ]
        lab_params = self.get_params(lab_layers)
        lab_cost = s_has_lab * self.source_k * s_lab_loss + t_has_lab * t_lab_loss \
                    + args.l2_reg * (s_has_lab + t_has_lab) * self.get_l2_cost(lab_params)
            
        # total cost
        other_layers = [ cnn_layer, s_rel_out_layer, t_rel_out_layer, rel_hid_layer, trans_layer, recon_layer ]
        other_params = self.get_params(other_layers)
        self.other_cost_except_dom = lab_cost + s_rel_loss + t_rel_loss + s_adv_loss + t_adv_loss + trans_reg \
                     + s_recon_loss + t_recon_loss \
                     + args.l2_reg * self.get_l2_cost(other_params)
        self.other_params_except_dom = lab_params + other_params
        self.other_accums_except_dom = self.get_accumulators(lab_layers + other_layers, accum_dict)
        if not embedding_layer.fix_init_embs:
            self.other_params_except_dom += embedding_layer.params
            self.add_accumulators(self.other_accums_except_dom, embedding_layer, accum_dict)
        
        # info
        layers = lab_layers + other_layers + [ dom_out_layer, dom_hid_layer ]
        params = self.params = self.get_params(layers)
        if not embedding_layer.fix_init_embs:
            self.params += embedding_layer.params
        say("num of parameters: {}\n".format(
            sum(len(x.get_value(borrow=True).ravel()) for x in params)
        ))
Example #10
    for digit in range(10):
        t = np.zeros((10, 1))
        t[digit] = 1
        ts[digit] = t

    for i, row in enumerate(data):
        if i % 1000 == 0:
            print(i)
        digit = row[0]
        x = row[1:].reshape((784, 1))
        net.train(x, ts[digit])


if __name__ == "__main__":
    layers = [
        Layer(784, 16, LeakyReLU()),
        Layer(16, 16, LeakyReLU()),
        Layer(16, 10, LeakyReLU()),
    ]
    net = NeuralNetwork(layers, MSELoss(), 0.001)
    # CrossEntropyLoss ← a bit more of a hassle
    # Sigmoid ← wikipedia

    print("Loading data...")
    train_data = load_data("mnistdata/mnist_train.csv")
    print("Done.")

    print("Training network...")
    train(net, train_data)
    print("Done.")
Example #11
    def ready(self):
        global total_generate_time
        #say("in generator ready: \n")
        #start_generate_time = time.time()
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype(theano.config.floatX))

        # len*batch
        x = self.x = T.imatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = []
        layer_type = args.layer.lower()
        for i in xrange(2):
            if layer_type == "rcnn":
                l = RCNN(n_in=n_e,
                         n_out=n_d,
                         activation=activation,
                         order=args.order)
            elif layer_type == "lstm":
                l = LSTM(n_in=n_e, n_out=n_d, activation=activation)

            l = Layer(n_in=n_e, n_out=n_d, activation=sigmoid)

            layers.append(l)

        # len * batch
        #masks = T.cast(T.neq(x, padding_id), theano.config.floatX)
        masks = T.cast(T.neq(x, padding_id), theano.config.floatX).dimshuffle(
            (0, 1, "x"))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)
        self.word_embs = embs

        flipped_embs = embs[::-1]

        # len*batch*n_d
        h1 = layers[0].forward(embs)
        h2 = layers[1].forward(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        #size = n_e

        output_layer = self.output_layer = Layer(n_in=size,
                                                 n_out=1,
                                                 activation=sigmoid)

        # len*batch*1
        probs = output_layer.forward(h_final)
        #probs = output_layer.forward(embs)
        #probs1 = probs.reshape(x.shape)

        #probs_rev = output_layer.forward(flipped_embs)
        #probs1_rev = probs.reshape(x.shape)

        #probs = T.concatenate([probs1, probs1_rev[::-1]], axis=2)

        # len*batch
        probs2 = probs.reshape(x.shape)
        if self.args.seed is not None:
            self.MRG_rng = MRG_RandomStreams(self.args.seed)
        else:
            self.MRG_rng = MRG_RandomStreams()
        z_pred = self.z_pred = T.cast(
            self.MRG_rng.binomial(size=probs2.shape, p=probs2),
            theano.config.floatX)  #"int8")

        # we are computing approximated gradient by sampling z;
        # so should mark sampled z not part of the gradient propagation path
        #
        z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred)
        #self.sample_updates = sample_updates
        print "z_pred", z_pred.ndim

        z2 = z_pred.dimshuffle((0, 1, "x"))
        logpz = -T.nnet.binary_crossentropy(probs, z2) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        z = z_pred
        self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        self.zdiff = T.sum(T.abs_(z[1:] - z[:-1]),
                           axis=0,
                           dtype=theano.config.floatX)

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost
Example #12
    def ready(self):
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

        # len*batch
        x = self.x = T.imatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        layer_type = args.layer.lower()
        for i in xrange(1):
            l = CNN(
                    n_in = n_e,
                    n_out = n_d,
                    activation = activation,
                    order = args.order
                )
            layers.append(l)

        # len * batch
        masks = T.cast(T.neq(x, padding_id), "int8").dimshuffle((0,1,'x'))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)
        self.word_embs = embs

        # len*batch*n_d
        h1 = layers[0].forward_all(embs)
        h_final = h1
        size = n_d
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = 1,
                activation = sigmoid
            )

        # len*batch*1
        probs = output_layer.forward(h_final)

        # len*batch
        self.MRG_rng = MRG_RandomStreams()
        z_pred_dim3 = self.MRG_rng.binomial(size=probs.shape, p=probs, dtype="int8")
        z_pred = z_pred_dim3.reshape(x.shape)

        # we are computing approximated gradient by sampling z;
        # so should mark sampled z not part of the gradient propagation path
        #
        z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred)
        print "z_pred", z_pred.ndim

        #logpz = - T.nnet.binary_crossentropy(probs, z_pred_dim3) * masks
        logpz = - T.nnet.binary_crossentropy(probs, z_pred_dim3)
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        z = z_pred
        self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        self.zdiff = T.sum(T.abs_(z[1:]-z[:-1]), axis=0, dtype=theano.config.floatX)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost
Example #13
    def ready(self):
        encoder = self.encoder
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = encoder.dropout

        # len*batch
        x = self.x = encoder.x
        z = self.z = encoder.z

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = []
        layer_type = args.layer.lower()
        for i in range(2):
            if layer_type == "rcnn":
                l = RCNN(
                    n_in=n_e,  # if i == 0 else n_d,
                    n_out=n_d,
                    activation=activation,
                    order=args.order)
            elif layer_type == "lstm":
                l = LSTM(
                    n_in=n_e,  # if i == 0 else n_d,
                    n_out=n_d,
                    activation=activation)
            layers.append(l)

        # len * batch
        #masks = T.cast(T.neq(x, padding_id), theano.config.floatX)
        masks = T.cast(T.neq(x, padding_id), "int8").dimshuffle((0, 1, "x"))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)

        flipped_embs = embs[::-1]

        # len*batch*n_d
        h1 = layers[0].forward_all(embs)
        h2 = layers[1].forward_all(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        output_layer = self.output_layer = Layer(n_in=size,
                                                 n_out=1,
                                                 activation=sigmoid)

        # len*batch*1
        probs = output_layer.forward(h_final)

        # len*batch
        probs2 = probs.reshape(x.shape)
        self.MRG_rng = MRG_RandomStreams()
        z_pred = self.z_pred = T.cast(
            self.MRG_rng.binomial(size=probs2.shape, p=probs2), "int8")

        # we are computing approximated gradient by sampling z;
        # so should mark sampled z not part of the gradient propagation path
        #
        self.z_pred = theano.gradient.disconnected_grad(z_pred)

        z2 = z.dimshuffle((0, 1, "x"))
        logpz = -T.nnet.binary_crossentropy(probs, z2) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        zdiff_pre = (z[1:] - z[:-1]) * 1.0
        zdiff = T.sum(abs(zdiff_pre), axis=0, dtype=theano.config.floatX)

        loss_mat = encoder.loss_mat
        if args.aspect < 0:
            loss_vec = T.mean(loss_mat, axis=1)
        else:
            assert args.aspect < self.nclasses
            loss_vec = loss_mat[:, args.aspect]
        self.loss_vec = loss_vec

        coherent_factor = args.sparsity * args.coherent
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
                                             T.mean(zdiff) * coherent_factor
        cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        self.obj = T.mean(cost_vec)

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg

        cost = self.cost = cost_logpz * 10 + l2_cost
        print("cost.dtype", cost.dtype)

        self.cost_e = loss * 10 + encoder.l2_cost
Example #14
    def ready(self):
        args = self.args
        embedding_layer = self.embedding_layer
        self.n_hidden = args.hidden_dim
        self.n_in = embedding_layer.n_d
        dropout = self.dropout = theano.shared(
            np.float64(args.dropout_rate).astype(theano.config.floatX))

        # x is length * batch_size
        # y is batch_size
        self.x = T.imatrix('x')
        self.y = T.ivector('y')

        x = self.x
        y = self.y
        n_hidden = self.n_hidden
        n_in = self.n_in

        # fetch word embeddings
        # (len * batch_size) * n_in
        slices = embedding_layer.forward(x.ravel())
        self.slices = slices

        # 3-d tensor, len * batch_size * n_in
        slices = slices.reshape((x.shape[0], x.shape[1], n_in))

        # stacking the feature extraction layers
        pooling = args.pooling
        depth = args.depth
        layers = self.layers = []
        prev_output = slices
        prev_output = apply_dropout(prev_output, dropout, v2=True)
        size = 0
        softmax_inputs = []
        activation = get_activation_by_name(args.act)
        for i in range(depth):
            if args.layer.lower() == "lstm":
                layer = LSTM(n_in=n_hidden if i > 0 else n_in, n_out=n_hidden)
            elif args.layer.lower() == "strcnn":
                layer = StrCNN(n_in=n_hidden if i > 0 else n_in,
                               n_out=n_hidden,
                               activation=activation,
                               decay=args.decay,
                               order=args.order)
            elif args.layer.lower() == "rcnn":
                layer = RCNN(n_in=n_hidden if i > 0 else n_in,
                             n_out=n_hidden,
                             activation=activation,
                             order=args.order,
                             mode=args.mode)
            else:
                raise Exception("unknown layer type: {}".format(args.layer))

            layers.append(layer)
            prev_output = layer.forward_all(prev_output)
            if pooling:
                softmax_inputs.append(T.sum(prev_output,
                                            axis=0))  # summing over columns
            else:
                softmax_inputs.append(prev_output[-1])
            prev_output = apply_dropout(prev_output, dropout)
            size += n_hidden

        # final feature representation is the concatenation of all extraction layers
        if pooling:
            softmax_input = T.concatenate(softmax_inputs, axis=1) / x.shape[0]
        else:
            softmax_input = T.concatenate(softmax_inputs, axis=1)
        softmax_input = apply_dropout(softmax_input, dropout, v2=True)

        # feed the feature repr. to the softmax output layer
        layers.append(
            Layer(n_in=size,
                  n_out=self.nclasses,
                  activation=softmax,
                  has_bias=False))

        for l, i in zip(layers, range(len(layers))):
            say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))

        # unnormalized score of y given x
        self.p_y_given_x = layers[-1].forward(softmax_input)
        self.pred = T.argmax(self.p_y_given_x, axis=1)
        self.nll_loss = T.mean(
            T.nnet.categorical_crossentropy(self.p_y_given_x, y))

        # adding regularizations
        self.l2_sqr = None
        self.params = []
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                        for x in self.params)
        say("total # parameters: {}\n".format(nparams))
Example #15
    def ready(self, args, train):
        # len * batch
        depth = args["depth"]
        self.args = args
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = [
            T.matrix(dtype=theano.config.floatX) for i in xrange(depth * 2)
        ]

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)
        rnn_dropout_prob = np.float64(args["rnn_dropout"]).astype(
            theano.config.floatX)
        self.rnn_dropout = theano.shared(rnn_dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(n_d=self.n_d,
                                         vocab=set(w for w in train))
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

        activation = get_activation_by_name(args["activation"])

        layers = self.layers = []
        for i in xrange(depth):
            rnn_layer = KernelNN(n_in=self.n_d,
                                 n_out=self.n_d,
                                 activation=activation,
                                 highway=args["highway"],
                                 dropout=self.rnn_dropout)
            layers.append(rnn_layer)

        output_layer = Layer(
            n_in=self.n_d,
            n_out=self.n_V,
            activation=T.nnet.softmax,
        )
        output_layer.W = embedding_layer.embeddings.T

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        #x = x_flat
        x = x.reshape((self.idxs.shape[0], self.idxs.shape[1], self.n_d))

        # len * batch * (n_d+n_d)
        self.last_state = []
        prev_h = x
        for i in xrange(depth):
            hidden = self.init_state[i * 2:i * 2 + 2]
            c, h = layers[i].forward_all(prev_h, hidden, return_c=True)
            self.last_state += [c[-1], h[-1]]
            prev_h = h

        prev_h = apply_dropout(prev_h, self.dropout)
        self.p_y_given_x = output_layer.forward(prev_h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = T.nnet.categorical_crossentropy(self.p_y_given_x, idys)

        self.params = [x for l in layers for x in l.params]
        self.params += [embedding_layer.embeddings, output_layer.b]
        self.num_params = sum(
            len(x.get_value(borrow=True).ravel()) for x in self.params)
        say("# of params in total: {}\n".format(self.num_params))
        layers += [embedding_layer, output_layer]
Example #16
        guess = np.argmax(out)
        if t == guess:
            correct += 1

    return correct / test_data.shape[0]


def train(net, train_data):
    for i, train_row in enumerate(train_data):
        if not i % 1000:
            print(i)

        net.train(to_col(train_row[1:]) / 255, train_row[0])


if __name__ == "__main__":
    layers = [
        Layer(784, 10, LeakyReLU()),
    ]
    net = NeuralNetwork(layers, CrossEntropyLoss(), 0.001)

    test_data = load_data(TEST_FILE, delimiter=",", dtype=int)
    accuracy = test(net, test_data)
    print(f"Accuracy is {100*accuracy:.2f}%")  # Expected to be around 10%

    train_data = load_data(TRAIN_FILE, delimiter=",", dtype=int)
    train(net, train_data)

    accuracy = test(net, test_data)
    print(f"Accuracy is {100*accuracy:.2f}%")
Example #17
def train_student(student, teacher, train_data):
    """Train a student network to behave like the teacher network."""

    for i, train_row in enumerate(train_data):
        if not i % 1000:
            print(i)

        x = to_col(train_row[1:])
        teacher_out = teacher.forward_pass(x)
        student.train(x, teacher_out)


if __name__ == "__main__":
    teacher_layers = [
        Layer(784, 16, LeakyReLU()),
        Layer(16, 16, LeakyReLU()),
        Layer(16, 10, LeakyReLU()),
    ]
    teacher_net = NeuralNetwork(teacher_layers, CrossEntropyLoss(), 0.001)

    train_data = load_data("mnistdata/mnist_train.csv",
                           delimiter=",",
                           dtype=int)
    train(teacher_net, train_data)

    test_data = load_data("mnistdata/mnist_test.csv", delimiter=",", dtype=int)
    accuracy = test(teacher_net, test_data)
    print(f"Accuracy of the teacher net is {100*accuracy:.2f}")

    student_layers = [
Example #18
    def ready(self):
        args = self.args
        embedding_layer = self.embedding_layer
        user_embedding_layer = self.user_embedding_layer
        self.n_hidden = args.hidden_dim
        self.n_in = embedding_layer.n_d
        dropout = self.dropout = theano.shared(
            np.float64(args.dropout_rate).astype(theano.config.floatX)
        )

        # x is length * batch_size
        # y is batch_size
        self.x = T.imatrix('x')
        self.w_masks = T.fmatrix('mask')
        self.w_lens = T.fvector('lens')
        self.s_ml = T.iscalar('sent_maxlen')
        self.s_num = T.iscalar('sent_num')
        self.y = T.ivector('y')
        self.usr = T.ivector('users')

        x = self.x
        y = self.y
        usr = self.usr
        w_masks = self.w_masks
        w_lens = self.w_lens
        s_ml = self.s_ml
        s_num = self.s_num
        n_hidden = self.n_hidden
        n_emb = n_in = self.n_in

        layers = self.layers = []

        slicesu = user_embedding_layer.forward(usr)
        slices = embedding_layer.forward(x.ravel())
        self.slices = slices  # important for updating word embeddings

        # 3-d tensor, len * batch_size * n_in
        slices = slices.reshape((x.shape[0], x.shape[1], n_in))

        pooling = args.pooling
        prev_output = slices
        prev_output = apply_dropout(prev_output, dropout, v2=True)
        size = 0

        n_hidden_t = n_hidden
        if args.direction == "bi":
            n_hidden_t = 2 * n_hidden

        softmax_inputs = []
        activation = get_activation_by_name(args.act)

        if args.layer.lower() == "lstm":
            layer = LSTM(n_in=n_in,
                         n_out=n_hidden_t,
                         direction=args.direction
                         )
        elif args.layer.lower() == "cnn":
            layer = CNN(n_in=n_in,
                        n_out=n_hidden_t,
                        activation=activation,
                        order=args.order
                        )
        else:
            raise Exception("unknown layer type: {}".format(args.layer))

        layers.append(layer)
        prev_output = layer.forward_all(prev_output, masks=w_masks)
        prev_output = apply_dropout(prev_output, dropout)

        # final feature representation is the concatenation of all extraction layers
        if args.user_atten:
            layer = IterAttentionLayer(
                n_in=n_emb,
                n_out=n_hidden_t
            )
            layers.append(layer)
            if args.user_atten_base:
                slicesu = None
            softmax_input = layers[-1].multi_hop_forward(
                prev_output, user_embs=slicesu, isWord=True, masks=w_masks)
        else:
            if pooling:
                softmax_input = T.sum(prev_output, axis=0) / w_lens.dimshuffle(0, 'x')
            else:
                ind = T.cast(w_lens - T.ones_like(w_lens), 'int32')
                softmax_input = prev_output[T.arange(ind.shape[0]), ind]

        softmax_input = apply_dropout(softmax_input, dropout, v2=True)

        n_in = n_hidden_t
        size = 0
        softmax_inputs = []
        [sentlen, emblen] = T.shape(softmax_input)
        prev_output = softmax_input.reshape(
            (sentlen / s_num, s_num, emblen)).dimshuffle(1, 0, 2)
        if args.layer.lower() == "lstm":
            layer = LSTM(n_in=n_in,
                         n_out=n_hidden_t,
                         direction=args.direction
                         )
        elif args.layer.lower() == "cnn":
            layer = CNN(n_in=n_in,
                        n_out=n_hidden_t,
                        activation=activation,
                        order=args.order,
                        )
        else:
            raise Exception("unknown layer type: {}".format(args.layer))

        layers.append(layer)
        prev_output = layer.forward_all(prev_output)
        prev_output = apply_dropout(prev_output, dropout)

        if args.user_atten:
            layer = IterAttentionLayer(
                n_in=n_emb,
                n_out=n_hidden_t
            )
            layers.append(layer)

            if args.user_atten_base:
                slicesu = None
            softmax_input = layers[-1].multi_hop_forward(
                prev_output, user_embs=slicesu, isWord=False)
        else:
            if pooling:
                softmax_input = T.sum(prev_output, axis=0) / \
                    T.cast(s_num, 'float32')
            else:
                softmax_input = prev_output[-1]
        softmax_input = apply_dropout(softmax_input, dropout, v2=True)

        size = n_hidden_t
        layers.append(Layer(
            n_in=size,
            n_out=self.nclasses,
            activation=softmax,
            has_bias=False
        ))
        if not args.fix_emb:
            for l, i in zip(layers, range(len(layers))):
                say("layer {}: n_in={}\tn_out={}\n".format(
                    i, l.n_in, l.n_out
                ))
        else:
            for l, i in zip(layers[1:], range(len(layers[1:]))):
                say("layer {}: n_in={}\tn_out={}\n".format(
                    i, l.n_in, l.n_out
                ))

        # unnormalized score of y given x
        self.p_y_given_x = layers[-1].forward(softmax_input)
        self.pred = T.argmax(self.p_y_given_x, axis=1)
        self.nll_loss = T.mean(T.nnet.categorical_crossentropy(
            self.p_y_given_x,
            y
        ))

        # adding regularizations
        self.l2_sqr = None
        self.params = []
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        nparams = sum(len(x.get_value(borrow=True).ravel())
                      for x in self.params)
        say("total # parameters: {}\n".format(nparams))
Example #19
    #     tv = np.zeros((10, 1))
    #     tv[t] = 1
    #     ts[t] = tv

    for i, train_row in enumerate(train_data):
        if not i % 1000:
            print(i)

        t = train_row[0]
        x = to_col(train_row[1:])
        net.train(x, t)


if __name__ == "__main__":
    layers = [
        Layer(784, 16, LeakyReLU()),
        Layer(16, 16, LeakyReLU()),
        Layer(16, 10, LeakyReLU()),
    ]
    net = NeuralNetwork(layers, CrossEntropyLoss(), 0.001)

    test_data = load_data("mnistdata/mnist_test.csv", delimiter=",", dtype=int)

    accuracy = test(net, test_data)
    print(f"Accuracy is {100*accuracy:.2f}%")  # Expected to be around 10%

    train_data = load_data("mnistdata/mnist_train.csv",
                           delimiter=",",
                           dtype=int)
    train(net, train_data)
Example #20
File: test.py Project: Centauria/BP
logging.basicConfig(level=logging.NOTSET)


def see(layer: Layer):
    for n in layer.cells:
        print(n)
        for link in n.output_list:
            print(link)


for it in range(10):
    s = Sequential()
    s.add_layer(InputLayer(2, 'input'))
    s.add_layer(
        Layer(2, 'hidden', Function.ReLU(), Initializer.uniform(0, 0.1)),
        Initializer.uniform(-0.1, 0.1))
    s.add_layer(Layer(1, 'output', initializer=Initializer.uniform(0, 0.1)),
                Initializer.uniform(-0.1, 0.1))

    logging.info('TEST %i' % it)

    error_val = [1]
    error_val_max_length = 50
    while np.mean(error_val) > 0.05:
        a, b = np.random.randint(2), np.random.randint(2)
        c = int(a == b)
        input_data = np.array([a, b])
        target_output = np.array([c])
        s.commit(input_data, target_output, 0.1)
        error = s.forward(input_data)[0] - target_output
Example #21
    def ready(self):
        args = self.args
        w_emb_layer = self.w_emb_layer
        c_emb_layer = self.c_emb_layer
        r_emb_layers = self.r_emb_layers
        r_matrix_layers = self.r_matrix_layers

        char_dim = self.char_dim = args.char_dim
        char_lstm_dim = self.char_lstm_dim = args.char_lstm_dim
        word_dim = self.word_dim = args.word_dim
        word_lstm_dim = self.word_lstm_dim = args.word_lstm_dim

        dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

        word_ids = self.word_ids = T.ivector('word_ids')
        char_ids = self.char_ids = T.imatrix('char_ids')
        char_lens = self.char_lens = T.fvector('char_lens')
        char_masks = self.char_masks = T.imatrix('char_masks')
        up_ids = self.up_ids = T.imatrix('up_ids')
        up_rels = self.up_rels = T.imatrix('up_rels')
        up_id_masks = self.up_id_masks = T.imatrix('up_id_masks')
        down_ids = self.down_ids = T.imatrix('down_ids')
        down_rels = self.down_rels = T.imatrix('down_rels')
        down_id_masks = self.down_id_masks = T.imatrix('down_id_masks')
        tag_ids = self.tag_ids = T.ivector('tag_ids')

        layers = self.layers = [w_emb_layer, c_emb_layer]
        layers.extend(r_emb_layers)
        layers.extend(r_matrix_layers)

        inputs = self.inputs = []

        inputs.append(self.word_ids)
        inputs.append(self.char_ids)
        inputs.append(self.char_lens)
        inputs.append(self.char_masks)
        inputs.append(self.up_ids)
        inputs.append(self.up_rels)
        inputs.append(self.up_id_masks)
        inputs.append(self.down_ids)
        inputs.append(self.down_rels)
        inputs.append(self.down_id_masks)
        inputs.append(self.tag_ids)

        wslices = w_emb_layer.forward(word_ids)
        cslices = c_emb_layer.forward(char_ids.ravel())
        cslices = cslices.reshape((char_ids.shape[0], char_ids.shape[1], char_dim))
        cslices = cslices.dimshuffle(1, 0, 2)

        bv_ur_slicess = []
        bv_dr_slicess = []
        b_ur_slicess = []
        b_dr_slicess = []

        bv_ur_matrixss = []
        bv_dr_matrixss = []
        b_ur_matrixss = []
        b_dr_matrixss = []

        for r_matrix_layer in r_matrix_layers:
            bv_ur_matrixs = r_matrix_layer.forward1(up_rels.ravel())
            bv_dr_matrixs = r_matrix_layer.forward1(down_rels.ravel())
            b_ur_matrixs = r_matrix_layer.forward2(up_rels.ravel())
            b_dr_matrixs = r_matrix_layer.forward2(down_rels.ravel())
            bv_ur_matrixss.append(bv_ur_matrixs.reshape((up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
            bv_dr_matrixss.append(bv_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1], word_dim, word_dim)))
            b_ur_matrixss.append(b_ur_matrixs.reshape((up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
            b_dr_matrixss.append(b_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1], word_dim, word_dim)))

        for r_emb_layer in r_emb_layers:
            bv_ur_slices = r_emb_layer.forward(up_rels.ravel())
            bv_dr_slices = r_emb_layer.forward(down_rels.ravel())
            b_ur_slices = r_emb_layer.forward2(up_rels.ravel())
            b_dr_slices = r_emb_layer.forward2(down_rels.ravel())
            bv_ur_slicess.append(bv_ur_slices.reshape((up_rels.shape[0], up_rels.shape[1], word_dim)))
            bv_dr_slicess.append(bv_dr_slices.reshape((down_rels.shape[0], down_rels.shape[1], word_dim)))
            b_ur_slicess.append(b_ur_slices.reshape((up_rels.shape[0], up_rels.shape[1], word_dim)))
            b_dr_slicess.append(b_dr_slices.reshape((down_rels.shape[0], down_rels.shape[1], word_dim)))

        char_masks = char_masks.dimshuffle(1, 0)

        prev_output = wslices
        prev_size = word_dim

        if char_dim:
            layers.append(LSTM(
                n_in = char_dim,
                n_out = char_lstm_dim,
                direction = 'bi' if args.char_bidirect else 'si'
            ))
            prev_output_2 = cslices
            prev_output_2 = apply_dropout(prev_output_2, dropout, v2 = True)
            prev_output_2 = layers[-1].forward_all(cslices, char_masks)
            prev_output_2 = T.sum(prev_output_2, axis = 0)
            prev_output_2 = prev_output_2 / (1e-6 * T.ones_like(char_lens) + char_lens).dimshuffle(0, 'x')

            prev_size += char_lstm_dim
            prev_output = T.concatenate([prev_output, prev_output_2], axis = 1)

        prev_output = apply_dropout(prev_output, dropout)
        if args.conv != 0:
            for i in range(args.clayer):
                layers.append(GKNNMultiHeadGate(
                    n_in = prev_size,
                    n_out = prev_size,
                    n_head = args.head
                ))
                prev_output = layers[-1].forward_all(prev_output, up_ids, up_id_masks, bv_ur_slicess[0], down_ids, down_id_masks, bv_dr_slicess[0])
                prev_output = apply_dropout(prev_output, dropout)

        #prev_size *= 2
        #layers.append(LSTM(
        #    n_in = prev_size,
        #    n_out = word_lstm_dim,
        #    direction = 'bi' if args.word_bidirect else 'si'
        #))

        #prev_output = prev_output.dimshuffle(0, 'x', 1)
        #prev_output = layers[-1].forward_all(prev_output)
        #prev_output = prev_output.reshape((prev_output.shape[0], prev_output.shape[-1]))

        #prev_size = word_lstm_dim

        layers.append(Layer(
            n_in = prev_size,
            n_out = args.classes,
            activation = linear,  # ReLU,
            has_bias = False
        ))

        n_tags = args.classes
        s_len = char_ids.shape[0]
        tags_scores = layers[-1].forward(prev_output)
        transitions = shared((n_tags + 2, n_tags + 2), 'transitions')
        small = -1000
        b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
        e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
        observations = T.concatenate(
            [tags_scores, small * T.ones((s_len, 2))],
            axis=1
        )

        observations = T.concatenate(
            [b_s, observations, e_s],
            axis=0
        )

        real_path_score = tags_scores[T.arange(s_len), tag_ids].sum()
        b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
        e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
        padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)

        pre_ids = T.arange(s_len + 1)

        s_ids = T.arange(s_len + 1) + 1

        real_path_score += transitions[
            padded_tags_ids[pre_ids],
            padded_tags_ids[s_ids]
        ].sum()

        all_paths_scores = CRFForward(observations, transitions)
        self.nll_loss = nll_loss = - (real_path_score - all_paths_scores)
        preds = CRFForward(observations, transitions, viterbi = True,
                        return_alpha = False, return_best_sequence=True)

        self.pred = preds[1:-1]

        self.l2_sqr = None
        params = self.params = [transitions]
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        #for l, i in zip(layers[3:], range(len(layers[3:]))):
        for l, i in zip(layers[2+len(r_emb_layers)+len(r_matrix_layers):], range(len(layers[2+len(r_emb_layers)+len(r_matrix_layers):]))):
            say("layer {}: n_in={}\tn_out={}\n".format(
                i, l.n_in, l.n_out
            ))

        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                        for x in self.params)
        say("total # parameters: {}\n".format(nparams))

        cost = self.nll_loss + self.l2_sqr

        lr_method_name = args.learning
        lr_method_parameters = {}
        lr_method_parameters['lr'] = args.learning_rate
        updates = Optimization(clip=5.0).get_updates(lr_method_name, cost, params, **lr_method_parameters)

        f_train = theano.function(
            inputs = self.inputs,
            outputs = [cost, nll_loss],
            updates = updates,
            allow_input_downcast = True
        )

        f_eval = theano.function(
            inputs = self.inputs[:-1],
            outputs = self.pred,
            allow_input_downcast = True
        )

        return f_train, f_eval
Example #22
    def ready(self):
        args = self.args
        weights = self.weights

        # len(source) * batch
        idxs = self.idxs = T.imatrix()

        # len(target) * batch
        idys = self.idys = T.imatrix()
        idts = idys[:-1]
        idgs = idys[1:]

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype(theano.config.floatX))

        embedding_layer = self.embedding_layer

        activation = get_activation_by_name(args.activation)
        n_d = self.n_d = args.hidden_dim
        n_e = self.n_e = embedding_layer.n_d
        n_V = self.n_V = embedding_layer.n_V

        if args.layer.lower() == "rcnn":
            LayerType = RCNN
        elif args.layer.lower() == "lstm":
            LayerType = LSTM
        elif args.layer.lower() == "gru":
            LayerType = GRU

        depth = self.depth = args.depth
        layers = self.layers = []
        for i in range(depth * 2):
            if LayerType != RCNN:
                feature_layer = LayerType(n_in=n_e if i / 2 == 0 else n_d,
                                          n_out=n_d,
                                          activation=activation)
            else:
                feature_layer = LayerType(n_in=n_e if i / 2 == 0 else n_d,
                                          n_out=n_d,
                                          activation=activation,
                                          order=args.order,
                                          mode=args.mode,
                                          has_outgate=args.outgate)
            layers.append(feature_layer)

        self.output_layer = output_layer = Layer(
            n_in=n_d,
            n_out=n_V,
            activation=T.nnet.softmax,
        )

        # feature computation starts here

        # (len*batch)*n_e
        xs_flat = embedding_layer.forward(idxs.ravel())
        xs_flat = apply_dropout(xs_flat, dropout)
        if weights is not None:
            xs_w = weights[idxs.ravel()].dimshuffle((0, 'x'))
            xs_flat = xs_flat * xs_w
        # len*batch*n_e
        xs = xs_flat.reshape((idxs.shape[0], idxs.shape[1], n_e))

        # (len*batch)*n_e
        xt_flat = embedding_layer.forward(idts.ravel())
        xt_flat = apply_dropout(xt_flat, dropout)
        if weights is not None:
            xt_w = weights[idts.ravel()].dimshuffle((0, 'x'))
            xt_flat = xt_flat * xt_w
        # len*batch*n_e
        xt = xt_flat.reshape((idts.shape[0], idts.shape[1], n_e))

        prev_hs = xs
        prev_ht = xt
        for i in range(depth):
            # len*batch*n_d
            hs = layers[i * 2].forward_all(prev_hs, return_c=True)
            ht = layers[i * 2 + 1].forward_all(prev_ht, hs[-1])
            hs = hs[:, :, -n_d:]
            ht = ht[:, :, -n_d:]
            prev_hs = hs
            prev_ht = ht
            prev_hs = apply_dropout(hs, dropout)
            prev_ht = apply_dropout(ht, dropout)

        self.p_y_given_x = output_layer.forward(
            prev_ht.reshape((xt_flat.shape[0], n_d)))

        h_final = hs[-1]
        self.scores2 = -(h_final[1:] - h_final[0]).norm(2, axis=1)
        h_final = self.normalize_2d(h_final)
        self.scores = T.dot(h_final[1:], h_final[0])

        # (len*batch)
        nll = T.nnet.categorical_crossentropy(self.p_y_given_x, idgs.ravel())
        nll = nll.reshape(idgs.shape)
        self.nll = nll
        self.mask = mask = T.cast(T.neq(idgs, self.padding_id),
                                  theano.config.floatX)
        nll = T.sum(nll * mask, axis=0)

        #layers.append(embedding_layer)
        layers.append(output_layer)
        params = []
        for l in self.layers:
            params += l.params
        self.params = params
        say("num of parameters: {}\n".format(
            sum(len(x.get_value(borrow=True).ravel()) for x in params)))
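        # L2 regularization below accumulates the L2 norm of every parameter
        # (not the squared norm used in the other examples), scaled by args.l2_reg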

        l2_reg = None
        for p in params:
            if l2_reg is None:
                l2_reg = p.norm(2)
            else:
                l2_reg = l2_reg + p.norm(2)
        l2_reg = l2_reg * args.l2_reg
        self.loss = T.mean(nll)
        self.cost = self.loss + l2_reg
Example #23
0
    def ready(self):
        args = self.args
        embedding_layer = self.embedding_layer
        num_aspects = self.num_aspects

        self.n_emb = embedding_layer.n_d

        dropout = self.dropout = theano.shared(
                np.float64(args.dropout_rate).astype(theano.config.floatX)
            )

        self.x = T.imatrix('x')
        self.w_masks = T.fmatrix('mask')
        self.w_lens = T.fvector('sent_len')
        self.s_maxlen = T.iscalar('sent_max_len')
        self.s_num = T.iscalar('sent_num')
        self.y = T.ivector('y')
        self.ay = T.imatrix('ay')
        self.ay_mask = T.fmatrix('ay_mask')
        self.aay = T.itensor3('aay')

        x = self.x
        query = self.query

        w_masks = self.w_masks
        w_lens = self.w_lens
        s_ml = self.s_maxlen
        s_num = self.s_num
        n_emb = self.n_emb

        y = self.y
        ay = self.ay
        ay_mask = self.ay_mask
        aay = self.aay

        layers = self.layers = [embedding_layer]
        slices = embedding_layer.forward(x.ravel())
        self.slices = slices = slices.reshape((x.shape[0], x.shape[1], n_emb))

        slices_query = embedding_layer.forward(query.flatten(), is_node=False)
        slices_query = slices_query.reshape((query.shape[0], query.shape[1], n_emb))

        layers.append(Query_Repr_Layer(slices_query))
        slices_query_tmp = slices_query = layers[-1].forward()

        layer = LSTM(n_in=n_emb, n_out=n_emb)
        layers.append(layer)

        prev_output = slices
        prev_output = apply_dropout(prev_output, dropout, v2=True)
        prev_output = layers[-1].forward_all(prev_output, w_masks)

        layer = Layer(n_in=n_emb, n_out=n_emb, activation=tanh)
        layers.append(layer)
        self.slices_query = slices_query = layers[-1].forward(slices_query)

        maskss = []
        w_lenss = []
        for i in range(num_aspects):
            maskss.append(w_masks)
            w_lenss.append(w_lens)

        maskss = T.concatenate(maskss, axis=1)
        w_lenss = T.concatenate(w_lenss)

        # word-level attention (args.hop_word hops)
        layer = IterAttentionLayer(n_in=n_emb, n_out=n_emb)
        layers.append(layer)
        prev_output = layers[-1].forward(prev_output, slices_query, is_word=True,
                                         hop=args.hop_word, masks=w_masks,
                                         aspect_num=num_aspects)
        prev_output = prev_output.reshape((prev_output.shape[0] * prev_output.shape[1],
                                           prev_output.shape[2]))
        prev_output = apply_dropout(prev_output, dropout, v2=True)

        # regroup word-level outputs into (s_num, num_aspects * batch, dim) for
        # the sentence-level LSTM; floor division keeps the reshape dimension integral
        prev_output = prev_output.reshape((num_aspects,
                                           prev_output.shape[0] // (num_aspects * s_num),
                                           s_num,
                                           prev_output.shape[1]))
        prev_output = prev_output.dimshuffle(2, 0, 1, 3)
        prev_output = prev_output.reshape((prev_output.shape[0],
                                           prev_output.shape[1] * prev_output.shape[2],
                                           prev_output.shape[3]))

        layer = LSTM(n_in=n_emb * args.hop_word, n_out=n_emb)
        layers.append(layer)
        prev_output = layers[-1].forward_all(prev_output)

        #layers.append(Query_Repr_Layer(slices_query))
        #slices_query = layers[-1].forward()
        layer = Layer(n_in=n_emb, n_out=n_emb, activation=tanh)
        layers.append(layer)
        slices_query = layers[-1].forward(slices_query_tmp)  # bug

        # sentence-level attention (args.hop_sent hops)
        layer = IterAttentionLayer(n_in=n_emb, n_out=n_emb)
        layers.append(layer)
        prev_output = layers[-1].forward(prev_output, slices_query, is_word=False,
                                         hop=args.hop_sent, aspect_num=num_aspects)
        prev_output = prev_output.reshape((prev_output.shape[0] * prev_output.shape[1],
                                           prev_output.shape[2]))
        prev_output = apply_dropout(prev_output, dropout, v2=True)

        prev_output = prev_output.reshape((num_aspects,
                                           prev_output.shape[0] // num_aspects,
                                           prev_output.shape[1]))

        softmax_inputs = []
        for i in range(num_aspects):
            softmax_inputs.append(prev_output[i])

        size = n_emb * args.hop_sent

        p_y_given_a = []
        pred_ay = []
        nll_loss_ay = []

        # one softmax classifier per aspect, with the loss masked by ay_mask
        for i in range(num_aspects):
            layers.append(Layer(n_in=size,
                                n_out=args.score_scale,
                                activation=softmax,
                                has_bias=False))

            p_y_given_a.append(layers[-1].forward(softmax_inputs[i]))
            nll_loss_ay.append(T.mean(T.sum(-T.log(p_y_given_a[-1])
                                            * aay[:, i, :]
                                            * ay_mask[:, i].dimshuffle(0, 'x'))))
            pred_ay.append(T.argmax(p_y_given_a[-1], axis=1))

        self.p_y_given_a = p_y_given_a
        self.nll_loss_ay = T.sum(nll_loss_ay)
        self.pred_ay = T.stack(pred_ay).dimshuffle(1, 0)

        for i, l in enumerate(layers[4:]):
            say("layer {}: n_in={}\tn_out={}\n".format(
                i, l.n_in, l.n_out))

        self.l2_sqr = None
        self.params = [ ]
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                        for x in self.params)
        say("total # parameters: {}\n".format(nparams))
Example #24
0
    def ready(self):
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype(theano.config.floatX))

        # len*batch
        x = self.x = T.imatrix()

        z = self.z = T.bmatrix()
        z = z.dimshuffle((0, 1, "x"))

        # batch*nclasses
        y = self.y = T.fmatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = []
        depth = args.depth
        layer_type = args.layer.lower()
        for i in range(depth):
            if layer_type == "rcnn":
                l = ExtRCNN(n_in=n_e if i == 0 else n_d,
                            n_out=n_d,
                            activation=activation,
                            order=args.order)
            elif layer_type == "lstm":
                l = ExtLSTM(n_in=n_e if i == 0 else n_d,
                            n_out=n_d,
                            activation=activation)
            layers.append(l)

        # len * batch * 1
        masks = T.cast(
            T.neq(x, padding_id).dimshuffle((0, 1, "x")) * z,
            theano.config.floatX)
        # batch * 1
        cnt_non_padding = T.sum(masks, axis=0) + 1e-8

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)

        pooling = args.pooling
        lst_states = []
        h_prev = embs
        for l in layers:
            # len*batch*n_d
            h_next = l.forward_all(h_prev, z)
            if pooling:
                # batch * n_d
                masked_sum = T.sum(h_next * masks, axis=0)
                lst_states.append(masked_sum / cnt_non_padding)  # mean pooling
            else:
                lst_states.append(h_next[-1])  # last state
            h_prev = apply_dropout(h_next, dropout)

        if args.use_all:
            size = depth * n_d
            # batch * size (i.e. n_d*depth)
            h_final = T.concatenate(lst_states, axis=1)
        else:
            size = n_d
            h_final = lst_states[-1]
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(n_in=size,
                                                 n_out=self.nclasses,
                                                 activation=sigmoid)

        # batch * nclasses
        preds = self.preds = output_layer.forward(h_final)

        # batch * nclasses
        loss_mat = self.loss_mat = (preds - y)**2
        loss = self.loss = T.mean(loss_mat)

        pred_diff = self.pred_diff = T.mean(
            T.max(preds, axis=1) - T.min(preds, axis=1))

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost

        cost = self.cost = loss * 10 + l2_cost
Example #25
0
        net.train(to_col(train_row[1:])/255, train_row[0])
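
# Distillation-style training (train_students below): each student is fit to
# the teacher's outputs for every training example instead of to the labels.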

def train_students(teacher, students, train_data):
    for i, train_row in enumerate(train_data):
        if not i%1000:
            print(i)

        x = to_col(train_row[1:])/255
        out = teacher.forward_pass(x)
        for student in students:
            student.train(x, out)


if __name__ == "__main__":
    layers = [
        Layer(784, 16, LeakyReLU()),
        Layer(16, 16, LeakyReLU()),
        Layer(16, 10, LeakyReLU()),
    ]
    teacher = NeuralNetwork(layers, CrossEntropyLoss(), 0.03)
    students = [
        NeuralNetwork([Layer(784, 10, LeakyReLU())], MSELoss(), 0.001),
        NeuralNetwork([Layer(784, 10, LeakyReLU())], MSELoss(), 0.003),
        NeuralNetwork([Layer(784, 10, LeakyReLU())], MSELoss(), 0.01),
        NeuralNetwork([Layer(784, 10, LeakyReLU())], MSELoss(), 0.03),
        NeuralNetwork([Layer(784, 10, LeakyReLU())], MSELoss(), 0.1),
        NeuralNetwork([Layer(784, 10, LeakyReLU())], MSELoss(), 0.3),
    ]

    test_data = load_data(TEST_FILE, delimiter=",", dtype=int)
    accuracy = test(teacher, test_data)
Example #26
0
    def ready(self):
        generator = self.generator
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]
        unk_id = embedding_layer.vocab_map["<unk>"]
        unk_vec = embedding_layer.embeddings[unk_id]

        dropout = generator.dropout

        # len*batch
        x = generator.x
        z = generator.z_pred
        z = z.dimshuffle((0,1,"x"))

        # batch*nclasses
        y = self.y = T.fmatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        depth = args.depth
        layer_type = args.layer.lower()
        for i in xrange(depth):
            l = CNN(
                    n_in = n_e if i == 0 else n_d,
                    n_out = n_d,
                    activation = activation,
                    order = args.order
                )
            layers.append(l)

        # len * batch * 1
        masks = T.cast(T.neq(x, padding_id).dimshuffle((0,1,"x")) * z, theano.config.floatX)
        # batch * 1
        cnt_non_padding = T.sum(masks, axis=0) + 1e-8

        # len*batch*n_e
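        # words dropped by the rationale (z == 0) are replaced with the <unk>
        # embedding rather than removed, keeping the sequence length fixed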
        embs = generator.word_embs*z + unk_vec.dimshuffle(('x','x',0))*(1-z)

        pooling = args.pooling
        lst_states = [ ]
        h_prev = embs
        for l in layers:
            # len*batch*n_d
            h_next = l.forward_all(h_prev)
            if pooling:
                # batch * n_d
                masked_sum = T.sum(h_next * masks, axis=0)
                lst_states.append(masked_sum/cnt_non_padding) # mean pooling
            else:
                lst_states.append(T.max(h_next, axis=0))
            h_prev = apply_dropout(h_next, dropout)

        if args.use_all:
            size = depth * n_d
            # batch * size (i.e. n_d*depth)
            h_final = T.concatenate(lst_states, axis=1)
        else:
            size = n_d
            h_final = lst_states[-1]
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = self.nclasses,
                activation = sigmoid
            )

        # batch * nclasses
        p_y_given_x = self.p_y_given_x = output_layer.forward(h_final)
        preds = self.preds = p_y_given_x > 0.5
        print preds, preds.dtype
        print self.nclasses

        # batch * nclasses
        loss_mat = T.nnet.binary_crossentropy(p_y_given_x, y)

        if args.aspect < 0:
            loss_vec = T.mean(loss_mat, axis=1)
        else:
            assert args.aspect < self.nclasses
            loss_vec = loss_mat[:,args.aspect]
        self.loss_vec = loss_vec

        self.true_pos = T.sum(preds*y)
        self.tot_pos = T.sum(preds)
        self.tot_true = T.sum(y)

        zsum = generator.zsum
        zdiff = generator.zdiff
        logpz = generator.logpz

        coherent_factor = args.sparsity * args.coherent
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
                                             T.mean(zdiff) * coherent_factor
        cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
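        # score-function (REINFORCE-style) estimator: the per-example cost is
        # weighted by the log-probability of the sampled rationale z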
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        self.obj = T.mean(cost_vec)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        if not args.fix_emb:
            params += embedding_layer.params
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost

        self.cost_g = cost_logpz + generator.l2_cost
        self.cost_e = loss + l2_cost
Example #27
0
    def __init__(self, n_d):
        self.n_d = n_d
        self.gate = Layer(n_d, n_d, sigmoid)
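
    # Hypothetical usage sketch (not part of the original snippet): assuming
    # Layer(n_in, n_out, sigmoid).forward(h) computes sigmoid(W h + b) as in
    # the other examples, the gate can blend a candidate state with the
    # previous state, highway-network style.
    def forward(self, h_prev, h_candidate):
        # t in (0, 1): how much of the candidate state to let through
        t = self.gate.forward(h_prev)
        return t * h_candidate + (1.0 - t) * h_prev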
Example #28
0
    def ready(self):
        global total_encode_time
        #say("in encoder ready: \n")
        #start_encode_time = time.time()
        generator = self.generator
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = generator.dropout

        # len*batch
        x = generator.x
        z = generator.z_pred
        z = z.dimshuffle((0, 1, "x"))

        # batch*nclasses
        y = self.y = T.fmatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = []
        depth = args.depth
        layer_type = args.layer.lower()
        for i in xrange(depth):
            if layer_type == "rcnn":
                l = ExtRCNN(n_in=n_e if i == 0 else n_d,
                            n_out=n_d,
                            activation=activation,
                            order=args.order)
            elif layer_type == "lstm":
                l = ExtLSTM(n_in=n_e if i == 0 else n_d,
                            n_out=n_d,
                            activation=activation)
            layers.append(l)

        # len * batch * 1
        masks = T.cast(
            T.neq(x, padding_id).dimshuffle((0, 1, "x")) * z,
            theano.config.floatX)
        # batch * 1
        cnt_non_padding = T.sum(masks, axis=0) + 1e-8

        # len*batch*n_e
        embs = generator.word_embs

        pooling = args.pooling
        lst_states = []
        h_prev = embs
        for l in layers:
            # len*batch*n_d
            h_next = l.forward_all(h_prev, z)
            if pooling:
                # batch * n_d
                masked_sum = T.sum(h_next * masks, axis=0)
                lst_states.append(masked_sum / cnt_non_padding)  # mean pooling
            else:
                lst_states.append(h_next[-1])  # last state
            h_prev = apply_dropout(h_next, dropout)

        if args.use_all:
            size = depth * n_d
            # batch * size (i.e. n_d*depth)
            h_final = T.concatenate(lst_states, axis=1)
        else:
            size = n_d
            h_final = lst_states[-1]
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(n_in=size,
                                                 n_out=self.nclasses,
                                                 activation=sigmoid)

        # batch * nclasses
        preds = self.preds = output_layer.forward(h_final)

        # batch * nclasses
        loss_mat = self.loss_mat = (preds - y)**2

        pred_diff = self.pred_diff = T.mean(
            T.max(preds, axis=1) - T.min(preds, axis=1))

        if args.aspect < 0:
            loss_vec = T.mean(loss_mat, axis=1)
        else:
            assert args.aspect < self.nclasses
            loss_vec = loss_mat[:, args.aspect]
        self.loss_vec = loss_vec

        zsum = generator.zsum
        zdiff = generator.zdiff
        logpz = generator.logpz

        coherent_factor = args.sparsity * args.coherent
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
                                             T.mean(zdiff) * coherent_factor
        cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        self.obj = T.mean(cost_vec)

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost

        self.cost_g = cost_logpz * 10 + generator.l2_cost
        self.cost_e = loss * 10 + l2_cost
Example #29
0
    def ready(self, args, train):
        # len * batch
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=theano.config.floatX)

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(
                n_d = self.n_d,
                vocab = set(w for w in train)
            )
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(
                self.n_V, self.n_d
            ))

        activation = get_activation_by_name(args["activation"])

        rnn_layer = LSTM(
                 n_in = self.n_d,
                 n_out = self.n_d,
                 activation = activation
            )

        output_layer = Layer(
                n_in = self.n_d,
                n_out = self.n_V,
                activation = T.nnet.softmax,
            )

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        x = x.reshape( (self.idxs.shape[0], self.idxs.shape[1], self.n_d) )

        # len * batch * (n_d+n_d)
        h = rnn_layer.forward_all(x, self.init_state, return_c=True)

        self.last_state = h[-1]
        h = h[:,:,self.n_d:]
        h = apply_dropout(h, self.dropout)

        self.p_y_given_x = output_layer.forward(h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = -T.log(self.p_y_given_x[T.arange(idys.shape[0]), idys])
        #self.nll = T.nnet.categorical_crossentropy(
        #                self.p_y_given_x,
        #                idys
        #            )

        self.layers = [ embedding_layer, rnn_layer, output_layer ]
        #self.params = [ x_flat ] + rnn_layer.params + output_layer.params
        self.params = embedding_layer.params + rnn_layer.params + output_layer.params
        self.num_params = sum(len(x.get_value(borrow=True).ravel())
                                for l in self.layers for x in l.params)
        say("# of params in total: {}\n".format(self.num_params))
Example #30
0
import numpy as np
from nn import NN, Layer

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

Y = np.array([[0], [1], [1], [0]])

nn = NN(2)
nn.add(Layer(3))
nn.add(Layer(1))

print("Predictions before training")
print(nn.feed_forward(X))
print()

nn.train(X, Y)

print()
print("Predictions after training")
print(nn.feed_forward(X))
Example #31
0
    def ready(self):
        args = self.args
        index = self.index = T.lscalar()
        x = self.x = T.fmatrix()
        y = self.y = T.ivector()

        dropout = self.dropout = theano.shared(np.float64(args.dropout).astype(
                        "float32"))

        n_d = args.hidden_dim
        layers = self.layers = [ ]
        for i in xrange(args.depth):
            l = Layer(
                        n_in = 28*28 if i == 0 else n_d,
                        n_out = n_d,
                        activation = ReLU
                    )
            layers.append(l)


        output_layer = self.output_layer = Layer(
                    n_in = n_d,
                    n_out = 10,
                    activation = softmax
                )

        h = x
        for l in layers:
            h = l.forward(h)
            h = apply_dropout(h, dropout)

        self.h_final = h

        # batch * 10
        probs = self.probs = output_layer.forward(h)

        # batch
        preds = self.preds = T.argmax(probs, axis=1)
        err = self.err = T.mean(T.cast(T.neq(preds, y), dtype="float32"))

        #
        loss = self.loss = -T.mean( T.log(probs[T.arange(y.shape[0]), y]) )
        #loss = self.loss = T.mean( T.nnet.categorical_crossentropy(
        #                            probs,
        #                            y
        #                    ))

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost += T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg

        self.l2_cost = l2_cost
        self.cost = loss + l2_cost
        print "cost.dtype", self.cost.dtype