Example #1
    def ready(self):
        args = self.args
        index = self.index = T.lscalar()
        x = self.x = T.fmatrix()
        y = self.y = T.ivector()

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype("float32"))

        n_d = args.hidden_dim
        layers = self.layers = []
        for i in xrange(args.depth):
            l = Layer(n_in=28 * 28 if i == 0 else n_d,
                      n_out=n_d,
                      activation=ReLU)
            layers.append(l)

        output_layer = self.output_layer = Layer(n_in=n_d,
                                                 n_out=10,
                                                 activation=softmax)

        h = x
        for l in layers:
            h = l.forward(h)
            h = apply_dropout(h, dropout)

        self.h_final = h

        # batch * 10
        probs = self.probs = output_layer.forward(h)

        # batch
        preds = self.preds = T.argmax(probs, axis=1)
        err = self.err = T.mean(T.cast(T.neq(preds, y), dtype="float32"))

        # mean negative log-likelihood of the gold labels (cross-entropy)
        loss = self.loss = -T.mean(T.log(probs[T.arange(y.shape[0]), y]))
        #loss = self.loss = T.mean( T.nnet.categorical_crossentropy(
        #                            probs,
        #                            y
        #                    ))

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost += T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg

        self.l2_cost = l2_cost
        self.cost = loss + l2_cost
        print "cost.dtype", self.cost.dtype
Example #2
    def ready(self, args, train):
        # len * batch
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=theano.config.floatX)

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(n_d=self.n_d,
                                         vocab=set(w for w in train))
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

        activation = get_activation_by_name(args["activation"])

        rnn_layer = LSTM(n_in=self.n_d, n_out=self.n_d, activation=activation)

        output_layer = Layer(
            n_in=self.n_d,
            n_out=self.n_V,
            activation=T.nnet.softmax,
        )

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        x = x.reshape((self.idxs.shape[0], self.idxs.shape[1], self.n_d))

        # len * batch * (n_d+n_d)
        h = rnn_layer.forward_all(x, self.init_state, return_c=True)

        self.last_state = h[-1]
        h = h[:, :, self.n_d:]
        h = apply_dropout(h, self.dropout)

        self.p_y_given_x = output_layer.forward(h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = -T.log(self.p_y_given_x[T.arange(idys.shape[0]), idys])
        #self.nll = T.nnet.categorical_crossentropy(
        #                self.p_y_given_x,
        #                idys
        #            )

        self.layers = [embedding_layer, rnn_layer, output_layer]
        #self.params = [ x_flat ] + rnn_layer.params + output_layer.params
        self.params = embedding_layer.params + rnn_layer.params + output_layer.params
        self.num_params = sum(
            len(x.get_value(borrow=True).ravel()) for l in self.layers
            for x in l.params)
        say("# of params in total: {}\n".format(self.num_params))
Example #3
    def build_model(self):
        args = self.args
        weights = self.weights

        meta_emb = self.meta_emb = self.embs[0]
        golden_embs = self.embs[1:]

        n_m_d = meta_emb.n_d
        dropout = self.dropout = theano.shared(
            np.float64(args.dropout_rate).astype(theano.config.floatX))

        batch_ids = self.batch_ids = T.ivector('batch_d_char')
        batch_masks = self.batch_masks = T.fmatrix('batch_d_char_mask')

        layers = self.layers = [meta_emb]

        slices_embs = meta_emb.forward(batch_ids.ravel())
        slices_embs = slices_embs.reshape((batch_ids.shape[0], n_m_d))
        prev_output = apply_dropout(slices_embs, dropout, v2=True)

        self.all_loss = 0.0
        for i in range(len(weights)):
            mask, weight, golden_emb = batch_masks[i], weights[i], golden_embs[i]
            n_o_d = golden_emb.n_d
            layer = Layer(n_m_d, n_o_d, linear)
            layers.append(layer)
            mapped_output = layer.forward(prev_output)

            slices_embs = golden_emb.forward(batch_ids.ravel())
            slices_embs = slices_embs.reshape((batch_ids.shape[0], n_o_d))
            # masked, weighted squared distance between the mapped
            # meta-embedding and the golden embedding of each word
            diff = mapped_output - slices_embs
            self.all_loss += weight * T.sum(
                T.sum(diff * diff, axis=1) * mask) / (1e-8 + T.sum(mask))

        for i, l in enumerate(layers[1:]):
            say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))

        self.l2_sqr = None
        self.params = []

        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        self.all_loss += self.l2_sqr
        n_params = sum(
            len(x.get_value(borrow=True).ravel()) for x in self.params)
        say("total # parameters: {}\n".format(n_params))
Example #4
class HighwayLayer(object):
    def __init__(self, n_d):
        self.n_d = n_d
        self.gate = Layer(n_d, n_d, sigmoid)

    def forward(self, x, h):
        t = self.gate.forward(x)
        return h * t + x * (1 - t)

    @property
    def params(self):
        return self.gate.params

    @params.setter
    def params(self, param_list):
        self.gate.params = param_list
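
The gate output t interpolates between the transformed input h and the raw input x: with t near 1 the layer passes h through, with t near 0 it carries x unchanged. A quick numeric illustration of h * t + x * (1 - t) with made-up values:

import numpy as np

x = np.array([1.0, 2.0, 3.0])   # raw input
h = np.array([0.5, 0.0, 9.0])   # transformed input
t = np.array([1.0, 0.5, 0.0])   # gate activations (sigmoid output per unit)

out = h * t + x * (1 - t)
print(out)  # [0.5  1.   3. ]: pass h, mix h and x, carry x unchanged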
Example #5
    def ready(self, args, train):
        # len * batch
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=theano.config.floatX)

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(
                n_d = self.n_d,
                vocab = set(w for w in train)
            )
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(
                self.n_V, self.n_d
            ))

        activation = get_activation_by_name(args["activation"])

        rnn_layer = LSTM(
                 n_in = self.n_d,
                 n_out = self.n_d,
                 activation = activation
            )

        output_layer = Layer(
                n_in = self.n_d,
                n_out = self.n_V,
                activation = T.nnet.softmax,
            )

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        x = x.reshape( (self.idxs.shape[0], self.idxs.shape[1], self.n_d) )

        # len * batch * (n_d+n_d)
        h = rnn_layer.forward_all(x, self.init_state, return_c=True)

        self.last_state = h[-1]
        h = h[:,:,self.n_d:]
        h = apply_dropout(h, self.dropout)

        self.p_y_given_x = output_layer.forward(h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = -T.log(self.p_y_given_x[T.arange(idys.shape[0]), idys])
        #self.nll = T.nnet.categorical_crossentropy(
        #                self.p_y_given_x,
        #                idys
        #            )

        self.layers = [ embedding_layer, rnn_layer, output_layer ]
        #self.params = [ x_flat ] + rnn_layer.params + output_layer.params
        self.params = embedding_layer.params + rnn_layer.params + output_layer.params
        self.num_params = sum(len(x.get_value(borrow=True).ravel())
                                for l in self.layers for x in l.params)
        say("# of params in total: {}\n".format(self.num_params))
Example #6
    def ready(self):
        args = self.args
        index = self.index = T.lscalar()
        x = self.x = T.fmatrix()
        y = self.y = T.ivector()

        dropout = self.dropout = theano.shared(np.float64(args.dropout).astype(
                        "float32"))

        n_d = args.hidden_dim
        layers = self.layers = [ ]
        for i in xrange(args.depth):
            l = Layer(
                        n_in = 28*28 if i == 0 else n_d,
                        n_out = n_d,
                        activation = ReLU
                    )
            layers.append(l)


        output_layer = self.output_layer = Layer(
                    n_in = n_d,
                    n_out = 10,
                    activation = softmax
                )

        h = x
        for l in layers:
            h = l.forward(h)
            h = apply_dropout(h, dropout)

        self.h_final = h

        # batch * 10
        probs = self.probs = output_layer.forward(h)

        # batch
        preds = self.preds = T.argmax(probs, axis=1)
        err = self.err = T.mean(T.cast(T.neq(preds, y), dtype="float32"))

        # mean negative log-likelihood of the gold labels (cross-entropy)
        loss = self.loss = -T.mean( T.log(probs[T.arange(y.shape[0]), y]) )
        #loss = self.loss = T.mean( T.nnet.categorical_crossentropy(
        #                            probs,
        #                            y
        #                    ))

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost += T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg

        self.l2_cost = l2_cost
        self.cost = loss + l2_cost
        print "cost.dtype", self.cost.dtype
Example #7
    def ready(self, args, train):
        # len * batch
        depth = args["depth"]
        self.args = args
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = [
            T.matrix(dtype=theano.config.floatX) for i in xrange(depth * 2)
        ]

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)
        rnn_dropout_prob = np.float64(args["rnn_dropout"]).astype(
            theano.config.floatX)
        self.rnn_dropout = theano.shared(rnn_dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(n_d=self.n_d,
                                         vocab=set(w for w in train))
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

        activation = get_activation_by_name(args["activation"])

        layers = self.layers = []
        for i in xrange(depth):
            rnn_layer = KernelNN(n_in=self.n_d,
                                 n_out=self.n_d,
                                 activation=activation,
                                 highway=args["highway"],
                                 dropout=self.rnn_dropout)
            layers.append(rnn_layer)

        output_layer = Layer(
            n_in=self.n_d,
            n_out=self.n_V,
            activation=T.nnet.softmax,
        )
        # tie the softmax weights to the (transposed) input embedding matrix
        output_layer.W = embedding_layer.embeddings.T

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        #x = x_flat
        x = x.reshape((self.idxs.shape[0], self.idxs.shape[1], self.n_d))

        # len * batch * (n_d+n_d)
        self.last_state = []
        prev_h = x
        for i in xrange(depth):
            hidden = self.init_state[i * 2:i * 2 + 2]
            c, h = layers[i].forward_all(prev_h, hidden, return_c=True)
            self.last_state += [c[-1], h[-1]]
            prev_h = h

        prev_h = apply_dropout(prev_h, self.dropout)
        self.p_y_given_x = output_layer.forward(prev_h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = T.nnet.categorical_crossentropy(self.p_y_given_x, idys)

        self.params = [x for l in layers for x in l.params]
        self.params += [embedding_layer.embeddings, output_layer.b]
        self.num_params = sum(
            len(x.get_value(borrow=True).ravel()) for x in self.params)
        say("# of params in total: {}\n".format(self.num_params))
        layers += [embedding_layer, output_layer]
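
This deeper variant expects init_state to be a list of depth * 2 matrices, one cell state c and one hidden state h per KernelNN layer. The snippet does not show how callers create them; a plausible sketch for building zero initial states, under the assumption that each state matrix has shape (batch_size, n_d), is:

import numpy as np
import theano

def zero_init_states(depth, batch_size, n_d):
    # one (c, h) pair of zero matrices per layer, matching self.init_state
    return [np.zeros((batch_size, n_d), dtype=theano.config.floatX)
            for _ in range(depth * 2)]

# hypothetical usage: pass these as the values of model.init_state when
# calling a theano.function compiled over this graph.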