Esempio n. 1
0
    def ready(self, args, train):
        """Build the symbolic graph of the LSTM language model.

        Creates the symbolic inputs, instantiates the embedding, LSTM and
        softmax output layers, and wires them into ``self.p_y_given_x`` and
        the per-position negative log-likelihood ``self.nll``.

        Args:
            args: mapping read for the keys "dropout", "hidden_dim" and
                "activation".
            train: iterable whose distinct items form the vocabulary handed
                to the embedding layer.

        Sets on ``self``: idxs, idys, init_state, dropout, n_d, n_V,
        last_state, p_y_given_x, nll, layers, params, num_params.
        """
        float_type = theano.config.floatX

        # Symbolic inputs; the index matrices are laid out as len * batch.
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=float_type)

        # The dropout rate is kept in a shared variable, cast to floatX.
        self.dropout = theano.shared(
            np.float64(args["dropout"]).astype(float_type))

        hidden_dim = self.n_d = args["hidden_dim"]

        # NOTE: layers are constructed in a fixed order (embedding, LSTM,
        # output); keep it, in case initialisation draws from a shared RNG.
        embedding_layer = EmbeddingLayer(n_d=hidden_dim, vocab=set(train))
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

        activation = get_activation_by_name(args["activation"])
        rnn_layer = LSTM(n_in=hidden_dim, n_out=hidden_dim,
                         activation=activation)
        output_layer = Layer(n_in=hidden_dim,
                             n_out=self.n_V,
                             activation=T.nnet.softmax)

        # Embedding lookup on the flattened indices: (len*batch) * n_d.
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # Dropout on the flat embeddings, then restore len * batch * n_d.
        seq_len = self.idxs.shape[0]
        batch = self.idxs.shape[1]
        x = apply_dropout(x_flat, self.dropout)
        x = x.reshape((seq_len, batch, hidden_dim))

        # With return_c=True the last axis is documented in the original
        # shape note as n_d+n_d wide; only the part from index n_d onward is
        # fed to the softmax (presumably the hidden state -- confirm against
        # LSTM.forward_all).
        states = rnn_layer.forward_all(x, self.init_state, return_c=True)
        self.last_state = states[-1]
        hidden = apply_dropout(states[:, :, hidden_dim:], self.dropout)

        self.p_y_given_x = output_layer.forward(hidden.reshape(x_flat.shape))

        # Negative log-probability of the target index at every position.
        targets = self.idys.ravel()
        positions = T.arange(targets.shape[0])
        self.nll = -T.log(self.p_y_given_x[positions, targets])

        self.layers = [embedding_layer, rnn_layer, output_layer]
        self.params = (embedding_layer.params
                       + rnn_layer.params
                       + output_layer.params)

        # Count the scalar entries over every parameter of every layer.
        total = 0
        for layer in self.layers:
            for param in layer.params:
                total += len(param.get_value(borrow=True).ravel())
        self.num_params = total
        say("# of params in total: {}\n".format(self.num_params))
Esempio n. 2
0
    def ready(self, args, train):
        """Assemble the embedding -> LSTM -> softmax graph for the model.

        Args:
            args: mapping providing "dropout", "hidden_dim" and
                "activation".
            train: iterable of vocabulary items; duplicates are collapsed
                into a set before being passed to the embedding layer.

        After the call ``self`` holds the symbolic inputs (``idxs``,
        ``idys``, ``init_state``), the shared ``dropout`` rate, the graph
        outputs (``last_state``, ``p_y_given_x``, ``nll``) and the
        bookkeeping attributes ``layers``, ``params`` and ``num_params``.
        """
        # Inputs and targets are integer matrices, len * batch.
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=theano.config.floatX)

        rate = np.float64(args["dropout"])
        self.dropout = theano.shared(rate.astype(theano.config.floatX))

        self.n_d = args["hidden_dim"]

        # Layer construction order (embedding, LSTM, output) is kept as-is.
        vocab = set(train)
        embedding_layer = EmbeddingLayer(n_d=self.n_d, vocab=vocab)
        self.n_V = embedding_layer.n_V

        summary = "Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d)
        say(summary)

        activation = get_activation_by_name(args["activation"])

        rnn_layer = LSTM(n_in=self.n_d, n_out=self.n_d, activation=activation)
        output_layer = Layer(
            n_in=self.n_d,
            n_out=self.n_V,
            activation=T.nnet.softmax,
        )

        # (len*batch) * n_d
        flat_indices = self.idxs.ravel()
        x_flat = embedding_layer.forward(flat_indices)

        # Apply dropout while flat, then reshape to len * batch * n_d.
        cube_shape = (self.idxs.shape[0], self.idxs.shape[1], self.n_d)
        x = apply_dropout(x_flat, self.dropout).reshape(cube_shape)

        # Original shape note: len * batch * (n_d+n_d) when return_c=True.
        rnn_out = rnn_layer.forward_all(x, self.init_state, return_c=True)

        # The final time step of the full output is exposed as last_state.
        self.last_state = rnn_out[-1]
        # Only the slice from n_d onward along the last axis reaches the
        # softmax (presumably the hidden state -- verify in LSTM).
        h = rnn_out[:, :, self.n_d:]
        h = apply_dropout(h, self.dropout)
        h_flat = h.reshape(x_flat.shape)

        self.p_y_given_x = output_layer.forward(h_flat)

        # Pick the predicted probability of each target and negate its log.
        idys = self.idys.ravel()
        picked = self.p_y_given_x[T.arange(idys.shape[0]), idys]
        self.nll = -T.log(picked)

        self.layers = [embedding_layer, rnn_layer, output_layer]
        self.params = (
            embedding_layer.params + rnn_layer.params + output_layer.params
        )

        # One entry per parameter: its number of scalar elements.
        param_sizes = [
            len(param.get_value(borrow=True).ravel())
            for layer in self.layers
            for param in layer.params
        ]
        self.num_params = sum(param_sizes)
        say("# of params in total: {}\n".format(self.num_params))
Esempio n. 3
0
    def ready(self, args, train):
        """Build the graph of a stacked KernelNN language model.

        ``depth`` recurrent layers are stacked on top of an embedding layer
        and followed by a softmax output layer whose weight matrix is
        replaced by the transpose of the embedding matrix.

        Args:
            args: mapping read for the keys "depth", "dropout",
                "rnn_dropout", "hidden_dim", "activation" and "highway";
                it is also stored as ``self.args``.
            train: iterable whose distinct items form the vocabulary handed
                to the embedding layer.

        Sets on ``self``: args, idxs, idys, init_state, dropout,
        rnn_dropout, n_d, n_V, layers, last_state, p_y_given_x, nll, params,
        num_params.
        """
        depth = args["depth"]
        self.args = args
        float_type = theano.config.floatX

        # Index matrices are len * batch.
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        # Two symbolic initial-state matrices per recurrent layer.
        self.init_state = [T.matrix(dtype=float_type)
                           for _ in xrange(depth * 2)]

        # Both dropout rates are stored as shared variables cast to floatX.
        self.dropout = theano.shared(
            np.float64(args["dropout"]).astype(float_type))
        self.rnn_dropout = theano.shared(
            np.float64(args["rnn_dropout"]).astype(float_type))

        hidden_dim = self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(n_d=hidden_dim, vocab=set(train))
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

        activation = get_activation_by_name(args["activation"])

        # self.layers and the local `layers` are the same list object; the
        # recurrent layers go in first, embedding/output are added at the end.
        layers = self.layers = []
        for _ in xrange(depth):
            layers.append(
                KernelNN(n_in=hidden_dim,
                         n_out=hidden_dim,
                         activation=activation,
                         highway=args["highway"],
                         dropout=self.rnn_dropout))

        output_layer = Layer(n_in=hidden_dim,
                             n_out=self.n_V,
                             activation=T.nnet.softmax)
        # Tie the output projection to the (transposed) embedding matrix.
        output_layer.W = embedding_layer.embeddings.T

        # Embedding lookup on the flattened indices: (len*batch) * n_d.
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # Dropout on the flat embeddings, then reshape to len * batch * n_d.
        seq_len = self.idxs.shape[0]
        batch = self.idxs.shape[1]
        layer_input = apply_dropout(x_flat, self.dropout)
        layer_input = layer_input.reshape((seq_len, batch, hidden_dim))

        # Run the stack; layer k consumes init_state[2k:2k+2] and contributes
        # the last time step of both of its outputs to last_state.
        final_states = self.last_state = []
        for level, layer in enumerate(layers):
            first = level * 2
            initial = self.init_state[first:first + 2]
            c, h = layer.forward_all(layer_input, initial, return_c=True)
            final_states.extend([c[-1], h[-1]])
            layer_input = h

        top = apply_dropout(layer_input, self.dropout)
        self.p_y_given_x = output_layer.forward(top.reshape(x_flat.shape))

        targets = self.idys.ravel()
        self.nll = T.nnet.categorical_crossentropy(self.p_y_given_x, targets)

        # Because the output weights are tied to the embeddings, only the
        # embedding matrix and the output bias are registered in addition to
        # the recurrent layers' parameters.
        collected = []
        for layer in layers:
            for param in layer.params:
                collected.append(param)
        collected += [embedding_layer.embeddings, output_layer.b]
        self.params = collected

        total = 0
        for param in self.params:
            total += len(param.get_value(borrow=True).ravel())
        self.num_params = total
        say("# of params in total: {}\n".format(self.num_params))

        # Extend self.layers in place with the non-recurrent layers.
        layers.extend([embedding_layer, output_layer])