Example #1
    def ready(self, args, train):
        # idxs/idys: len * batch matrices of input and target word ids
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=theano.config.floatX)

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(n_d=self.n_d,
                                         vocab=set(w for w in train))
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

        activation = get_activation_by_name(args["activation"])

        rnn_layer = LSTM(n_in=self.n_d, n_out=self.n_d, activation=activation)

        output_layer = Layer(
            n_in=self.n_d,
            n_out=self.n_V,
            activation=T.nnet.softmax,
        )

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        x = x.reshape((self.idxs.shape[0], self.idxs.shape[1], self.n_d))

        # len * batch * (n_d+n_d): cell states and hidden states concatenated (return_c=True)
        h = rnn_layer.forward_all(x, self.init_state, return_c=True)

        self.last_state = h[-1]
        h = h[:, :, self.n_d:]
        h = apply_dropout(h, self.dropout)

        self.p_y_given_x = output_layer.forward(h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = -T.log(self.p_y_given_x[T.arange(idys.shape[0]), idys])
        #self.nll = T.nnet.categorical_crossentropy(
        #                self.p_y_given_x,
        #                idys
        #            )

        self.layers = [embedding_layer, rnn_layer, output_layer]
        #self.params = [ x_flat ] + rnn_layer.params + output_layer.params
        self.params = embedding_layer.params + rnn_layer.params + output_layer.params
        self.num_params = sum(
            len(x.get_value(borrow=True).ravel()) for l in self.layers
            for x in l.params)
        say("# of params in total: {}\n".format(self.num_params))
Example #2
    def ready(self):
        args = self.args
        embedding_layer = self.embedding_layer
        self.n_hidden = args.hidden_dim
        self.n_in = embedding_layer.n_d
        dropout = self.dropout = theano.shared(
            np.float64(args.dropout_rate).astype(theano.config.floatX))

        # x is length * batch_size
        # y is batch_size
        self.x = T.imatrix('x')
        self.y = T.ivector('y')

        x = self.x
        y = self.y
        n_hidden = self.n_hidden
        n_in = self.n_in

        # fetch word embeddings
        # (len * batch_size) * n_in
        slices = embedding_layer.forward(x.ravel())
        self.slices = slices

        # 3-d tensor, len * batch_size * n_in
        slices = slices.reshape((x.shape[0], x.shape[1], n_in))

        # stacking the feature extraction layers
        pooling = args.pooling
        depth = args.depth
        layers = self.layers = []
        prev_output = slices
        prev_output = apply_dropout(prev_output, dropout, v2=True)
        size = 0
        softmax_inputs = []
        activation = get_activation_by_name(args.act)
        for i in range(depth):
            if args.layer.lower() == "lstm":
                layer = LSTM(n_in=n_hidden if i > 0 else n_in, n_out=n_hidden)
            elif args.layer.lower() == "strcnn":
                layer = StrCNN(n_in=n_hidden if i > 0 else n_in,
                               n_out=n_hidden,
                               activation=activation,
                               decay=args.decay,
                               order=args.order)
            elif args.layer.lower() == "rcnn":
                layer = RCNN(n_in=n_hidden if i > 0 else n_in,
                             n_out=n_hidden,
                             activation=activation,
                             order=args.order,
                             mode=args.mode)
            else:
                raise Exception("unknown layer type: {}".format(args.layer))

            layers.append(layer)
            prev_output = layer.forward_all(prev_output)
            if pooling:
                softmax_inputs.append(T.sum(prev_output,
                                            axis=0))  # sum over the time (length) dimension
            else:
                softmax_inputs.append(prev_output[-1])
            prev_output = apply_dropout(prev_output, dropout)
            size += n_hidden

        # final feature representation is the concatenation of all extraction layers
        if pooling:
            softmax_input = T.concatenate(softmax_inputs, axis=1) / x.shape[0]
        else:
            softmax_input = T.concatenate(softmax_inputs, axis=1)
        softmax_input = apply_dropout(softmax_input, dropout, v2=True)

        # feed the feature repr. to the softmax output layer
        layers.append(
            Layer(n_in=size,
                  n_out=self.nclasses,
                  activation=softmax,
                  has_bias=False))

        for i, l in enumerate(layers):
            say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))

        # class probabilities of y given x (softmax output)
        self.p_y_given_x = layers[-1].forward(softmax_input)
        self.pred = T.argmax(self.p_y_given_x, axis=1)
        self.nll_loss = T.mean(
            T.nnet.categorical_crossentropy(self.p_y_given_x, y))

        # adding regularizations
        self.l2_sqr = None
        self.params = []
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                        for x in self.params)
        say("total # parameters: {}\n".format(nparams))
Example #3
    def ready(self):
        args = self.args
        embedding_layer = self.embedding_layer
        user_embedding_layer = self.user_embedding_layer
        self.n_hidden = args.hidden_dim
        self.n_in = embedding_layer.n_d
        dropout = self.dropout = theano.shared(
            np.float64(args.dropout_rate).astype(theano.config.floatX)
        )

        # x is length * batch_size
        # y is batch_size
        self.x = T.imatrix('x')
        self.w_masks = T.fmatrix('mask')
        self.w_lens = T.fvector('lens')
        self.s_ml = T.iscalar('sent_maxlen')
        self.s_num = T.iscalar('sent_num')
        self.y = T.ivector('y')
        self.usr = T.ivector('users')

        x = self.x
        y = self.y
        usr = self.usr
        w_masks = self.w_masks
        w_lens = self.w_lens
        s_ml = self.s_ml
        s_num = self.s_num
        n_hidden = self.n_hidden
        n_emb = n_in = self.n_in

        layers = self.layers = []

        slicesu = user_embedding_layer.forward(usr)
        slices = embedding_layer.forward(x.ravel())
        self.slices = slices  # important for updating word embeddings

        # 3-d tensor, len * batch_size * n_in
        slices = slices.reshape((x.shape[0], x.shape[1], n_in))

        pooling = args.pooling
        prev_output = slices
        prev_output = apply_dropout(prev_output, dropout, v2=True)
        size = 0

        n_hidden_t = n_hidden
        if args.direction == "bi":
            n_hidden_t = 2 * n_hidden

        softmax_inputs = []
        activation = get_activation_by_name(args.act)

        if args.layer.lower() == "lstm":
            layer = LSTM(n_in=n_in,
                         n_out=n_hidden_t,
                         direction=args.direction
                         )
        elif args.layer.lower() == "cnn":
            layer = CNN(n_in=n_in,
                        n_out=n_hidden_t,
                        activation=activation,
                        order=args.order
                        )
        else:
            raise Exception("unknown layer type: {}".format(args.layer))

        layers.append(layer)
        prev_output = layer.forward_all(prev_output, masks=w_masks)
        prev_output = apply_dropout(prev_output, dropout)

        # word-level sentence representation: user attention over the word states, or pooling over time
        if args.user_atten:
            layer = IterAttentionLayer(
                n_in=n_emb,
                n_out=n_hidden_t
            )
            layers.append(layer)
            if args.user_atten_base:
                slicesu = None
            softmax_input = layers[-1].multi_hop_forward(
                prev_output, user_embs=slicesu, isWord=True, masks=w_masks)
        else:
            if pooling:
                softmax_input = T.sum(prev_output, axis=0) / w_lens.dimshuffle(0, 'x')
            else:
                ind = T.cast(w_lens - T.ones_like(w_lens), 'int32')
                softmax_input = prev_output[T.arange(ind.shape[0]), ind]

        softmax_input = apply_dropout(softmax_input, dropout, v2=True)

        n_in = n_hidden_t
        size = 0
        softmax_inputs = []
        sentlen, emblen = T.shape(softmax_input)
        # group the sentence vectors per document (integer division), sentence axis first
        prev_output = softmax_input.reshape(
            (sentlen // s_num, s_num, emblen)).dimshuffle(1, 0, 2)
        if args.layer.lower() == "lstm":
            layer = LSTM(n_in=n_in,
                         n_out=n_hidden_t,
                         direction=args.direction
                         )
        elif args.layer.lower() == "cnn":
            layer = CNN(n_in=n_in,
                        n_out=n_hidden_t,
                        activation=activation,
                        order=args.order,
                        )
        else:
            raise Exception("unknown layer type: {}".format(args.layer))

        layers.append(layer)
        prev_output = layer.forward_all(prev_output)
        prev_output = apply_dropout(prev_output, dropout)

        if args.user_atten:
            layer = IterAttentionLayer(
                n_in=n_emb,
                n_out=n_hidden_t
            )
            layers.append(layer)

            if args.user_atten_base:
                slicesu = None
            softmax_input = layers[-1].multi_hop_forward(
                prev_output, user_embs=slicesu, isWord=False)
        else:
            if pooling:
                softmax_input = T.sum(prev_output, axis=0) / \
                    T.cast(s_num, 'float32')
            else:
                softmax_input = prev_output[-1]
        softmax_input = apply_dropout(softmax_input, dropout, v2=True)

        size = n_hidden_t
        layers.append(Layer(
            n_in=size,
            n_out=self.nclasses,
            activation=softmax,
            has_bias=False
        ))
        if not args.fix_emb:
            for i, l in enumerate(layers):
                say("layer {}: n_in={}\tn_out={}\n".format(
                    i, l.n_in, l.n_out
                ))
        else:
            for i, l in enumerate(layers[1:]):
                say("layer {}: n_in={}\tn_out={}\n".format(
                    i, l.n_in, l.n_out
                ))

        # class probabilities of y given x (softmax output)
        self.p_y_given_x = layers[-1].forward(softmax_input)
        self.pred = T.argmax(self.p_y_given_x, axis=1)
        self.nll_loss = T.mean(T.nnet.categorical_crossentropy(
            self.p_y_given_x,
            y
        ))

        # adding regularizations
        self.l2_sqr = None
        self.params = []
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        nparams = sum(len(x.get_value(borrow=True).ravel())
                      for x in self.params)
        say("total # parameters: {}\n".format(nparams))