Ejemplo n.º 1
0
    def train(self, classdict, nb_epochs=500, l2reg=0.01, bias_l2reg=0.01, optimizer='adam'):
        """ Fit the classifier to the supplied training data.

        Builds the gensim dictionary, corpus and class labels from the training
        data, indexes the class labels, converts the training data into the
        `X` / `y` matrices, and then fits a model created by
        `logistic_framework`. On success the fitted model is stored in
        `self.model` and `self.trained` is set to `True`.

        :param classdict: training data
        :param nb_epochs: number of epochs (Default: 500)
        :param l2reg: L2 regularization coefficient (Default: 0.01)
        :param bias_l2reg: L2 regularization coefficient for bias (Default: 0.01)
        :param optimizer: optimizer for gradient descent. Options: sgd, rmsprop, adagrad, adadelta, adam, adamax, nadam. (Default: adam)
        :return: None
        :type classdict: dict
        :type nb_epochs: int
        :type l2reg: float
        :type bias_l2reg: float
        :type optimizer: str
        """
        def to_tokens(text):
            # Preprocess the raw text first, then split it into tokens.
            return tokenize(self.preprocessor(text))

        corpora = gc.generate_gensim_corpora(classdict, preprocess_and_tokenize=to_tokens)
        self.dictionary, self.corpus, self.classlabels = corpora
        self.index_classlabels()

        features, targets = self.convert_classdict_to_XY(classdict)

        # The model is sized by the dictionary length (inputs) and the number
        # of class labels (outputs).
        nb_features = len(self.dictionary)
        nb_labels = len(self.classlabels)
        fitted = logistic_framework(
            nb_features,
            nb_labels,
            l2reg=l2reg,
            bias_l2reg=bias_l2reg,
            optimizer=optimizer,
        )
        # NOTE(review): X and y are presumably sparse matrices, hence the
        # toarray() conversion before fitting -- confirm against
        # convert_classdict_to_XY.
        fitted.fit(features.toarray(), targets.toarray(), epochs=nb_epochs)

        # Only publish the model once fitting has completed without raising.
        self.model = fitted
        self.trained = True
    def generate_corpus(self, classdict):
        """ Build the gensim dictionary and corpus, and extract the class labels,
        from the training data.

        The results are stored in `self.dictionary`, `self.corpus` and
        `self.classlabels`. Each text is run through `self.preprocessor` and
        then `tokenize` before being added to the corpus. Intended to be called
        by :func:`~train` (verify against the caller).

        :param classdict: training data
        :return: None
        :type classdict: dict
        """
        def to_tokens(text):
            # Preprocess the raw text first, then split it into tokens.
            return tokenize(self.preprocessor(text))

        corpora = gc.generate_gensim_corpora(classdict, preprocess_and_tokenize=to_tokens)
        self.dictionary, self.corpus, self.classlabels = corpora