Esempio n. 1
0
    def __init__(
        self,
        lowercase: Boolean(),
        stopwords_remove: Boolean(),
        binary: Boolean(),
        inner_tokenizer: algorithm(Sentence(), List(Word())),
        inner_stemmer: algorithm(Word(), Stem()),
        inner_stopwords: algorithm(List(Word()), List(Word())),
    ):
        """Store the inner text algorithms and initialise both base classes.

        Args:
            lowercase: Forwarded to ``_CountVectorizer.__init__`` as
                ``lowercase``.
            stopwords_remove: Stored on the instance as
                ``self.stopwords_remove``.
            binary: Forwarded to ``_CountVectorizer.__init__`` as ``binary``.
            inner_tokenizer: Stored on the instance as
                ``self.inner_tokenizer``.
            inner_stemmer: Stored on the instance as ``self.inner_stemmer``.
            inner_stopwords: Stored on the instance as
                ``self.inner_stopwords``.
        """
        # Instance state is recorded before either base initialiser runs.
        self.stopwords_remove = stopwords_remove
        self.inner_tokenizer = inner_tokenizer
        self.inner_stemmer = inner_stemmer
        self.inner_stopwords = inner_stopwords

        # Both bases are initialised explicitly (no ``super()``), wrapper
        # base first and the vectorizer second.
        SklearnTransformer.__init__(self)
        _CountVectorizer.__init__(self, binary=binary, lowercase=lowercase)
Esempio n. 2
0
    def __init__(
        self,
        language: CategoricalValue(
            "danish", "dutch", "english", "finnish", "french", "german",
            "hungarian", "italian", "norwegian", "portuguese", "russian",
            "spanish", "swedish", "turkish",
        ),
    ):
        """Remember the language and load its NLTK stopword list.

        Args:
            language: One of the language names listed in the annotation.
                It is stored as ``self.language`` and passed to
                ``nltk.corpus.stopwords.words``.

        The stopword list returned by NLTK is kept in ``self.words``.
        NOTE(review): this presumably requires the NLTK ``stopwords`` corpus
        to be installed locally - confirm for the deployment environment.
        """
        self.language = language

        # The import is kept local to the constructor, so ``nltk`` is only
        # imported when an instance is actually created.
        from nltk.corpus import stopwords

        stopword_list = stopwords.words(language)
        self.words = stopword_list

        SklearnTransformer.__init__(self)
Esempio n. 3
0
 def run(self, input: Seq[Sentence]) -> MatrixContinuousDense:
     """Transform a list of documents into a dense continuous matrix.

     This method adds no logic of its own: the call is forwarded unchanged
     to ``SklearnTransformer.run``.
     NOTE(review): it presumably exists only to declare the input and
     output types through its annotations - confirm.

     Args:
         input: The sequence of sentences (documents) to transform.

     Returns:
         The result of ``SklearnTransformer.run(self, input)``, annotated
         as a ``MatrixContinuousDense``.
     """
     return SklearnTransformer.run(self, input)
Esempio n. 4
0
 def run(self, input: Seq[Word]) -> Seq[Word]:
     """Transform a word list into a word list without stopwords.

     This method adds no logic of its own: the call is forwarded unchanged
     to ``SklearnTransformer.run``. The stopword filtering itself is not
     visible in this method.

     Args:
         input: The sequence of words to filter.

     Returns:
         The result of ``SklearnTransformer.run(self, input)``, annotated
         as a sequence of words.
     """
     return SklearnTransformer.run(self, input)
Esempio n. 5
0
 def run(self, input: List(Word())) -> List(Word()):
     """Transform a word list into a word list without stopwords.

     This method adds no logic of its own: the call is forwarded unchanged
     to ``SklearnTransformer.run``. The stopword filtering itself is not
     visible in this method.

     Args:
         input: The list of words to filter.

     Returns:
         The result of ``SklearnTransformer.run(self, input)``, annotated
         as a list of words.
     """
     return SklearnTransformer.run(self, input)
Esempio n. 6
0
 def train(self):
     """Forward the call to ``SklearnTransformer.train``.

     This method adds no logic of its own and has no ``return`` statement,
     so the delegate's result is discarded.
     """
     SklearnTransformer.train(self)
Esempio n. 7
0
 def run(self, input: List(Sentence())) -> MatrixContinuousSparse():
     """Transform a list of sentences into a sparse continuous matrix.

     This method adds no logic of its own: the call is forwarded unchanged
     to ``SklearnTransformer.run``.
     NOTE(review): it presumably exists only to declare the input and
     output types through its annotations - confirm.

     Args:
         input: The list of sentences to transform.

     Returns:
         The result of ``SklearnTransformer.run(self, input)``, annotated
         as a ``MatrixContinuousSparse``.
     """
     return SklearnTransformer.run(self, input)