Example #1
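# tokenizer: to be filled by any algorithm that maps a Document to a list of Sentences.
# feature_extractor: any algorithm that maps a Sentence to Flags.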
def __init__(
    self,
    tokenizer: algorithm(Document(), List(Sentence())),
    feature_extractor: algorithm(Sentence(), Flags()),
):
    self.tokenizer = tokenizer
    self.feature_extractor = feature_extractor
Example #2
def test_polymorphic_interface():
    interface = algorithm(MatrixContinuousDense(), MatrixContinuousDense())
    assert interface.is_compatible(ExactAlgorithm)
    assert interface.is_compatible(HigherInputAlgorithm)

    interface = algorithm(MatrixContinuousDense(), MatrixContinuous())
    assert interface.is_compatible(LowerOutputAlgorithm)
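The algorithm classes referenced by the assertions are not part of this listing. A minimal sketch of what such fixtures could look like, assuming that an algorithm declares its input and output semantic types on the annotations of a run method (an assumption; the class bodies below are hypothetical):

class ExactAlgorithm:
    # input and output match the first interface exactly
    def run(self, input: MatrixContinuousDense()) -> MatrixContinuousDense():
        pass

class HigherInputAlgorithm:
    # accepts a more general input than the interface asks for, so dense matrices are fine
    def run(self, input: MatrixContinuous()) -> MatrixContinuousDense():
        pass

class LowerOutputAlgorithm:
    # produces a more specific output than the MatrixContinuous required by the second interface
    def run(self, input: MatrixContinuousDense()) -> MatrixContinuousDense():
        pass

Under that reading, the first interface tolerates an exact match as well as a more general input type, and the second tolerates a more specific output type.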
Example #3
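# Same pattern one level down: a Sentence -> List(Word) tokenizer, a Word -> Flags feature
# extractor, plus a plain Boolean() hyperparameter (include_text).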
def __init__(
    self,
    tokenizer: algorithm(Sentence(), List(Word())),
    feature_extractor: algorithm(Word(), Flags()),
    include_text: Boolean(),
):
    self.tokenizer = tokenizer
    self.feature_extractor = feature_extractor
    self.include_text = include_text
Example #4
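# extractors: Word -> Flags algorithms wrapped in Distinct, presumably to rule out repeated
# extractors ("MultipleFeatureExtractor" being exempt); merger: reduces a List(Flags) to Flags.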
def __init__(
    self,
    extractors: Distinct(
        algorithm(Word(), Flags()), exceptions=["MultipleFeatureExtractor"]
    ),
    merger: algorithm(List(Flags()), Flags()),
):
    self.extractors = extractors
    self.merger = merger
Example #5
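# Boolean() hyperparameters combined with three algorithm dependencies (tokenizer, stemmer,
# stopword filter); lowercase and binary are forwarded to the underlying _CountVectorizer.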
def __init__(
    self,
    lowercase: Boolean(),
    stopwords_remove: Boolean(),
    binary: Boolean(),
    inner_tokenizer: algorithm(Sentence(), List(Word())),
    inner_stemmer: algorithm(Word(), Stem()),
    inner_stopwords: algorithm(List(Word()), List(Word())),
):
    self.stopwords_remove = stopwords_remove
    self.inner_tokenizer = inner_tokenizer
    self.inner_stemmer = inner_stemmer
    self.inner_stopwords = inner_stopwords

    SklearnTransformer.__init__(self)
    _CountVectorizer.__init__(self, lowercase=lowercase, binary=binary)
Example #6
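# Integer hyperparameters drawn from the given ranges, plus a backoff dependency: any
# supervised algorithm trained on Seq[Seq[Word]] with Seq[Seq[Postag]] labels that
# outputs Seq[Seq[Postag]].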
def __init__(
    self,
    affix_length: DiscreteValue(min=2, max=6),
    min_stem_length: DiscreteValue(min=1, max=4),
    cutoff: DiscreteValue(min=0, max=10),
    backoff: algorithm(
        Seq[Seq[Word]], Supervised[Seq[Seq[Postag]]], Seq[Seq[Postag]]
    ),
):
    self.affix_length = affix_length
    self.min_stem_length = min_stem_length
    self.cutoff = cutoff
    self.backoff = backoff
    self.tagger = _AffixTagger

    self.values = dict(
        affix_length=affix_length,
        min_stem_length=min_stem_length,
        cutoff=cutoff,
        backoff=backoff,
    )

    NltkTagger.__init__(self)
Example #7
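# A single dependency: any algorithm mapping Word to Stem (semantic types given as bare
# classes rather than instances here).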
def __init__(self, dependance: algorithm(Word, Stem)):
    pass
Example #8
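# Same shape, but the dependency maps Sentence to Document.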
def __init__(self, dependance: algorithm(Sentence, Document)):
    pass
Example #9
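# The Word -> Stem dependency of Example #7 again, under the parameter name stem.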
def __init__(self, stem: algorithm(Word, Stem)):
    pass
Example #10
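# The Sentence -> Document dependency of Example #8 again, under the parameter name ub.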
def __init__(self, ub: algorithm(Sentence, Document)):
    pass
Example #11
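# Two dependencies: a sentence tokenizer and a per-token feature extractor; a third
# annotation is left commented out and incomplete.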
def __init__(self,
    tokenizer: algorithm(Sentence(), List(Word())),
    token_feature_extractor: algorithm(Word(), Flags()),
    # token_sentence_encoder: algorithm(Word(), )
):
    pass
Example #12
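# Minimal case: a single Sentence -> List(Word) tokenizer dependency.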
def __init__(self, tokenizer: algorithm(Sentence(), List(Word()))):
    self.tokenizer = tokenizer
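For illustration, a class that could satisfy the tokenizer requirement above might look like the sketch below. It assumes the run-method convention described under Example #2 and that the Sentence, Word and List types used throughout the listing are in scope; the class name and body are hypothetical.

class WhitespaceTokenizer:
    def run(self, input: Sentence()) -> List(Word()):
        # naive whitespace tokenization, assuming the value behind Sentence() is plain text
        return input.split()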