def test_meta_pipeline_graph():
    """Smoke-test pipeline-graph construction for the type-lifting features."""
    # List algorithm generation: a Word -> Word algorithm is lifted so it
    # can be applied element-wise over List(Word).
    build_pipeline_graph(
        input=List(Word()),
        output=List(Word()),
        registry=[WordToWordAlgorithm],
    )
    # Tuple breakdown: a Tuple input is decomposed into its components.
    build_pipeline_graph(
        input=Tuple(Word(), Matrix()),
        output=Text(),
        registry=[WordToWordAlgorithm],
    )
    # Both features combined: tuple breakdown plus list lifting.
    build_pipeline_graph(
        input=Tuple(List(Word()), Matrix()),
        output=List(Word()),
        registry=[WordToWordAlgorithm],
    )
def _build_pipeline():
    """Sample a single pipeline for nested-sequence CRF-style inputs.

    Uses a fixed random seed so the sampled pipeline is reproducible.
    """
    nested_input = Tuple(
        List(List(Flags())),
        List(List(Category())),
    )
    builder = build_pipelines(
        input=nested_input,
        output=List(List(Category())),
        registry=find_classes(include="CRF"),
    )
    return builder.sample(sampler=Sampler(random_state=0))
def test_save_load_tuple():
    """Round-trip a composite tuple algorithm through pickle.

    Equality is checked via ``repr`` because the restored object is a new
    instance, not the same object.
    """
    TupleClass = build_composite_tuple(
        1,
        input_type=Tuple(MatrixContinuousDense(), CategoricalVector()),
        output_type=Tuple(MatrixContinuousDense(), CategoricalVector()),
    )
    original = TupleClass(DummyAlgorithm)

    buffer = BytesIO()
    Pickler(buffer).dump(original)
    buffer.seek(0)
    restored = Unpickler(buffer).load()

    assert repr(original) == repr(restored)
def _make_pipeline_builder(self):
    """Create a pipeline builder for this instance's configured types.

    The builder's input couples the instance's input with its output type
    (as a tuple), presumably so supervised algorithms can see the target
    type during construction — verify against build_pipelines' contract.
    """
    # A falsy registry (None or empty) falls back to class discovery.
    if self.registry:
        registry = self.registry
    else:
        registry = find_classes(
            include=self.include_filter, exclude=self.exclude_filter
        )
    return build_pipelines(
        input=Tuple(self.input, self.output),
        output=self.output,
        registry=registry,
    )
def run(self, input: Sentence()) -> Tuple(List(Word()), List(Flags())):
    """Tokenize *input* with the wrapped spaCy model and extract per-token flags.

    Returns a pair ``(tokens, flags)``: the token texts and, aligned with
    them, one dict of linguistic features per token.  Which feature groups
    are extracted is controlled by the instance's ``extract_*`` switches.
    """
    tokenized = self.nlp(input)
    tokens = []
    flags = []

    # Surface-form attributes whose flag key equals the spaCy attribute
    # name; copied verbatim when extract_details is enabled.  A loop over
    # getattr replaces 18 hand-written, near-identical assignments.
    detail_attrs = (
        "is_alpha", "is_ascii", "is_digit", "is_lower", "is_upper",
        "is_title", "is_punct", "is_left_punct", "is_right_punct",
        "is_space", "is_bracket", "is_quote", "is_currency",
        "like_url", "like_num", "like_email", "is_oov", "is_stop",
    )

    for token in tokenized:
        token_flags = {}

        if self.extract_lemma:
            token_flags["lemma"] = token.lemma_

        if self.extract_pos_tag:
            token_flags["pos"] = token.pos_
            # The fine-grained tag is a "|"-separated list of key=value
            # pairs; bare keys (no "=") are stored as boolean True flags.
            for kv in token.tag_.split("|"):
                kv = kv.split("=")
                if len(kv) == 2:
                    token_flags["tag_" + kv[0]] = kv[1]
                else:
                    token_flags["tag_" + kv[0]] = True

        if self.extract_dep:
            token_flags["dep"] = token.dep_

        if self.extract_entity:
            token_flags["ent_type"] = token.ent_type_
            token_flags["ent_kb_id"] = token.ent_kb_id_

        if self.extract_details:
            for attr in detail_attrs:
                token_flags[attr] = getattr(token, attr)

        if self.extract_sentiment:
            token_flags["sentiment"] = token.sentiment

        tokens.append(token.text)
        flags.append(token_flags)

    return tokens, flags
import logging

logging.basicConfig(level=logging.DEBUG)


@nice_repr
class A:
    # Stub algorithm: Sentence -> List(Word); types only, no behavior.
    def run(self, input: Sentence()) -> List(Word()):
        pass


@nice_repr
class B:
    # Stub algorithm: List(Word) -> List(Vector); types only, no behavior.
    def run(self, input: List(Word())) -> List(Vector()):
        pass


@nice_repr
class C:
    # Stub algorithm: List(Vector) -> Matrix; types only, no behavior.
    def run(self, input: List(Vector())) -> Matrix():
        pass


# Build a pipeline from the three stub algorithms and run it on a tiny
# hard-coded input to exercise graph construction end-to-end.
builder = build_pipelines(
    input=Tuple(Sentence(), Vector()),
    output=Matrix(),
    registry=[A, B, C],
)
pipeline = builder.sample()

print(pipeline)
print(pipeline.run([[[True], [False, True]]]))
def run(
    self, input: Tuple(Sentence(), List(Tuple(Entity(), Entity(), Category())))
) -> Tuple(List(Vector()), CategoricalVector()):
    # Stub: declares only the semantic input/output types used for
    # pipeline construction; no implementation is provided here.
    pass
def run(
    self, input: Tuple(Sentence(), List(Entity()))
) -> Tuple(List(Word()), List(Postag())):
    # Stub: declares only the semantic input/output types used for
    # pipeline construction; no implementation is provided here.
    pass
def run(
    self, input: Tuple(List(MatrixContinuousDense()), List(List(Postag())))
) -> List(List(Postag())):
    """Delegate to the parent implementation; this override only refines
    the declared semantic input/output types."""
    result = super().run(input)
    return result
def run(
    self, input: Tuple(Tensor3(), CategoricalVector())) -> CategoricalVector():
    """Delegate to the parent implementation; this override only refines
    the declared semantic input/output types."""
    result = super().run(input)
    return result
def run(
    self, input: Tuple(MatrixContinuousDense(), CategoricalVector())
) -> CategoricalVector():
    """Delegate to the parent implementation; this override only refines
    the declared semantic input/output types."""
    result = super().run(input)
    return result
def run( self, input: Tuple(MatrixContinuousDense(), CategoricalVector()) ) -> CategoricalVector(): X, y = input return y
def run(
    self, input: Tuple(List(List(Flags())), List(List(Category())))
) -> List(List(Category())):
    """Run the sklearn estimator on nested-sequence input.

    NOTE(review): the original calls ``SklearnEstimator.run`` explicitly
    rather than ``super().run`` — presumably to bypass intermediate
    classes in the MRO; preserved verbatim, confirm against the class
    hierarchy before changing.
    """
    output = SklearnEstimator.run(self, input)
    return output