Ejemplo n.º 1
0
def standalone_engine(rules, **kwargs):
    """Build a parsing callable backed by rita's standalone engine.

    Args:
        rules: Rule source handed to ``rita.compile_string``.
        **kwargs: Forwarded unchanged to ``rita.compile_string``.

    Returns:
        A function that takes a text and returns a list of
        ``(text, label)`` tuples, one per result produced by the engine.
    """
    compiled = rita.compile_string(rules, use_engine="standalone", **kwargs)
    # Dump the compiled patterns to stdout so they show up in test output.
    print(compiled.patterns)

    def parse(text):
        matches = compiled.execute(text, include_submatches=False)
        return [(match["text"], match["label"]) for match in matches]

    return parse
Ejemplo n.º 2
0
 def test_save_and_load_rules_from_file(self):
     """Save compiled rules to a temp file, load them back, then match text.

     The results of ``execute`` are materialized and checked, so the test
     fails if matching is broken instead of passing without ever running it.
     """
     rules = '''
     {WORD("Hello"), WORD("world")}->MARK("HELLO")
     '''
     engine = rita.compile_string(rules, use_engine="standalone")
     with tempfile.TemporaryDirectory() as tmpdir:
         rules_path = os.path.join(tmpdir, "rules-example.json")
         engine.save(rules_path)
         engine.load(rules_path)
         # list() forces evaluation in case execute() yields results lazily.
         results = list(engine.execute("Hello world"))
     # NOTE(review): assumes each result is a mapping with a "label" key,
     # as produced by the standalone engine -- confirm against the engine.
     assert any(result["label"] == "HELLO" for result in results)
Ejemplo n.º 3
0
def spacy_engine(rules, **kwargs):
    """Build a parsing callable backed by a spaCy pipeline configured with *rules*.

    The calling test is skipped when spaCy (>= 2.1) cannot be imported.

    Args:
        rules: Rule source, passed to ``setup_spacy`` as ``rules_string``
            and to ``rita.compile_string``.
        **kwargs: Forwarded to both ``setup_spacy`` and
            ``rita.compile_string``.

    Returns:
        A function that takes a text and returns a list of
        ``(entity_text, entity_label)`` tuples read from ``doc.ents``.
    """
    spacy = pytest.importorskip("spacy", minversion="2.1")
    nlp = spacy.load("en_core_web_sm")
    setup_spacy(nlp, rules_string=rules, override_ents=True, **kwargs)
    # Compiled a second time only so the patterns can be printed.
    print(rita.compile_string(rules, **kwargs))

    def parse(text):
        entities = nlp(text).ents
        return [(entity.text, entity.label_) for entity in entities]

    return parse
Ejemplo n.º 4
0
def rust_engine(rules, **kwargs):
    """Build a parsing callable backed by rita's rust engine.

    The calling test is skipped when ``load_lib()`` returns ``None``.

    Args:
        rules: Rule source handed to ``rita.compile_string``.
        **kwargs: Forwarded unchanged to ``rita.compile_string``.

    Returns:
        A function that takes a text and returns a list of
        ``(text, label)`` tuples, one per result produced by the engine.
    """
    from rita.engine.translate_rust import load_lib

    if load_lib() is None:
        pytest.skip("Missing rita-rust dynamic lib, skipping related tests")

    print("Trying to run: {}".format(rules))
    compiled = rita.compile_string(rules, use_engine="rust", **kwargs)
    print(compiled.patterns)

    def parse(text):
        matches = compiled.execute(text, include_submatches=False)
        return [(match["text"], match["label"]) for match in matches]

    return parse
Ejemplo n.º 5
0
def _spacy_v3(model, patterns=None, rules_path=None, rules_string=None, override_ents=True):
    ruler = model.add_pipe("entity_ruler", config={"overwrite_ents": override_ents, "validate": True})
    if not patterns:
        if rules_path:
            patterns = rita.compile(rules_path, use_engine="spacy")
        elif rules_string:
            patterns = rita.compile_string(rules_string, use_engine="spacy")
        else:
            raise RuntimeError("Please provides rules. Either `patterns`, `rules_path` or `rules_string`")

        ruler.add_patterns(patterns)
    else:
        ruler.from_disk(patterns)
    return model
Ejemplo n.º 6
0
def spacy_engine(rules, **kwargs):
    """Build a parsing callable from an ``EntityRuler`` loaded with compiled rules.

    The calling test is skipped when spaCy (>= 2.1) cannot be imported.
    The ruler is constructed as an object and passed to ``nlp.add_pipe``.

    Args:
        rules: Rule source handed to ``rita.compile_string``.
        **kwargs: Forwarded unchanged to ``rita.compile_string``.

    Returns:
        A function that takes a text and returns a list of
        ``(entity_text, entity_label)`` tuples read from ``doc.ents``.
    """
    spacy = pytest.importorskip("spacy", minversion="2.1")
    compiled = rita.compile_string(rules, **kwargs)
    nlp = spacy.load("en")
    entity_ruler = spacy.pipeline.EntityRuler(nlp, overwrite_ents=True)
    # Show the compiled patterns in the test output before registering them.
    print(compiled)
    entity_ruler.add_patterns(compiled)
    nlp.add_pipe(entity_ruler)

    def parse(text):
        entities = nlp(text).ents
        return [(entity.text, entity.label_) for entity in entities]

    return parse
Ejemplo n.º 7
0
def _spacy_v2(model, patterns=None, rules_path=None, rules_string=None, override_ents=True):
    """Create an ``EntityRuler`` for *model*, feed it rules, and add it to the pipeline.

    Args:
        model: Pipeline object; passed to ``EntityRuler`` and extended via
            ``model.add_pipe``.
        patterns: When truthy, passed to the ruler's ``from_disk``.
            Presumably a path to previously saved patterns -- confirm with
            callers.
        rules_path: Rules file compiled with ``rita.compile``; used only when
            ``patterns`` is falsy.
        rules_string: Rules text compiled with ``rita.compile_string``; used
            only when both ``patterns`` and ``rules_path`` are falsy.
        override_ents: Passed to ``EntityRuler`` as ``overwrite_ents``.

    Returns:
        The same *model* object, with the ruler added.

    Raises:
        RuntimeError: If none of ``patterns``, ``rules_path`` or
            ``rules_string`` is provided.
    """
    from spacy.pipeline import EntityRuler

    entity_ruler = EntityRuler(model, overwrite_ents=override_ents)
    if patterns:
        entity_ruler.from_disk(patterns)
    else:
        if rules_path:
            compiled = rita.compile(rules_path, use_engine="spacy")
        elif rules_string:
            compiled = rita.compile_string(rules_string, use_engine="spacy")
        else:
            raise RuntimeError("Please provides rules. Either `patterns`, `rules_path` or `rules_string`")
        entity_ruler.add_patterns(compiled)

    # The pipeline is only modified after the ruler has been populated.
    model.add_pipe(entity_ruler)
    return model
Ejemplo n.º 8
0
def test_compile_context(engine):
    """Check that a ``companies`` list passed at compile time is usable via ``IN_LIST``.

    Compiles a rule referencing ``companies`` with the given *engine*, runs
    it on a sample sentence, and compares the first result (including its
    submatches) against the expected dictionary. Skipped for the rust
    engine when ``load_lib()`` returns ``None``.
    """
    if engine == "rust":
        from rita.engine.translate_rust import load_lib

        if load_lib() is None:
            pytest.skip(
                "Missing rita-rust dynamic lib, skipping related tests")

    rules = """

    {WORD*, IN_LIST(companies), WORD*}->MARK("SUSPISCIOUS_COMPANY")
    """
    parser = rita.compile_string(
        rules,
        use_engine=engine,
        companies=["CompanyA", "CompanyB"],
    )
    print(parser.patterns)

    # Whole match first, then one submatch per captured group.
    expected_submatches = [
        {
            "start": 0,
            "end": 33,
            "key": "SUSPISCIOUS_COMPANY",
            "text": "CompanyB is doing it's dirty work",
        },
        {
            "start": 0,
            "end": 9,
            "key": "s2",
            "text": "CompanyB",
        },
        {
            "start": 9,
            "end": 33,
            "key": "s4",
            "text": "is doing it's dirty work",
        },
    ]
    expected = {
        "start": 0,
        "end": 33,
        "label": "SUSPISCIOUS_COMPANY",
        "text": "CompanyB is doing it's dirty work",
        "submatches": expected_submatches,
    }

    matches = list(parser.execute("CompanyB is doing it's dirty work."))
    assert matches[0] == expected
Ejemplo n.º 9
0
 def compiler(self, rules):
     """Compile *rules* with the standalone engine and return its ``patterns``."""
     engine = rita.compile_string(rules, use_engine="standalone")
     return engine.patterns
Ejemplo n.º 10
0
 def compiler(self, rules):
     """Compile *rules* with the spaCy engine and return the compiled result.

     The calling test is skipped when spaCy (>= 2.1) cannot be imported.
     """
     pytest.importorskip("spacy", minversion="2.1")
     compiled = rita.compile_string(rules, use_engine="spacy")
     return compiled