Beispiel #1
0
def main():
    """Command-line entry point: compile a rita rules file into a JSON-lines file.

    Parses ``-f`` (rules file), ``out`` (output path), ``--debug`` and
    ``--engine`` from the command line, configures logging, compiles the rules
    with ``rita.compile`` and writes one JSON document per line to ``out``.
    """
    from rita.utils import RitaJSONEncoder

    parser = argparse.ArgumentParser(
        description="Compile rita -> spaCy patterns"
    )
    # NOTE(review): ``-f`` is optional as far as argparse is concerned but its
    # value is handed straight to ``rita.compile`` -- confirm whether it
    # should be declared ``required=True``.
    parser.add_argument("-f", help=".rita rules file")
    parser.add_argument(
        "out",
        help="output .jsonl file to store rules"
    )
    parser.add_argument("--debug", help="debug mode", action="store_true")
    parser.add_argument("--engine", help="Engine to use when compiling rules", default="spacy")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Announce the engine *before* compiling so the message is accurate and
    # still shows up in the log when compilation fails.
    logger.info("Compiling rules using %s engine", args.engine)
    patterns = rita.compile(args.f, use_engine=args.engine)

    # Explicit encoding keeps the output independent of the platform default.
    with open(args.out, "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(pattern, cls=RitaJSONEncoder) + "\n"
            for pattern in patterns
        )
Beispiel #2
0
def _spacy_v3(model, patterns=None, rules_path=None, rules_string=None, override_ents=True):
    """Add an ``entity_ruler`` pipe populated with rita patterns to ``model``.

    The rule source is chosen in this order: ``patterns``, ``rules_path``,
    ``rules_string``. The patterns are resolved *before* the pipe is added, so
    a missing-argument or compilation error leaves ``model`` unmodified.

    Args:
        model: Object exposing ``add_pipe(name, config=...)``; presumably a
            spaCy v3 ``Language`` pipeline -- confirm against the caller.
        patterns: When truthy, handed to ``ruler.from_disk``.
        rules_path: Rules file compiled with ``rita.compile`` when
            ``patterns`` is not given.
        rules_string: Rules text compiled with ``rita.compile_string`` when
            neither ``patterns`` nor ``rules_path`` is given.
        override_ents: Forwarded as the ``overwrite_ents`` config value.

    Returns:
        The same ``model`` object that was passed in.

    Raises:
        RuntimeError: If none of ``patterns``, ``rules_path`` or
            ``rules_string`` is provided.
    """
    load_from_disk = bool(patterns)
    compiled = None
    if not load_from_disk:
        if rules_path:
            compiled = rita.compile(rules_path, use_engine="spacy")
        elif rules_string:
            compiled = rita.compile_string(rules_string, use_engine="spacy")
        else:
            raise RuntimeError("Please provides rules. Either `patterns`, `rules_path` or `rules_string`")

    # Only touch the pipeline once we know there is something to load;
    # otherwise a failed call would leave an empty ruler attached to the model.
    ruler = model.add_pipe("entity_ruler", config={"overwrite_ents": override_ents, "validate": True})
    if load_from_disk:
        ruler.from_disk(patterns)
    else:
        ruler.add_patterns(compiled)
    return model
Beispiel #3
0
def test_shortcuts_spacy_compiled():
    """Smoke-test ``setup_spacy`` with patterns pre-compiled to a ``.jsonl`` file.

    Compiles ``examples/color-car.rita``, writes each pattern as one JSON line
    into a temporary file and passes that file's path to ``setup_spacy`` via
    ``patterns=``. The test is skipped when spaCy >= 2.1 is not installed.
    """
    spacy = pytest.importorskip("spacy", minversion="2.1")
    nlp = spacy.load("en_core_web_sm")
    patterns = rita.compile("examples/color-car.rita")

    # delete=False: the file has to outlive the ``with`` block so it can be
    # reopened by name after it has been closed.
    with tempfile.NamedTemporaryFile(mode="w",
                                     encoding="UTF-8",
                                     suffix=".jsonl",
                                     delete=False) as tmp:
        tmp.writelines(json.dumps(pattern) + "\n" for pattern in patterns)

    try:
        setup_spacy(nlp, patterns=tmp.name)
    finally:
        # Remove the temp file even when setup_spacy raises.
        os.unlink(tmp.name)
Beispiel #4
0
def _spacy_v2(model, patterns=None, rules_path=None, rules_string=None, override_ents=True):
    """Build an ``EntityRuler`` from rita patterns and add it to ``model``.

    The rule source is chosen in this order: ``patterns`` (loaded with
    ``from_disk``), ``rules_path`` (compiled with ``rita.compile``), then
    ``rules_string`` (compiled with ``rita.compile_string``).

    Args:
        model: Object passed to ``EntityRuler`` and exposing ``add_pipe``;
            presumably a spaCy v2 ``Language`` pipeline -- confirm at caller.
        patterns: When truthy, handed to ``ruler.from_disk``.
        rules_path: Rules file to compile when ``patterns`` is not given.
        rules_string: Rules text to compile when neither ``patterns`` nor
            ``rules_path`` is given.
        override_ents: Forwarded to ``EntityRuler`` as ``overwrite_ents``.

    Returns:
        The same ``model`` object that was passed in.

    Raises:
        RuntimeError: If none of ``patterns``, ``rules_path`` or
            ``rules_string`` is provided.
    """
    from spacy.pipeline import EntityRuler

    entity_ruler = EntityRuler(model, overwrite_ents=override_ents)

    if patterns:
        entity_ruler.from_disk(patterns)
    elif rules_path:
        entity_ruler.add_patterns(rita.compile(rules_path, use_engine="spacy"))
    elif rules_string:
        entity_ruler.add_patterns(rita.compile_string(rules_string, use_engine="spacy"))
    else:
        raise RuntimeError("Please provides rules. Either `patterns`, `rules_path` or `rules_string`")

    # The ruler is attached only after it has been populated successfully.
    model.add_pipe(entity_ruler)
    return model