def _lowercase_regex(pattern):
    """
    Lowercase the literal parts of a regex while leaving its syntax intact.

    Backslash escapes (``\\W``, ``\\S``, ``\\D``, ``\\B``, ``\\Z`` ...) and the
    ``(?P`` named-group prefix are case-sensitive regex syntax, so they are
    copied through untouched; everything else is lowercased.
    """
    import re  # local import: keeps this block independent of the file header

    # Splitting on a capturing group keeps the protected tokens in the result
    # at the odd indexes; the even indexes hold the text between them.
    # DOTALL lets an escaped newline count as an escape pair as well.
    pieces = re.split(r"(\\.|\(\?P)", pattern, flags=re.DOTALL)
    return "".join(
        piece if index % 2 else piece.lower()
        for index, piece in enumerate(pieces)
    )


def regex_parse(r, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    """
    Generate a single token pattern that matches a token against regex `r`.

    When `op.ignore_case(config)` is true the regex is attached to the
    "LOWER" attribute and its literal characters are lowercased; otherwise
    it is attached to "TEXT" unchanged. An "OP" entry holding `op.value`
    is added unless `op.empty()` is true.
    """
    if op.ignore_case(config):
        # A plain `r.lower()` would also rewrite escapes such as \W -> \w,
        # which inverts their meaning, so only the literal text is lowered.
        d = {"LOWER": {"REGEX": _lowercase_regex(r)}}
    else:
        d = {"TEXT": {"REGEX": r}}

    if not op.empty():
        d["OP"] = op.value
    yield d
def any_of_parse(lst, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    """
    Generate a single token pattern that matches any one of the items in `lst`.

    The items are sorted and joined into an anchored regex alternation
    ``^(a|b|c)$``. When `op.ignore_case(config)` is true the items are
    lowercased first and the regex is attached to "LOWER"; otherwise the
    items are used as given and attached to "TEXT". An "OP" entry holding
    `op.value` is added unless `op.empty()` is true.
    """
    import re  # local import: keeps this block independent of the file header

    if op.ignore_case(config):
        attribute = "LOWER"
        items = sorted(item.lower() for item in lst)
    else:
        attribute = "TEXT"
        items = sorted(lst)

    # Items are literal strings, so regex metacharacters in them ("c++",
    # "a.b", "(") must be escaped or the alternation becomes invalid or
    # matches more than intended. Sorting happens before escaping so the
    # order of alternatives is the same as for the raw items.
    alternatives = "|".join(re.escape(item) for item in items)
    base = {attribute: {"REGEX": r"^({0})$".format(alternatives)}}

    if not op.empty():
        base["OP"] = op.value
    yield base
def generic_parse(tag, value, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    """
    Generate a single token pattern of the form ``{tag: value}``.

    The one special case is an "ORTH" tag while `op.ignore_case(config)` is
    true: the pattern then uses "LOWER" with the lowercased value instead.
    An "OP" entry holding `op.value` is added unless `op.empty()` is true.
    """
    # `and` short-circuits, so ignore_case is only consulted for "ORTH".
    if tag == "ORTH" and op.ignore_case(config):
        attribute, expected = "LOWER", value.lower()
    else:
        attribute, expected = tag, value

    pattern = {attribute: expected}
    if op.empty():
        yield pattern
    else:
        yield {**pattern, "OP": op.value}
def tag_parse(values, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    """
    For generating POS/TAG patterns based on a Regex
    e.g. TAG("^NN|^JJ") for adjectives or nouns
    also deals with TAG_WORD for tag and word or tag and list

    `values` is a mapping with a required "tag" entry (the tag regex) and an
    optional "word" (single string) or "list" (strings to choose from);
    "word" takes precedence when both are present. The word or list is
    matched on "LOWER" (lowercased) when `op.ignore_case(config)` is true and
    on "TEXT" otherwise. An "OP" entry holding `op.value` is added unless
    `op.empty()` is true.
    """
    import re  # local import: keeps this block independent of the file header

    d = {"TAG": {"REGEX": values["tag"]}}

    if "word" in values:
        word = values["word"]
        if op.ignore_case(config):
            d["LOWER"] = word.lower()
        else:
            d["TEXT"] = word
    elif "list" in values:
        if op.ignore_case(config):
            attribute = "LOWER"
            items = sorted(item.lower() for item in values["list"])
        else:
            attribute = "TEXT"
            items = sorted(values["list"])
        # List items are literal strings: escape regex metacharacters so an
        # item like "c++" or "a.b" cannot break or widen the alternation.
        alternatives = "|".join(re.escape(item) for item in items)
        d[attribute] = {"REGEX": r"^({0})$".format(alternatives)}

    if not op.empty():
        d["OP"] = op.value
    yield d