Beispiel #1
0
def regex_parse(r, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    if op.ignore_case(config):
        d = {"LOWER": {"REGEX": r.lower()}}
    else:
        d = {"TEXT": {"REGEX": r}}

    if not op.empty():
        d["OP"] = op.value
    yield d
Beispiel #2
0
def any_of_parse(lst, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    if op.ignore_case(config):
        normalized = sorted([item.lower() for item in lst])
        base = {"LOWER": {"REGEX": r"^({0})$".format("|".join(normalized))}}
    else:
        base = {"TEXT": {"REGEX": r"^({0})$".format("|".join(sorted(lst)))}}

    if not op.empty():
        base["OP"] = op.value
    yield base
def apply_operator(syntax, op: ExtendedOp) -> str:
    if op.empty():
        return syntax

    elif str(op) == "!":  # A bit complicated one
        return (r"((?!{})\w+)".format(syntax
                                      .rstrip(")")
                                      .lstrip("(")))
    else:
        return syntax + str(op)
Beispiel #4
0
def generic_parse(tag, value, config: "SessionConfig",
                  op: ExtendedOp) -> SpacyPattern:
    d = {}
    if tag == "ORTH" and op.ignore_case(config):
        d["LOWER"] = value.lower()
    else:
        d[tag] = value

    if not op.empty():
        d["OP"] = op.value
    yield d
Beispiel #5
0
def tag_parse(values, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    """
    For generating POS/TAG patterns based on a Regex
    e.g. TAG("^NN|^JJ") for adjectives or nouns
    also deals with TAG_WORD for tag and word or tag and list
    """
    d = {"TAG": {"REGEX": values["tag"]}}
    if "word" in values:
        if op.ignore_case(config):
            d["LOWER"] = values["word"].lower()
        else:
            d["TEXT"] = values["word"]
    elif "list" in values:
        lst = values["list"]
        if op.ignore_case(config):
            normalized = sorted([item.lower() for item in lst])
            d["LOWER"] = {"REGEX": r"^({0})$".format("|".join(normalized))}
        else:
            d["TEXT"] = {"REGEX": r"^({0})$".format("|".join(sorted(lst)))}
    if not op.empty():
        d["OP"] = op.value
    yield d
Beispiel #6
0
def punct_parse(_, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    d = dict()
    d["IS_PUNCT"] = True
    if not op.empty():
        d["OP"] = op.value
    yield d
Beispiel #7
0
def fuzzy_parse(r, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    # TODO: build premutations
    d = {"LOWER": {"REGEX": "({0})[.,?;!]?".format("|".join(r))}}
    if not op.empty():
        d["OP"] = op.value
    yield d
Beispiel #8
0
def orth_parse(value, config: "SessionConfig", op: ExtendedOp) -> SpacyPattern:
    d = {}
    d["ORTH"] = value
    if not op.empty():
        d["OP"] = op.value
    yield d