Example #1
0
def number_per_unit(dep_graph: DependencyGraph):
    """
    Merge every NUM - SYM - NOUN run at consecutive locations into one node.

    For each SYM node, the nodes located directly before and after it are
    looked up; when they are tagged NUM and NOUN respectively, the three
    nodes are replaced by a single merged node tagged NUM that takes the
    location of the last node of the run.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    triples = []
    for sym in dep_graph.nodes(filter=lambda n: n.UPOS == "SYM"):
        left = dep_graph.get_node_by_loc(sym.LOC - 1)
        right = dep_graph.get_node_by_loc(sym.LOC + 1)

        # both neighbours must exist and carry the expected tags
        if left and right and left.UPOS == "NUM" and right.UPOS == "NOUN":
            triples.append((left, sym, right))

    # the graph is only rewritten once the scan above has finished
    for triple in triples:
        merged = merge_dep_nodes(triple, UPOS="NUM", LOC=triple[-1].LOC)
        dep_graph.replace_nodes(triple, merged)
def ever_since(dep_graph: DependencyGraph):
    """
    Merge each "ever" that is directly followed by "since" into one node.

    An "ever" node (by LEMMA) is paired with the "since" node whose LOC is
    exactly one greater. For every paired "ever", the ``advmod`` dependency
    from each of its parents is removed, then the pair is replaced by a
    merged node that keeps the UPOS and LOC of "since".

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    evers = []
    sinces = []
    for candidate in dep_graph.nodes():
        if candidate.LEMMA == "ever":
            evers.append(candidate)
        elif candidate.LEMMA == "since":
            sinces.append(candidate)

    # nothing to do unless both words occur
    if not (evers and sinces):
        return

    since_locs = [since.LOC for since in sinces]
    stale_edges = []
    pairs = []
    for ever in evers:
        wanted_loc = ever.LOC + 1
        if wanted_loc in since_locs:
            pairs.append((ever, sinces[since_locs.index(wanted_loc)]))
            # remember the advmod edges pointing at this "ever"
            for head, head_rel in dep_graph.parents(ever):
                if 'advmod' in head_rel:
                    stale_edges.append((head, ever, 'advmod'))

    for head, dependent, label in stale_edges:
        dep_graph.remove_dependency(head, dependent, label)

    for ever, since in pairs:
        merged = merge_dep_nodes([ever, since],
                                 UPOS=since.UPOS,
                                 LOC=since.LOC)
        dep_graph.replace_nodes([ever, since], merged)
Example #3
0
def get_adj_verb_phrase(dep_graph):
    """
    Merge a "get" verb with the adjective it hangs from via ``aux``.

    The pattern is an ADJ node with an ``aux`` dependency to a VERB node
    whose LEMMA is "get". The two matched nodes are replaced by one node
    tagged VERB located at the adjective's LOC. Matches whose adjective is
    a conjunction super node are ignored.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()
    adj_pat = pattern.create_node(UPOS="ADJ")
    get_pat = pattern.create_node(LEMMA="get", UPOS="VERB")
    pattern.add_dependency(adj_pat, get_pat, r'aux')

    merges = []
    for match in dep_graph.match(pattern):
        adj = match[adj_pat]
        get = match[get_pat]

        conj_super = isinstance(adj, DependencyGraphSuperNode) and adj.is_conj
        if not conj_super:
            # merged order is "get" first, adjective second
            merges.append(([get, adj], adj))

    for members, anchor in merges:
        merged = merge_dep_nodes(members, UPOS="VERB", LOC=anchor.LOC)
        dep_graph.replace_nodes(members, merged)
Example #4
0
def reverse_passive_verb(dep_graph: DependencyGraph):
    """
    Merge a copula with a past-tense verb that precedes its own subject.

    Example sentence:
    I'd forgotten how blown away I was by some of the songs the first time
    I saw it in NY.

    The pattern is a past-tense VERB with a ``*subj`` child and a ``cop``
    child whose LEMMA is "be". Only matches ordered verb < subject < be
    (by LOC) are rewritten: the "be" node and the verb are replaced by one
    node tagged VERB located at the LOC of "be".

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    pattern = DependencyGraph()

    subj_pat = pattern.create_node()
    verb_pat = pattern.create_node(UPOS="VERB", FEATS={"Tense": "Past"})
    be_pat = pattern.create_node(LEMMA=r"\bbe\b")

    pattern.add_dependency(verb_pat, subj_pat, r"\w*subj")
    pattern.add_dependency(verb_pat, be_pat, "cop")

    # the matches are materialised first because the loop edits the graph
    for match in list(dep_graph.match(pattern)):
        subj = match[subj_pat]
        verb = match[verb_pat]
        be = match[be_pat]

        if verb.LOC < subj.LOC < be.LOC:
            members = [be, verb]
            merged = merge_dep_nodes(members, UPOS="VERB", LOC=be.LOC)
            dep_graph.replace_nodes(members, merged)
Example #5
0
def there_be_verb_phrase(dep_graph):
    """
    Merge an expletive "there" with the "be" node that governs it.

    The pattern is any node with an ``expl``-like dependency to a node whose
    FORM is "there" or "There". A match is kept only when "be" is one of the
    space-separated words of the governing node's LEMMA. The pair is then
    replaced by one node tagged VERB located at the governor's LOC.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()
    there_pat = pattern.create_node(FORM=r'there|There')
    be_pat = pattern.create_node()
    pattern.add_dependency(be_pat, there_pat, r'\w*expl\w*')

    merges = []
    for match in dep_graph.match(pattern):
        there = match[there_pat]
        be = match[be_pat]

        if "be" in be.LEMMA.split(" "):
            merges.append(([there, be], be))

    for members, anchor in merges:
        merged = merge_dep_nodes(members, UPOS="VERB", LOC=anchor.LOC)
        dep_graph.replace_nodes(members, merged)
Example #6
0
def ccomp_mark_sconj(dep_graph: DependencyGraph):
    """
    Fold the marker "as" of a clausal complement into the governing verb.

    Example: See them as they are

    The pattern is a VERB with a ``ccomp`` child that itself has a ``mark``
    child tagged SCONJ. When the marker's LEMMA is "as", the dependency from
    the complement to the marker is removed and the verb and the marker are
    merged, with the placeholders "{1}" and "{2}" interleaved, into one node
    that keeps the verb's UPOS and LOC.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()
    verb_pat = pattern.create_node(UPOS="VERB")
    comp_pat = pattern.create_node()
    mark_pat = pattern.create_node(UPOS="SCONJ")

    pattern.add_dependency(verb_pat, comp_pat, r'ccomp')
    pattern.add_dependency(comp_pat, mark_pat, 'mark')

    # the matches are materialised first because the loop edits the graph
    for match in list(dep_graph.match(pattern)):
        verb = match[verb_pat]
        comp = match[comp_pat]
        mark = match[mark_pat]

        if mark.LEMMA != "as":
            continue

        dep_graph.remove_dependency(comp, mark)

        parts = [verb, "{1}", mark, "{2}"]
        merged = merge_dep_nodes(parts, UPOS=verb.UPOS, LOC=verb.LOC)
        dep_graph.replace_nodes(parts, merged)
def whose_noun(dep_graph: DependencyGraph):
    """
    Merge "whose" with the noun it possesses into a single noun node.

    The pattern is a nominal node (NOUN, PROPN, PRON, X, NUM or SYM) with an
    ``nmod:poss`` dependency to an owner node, where the owner has a ``ref``
    dependency to a node whose LEMMA is "whose". For every match the
    owner -> whose dependency and the noun -> owner ``nmod:poss`` dependency
    are removed, and "whose" plus the noun are replaced by one merged node
    that keeps the noun's UPOS and LOC.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()
    noun_node = pattern.create_node(UPOS="NOUN|PROPN|PRON|X|NUM|SYM")
    owner_node = pattern.create_node()
    whose_node = pattern.create_node(LEMMA="whose")

    pattern.add_dependency(noun_node, owner_node, "nmod:poss")
    pattern.add_dependency(owner_node, whose_node, "ref")

    # collect the matches first; the graph is edited only afterwards
    whose_noun_phrase = []
    for match in dep_graph.match(pattern):
        dep_owner_node = match[owner_node]
        dep_noun_node = match[noun_node]
        dep_whose_node = match[whose_node]

        whose_noun_phrase.append(
            (dep_owner_node, dep_whose_node, dep_noun_node))

    for owner, whose, noun in whose_noun_phrase:
        merged_node = merge_dep_nodes([whose, noun],
                                      UPOS=noun.UPOS,
                                      LOC=noun.LOC)
        # The edges to drop belong to the matched graph node ``owner``; the
        # pattern node ``owner_node`` is not part of ``dep_graph``.
        dep_graph.remove_dependency(owner, whose)
        dep_graph.remove_dependency(noun, owner, "nmod:poss")
        dep_graph.replace_nodes([whose, noun], merged_node)
Example #8
0
def part(dep_graph: DependencyGraph):
    """
    Merge a PART node into the AUX/VERB node that governs it via ``advmod``.

    The two nodes are merged in LOC order; the merged node keeps the UPOS,
    LOC and FEATS of the governing AUX/VERB node.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()
    head_pat = pattern.create_node(UPOS="AUX|VERB")
    part_pat = pattern.create_node(UPOS="PART")
    pattern.add_dependency(head_pat, part_pat, r'advmod')

    # the matches are materialised first because the loop edits the graph
    for match in list(dep_graph.match(pattern)):
        head = match[head_pat]
        particle = match[part_pat]

        ordered = sorted([head, particle], key=lambda n: n.LOC)
        merged = merge_dep_nodes(ordered,
                                 UPOS=head.UPOS,
                                 LOC=head.LOC,
                                 FEATS=head.FEATS)

        dep_graph.replace_nodes(ordered, merged)
Example #9
0
def goeswith(dep_graph: DependencyGraph):
    """
    Merge every node with its ``goeswith`` children into a single node.

    Each group (the head plus its children reached through a relation
    containing "goeswith") is ordered by LOC. The merged node is located at
    the LOC of the last member; its UPOS is that of the last member whose
    UPOS is not "X", or "X" when every member is tagged "X".

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    groups = []
    for head in dep_graph.nodes():
        group = [
            child for child, rels in dep_graph.children(
                head, filter=lambda n, l: "goeswith" in l)
        ]
        if not group:
            continue

        group.append(head)
        group.sort(key=lambda n: n.LOC)
        groups.append(group)

    for group in groups:
        # the last non-"X" tag wins; fall back to "X"
        informative = [member.UPOS for member in group if member.UPOS != "X"]
        upos = informative[-1] if informative else "X"

        merged = merge_dep_nodes(group, UPOS=upos, LOC=group[-1].LOC)
        dep_graph.replace_nodes(group, merged)
Example #10
0
def separated_asas(dep_graph: DependencyGraph):
    """
    Rewrite the equality comparison "A is as X a C as B".

    Notes carried over from the original author:
    - the first 'as' is always the advmod of a following element, X, which
      is within the range of as ... as;
    - the second 'as' is always the dependent of B;
    - B sometimes depends on the first 'as', sometimes depends on X;
    - sometimes X has a head that is also within the range of as ... as.

    The pattern is NOUN -amod-> ADJ -advmod-> as1 -advcl:as-> obj -mark-> as2.
    A match is used only when the LOC order is
    as1 < adj < noun < as2 < obj. Every graph node located from as1 to the
    adjective (inclusive), plus as2, is merged into one ADJ node located at
    as2's LOC. After the replacement the merged -> obj and noun -> merged
    dependencies are removed and noun -> obj is added with the relation
    "acl:" followed by the merged node's FORM.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()

    adj_pat = DependencyGraphNode(UPOS="ADJ")
    noun_pat = DependencyGraphNode(UPOS="NOUN")
    as1_pat = DependencyGraphNode(FORM="as")
    as2_pat = DependencyGraphNode(FORM="as")
    obj_pat = DependencyGraphNode()

    pattern.add_nodes([noun_pat, adj_pat, as1_pat, as2_pat, obj_pat])
    pattern.add_dependency(noun_pat, adj_pat, r'amod')
    pattern.add_dependency(adj_pat, as1_pat, r'\w*advmod\w*')
    pattern.add_dependency(as1_pat, obj_pat, r'\w*advcl:as\w*')
    pattern.add_dependency(obj_pat, as2_pat, r'mark')

    rewrites = []
    for match in dep_graph.match(pattern):
        noun = match[noun_pat]
        adj = match[adj_pat]
        as1 = match[as1_pat]
        as2 = match[as2_pat]
        obj = match[obj_pat]

        if not (as1.LOC < adj.LOC < noun.LOC < as2.LOC < obj.LOC):
            continue

        # everything from the first "as" up to the adjective, plus "as" #2
        span = [
            node for node in dep_graph.nodes()
            if as1.LOC <= node.LOC <= adj.LOC
        ]
        span.append(as2)
        span.sort(key=lambda x: x.LOC)

        merged = merge_dep_nodes(span, UPOS="ADJ", LOC=as2.LOC)
        rewrites.append((span, merged, noun, obj))

    for span, merged, noun, obj in rewrites:
        dep_graph.replace_nodes(span, merged)

        dep_graph.remove_dependency(merged, obj)
        dep_graph.remove_dependency(noun, merged)

        dep_graph.add_dependency(noun, obj, "acl:" + merged.FORM)
Example #11
0
def be_adp_phrase(dep_graph: DependencyGraph):
    """
    Merge a copula with the adposition(s) that follow it, e.g. "is for xxx".

    Notes carried over from the original author - this should not apply when:
    1. xxx is an adjective (be_adj_verb handles it);
    2. xxx is a NOUN (copula_phrase handles it).
    There may be several adpositions, as in
    "the insurgency is out of the picture".

    The pattern is a node with a ``cop`` child tagged AUX and a ``case``
    child tagged ADP. All ADP ``case`` children of that node are collected;
    the match is used only when every one of them lies strictly between the
    copula and the node (by LOC). The copula and the adpositions are then
    merged into one AUX node at the copula's LOC, which is re-attached to
    the node with ``cop``.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()

    target_pat = pattern.create_node()
    adp_pat = pattern.create_node(UPOS="ADP")
    be_pat = pattern.create_node(UPOS="AUX")

    pattern.add_dependency(target_pat, be_pat, r'cop')
    pattern.add_dependency(target_pat, adp_pat, r'case')

    candidates = []
    for match in dep_graph.match(pattern):
        be = match[be_pat]
        target = match[target_pat]

        adps = [
            n for n, l in dep_graph.children(
                target,
                filter=lambda n, l: "case" in l and n.UPOS == "ADP")
        ]

        in_between = all(be.LOC < adp.LOC < target.LOC for adp in adps)
        if in_between:
            candidates.append((target, [be] + adps, be))

    for target, members, anchor in candidates:
        # skip when a member is no longer in the graph (already processed)
        if not all(dep_graph.get_node(member.ID) for member in members):
            continue

        merged = merge_dep_nodes(members, UPOS="AUX", LOC=anchor.LOC)

        for member in members:
            dep_graph.remove_dependency(target, member)
        dep_graph.replace_nodes(members, merged)
        dep_graph.add_dependency(target, merged, "cop")
Example #12
0
def amod_obl(dep_graph: DependencyGraph):
    """
    Merge an adjective with the adposition of its oblique, e.g. "more than",
    "successful by".

    The pattern is NOUN|PRON -amod-> ADJ -obl:*-> obl -case-> ADP. A match is
    dropped when the adjective has more than one ``obl`` child, when the
    adposition's FORM is not among the values of the adj -> obl dependency,
    or when the LOC order is not noun < adj < adp < obl. Otherwise the
    adjective and the adposition are merged into one ADP node at the
    adposition's LOC, the noun -> adj and adj -> obl dependencies are
    removed, and noun -> obl is added with the relation "nmod:" followed by
    the merged node's FORM.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()

    noun_pat = DependencyGraphNode(UPOS=r"NOUN|PRON")
    adj_pat = DependencyGraphNode(UPOS="ADJ")
    adp_pat = DependencyGraphNode(UPOS="ADP")
    obl_pat = DependencyGraphNode()

    pattern.add_nodes([noun_pat, adj_pat, adp_pat, obl_pat])
    pattern.add_dependency(noun_pat, adj_pat, r'amod')
    pattern.add_dependency(adj_pat, obl_pat, r'obl:\w+')
    pattern.add_dependency(obl_pat, adp_pat, r'case')

    candidates = []
    for match in dep_graph.match(pattern):
        noun = match[noun_pat]
        adj = match[adj_pat]
        obl = match[obl_pat]
        adp = match[adp_pat]

        obl_children = list(
            dep_graph.children(adj, filter=lambda n, l: "obl" in l))
        if len(obl_children) > 1:
            # e.g. "similar in form to the one"
            continue

        relation_values = dep_graph.get_dependency(adj, obl).values()
        if adp.FORM not in relation_values:
            continue

        if noun.LOC < adj.LOC < adp.LOC < obl.LOC:
            candidates.append((noun, adj, obl, adp))

    for noun, adj, obl, adp in candidates:
        members = [adj, adp]
        merged = merge_dep_nodes(members, UPOS="ADP", LOC=adp.LOC)

        dep_graph.remove_dependency(noun, adj)
        dep_graph.remove_dependency(adj, obl)

        dep_graph.replace_nodes([adj, adp], merged)
        dep_graph.add_dependency(noun, obl, "nmod:" + merged.FORM)
def det_adjv_phrase(dep_graph: DependencyGraph):
    """
    Merge a determiner-headed adjective/adverb span into one NOUN node.

    For every ADJ or ADV node that
    - has no parent relation listed in ``valid_adj_form``,
    - has no ``amod`` or ``advmod`` parent relation, and
    - has a ``det`` child whose LEMMA is one of the, a, an, some, any, all
      (when there are several, the one with the greatest LOC is used),
    the offspring of the node located between that determiner and the node
    (inclusive, by LOC) are merged into a single node tagged NOUN at the
    node's LOC.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    phrases = []

    for node in dep_graph.nodes(filter=lambda n: n.UPOS in {"ADJ", "ADV"}):

        # Materialise the relations: a bare chain iterator would be exhausted
        # by the first check, making the second check always pass.
        parent_rels = list(itertools.chain.from_iterable(
            rel for parent, rel in dep_graph.parents(node)))

        if any(rel in valid_adj_form for rel in parent_rels):
            continue

        if any(rel in {"amod", "advmod"} for rel in parent_rels):
            continue

        det_children = [
            n for n, l in dep_graph.children(node,
                                             filter=lambda n, l: l == "det")
        ]

        if not det_children:
            continue

        # use the determiner closest to the end of the sentence
        det_children.sort(key=lambda x: x.LOC)
        det = det_children[-1]

        if det.LEMMA not in {"the", "a", "an", "some", "any", "all"}:
            continue

        root = node
        # NOTE(review): the span is not checked for contiguity; the original
        # author had such a check but left it disabled.
        np_elements = sorted(
            dep_graph.offsprings(
                root, filter=lambda n: det.LOC <= n.LOC <= root.LOC),
            key=lambda x: x.LOC)

        phrases.append((np_elements, root))

    for np, root in phrases:
        noun_node = merge_dep_nodes(np, UPOS="NOUN", LOC=root.LOC)
        dep_graph.replace_nodes(np, noun_node)
Example #14
0
def and_or(dep_graph: DependencyGraph):
    """
    Merge a coordinated "and ... or" pair of conjunctions into one node.

    The pattern is parent -conj:*-> some, where "some" has ``cc`` children
    "and" and "or" (by LEMMA) and "and" has a ``conj`` dependency to "or".
    A match is used only when the parent -> some relation starts with
    "conj:and" or "conj:or", no node strictly between "and" and "or" is
    tagged VERB, NOUN, ADJ, ADP or ADV, and every node of the span is still
    in the graph. The span from "and" to "or" is merged into one node that
    keeps the UPOS, LOC and FEATS of "and", and the parent -> some relation
    is set to "conj:" followed by the merged node's FORM.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()

    parent_pat = pattern.create_node()
    some_pat = pattern.create_node()
    and_pat = pattern.create_node(LEMMA=r"\band\b")
    or_pat = pattern.create_node(LEMMA=r"\bor\b")

    pattern.add_dependency(parent_pat, some_pat, r'\bconj:\w*')
    pattern.add_dependency(some_pat, and_pat, r'\bcc\b')
    pattern.add_dependency(some_pat, or_pat, r'\bcc\b')
    pattern.add_dependency(and_pat, or_pat, r'\bconj')

    content_tags = {"VERB", "NOUN", "ADJ", "ADP", "ADV"}

    # the matches are materialised first because the loop edits the graph
    for match in list(dep_graph.match(pattern)):
        parent = match[parent_pat]
        some = match[some_pat]
        and_word = match[and_pat]
        or_word = match[or_pat]

        rel = dep_graph.get_dependency(parent, some)
        if not (rel.startswith("conj:and") or rel.startswith("conj:or")):
            continue

        span = [
            n for n in dep_graph.nodes()
            if and_word.LOC < n.LOC < or_word.LOC
        ]
        if any(inner.UPOS in content_tags for inner in span):
            continue

        span.extend([and_word, or_word])
        span.sort(key=lambda n: n.LOC)

        # skip when part of the span has already been replaced
        if not all(dep_graph.get_node(member.ID) for member in span):
            continue

        merged = merge_dep_nodes(span,
                                 UPOS=and_word.UPOS,
                                 LOC=and_word.LOC,
                                 FEATS=and_word.FEATS)

        dep_graph.replace_nodes(span, merged)
        dep_graph.set_dependency(parent, some, "conj:" + merged.FORM)
Example #15
0
def multi_word_sconj(dep_graph: DependencyGraph):
    """
    Merge a multi-word subordinating conjunction into a single mark node.

    The pattern is VERB -advcl:*-> VERB -mark-> SCONJ. A match is used when
    the marker's LEMMA is among the values of the verb -> verb2 dependency
    and ``offsprings`` of the marker yields a number of nodes other than
    one. Those nodes are merged (in LOC order) into one node with the
    marker's UPOS and LOC; the ``advcl`` relation is re-created as "advcl:"
    followed by the merged LEMMA and the merged node is attached to the
    second verb with ``mark``.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()

    head_pat = pattern.create_node(UPOS="VERB")
    clause_pat = pattern.create_node(UPOS="VERB")
    mark_pat = pattern.create_node(UPOS="SCONJ")

    pattern.add_dependency(head_pat, clause_pat, r'advcl:\w*')
    pattern.add_dependency(clause_pat, mark_pat, r'mark')

    candidates = []
    for match in dep_graph.match(pattern):
        head = match[head_pat]
        clause = match[clause_pat]
        mark = match[mark_pat]

        relation_values = dep_graph.get_dependency(head, clause).values()
        if mark.LEMMA not in relation_values:
            continue

        span = list(dep_graph.offsprings(mark))
        if len(span) == 1:
            continue

        span.sort(key=lambda n: n.LOC)
        candidates.append((head, clause, mark, span))

    for head, clause, mark, span in candidates:
        # skip when part of the span is no longer in the graph
        if not all(dep_graph.get_node(member.ID) for member in span):
            continue

        dep_graph.remove_dependency(clause, mark)
        dep_graph.remove_dependency(head, clause)

        merged = merge_dep_nodes(span, UPOS=mark.UPOS, LOC=mark.LOC)

        dep_graph.replace_nodes(span, merged)
        dep_graph.add_dependency(head, clause, "advcl:" + merged.LEMMA)
        dep_graph.add_dependency(clause, merged, "mark")
Example #16
0
def multi_words_mark(dep_graph: DependencyGraph):
    """
    Merge a multi-word marker into a single ``mark`` node.

    Example: in "arise on to", the "on to" should be combined.

    For each node, the ``offsprings`` of all its children reached through a
    relation containing "mark" are gathered. Groups of two or more nodes
    with no member tagged NOUN, NUM, VERB, ADJ, ADV or PRON become
    candidates. Each candidate is then widened to every graph node located
    between its first and last member; it is skipped when a member is no
    longer in the graph or when the widened span holds a tag in
    ``NOUN_UPOS``. The span is merged into one node that keeps the UPOS and
    LOC of its first member and is attached to the head with ``mark``.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    blocked_tags = {"NOUN", "NUM", "VERB", "ADJ", "ADV", "PRON"}
    candidates = []

    for head in dep_graph.nodes():
        group = []
        for mark_child, _rels in dep_graph.children(
                head, filter=lambda n, l: "mark" in l):
            group.extend(dep_graph.offsprings(mark_child))

        # only groups of at least two nodes are of interest
        if len(group) <= 1:
            continue
        if any(member.UPOS in blocked_tags for member in group):
            continue

        group.sort(key=lambda n: n.LOC)
        candidates.append((head, group))

    for head, group in candidates:
        if not all(dep_graph.get_node(member.ID) for member in group):
            continue

        first_loc = group[0].LOC
        last_loc = group[-1].LOC
        span = sorted(
            (n for n in dep_graph.nodes() if first_loc <= n.LOC <= last_loc),
            key=lambda n: n.LOC)

        if any(member.UPOS in NOUN_UPOS for member in span):
            continue

        merged = merge_dep_nodes(span, UPOS=span[0].UPOS, LOC=span[0].LOC)

        for member in span:
            dep_graph.remove_dependency(head, member)
        dep_graph.replace_nodes(span, merged)
        dep_graph.add_dependency(head, merged, "mark")
Example #17
0
def multi_words_cc(dep_graph: DependencyGraph):
    """
    Merge a multi-word coordinating conjunction into a single ``cc`` node.

    For each node, the ``offsprings`` of all its children whose relation
    equals "cc" are gathered. Groups of two or more nodes with no member
    tagged NOUN, NUM or VERB become candidates. Each candidate is widened to
    every graph node located between its first and last member; it is
    skipped when the widened span holds a NOUN, NUM or VERB or when a member
    is no longer in the graph. The span is merged into one node that keeps
    the UPOS and LOC of its first member and, if the head is still in the
    graph, is attached to it with ``cc``.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    blocked_tags = {"NOUN", "NUM", "VERB"}
    candidates = []

    for head in dep_graph.nodes():
        group = []
        for cc_child, _rels in dep_graph.children(
                head, filter=lambda n, l: "cc" == l):
            group.extend(dep_graph.offsprings(cc_child))

        # only groups of at least two nodes are of interest
        if len(group) <= 1:
            continue
        if any(member.UPOS in blocked_tags for member in group):
            continue

        group.sort(key=lambda n: n.LOC)
        candidates.append((head, group))

    for head, group in candidates:
        first_loc = group[0].LOC
        last_loc = group[-1].LOC
        span = [
            n for n in dep_graph.nodes() if first_loc <= n.LOC <= last_loc
        ]

        if any(member.UPOS in blocked_tags for member in span):
            continue
        if not all(dep_graph.get_node(member.ID) for member in span):
            continue

        merged = merge_dep_nodes(span, UPOS=span[0].UPOS, LOC=span[0].LOC)

        dep_graph.replace_nodes(span, merged)
        for member in span:
            dep_graph.remove_dependency(head, member)

        # the head may itself have been part of the replaced span
        if dep_graph.get_node(head.ID):
            dep_graph.add_dependency(head, merged, "cc")
def noun_all(dep_graph: DependencyGraph):
    """
    Merge a nominal with an "all" determiner that directly follows it.

    A nominal is a node tagged NOUN, PROPN, PRON, X, NUM or SYM. A child
    qualifies when its relation contains "det", its LEMMA is "all" and its
    LOC is the nominal's LOC plus one. The merged node keeps the nominal's
    UPOS and LOC.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    nominal_tags = {"NOUN", "PROPN", "PRON", "X", "NUM", "SYM"}

    pairs = []
    for nominal in dep_graph.nodes(filter=lambda x: x.UPOS in nominal_tags):
        pairs.extend(
            (nominal, child)
            for child, rels in dep_graph.children(nominal)
            if "det" in rels and child.LEMMA == "all"
            and child.LOC == nominal.LOC + 1)

    for nominal, all_word in pairs:
        merged = merge_dep_nodes([nominal, all_word],
                                 UPOS=nominal.UPOS,
                                 LOC=nominal.LOC)
        dep_graph.replace_nodes([nominal, all_word], merged)
Example #19
0
def adjv_phrase(dep_graph: DependencyGraph):
    """
    Merge a run of adjacent adjectives/adverbs ending at a phrase root.

    An ADJ/ADV node is treated as a root unless one of its parent relations
    intersects ``valid_adj_form``; an ``advmod`` relation from a parent that
    is neither ADJ nor ADV makes it a root regardless and stops the scan of
    the parents. Starting from the root, the elements returned by
    ``valid_adjv_element`` are walked from the highest LOC downwards and
    every ADJ/ADV element located directly before the current start of the
    run is added to it. Runs of two or more nodes are merged into one node
    that keeps the root's UPOS and LOC.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    adjv_tags = {"ADJ", "ADV"}
    runs = []

    for root in dep_graph.nodes(filter=lambda n: n.UPOS in adjv_tags):

        skip = False
        for parent, rel in dep_graph.parents(root):
            if "advmod" in rel and parent.UPOS not in adjv_tags:
                skip = False
                break
            elif rel.intersect(valid_adj_form):
                skip = True

        if skip:
            continue

        elements = sorted(valid_adjv_element(root, dep_graph),
                          key=lambda x: x.LOC)

        run = [root]
        run_start = root.LOC
        # grow the run leftwards, one adjacent ADJ/ADV at a time
        for element in reversed(elements):
            if element.UPOS in adjv_tags and element.LOC == run_start - 1:
                run.append(element)
                run_start = element.LOC

        run.sort(key=lambda x: x.LOC)

        if len(run) > 1:
            runs.append((run, root))

    for run, root in runs:
        merged = merge_dep_nodes(run, UPOS=root.UPOS, LOC=root.LOC)
        dep_graph.replace_nodes(run, merged)
Example #20
0
def such_that(dep_graph: DependencyGraph):
    """
    Combine "such ... that" into one marker, e.g. "such a high price that".

    The pattern is NOUN -det:predet-> such -advcl:that-> VERB -mark-> that.
    A match is used only when the LOC order is such < noun < that < verb.
    "such" and "that" are merged into a new SCONJ node at the LOC of "that",
    which is added to the graph; the noun gets an "advcl:" + merged FORM
    dependency to the clause verb, the clause verb gets a ``mark``
    dependency to the merged node, and the two original nodes are removed.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()

    noun_pat = DependencyGraphNode(UPOS="NOUN")
    such_pat = DependencyGraphNode(FORM="such")
    clause_pat = DependencyGraphNode(UPOS="VERB")
    that_pat = DependencyGraphNode(FORM="that")

    pattern.add_nodes([noun_pat, such_pat, clause_pat, that_pat])
    pattern.add_dependency(noun_pat, such_pat, r'det:predet')
    pattern.add_dependency(such_pat, clause_pat, r'advcl:that')
    pattern.add_dependency(clause_pat, that_pat, r'mark')

    candidates = []
    for match in dep_graph.match(pattern):
        noun = match[noun_pat]
        such = match[such_pat]
        clause = match[clause_pat]
        that = match[that_pat]

        if such.LOC < noun.LOC < that.LOC < clause.LOC:
            candidates.append((noun, such, clause, that))

    for noun, such, clause, that in candidates:
        merged = merge_dep_nodes([such, that], UPOS="SCONJ", LOC=that.LOC)

        # wire the new marker in before dropping the words it replaces
        dep_graph.add_node(merged)
        dep_graph.add_dependency(noun, clause, "advcl:" + merged.FORM)
        dep_graph.add_dependency(clause, merged, "mark")

        dep_graph.remove_node(such)
        dep_graph.remove_node(that)
Example #21
0
def xcomp_verb(dep_graph: DependencyGraph):
    """
    Merge the marker "to" into the verb of an open clausal complement.

    The pattern is pred -xcomp-> VERB|AUX -mark-> PART. Matches whose marker
    LEMMA is not "to" are ignored. The marker and the verb are replaced by
    one node tagged VERB located at the verb's LOC.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    :raises Exception: when the marker is located after the verb, or when
        the verb has more than one ``xcomp`` parent
    """

    pattern = DependencyGraph()

    pred_pat = pattern.create_node()
    verb_pat = pattern.create_node(UPOS="VERB|AUX")
    mark_pat = pattern.create_node(UPOS="PART")

    pattern.add_dependency(pred_pat, verb_pat, "xcomp")
    pattern.add_dependency(verb_pat, mark_pat, "mark")

    # the matches are materialised first because the loop edits the graph
    for match in list(dep_graph.match(pattern)):
        pred = match[pred_pat]
        verb = match[verb_pat]
        mark = match[mark_pat]

        if mark.LEMMA != "to":
            continue

        if mark.LOC > verb.LOC:
            raise Exception(
                "Unexpected Situation: xcomp mark after the xcomp verb")

        xcomp_parents = list(
            dep_graph.parents(verb, filter=lambda n, l: "xcomp" in l))
        if len(xcomp_parents) > 1:
            raise Exception(
                "Unexpected Situation: Multiple xcomp parents found")

        members = [mark, verb]
        merged = merge_dep_nodes(members, UPOS="VERB", LOC=verb.LOC)
        dep_graph.replace_nodes(members, merged)
def det_of_noun(dep_graph: DependencyGraph):
    """
    Merge "any/some/all of noun" into a single node.

    The pattern is DET -nmod:of-> nominal -case-> "of", where the nominal is
    tagged NOUN, PROPN, PRON, X, NUM or SYM. A match is ignored when any of
    its three nodes is falsy, when the nominal is a conjunction super node,
    or when the nominal does not have exactly one parent. Otherwise the
    determiner, "of" and the nominal are merged into one node that keeps the
    determiner's UPOS, FEATS and LOC.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    pattern = DependencyGraph()
    det_pat = pattern.create_node(UPOS="DET")
    of_pat = pattern.create_node(LEMMA="of")
    noun_pat = pattern.create_node(UPOS="NOUN|PROPN|PRON|X|NUM|SYM")

    pattern.add_dependency(det_pat, noun_pat, "nmod:of")
    pattern.add_dependency(noun_pat, of_pat, "case")

    # the matches are materialised first because the loop edits the graph
    for match in list(dep_graph.match(pattern)):
        det = match[det_pat]
        noun = match[noun_pat]
        of = match[of_pat]

        if not (det and noun and of):
            # processed by others
            continue

        if isinstance(noun, DependencyGraphSuperNode) and noun.is_conj:
            continue

        noun_parents = [parent for parent, rel in dep_graph.parents(noun)]
        if len(noun_parents) != 1:
            continue

        # the only parent is expected to be the determiner itself
        assert noun_parents[0] == det

        members = [det, of, noun]
        merged = merge_dep_nodes(members,
                                 UPOS=det.UPOS,
                                 FEATS=det.FEATS,
                                 LOC=det.LOC)
        dep_graph.replace_nodes(members, merged)
Example #23
0
def be_not_phrase(dep_graph: DependencyGraph):
    """
    Merge "not" into a preceding "be" node when "not" modifies the object.

    The pattern is be -*obj*-> obj -*advmod*-> not. A match is used when
    "be" and "not" each appear among the space-separated words of the
    respective node's LEMMA and the LOC order is be <= not <= obj. The
    ``advmod`` dependency from the object to "not" is removed and the two
    nodes are merged into one that keeps the UPOS and LOC of "be".

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    pattern = DependencyGraph()

    be_pat = pattern.create_node()  # contains the be verb
    obj_pat = pattern.create_node()
    not_pat = pattern.create_node()

    pattern.add_node(be_pat)
    pattern.add_node(obj_pat)
    pattern.add_node(not_pat)

    pattern.add_dependency(be_pat, obj_pat, r'\w*obj\w*')
    pattern.add_dependency(obj_pat, not_pat, r'\w*advmod\w*')

    candidates = []
    for match in dep_graph.match(pattern):
        be = match[be_pat]
        obj = match[obj_pat]
        negation = match[not_pat]

        if "be" not in be.LEMMA.split(" "):
            continue
        if "not" not in negation.LEMMA.split(" "):
            continue

        # "not" has to sit between "be" and the object
        if be.LOC <= negation.LOC <= obj.LOC:
            candidates.append((be, obj, negation))

    for be, obj, negation in candidates:
        dep_graph.remove_dependency(obj, negation, 'advmod')
        merged = merge_dep_nodes((be, negation), UPOS=be.UPOS, LOC=be.LOC)
        dep_graph.replace_nodes([be, negation], merged)
Example #24
0
def to_verb(dep_graph: DependencyGraph):
    """
    Fuse the marker "to" with the verb that immediately follows it.

    A VERB node is considered only when none of its parents is linked to it
    by relations whose values contain "to". Among its children, a node is
    paired with the verb when the relation contains "mark", its LEMMA is
    "to", it sits exactly one position before the verb, and it is not a
    conjunction super node. Each pair is replaced by one merged node that
    keeps the UPOS and LOC of the verb.

    :param dep_graph: dependency graph, modified in place
    :return:
    """
    pairs = []
    for verb in dep_graph.nodes(filter=lambda node: node.UPOS == "VERB"):
        # skip verbs that a parent already links to through "to"
        linked_by_to = False
        for _, parent_rels in dep_graph.parents(verb):
            if "to" in parent_rels.values():
                linked_by_to = True
                break
        if linked_by_to:
            continue

        for child, rels in dep_graph.children(verb):
            if "mark" not in rels:
                continue
            if child.LEMMA != "to" or child.LOC != verb.LOC - 1:
                continue
            if isinstance(child, DependencyGraphSuperNode) and child.is_conj:
                continue
            pairs.append((child, verb))

    # rewrite after the scan so the iteration above sees the original graph
    for marker, verb in pairs:
        merged = merge_dep_nodes([marker, verb],
                                 UPOS=verb.UPOS,
                                 LOC=verb.LOC)
        dep_graph.replace_nodes([marker, verb], merged)
Example #25
0
def aux_not(dep_graph: DependencyGraph):
    """
    Merge an auxiliary with a directly following contracted negation.

    For every AUX node, the node at the next position is looked up; when it
    is a PART whose FORM is "n't", both nodes are replaced by one merged
    node that keeps the UPOS and LOC of the auxiliary.

    :param dep_graph: dependency graph, modified in place
    :return:
    """
    pairs = []
    for aux in dep_graph.nodes(filter=lambda candidate: candidate.UPOS == "AUX"):
        follower = dep_graph.get_node_by_loc(aux.LOC + 1)

        # a falsy lookup result means there is nothing right after the AUX
        if follower and follower.UPOS == "PART" and follower.FORM == "n't":
            pairs.append((aux, follower))

    # rewrite after the scan so the iteration above sees the original graph
    for aux, negation in pairs:
        merged = merge_dep_nodes([aux, negation],
                                 UPOS=aux.UPOS,
                                 LOC=aux.LOC)
        dep_graph.replace_nodes([aux, negation], merged)
Example #26
0
def be_not_phrase2(dep_graph: DependencyGraph):
    """
    Merge a "be" predicate with the "not" particle attached to its object.

    Every node whose LEMMA contains the word "be" is inspected. For each of
    its children reached by a relation starting with "obj" (visited in LOC
    order), the ``advmod`` children that are PART nodes with "not" in their
    LEMMA are collected; exactly one is expected when any is present. The
    ``advmod`` edge is then removed and the predicate and the particle are
    replaced by one merged node keeping the predicate's UPOS and LOC.

    :param dep_graph: dependency graph, modified in place
    :return:
    """

    def is_not_particle(node):
        # a particle whose lemma contains the word "not"
        return node.UPOS == "PART" and "not" in node.LEMMA.split(" ")

    triples = []
    for pred_node in dep_graph.nodes():
        if "be" not in pred_node.LEMMA.split(" "):
            continue

        objects = sorted(
            (child for child, rel in dep_graph.children(pred_node)
             if rel.startswith('obj')),
            key=lambda node: node.LOC)

        for obj in objects:
            negations = [
                node for node, _ in dep_graph.children(
                    obj,
                    filter=lambda n, l: l == "advmod" and is_not_particle(n))
            ]
            if not negations:
                continue
            assert len(negations) == 1
            triples.append((pred_node, obj, negations[0]))

    # rewrite after the scan so the iteration above sees the original graph
    for found_be, found_obj, found_not in triples:
        dep_graph.remove_dependency(found_obj, found_not, 'advmod')
        merged = merge_dep_nodes((found_be, found_not),
                                 UPOS=found_be.UPOS,
                                 LOC=found_be.LOC)
        dep_graph.replace_nodes([found_be, found_not], merged)
Example #27
0
def num_pair(dep_graph: DependencyGraph):
    """
    Collapse a "NUM <separator> NUM" span into a single NOUN node.

    The pattern is ``NUM --nmod--> NUM --case--> separator`` where the
    separator's LEMMA is constrained by ``--|-|by``. A match is kept only
    when the separator lies strictly between the two numbers. The three
    nodes, in surface order, are replaced by one merged node with UPOS
    "NOUN" and the LOC of the last node.

    :param dep_graph: dependency graph, modified in place
    :return:
    """
    pattern = DependencyGraph()

    num1_node = pattern.create_node(UPOS="NUM")
    num2_node = pattern.create_node(UPOS="NUM")
    case_node = pattern.create_node(LEMMA=r"--|-|by")

    pattern.add_dependency(num1_node, num2_node, r'nmod')
    pattern.add_dependency(num2_node, case_node, r'case')

    spans = []
    for match in dep_graph.match(pattern):
        first = match[num1_node]
        second = match[num2_node]
        separator = match[case_node]

        # order the two numbers by position, whichever one is the head
        if first.LOC < second.LOC:
            left, right = first, second
        else:
            left, right = second, first

        if left.LOC < separator.LOC < right.LOC:
            spans.append([left, separator, right])

    # rewrite after the scan so matching runs on the original graph
    for span in spans:
        merged = merge_dep_nodes(span, UPOS="NOUN", LOC=span[-1].LOC)
        dep_graph.replace_nodes(span, merged)
def noun_phrase(dep_graph: DependencyGraph):
    """
    Merge each nominal head together with its phrase-internal dependents
    into a single node.

    Phase 1 collects, for every NOUN/PROPN/X/NUM/SYM node, the set of nodes
    that form its candidate phrase. Phase 2 compares all candidates pairwise
    and records phrases that are fully contained in another one. Phase 3
    merges every remaining phrase (optionally extended by adjacent quote
    marks) into one node carrying the head's UPOS and the LOC of the last
    phrase element.

    :param dep_graph: dependency graph; modified in place via
        ``replace_nodes``
    :return:
    :raises Exception: when two candidate phrases overlap without one
        containing the other
    """
    nouns = []
    # we first find np roots
    for root in dep_graph.nodes(
            filter=lambda x: x.UPOS in {"NOUN", "PROPN", "X", "NUM", "SYM"}):

        logger.debug("checking the node:")
        logger.debug(str(root))

        # np_elements = valid_np_element(root, dep_graph)
        # all relation values linking root to its parents, with "_" turned
        # into " " so they can be compared against case-word lemmas/forms
        parent_rels = set(
            itertools.chain.from_iterable(l.values()
                                          for n, l in dep_graph.parents(root)))
        parent_rels = set(rel.replace("_", " ") for rel in parent_rels)

        # case children whose word already appears in a parent relation;
        # they are kept out of the phrase further below
        escaped_case_node = set()
        if parent_rels:
            case_nodes = [
                x
                for x, l in dep_graph.children(root,
                                               filter=lambda n, l: l == "case")
            ]
            for node in case_nodes:
                if node.LEMMA.lower() in parent_rels or node.FORM.lower(
                ) in parent_rels:
                    # lemma is for including
                    escaped_case_node.add(node)

        # children accepted by is_valid_np_child (defined outside this block)
        valid_np_children = [(n, l) for n, l in dep_graph.children(
            root, filter=lambda n, l: is_valid_np_child(dep_graph, root, l, n))
                             ]
        logger.debug("noun_phrase: valid_np_children:")
        for node, l in valid_np_children:
            logger.debug(str(node))

        np_elements = [root]

        for n, l in valid_np_children:
            # adpositions never become part of the phrase
            if n.UPOS == "ADP":
                continue
            # children to the right of the head are only kept for the
            # relation prefixes listed here
            if n.LOC > root.LOC and \
                    not any(l.startswith(x)
                            for x in {"fixed", "compound", "nummod",
                                      "nmod:tmod", "flat", "nmod:npmod", "dep"}):
                continue
            if n in escaped_case_node:
                continue

            # conjunction super nodes are left alone
            if isinstance(n, DependencyGraphSuperNode) and n.is_conj:
                continue

            # NOTE(review): presumably offsprings(n) includes n itself,
            # otherwise n would never be added to np_elements -- confirm
            offsprings = list(dep_graph.offsprings(n))
            valid_np_component = True

            # reject the whole subtree when any node in it is attached to a
            # parent through a clause/argument-like relation
            for x in offsprings:
                for parent, rels in dep_graph.parents(x):
                    # the x inside any(...) is local to the generator
                    # expression and does not rebind the outer loop variable
                    if any(x in rels
                           for x in {"acl", "obl", "advcl", "subj", "obj"}):
                        valid_np_component = False
                        break
                if not valid_np_component:
                    break
            if valid_np_component:
                np_elements.extend(offsprings)

        logger.debug("noun_phrase: candidate np_elements:")
        for node in np_elements:
            logger.debug(str(node))

        # determiners of the head located at or before the head, by position
        det = [
            n for n, l in dep_graph.children(root,
                                             filter=lambda n, l: l == "det")
        ]
        det = [x for x in det if x.LOC <= root.LOC]
        det.sort(key=lambda x: x.LOC)

        if det:
            # raise Exception("noun phrase without det ")

            # the determiner closest to the head starts the phrase
            det = det[-1]
            # check the element should be continuous
            np_elements = [x for x in np_elements if det.LOC <= x.LOC]
            logger.debug(
                "noun_phrase: det found, cut the nodes before the det")

        filtered_np_elements = sorted(list(np_elements), key=lambda x: x.LOC)
        # if np_elements[-1].LOC - np_elements[0].LOC != len(np_elements) - 1:
        #     print ("root", root)
        #     for n in np_elements:
        #         print("np element", n.LOC, n)
        #     raise Exception("Bad Business Logic")
        # strip leading dashes and leading ADP/CCONJ/PUNCT nodes until the
        # first element is neither
        changed = True
        while changed:
            changed = False
            if filtered_np_elements and filtered_np_elements[0].LEMMA in {
                    "-", "--"
            }:
                filtered_np_elements.pop(0)
                changed = True
            if filtered_np_elements and filtered_np_elements[0].UPOS in {
                    "ADP", "CCONJ", "PUNCT"
            }:
                filtered_np_elements.pop(0)
                changed = True

        if filtered_np_elements:
            nouns.append((set(filtered_np_elements), root))

    # phase 2: find phrases that are contained in another phrase
    sub_nouns = []
    for idx1, (phrase1, head1) in enumerate(nouns):
        for idx2, (phrase2, head2) in enumerate(nouns):
            if idx1 == idx2:
                continue

            # phrasex is the strictly longer phrase; on equal length phrasex
            # is phrase2 and phrasey is phrase1
            phrasex, phrasey = (
                phrase1, phrase2) if len(phrase1) > len(phrase2) else (phrase2,
                                                                       phrase1)
            common = phrasex.intersection(phrasey)

            if not common:
                continue
            elif len(common) == len(phrasey):
                # node2 is a sub np of node1, delete
                # NOTE(review): two identical phrases would each be recorded
                # here, so neither would be merged below -- confirm whether
                # that can happen
                sub_nouns.append(phrasey)
            else:
                # partial overlap: the two phrases share words but neither
                # contains the other
                print("Phrase 1", [x.ID for x in phrase1])
                print("Phrase 2", [x.ID for x in phrase2])
                # return
                raise Exception("duplicate words found")

    # phase 3: merge every phrase that was not recorded as a sub phrase
    for idx, (phrase, head) in enumerate(nouns):

        # membership uses set equality against the recorded sub phrases
        if phrase in sub_nouns:
            continue

        phrase = sorted(list(phrase), key=lambda x: x.LOC)

        # pull in a quote mark child sitting directly before the first or
        # directly after the last phrase element
        # NOTE(review): phrase is modified while it is being iterated, so
        # inserted/appended nodes affect the remaining iterations
        for node in phrase:
            for child, _ in dep_graph.children(node):
                if child.LOC == phrase[0].LOC - 1 and child.LEMMA in {
                        "\"", "\'"
                }:
                    phrase.insert(0, child)
                if child.LOC == phrase[-1].LOC + 1 and child.LEMMA in {
                        "\"", "\'"
                }:
                    phrase.append(child)

        noun_node = merge_dep_nodes(phrase, UPOS=head.UPOS, LOC=phrase[-1].LOC)
        # print("Noun detected", noun_node.ID)
        dep_graph.replace_nodes(phrase, noun_node)
def noun_of_noun(dep_graph: DependencyGraph):
    """
    Merge "X of Y" constructions into a single node.

    The pattern is ``X --nmod:of--> Y --case--> of`` with X and Y restricted
    to NOUN/PROPN/PRON/X/NUM/SYM. A match is skipped when Y has a child or
    parent relation containing "conj" or "acl", or when X or Y is a
    conjunction super node. When Y has exactly one parent, the three nodes
    are replaced by one merged node keeping X's UPOS, FEATS and LOC. Nodes
    merged by an earlier match are substituted by their merged node in
    later matches.

    :param dep_graph: dependency graph, modified in place
    :return:
    :raises Exception: when Y's single parent is not X
    """
    nominal_upos = "NOUN|PROPN|PRON|X|NUM|SYM"

    pattern = DependencyGraph()
    noun1_node = pattern.create_node(UPOS=nominal_upos)
    of_node = pattern.create_node(LEMMA="of")
    noun2_node = pattern.create_node(UPOS=nominal_upos)

    pattern.add_dependency(noun1_node, noun2_node, "nmod:of")
    pattern.add_dependency(noun2_node, of_node, "case")

    # original node -> merged node that replaced it
    replaced_by = {}

    def is_conj_super_node(node):
        # conjunction super nodes must not be folded into a phrase
        return isinstance(node, DependencyGraphSuperNode) and node.is_conj

    # matches are materialized up front because the graph is rewritten
    # inside the loop
    for match in list(dep_graph.match(pattern)):

        head = match[noun1_node]
        head = replaced_by.get(head, head)

        tail = match[noun2_node]
        tail = replaced_by.get(tail, tail)

        of_word = match[of_node]

        if not all([head, tail, of_word]):
            # processed by others
            continue

        # relations around the tail, children first and parents second
        tail_relations = [rel for _, rel in dep_graph.children(tail)]
        tail_relations += [rel for _, rel in dep_graph.parents(tail)]
        if any("conj" in rel or "acl" in rel for rel in tail_relations):
            continue

        if is_conj_super_node(head):
            continue

        if is_conj_super_node(tail):
            continue

        tail_parents = [parent for parent, _ in dep_graph.parents(tail)]
        if len(tail_parents) != 1:
            continue

        only_parent = tail_parents[0]
        if only_parent != head:
            logger.error("dep_noun1 {0} {1}".format(head.ID, head.FORM))
            logger.error("dep_noun2 {0} {1}".format(tail.ID, tail.FORM))
            logger.error("dep_noun2_parent {0} {1}".format(
                only_parent.ID, only_parent.FORM))
            raise Exception("Noun of Noun failed")

        parts = [head, of_word, tail]

        merged = merge_dep_nodes(parts,
                                 UPOS=head.UPOS,
                                 FEATS=head.FEATS,
                                 LOC=head.LOC)

        dep_graph.replace_nodes(parts, merged)
        for part in parts:
            replaced_by[part] = merged

        logger.debug("node merged :" + " ".join(
            [head.ID, of_word.ID, tail.ID]))
Example #30
0
def amod_xcomp_to_acl(dep_graph: DependencyGraph):
    """
    Rewrite ``NOUN --amod--> ADJ --xcomp--> VERB`` so that the adjective
    becomes a verb-like node attached to the noun with ``acl``.

    For every match, the adjective (plus any ``mark`` children of its xcomp
    nodes located between the adjective and that xcomp node) is merged, with
    a leading "(be)" item, into a new node with UPOS "VERB" and FEATS
    ``{"VerbForm": "Ger"}``. The noun is linked to the new node with "acl"
    and the new node is linked to the matched verb with "obj".

    :param dep_graph: dependency graph, modified in place
    :return:
    :raises Exception: when more than one xcomp child of the adjective has
        qualifying mark nodes
    """

    pattern = DependencyGraph()

    noun_node = pattern.create_node(UPOS="NOUN")
    adj_node = pattern.create_node(UPOS="ADJ")
    verb_node = pattern.create_node(UPOS="VERB")

    pattern.add_dependency(noun_node, adj_node, r'amod')
    pattern.add_dependency(adj_node, verb_node, r"xcomp")

    # matches are materialized up front because the graph is rewritten
    # inside the loop
    for match in list(dep_graph.match(pattern)):

        dep_noun_node = match[noun_node]
        dep_verb_node = match[verb_node]
        dep_adj_node = match[adj_node]

        # probe that all three nodes can still be looked up by ID; any
        # exception from get_node is treated as "no longer in the graph"
        try:
            [
                dep_graph.get_node(x.ID)
                for x in [dep_noun_node, dep_verb_node, dep_adj_node]
            ]
        except Exception as e:
            # has been processed by previous match
            continue

        # every child of the adjective reached by an xcomp* relation
        xcomp_nodes = [
            n for n, l in dep_graph.children(
                dep_adj_node, filter=lambda n, l: l.startswith("xcomp"))
        ]

        mark_nodes_list = []

        for dep_xcomp_node in xcomp_nodes:

            # mark* children of the xcomp node positioned strictly between
            # the adjective and the xcomp node
            mark_nodes = [
                n for n, l in dep_graph.children(
                    dep_xcomp_node,
                    filter=lambda n, l: l.startswith("mark") and dep_adj_node.
                    LOC < n.LOC < dep_xcomp_node.LOC)
            ]
            if mark_nodes:
                mark_nodes_list.append(mark_nodes)

        # at most one xcomp child may contribute mark nodes
        if len(mark_nodes_list) > 1:
            raise Exception("Unexpected Situation Happened")

        new_verb_nodes = [dep_adj_node]
        if mark_nodes_list:
            mark_nodes = mark_nodes_list[0]

            new_verb_nodes.extend(mark_nodes)
            new_verb_nodes.sort(key=lambda x: x.LOC)

        # NOTE(review): the list now starts with the plain string "(be)";
        # this relies on merge_dep_nodes and replace_nodes accepting a
        # string item -- confirm
        new_verb_nodes = ["(be)"] + new_verb_nodes

        # the last element is still a real node, so its LOC is available
        new_node = merge_dep_nodes(new_verb_nodes,
                                   UPOS="VERB",
                                   LOC=new_verb_nodes[-1].LOC,
                                   FEATS={"VerbForm": "Ger"})

        dep_graph.replace_nodes(new_verb_nodes, new_node)

        dep_graph.set_dependency(dep_noun_node, new_node, "acl")

        for dep_xcomp_node in xcomp_nodes:
            # NOTE(review): the edge removed here goes from the xcomp node
            # to the new node, while the pattern edge went from the
            # adjective to the verb -- confirm the intended direction
            dep_graph.remove_dependency(dep_xcomp_node, new_node)
            # always targets the matched verb, not dep_xcomp_node
            dep_graph.set_dependency(new_node, dep_verb_node, "obj")