def conjunction(dep_graph: DependencyGraph):
    """
    Find the heads of coordination structures and process each of them.

    Example: "I like apples, bananas and oranges." (conj:and/or with punct)

    A node counts as a conjunction head when none of its incoming relations
    starts with "conj" while at least one of its outgoing relations does.
    All heads are collected first; only then is ``process_conjunction``
    called on each one, followed by a single ``process_head_conj`` call.

    TODO: nested conjunctions are not supported yet; they should be
    processed bottom-up.

    :param dep_graph: the dependency graph to inspect and hand over to the
        conjunction processors
    :return: None
    """

    def _has_conj_edge(edges):
        # edges are (node, relation) pairs
        return any(relation.startswith("conj") for _, relation in edges)

    conj_heads = []
    for candidate in dep_graph.nodes():
        # a node that is itself a conjunct cannot be a head
        is_conjunct = _has_conj_edge(dep_graph.parents(candidate))
        if not is_conjunct and _has_conj_edge(dep_graph.children(candidate)):
            conj_heads.append(candidate)

    for head in conj_heads:
        logger.debug("found the root of conjunction")
        logger.debug(str(head))

        process_conjunction(dep_graph, head)

    process_head_conj(dep_graph)
Example #2
0
def parallel_list(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Turn every "list" construction into a "LIST" predicate in the OIA graph.

    For each dependency node that has children attached through a relation
    containing "list", the node and those children are ordered by LOC and
    become the numbered arguments (1, 2, ...) of a freshly added auxiliary
    "LIST" node.

    :param dep_graph: dependency graph that is scanned (not modified here)
    :param oia_graph: OIA graph receiving the "LIST" predicates
    :param context: conversion context (unused in this function)
    :return: None
    """
    grouped_members = []
    for head in dep_graph.nodes():

        members = [
            child for child, _ in dep_graph.children(
                head, filter=lambda n, l: "list" in l)
        ]

        if members:
            # the head itself is one of the list items
            members.append(head)
            grouped_members.append(sorted(members, key=lambda m: m.LOC))

    for members in grouped_members:

        list_pred = oia_graph.add_aux("LIST")

        for arg_index, member in enumerate(members, start=1):
            oia_member = oia_graph.add_words(member.position)
            oia_graph.add_argument(list_pred, oia_member, arg_index)
Example #3
0
def and_or_conjunction(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Copy coordination arguments from the dependency graph to the OIA graph.

    Example: "I like apples, bananas and oranges." (conj:and/or with punct)

    Every node that has children linked by a relation starting with
    "arg_con" is added to the OIA graph, and each such child becomes one of
    its arguments. The argument index is the integer form of the first
    entry of the relation's ``values()``.

    :param dep_graph: dependency graph that is scanned (not modified here)
    :param oia_graph: OIA graph receiving the nodes and arguments
    :param context: conversion context (unused in this function)
    :return: None
    """

    for head in dep_graph.nodes():

        components = [
            (component, rels) for component, rels in dep_graph.children(
                head, filter=lambda n, l: l.startswith("arg_con"))
        ]

        if components:
            oia_head = oia_graph.add_words(head.position)

            for component, rels in components:
                oia_component = oia_graph.add_words(component.position)
                oia_graph.add_argument(oia_head, oia_component,
                                       int(rels.values()[0]))
Example #4
0
def goeswith(dep_graph: DependencyGraph):
    """
    Merge every head and its "goeswith" children into a single node.

    The merged node takes the UPOS of the last part (in LOC order) whose
    UPOS is not "X", falling back to "X", and the LOC of the last part.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    groups = []
    for head in dep_graph.nodes():

        parts = [
            child for child, _ in dep_graph.children(
                head, filter=lambda n, l: "goeswith" in l)
        ]

        if parts:
            parts.append(head)
            parts.sort(key=lambda part: part.LOC)
            groups.append(parts)

    for parts in groups:

        # later informative tags override earlier ones
        merged_upos = "X"
        for part in parts:
            if part.UPOS != "X":
                merged_upos = part.UPOS

        merged = merge_dep_nodes(parts, UPOS=merged_upos, LOC=parts[-1].LOC)

        dep_graph.replace_nodes(parts, merged)
Example #5
0
def be_adp_phrase(dep_graph: DependencyGraph):
    """
    Merge a copula with the adposition(s) that follow it, e.g. "is for xxx".

    This rule should not apply when:
    1. xxx is an adjective (be_adj_verb handles it);
    2. xxx is a NOUN (copula_phrase handles it).

    There may be several adpositions, as in
    "the insurgency is out of the picture".

    A match is a node with an AUX "cop" child and an ADP "case" child. It
    is kept only if every ADP "case" child lies strictly between the copula
    and the node (by LOC). The copula and adpositions are then merged into
    one AUX node that is re-attached to the node with a "cop" relation.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    pattern = DependencyGraph()

    some_node = pattern.create_node()

    adp_node = pattern.create_node(UPOS="ADP")
    be_node = pattern.create_node(UPOS="AUX")

    pattern.add_dependency(some_node, be_node, "cop")
    pattern.add_dependency(some_node, adp_node, "case")

    candidates = []

    for match in dep_graph.match(pattern):

        copula = match[be_node]
        complement = match[some_node]

        adpositions = [
            n for n, l in dep_graph.children(
                complement,
                filter=lambda n, l: "case" in l and n.UPOS == "ADP")
        ]

        in_between = all(copula.LOC < adp.LOC < complement.LOC
                         for adp in adpositions)
        if in_between:
            candidates.append((complement, [copula] + adpositions, copula))

    for complement, parts, head in candidates:

        if any(not dep_graph.get_node(part.ID) for part in parts):
            # one of the parts was already consumed by an earlier merge
            continue

        merged = merge_dep_nodes(parts, UPOS="AUX", LOC=head.LOC)

        for part in parts:
            dep_graph.remove_dependency(complement, part)
        dep_graph.replace_nodes(parts, merged)
        dep_graph.add_dependency(complement, merged, "cop")
Example #6
0
def amod_obl(dep_graph: DependencyGraph):
    """
    Fold an adjective and the adposition of its oblique into one ADP node.

    Covers expressions such as "more than" and "successful by".

    Pattern: noun -amod-> adj -obl:X-> obl -case-> adp. A match is used
    only when the adjective has at most one "obl" child, the adposition's
    FORM is among the values of the adj->obl relation, and the word order
    is noun < adj < adp < obl. The adjective and adposition are merged and
    the oblique is attached to the noun with "nmod:<merged FORM>".

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    pattern = DependencyGraph()

    noun_node = DependencyGraphNode(UPOS=r"NOUN|PRON")
    adj_node = DependencyGraphNode(UPOS="ADJ")
    adp_node = DependencyGraphNode(UPOS="ADP")
    obl_node = DependencyGraphNode()

    pattern.add_nodes([noun_node, adj_node, adp_node, obl_node])
    pattern.add_dependency(noun_node, adj_node, r'amod')
    pattern.add_dependency(adj_node, obl_node, r'obl:\w+')
    pattern.add_dependency(obl_node, adp_node, r'case')

    candidates = []
    for match in dep_graph.match(pattern):

        noun = match[noun_node]
        adj = match[adj_node]
        obl = match[obl_node]
        adp = match[adp_node]

        obl_children = list(
            dep_graph.children(adj, filter=lambda n, l: "obl" in l))
        if len(obl_children) > 1:
            # similar in form to the one
            continue

        if adp.FORM not in dep_graph.get_dependency(adj, obl).values():
            continue

        in_expected_order = noun.LOC < adj.LOC < adp.LOC < obl.LOC
        if in_expected_order:
            candidates.append((noun, adj, obl, adp))

    for noun, adj, obl, adp in candidates:
        merged = merge_dep_nodes([adj, adp], UPOS="ADP", LOC=adp.LOC)

        dep_graph.remove_dependency(noun, adj)
        dep_graph.remove_dependency(adj, obl)

        dep_graph.replace_nodes([adj, adp], merged)
        dep_graph.add_dependency(noun, obl, "nmod:" + merged.FORM)
def det_adjv_phrase(dep_graph: DependencyGraph):
    """
    Merge "determiner ... adjective/adverb" spans into a single NOUN node.

    For every ADJ/ADV node the function:

    1. skips it when any incoming relation label is in ``valid_adj_form``
       or is "amod"/"advmod";
    2. looks for "det" children and keeps the last one by LOC; its LEMMA
       must be one of the, a, an, some, any, all;
    3. collects the offsprings of the node located between that determiner
       and the node itself (inclusive, by LOC);
    4. replaces them with one merged node (UPOS "NOUN", LOC of the head).

    Candidates are collected first and merged afterwards, so the graph is
    not modified while it is being scanned.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    allowed_det_lemmas = {"the", "a", "an", "some", "any", "all"}
    phrases = []

    for node in dep_graph.nodes(filter=lambda n: n.UPOS in {"ADJ", "ADV"}):

        # Materialize the labels: a bare chain iterator would be exhausted by
        # the first check below, which silently disabled the second one.
        # (Iterating a relation is assumed to yield its labels - TODO confirm.)
        parent_rels = list(
            itertools.chain.from_iterable(
                rel for parent, rel in dep_graph.parents(node)))

        if any(rel in valid_adj_form for rel in parent_rels):
            continue

        if any(rel in {"amod", "advmod"} for rel in parent_rels):
            continue

        dets = [
            n for n, l in dep_graph.children(node,
                                             filter=lambda n, l: l == "det")
        ]

        if not dets:
            continue

        # the determiner closest to the head delimits the phrase
        dets.sort(key=lambda x: x.LOC)
        det = dets[-1]

        if det.LEMMA not in allowed_det_lemmas:
            continue

        root = node
        np_elements = sorted(
            dep_graph.offsprings(
                root, filter=lambda n: det.LOC <= n.LOC <= root.LOC),
            key=lambda x: x.LOC)

        # NOTE: contiguity of the collected span is not verified here.
        phrases.append((np_elements, root))

    for np_elements, root in phrases:
        noun_node = merge_dep_nodes(np_elements, UPOS="NOUN", LOC=root.LOC)
        dep_graph.replace_nodes(np_elements, noun_node)
Example #8
0
def adverbial_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Convert "advcl" dependencies into OIA relations.

    Examples:
    - "run in order to catch it"  (advcl with a mark: "in order to")
    - "he worked hard, replacing his feud"  (advcl without a mark)

    For every unprocessed (verb, clause) pair not yet related in the OIA
    graph:
    - with a "mark" child on the clause, the first mark becomes a predicate
      with the verb as argument 1 (mod=True) and the clause as argument 2,
      unless the mark cannot be added or its lemma is a conjunction word;
    - without a mark, the clause is added as a modifier of the verb.

    :param dep_graph: dependency graph that is scanned (not modified here)
    :param oia_graph: OIA graph receiving the nodes and relations
    :param context: conversion context, asked whether a pair is processed
    :return: None
    """
    pattern = DependencyGraph()
    verb_node = pattern.create_node()
    modifier_node = pattern.create_node()

    pattern.add_dependency(verb_node, modifier_node, "advcl")

    for match in list(dep_graph.match(pattern)):

        head = match[verb_node]
        clause = match[modifier_node]

        if context.is_processed(head, clause):
            continue

        oia_head = oia_graph.add_words(head.position)
        oia_clause = oia_graph.add_words(clause.position)

        logger.debug("adverbial clause: verb={0}, modifier={1}".format(
            head.position, clause.position))

        if oia_graph.has_relation(oia_head, oia_clause):
            continue

        marks = list(
            dep_graph.children(clause, filter=lambda n, rel: "mark" in rel))

        if not marks:
            oia_graph.add_mod(oia_clause, oia_head)
            continue

        # only the first mark is used as the connecting predicate
        mark_node, _ = marks[0]
        oia_mark = oia_graph.add_words(mark_node.position)

        if oia_mark is None or mark_node.LEMMA in CONJUNCTION_WORDS[language]:
            continue

        oia_graph.add_argument(oia_mark, oia_head, 1, mod=True)
        oia_graph.add_argument(oia_mark, oia_clause, 2)
Example #9
0
def be_not_phrase2(dep_graph: DependencyGraph):
    """
    Merge a "be" predicate with a "not" that is attached to its object.

    For every node whose LEMMA contains the word "be", each child linked by
    a relation starting with "obj" is inspected (in LOC order). If that
    object has an "advmod" child that is a PART with "not" in its LEMMA,
    the advmod link is removed and the "be" node and the "not" node are
    merged into one node keeping the UPOS and LOC of the "be" node.

    Exactly one such "not" per object is expected; more than one trips an
    assertion.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    def _is_negation(n, l):
        # that conj is omitted
        return l == "advmod" and (n.UPOS == "PART"
                                  and "not" in n.LEMMA.split(" "))

    pending = []
    for pred in dep_graph.nodes():
        if "be" not in pred.LEMMA.split(" "):
            continue

        objects = [
            child for child, rel in dep_graph.children(pred)
            if rel.startswith('obj')
        ]
        objects.sort(key=lambda x: x.LOC)

        for obj in objects:
            negations = [
                n for n, l in dep_graph.children(obj, filter=_is_negation)
            ]
            if not negations:
                continue
            assert len(negations) == 1
            pending.append((pred, obj, negations[0]))

    for be_node, obj_node, not_node in pending:
        dep_graph.remove_dependency(obj_node, not_node, 'advmod')
        merged = merge_dep_nodes((be_node, not_node),
                                 UPOS=be_node.UPOS,
                                 LOC=be_node.LOC)
        dep_graph.replace_nodes([be_node, not_node], merged)
def complete_missing_case_mark(dep_graph: DependencyGraph, root, root_parents,
                               parallel_components, relation_to_conj,
                               case_marks):
    """
    Give each parallel component the case mark its parent relation requires.

    ``parallel_components`` is sorted in place by LOC. For every parent in
    ``root_parents`` the required marks are taken from the third element of
    ``relation_to_conj[parent.ID]`` and paired positionally with the
    components. When a component lacks a child whose LEMMA equals or
    contains the mark, ``find_mark`` is asked for one among the preceding
    components; if found, an auxiliary copy of that mark node is created
    and attached to the component with the same relation. Otherwise a
    warning is logged.

    :param dep_graph: dependency graph, modified in place
    :param root: unused in this function
    :param root_parents: parents whose ``ID`` keys ``relation_to_conj``
    :param parallel_components: coordinated nodes (sorted in place)
    :param relation_to_conj: mapping from parent ID to a 3-tuple whose last
        element holds the required marks
    :param case_marks: passed through to ``find_mark``
    :return: None
    """

    parallel_components.sort(key=lambda x: x.LOC)

    for parent in root_parents:

        _, _, required_marks = relation_to_conj[parent.ID]
        if not required_marks:
            continue

        pairs = zip(parallel_components, required_marks)
        for position, (component, mark) in enumerate(pairs):

            if mark is None:
                continue

            child_lemmas = (child.LEMMA
                            for child, _ in dep_graph.children(component))
            if any(mark == lemma or mark in lemma.split(" ")
                   for lemma in child_lemmas):
                # the component already carries this mark
                continue

            source = find_mark(case_marks, parallel_components[:position],
                               mark)

            if not source:
                logger.warning("cannot find the mark, just add the relation")
                continue

            mark_node, rel = source

            clone = dep_graph.create_node(FORM=mark_node.FORM,
                                          LEMMA=mark_node.LEMMA,
                                          UPOS=mark_node.UPOS,
                                          LOC=mark_node.LOC)
            clone.aux = True
            dep_graph.add_dependency(component, clone, rel)
Example #11
0
def multi_words_mark(dep_graph: DependencyGraph):
    """
    Combine multi-word markers into one node, e.g. "on to" in "arise on to".

    For each node, the offsprings of all its "mark" children are gathered.
    Groups of at least two nodes that contain no NOUN/NUM/VERB/ADJ/ADV/PRON
    are candidates. A candidate is then widened to every graph node inside
    its LOC range, dropped if that range contains a NOUN_UPOS node, and
    otherwise merged into a single node re-attached with a "mark" relation.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    content_upos = {"NOUN", "NUM", "VERB", "ADJ", "ADV", "PRON"}
    candidates = []

    for head in dep_graph.nodes():
        span = [
            offspring
            for mark_child, _ in dep_graph.children(
                head, filter=lambda n, l: "mark" in l)
            for offspring in dep_graph.offsprings(mark_child)
        ]

        if len(span) < 2:
            continue
        if any(member.UPOS in content_upos for member in span):
            continue

        candidates.append((head, sorted(span, key=lambda n: n.LOC)))

    for head, span in candidates:
        if not all(dep_graph.get_node(member.ID) for member in span):
            # part of this span was merged away by an earlier candidate
            continue

        first_loc, last_loc = span[0].LOC, span[-1].LOC
        covered = sorted(
            (n for n in dep_graph.nodes() if first_loc <= n.LOC <= last_loc),
            key=lambda n: n.LOC)

        if any(member.UPOS in NOUN_UPOS for member in covered):
            continue

        merged = merge_dep_nodes(covered,
                                 UPOS=covered[0].UPOS,
                                 LOC=covered[0].LOC)
        for member in covered:
            dep_graph.remove_dependency(head, member)
        dep_graph.replace_nodes(covered, merged)
        dep_graph.add_dependency(head, merged, "mark")
Example #12
0
def multi_words_cc(dep_graph: DependencyGraph):
    """
    Combine multi-word coordinating conjunctions into a single node.

    For each node, the offsprings of all its "cc" children are gathered.
    Groups of at least two nodes without NOUN/NUM/VERB are candidates. A
    candidate is widened to every graph node inside its LOC range, dropped
    if that range contains a NOUN/NUM/VERB, and otherwise merged. The
    merged node is attached to the head with a "cc" relation if the head
    is still present in the graph.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    content_upos = {"NOUN", "NUM", "VERB"}
    candidates = []

    for head in dep_graph.nodes():
        span = [
            offspring
            for cc_child, _ in dep_graph.children(
                head, filter=lambda n, l: "cc" == l)
            for offspring in dep_graph.offsprings(cc_child)
        ]

        if len(span) < 2:
            continue
        if any(member.UPOS in content_upos for member in span):
            continue

        candidates.append((head, sorted(span, key=lambda n: n.LOC)))

    for head, span in candidates:

        first_loc, last_loc = span[0].LOC, span[-1].LOC
        # NOTE(review): unlike multi_words_mark, this list is not re-sorted
        # by LOC, so covered[0] follows dep_graph.nodes() order - confirm
        # that this is intended.
        covered = [
            n for n in dep_graph.nodes() if first_loc <= n.LOC <= last_loc
        ]

        if any(member.UPOS in content_upos for member in covered):
            continue
        if not all(dep_graph.get_node(member.ID) for member in covered):
            continue

        merged = merge_dep_nodes(covered,
                                 UPOS=covered[0].UPOS,
                                 LOC=covered[0].LOC)

        dep_graph.replace_nodes(covered, merged)
        for member in covered:
            dep_graph.remove_dependency(head, member)

        if dep_graph.get_node(head.ID):
            dep_graph.add_dependency(head, merged, "cc")
def noun_all(dep_graph: DependencyGraph):
    """
    Merge a nominal with an "all" determiner that directly follows it.

    A pair qualifies when the head's UPOS is NOUN, PROPN, PRON, X, NUM or
    SYM, and one of its children has "det" in its relation, the LEMMA
    "all", and a LOC exactly one past the head. The merged node keeps the
    UPOS and LOC of the head.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    nominal_upos = {"NOUN", "PROPN", "PRON", "X", "NUM", "SYM"}

    pairs = [
        (head, child)
        for head in dep_graph.nodes(filter=lambda x: x.UPOS in nominal_upos)
        for child, rels in dep_graph.children(head)
        if "det" in rels and child.LEMMA == "all"
        and child.LOC == head.LOC + 1
    ]

    for noun, quantifier in pairs:
        merged = merge_dep_nodes([noun, quantifier],
                                 UPOS=noun.UPOS,
                                 LOC=noun.LOC)
        dep_graph.replace_nodes([noun, quantifier], merged)
Example #14
0
def to_verb(dep_graph: DependencyGraph):
    """
    Merge an infinitive marker "to" with the verb right after it.

    A VERB is skipped when any of its incoming relations has "to" among
    its values. Otherwise each child with "mark" in its relation, the LEMMA
    "to", and a LOC exactly one before the verb is paired with the verb,
    unless that child is a conjunction super node. The merged node keeps
    the UPOS and LOC of the verb.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    def _is_conj_super_node(candidate):
        return isinstance(candidate,
                          DependencyGraphSuperNode) and candidate.is_conj

    pairs = []
    for verb in dep_graph.nodes(filter=lambda x: x.UPOS in {"VERB"}):
        governed_by_to = any("to" in rels.values()
                             for parent, rels in dep_graph.parents(verb))
        if governed_by_to:
            continue

        for child, rels in dep_graph.children(verb):
            if "mark" not in rels:
                continue
            if child.LEMMA != "to" or child.LOC != verb.LOC - 1:
                continue
            if _is_conj_super_node(child):
                continue
            pairs.append((child, verb))

    for marker, verb in pairs:
        merged = merge_dep_nodes([marker, verb], UPOS=verb.UPOS, LOC=verb.LOC)
        dep_graph.replace_nodes([marker, verb], merged)
def noun_phrase(dep_graph: DependencyGraph):
    """
    Detect noun phrases and replace each of them with a single merged node.

    The function works in three passes:

    1. For every node whose UPOS is NOUN, PROPN, X, NUM or SYM, collect the
       elements of the phrase headed by that node: the node itself plus the
       offsprings of its children accepted by ``is_valid_np_child``, subject
       to the exclusions commented in the loop below. If the head has "det"
       children at or before it, everything located before the last of them
       is cut. Leading "-"/"--" and leading ADP/CCONJ/PUNCT elements are
       stripped.
    2. Compare every pair of collected phrases. A phrase fully contained in
       another one is recorded as a sub-phrase; a partial overlap is treated
       as an error.
    3. Merge every phrase that is not a recorded sub-phrase, after extending
       it with directly adjacent quote characters found among the children
       of its elements. The merged node takes the UPOS of the head and the
       LOC of the last element.

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises Exception: "duplicate words found" when two phrases overlap
        without one containing the other
    """
    nouns = []
    # we first find np roots
    for root in dep_graph.nodes(
            filter=lambda x: x.UPOS in {"NOUN", "PROPN", "X", "NUM", "SYM"}):

        logger.debug("checking the node:")
        logger.debug(str(root))

        # np_elements = valid_np_element(root, dep_graph)
        # all relation values on the incoming edges of the root, with
        # underscores turned into spaces so they can be compared with words
        parent_rels = set(
            itertools.chain.from_iterable(l.values()
                                          for n, l in dep_graph.parents(root)))
        parent_rels = set(rel.replace("_", " ") for rel in parent_rels)

        # case children whose word already appears in a parent relation value
        # are kept out of the noun phrase
        escaped_case_node = set()
        if parent_rels:
            case_nodes = [
                x
                for x, l in dep_graph.children(root,
                                               filter=lambda n, l: l == "case")
            ]
            for node in case_nodes:
                if node.LEMMA.lower() in parent_rels or node.FORM.lower(
                ) in parent_rels:
                    # lemma is for including
                    escaped_case_node.add(node)

        valid_np_children = [(n, l) for n, l in dep_graph.children(
            root, filter=lambda n, l: is_valid_np_child(dep_graph, root, l, n))
                             ]
        logger.debug("noun_phrase: valid_np_children:")
        for node, l in valid_np_children:
            logger.debug(str(node))

        np_elements = [root]

        for n, l in valid_np_children:
            # adpositions never join the phrase
            if n.UPOS == "ADP":
                continue
            # a child located after the root joins only through one of the
            # listed relation prefixes
            if n.LOC > root.LOC and \
                    not any(l.startswith(x)
                            for x in {"fixed", "compound", "nummod",
                                      "nmod:tmod", "flat", "nmod:npmod", "dep"}):
                continue
            if n in escaped_case_node:
                continue

            # conjunction super nodes are left alone
            if isinstance(n, DependencyGraphSuperNode) and n.is_conj:
                continue

            offsprings = list(dep_graph.offsprings(n))
            valid_np_component = True

            # reject the whole subtree when any of its nodes has an incoming
            # relation for which one of these labels tests as contained
            for x in offsprings:
                for parent, rels in dep_graph.parents(x):
                    # the generator's own `x` shadows the loop variable only
                    # inside the generator expression
                    if any(x in rels
                           for x in {"acl", "obl", "advcl", "subj", "obj"}):
                        valid_np_component = False
                        break
                if not valid_np_component:
                    break
            if valid_np_component:
                np_elements.extend(offsprings)

        logger.debug("noun_phrase: candidate np_elements:")
        for node in np_elements:
            logger.debug(str(node))

        # determiners of the root that are located at or before it
        det = [
            n for n, l in dep_graph.children(root,
                                             filter=lambda n, l: l == "det")
        ]
        det = [x for x in det if x.LOC <= root.LOC]
        det.sort(key=lambda x: x.LOC)

        if det:
            # raise Exception("noun phrase without det ")

            # the last such determiner marks the left edge of the phrase
            det = det[-1]
            # check the element should be continuous
            np_elements = [x for x in np_elements if det.LOC <= x.LOC]
            logger.debug(
                "noun_phrase: det found, cut the nodes before the det")

        filtered_np_elements = sorted(list(np_elements), key=lambda x: x.LOC)
        # if np_elements[-1].LOC - np_elements[0].LOC != len(np_elements) - 1:
        #     print ("root", root)
        #     for n in np_elements:
        #         print("np element", n.LOC, n)
        #     raise Exception("Bad Business Logic")
        # repeatedly strip leading dashes and leading ADP/CCONJ/PUNCT elements
        # until the first element is neither
        changed = True
        while changed:
            changed = False
            if filtered_np_elements and filtered_np_elements[0].LEMMA in {
                    "-", "--"
            }:
                filtered_np_elements.pop(0)
                changed = True
            if filtered_np_elements and filtered_np_elements[0].UPOS in {
                    "ADP", "CCONJ", "PUNCT"
            }:
                filtered_np_elements.pop(0)
                changed = True

        if filtered_np_elements:
            nouns.append((set(filtered_np_elements), root))

    # pairwise comparison of all phrases: find phrases nested in other ones
    sub_nouns = []
    for idx1, (phrase1, head1) in enumerate(nouns):
        for idx2, (phrase2, head2) in enumerate(nouns):
            if idx1 == idx2:
                continue

            # phrasex is the larger of the two, phrasey the other one
            phrasex, phrasey = (
                phrase1, phrase2) if len(phrase1) > len(phrase2) else (phrase2,
                                                                       phrase1)
            common = phrasex.intersection(phrasey)

            if not common:
                continue
            elif len(common) == len(phrasey):
                # node2 is a sub np of node1, delete
                sub_nouns.append(phrasey)
            else:
                # partial overlap: the two phrases share words but neither
                # contains the other
                print("Phrase 1", [x.ID for x in phrase1])
                print("Phrase 2", [x.ID for x in phrase2])
                # return
                raise Exception("duplicate words found")

    for idx, (phrase, head) in enumerate(nouns):

        # nested phrases are covered by the phrase that contains them
        if phrase in sub_nouns:
            continue

        phrase = sorted(list(phrase), key=lambda x: x.LOC)

        # pull in quote characters directly before / after the phrase
        # NOTE(review): `phrase` is modified while it is being iterated, so
        # inserted quotes shift the positions still to be visited - confirm
        # this is intended.
        for node in phrase:
            for child, _ in dep_graph.children(node):
                if child.LOC == phrase[0].LOC - 1 and child.LEMMA in {
                        "\"", "\'"
                }:
                    phrase.insert(0, child)
                if child.LOC == phrase[-1].LOC + 1 and child.LEMMA in {
                        "\"", "\'"
                }:
                    phrase.append(child)

        noun_node = merge_dep_nodes(phrase, UPOS=head.UPOS, LOC=phrase[-1].LOC)
        # print("Noun detected", noun_node.ID)
        dep_graph.replace_nodes(phrase, noun_node)
def noun_of_noun(dep_graph: DependencyGraph):
    """
    Merge "<noun> of <noun>" into a single node.

    Pattern: noun1 -nmod:of-> noun2 -case-> "of". A match is skipped when
    the second noun has any incoming or outgoing relation containing "conj"
    or "acl", or when either noun is a conjunction super node. The merge
    happens only when the second noun has exactly one parent; the merged
    node keeps the UPOS, FEATS and LOC of the first noun. Nodes merged
    earlier are tracked so that later matches use the merged node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises Exception: "Noun of Noun failed" when the only parent of the
        second noun is not the first noun
    """
    nominal = "NOUN|PROPN|PRON|X|NUM|SYM"

    pattern = DependencyGraph()
    noun1_node = pattern.create_node(UPOS=nominal)
    of_node = pattern.create_node(LEMMA="of")
    noun2_node = pattern.create_node(UPOS=nominal)

    pattern.add_dependency(noun1_node, noun2_node, "nmod:of")
    pattern.add_dependency(noun2_node, of_node, "case")

    def _is_complex(rel):
        return "conj" in rel or "acl" in rel

    def _is_conj_super_node(candidate):
        return isinstance(candidate,
                          DependencyGraphSuperNode) and candidate.is_conj

    # original node -> node it has been merged into
    merged_map = {}

    for match in list(dep_graph.match(pattern)):

        head = match[noun1_node]
        head = merged_map.get(head, head)

        dependent = match[noun2_node]
        dependent = merged_map.get(dependent, dependent)

        of_word = match[of_node]

        if not (head and dependent and of_word):
            # processed by others
            continue

        in_complex_structure = (
            any(_is_complex(rel)
                for _, rel in dep_graph.children(dependent))
            or any(_is_complex(rel)
                   for _, rel in dep_graph.parents(dependent)))
        if in_complex_structure:
            continue

        if _is_conj_super_node(head) or _is_conj_super_node(dependent):
            continue

        dependent_parents = [
            parent for parent, _ in dep_graph.parents(dependent)
        ]
        if len(dependent_parents) != 1:
            continue

        sole_parent = dependent_parents[0]
        if sole_parent != head:
            logger.error("dep_noun1 {0} {1}".format(head.ID, head.FORM))
            logger.error("dep_noun2 {0} {1}".format(dependent.ID,
                                                    dependent.FORM))
            logger.error("dep_noun2_parent {0} {1}".format(
                sole_parent.ID, sole_parent.FORM))
            raise Exception("Noun of Noun failed")

        members = [head, of_word, dependent]

        merged = merge_dep_nodes(members,
                                 UPOS=head.UPOS,
                                 FEATS=head.FEATS,
                                 LOC=head.LOC)

        dep_graph.replace_nodes(members, merged)
        for member in members:
            merged_map[member] = merged

        logger.debug("node merged :" + " ".join(
            [head.ID, of_word.ID, dependent.ID]))
Example #17
0
def multi_word_fix_flat(dep_graph: DependencyGraph):
    """
    Merge multi-word expressions linked by fixed / flat / compound relations.

    A head is a node with no incoming fixed/flat/compound relation and at
    least one outgoing one. Its phrase is the head plus the offsprings of
    those children. Phrases are processed longest first. For each phrase
    whose nodes are all still in the graph, the LOC range it spans is
    computed, every offspring of a phrase node inside that range is
    selected (plus a quote character directly adjacent to either end), and
    the selection is replaced by one merged node carrying the head's UPOS
    and LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    fixed_rels = {"fixed", "flat", "compound"}

    def _is_fixed_rel(n, l):
        return any(x in l for x in fixed_rels)

    phrases = []

    for node in dep_graph.nodes():

        # a node that is itself a fixed/flat/compound dependent is not a head
        if any(True for _ in dep_graph.parents(node, filter=_is_fixed_rel)):
            continue

        phrase = []
        for n, l in dep_graph.children(node, filter=_is_fixed_rel):
            phrase.extend(dep_graph.offsprings(n))

        if not phrase:
            continue

        phrase.append(node)
        phrase.sort(key=lambda n: n.LOC)
        phrases.append((phrase, node))

    # longest phrases first, so shorter overlapping ones are skipped below
    phrases.sort(key=lambda x: len(x[0]), reverse=True)

    for phrase, head in phrases:

        if not all(dep_graph.get_node(x.ID) for x in phrase):
            continue  # already been processed

        merging_nodes = set()
        min_loc = 10000
        max_loc = -1
        for child in phrase:
            # NOTE(review): if DependencyGraphSuperNode subclasses
            # DependencyGraphNode the second branch is never reached - confirm.
            if isinstance(child, DependencyGraphNode):
                min_loc = min(min_loc, child.LOC)
                # was max(min_loc, ...), which could shrink the upper bound
                max_loc = max(max_loc, child.LOC)
            elif isinstance(child, DependencyGraphSuperNode):
                member_locs = [x.LOC for x in child.nodes]
                min_loc = min(min_loc, min(member_locs))
                max_loc = max(max_loc, max(member_locs))
            merging_nodes.update(dep_graph.offsprings(child))

        merged_nodes = {n for n in merging_nodes if min_loc <= n.LOC <= max_loc}
        # also absorb a quote character directly before or after the span
        for node in merging_nodes:
            if node.LEMMA in {"\"", "\'"} and node.LOC in (min_loc - 1,
                                                           max_loc + 1):
                merged_nodes.add(node)
        merged_nodes = sorted(merged_nodes, key=lambda x: x.LOC)

        logger.debug("multi_word_fix_flat: we are merging ")
        logger.debug("\n".join(str(node) for node in merged_nodes))
        logger.debug("with head \n" + str(head))
        new_node = merge_dep_nodes(merged_nodes, UPOS=head.UPOS, LOC=head.LOC)

        dep_graph.replace_nodes(merged_nodes, new_node)
def secondary_predicate(dep_graph: DependencyGraph):
    """
    Rewrite a non-verbal xcomp that acts as a secondary predicate.

    Pattern: pred -xcomp-> X (UPOS other than VERB), X -nsubj-> S and
    pred -obj-> S. An implicit "(be)" predicate is introduced so that the
    result reads pred -obj-> (be) -nsubj-> S.

    Two word orders are handled:
    - pred < S < X: an ADJ/ADV X is merged with "(be)" into one VERB node
      that inherits X's children; any other X becomes the "obj" of a new
      auxiliary "(be)" node.
    - X < pred (X is fronted, e.g. "how ominous" in "I can't tell you how
      ominous I found Bush's performance in that interview."): a new
      auxiliary "(be)" node is created and X is attached to it as "amod"
      when it is ADJ/ADV, otherwise as "obj".

    Other orders are left untouched.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    pattern = DependencyGraph()

    pred_node = pattern.create_node()
    xcomp_node = pattern.create_node(UPOS=r'(?!VERB\b)\b\w+')
    xcomp_subj_node = pattern.create_node()

    pattern.add_dependency(pred_node, xcomp_node, "xcomp")
    pattern.add_dependency(xcomp_node, xcomp_subj_node, "nsubj")
    pattern.add_dependency(pred_node, xcomp_subj_node, "obj")

    def _detach(pred, xcomp, subj):
        # drop the three edges of the matched triangle
        dep_graph.remove_dependency(pred, xcomp)
        dep_graph.remove_dependency(pred, subj)
        dep_graph.remove_dependency(xcomp, subj)

    def _new_implicit_be(xcomp):
        # auxiliary "(be)" placed just before the xcomp node
        be = dep_graph.create_node(FORM="(be)",
                                   LEMMA="(be)",
                                   UPOS="VERB",
                                   LOC=xcomp.LOC - 0.5)
        be.aux = True
        return be

    def _is_adj_or_adv(node):
        return node.UPOS == "ADJ" or node.UPOS == "ADV"

    for match in list(dep_graph.match(pattern)):

        pred = match[pred_node]
        xcomp = match[xcomp_node]
        subj = match[xcomp_subj_node]

        # the position of subj and xcomp may be reversed in questions

        if pred.LOC < subj.LOC < xcomp.LOC:

            _detach(pred, xcomp, subj)

            if _is_adj_or_adv(xcomp):
                be = merge_dep_nodes(["(be)", xcomp],
                                     UPOS="VERB",
                                     LOC=xcomp.LOC)

                dep_graph.add_node(be)

                dep_graph.add_dependency(pred, be, "obj")
                dep_graph.add_dependency(be, subj, "nsubj")

                # move the children of the xcomp over to the merged node
                for child, rel in list(dep_graph.children(xcomp)):
                    dep_graph.remove_dependency(xcomp, child)
                    dep_graph.add_dependency(be, child, rel)

                dep_graph.remove_node(xcomp)

            else:
                be = _new_implicit_be(xcomp)

                dep_graph.add_dependency(pred, be, "obj")
                dep_graph.add_dependency(be, subj, "nsubj")
                dep_graph.add_dependency(be, xcomp, "obj")

        elif xcomp.LOC < pred.LOC:

            _detach(pred, xcomp, subj)

            be = _new_implicit_be(xcomp)

            dep_graph.add_dependency(pred, be, "obj")
            dep_graph.add_dependency(be, subj, "nsubj")

            xcomp_rel = "amod" if _is_adj_or_adv(xcomp) else "obj"
            dep_graph.add_dependency(be, xcomp, xcomp_rel)
Example #19
0
def _dep_nodes_for_oia_node(dep_graph, oia_node):
    """
    Return the dependency nodes found for the spans of ``oia_node``.

    Only ``OIAWordsNode`` instances are looked up; for any other node type,
    or when the lookup yields nothing, an empty list is returned.

    :param dep_graph: dependency graph queried through ``get_node_by_spans``
    :param oia_node: OIA node to resolve
    :return: list of dependency nodes (possibly empty)
    """
    if not isinstance(oia_node, OIAWordsNode):
        return []

    dep_node = dep_graph.get_node_by_spans(oia_node.spans)
    if not dep_node:
        return []

    # get_node_by_spans hands back either a single node or a list of nodes
    if isinstance(dep_node, DependencyGraphNode):
        return [dep_node]
    if isinstance(dep_node, list):
        return list(dep_node)

    logger.error("get_node_by_spans return type unknown.")
    return []


def _zero_in_degree_nodes(oia_graph):
    """
    Return the OIA nodes that currently have no incoming edge.

    :param oia_graph: OIA graph to inspect
    :return: list of nodes whose in-degree in ``oia_graph.g`` is 0
    """
    return [
        node for node in oia_graph.nodes()
        if oia_graph.g.in_degree(node.ID) == 0
    ]


def single_root(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext):
    """
    Pick one root for the OIA graph and hang the other root-less nodes
    below it.

    Stage 1 selects the root among the nodes with in-degree 0:

    * a single such node is the root;
    * otherwise candidates are ranked by the shortest undirected path (in
      the dependency graph) between the first child of dependency node
      ``"0"`` and the dependency nodes behind the candidate or its children;
    * ties are broken by a score built from ``func`` relations, punctuation
      found between the candidate's children, ``IMPORTANT_CONNECTION_WORDS``
      and the ``PARATAXIS`` label;
    * remaining ties: three or more ``PARATAXIS`` aux nodes are merged into
      a new ``PARATAXIS`` node, two nodes are resolved by word position.

    Stage 2 reverses ``pred.arg.*`` edges of conjunction nodes and ``ref``
    edges of the other zero-in-degree nodes.

    Stage 3 repeatedly looks for nodes reachable from both the root and a
    remaining zero-in-degree node and rewires the edges around that
    intersection until no zero-in-degree node other than the root is left,
    or no further progress is possible.

    :param dep_graph: dependency graph (read only here)
    :param oia_graph: OIA graph, modified in place
    :param context: conversion context (unused in this function)
    :return: None
    """

    zero_degree_nodes = _zero_in_degree_nodes(oia_graph)

    if not zero_degree_nodes:
        return

    if len(zero_degree_nodes) == 1:
        root = zero_degree_nodes[0]
    else:
        # len(zero_degree_nodes) >= 2

        # These do not depend on the candidate, so compute them once instead
        # of once per candidate / per path query.
        dep_root = dep_graph.get_node("0")
        real_dep_root = next(n for n, l in dep_graph.children(dep_root))
        undirected_dep = dep_graph.g.to_undirected()

        dists_to_root = []
        for oia_node in zero_degree_nodes:

            # dependency nodes behind the candidate itself and its children
            related_dep_nodes = set(
                _dep_nodes_for_oia_node(dep_graph, oia_node))

            children = [n for n, l in oia_graph.children(oia_node)]
            for child in children:
                related_dep_nodes.update(
                    _dep_nodes_for_oia_node(dep_graph, child))

            # A candidate without any resolvable dependency node gets an
            # infinite distance instead of crashing min() on an empty input.
            min_dist_to_root = min(
                (len(
                    nx.shortest_path(undirected_dep, real_dep_root.ID,
                                     dep_node.ID))
                 for dep_node in related_dep_nodes),
                default=float("inf"))

            dists_to_root.append((oia_node, min_dist_to_root))

        dists_to_root.sort(key=lambda x: x[1])

        min_dist = dists_to_root[0][1]
        root_candidates = [
            oia_node for oia_node, dist in dists_to_root if dist == min_dist
        ]

        if len(root_candidates) == 1:

            root = root_candidates[0]

        else:

            scores = []

            score_map = {":": 40, "\"": 30, ";": 20, ",": 10, "(": -10}

            for cand in root_candidates:

                score = -100
                if any("func" in rel.label
                       for n, rel in oia_graph.children(cand)):
                    score = 100

                children = [n for n, l in oia_graph.children(cand)]
                dep_children = []
                for child in children:
                    dep_children.extend(
                        _dep_nodes_for_oia_node(dep_graph, child))

                # check what is between the first and last child
                dep_children.sort(key=lambda x: x.LOC)

                # Without children there is no span to inspect; skipping
                # avoids an IndexError on dep_children[0].
                if dep_children:
                    first_loc = dep_children[0].LOC
                    last_loc = dep_children[-1].LOC

                    for node in dep_graph.nodes():
                        if node.LOC is None:
                            continue
                        if first_loc < node.LOC < last_loc:

                            if node.FORM in score_map:
                                score = max(score, score_map[node.FORM])

                if isinstance(cand, OIAWordsNode):
                    for dep_node in _dep_nodes_for_oia_node(dep_graph, cand):
                        if dep_node.LEMMA in IMPORTANT_CONNECTION_WORDS:
                            score += 8

                elif isinstance(cand,
                                OIAAuxNode) and cand.label == "PARATAXIS":
                    score += 4

                scores.append((cand, score))

            scores.sort(key=lambda x: x[1], reverse=True)

            best_score = scores[0][1]
            top_nodes = [node for node, score in scores if score == best_score]

            if len(top_nodes) == 1:
                root = top_nodes[0]

            elif len(top_nodes) >= 3:
                # multiple top node found, merge them to one
                if all(
                        isinstance(node, OIAAuxNode)
                        and node.label == "PARATAXIS" for node in top_nodes):
                    next_nodes = []
                    removed_ids = set()
                    for top in top_nodes:
                        for n, l in list(oia_graph.children(top)):
                            next_nodes.append(n)
                        oia_graph.remove_node(top)
                        removed_ids.add(top.ID)

                    # Rebuild the list rather than removing from it while
                    # iterating over it.
                    zero_degree_nodes = [
                        node for node in zero_degree_nodes
                        if node.ID not in removed_ids
                    ]

                    root = oia_graph.add_aux("PARATAXIS")
                    oia_graph.add_node(root)
                    next_nodes.sort(key=lambda x: x.ID)
                    # NOTE(review): argument indices start at 0 here, whereas
                    # other converters in this file number arguments from 1
                    # -- confirm this is intended.
                    for index, second_node in enumerate(next_nodes):
                        oia_graph.add_argument(root, second_node, index)
                else:
                    logger.error(
                        "Deep intersection point, currently cannot process")
                    return

            else:  # len(top_nodes) == 2:
                # check who is prev, and who is next

                dep_tops = [(top, dep_node) for top in top_nodes
                            for dep_node in _dep_nodes_for_oia_node(
                                dep_graph, top)]

                if not dep_tops:
                    logger.error("Multiple AUX head ")
                    return

                dep_tops.sort(key=lambda x: x[1].LOC)

                root = dep_tops[0][0]

    # root obtained, change other zero-in-degree node

    logger.debug("Root obtained ")
    logger.debug(root)

    for node in zero_degree_nodes:
        if root.ID == node.ID:
            continue

        if is_conj_node(node, dep_graph):
            # turn "node -pred.arg.N-> child" into "child -as:pred.arg.N-> node"
            for child, rel in list(oia_graph.children(node)):
                label = rel.label
                if "pred.arg." in label:
                    arg_no = label.split(".")[-1]
                    new_rel = "as:pred.arg." + arg_no
                    oia_graph.remove_relation(node, child)
                    oia_graph.add_relation(child, node, new_rel)

            continue

        ref_childs = [
            child for child, rel in oia_graph.children(node)
            if rel.label == "ref"
        ]

        # turn "node -ref-> child" into "child -as:ref-> node"
        for child in ref_childs:
            oia_graph.remove_relation(node, child)
            oia_graph.add_relation(child, node, "as:ref")

    zero_degree_nodes = [
        n for n in _zero_in_degree_nodes(oia_graph) if n.ID != root.ID
    ]

    while len(zero_degree_nodes) > 0:

        logger.debug("we found zero_degree_nodes: ")
        for node in zero_degree_nodes:
            logger.debug(node)

        root_offsprings = set(oia_graph.offsprings(root))

        logger.debug("root offsprings :")
        for n in root_offsprings:
            logger.debug(n)

        intersections = []
        for node in zero_degree_nodes:

            node_offspring = set(oia_graph.offsprings(node))

            logger.debug("node offsprings :")
            for n in node_offspring:
                logger.debug(n)

            intersection = root_offsprings.intersection(node_offspring)

            logger.debug("we found {0} initial intersection :".format(
                len(intersection)))
            for n in intersection:
                logger.debug(n)

            if intersection:

                # the top intersection point is a shared node none of whose
                # parents is itself shared
                top_intersection_point = None
                parents_to_root = None
                parents_to_other = None
                for x in intersection:
                    parents = set([n for n, l in oia_graph.parents(x)])
                    if not parents.intersection(intersection):
                        top_intersection_point = x
                        parents_to_root = parents.intersection(root_offsprings)
                        parents_to_other = parents.intersection(node_offspring)
                        break

                if top_intersection_point is None:
                    logger.error("It seems we have a problem ")
                    continue

                logger.debug("we found a intersections: ")
                logger.debug(top_intersection_point)

                logger.debug("Its parents to root: ")
                for x in parents_to_root:
                    logger.debug(x)

                logger.debug("Its parents to other: ")
                for x in parents_to_other:
                    logger.debug(x)

                intersections.append((top_intersection_point, parents_to_root,
                                      parents_to_other))

        if len(intersections) == 0:
            logger.error("seems we have disconnected compoenent")
            break

        # Tracks whether this pass touched the graph at all; an untouched
        # graph would make the next pass identical and the loop endless.
        changed = False

        for intersection_point, parents_to_root, parents_to_other in intersections:

            for node in parents_to_other:

                is_aux = isinstance(node, OIAAuxNode)

                if is_aux and node.label == "WHETHER":

                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for WHETHER "
                        )

                    # redirect the root-side parents to the WHETHER node
                    for parent in parents_to_root:
                        relation = oia_graph.get_edge(parent,
                                                      intersection_point)
                        oia_graph.remove_relation(parent, intersection_point)
                        oia_graph.add_relation(parent, node, relation.label)
                        changed = True
                else:

                    if is_aux and node.label == "LIST":
                        logger.error("lets see what happens for LIST")
                        if len(list(oia_graph.parents(node))) != 0:
                            logger.error(
                                "it seems different with what we have thought for LIST "
                            )

                    # reverse "node -> intersection" into an "as:" edge
                    relation = oia_graph.get_edge(node, intersection_point)
                    oia_graph.remove_relation(node, intersection_point)
                    oia_graph.add_relation(intersection_point, node,
                                           "as:" + relation.label)
                    changed = True

        if not changed:
            logger.error(
                "no relation could be rewired for the remaining "
                "zero-in-degree nodes")
            break

        zero_degree_nodes = [
            n for n in _zero_in_degree_nodes(oia_graph) if n.ID != root.ID
        ]
Example #20
0
def acl_verb_obl_case(dep_graph: DependencyGraph):
    """
    Merge the case marker(s) of an oblique into the ``acl`` verb above it.

    The pattern searched is ``X -acl-> VERB -obl:*-> Y -case-> C``. A match
    is kept only when the verb has a single ``obl*`` child, no ``obj`` /
    ``comp`` child, at most one subject, and the FORM of ``C`` appears in the
    values of the verb-to-``Y`` dependency. For each kept match the verb is
    merged with the ``case*`` children of ``Y`` located between the verb and
    ``Y``, and the oblique edge is replaced by an ``obj`` edge.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    def is_obl(n, l):
        return l.startswith("obl")

    def is_obj_or_comp(n, l):
        return "obj" in l or "comp" in l

    def is_subj(n, l):
        return "subj" in l

    pattern = DependencyGraph()

    subj_node = pattern.create_node()
    verb_node = pattern.create_node(UPOS="VERB")
    obj_node = pattern.create_node()
    case_node = pattern.create_node()

    pattern.add_dependency(subj_node, verb_node, r'acl')
    pattern.add_dependency(verb_node, obj_node, r'obl:\w*')
    pattern.add_dependency(obj_node, case_node, r'case')

    # collect first, rewrite afterwards, so matching runs on the untouched graph
    phrases = []

    for match in dep_graph.match(pattern):

        head = match[subj_node]
        verb = match[verb_node]
        target = match[obj_node]
        case = match[case_node]

        obliques = [n for n, l in dep_graph.children(verb, filter=is_obl)]
        if len(obliques) > 1:
            continue

        direct_objects = [
            n for n, l in dep_graph.children(verb, filter=is_obj_or_comp)
        ]
        if direct_objects:
            continue

        obl_rel = dep_graph.get_dependency(verb, target)
        if case.FORM not in obl_rel.values():
            continue

        def is_case_between(n, l):
            return l.startswith("case") and verb.LOC < n.LOC < target.LOC

        # there may be several case markers, take all of them
        cases = [
            n for n, l in dep_graph.children(target, filter=is_case_between)
        ]

        subjects = list(dep_graph.children(verb, filter=is_subj))
        if len(subjects) > 1:
            continue

        phrases.append((head, verb, target, cases))

    for head, verb, target, cases in phrases:
        members = [verb] + cases
        logging.debug("acl_verb_obl_case: we are merging nodes")
        logging.debug("\n".join(map(str, members)))

        merged = merge_dep_nodes(members,
                                 UPOS=verb.UPOS,
                                 LOC=verb.LOC,
                                 FEATS=verb.FEATS)

        logging.debug("acl_verb_obl_case: we obtain a new node")
        logging.debug(str(merged))

        # drop the edges that the merged node makes obsolete
        dep_graph.remove_dependency(verb, target)
        for case in cases:
            dep_graph.remove_dependency(target, case)

        dep_graph.replace_nodes(members, merged)
        dep_graph.add_dependency(merged, target, "obj")
Example #21
0
def amod_xcomp_to_acl(dep_graph: DependencyGraph):
    """
    Rewrite ``NOUN -amod-> ADJ -xcomp-> VERB`` as an ``acl`` construction.

    The adjective, together with the ``mark*`` children of its xcomp that
    sit between the adjective and that xcomp, is merged behind a literal
    ``"(be)"`` into one VERB node. That node is attached to the noun with
    ``acl`` and takes the matched verb as ``obj``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises Exception: when more than one xcomp child carries such marks
    """

    pattern = DependencyGraph()

    noun_node = pattern.create_node(UPOS="NOUN")
    adj_node = pattern.create_node(UPOS="ADJ")
    verb_node = pattern.create_node(UPOS="VERB")

    pattern.add_dependency(noun_node, adj_node, r'amod')
    pattern.add_dependency(adj_node, verb_node, r"xcomp")

    # matches are materialised because the graph is edited inside the loop
    for match in list(dep_graph.match(pattern)):

        dep_noun_node = match[noun_node]
        dep_verb_node = match[verb_node]
        dep_adj_node = match[adj_node]

        try:
            for matched in (dep_noun_node, dep_verb_node, dep_adj_node):
                dep_graph.get_node(matched.ID)
        except Exception:
            # has been processed by previous match
            continue

        def is_xcomp(n, l):
            return l.startswith("xcomp")

        xcomp_children = [
            n for n, l in dep_graph.children(dep_adj_node, filter=is_xcomp)
        ]

        marks_per_xcomp = []

        for xcomp_child in xcomp_children:

            def is_mark_between(n, l):
                return (l.startswith("mark")
                        and dep_adj_node.LOC < n.LOC < xcomp_child.LOC)

            marks = [
                n for n, l in dep_graph.children(xcomp_child,
                                                 filter=is_mark_between)
            ]
            if marks:
                marks_per_xcomp.append(marks)

        if len(marks_per_xcomp) > 1:
            raise Exception("Unexpected Situation Happened")

        members = [dep_adj_node]
        if marks_per_xcomp:
            members.extend(marks_per_xcomp[0])
            members.sort(key=lambda x: x.LOC)

        # the literal "(be)" goes first; the last entry is still a real node
        members.insert(0, "(be)")

        merged = merge_dep_nodes(members,
                                 UPOS="VERB",
                                 LOC=members[-1].LOC,
                                 FEATS={"VerbForm": "Ger"})

        dep_graph.replace_nodes(members, merged)

        dep_graph.set_dependency(dep_noun_node, merged, "acl")

        # NOTE(review): the obj edge always targets the matched verb, not the
        # xcomp child being iterated -- confirm this is intended.
        for xcomp_child in xcomp_children:
            dep_graph.remove_dependency(xcomp_child, merged)
            dep_graph.set_dependency(merged, dep_verb_node, "obj")
def adv_ccomp(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """
    Turn an adverb-like word governing a ``ccomp`` / ``xcomp`` into an OIA
    predicate.

    For each ``ADV|X|NOUN|PART -ccomp|xcomp-> C`` match not already related
    in the OIA graph, the governor (merged with the ``case`` children of
    ``C`` lying between the two, if any) becomes the predicate. If the
    governor is itself an ``advmod`` of an ADV/X/NOUN parent, that parent is
    added as modifier argument 1; otherwise ``C`` is added as argument 2.

    :param dep_graph: dependency graph, modified when nodes are merged
    :param oia_graph: OIA graph receiving the new nodes and arguments
    :param context: conversion context (unused in this function)
    :return: None
    :raises Exception: when the governor has several qualifying parents
    """

    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    adv_node = pattern.create_node(UPOS="ADV|X|NOUN|PART")  # part is for "not"
    ccomp_node = pattern.create_node()

    pattern.add_dependency(adv_node, ccomp_node, r"ccomp|xcomp")

    def is_advmod_parent(n, l):
        return "advmod" == l and n.UPOS in {"ADV", "X", "NOUN"}

    # gather everything first; the graph is only edited in the second loop
    found = []
    for match in dep_graph.match(pattern):

        governor = match[adv_node]
        complement = match[ccomp_node]

        if oia_graph.has_relation(governor, complement):
            continue

        def is_case_between(n, l):
            return "case" == l and governor.LOC < n.LOC < complement.LOC

        cases = [
            n
            for n, l in dep_graph.children(complement, filter=is_case_between)
        ]

        if cases:
            cases = continuous_component(cases, cases[0])
            predicate_nodes = [governor] + cases
            predicate_nodes.sort(key=lambda n: n.LOC)
        else:
            predicate_nodes = [governor]

        subject_candidates = [
            n for n, l in dep_graph.parents(governor, filter=is_advmod_parent)
        ]
        if len(subject_candidates) > 1:
            raise Exception("Multiple subject")

        subject = subject_candidates[0] if subject_candidates else None

        found.append((subject, predicate_nodes, complement))

    for subject, predicate_nodes, complement in found:

        if len(predicate_nodes) > 1:
            # fuse governor and case words into a single auxiliary ADV node
            fused = dep_graph.create_node(
                ID=" ".join([x.ID for x in predicate_nodes]),
                FORM=" ".join([x.FORM for x in predicate_nodes]),
                LEMMA=" ".join([x.LEMMA for x in predicate_nodes]),
                UPOS="ADV",
                LOC=predicate_nodes[0].LOC)

            fused.aux = True

            dep_graph.replace_nodes(predicate_nodes, fused)

            dep_graph.remove_dependency(complement, fused)

            predicate = fused
        else:
            predicate = predicate_nodes[0]

        oia_pred_node = oia_graph.add_words(predicate.position)

        # NOTE(review): the complement is attached only when no subject was
        # found -- confirm that skipping it otherwise is intended.
        if subject:
            oia_subj_node = oia_graph.add_words(subject.position)
            oia_graph.add_argument(oia_pred_node, oia_subj_node, 1, mod=True)
        else:
            oia_ccomp_node = oia_graph.add_words(complement.position)
            oia_graph.add_argument(oia_pred_node, oia_ccomp_node, 2)
def adv_verb_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Convert ``advmod`` dependencies into OIA modifier structures.

    the adv before the verb should be processed by verb_phrase
    this converter should process the adv after the verb
    verb1 in order to verb2

    For each unprocessed ``head -advmod-> adv`` pair:

    * if ``adv`` has exactly one ``obl*`` child without a ``case`` child,
      ``adv`` becomes a predicate with the head as modifier argument 1 and
      the oblique as argument 2;
    * else if ``adv`` has a ``mark*`` child, the first mark becomes the
      predicate with the head as modifier argument 1 and ``adv`` as
      argument 2;
    * otherwise ``adv`` is added as a plain modifier of the head.

    :param dep_graph: dependency graph (read only here)
    :param oia_graph: OIA graph receiving nodes and relations
    :param context: conversion context, consulted through ``is_processed``
    :return: None
    """

    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    verb_node = DependencyGraphNode(
        UPOS="VERB|NOUN|PROPN|AUX|PRON")  # aux is for be word
    adv_node = DependencyGraphNode(UPOS="ADV|X|NOUN|ADJ|VERB")

    pattern.add_nodes([verb_node, adv_node])

    pattern.add_dependency(verb_node, adv_node, r'advmod')

    def is_obl(n, l):
        return l.startswith("obl")

    def is_case(n, l):
        return "case" in l

    def is_mark(n, l):
        return l.startswith("mark")

    for match in dep_graph.match(pattern):

        head = match[verb_node]
        modifier = match[adv_node]

        if context.is_processed(head, modifier):
            continue

        if oia_graph.has_relation(head, modifier):
            continue

        obliques = [x for x, l in dep_graph.children(modifier, filter=is_obl)]

        obl_node = None
        obl_has_case = False
        if len(obliques) == 1:
            obl_node = obliques[0]
            # if obl with case, let the oblique to process it
            obl_has_case = bool(
                [n for n, l in dep_graph.children(obl_node, filter=is_case)])

        marks = [x for x, l in dep_graph.children(modifier, filter=is_mark)]

        oia_verb_node = oia_graph.add_words(head.position)
        oia_adv_node = oia_graph.add_words(modifier.position)

        if obl_node and not obl_has_case:
            oia_arg_node = oia_graph.add_words(obl_node.position)

            oia_graph.add_argument(oia_adv_node, oia_verb_node, 1, mod=True)
            oia_graph.add_argument(oia_adv_node, oia_arg_node, 2)
            continue

        if not marks:
            oia_graph.add_mod(oia_adv_node, oia_verb_node)
            continue

        # only the first mark child is used as the connecting predicate
        oia_pred_node = oia_graph.add_words(marks[0].position)

        oia_graph.add_argument(oia_pred_node, oia_verb_node, 1, mod=True)
        oia_graph.add_argument(oia_pred_node, oia_adv_node, 2)
Example #24
0
def simple_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Build predicate/argument relations for simple clauses.

    :TODO badcase  Attached is a new link

    First pass: every VERB/ADJ/NOUN/AUX/ADV node with a subject, object,
    clausal complement or expletive child becomes an OIA predicate. The
    subject (if any) is argument 1; the remaining arguments are numbered
    from 2 in the order expletive, ``iobj``, ``obj``, ``ccomp``/``xcomp``.
    A complement introduced by a ``mark`` child that is an ADV
    when/where/how/whether is attached through that word instead.

    Second pass: where a ``what|who`` word is an argument of both a parent
    predicate and its child predicate, the child-to-question-word and
    parent-to-child relations are removed and replaced by a ``mod_by:``
    relation from the question word to the child.

    :param dep_graph: dependency graph (read only here)
    :param oia_graph: OIA graph receiving nodes and relations
    :param context: conversion context (unused in this function)
    :return: None
    """

    def is_adv_question_mark(n, l):
        return l == "mark" and (
            n.UPOS == "ADV"
            and n.LEMMA in {"when", "where", "how", "whether"})

    # ADJ is for "With the demand so high,"
    # NOUN is for "X the best for Y"
    # AUX is for have in "I have a cat"
    for dep_pred in dep_graph.nodes(
            filter=lambda x: x.UPOS in {"VERB", "ADJ", "NOUN", "AUX", "ADV"}):

        expl = None
        nsubj = None
        # (node, weight) pairs; the weight fixes the final argument order
        objs = []

        for child, rel in dep_graph.children(dep_pred):
            if 'nsubj' in rel or "csubj" in rel:
                nsubj = child
            elif rel.startswith('obj'):
                objs.append((child, 1))
            elif rel.startswith('iobj'):
                objs.append((child, 0))
            elif 'ccomp' in rel or "xcomp" in rel:
                objs.append((child, 2))
            elif "expl" in rel:
                expl = child

        subj = nsubj if nsubj else None

        if expl:  # It VERB subj that    # VERB subj it that
            # an expletive in front of the predicate also takes the subject
            # slot; either way it is queued before every other argument
            if expl.LOC < dep_pred.LOC:
                subj = expl
            objs.insert(0, (expl, -1))

        if not (subj or objs):
            continue

        pred_node = oia_graph.add_words(dep_pred.position)

        if not pred_node:
            continue

        if subj is not None and not oia_graph.has_relation(pred_node, subj):
            subj_node = oia_graph.add_words(subj.position)
            oia_graph.add_argument(pred_node, subj_node, 1)

        # index 1 stays reserved for the subject even when there is none
        ordered = sorted(objs, key=lambda x: x[1])

        for arg_index, (obj, weight) in enumerate(ordered, start=2):
            oia_obj_node = oia_graph.add_words(obj.position)

            adv_question_nodes = [
                n
                for n, l in dep_graph.children(obj,
                                               filter=is_adv_question_mark)
            ]

            if adv_question_nodes:
                assert len(adv_question_nodes) == 1
                interest_node = adv_question_nodes[0]
                oia_interest_node = oia_graph.add_words(interest_node.position)
                oia_graph.add_argument(pred_node, oia_interest_node, arg_index)
                oia_graph.add_function(oia_interest_node, oia_obj_node)
            elif not oia_graph.has_relation(pred_node, obj):
                oia_graph.add_argument(pred_node, oia_obj_node, arg_index)

    # second pass: question word shared by a predicate and its child predicate
    arg_rel = r'subj|nsubj|iobj|obj|xcomp|ccomp'

    pattern = DependencyGraph()
    parent_pred = pattern.create_node()
    child_pred = pattern.create_node()
    question_word = pattern.create_node(LEMMA=r'what|who')

    pattern.add_dependency(parent_pred, child_pred, arg_rel)
    pattern.add_dependency(parent_pred, question_word, arg_rel)
    pattern.add_dependency(child_pred, question_word, arg_rel)

    for match in dep_graph.match(pattern):
        matched = [match[x] for x in (parent_pred, child_pred, question_word)]

        oia_parent, oia_child, oia_question = [
            oia_graph.add_words(x.position) for x in matched
        ]

        oia_question.is_func = True

        rel = oia_graph.get_edge(oia_child, oia_question)

        oia_graph.remove_relation(oia_child, oia_question)
        oia_graph.remove_relation(oia_parent, oia_child)

        oia_graph.add_relation(oia_question, oia_child, "mod_by:" + rel.label)
Example #25
0
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                           context: UD2OIAContext):
    """
    ##### Object-extracted/referred relative clause #####
    ##### the person that Andy knows #####

    Looks for ``entity --acl:relcl--> verb --*subj*--> subject`` in the
    dependency graph. For every match whose subject is not one of
    what/who/which/that, the two dependencies are marked as processed and the
    verb is added to the OIA graph with the subject as argument 1 and the
    entity as (mod) argument 2. A ``ref`` child of the entity located between
    the entity and the verb is linked to the entity with ``add_ref`` and, when
    the verb also governs it, attached to the verb (or to its case word).
    ``ccomp`` children of the verb become argument 3.

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph receiving the new nodes and edges
    :param context: conversion context used to mark processed dependencies
    :return: None
    :raises Exception: when the verb governs the ref word through a relation
        that is neither an ``obj`` relation nor ``advmod`` (and the ref word
        has no case child)
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()

    pattern.add_nodes([verb_node, entity_node, subj_node])

    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):

        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # a relative/interrogative word in subject position is not handled here
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        def _is_inner_ref(n, l):
            # only a "ref" word standing between the entity and the verb counts
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = sorted(
            (n for n, l in dep_graph.children(dep_entity_node,
                                              filter=_is_inner_ref)),
            key=lambda x: x.LOC)

        if ref_nodes:
            # the ref word closest to the verb
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = sorted(
                (n for n, l in dep_graph.children(
                    ref_node, filter=lambda n, l: "case" in l)),
                key=lambda x: x.LOC)

            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node,
                                           oia_verb_node,
                                           1,
                                           mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                elif "obj" in ref_relation:
                    oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                elif ref_relation == "advmod":
                    oia_graph.add_mod(oia_ref_node, oia_verb_node)
                else:
                    raise Exception(
                        "unknown relation: {}".format(ref_relation))

        # NOTE(review): the subject was already registered as argument 1
        # above; the repeated call is kept because whether add_argument is
        # idempotent cannot be seen from here -- confirm and drop one of them.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
Example #26
0
def parataxis(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """

    #################### adverbs like however, then, etc ########################

    For every node that has ``parataxis`` children, the node and those
    children are ordered by LOC and attached as arguments 1..n of one
    predicate in the OIA graph. For each adjacent pair a connector lying
    between the two is looked up: first an ``advmod`` child of the latter
    (SCONJ, or lemma "so"), otherwise a ``punct`` child of the head whose form
    is one of ``: ; -- ,``. Without any connector the predicate is the
    auxiliary node "PARATAXIS"; otherwise it is built from the connector words.

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph receiving the predicate and its arguments
    :param context: conversion context (not used by this rule)
    :return: None
    """

    for head in list(dep_graph.nodes()):

        members = [
            child for child, rel in dep_graph.children(head)
            if rel == "parataxis"
        ]
        if not members:
            continue

        members.append(head)
        members.sort(key=lambda m: m.LOC)

        # one entry (a connector node or None) per adjacent pair of members
        connectors = []

        for former, latter in more_itertools.pairwise(members):

            def _is_adverb_connector(n, l):
                if "advmod" not in l:
                    return False
                if not former.LOC < n.LOC < latter.LOC:
                    return False
                return n.UPOS == "SCONJ" or n.LEMMA in {"so"}

            def _is_punct_connector(n, l):
                if "punct" not in l:
                    return False
                if n.FORM not in {":", ";", "--", ","}:
                    return False
                return former.LOC < n.LOC < latter.LOC

            adverb_hits = [
                n for n, _ in dep_graph.children(latter,
                                                 filter=_is_adverb_connector)
            ]
            punct_hits = [
                n for n, _ in dep_graph.children(head,
                                                 filter=_is_punct_connector)
            ]

            # an adverbial connector wins over punctuation
            if adverb_hits:
                connector = adverb_hits[0]
            elif punct_hits:
                connector = punct_hits[0]
            else:
                connector = None

            connectors.append(connector)

        if all(c is None for c in connectors):
            oia_pred_node = oia_graph.add_aux("PARATAXIS")
        elif len(connectors) == 1:
            oia_pred_node = oia_graph.add_words(connectors[0].position)
        else:
            # interleave the argument placeholders {1}, {2}, ... with the
            # connector words found between them
            position = ["{1}"]
            for slot, connector in enumerate(connectors, start=2):
                if connector is not None:
                    position.extend(connector.position)
                position.append("{{{0}}}".format(slot))
            oia_pred_node = oia_graph.add_words(position)

        for arg_index, member in enumerate(members, start=1):
            oia_member_node = oia_graph.add_words(member.position)
            oia_graph.add_argument(oia_pred_node, oia_member_node, arg_index)
Example #27
0
def gradation(dep_graph: DependencyGraph):
    """
    TODO: do not match with the tech report, and the verb is not considered
    ##### Comparative #####
    ##### Periphrastic gradation #####
    ##### He runs faster than her #####
    ##### Martin is more intelligent than Donald #####
    ##### He is a nicer person than Tom
    ##### She is more than a regular cook

    Matches a head with a comparative ADJ/ADV modifier that governs a
    ``*:than`` dependent carrying the word "than". The comparative part and
    "than" are merged into a single ADP node, which is then re-attached as the
    ``mark`` (verbal head) or ``case`` (other heads) of the compared object,
    and the object is attached directly to the head.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    pattern = DependencyGraph()
    verb_node = pattern.create_node(UPOS="VERB|NOUN|PRON|PROPN|SYM")
    advj_node = pattern.create_node(UPOS="ADJ|ADV", FEATS={"Degree": "Cmp"})
    than_node = pattern.create_node(FORM="than")
    obj_node = pattern.create_node()

    pattern.add_dependency(verb_node, advj_node, r'advmod|amod')
    pattern.add_dependency(advj_node, obj_node,
                           r'\w*(nmod:than|obl:than|advcl:than)\w*')
    # NOTE(review): as written the alternation reads "\w*case" OR "mark\w*";
    # if "\w*(case|mark)\w*" was intended the pattern needs a group -- confirm.
    pattern.add_dependency(obj_node, than_node, r'\w*case|mark\w*')

    # the graph is modified inside the loop, so iterate over a snapshot
    for match in list(dep_graph.match(pattern)):

        dep_verb_node = match[verb_node]
        dep_advj_node = match[advj_node]
        dep_than_node = match[than_node]
        dep_obj_node = match[obj_node]

        def _valid_mod(n, l):
            return (l == "amod" or l == "advmod") and in_interval(
                n, None, dep_advj_node)

        modifiers = [
            n for n, _ in dep_graph.children(dep_advj_node, filter=_valid_mod)
        ]

        if modifiers:
            # offsprings() is wrapped in list() (as verb_phrase does) so that
            # appending works whatever iterable type it returns
            more_than_nodes = list(dep_graph.offsprings(modifiers[0]))
            more_than_nodes.append(dep_than_node)
        else:
            more_than_nodes = [dep_advj_node, dep_than_node]

        dep_more_than_node = merge_dep_nodes(more_than_nodes,
                                             UPOS="ADP",
                                             LOC=dep_than_node.LOC)

        dep_graph.replace_nodes(more_than_nodes, dep_more_than_node)
        dep_graph.remove_dependency(dep_obj_node, dep_more_than_node)
        dep_graph.remove_dependency(dep_more_than_node, dep_obj_node)
        dep_graph.remove_dependency(dep_verb_node, dep_more_than_node)

        if dep_verb_node.UPOS == "VERB":
            head_relation, than_relation = "advcl:", "mark"
        else:
            head_relation, than_relation = "obl:", "case"

        dep_graph.set_dependency(dep_verb_node, dep_obj_node,
                                 head_relation + dep_more_than_node.FORM)
        dep_graph.set_dependency(dep_obj_node, dep_more_than_node,
                                 than_relation)
def general_question(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Mark yes/no ("general") questions in the OIA graph.

    Every VERB node without a what/how/why/when/where word among its
    offsprings is inspected. The verb is treated as the head of a question
    when its "be" part precedes "there"/"here" inside the merged verb, when a
    "be" verb precedes its subject, or when an auxiliary precedes the subject.
    For a question, an auxiliary "WHETHER" node is added as a function of the
    verb. The verb itself is added to the OIA graph as soon as a subject has
    been found, whether or not it turns out to be a question.

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph receiving the verb and "WHETHER" nodes
    :param context: conversion context (not used by this rule)
    :return: None
    """

    question_words = ("what", "how", "why", "when", "where")

    for verb in dep_graph.nodes(filter=lambda n: n.UPOS == "VERB"):

        # NOTE(review): this is a substring test on the lemma, so lemmas such
        # as "show" or "somewhat" also hit -- confirm that this is intended.
        if any(
                any(word in n.LEMMA for word in question_words)
                for n in dep_graph.offsprings(verb)):
            continue

        # check subj and aux (the last matching child wins)
        subj = None
        aux = None
        for child, rel in dep_graph.children(verb):
            if "subj" in rel:
                subj = child
            if "aux" in rel:
                aux = child

        is_be_verb = False

        if isinstance(verb, DependencyGraphSuperNode):
            # a merged verb is expected to carry its auxiliaries inside
            assert aux is None
            for part in verb.nodes:
                if not isinstance(part, DependencyGraphNode):
                    continue
                if part.LEMMA == "be":
                    is_be_verb = True
                if part.UPOS == "AUX":
                    aux = part
        else:
            is_be_verb = verb.LEMMA == "be"

        if aux is None and not is_be_verb:
            # cannot be a general question
            continue

        # an expletive ("there is ...") takes the place of the subject
        expl_child = [n for n, l in dep_graph.children(verb) if l == "expl"]
        if expl_child:
            assert len(expl_child) == 1
            subj = expl_child[0]

        if subj is None:
            logger.warning(
                "subject is none, cannot decide whether it is a question")
            continue

        oia_verb_node = oia_graph.add_words(verb.position)

        is_there_be_verb = False
        if is_be_verb:
            lemma_words = verb.LEMMA.split(' ')
            is_there_be_verb = "there" in lemma_words or "here" in lemma_words

        if is_there_be_verb:
            assert isinstance(verb, DependencyGraphSuperNode)
            be_node = [n for n in verb.nodes if n.LEMMA == "be"][0]
            there_node = [
                n for n in verb.nodes
                if n.LEMMA == "there" or n.LEMMA == "here"
            ][0]
            is_question = be_node.LOC < there_node.LOC
        else:
            is_question = ((is_be_verb and verb.LOC < subj.LOC)
                           or (aux is not None and aux.LOC < subj.LOC))

        if is_question:
            oia_question_node = oia_graph.add_aux("WHETHER")

            oia_graph.add_function(oia_question_node, oia_verb_node)
Example #29
0
def advcl_mark_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Turn a marked adverbial clause into a conjunction predicate.

    Matches ``pred1 --advcl*--> pred2 --mark--> word`` where the lemma of
    ``word`` is listed in ``CONJUNCTION_WORDS[language]``. The marker becomes
    an OIA predicate with the clause (pred2) as argument 1 and the main
    predicate (pred1) as argument 2. For "if", a "then" that modifies pred1
    as ``advmod`` is folded into the predicate as "if {1} then {2}".

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph receiving the predicate and its arguments
    :param context: conversion context used to mark processed dependencies
    :return: None
    """

    pattern = DependencyGraph()
    pred1_node = pattern.create_node()
    pred2_node = pattern.create_node()
    sconj_node = pattern.create_node()

    pattern.add_dependency(pred1_node, pred2_node, r'advcl\w*')
    pattern.add_dependency(pred2_node, sconj_node, 'mark')

    for match in list(dep_graph.match(pattern)):

        dep_pred1_node = match[pred1_node]
        dep_pred2_node = match[pred2_node]
        dep_sconj_node = match[sconj_node]

        # NOTE(review): `language` is not defined in this function; it is
        # resolved from an enclosing scope -- confirm where it comes from.
        if dep_sconj_node.LEMMA not in CONJUNCTION_WORDS[language]:
            continue

        context.processed(dep_pred2_node, dep_sconj_node)
        context.processed(dep_pred1_node, dep_pred2_node)

        oia_pred1_node = oia_graph.add_words(dep_pred1_node.position)
        oia_pred2_node = oia_graph.add_words(dep_pred2_node.position)

        dep_then_nodes = []
        if dep_sconj_node.LEMMA == "if":
            # check whether there is "then"
            dep_then_nodes = [
                n for n, l in dep_graph.children(dep_pred1_node)
                if n.LEMMA == "then" and l == "advmod"
            ]

        if dep_then_nodes:
            assert len(dep_then_nodes) == 1
            dep_then_node = dep_then_nodes[0]
            context.processed(dep_pred1_node, dep_then_node)

            condition_position = dep_sconj_node.position + [
                "{1}"
            ] + dep_then_node.position + ["{2}"]
        else:
            condition_position = dep_sconj_node.position

        oia_condition_node = oia_graph.add_words(condition_position)

        # Every marker reaching this point is a conjunction word (see the
        # guard above), so the clause is always argument 1 and the main
        # predicate argument 2.
        oia_graph.add_argument(oia_condition_node, oia_pred2_node, 1)
        oia_graph.add_argument(oia_condition_node, oia_pred1_node, 2)
Example #30
0
def verb_phrase(dep_graph: DependencyGraph):
    """
    ##### Merging aux and cop with their head VERB #####

    For every VERB/AUX node (except an AUX that is attached to a head by
    ``aux``, and a past-tense form that is the ``amod`` of a later node) the
    node is grouped with its ``compound`` children, with the offsprings of
    its ``advmod`` children that contain no VERB/NOUN/AUX/PRON, and with its
    ``aux`` children. Groups of more than one node are merged into a single
    VERB node that replaces them in the graph.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    verb_phrases = []

    for node in dep_graph.nodes(filter=lambda x: x.UPOS in {"VERB", "AUX"}):

        if node.UPOS == "AUX":
            # an auxiliary of another head is merged through that head
            aux_heads = [
                n for n, l in dep_graph.parents(node,
                                                filter=lambda n, l: l == "aux")
            ]
            if aux_heads:
                continue

        if "Tense" in node.FEATS and "Past" in node.FEATS["Tense"]:
            # if the verb is before the noun, it will be processed by noun_phrase and taken as a part of the noun
            amod_heads = [
                n for n, l in dep_graph.parents(
                    node, filter=lambda n, l: l == "amod" and node.LOC < n.LOC)
            ]
            if amod_heads:
                continue

        root = node
        verbs = [root]
        for n, l in dep_graph.children(root):
            # skip children that also govern the root
            if dep_graph.get_dependency(n, root):
                continue

            if n.LEMMA in {"so", "also", "why"}:
                continue

            if "advmod" in l:
                offsprings = list(dep_graph.offsprings(n))
                if any(x.UPOS in {"VERB", "NOUN", "AUX", "PRON"}
                       for x in offsprings):
                    continue

                verbs.extend(offsprings)
            elif "compound" in l:
                verbs.append(n)

        # keep what stands at or before the root, plus compounds after it
        verbs = [
            x for x in verbs if x.LOC <= root.LOC
            or "compound" in dep_graph.get_dependency(root, x)
        ]

        verbs = continuous_component(verbs, root)

        # add aux
        verbs.extend(n for n, l in dep_graph.children(root) if "aux" in l)

        verbs.sort(key=lambda x: x.LOC)

        if len(verbs) > 1:
            verb_phrases.append((verbs, root))

    # merge only after the scan so the graph is not changed while iterating
    for verbs, root in verb_phrases:
        verb_node = merge_dep_nodes(verbs,
                                    UPOS="VERB",
                                    LOC=root.LOC,
                                    FEATS=root.FEATS)

        dep_graph.replace_nodes(verbs, verb_node)