Exemple #1
0
def multi_words_mark(dep_graph: DependencyGraph):
    """
    Merge multi-word markers into a single node.

    Example: in "arise on to", the marker "on to" should be combined.

    For every node, the offsprings of its children attached through a relation
    containing "mark" are collected. When there are at least two of them and
    none is a content word, every node inside the LOC span they cover is merged
    into one node, which is re-attached to the head with a "mark" dependency.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    content_upos = {"NOUN", "NUM", "VERB", "ADJ", "ADV", "PRON"}
    candidates = []

    for head in dep_graph.nodes():
        collected = []
        for mark_child, _ in dep_graph.children(head, filter=lambda n, l: "mark" in l):
            collected.extend(dep_graph.offsprings(mark_child))

        # zero or one marker word: nothing to merge
        if len(collected) <= 1:
            continue
        if any(x.UPOS in content_upos for x in collected):
            continue

        collected.sort(key=lambda n: n.LOC)
        candidates.append((head, collected))

    for head, collected in candidates:
        # candidates were gathered before any merge; skip those whose nodes
        # are no longer present in the graph
        if not all([dep_graph.get_node(x.ID) for x in collected]):
            continue

        first_loc, last_loc = collected[0].LOC, collected[-1].LOC
        span = sorted(
            (n for n in dep_graph.nodes() if first_loc <= n.LOC <= last_loc),
            key=lambda n: n.LOC)

        if any(x.UPOS in NOUN_UPOS for x in span):
            continue

        merged = merge_dep_nodes(span,
                                 UPOS=span[0].UPOS,
                                 LOC=span[0].LOC
                                 )
        for old in span:
            dep_graph.remove_dependency(head, old)
        dep_graph.replace_nodes(span, merged)
        dep_graph.add_dependency(head, merged, "mark")
Exemple #2
0
def and_or_conjunction(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Copy coordination structures from the dependency graph to the OIA graph.

    Example: "I like apples, bananas and oranges."

    Each node that has children attached through a relation starting with
    "arg_con" is added to the OIA graph, and every such child is added as one
    of its arguments. The argument index is read from the first value of the
    relation.

    :param dep_graph: dependency graph to read from
    :param oia_graph: OIA graph that receives the words and arguments
    :param context: conversion context (not used in this function)
    :return: None
    """

    def is_conj_argument(n, l):
        return l.startswith("arg_con")

    for head in dep_graph.nodes():
        components = list(dep_graph.children(head, filter=is_conj_argument))
        if not components:
            continue

        oia_head = oia_graph.add_words(head.position)

        for component, rels in components:
            oia_component = oia_graph.add_words(component.position)
            position_in_conj = int(rels.values()[0])
            oia_graph.add_argument(oia_head, oia_component, position_in_conj)
def find_new_nodes(old_node, dep_graph: DependencyGraph):
    """
    Return the first node of the graph whose ID contains the ID of ``old_node``.

    :param old_node: node whose ID is looked for (``old_node.ID in node.ID``)
    :param dep_graph: dependency graph to search
    :return: the first matching node in iteration order, or None if there is none
    """
    matching = (
        candidate for candidate in dep_graph.nodes()
        if old_node.ID in candidate.ID
    )
    return next(matching, None)
def ever_since(dep_graph: DependencyGraph):
    """
    Merge every "ever" that is directly followed by "since" into one node.

    A node with LEMMA "ever" is paired with the node with LEMMA "since" located
    at LOC + 1. The 'advmod' dependencies pointing to the paired "ever" nodes
    are removed first, then each pair is merged into a node that takes the UPOS
    and LOC of the "since" node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    all_nodes = list(dep_graph.nodes())
    evers = [n for n in all_nodes if n.LEMMA == "ever"]
    sinces = [n for n in all_nodes if n.LEMMA == "since"]
    if not (evers and sinces):
        return

    # first "since" node found at each LOC
    since_at = {}
    for since in sinces:
        since_at.setdefault(since.LOC, since)

    pairs = []
    stale_edges = []
    for ever in evers:
        follower = since_at.get(ever.LOC + 1)
        if follower is None:
            continue
        pairs.append((ever, follower))
        stale_edges.extend(
            (parent, ever, 'advmod')
            for parent, rel in dep_graph.parents(ever)
            if 'advmod' in rel)

    # all removals happen before any merge
    for src, trg, rel in stale_edges:
        dep_graph.remove_dependency(src, trg, rel)

    for ever, since in pairs:
        merged = merge_dep_nodes([ever, since],
                                 UPOS=since.UPOS,
                                 LOC=since.LOC)
        dep_graph.replace_nodes([ever, since], merged)
Exemple #5
0
def goeswith(dep_graph: DependencyGraph):
    """
    Merge a node with its "goeswith" children into a single node.

    The merged node takes the UPOS of the last part (in LOC order) whose UPOS
    is not "X", or "X" if every part is "X", and the LOC of the last part.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    groups = []
    for head in dep_graph.nodes():
        parts = [
            child for child, _ in dep_graph.children(
                head, filter=lambda n, l: "goeswith" in l)
        ]
        if parts:
            parts.append(head)
            parts.sort(key=lambda n: n.LOC)
            groups.append(parts)

    for parts in groups:
        # the last non-"X" tag wins
        upos = next((p.UPOS for p in reversed(parts) if p.UPOS != "X"), "X")
        merged = merge_dep_nodes(parts,
                                 UPOS=upos,
                                 LOC=parts[-1].LOC
                                 )
        dep_graph.replace_nodes(parts, merged)
def conjunction(dep_graph: DependencyGraph):
    """
    Process coordination structures of the dependency graph.

    Example: "I like apples, bananas and oranges."

    A conjunction root is a node that has at least one outgoing relation
    starting with "conj" and no incoming one. Each root is handed to
    ``process_conjunction``; ``process_head_conj`` is called once at the end.

    TODO: currently cannot process nested conjunction. should process from bottom to up

    :param dep_graph: dependency graph, modified in place by the called helpers
    :return: None
    """

    def has_conj(edges):
        return any(rels.startswith("conj") for _, rels in edges)

    # collect all roots first, then process them
    roots = [
        node for node in dep_graph.nodes()
        if not has_conj(dep_graph.parents(node))
        and has_conj(dep_graph.children(node))
    ]

    for root in roots:
        logger.debug("found the root of conjunction")
        logger.debug(str(root))
        process_conjunction(dep_graph, root)

    process_head_conj(dep_graph)
Exemple #7
0
def parallel_list(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Turn "list" relations into a "LIST" auxiliary predicate in the OIA graph.

    For each node having children attached through a relation containing
    "list", the node and those children are ordered by LOC and added as
    arguments 1, 2, ... of a new "LIST" aux node.

    :param dep_graph: dependency graph to read from
    :param oia_graph: OIA graph that receives the aux node and its arguments
    :param context: conversion context (not used in this function)
    :return: None
    """
    groups = []
    for head in dep_graph.nodes():
        members = [
            child for child, _ in dep_graph.children(
                head, filter=lambda n, l: "list" in l)
        ]
        if members:
            members.append(head)
            members.sort(key=lambda n: n.LOC)
            groups.append(members)

    for members in groups:
        list_pred = oia_graph.add_aux("LIST")
        for arg_no, member in enumerate(members, start=1):
            oia_member = oia_graph.add_words(member.position)
            oia_graph.add_argument(list_pred, oia_member, arg_no)
Exemple #8
0
def number_per_unit(dep_graph: DependencyGraph):
    """
    Merge "<NUM> <SYM> <NOUN>" sequences into a single NUM node.

    For each SYM node, the nodes at LOC - 1 and LOC + 1 are looked up; when the
    former is a NUM and the latter a NOUN, the three nodes are merged into one
    node with UPOS "NUM" located at the LOC of the noun.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    triples = []
    for symbol in dep_graph.nodes(filter=lambda n: n.UPOS == "SYM"):
        before = dep_graph.get_node_by_loc(symbol.LOC - 1)
        after = dep_graph.get_node_by_loc(symbol.LOC + 1)

        if before and after and before.UPOS == "NUM" and after.UPOS == "NOUN":
            triples.append((before, symbol, after))

    for triple in triples:
        merged = merge_dep_nodes(triple,
                                 UPOS="NUM",
                                 LOC=triple[-1].LOC
                                 )
        dep_graph.replace_nodes(triple, merged)
Exemple #9
0
def multi_words_cc(dep_graph: DependencyGraph):
    """
    Merge multi-word coordinating conjunctions into a single node.

    For every node, the offsprings of its children attached through the
    relation "cc" are collected. When there are at least two of them and none
    is a NOUN, NUM or VERB, every node inside the LOC span they cover is merged
    into one node, which is re-attached to the head with a "cc" dependency.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    blocking_upos = {"NOUN", "NUM", "VERB"}
    cc_phrases = []

    for node in dep_graph.nodes():
        ccs = []
        for n, l in dep_graph.children(node, filter=lambda n, l: "cc" == l):
            ccs.extend(dep_graph.offsprings(n))

        # zero or one conjunction word: nothing to merge
        if len(ccs) <= 1:
            continue
        if any(x.UPOS in blocking_upos for x in ccs):
            continue

        ccs.sort(key=lambda n: n.LOC)
        cc_phrases.append((node, ccs))

    for node, ccs in cc_phrases:
        # Candidates were gathered before any merge. The check has to run on
        # the gathered nodes: nodes freshly read from the graph always exist,
        # so checking them would never detect a stale candidate.
        if not all([dep_graph.get_node(x.ID) for x in ccs]):
            continue

        min_loc = ccs[0].LOC
        max_loc = ccs[-1].LOC
        span = [n for n in dep_graph.nodes() if min_loc <= n.LOC <= max_loc]
        # keep LOC order so that span[0] is the leftmost word regardless of
        # the iteration order of the graph
        span.sort(key=lambda n: n.LOC)

        if any(x.UPOS in blocking_upos for x in span):
            continue

        new_cc_node = merge_dep_nodes(span,
                                      UPOS=span[0].UPOS,
                                      LOC=span[0].LOC
                                      )

        dep_graph.replace_nodes(span, new_cc_node)
        for old in span:
            dep_graph.remove_dependency(node, old)

        # the head itself may have been part of the merged span
        if dep_graph.get_node(node.ID):
            dep_graph.add_dependency(node, new_cc_node, "cc")
Exemple #10
0
def separated_asas(dep_graph: DependencyGraph):
    """
    Handle the equality comparison "A is as X a C as B".

    - the first 'as' is always the advmod of a following element, X, which is
      within the range of as ... as
    - the second 'as' is always the dependent of B
    - B sometimes depends on the first 'as', sometimes depends on X
    - sometimes X has a head that is also within the range of as ... as

    The matched words from the first 'as' up to the adjective, plus the second
    'as', are merged into one ADJ node, and the noun is linked to B with an
    "acl:<merged form>" dependency.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pat = DependencyGraph()

    pat_adj = DependencyGraphNode(UPOS="ADJ")
    pat_noun = DependencyGraphNode(UPOS="NOUN")
    pat_as1 = DependencyGraphNode(FORM="as")
    pat_as2 = DependencyGraphNode(FORM="as")
    pat_obj = DependencyGraphNode()

    pat.add_nodes([pat_noun, pat_adj, pat_as1, pat_as2, pat_obj])
    pattern_edges = (
        (pat_noun, pat_adj, r'amod'),
        (pat_adj, pat_as1, r'\w*advmod\w*'),
        (pat_as1, pat_obj, r'\w*advcl:as\w*'),
        (pat_obj, pat_as2, r'mark'),
    )
    for source, target, label in pattern_edges:
        pat.add_dependency(source, target, label)

    merges = []
    for match in dep_graph.match(pat):
        noun = match[pat_noun]
        adj = match[pat_adj]
        as1 = match[pat_as1]
        as2 = match[pat_as2]
        obj = match[pat_obj]

        # only the surface order "as ADJ NOUN as OBJ" is handled
        if not (as1.LOC < adj.LOC < noun.LOC < as2.LOC < obj.LOC):
            continue

        span = [n for n in dep_graph.nodes() if as1.LOC <= n.LOC <= adj.LOC]
        span.append(as2)
        span.sort(key=lambda x: x.LOC)

        merged = merge_dep_nodes(span, UPOS="ADJ", LOC=as2.LOC)
        merges.append((span, merged, noun, obj))

    for span, merged, noun, obj in merges:
        dep_graph.replace_nodes(span, merged)

        dep_graph.remove_dependency(merged, obj)
        dep_graph.remove_dependency(noun, merged)

        dep_graph.add_dependency(noun, obj, "acl:" + merged.FORM)
def det_adjv_phrase(dep_graph: DependencyGraph):
    """
    Merge "<determiner> ... <ADJ/ADV>" phrases into a single NOUN node.

    An ADJ/ADV node is skipped when any label of its incoming relations is in
    ``valid_adj_form`` or is "amod"/"advmod". Otherwise, if its last "det"
    child (by LOC) has one of the lemmas the/a/an/some/any/all, the offsprings
    of the node located between the determiner and the node are merged into
    one node with UPOS "NOUN" at the LOC of the node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    accepted_det_lemmas = {"the", "a", "an", "some", "any", "all"}
    phrases = []

    for node in dep_graph.nodes(filter=lambda n: n.UPOS in {"ADJ", "ADV"}):

        # The labels are inspected twice below, so they must be materialized:
        # a bare chain iterator would be exhausted by the first check and the
        # second one would silently never match.
        parent_rels = list(itertools.chain.from_iterable(
            rel for parent, rel in dep_graph.parents(node)))

        if any(rel in valid_adj_form for rel in parent_rels):
            continue

        if any(rel in {"amod", "advmod"} for rel in parent_rels):
            continue

        dets = [
            n for n, l in dep_graph.children(node,
                                             filter=lambda n, l: l == "det")
        ]
        if not dets:
            continue

        # use the determiner closest to the end of the sentence
        det = max(dets, key=lambda x: x.LOC)
        if det.LEMMA not in accepted_det_lemmas:
            continue

        root = node
        # the filter lambda is consumed immediately by sorted(), while det and
        # root still refer to the current iteration
        elements = sorted(
            dep_graph.offsprings(
                root, filter=lambda n: det.LOC <= n.LOC <= root.LOC),
            key=lambda x: x.LOC)

        # NOTE: the elements are not checked for being contiguous

        phrases.append((elements, root))

    for elements, root in phrases:
        noun_node = merge_dep_nodes(elements, UPOS="NOUN", LOC=root.LOC)
        dep_graph.replace_nodes(elements, noun_node)
Exemple #12
0
def and_or(dep_graph: DependencyGraph):
    """
    Merge an "and ... or" pair of conjunction words (e.g. "and/or") into one node.

    The pattern is a conjunct that has both an "and" and an "or" as cc
    children, the "or" being a conj of the "and". If the conjunct is attached
    with "conj:and" or "conj:or" and no VERB/NOUN/ADJ/ADP/ADV lies between the
    two words, the words from "and" to "or" are merged and the conjunct
    relation is set to "conj:<merged form>".

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    blocking_upos = {"VERB", "NOUN", "ADJ", "ADP", "ADV"}

    pat = DependencyGraph()

    pat_parent = pat.create_node()
    pat_conjunct = pat.create_node()
    pat_and = pat.create_node(LEMMA=r"\band\b")
    pat_or = pat.create_node(LEMMA=r"\bor\b")

    pat.add_dependency(pat_parent, pat_conjunct, r'\bconj:\w*')
    pat.add_dependency(pat_conjunct, pat_and, r'\bcc\b')
    pat.add_dependency(pat_conjunct, pat_or, r'\bcc\b')
    pat.add_dependency(pat_and, pat_or, r'\bconj')

    # matches are materialized because the graph is modified in the loop
    for match in list(dep_graph.match(pat)):
        parent = match[pat_parent]
        conjunct = match[pat_conjunct]
        and_word = match[pat_and]
        or_word = match[pat_or]

        rel = dep_graph.get_dependency(parent, conjunct)
        if not (rel.startswith("conj:and") or rel.startswith("conj:or")):
            continue

        between = [
            n for n in dep_graph.nodes()
            if and_word.LOC < n.LOC < or_word.LOC
        ]
        if any(n.UPOS in blocking_upos for n in between):
            continue

        span = sorted(between + [and_word, or_word], key=lambda n: n.LOC)

        # skip when an earlier merge already consumed one of the nodes
        if not all([dep_graph.get_node(x.ID) for x in span]):
            continue

        merged = merge_dep_nodes(span,
                                 UPOS=and_word.UPOS,
                                 LOC=and_word.LOC,
                                 FEATS=and_word.FEATS
                                 )

        dep_graph.replace_nodes(span, merged)
        dep_graph.set_dependency(parent, conjunct, "conj:" + merged.FORM)
def noun_all(dep_graph: DependencyGraph):
    """
    Merge a nominal with a directly following determiner "all" (e.g. "we all").

    A node with UPOS NOUN/PROPN/PRON/X/NUM/SYM is merged with a child whose
    relation contains "det", whose LEMMA is "all" and whose LOC is the LOC of
    the nominal plus one. The merged node keeps the UPOS and LOC of the nominal.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    nominal_upos = {"NOUN", "PROPN", "PRON", "X", "NUM", "SYM"}

    pairs = [
        (nominal, child)
        for nominal in dep_graph.nodes(filter=lambda x: x.UPOS in nominal_upos)
        for child, rels in dep_graph.children(nominal)
        if "det" in rels and child.LEMMA == "all" and child.LOC == nominal.LOC + 1
    ]

    for nominal, all_node in pairs:
        merged = merge_dep_nodes([nominal, all_node],
                                 UPOS=nominal.UPOS,
                                 LOC=nominal.LOC)
        dep_graph.replace_nodes([nominal, all_node], merged)
Exemple #14
0
def adjv_phrase(dep_graph: DependencyGraph):
    """
    Merge runs of adjacent ADJ/ADV words into a single node.

    Starting from an ADJ/ADV node that heads a phrase, the elements returned
    by ``valid_adjv_element`` are scanned from right to left and every ADJ/ADV
    that sits immediately before the current left edge is added to the run.
    Runs of more than one word are merged into a node that keeps the UPOS and
    LOC of the head.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    adjv_upos = {"ADJ", "ADV"}

    def heads_a_phrase(candidate):
        # An advmod from a non-ADJ/ADV parent settles the question at once;
        # otherwise any relation intersecting valid_adj_form disqualifies.
        verdict = True
        for parent, rel in dep_graph.parents(candidate):
            if "advmod" in rel and parent.UPOS not in adjv_upos:
                return True
            if rel.intersect(valid_adj_form):
                verdict = False
        return verdict

    phrases = []

    for node in dep_graph.nodes(filter=lambda n: n.UPOS in {"ADJ", "ADV"}):
        if not heads_a_phrase(node):
            continue

        elements = sorted(valid_adjv_element(node, dep_graph),
                          key=lambda x: x.LOC)

        run = [node]
        wanted_loc = node.LOC - 1
        for element in reversed(elements):
            if element.UPOS in adjv_upos and element.LOC == wanted_loc:
                run.append(element)
                wanted_loc = element.LOC - 1

        run.sort(key=lambda x: x.LOC)

        if len(run) > 1:
            phrases.append((run, node))

    for run, head in phrases:
        merged = merge_dep_nodes(run, UPOS=head.UPOS, LOC=head.LOC)
        dep_graph.replace_nodes(run, merged)
Exemple #15
0
def single_node(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext):
    """
    Add the only regular word of the sentence to the OIA graph.

    Regular nodes are those whose UPOS is neither "ROOT" nor "PUNCT". Nothing
    happens unless there is exactly one of them.

    :param dep_graph: dependency graph to read from
    :param oia_graph: OIA graph that receives the word
    :param context: conversion context (not used in this function)
    :return: None
    """
    ignored_upos = {"ROOT", "PUNCT"}
    words = [n for n in dep_graph.nodes() if n.UPOS not in ignored_upos]

    if len(words) != 1:
        return

    only_word = words[0]
    oia_graph.add_words(only_word.position)
Exemple #16
0
def to_verb(dep_graph: DependencyGraph):
    """
    Merge an infinitive marker "to" with the verb that directly follows it.

    A VERB is skipped when "to" is among the values of any incoming relation.
    Otherwise each child whose relation contains "mark", whose LEMMA is "to",
    which sits at LOC - 1 of the verb and which is not a conjunction super
    node, is merged with the verb. The merged node keeps the UPOS and LOC of
    the verb.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pairs = []
    for verb in dep_graph.nodes(filter=lambda x: x.UPOS in {"VERB"}):
        if any("to" in rels.values()
               for parent, rels in dep_graph.parents(verb)):
            continue

        for child, rels in dep_graph.children(verb):
            if "mark" not in rels:
                continue
            if not (child.LEMMA == "to" and child.LOC == verb.LOC - 1):
                continue
            if isinstance(child, DependencyGraphSuperNode) and child.is_conj:
                continue
            pairs.append((child, verb))

    for to_word, verb in pairs:
        merged = merge_dep_nodes([to_word, verb], UPOS=verb.UPOS, LOC=verb.LOC)
        dep_graph.replace_nodes([to_word, verb], merged)
Exemple #17
0
def fallback_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Attach subordinating conjunctions that are not yet in the OIA graph.

    For every SCONJ node with one of the listed lemmas whose words are not in
    the OIA graph yet, the parent it marks is added as argument 1 of the
    conjunction, unless the context reports the edge as already processed.

    :param dep_graph: dependency graph to read from
    :param oia_graph: OIA graph that receives the words and the argument
    :param context: conversion context, queried through ``is_processed``
    :return: None
    """
    handled_lemmas = {
        "because", "so", "if", "then", "otherwise", "after", "before",
        "and", "or", "but"
    }

    for node in dep_graph.nodes():
        if oia_graph.has_word(node.position):
            continue
        if not (node.UPOS == "SCONJ" and node.LEMMA in handled_lemmas):
            continue

        marked = [n for n, l in dep_graph.parents(node) if "mark" in l]
        if not marked:
            continue

        assert len(marked) == 1
        parent = marked[0]

        logger.debug("context = " + str(context.processed_edges))

        if context.is_processed(parent, node):
            continue

        oia_parent = oia_graph.add_words(parent.position)
        oia_sconj = oia_graph.add_words(node.position)

        # NOTE(review): the original code branched on LEMMA in
        # {"because", "if"} but used argument index 1 in both branches;
        # confirm whether the other lemmas were meant to use another index.
        oia_graph.add_argument(oia_sconj, oia_parent, 1)
Exemple #18
0
def be_not_phrase2(dep_graph: DependencyGraph):
    """
    Merge "be" with a "not" that is attached to one of its objects.

    For each node whose LEMMA contains the word "be", its children attached
    through a relation starting with 'obj' are inspected in LOC order. When an
    object has a PART child with "not" in its LEMMA attached by "advmod", that
    dependency is removed and the "not" is merged into the "be" node, which
    keeps its UPOS and LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    def is_not_particle(n):
        return n.UPOS == "PART" and "not" in n.LEMMA.split(" ")

    def negation_edge(n, l):
        return l == "advmod" and is_not_particle(n)

    found = []
    for be_node in dep_graph.nodes():
        if "be" not in be_node.LEMMA.split(" "):
            continue

        objects = sorted(
            (child for child, rel in dep_graph.children(be_node)
             if rel.startswith('obj')),
            key=lambda x: x.LOC)

        for obj in objects:
            negations = [
                n for n, l in dep_graph.children(obj, filter=negation_edge)
            ]
            if not negations:
                continue
            assert len(negations) == 1
            found.append((be_node, obj, negations[0]))

    for be_node, obj, not_node in found:
        dep_graph.remove_dependency(obj, not_node, 'advmod')
        merged = merge_dep_nodes((be_node, not_node),
                                 UPOS=be_node.UPOS,
                                 LOC=be_node.LOC)
        dep_graph.replace_nodes([be_node, not_node], merged)
Exemple #19
0
def aux_not(dep_graph: DependencyGraph):
    """
    Merge an auxiliary with a directly following "n't" (e.g. "do" + "n't").

    For each AUX node, the node at LOC + 1 is looked up; when it is a PART
    whose FORM is "n't", both are merged into a node that keeps the UPOS and
    LOC of the auxiliary.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pairs = []
    for aux in dep_graph.nodes(filter=lambda n: n.UPOS == "AUX"):
        follower = dep_graph.get_node_by_loc(aux.LOC + 1)
        if follower and follower.UPOS == "PART" and follower.FORM == "n't":
            pairs.append((aux, follower))

    for aux, negation in pairs:
        merged = merge_dep_nodes([aux, negation], UPOS=aux.UPOS, LOC=aux.LOC)
        dep_graph.replace_nodes([aux, negation], merged)
Exemple #20
0
def two_node_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Handle a sentence made of exactly two regular words linked by "case".

    Regular nodes are those whose UPOS is neither "ROOT" nor "PUNCT". When
    there are exactly two and the dependency from the second (by LOC) to the
    first equals "case", both are added to the OIA graph and the second
    becomes argument 2 of the first.

    :param dep_graph: dependency graph to read from
    :param oia_graph: OIA graph that receives the words and the argument
    :param context: conversion context (not used in this function)
    :return: None
    """
    ignored_upos = {"ROOT", "PUNCT"}
    words = [n for n in dep_graph.nodes() if n.UPOS not in ignored_upos]

    if len(words) != 2:
        return

    case_node, noun_node = sorted(words, key=lambda x: x.LOC)

    linked_by_case = dep_graph.get_dependency(noun_node, case_node) == "case"
    if not linked_by_case:
        return

    oia_case = oia_graph.add_words(case_node.position)
    oia_noun = oia_graph.add_words(noun_node.position)
    oia_graph.add_argument(oia_case, oia_noun, 2)
Exemple #21
0
def single_root(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext):
    """

    :param dep_graph:
    :param oia_graph:
    :return:
    """

    in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                  for node in oia_graph.nodes()]

    zero_degree_nodes = [n for n, degree in in_degrees if degree == 0]

    if len(zero_degree_nodes) == 0:
        return
    elif len(zero_degree_nodes) == 1:
        root = zero_degree_nodes[0]
    else:
        # len(zero_degree_nodes) >= 2
        dists_to_root = []
        for oia_node in zero_degree_nodes:

            related_dep_nodes = set()
            if isinstance(oia_node, OIAWordsNode):
                dep_node = dep_graph.get_node_by_spans(oia_node.spans)

                if dep_node:
                    if isinstance(dep_node, DependencyGraphNode):
                        related_dep_nodes.add(dep_node)
                    elif isinstance(dep_node, list):
                        for node in dep_node:
                            related_dep_nodes.add(node)
                    else:
                        logger.error("get_node_by_spans return type unknown.")

            children = [n for n, l in oia_graph.children(oia_node)]

            for child in children:
                if isinstance(child, OIAWordsNode):
                    dep_node = dep_graph.get_node_by_spans(child.spans)

                    if dep_node:
                        if isinstance(dep_node, DependencyGraphNode):
                            related_dep_nodes.add(dep_node)
                        elif isinstance(dep_node, list):
                            for node in dep_node:
                                related_dep_nodes.add(node)
                        else:
                            logger.error(
                                "get_node_by_spans return type unknown.")

            dep_root = dep_graph.get_node("0")
            real_dep_root = next(n for n, l in dep_graph.children(dep_root))

            min_dist_to_root = min([
                len(
                    nx.shortest_path(dep_graph.g.to_undirected(),
                                     real_dep_root.ID, dep_node.ID))
                for dep_node in related_dep_nodes
            ])

            dists_to_root.append((oia_node, min_dist_to_root))

        dists_to_root.sort(key=lambda x: x[1])
        root_candidates = []

        min_dist = dists_to_root[0][1]

        for oia_node, dist in dists_to_root:
            if dist == min_dist:
                root_candidates.append(oia_node)

        if len(root_candidates) == 1:

            root = root_candidates[0]

        else:

            scores = []

            score_map = {":": 40, "\"": 30, ";": 20, ",": 10, "(": -10}

            for cand in root_candidates:

                score = -100
                if any([
                        "func" in rel.label
                        for n, rel in oia_graph.children(cand)
                ]):
                    score = 100

                children = [n for n, l in oia_graph.children(cand)]
                dep_children = []
                for child in children:
                    if isinstance(child, OIAWordsNode):
                        dep_node = dep_graph.get_node_by_spans(child.spans)

                        if dep_node:
                            if isinstance(dep_node, DependencyGraphNode):
                                dep_children.append(dep_node)
                            elif isinstance(dep_node, list):
                                for node in dep_node:
                                    dep_children.append(node)
                            else:
                                logger.error(
                                    "get_node_by_spans return type unknown.")
                # check what between them
                dep_children.sort(key=lambda x: x.LOC)

                for node in dep_graph.nodes():
                    if node.LOC is None:
                        continue
                    if dep_children[0].LOC < node.LOC < dep_children[-1].LOC:

                        if node.FORM in score_map:
                            score = max(score, score_map[node.FORM])

                if isinstance(cand, OIAWordsNode):
                    dep_node = dep_graph.get_node_by_spans(cand.spans)
                    if dep_node:
                        if isinstance(dep_node, DependencyGraphNode):
                            if dep_node.LEMMA in IMPORTANT_CONNECTION_WORDS:
                                score += 8
                        elif isinstance(dep_node, list):
                            for node in dep_node:
                                if node.LEMMA in IMPORTANT_CONNECTION_WORDS:
                                    score += 8
                        else:
                            logger.error(
                                "get_node_by_spans return type unknown.")

                elif isinstance(cand,
                                OIAAuxNode) and cand.label == "PARATAXIS":
                    score += 4

                scores.append((cand, score))

            scores.sort(key=lambda x: x[1], reverse=True)

            top_nodes = []
            for node, score in scores:
                if score == scores[0][1]:
                    top_nodes.append(node)

            if len(top_nodes) == 1:
                root = top_nodes[0]

            elif len(top_nodes) >= 3:
                # multiple top node found, merge them to one
                if all(
                        isinstance(node, OIAAuxNode)
                        and node.label == "PARATAXIS" for node in top_nodes):
                    next_nodes = []
                    for top in top_nodes:
                        for n, l in list(oia_graph.children(top)):
                            next_nodes.append(n)
                        oia_graph.remove_node(top)
                        for node in zero_degree_nodes:
                            if node.ID == top.ID:
                                zero_degree_nodes.remove(node)
                    root = oia_graph.add_aux("PARATAXIS")
                    oia_graph.add_node(root)
                    next_nodes.sort(key=lambda x: x.ID)
                    for index, second_node in enumerate(next_nodes):
                        oia_graph.add_argument(root, second_node, index)
                else:
                    logger.error(
                        "Deep intersection point, currently cannot process")
                    return
                # raise Exception("Two top nodes? I think it is not possible ")

            else:  # len(top_nodes) == 2:
                # check who is prev, and who is next

                dep_tops = []

                for top in top_nodes:
                    if isinstance(top, OIAWordsNode):
                        dep_node = dep_graph.get_node_by_spans(top.spans)

                        if dep_node:
                            if isinstance(dep_node, DependencyGraphNode):
                                dep_tops.append((top, dep_node))
                            elif isinstance(dep_node, list):
                                for node in dep_node:
                                    dep_tops.append((top, node))
                            else:
                                logger.error(
                                    "get_node_by_spans return type unknown.")

                if not len(dep_tops) >= 1:
                    logger.error("Multiple AUX head ")
                    return

                dep_tops.sort(key=lambda x: x[1].LOC)

                root = dep_tops[0][0]

    # root obtained, change other zero-in-degree node

    logger.debug("Root obtained ")
    logger.debug(root)

    for node in zero_degree_nodes:
        # print('zero_degree_nodes:', node)
        if root.ID == node.ID:
            continue

        if is_conj_node(node, dep_graph):
            # print('is_conj_node:',node,'  !!!!!!!!!!')
            for child, rel in list(oia_graph.children(node)):
                label = rel.label
                if "pred.arg." in label:
                    arg_no = label.split(".")[-1]
                    new_rel = "as:pred.arg." + arg_no
                    oia_graph.remove_relation(node, child)
                    oia_graph.add_relation(child, node, new_rel)

            continue

        ref_childs = [
            child for child, rel in oia_graph.children(node)
            if rel.label == "ref"
        ]

        if ref_childs:
            for child in ref_childs:
                oia_graph.remove_relation(node, child)
                oia_graph.add_relation(child, node, "as:ref")

            continue

    in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                  for node in oia_graph.nodes()]

    zero_degree_nodes = [
        n for n, degree in in_degrees if degree == 0 and n.ID != root.ID
    ]

    while len(zero_degree_nodes) > 0:

        logger.debug("we found zero_degree_nodes: ")
        for node in zero_degree_nodes:
            logger.debug(node)

        root_offsprings = set(oia_graph.offsprings(root))

        logger.debug("root offsprings :")
        for n in root_offsprings:
            logger.debug(n)

        intersections = []
        for node in zero_degree_nodes:

            node_offspring = set(oia_graph.offsprings(node))

            logger.debug("node offsprings :")
            for n in node_offspring:
                logger.debug(n)

            intersection = root_offsprings.intersection(node_offspring)

            logger.debug("we found {0} initial intersection :".format(
                len(intersection)))
            for n in intersection:
                logger.debug(n)

            if intersection:

                top_intersection_point = None
                parents_to_root = None
                parents_to_other = None
                for x in intersection:
                    parents = set([n for n, l in oia_graph.parents(x)])
                    if not parents.intersection(intersection):
                        top_intersection_point = x
                        parents_to_root = parents.intersection(root_offsprings)
                        parents_to_other = parents.intersection(node_offspring)
                        break

                if top_intersection_point is None:
                    logger.error("It seems we have a problem ")
                    continue

                logger.debug("we found a intersections: ")
                logger.debug(top_intersection_point)

                logger.debug("Its parents to root: ")
                for x in parents_to_root:
                    logger.debug(x)

                logger.debug("Its parents to other: ")
                for x in parents_to_other:
                    logger.debug(x)

                intersections.append((top_intersection_point, parents_to_root,
                                      parents_to_other))

        if len(intersections) == 0:
            logger.error("seems we have disconnected compoenent")
            break
            # raise Exception("Unexpected situation")

        for intersection_point, parents_to_root, parents_to_other in intersections:

            # if node not in set([n for n, l in oia_graph.parents(intersection_point)]):
            #     logger.error("Deep intersection point, currently cannot process")
            #     # raise Exception("Deep intersection point, currently cannot process")
            #     continue

            for node in parents_to_other:

                if isinstance(node, OIAAuxNode) and node.label == "LIST":
                    logger.error("lets see what happens for LIST")
                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for LIST "
                        )

                    relation = oia_graph.get_edge(node, intersection_point)
                    oia_graph.remove_relation(node, intersection_point)
                    oia_graph.add_relation(intersection_point, node,
                                           "as:" + relation.label)
                    # for parent, l in list(oia_graph.parents(intersection_point)):
                    #     if parent != node:
                    #         oia_graph.remove_relation(parent, intersection_point)
                    #         oia_graph.add_relation(parent, node, l.label)
                elif (isinstance(node, OIAAuxNode)
                      and node.label == "WHETHER"):

                    # parents_to_root = list(oia_graph.parents_on_path(intersection_point, root))
                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for WHETHER "
                        )

                    for parent in parents_to_root:
                        relation = oia_graph.get_edge(parent,
                                                      intersection_point)
                        oia_graph.remove_relation(parent, intersection_point)
                        oia_graph.add_relation(parent, node, relation.label)
                else:

                    relation = oia_graph.get_edge(node, intersection_point)
                    oia_graph.remove_relation(node, intersection_point)
                    oia_graph.add_relation(intersection_point, node,
                                           "as:" + relation.label)

        in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                      for node in oia_graph.nodes()]

        zero_degree_nodes = [
            n for n, degree in in_degrees if degree == 0 and n.ID != root.ID
        ]
Exemple #22
0
def continuous_asas(dep_graph: DependencyGraph):
    """
    Merge an "as ... as" comparative (e.g. "as far as I know") into one node.

    Shape of the construction that the two patterns below look for:

    * the first 'as' is an ``advmod`` of a following element X that lies
      inside the as ... as span;
    * the second 'as' is the ``mark`` / ``case`` dependent of the compared
      element B;
    * B depends either on the first 'as' (pattern 1) or on X (pattern 2);
    * X has a head H (``advmod`` / ``amod``), which may itself sit inside
      the as ... as span.

    For every match whose surface order is ``as1 < X < as2 < B``, all nodes
    located from the first 'as' through X, plus the second 'as', are merged
    into a single ADP node.  B is then attached to H as ``advcl:<form>``
    when H is a VERB and ``obl:<form>`` otherwise, and the merged node
    becomes the ``mark`` (resp. ``case``) of B.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|PRON|PROPN")
    adv_node = DependencyGraphNode(UPOS="ADV|ADJ")
    as1_node = DependencyGraphNode(LEMMA="as")
    as2_node = DependencyGraphNode(LEMMA="as")
    # ADJ is for "as soon as possible"
    verb2_node = DependencyGraphNode(UPOS="VERB|ADJ|NOUN|PROPN|PRON")
    pattern_nodes = [verb_node, adv_node, as1_node, as2_node, verb2_node]

    # pattern 1: the compared element hangs off the first 'as'
    pattern1 = DependencyGraph()
    pattern1.add_nodes(pattern_nodes)
    pattern1.add_dependency(verb_node, adv_node, r'advmod|amod')
    pattern1.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
    pattern1.add_dependency(as1_node, verb2_node, r'advcl:as|obl:as|advmod')
    pattern1.add_dependency(verb2_node, as2_node, r'mark|case')

    # pattern 2: the compared element hangs off X instead
    pattern2 = DependencyGraph()
    pattern2.add_nodes(pattern_nodes)
    pattern2.add_dependency(verb_node, adv_node, r'advmod|amod')
    pattern2.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
    pattern2.add_dependency(adv_node, verb2_node, r'advcl:as|obl:as|advmod')
    pattern2.add_dependency(verb2_node, as2_node, r'mark|case')

    # Matches are materialised up front because the graph is rewritten
    # inside the loop.
    matches = list(dep_graph.match(pattern1)) + list(dep_graph.match(pattern2))

    for match in matches:

        matched = [match[n] for n in pattern_nodes]
        (dep_verb_node, dep_adv_node, dep_as1_node, dep_as2_node,
         dep_verb2_node) = matched

        # An earlier iteration may already have merged some of these nodes
        # away; such a match is stale (same guard as multi_word_fix_flat).
        if not all(dep_graph.get_node(n.ID) for n in matched):
            continue

        if not (dep_as1_node.LOC < dep_adv_node.LOC < dep_as2_node.LOC <
                dep_verb2_node.LOC):
            continue

        # everything from the first 'as' up to X, plus the second 'as'
        pred = [
            node for node in dep_graph.nodes()
            if dep_as1_node.LOC <= node.LOC <= dep_adv_node.LOC
        ]
        pred.append(dep_as2_node)
        pred.sort(key=lambda x: x.LOC)

        dep_asas_node = merge_dep_nodes(pred, UPOS="ADP",
                                        LOC=dep_adv_node.LOC)

        dep_graph.replace_nodes(pred, dep_asas_node)
        # drop the edges inherited from the merged words before re-wiring
        dep_graph.remove_dependency(dep_verb2_node, dep_asas_node)
        dep_graph.remove_dependency(dep_asas_node, dep_verb2_node)
        dep_graph.remove_dependency(dep_verb_node, dep_asas_node)

        if dep_verb_node.UPOS == "VERB":
            clause_rel, marker_rel = "advcl:", "mark"
        else:
            clause_rel, marker_rel = "obl:", "case"

        dep_graph.set_dependency(dep_verb_node, dep_verb2_node,
                                 clause_rel + dep_asas_node.FORM)
        dep_graph.set_dependency(dep_verb2_node, dep_asas_node, marker_rel)
Exemple #23
0
def multi_word_fix_flat(dep_graph: DependencyGraph):
    """
    Merge words joined by ``fixed`` / ``flat`` / ``compound`` relations.

    A candidate phrase starts at any node that has no incoming
    fixed/flat/compound edge but at least one such outgoing edge; it
    consists of that node plus the offsprings of those children.
    Candidates are processed from longest to shortest, and a candidate
    whose nodes are no longer all present in the graph is skipped.  For
    each remaining candidate, the offsprings of its members that lie inside
    the candidate's LOC span (plus a quote character directly adjacent to
    the span) are merged into one node carrying the head's UPOS and LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    fixed_rels = {"fixed", "flat", "compound"}

    def is_fixed_rel(n, l):
        return any(x in l for x in fixed_rels)

    phrases = []

    for node in dep_graph.nodes():

        # a node that is itself a fixed/flat/compound dependent is handled
        # as part of the phrase of its head
        if list(dep_graph.parents(node, filter=is_fixed_rel)):
            continue

        phrase = []
        for n, l in dep_graph.children(node, filter=is_fixed_rel):
            phrase.extend(dep_graph.offsprings(n))

        if not phrase:
            continue

        phrase.append(node)
        phrase.sort(key=lambda n: n.LOC)
        phrases.append((phrase, node))

    # longest first, so that overlapping shorter candidates hit the
    # "already processed" guard below
    phrases.sort(key=lambda x: len(x[0]), reverse=True)

    for phrase, head in phrases:

        if not all(dep_graph.get_node(x.ID) for x in phrase):
            continue  # already been processed

        merging_nodes = set()
        min_loc = 10000
        max_loc = -1
        for child in phrase:
            if isinstance(child, DependencyGraphNode):
                min_loc = min(min_loc, child.LOC)
                # was max(min_loc, ...): that let a later plain node shrink
                # a span already widened by a super node
                max_loc = max(max_loc, child.LOC)
            elif isinstance(child, DependencyGraphSuperNode):
                min_loc = min(min_loc, min(x.LOC for x in child.nodes))
                max_loc = max(max_loc, max(x.LOC for x in child.nodes))
            merging_nodes.update(dep_graph.offsprings(child))

        merged_nodes = {n for n in merging_nodes
                        if min_loc <= n.LOC <= max_loc}
        # also absorb a quote character sitting right next to the span
        for node in merging_nodes:
            if node.LEMMA in {"\"", "\'"} and \
                    node.LOC in (min_loc - 1, max_loc + 1):
                merged_nodes.add(node)
        merged_nodes = sorted(merged_nodes, key=lambda x: x.LOC)

        logger.debug("multi_word_fix_flat: we are merging ")
        logger.debug("\n".join(str(node) for node in merged_nodes))
        logger.debug("with head \n" + str(head))
        new_node = merge_dep_nodes(merged_nodes, UPOS=head.UPOS, LOC=head.LOC)

        dep_graph.replace_nodes(merged_nodes, new_node)
Exemple #24
0
def parataxis(dep_graph: DependencyGraph, oia_graph: OIAGraph,
              context: UD2OIAContext):
    """
    Convert ``parataxis`` dependencies into an OIA predicate with arguments.

    For each node that has ``parataxis`` children, the node and those
    children are ordered by LOC.  Between every two neighbouring clauses a
    connector is looked up: first an ``advmod`` child of the later clause
    located between the two (UPOS SCONJ, or lemma "so"); failing that, a
    ``punct`` child of the head node (":", ";", "--" or ",") located
    between the two.  With no connector at all the predicate is the
    auxiliary node ``PARATAXIS``; with a single pair it is the connector's
    words; otherwise it is a template interleaving ``{k}`` placeholders
    with the connectors found.  Each clause then becomes argument 1..n.

    :param dep_graph: dependency graph to read
    :param oia_graph: OIA graph receiving the new predicate and arguments
    :param context: conversion context (not used here)
    :return: None
    """

    separators = {":", ";", "--", ","}

    for head in list(dep_graph.nodes()):

        clauses = [
            child for child, rel in dep_graph.children(head)
            if "parataxis" == rel
        ]
        if not clauses:
            continue

        clauses.append(head)
        clauses.sort(key=lambda item: item.LOC)

        connectors = []
        for former, latter in more_itertools.pairwise(clauses):

            adverbial = [
                n for n, l in dep_graph.children(
                    latter,
                    filter=lambda n, l: "advmod" in l and
                    (former.LOC < n.LOC < latter.LOC) and
                    (n.UPOS == "SCONJ" or n.LEMMA in {"so"}))
            ]
            punctuation = [
                n for n, l in dep_graph.children(
                    head,
                    filter=lambda n, l: "punct" in l and
                    n.FORM in separators and
                    (former.LOC < n.LOC < latter.LOC))
            ]

            # The adverbial connector is deliberately left attached in the
            # dependency graph; only its words are reused here.
            if adverbial:
                connectors.append(adverbial[0])
            elif punctuation:
                connectors.append(punctuation[0])
            else:
                connectors.append(None)

        if all(found is None for found in connectors):
            oia_pred_node = oia_graph.add_aux("PARATAXIS")
        elif len(connectors) == 1:
            oia_pred_node = oia_graph.add_words(connectors[0].position)
        else:
            # template: {1} <connector> {2} <connector> {3} ...
            template = ["{1}"]
            for slot, found in enumerate(connectors, start=2):
                if found is not None:
                    template.extend(found.position)
                template.append("{{{0}}}".format(slot))
            oia_pred_node = oia_graph.add_words(template)

        for arg_no, clause in enumerate(clauses, start=1):
            oia_arg_node = oia_graph.add_words(clause.position)
            oia_graph.add_argument(oia_pred_node, oia_arg_node, arg_no)
Exemple #25
0
def verb_phrase(dep_graph: DependencyGraph):
    """
    Merge a verb with its auxiliaries and close modifiers into one node.

    Every VERB/AUX node is a candidate root, except:

    * an AUX that is itself attached to a head through ``aux``;
    * a past-tense form that is the ``amod`` of a later node (left to the
      noun-phrase handling).

    The phrase for a root is collected from its children, ignoring children
    that also have an edge back to the root and children whose lemma is
    so/also/why:

    * offsprings of ``advmod`` children, unless those offsprings contain a
      VERB/NOUN/AUX/PRON;
    * ``compound`` children.

    Only elements located at or before the root are kept (compounds may
    follow it), the result is narrowed by ``continuous_component``, and all
    ``aux`` children are added afterwards.  Phrases with more than one node
    are merged into a single VERB node keeping the root's LOC and FEATS.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    verb_phrases = []

    for node in dep_graph.nodes(filter=lambda x: x.UPOS in {"VERB", "AUX"}):

        if node.UPOS == "AUX":
            # this auxiliary will be merged into its head's phrase instead
            aux_heads = [
                n for n, l in dep_graph.parents(node,
                                                filter=lambda n, l: l == "aux")
            ]
            if aux_heads:
                continue

        if "Tense" in node.FEATS and "Past" in node.FEATS["Tense"]:
            # if the verb is before the noun, it will be processed by
            # noun_phrase and taken as a part of the noun
            modified_nouns = [
                n for n, l in dep_graph.parents(
                    node, filter=lambda n, l: l == "amod" and node.LOC < n.LOC)
            ]
            if modified_nouns:
                continue

        root = node
        verbs = [root]
        for n, l in dep_graph.children(root):
            # skip children that also point back at the root
            if dep_graph.get_dependency(n, root):
                continue

            if n.LEMMA in {"so", "also", "why"}:
                continue

            if "advmod" in l:
                offsprings = list(dep_graph.offsprings(n))
                if any(x.UPOS in {"VERB", "NOUN", "AUX", "PRON"}
                       for x in offsprings):
                    continue

                verbs.extend(offsprings)
            elif "compound" in l:
                verbs.append(n)

        # keep what precedes the verb; compounds are allowed to follow it
        verbs = [
            x for x in verbs if x.LOC <= root.LOC
            or "compound" in dep_graph.get_dependency(root, x)
        ]

        verbs = continuous_component(verbs, root)

        # add aux
        verbs.extend(n for n, l in dep_graph.children(root) if "aux" in l)

        verbs.sort(key=lambda x: x.LOC)

        if len(verbs) > 1:
            verb_phrases.append((verbs, root))

    # merging is deferred so the graph is not rewritten while being scanned
    for verbs, root in verb_phrases:
        verb_node = merge_dep_nodes(verbs,
                                    UPOS="VERB",
                                    LOC=root.LOC,
                                    FEATS=root.FEATS)

        dep_graph.replace_nodes(verbs, verb_node)
def general_question(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Mark yes/no (general) questions with a ``WHETHER`` function node.

    A VERB node is examined when none of its offsprings carries a wh-word
    (what/how/why/when/where) in its lemma, when it has an auxiliary or is
    a form of "be", and when a subject is found (an ``expl`` child takes
    precedence over a ``*subj*`` child).  It counts as a question when:

    * for a merged "there/here be" verb, "be" precedes "there"/"here";
    * otherwise, for a "be" verb, the verb precedes the subject;
    * otherwise, the auxiliary precedes the subject.

    The verb is added to the OIA graph as soon as a subject has been found,
    whether or not it turns out to be a question.

    :param dep_graph: dependency graph to inspect
    :param oia_graph: OIA graph receiving the WHETHER node
    :param context: conversion context (not used here)
    :return: None
    """
    wh_words = {"what", "how", "why", "when", "where"}

    for verb in dep_graph.nodes(filter=lambda n: n.UPOS == "VERB"):

        # NOTE(review): this is a substring test, so a lemma such as "show"
        # also matches "how" - presumably meant for merged multi-word
        # lemmas; confirm before tightening.
        if any(
                any(x in n.LEMMA for x in wh_words)
                for n in dep_graph.offsprings(verb)):
            continue

        # check subj and aux
        subj = None
        aux = None
        for child, rel in dep_graph.children(verb):
            if "subj" in rel:
                subj = child
            if "aux" in rel:
                aux = child

        is_be_verb = False

        if isinstance(verb, DependencyGraphSuperNode):
            # a merged verb is expected to have absorbed its auxiliaries
            assert aux is None
            for n in verb.nodes:
                if not isinstance(n, DependencyGraphNode):
                    continue
                if n.LEMMA == "be":
                    is_be_verb = True
                if n.UPOS == "AUX":
                    aux = n
        else:
            is_be_verb = verb.LEMMA == "be"

        if aux is None and not is_be_verb:
            # cannot be a general question
            continue

        expl_child = [n for n, l in dep_graph.children(verb) if l == "expl"]
        if expl_child:
            assert len(expl_child) == 1
            subj = expl_child[0]

        if subj is None:
            logger.warning(
                "subject is none, cannot decide whether it is a question")
            continue

        oia_verb_node = oia_graph.add_words(verb.position)

        lemma_words = verb.LEMMA.split(' ')
        is_there_be_verb = is_be_verb and ("there" in lemma_words
                                           or "here" in lemma_words)

        is_question = False

        if is_there_be_verb:

            assert isinstance(verb, DependencyGraphSuperNode)
            be_node = [n for n in verb.nodes if n.LEMMA == "be"][0]
            there_node = [
                n for n in verb.nodes
                if n.LEMMA == "there" or n.LEMMA == "here"
            ][0]
            # "is there ..." (question) versus "there is ..." (statement)
            if be_node.LOC < there_node.LOC:
                is_question = True

        elif is_be_verb and verb.LOC < subj.LOC:

            is_question = True

        elif aux is not None and aux.LOC < subj.LOC:

            is_question = True

        if is_question:
            oia_question_node = oia_graph.add_aux("WHETHER")

            oia_graph.add_function(oia_question_node, oia_verb_node)
Exemple #27
0
def simple_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Build predicate/argument relations for plain clauses.

    First pass - for every VERB/ADJ/NOUN/AUX/ADV node, the children are
    scanned for a subject (``nsubj`` / ``csubj``), objects (``obj``,
    ``iobj``, ``ccomp`` / ``xcomp``) and an expletive (``expl``).  The
    subject fills argument 1; the remaining arguments are numbered from 2
    in the order expl, iobj, obj, ccomp/xcomp.  When an object has a
    ``mark`` child that is an ADV with lemma when/where/how/whether, that
    marker becomes the argument and is linked to the object as a function.

    Second pass - where a what/who word is an argument of both a parent
    predicate and one of the parent's argument predicates, the question
    word is flagged as a function, the child->question and parent->child
    relations are removed, and a ``mod_by:<label>`` relation is added from
    the question word to the child predicate.

    :TODO badcase  Attached is a new link
    :param dep_graph: dependency graph to read
    :param oia_graph: OIA graph to extend
    :param context: conversion context (not used here)
    :return: None
    """

    def _is_adv_question_word(n):
        return (n.UPOS == "ADV"
                and n.LEMMA in {"when", "where", "how", "whether"})

    def _is_question_mark(n, l):
        return l == "mark" and _is_adv_question_word(n)

    # ADJ is for "With the demand so high,"
    # NOUN is for "X the best for Y"
    # AUX is for have in "I have a cat"
    predicate_upos = {"VERB", "ADJ", "NOUN", "AUX", "ADV"}

    for dep_pred in dep_graph.nodes(
            filter=lambda x: x.UPOS in predicate_upos):

        expl = None
        nsubj = None
        objs = []  # (node, sort weight)

        for child, rel in dep_graph.children(dep_pred):
            if 'nsubj' in rel or "csubj" in rel:
                nsubj = child
            elif rel.startswith('obj'):
                objs.append((child, 1))
            elif rel.startswith('iobj'):
                objs.append((child, 0))
            elif 'ccomp' in rel or "xcomp" in rel:
                objs.append((child, 2))
            elif "expl" in rel:
                expl = child

        subj = nsubj if nsubj else None

        if expl:
            # "It VERB subj that" / "VERB subj it that": the expletive
            # always leads the object list, and replaces the subject when
            # it precedes the predicate.
            objs.insert(0, (expl, -1))
            if expl.LOC < dep_pred.LOC:
                subj = expl

        if not (subj or objs):
            continue

        oia_pred = oia_graph.add_words(dep_pred.position)
        if not oia_pred:
            continue

        if subj is not None and not oia_graph.has_relation(oia_pred, subj):
            subj_node = oia_graph.add_words(subj.position)
            oia_graph.add_argument(oia_pred, subj_node, 1)

        # slot 1 is reserved for the subject even when there is none
        ordered_objs = sorted(objs, key=lambda item: item[1])
        for arg_index, (obj, _weight) in enumerate(ordered_objs, start=2):

            oia_obj_node = oia_graph.add_words(obj.position)

            question_marks = [
                n for n, l in dep_graph.children(obj,
                                                 filter=_is_question_mark)
            ]

            if not question_marks:
                if not oia_graph.has_relation(oia_pred, obj):
                    oia_graph.add_argument(oia_pred, oia_obj_node, arg_index)
                continue

            assert len(question_marks) == 1
            oia_marker_node = oia_graph.add_words(question_marks[0].position)
            oia_graph.add_argument(oia_pred, oia_marker_node, arg_index)
            oia_graph.add_function(oia_marker_node, oia_obj_node)

    # ---- second pass: what/who shared by a parent and a child predicate
    arg_rels = r'subj|nsubj|iobj|obj|xcomp|ccomp'

    pattern = DependencyGraph()
    parent_pred = pattern.create_node()
    child_pred = pattern.create_node()
    question_word = pattern.create_node(LEMMA=r'what|who')

    pattern.add_dependency(parent_pred, child_pred, arg_rels)
    pattern.add_dependency(parent_pred, question_word, arg_rels)
    pattern.add_dependency(child_pred, question_word, arg_rels)

    for match in dep_graph.match(pattern):
        dep_nodes = [match[x] for x in (parent_pred, child_pred,
                                        question_word)]
        oia_parent, oia_child, oia_question = [
            oia_graph.add_words(x.position) for x in dep_nodes
        ]

        oia_question.is_func = True

        child_edge = oia_graph.get_edge(oia_child, oia_question)

        oia_graph.remove_relation(oia_child, oia_question)
        oia_graph.remove_relation(oia_parent, oia_child)

        oia_graph.add_relation(oia_question, oia_child,
                               "mod_by:" + child_edge.label)
def noun_phrase(dep_graph: DependencyGraph):
    """
    Merge each noun phrase of the dependency graph into a single node.

    Steps, per node whose UPOS is NOUN/PROPN/X/NUM/SYM (the phrase root):

    1. ``case`` children whose lemma or form (lower-cased) appears among
       the labels of the root's incoming relations are set aside and never
       merged.
    2. Children accepted by ``is_valid_np_child`` contribute their
       offsprings, unless the child is an ADP, follows the root without a
       fixed/compound/nummod/nmod:tmod/flat/nmod:npmod/dep relation, is a
       conjunction super node, or one of its offsprings has an incoming
       acl/obl/advcl/subj/obj relation.
    3. Everything located before the last ``det`` at or before the root is
       cut off.
    4. Leading "-" / "--" lemmas and leading ADP/CCONJ/PUNCT nodes are
       stripped.

    Phrases fully contained in another phrase are dropped; two phrases
    that only partially overlap raise an exception.  Each remaining phrase
    absorbs a directly adjacent quote child and is merged into one node
    with the root's UPOS and the LOC of the phrase's last node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises Exception: when two candidate phrases partially overlap
    """
    nouns = []
    # we first find np roots
    for root in dep_graph.nodes(
            filter=lambda x: x.UPOS in {"NOUN", "PROPN", "X", "NUM", "SYM"}):

        logger.debug("checking the node:")
        logger.debug(str(root))

        # labels of all incoming relations of the root, with "_" turned
        # into a space so they can be compared with word forms below
        parent_rels = set(
            itertools.chain.from_iterable(l.values()
                                          for n, l in dep_graph.parents(root)))
        parent_rels = set(rel.replace("_", " ") for rel in parent_rels)

        # case words already encoded in an incoming relation label stay
        # outside the noun phrase
        escaped_case_node = set()
        if parent_rels:
            case_nodes = [
                x
                for x, l in dep_graph.children(root,
                                               filter=lambda n, l: l == "case")
            ]
            for node in case_nodes:
                if node.LEMMA.lower() in parent_rels or node.FORM.lower(
                ) in parent_rels:
                    # lemma is for including
                    escaped_case_node.add(node)

        valid_np_children = [(n, l) for n, l in dep_graph.children(
            root, filter=lambda n, l: is_valid_np_child(dep_graph, root, l, n))
                             ]
        logger.debug("noun_phrase: valid_np_children:")
        for node, l in valid_np_children:
            logger.debug(str(node))

        np_elements = [root]

        for n, l in valid_np_children:
            if n.UPOS == "ADP":
                continue
            # children after the root are only kept for these relations
            if n.LOC > root.LOC and \
                    not any(l.startswith(x)
                            for x in {"fixed", "compound", "nummod",
                                      "nmod:tmod", "flat", "nmod:npmod", "dep"}):
                continue
            if n in escaped_case_node:
                continue

            if isinstance(n, DependencyGraphSuperNode) and n.is_conj:
                continue

            offsprings = list(dep_graph.offsprings(n))
            valid_np_component = True

            # reject the whole subtree as soon as one of its nodes has an
            # incoming clause-level relation
            for x in offsprings:
                for parent, rels in dep_graph.parents(x):
                    if any(x in rels
                           for x in {"acl", "obl", "advcl", "subj", "obj"}):
                        valid_np_component = False
                        break
                if not valid_np_component:
                    break
            if valid_np_component:
                np_elements.extend(offsprings)

        logger.debug("noun_phrase: candidate np_elements:")
        for node in np_elements:
            logger.debug(str(node))

        det = [
            n for n, l in dep_graph.children(root,
                                             filter=lambda n, l: l == "det")
        ]
        det = [x for x in det if x.LOC <= root.LOC]
        det.sort(key=lambda x: x.LOC)

        if det:
            # use the determiner closest to the root as the left boundary
            det = det[-1]
            # check the element should be continuous
            np_elements = [x for x in np_elements if det.LOC <= x.LOC]
            logger.debug(
                "noun_phrase: det found, cut the nodes before the det")

        filtered_np_elements = sorted(list(np_elements), key=lambda x: x.LOC)
        # strip leading dashes / adpositions / conjunctions / punctuation
        # until the first element is none of them
        changed = True
        while changed:
            changed = False
            if filtered_np_elements and filtered_np_elements[0].LEMMA in {
                    "-", "--"
            }:
                filtered_np_elements.pop(0)
                changed = True
            if filtered_np_elements and filtered_np_elements[0].UPOS in {
                    "ADP", "CCONJ", "PUNCT"
            }:
                filtered_np_elements.pop(0)
                changed = True

        if filtered_np_elements:
            nouns.append((set(filtered_np_elements), root))

    # Pairwise overlap check between all candidate phrases.
    # NOTE(review): two phrases with identical node sets both end up in
    # sub_nouns (each is the "smaller" one in one of the two orderings),
    # so neither would be merged - confirm this cannot happen upstream.
    sub_nouns = []
    for idx1, (phrase1, head1) in enumerate(nouns):
        for idx2, (phrase2, head2) in enumerate(nouns):
            if idx1 == idx2:
                continue

            # phrasex is the longer phrase, phrasey the shorter (or equal)
            phrasex, phrasey = (
                phrase1, phrase2) if len(phrase1) > len(phrase2) else (phrase2,
                                                                       phrase1)
            common = phrasex.intersection(phrasey)

            if not common:
                continue
            elif len(common) == len(phrasey):
                # node2 is a sub np of node1, delete
                sub_nouns.append(phrasey)
            else:
                print("Phrase 1", [x.ID for x in phrase1])
                print("Phrase 2", [x.ID for x in phrase2])
                # partial overlap: the two phrases cannot both be merged
                raise Exception("duplicate words found")

    for idx, (phrase, head) in enumerate(nouns):

        if phrase in sub_nouns:
            continue

        phrase = sorted(list(phrase), key=lambda x: x.LOC)

        # Attach a quote character that is a child of a phrase node and
        # sits right before/after the phrase.
        # NOTE(review): `phrase` is modified while it is being iterated, so
        # nodes can be revisited after an insert; the result depends on
        # this exact statement order.
        for node in phrase:
            for child, _ in dep_graph.children(node):
                if child.LOC == phrase[0].LOC - 1 and child.LEMMA in {
                        "\"", "\'"
                }:
                    phrase.insert(0, child)
                if child.LOC == phrase[-1].LOC + 1 and child.LEMMA in {
                        "\"", "\'"
                }:
                    phrase.append(child)

        # the merged node takes the position of the phrase's last word
        noun_node = merge_dep_nodes(phrase, UPOS=head.UPOS, LOC=phrase[-1].LOC)
        dep_graph.replace_nodes(phrase, noun_node)
def advp_phrase(dep_graph: DependencyGraph):
    """
    Fuse an adposition with the adverbial modifiers standing right before it.

    For every ADP node and each of its (parent, relation) pairs, the
    parent's children are scanned from right to left. Children attached
    with an ``advmod`` relation, tagged ADJ or ADV, and located at the
    position immediately before the current boundary are expanded with
    ``valid_adjv_element`` and collected. When at least one such modifier
    is found, the collected nodes plus the ADP node are merged into a
    single node with UPOS ``ADV`` placed at the ADP node's LOC; if the
    relation to that parent contains ``case``, the ``case`` dependency
    between the parent and the ADP node is removed first.

    All graph mutations are deferred until every ADP node has been
    inspected.

    :param dep_graph: the dependency graph, rewritten in place
    :return: None
    case: english-UD-12774
    """
    adp_tags = {"ADP"}
    pending_merges = []
    pending_removals = []

    for node in dep_graph.nodes(filter=lambda n: n.UPOS in adp_tags):
        merge_members = set()

        for parent, rel in dep_graph.parents(node):
            # Stop looking at further parents once a "case" parent already
            # carries this word's FORM or LEMMA in one of its own incoming
            # relations (presumably the adposition is encoded in that
            # label -- confirm against the relation type).
            if "case" in rel and any(
                    node.FORM in label.values() or node.LEMMA in label.values()
                    for _, label in dep_graph.parents(parent)):
                break

            matched = False
            for modifier in _preceding_advmods(dep_graph, parent, node):
                merge_members |= set(valid_adjv_element(modifier, dep_graph))
                matched = True

            if matched and 'case' in rel:
                pending_removals.append((parent, node, 'case'))

        if not merge_members:
            continue

        merge_members.add(node)
        pending_merges.append(
            (sorted(merge_members, key=lambda member: member.LOC), node))

    # Drop the queued "case" edges before any node is replaced.
    for source, target, label in pending_removals:
        dep_graph.remove_dependency(source, target, label)

    for members, adp in pending_merges:
        merged = merge_dep_nodes(members, UPOS='ADV', LOC=adp.LOC)
        dep_graph.replace_nodes(members, merged)


def _preceding_advmods(dep_graph, parent, node):
    """
    Yield the children of ``parent`` that form an unbroken run of
    ADJ/ADV ``advmod`` modifiers ending right before ``node``.

    Children are visited in descending LOC order. Every child located at
    or after ``node`` only moves the boundary; afterwards a child is
    yielded only when it sits exactly one position before the boundary,
    which then moves onto that child.
    """
    by_position = sorted(dep_graph.children(parent),
                         key=lambda pair: pair[0].LOC)
    boundary = -1

    for child, child_rel in reversed(by_position):
        if child.LOC >= node.LOC:
            boundary = child.LOC
            continue
        if "advmod" not in child_rel:
            continue
        if child.UPOS not in {"ADJ", "ADV"}:
            continue
        if child.LOC != boundary - 1:
            continue
        yield child
        boundary = child.LOC