def acl_mod_verb(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Record a verb attached to a nominal through ``acl`` as its modifier.

    This is called after adnominal_clause_mark, which means there is no
    mark on the clause; the verb is simply added as a modifier of the
    nominal in the OIA graph.

    :param dep_graph: dependency graph searched for ``acl`` edges
    :param oia_graph: OIA graph that receives the modifier relation
    :param context: conversion context, asked whether the pair was
        already processed
    :return: None
    """

    acl_pattern = DependencyGraph()

    # ADJ is for "many/some" used as an abbreviation of "many X/some X"
    # (standing for a NOUN); ADV is for "here" in "i am here thinking xxx".
    head_slot = acl_pattern.create_node(UPOS="NOUN|PRON|PROPN|ADJ|ADV|NUM")
    # AUX is for "can"/"have" when the true verb is omitted.
    clause_slot = acl_pattern.create_node(UPOS="VERB|AUX")

    acl_pattern.add_nodes([head_slot, clause_slot])
    acl_pattern.add_dependency(head_slot, clause_slot, r'acl')

    for found in dep_graph.match(acl_pattern):
        noun, verb = found[head_slot], found[clause_slot]

        # Skip pairs handled by an earlier rule or already linked
        # (directly or indirectly) in the OIA graph.
        if context.is_processed(noun, verb) or oia_graph.has_relation(
                noun, verb, direct_link=False):
            continue

        oia_verb = oia_graph.add_words(verb.position)
        oia_noun = oia_graph.add_words(noun.position)

        # An "acl:<subtype>" label, when present, must be unique.  The
        # subtype is not turned into a predicate here: the relation is
        # always a plain modification.
        edge = dep_graph.get_dependency(noun, verb)
        subtyped = [name for name in edge.rels if name.startswith("acl:")]
        if subtyped:
            assert len(subtyped) == 1

        oia_graph.add_mod(oia_verb, oia_noun)
Exemple #2
0
def amod_obl(dep_graph: DependencyGraph):
    """
    Fold an adjective and the adposition of its oblique into one ADP node.

    Covers phrases such as "more than" and "successful by": a noun with an
    ``amod`` adjective whose single ``obl:*`` child carries a ``case``
    adposition.  The adjective and adposition are merged, and the noun is
    linked to the oblique with ``nmod:<merged form>``.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    shape = DependencyGraph()

    head_slot = DependencyGraphNode(UPOS=r"NOUN|PRON")
    adj_slot = DependencyGraphNode(UPOS="ADJ")
    adp_slot = DependencyGraphNode(UPOS="ADP")
    obl_slot = DependencyGraphNode()

    shape.add_nodes([head_slot, adj_slot, adp_slot, obl_slot])
    shape.add_dependency(head_slot, adj_slot, r'amod')
    shape.add_dependency(adj_slot, obl_slot, r'obl:\w+')
    shape.add_dependency(obl_slot, adp_slot, r'case')

    # First pass: collect candidates without touching the graph.
    candidates = []
    for found in dep_graph.match(shape):
        noun = found[head_slot]
        adj = found[adj_slot]
        obl = found[obl_slot]
        adp = found[adp_slot]

        obliques_of_adj = list(
            dep_graph.children(adj, filter=lambda n, l: "obl" in l))
        if len(obliques_of_adj) > 1:
            # e.g. "similar in form to the one": more than one oblique
            continue

        # The adposition must be the one named in the obl sub-label.
        if adp.FORM not in dep_graph.get_dependency(adj, obl).values():
            continue

        # Only the surface order noun < adjective < adposition < oblique.
        if not (noun.LOC < adj.LOC < adp.LOC < obl.LOC):
            continue

        candidates.append((noun, adj, obl, adp))

    # Second pass: rewrite the graph for every collected candidate.
    for noun, adj, obl, adp in candidates:
        merged = merge_dep_nodes([adj, adp], UPOS="ADP", LOC=adp.LOC)

        dep_graph.remove_dependency(noun, adj)
        dep_graph.remove_dependency(adj, obl)

        dep_graph.replace_nodes([adj, adp], merged)
        dep_graph.add_dependency(noun, obl, "nmod:" + merged.FORM)
Exemple #3
0
def and_or(dep_graph: DependencyGraph):
    """
    Merge a coordinated "and ... or" pair into a single conjunction node.

    Looks for a conjunct that has both an "and" and an "or" as ``cc``
    children, with the "or" attached to the "and" by ``conj``.  The two
    words, plus anything lying between them, are merged into one node and
    the ``conj`` label of the conjunct is renamed after the merged form.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    shape = DependencyGraph()

    head_slot = shape.create_node()
    conjunct_slot = shape.create_node()
    and_slot = shape.create_node(LEMMA=r"\band\b")
    or_slot = shape.create_node(LEMMA=r"\bor\b")

    shape.add_dependency(head_slot, conjunct_slot, r'\bconj:\w*')
    shape.add_dependency(conjunct_slot, and_slot, r'\bcc\b')
    shape.add_dependency(conjunct_slot, or_slot, r'\bcc\b')
    shape.add_dependency(and_slot, or_slot, r'\bconj')

    # Content words between "and" and "or" mean this is not a fixed phrase.
    blocking_upos = {"VERB", "NOUN", "ADJ", "ADP", "ADV"}

    # The matches are materialised because the loop rewrites the graph.
    for found in list(dep_graph.match(shape)):
        parent = found[head_slot]
        member = found[conjunct_slot]
        and_word = found[and_slot]
        or_word = found[or_slot]

        label = dep_graph.get_dependency(parent, member)
        if not (label.startswith("conj:and") or label.startswith("conj:or")):
            continue

        between = [
            n for n in dep_graph.nodes()
            if and_word.LOC < n.LOC < or_word.LOC
        ]
        if any(n.UPOS in blocking_upos for n in between):
            continue

        span = sorted(between + [and_word, or_word], key=lambda n: n.LOC)

        # An earlier merge in this loop may already have consumed a node.
        still_present = [dep_graph.get_node(n.ID) for n in span]
        if not all(still_present):
            continue

        merged = merge_dep_nodes(span,
                                 UPOS=and_word.UPOS,
                                 LOC=and_word.LOC,
                                 FEATS=and_word.FEATS)

        dep_graph.replace_nodes(span, merged)
        dep_graph.set_dependency(parent, member, "conj:" + merged.FORM)
Exemple #4
0
def multi_word_sconj(dep_graph: DependencyGraph):
    """
    Merge a multi-word subordinating conjunction into one mark node.

    For a verb with an ``advcl:*`` clause whose ``mark`` is an SCONJ named
    in the advcl sub-label, the mark and all of its offsprings are merged
    into one node; the ``advcl`` label is rebuilt from the merged lemma.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    shape = DependencyGraph()

    main_slot = shape.create_node(UPOS="VERB")
    clause_slot = shape.create_node(UPOS="VERB")
    mark_slot = shape.create_node(UPOS="SCONJ")

    shape.add_dependency(main_slot, clause_slot, r'advcl:\w*')
    shape.add_dependency(clause_slot, mark_slot, r'mark')

    # First pass: collect the phrases to merge.
    pending = []
    for found in dep_graph.match(shape):
        main_verb = found[main_slot]
        clause_verb = found[clause_slot]
        mark = found[mark_slot]

        advcl_values = dep_graph.get_dependency(main_verb,
                                                clause_verb).values()
        if mark.LEMMA not in advcl_values:
            continue

        phrase = sorted(dep_graph.offsprings(mark), key=lambda n: n.LOC)
        if len(phrase) == 1:
            # a single-word mark needs no merging
            continue

        pending.append((main_verb, clause_verb, mark, phrase))

    # Second pass: apply the merges.
    for main_verb, clause_verb, mark, phrase in pending:

        # Skip phrases whose words were consumed by an earlier merge.
        present = [dep_graph.get_node(word.ID) for word in phrase]
        if not all(present):
            continue

        dep_graph.remove_dependency(clause_verb, mark)
        dep_graph.remove_dependency(main_verb, clause_verb)

        merged_mark = merge_dep_nodes(phrase, UPOS=mark.UPOS, LOC=mark.LOC)

        dep_graph.replace_nodes(phrase, merged_mark)
        dep_graph.add_dependency(main_verb, clause_verb,
                                 "advcl:" + merged_mark.LEMMA)
        dep_graph.add_dependency(clause_verb, merged_mark, "mark")
def acl_loop(dep_graph: DependencyGraph):
    """
    Break two-node dependency loops created by relative clauses.

    For every edge ``n1 -> n2`` that carries ``acl:relcl``, the reverse
    edge ``n2 -> n1`` is removed when it carries one of ``obl``, ``nsubj``,
    ``obj``, ``mark`` or ``advmod``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    back_relations = ("obl", "nsubj", "obj", "mark", "advmod")

    # Snapshot the edges before looping: the body removes dependencies, and
    # mutating the graph while walking a live view of its edges is unsafe.
    for n1, n2, deps in list(dep_graph.dependencies()):

        if "acl:relcl" not in deps:
            continue

        back_deps = dep_graph.get_dependency(n2, n1)
        if any(x in back_deps for x in back_relations):
            dep_graph.remove_dependency(n2, n1)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Turn ``nmod`` modifiers introduced by a case word into OIA predicates.

    Examples: "the office of the chair", "Istanbul in Turkey".

    For every ``parent -nmod-> child -case-> case_word`` configuration not
    yet related in the OIA graph, the case word becomes a predicate whose
    first argument is the parent (added with ``mod=True``) and whose second
    argument is the child.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the predicate and arguments
    :param context: conversion context (not used by this rule)
    :return: None
    """

    pattern = DependencyGraph()

    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    pattern.add_nodes([parent_node, child_node, case_node])

    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):

        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        # The nmod sub-label does not always equal the case word's lemma or
        # form (vs -> versus, according -> accord; the label keeps the
        # shorter one).  The predicate is the case word itself either way,
        # so the label is not consulted here.
        pred_node = oia_graph.add_words(dep_case_node.position)

        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def get_relation_to_conj(dep_graph: DependencyGraph, root, root_parents,
                         parallel_components):
    """
    Summarise how each parent of a conjunction relates to its components.

    For every parent, the first relation to each parallel component is
    split into a relation prefix and an optional mark (the part after the
    colon, e.g. the ``in`` of ``obl:in``).  Sub-labels that are structural
    (``relcl``, ``xsubj``, ``pass``, ``poss``, ``tmod``) are kept as part of
    the prefix instead of being treated as marks.

    :param dep_graph: dependency graph to query
    :param root: the head component of the conjunction; ``conj`` relations
        to any other component are ignored
    :param root_parents: nodes whose relations to the components are read
    :param parallel_components: the coordinated nodes
    :return: dict mapping ``parent.ID`` to ``(prefix, shared_prefix, marks)``
        where ``prefix`` is the common relation prefix, ``shared_prefix`` is
        False when some component has no relation to the parent, and
        ``marks`` is None or a list with one entry (mark or None) per
        component that was not skipped
    :raises AssertionError: when the collected prefixes are not all the same
        (including the case where none was collected)
    """

    # Sub-labels that qualify the relation itself rather than name a
    # case/mark word.
    structural_subtypes = {"relcl", "xsubj", "pass", "poss", "tmod"}
    core_relations = ("subj", "obj", "ccomp", "xcomp")

    relation_to_conj = dict()
    for parent in root_parents:
        prefixs = []
        marks = []
        shared_prefix = True
        for child in parallel_components:
            rels = dep_graph.get_dependency(parent, child)
            if not rels:
                shared_prefix = False
                marks.append(None)
                continue

            rel = list(rels.rels)[0]

            if child != root and rel.startswith("conj"):
                continue

            prefix, mark = rel, None
            if ":" in rel:
                # Split once only: labels may carry several sub-parts
                # (e.g. "nsubj:pass:xsubj"), which a plain two-way
                # unpacking of split(":") cannot handle.
                head, sub = rel.split(":", 1)
                if not any(part in structural_subtypes
                           for part in sub.split(":")):
                    prefix, mark = head, sub

            prefixs.append(prefix)
            marks.append(mark)

        assert len(set(prefixs)) == 1, \
            "expected one shared relation prefix, got {0}".format(prefixs)
        prefix = prefixs[0]

        if all(m is None for m in marks):
            marks = None

        # Core arguments never carry a case/mark word in their label.
        if any(x in prefix for x in core_relations):
            marks = None

        relation_to_conj[parent.ID] = (prefix, shared_prefix, marks)

    return relation_to_conj
def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                         context: UD2OIAContext):
    """
    Add ``obl`` / ``obl:tmod`` / ``obl:npmod`` dependents to the OIA graph.

    A temporal oblique (``tmod``) is linked to its head through an auxiliary
    "TIME_IN" predicate; any other matched oblique is added as a plain
    modifier of its head.  Pairs already related in the OIA graph (directly
    or indirectly) are skipped.

    :param dep_graph: dependency graph searched for oblique edges
    :param oia_graph: OIA graph that receives the new relations
    :param context: conversion context (not used by this rule)
    :return: None
    """

    # cut X by a knife
    shape = DependencyGraph()
    head_slot = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    obl_slot = DependencyGraphNode()
    shape.add_node(head_slot)
    shape.add_node(obl_slot)
    shape.add_dependency(head_slot, obl_slot, r'obl:tmod|obl:npmod|obl')

    for found in dep_graph.match(shape):
        head = found[head_slot]
        oblique = found[obl_slot]

        if oia_graph.has_relation(head, oblique, direct_link=False):
            continue

        edge_values = dep_graph.get_dependency(head, oblique).values()

        if "tmod" not in edge_values:
            # "npmod" and every other oblique: plain modification
            oia_head = oia_graph.add_words(head.position)
            oia_oblique = oia_graph.add_words(oblique.position)
            oia_graph.add_mod(oia_oblique, oia_head)
            continue

        # temporal modifier: TIME_IN(head, oblique)
        time_pred = oia_graph.add_aux("TIME_IN")

        first_arg = oia_graph.add_words(head.position)
        second_arg = oia_graph.add_words(oblique.position)

        oia_graph.add_argument(time_pred, first_arg, 1, mod=True)
        oia_graph.add_argument(time_pred, second_arg, 2)
def nmod_without_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Add remaining ``nmod`` dependents to the OIA graph as plain modifiers.

    Every ``center -nmod*-> modifier`` pair that is not yet related in the
    OIA graph (directly or indirectly) becomes a modification of the center
    by the modifier.

    :param dep_graph: dependency graph searched for ``nmod`` edges
    :param oia_graph: OIA graph that receives the modifier relations
    :param context: conversion context (not used by this rule)
    :return: None
    """

    shape = DependencyGraph()

    center_slot = shape.create_node()
    modifier_slot = shape.create_node()

    shape.add_dependency(center_slot, modifier_slot, r'\w*nmod\w*')

    for found in dep_graph.match(shape):
        center = found[center_slot]
        modifier = found[modifier_slot]

        labels = dep_graph.get_dependency(center, modifier)

        if "nmod:poss" in labels:
            # "whose" constructions: the center is itself reachable from
            # the possessive modifier, so adding the edge would loop.
            if center in set(dep_graph.offsprings(modifier)):
                continue

        if oia_graph.has_relation(center, modifier, direct_link=False):
            continue

        oia_center = oia_graph.add_words(center.position)
        oia_modifier = oia_graph.add_words(modifier.position)

        oia_graph.add_mod(oia_modifier, oia_center)
Exemple #10
0
def two_node_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Handle a graph reduced to exactly two words linked by ``case``.

    When, ignoring ROOT and PUNCT nodes, only two nodes remain and the
    later one governs the earlier one through ``case``, the case word is
    added as a predicate with the other word as its second argument.

    :param dep_graph: dependency graph to inspect
    :param oia_graph: OIA graph that receives the predicate and argument
    :param context: conversion context (not used by this rule)
    :return: None
    """
    ignored_upos = {"ROOT", "PUNCT"}
    content_nodes = [
        node for node in dep_graph.nodes() if node.UPOS not in ignored_upos
    ]

    if len(content_nodes) != 2:
        return

    # In surface order the case word comes first, the noun second.
    case_word, noun = sorted(content_nodes, key=lambda node: node.LOC)

    if dep_graph.get_dependency(noun, case_word) == "case":
        oia_case = oia_graph.add_words(case_word.position)
        oia_noun = oia_graph.add_words(noun.position)

        oia_graph.add_argument(oia_case, oia_noun, 2)
Exemple #11
0
def acl_verb_obl_case(dep_graph: DependencyGraph):
    """
    Merge an ``acl`` verb with the case word(s) of its only oblique.

    Example: "something extracted by".  For a verb attached by ``acl`` that
    has a single ``obl:*`` child, no object or complement, and at most one
    subject, the case words lying between the verb and the oblique are
    merged into the verb, and the oblique becomes the ``obj`` of the merged
    verb node.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    shape = DependencyGraph()

    head_slot = shape.create_node()
    verb_slot = shape.create_node(UPOS="VERB")
    obl_slot = shape.create_node()
    case_slot = shape.create_node()

    shape.add_dependency(head_slot, verb_slot, r'acl')
    shape.add_dependency(verb_slot, obl_slot, r'obl:\w*')
    shape.add_dependency(obl_slot, case_slot, r'case')

    # First pass: collect what to merge without touching the graph.
    merges = []

    for found in dep_graph.match(shape):
        head = found[head_slot]
        verb = found[verb_slot]
        oblique = found[obl_slot]
        case_word = found[case_slot]

        obliques = [
            n for n, l in dep_graph.children(
                verb, filter=lambda n, l: l.startswith("obl"))
        ]
        if len(obliques) > 1:
            continue

        objects = [
            n for n, l in dep_graph.children(
                verb, filter=lambda n, l: "obj" in l or "comp" in l)
        ]
        if objects:
            continue

        # The matched case word must be the one named in the obl label.
        if case_word.FORM not in dep_graph.get_dependency(verb,
                                                          oblique).values():
            continue

        subjects = list(
            dep_graph.children(verb, filter=lambda n, l: "subj" in l))
        if len(subjects) > 1:
            continue

        # There may be other case words; join all that sit between the
        # verb and the oblique.
        case_words = [
            n for n, l in dep_graph.children(
                oblique,
                filter=lambda n, l: l.startswith("case") and
                verb.LOC < n.LOC < oblique.LOC)
        ]

        merges.append((head, verb, oblique, case_words))

    # Second pass: apply the merges.
    for _head, verb, oblique, case_words in merges:
        group = [verb] + case_words
        logging.debug("acl_verb_obl_case: we are merging nodes")
        logging.debug("\n".join(str(node) for node in group))

        merged_verb = merge_dep_nodes(group,
                                      UPOS=verb.UPOS,
                                      LOC=verb.LOC,
                                      FEATS=verb.FEATS)

        logging.debug("acl_verb_obl_case: we obtain a new node")
        logging.debug(str(merged_verb))

        dep_graph.remove_dependency(verb, oblique)
        for case_word in case_words:
            dep_graph.remove_dependency(oblique, case_word)

        dep_graph.replace_nodes(group, merged_verb)
        dep_graph.add_dependency(merged_verb, oblique, "obj")
Exemple #12
0
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                           context: UD2OIAContext):
    """
    Convert object-extracted/referred relative clauses into OIA relations.

    Example: "the person that Andy knows".

    Matches an entity with an ``acl:relcl`` edge to a verb that has a
    subject child.  The subject becomes argument 1 of the verb and the
    entity argument 2 (added with ``mod=True``).  A ``ref`` child of the
    entity lying between entity and verb is linked to the entity with a
    ref relation and attached to the verb according to its own relation;
    ``ccomp`` children of the verb become argument 3.

    Only queries are issued against ``dep_graph``; all additions go to
    ``oia_graph`` and ``context``.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives nodes and relations
    :param context: conversion context; the (verb, subject) and
        (entity, verb) pairs are marked as processed
    :return: None
    :raises Exception: when a ref node without case children relates to
        the verb through something other than an ``obj`` relation or
        ``advmod``
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()

    pattern.add_nodes([verb_node, entity_node, subj_node])

    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):

        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # A relative pronoun in subject position is not handled by this
        # rule (the extracted element is then the subject, not the object).
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        # Mark both edges as handled before building anything.
        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        # Note that the three OIA nodes above have already been added and
        # the edges marked processed when this check bails out.
        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        # A usable ref is a "ref" child of the entity located strictly
        # between the entity and the verb.
        def __valid_ref(n, l):
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node,
                                                          filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # With several candidates, use the one closest to the verb.
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            case_nodes.sort(key=lambda x: x.LOC)

            # Nothing more is done for the ref when the verb has no
            # relation to it.
            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node,
                                           oia_verb_node,
                                           1,
                                           mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                else:

                    if "obj" in ref_relation:
                        oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                    elif ref_relation == "advmod":
                        oia_graph.add_mod(oia_ref_node, oia_verb_node)
                    else:
                        raise Exception(
                            "unknown relation: {}".format(ref_relation))
                    # oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): the subject argument was already added before the
        # ref handling; this second call looks redundant -- confirm that
        # add_argument tolerates being repeated.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): rels is only referenced by the disabled condition
        # below and is otherwise unused.
        rels = dep_graph.get_dependency(dep_entity_node, dep_verb_node)

        #if rels.endswith("obj"):
        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
Exemple #13
0
def verb_phrase(dep_graph: DependencyGraph):
    """
    Merge each verb with its auxiliaries, compounds and simple adverbs.

    For every VERB/AUX node taken as a phrase root, the rule gathers
    ``compound`` children and the offsprings of ``advmod`` children, keeps
    those not located after the root (compounds are kept regardless),
    reduces them with ``continuous_component``, adds the ``aux`` children,
    and, when more than one word results, merges them into a single VERB
    node placed at the root's location.

    Phrases are collected in a first pass and merged in a second pass.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    verb_phrases = []

    for node in dep_graph.nodes(filter=lambda x: x.UPOS in {"VERB", "AUX"}):

        # An AUX attached to something by "aux" is merged from its head's
        # side, so it is not a phrase root itself.
        if node.UPOS == "AUX":
            parent = [
                n for n, l in dep_graph.parents(node,
                                                filter=lambda n, l: l == "aux")
            ]
            if len(parent) > 0:
                continue

        #        if "VerbForm" in node.FEATS and "Ger" in node.FEATS["VerbForm"]:
        #            continue

        if "Tense" in node.FEATS and "Past" in node.FEATS["Tense"]:
            # if the verb is before the noun, it will be processed by noun_phrase and taken as a part of the noun
            parent = [
                n for n, l in dep_graph.parents(
                    node, filter=lambda n, l: l == "amod" and node.LOC < n.LOC)
            ]
            if len(parent) > 0:
                continue
        # logger.debug("We are checking node {0}".format(node))

        root = node
        verbs = [root]
        for n, l in dep_graph.children(root):
            # Skip children that also point back at the root (a loop).
            if dep_graph.get_dependency(n, root):
                continue

            # These words are never absorbed into the verb phrase.
            if n.LEMMA in {"so", "also", "why"}:
                continue

            if "advmod" in l:
                # Absorb the adverb's offsprings only when none of them is
                # a verb, noun, auxiliary or pronoun.
                offsprings = list(dep_graph.offsprings(n))
                if any(x.UPOS in {"VERB", "NOUN", "AUX", "PRON"}
                       for x in offsprings):
                    continue

                verbs.extend(offsprings)
            elif "compound" in l:
                verbs.append(n)

        # Keep words at or before the root; compounds are kept wherever
        # they are.
        verbs = [
            x for x in verbs if x.LOC <= root.LOC
            or "compound" in dep_graph.get_dependency(root, x)
        ]

        # logger.debug("Verb: before continuous component ")
        # logger.debug("\n".join(str(verb) for verb in verbs))

        # continuous_component is defined elsewhere; presumably it keeps
        # the contiguous run of words around root -- TODO confirm.
        verbs = continuous_component(verbs, root)

        # add aux
        verbs.extend(n for n, l in dep_graph.children(root) if "aux" in l)

        # logger.debug("Verb: after continuous component ")
        # for verb in verbs:
        #    logger.debug(verb)

        verbs.sort(key=lambda x: x.LOC)
        # NOTE(review): last_loc is only used by the disabled "not" lookup
        # below; as written this line just requires verbs to be non-empty.
        last_loc = verbs[-1].LOC

        #        next_node = dep_graph.get_node_by_loc(last_loc + 1)
        #        if next_node and next_node.LEMMA == "not":
        #            verbs.append(next_node)

        # A single word needs no merging.
        if len(verbs) > 1:
            verb_phrases.append((verbs, root))

    # Second pass: replace each collected phrase by one merged VERB node
    # that inherits the root's location and features.
    for verbs, root in verb_phrases:
        verb_node = merge_dep_nodes(verbs,
                                    UPOS="VERB",
                                    LOC=root.LOC,
                                    FEATS=root.FEATS)

        dep_graph.replace_nodes(verbs, verb_node)
Exemple #14
0
def multi_words_case(dep_graph: DependencyGraph):
    """
    Merge the words of a multi-word case marker into a single node.

    For a node reached through a ``<rel>:<case>`` relation that has
    ``case`` children, all offsprings of those case children are merged
    into one node when the case part of the relation occurs in their
    underscore-joined lemmas; the relation is then relabelled with the
    space-joined lemmas.

    :TODO  add example case
    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    shape = DependencyGraph()

    head_slot = DependencyGraphNode()
    mid_slot = DependencyGraphNode()
    case_slot = DependencyGraphNode()

    for slot in (head_slot, mid_slot, case_slot):
        shape.add_node(slot)

    shape.add_dependency(head_slot, mid_slot, r'\w*:\w*')
    shape.add_dependency(mid_slot, case_slot, r'\bcase\b')

    # The matches are materialised because the loop rewrites the graph.
    for found in list(dep_graph.match(shape)):
        head = found[head_slot]
        middle = found[mid_slot]
        case_word = found[case_slot]

        # The case word may have been consumed by an earlier merge.
        if not dep_graph.has_node(case_word):
            continue

        direct_cases = [
            n for n, l in dep_graph.children(middle,
                                             filter=lambda n, l: "case" == l)
        ]
        gathered = set()
        for direct in direct_cases:
            gathered.update(dep_graph.offsprings(direct))

        if len(gathered) == 1:
            # a single-word case needs no merging
            continue

        case_words = sorted(gathered, key=lambda n: n.LOC)
        logger.debug("multi case discovered")
        for word in case_words:
            logger.debug(str(word))

        # Relation prefixes whose case part is covered by the gathered
        # words.  Sometimes the relation names only one of the words.
        # Example :
        # that OBSF values within the extended trial balance may be misstated due to data issues ( above and beyond existing conversations with AA on model simplifications)
        accepted_prefixes = []
        for rel in dep_graph.get_dependency(head, middle):
            if ":" not in rel:
                continue
            rel_str, case_str = rel.split(":")
            if case_str in "_".join([x.LEMMA for x in case_words]):
                accepted_prefixes.append(rel_str)

        for rel_str in accepted_prefixes:

            logger.debug("we are merging:")
            for word in case_words:
                logger.debug(str(word))

            present = [dep_graph.has_node(x) for x in case_words]
            if not all(present):
                continue

            merged_case = merge_dep_nodes(case_words,
                                          UPOS=case_word.UPOS,
                                          LOC=case_word.LOC)
            dep_graph.replace_nodes(case_words, merged_case)
            dep_graph.remove_dependency(head, middle)

            new_label = rel_str + ":" + " ".join([x.LEMMA for x in case_words])
            dep_graph.add_dependency(head, middle, new_label)
def process_conjunction(dep_graph: DependencyGraph, root: DependencyGraphNode):
    """
    Replace a coordination headed by ``root`` with a conjunction node.

    The ``conj`` children of ``root`` (nested coordinations are processed
    recursively first and represented by their own conjunction node) are
    gathered with ``root`` as parallel components.  A conjunction node is
    built for them, the relations from the parents of ``root`` are moved to
    that node or spread over the components, the ``conj`` edges from
    ``root`` are removed, and every component is attached to the
    conjunction node with an ``arg_conj:<index>`` relation.

    :param dep_graph: dependency graph, rewritten in place
    :param root: head of the coordination; must have at least one child
        attached by a relation starting with ``conj``
    :return: tuple ``(conj_node, parallel_components)``
    :raises AssertionError: when ``root`` has no ``conj`` child
    """
    conj_childs = [
        child for child, rels in dep_graph.children(
            root, filter=lambda n, l: l.startswith("conj"))
    ]

    assert conj_childs

    parallel_components = [root]

    for child in conj_childs:

        # A conjunct that itself has conj children is a nested
        # coordination: resolve it first and use its conjunction node.
        is_nest = any(
            grand_rels.startswith("conj")
            for grand_sun, grand_rels in dep_graph.children(child))
        if is_nest:
            logger.debug("nested conj is found ")
            logger.debug(str(child))

            conj_node, parallel_nodes = process_conjunction(dep_graph, child)
            logger.debug("conj_node is created ")
            logger.debug(str(conj_node))

            # Re-point conj edges from root to the nested components at
            # the nested conjunction node instead.
            for node in parallel_nodes:
                logger.debug("Containing nodes  ")
                logger.debug(str(node))
                rels = list(dep_graph.get_dependency(root, node))
                for rel in rels:
                    if rel.startswith("conj"):
                        logger.debug("remove dependency {0}".format(
                            (root.ID, node.ID, rel)))

                        dep_graph.remove_dependency(root, node, rel)
                        dep_graph.add_dependency(root, conj_node, rel)
            child = conj_node

        parallel_components.append(child)

    parallel_components.sort(key=lambda x: x.LOC)

    # Disabled: merging a pure-noun coordination into a single noun node.
    # if all(n.UPOS in NOUN_UPOS for n in parallel_components):
    #
    #     logger.debug("Processing all noun conjunction")
    #
    #     is_pure_noun = True
    #
    #     merging_noun_nodes = []
    #     min_loc = 10000
    #     max_loc = -1
    #     for child in parallel_components:
    #         if isinstance(child, DependencyGraphNode):
    #             min_loc = min(min_loc, child.LOC)
    #             max_loc = max(min_loc, child.LOC)
    #         elif isinstance(child, DependencyGraphSuperNode):
    #             min_loc = min(min_loc, min([x.LOC for x in child.nodes]))
    #             max_loc = max(max_loc, max([x.LOC for x in child.nodes]))
    #         merging_noun_nodes.extend(dep_graph.offsprings(child))
    #
    #         logger.debug("Checking acl for {0}".format(child))
    #         for n, l in dep_graph.children(child):
    #             logger.debug(n)
    #             logger.debug("label {0}".format(l))
    #             if "acl" in l:
    #                 is_pure_noun = False
    #                 break
    #
    #     if is_pure_noun:
    #         merging_noun_nodes = [n for n in merging_noun_nodes if min_loc <= n.LOC <= max_loc]
    #         is_pure_noun = not any(n.UPOS in {"ADP", "VERB", "SCONJ", "AUX"} for n in merging_noun_nodes)
    #
    #     if is_pure_noun:
    #         # merged_noun_nodes.sort(key=lambda x: x.LOC)
    #         for node in merging_noun_nodes:
    #             logger.debug("merging {0}".format(node))
    #
    #         new_noun = merge_dep_nodes(merging_noun_nodes, UPOS=root.UPOS, LOC=root.LOC)
    #         dep_graph.replace_nodes(merging_noun_nodes, new_noun)
    #
    #         return new_noun, []

    # Distinct parents of root, in surface order.
    root_parents = list(set(parent
                            for parent, rels in dep_graph.parents(root)))
    root_parents.sort(key=lambda x: x.LOC)

    # ic(list(map(str, root_parents)))

    conj_node, with_arg_palceholder = build_conjunction_node(
        dep_graph, root, root_parents, parallel_components)

    # Per parent: (relation prefix, shared_prefix flag, marks or None).
    relation_to_conj = get_relation_to_conj(dep_graph, root, root_parents,
                                            parallel_components)

    # For every component, its case / mark / cc children with their labels.
    case_marks = dict()
    for index, node in enumerate(parallel_components):
        case_marks[node.ID] = [(n, l) for n, l in dep_graph.children(node)
                               if ("case" in l or "mark" in l or "cc" in l)]
    for key, values in case_marks.items():
        for v in values:
            logger.debug("case_marker = {} {} {}".format(
                key, v[0].ID, v[1].rels))

    logger.debug("relation_to_conj = {}".format(relation_to_conj))

    for parent in root_parents:
        # ic(parent)

        prefix, shared_prefix, required_mark = relation_to_conj[parent.ID]
        # Core arguments, relations without marks, and relations whose
        # marks are all the same are routed through the conjunction node.
        if any(x in prefix for x in {"subj", "obj", "ccomp", "xcomp"}) \
                or not required_mark or len(set(required_mark)) == 1:

            for node in parallel_components:
                dep_graph.remove_dependency(parent, node)

            relation = prefix

            if required_mark and len(set(required_mark)) == 1:
                ## with same mark

                mark_lemma = list(set(required_mark))[0]

                relation += ":" + mark_lemma

                mark_node = find_mark(case_marks, parallel_components,
                                      mark_lemma)

                if mark_node:

                    mark_node, mark_rel = mark_node

                    # Removing and re-adding the node drops its existing
                    # edges before it is hung under the conjunction node.
                    dep_graph.remove_node(mark_node)
                    dep_graph.add_node(mark_node)  # clear the dependency

                    dep_graph.add_dependency(conj_node, mark_node, mark_rel)
                else:
                    logger.error("cannot find the mark node")

            dep_graph.add_dependency(parent, conj_node, relation)

        else:
            # Differing marks: keep one relation per component.

            complete_missing_case_mark(dep_graph, root, root_parents,
                                       parallel_components, relation_to_conj,
                                       case_marks)

            # NOTE(review): this branch is only entered when required_mark
            # is truthy (see the condition above), so this fallback looks
            # unreachable -- confirm.
            if not required_mark:
                required_mark = [None] * len(parallel_components)

            for index, (node, mark) in enumerate(
                    zip(parallel_components, required_mark)):
                if mark:
                    rel = prefix + ":" + mark
                else:
                    rel = prefix

                # if rel.startswith("conj"):
                #    continue
                logger.debug("add dependency {0}".format(
                    (parent.ID, node.ID, rel)))

                dep_graph.add_dependency(parent, node, rel)

        # NOTE(review): this wiring of the components to the conjunction
        # node sits inside the per-parent loop, so it is repeated for each
        # parent and skipped entirely when root has no parent -- confirm
        # this is intended.
        for idx, node in enumerate(parallel_components):
            if node != root:
                rels = dep_graph.get_dependency(root, node)
                for rel in rels:
                    if rel.startswith("conj"):
                        dep_graph.remove_dependency(root, node)

            if with_arg_palceholder:
                index = idx + 1
            else:
                # a, but b, b should be the arg1 and a be the arg2
                index = len(parallel_components) - idx

            dep_graph.add_dependency(conj_node, node,
                                     "arg_conj:{0}".format(index))

    return conj_node, parallel_components