def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                         context: UD2OIAContext):
    """
    Translate oblique edges into OIA relations without using a preposition.

    Every ``head -> dependent`` edge whose label matches
    ``obl:tmod|obl:npmod|obl`` is visited.  Pairs that ``oia_graph`` already
    relates (queried with ``direct_link=False``) are skipped.  If "tmod" is
    among the values of the edge, a ``TIME_IN`` auxiliary predicate is
    created with the head as argument 1 (``mod=True``) and the dependent as
    argument 2; in every other case the dependent is attached to the head
    through ``add_mod``.

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph receiving the new nodes and edges
    :param context: conversion context (not used by this rule)
    :return: None
    """

    template = DependencyGraph()
    head_slot = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    obl_slot = DependencyGraphNode()
    for slot in (head_slot, obl_slot):
        template.add_node(slot)
    template.add_dependency(head_slot, obl_slot, r'obl:tmod|obl:npmod|obl')

    for hit in dep_graph.match(template):
        head = hit[head_slot]
        dependent = hit[obl_slot]

        already_linked = oia_graph.has_relation(head,
                                                dependent,
                                                direct_link=False)
        if already_linked:
            continue

        edge_values = dep_graph.get_dependency(head, dependent).values()

        if "tmod" not in edge_values:
            # "npmod" and every other oblique flavour: plain modification
            oia_head = oia_graph.add_words(head.position)
            oia_dependent = oia_graph.add_words(dependent.position)
            oia_graph.add_mod(oia_dependent, oia_head)
            continue

        # temporal modifier: route both words through a TIME_IN predicate
        time_pred = oia_graph.add_aux("TIME_IN")
        oia_head = oia_graph.add_words(head.position)
        oia_dependent = oia_graph.add_words(dependent.position)

        oia_graph.add_argument(time_pred, oia_head, 1, mod=True)
        oia_graph.add_argument(time_pred, oia_dependent, 2)
Beispiel #2
0
def such_that(dep_graph: DependencyGraph):
    """
    Fuse the "such ... that" construction, e.g. "such a high price that ...".

    The rule looks for a NOUN with a ``det:predet`` child "such", where
    "such" governs a VERB through ``advcl:that`` and that verb has a ``mark``
    child "that".  A match is kept only when the words appear in the order
    such < noun < that < clause verb (compared by ``LOC``).

    For every kept match the "such" and "that" nodes are merged into one
    SCONJ node placed at the location of "that"; the noun is linked to the
    clause verb with ``"advcl:" + <merged FORM>``, the clause verb is linked
    to the merged node with ``mark``, and the two original nodes are removed.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    template = DependencyGraph()

    noun_slot = DependencyGraphNode(UPOS="NOUN")
    such_slot = DependencyGraphNode(FORM="such")
    clause_slot = DependencyGraphNode(UPOS="VERB")
    that_slot = DependencyGraphNode(FORM="that")

    template.add_nodes([noun_slot, such_slot, clause_slot, that_slot])
    template.add_dependency(noun_slot, such_slot, r'det:predet')
    template.add_dependency(such_slot, clause_slot, r'advcl:that')
    template.add_dependency(clause_slot, that_slot, r'mark')

    # Collect first, rewrite afterwards, so matching runs on the untouched graph.
    candidates = []
    for hit in dep_graph.match(template):
        noun = hit[noun_slot]
        such = hit[such_slot]
        clause = hit[clause_slot]
        that = hit[that_slot]

        in_expected_order = such.LOC < noun.LOC < that.LOC < clause.LOC
        if in_expected_order:
            candidates.append((noun, such, clause, that))

    for noun, such, clause, that in candidates:
        merged = merge_dep_nodes([such, that], UPOS="SCONJ", LOC=that.LOC)
        dep_graph.add_node(merged)

        clause_label = "advcl:" + merged.FORM
        dep_graph.add_dependency(noun, clause, clause_label)
        dep_graph.add_dependency(clause, merged, "mark")

        for obsolete in (such, that):
            dep_graph.remove_node(obsolete)
Beispiel #3
0
def be_not_phrase(dep_graph: DependencyGraph):
    """
    Merge a "be" node with a following "not" into a single node.

    The pattern is ``be -[*obj*]-> object -[*advmod*]-> not``.  A match is
    kept when "be" is one of the space-separated tokens of the first node's
    LEMMA, "not" is one of the tokens of the third node's LEMMA, and the
    locations satisfy ``be.LOC <= not.LOC <= object.LOC``.

    For every kept match the ``advmod`` dependency from the object to "not"
    is removed, and the "be" and "not" nodes are replaced by their merge,
    which takes the UPOS and LOC of the "be" node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    template = DependencyGraph()

    be_slot = template.create_node()  # contain the be verb
    obj_slot = template.create_node()
    not_slot = template.create_node()

    for slot in (be_slot, obj_slot, not_slot):
        template.add_node(slot)

    template.add_dependency(be_slot, obj_slot, r'\w*obj\w*')
    template.add_dependency(obj_slot, not_slot, r'\w*advmod\w*')

    # Gather every valid triple before touching the graph.
    triples = []
    for hit in dep_graph.match(template):
        be_word = hit[be_slot]
        obj_word = hit[obj_slot]
        not_word = hit[not_slot]

        if "be" not in be_word.LEMMA.split(" "):
            continue

        if "not" not in not_word.LEMMA.split(" "):
            continue

        # "not" has to sit between the copula and the object
        if be_word.LOC <= not_word.LOC <= obj_word.LOC:
            triples.append((be_word, obj_word, not_word))

    for be_word, obj_word, not_word in triples:
        dep_graph.remove_dependency(obj_word, not_word, 'advmod')
        negated_be = merge_dep_nodes((be_word, not_word),
                                     UPOS=be_word.UPOS,
                                     LOC=be_word.LOC)
        dep_graph.replace_nodes([be_word, not_word], negated_be)
Beispiel #4
0
def subject_relative_clause(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Handle subject-extracted / referred relative clauses.

    Examples:
        the person who is tall / that is killed  -- with ref
        the person waiting for the baby          -- without ref

    For each ``entity -[*acl:relcl*]-> clause`` edge the rule proceeds unless
    the clause has a ``*subj*`` child whose first entry is a node other than
    the entity.  It then looks for ``ref`` children of the entity located
    strictly between the entity and the clause:

    * no ref: the entity becomes argument 1 (``mod=True``) of the clause;
    * ref that has a ``case`` child: the last case word (by LOC) becomes the
      predicate, with the clause as argument 1 and the ref as argument 2
      (``mod=True``);
    * ref without a case child: the ref becomes argument 1 (``mod=True``) of
      the clause.

    Whenever a ref is used, a ref edge from the entity to it is added.

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph receiving the new nodes and edges
    :param context: conversion context (not used by this rule)
    :return: None
    """

    template = DependencyGraph()
    head_slot = DependencyGraphNode()
    clause_slot = DependencyGraphNode()
    template.add_node(head_slot)
    template.add_node(clause_slot)
    template.add_dependency(head_slot, clause_slot, r'\w*acl:relcl\w*')

    for hit in dep_graph.match(template):

        dep_head = hit[head_slot]
        dep_clause = hit[clause_slot]

        clause_subjects = [
            child for child, _ in dep_graph.children(
                dep_clause, filter=lambda n, l: "subj" in l)
        ]
        # the clause has its own subject that is not the entity: not our case
        if clause_subjects and clause_subjects[0].ID != dep_head.ID:
            continue

        oia_clause = oia_graph.add_words(dep_clause.position)
        oia_head = oia_graph.add_words(dep_head.position)

        def ref_in_between(n, l):
            return l == "ref" and dep_head.LOC < n.LOC < dep_clause.LOC

        refs = sorted(
            (child for child, _ in dep_graph.children(dep_head,
                                                      filter=ref_in_between)),
            key=lambda x: x.LOC)

        if not refs:
            oia_graph.add_argument(
                oia_clause, oia_head, 1,
                mod=True)  # function and pred, seems we need another label
            continue

        dep_ref = refs[-1]
        oia_ref = oia_graph.add_words(dep_ref.position)

        cases = sorted(
            (child for child, _ in dep_graph.children(
                dep_ref, filter=lambda n, l: "case" in l)),
            key=lambda x: x.LOC)

        if cases:
            # with which xxxx, the with will become the root pred
            oia_case = oia_graph.add_words(cases[-1].position)

            oia_graph.add_argument(oia_case, oia_clause, 1)
            oia_graph.add_argument(oia_case, oia_ref, 2, mod=True)
        else:
            oia_graph.add_argument(oia_clause, oia_ref, 1, mod=True)

        # both ref variants end by linking the entity to its ref word
        oia_graph.add_ref(oia_head, oia_ref)
Beispiel #5
0
def subject_relative_clause_loop(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Loop variant of the subject relative clause rule.

    A separate loop version exists because the match algorithm does not
    match part of a loop (see test_match for more detail).

    Examples:
        the person who is tall / that is killed  -- with ref
        the person waiting for the baby          -- without ref

    First pass: matches entity/clause pairs connected in both directions
    (``clause -[*subj*]-> entity`` and ``entity -[*acl:relcl*]-> clause``).
    With a ``ref`` child of the entity located strictly between the two, the
    ref (optionally through its last ``case`` word as predicate) is attached
    and a ref edge from the entity is added; without one the entity becomes
    argument 1 (``mod=True``) of the clause.

    Second pass: matches ``entity -[*acl:relcl*]-> verb -[*subj*]-> subj``
    where the subject's LEMMA matches ``what|who|which|that``.  Both edges
    are reported to ``context.processed``; the verb gets a mod edge to the
    entity, the entity a ref edge to the subject, and the subject becomes
    argument 1 of the verb.

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph receiving the new nodes and edges
    :param context: conversion context; ``processed`` is called in pass two
    :return: None
    """

    # ---- pass 1: entity and clause point at each other -------------------
    loop_template = DependencyGraph()
    head_slot = DependencyGraphNode()
    clause_slot = DependencyGraphNode()
    loop_template.add_node(head_slot)
    loop_template.add_node(clause_slot)
    loop_template.add_dependency(clause_slot, head_slot, r'\w*subj\w*')
    loop_template.add_dependency(head_slot, clause_slot, r'\w*acl:relcl\w*')

    for hit in dep_graph.match(loop_template):

        dep_head = hit[head_slot]
        dep_clause = hit[clause_slot]

        oia_clause = oia_graph.add_words(dep_clause.position)
        oia_head = oia_graph.add_words(dep_head.position)

        def ref_in_between(n, l):
            return l == "ref" and dep_head.LOC < n.LOC < dep_clause.LOC

        refs = sorted(
            (child for child, _ in dep_graph.children(dep_head,
                                                      filter=ref_in_between)),
            key=lambda x: x.LOC)

        if not refs:
            oia_graph.add_argument(
                oia_clause, oia_head, 1,
                mod=True)  # function and pred, seems we need another label
            continue

        dep_ref = refs[-1]
        oia_ref = oia_graph.add_words(dep_ref.position)

        dep_cases = sorted(
            (child for child, _ in dep_graph.children(
                dep_ref, filter=lambda n, l: "case" in l)),
            key=lambda x: x.LOC)

        if dep_cases:
            # with which xxxx, the with will become the root pred
            oia_case = oia_graph.add_words(dep_cases[-1].position)

            oia_graph.add_argument(oia_case, oia_clause, 1)
            oia_graph.add_argument(oia_case, oia_ref, 2)
        else:
            oia_graph.add_argument(oia_clause, oia_ref, 1)

        oia_graph.add_ref(oia_head, oia_ref)

    # ---- pass 2: relative pronoun is the subject of the clause verb ------
    pronoun_template = DependencyGraph()
    verb_slot = DependencyGraphNode()
    entity_slot = DependencyGraphNode()
    pronoun_slot = DependencyGraphNode(LEMMA=r"what|who|which|that")

    pronoun_template.add_nodes([verb_slot, entity_slot, pronoun_slot])

    pronoun_template.add_dependency(verb_slot, pronoun_slot, r'\w*subj\w*')
    pronoun_template.add_dependency(entity_slot, verb_slot,
                                    r'\w*acl:relcl\w*')

    for hit in dep_graph.match(pronoun_template):

        dep_entity = hit[entity_slot]
        dep_verb = hit[verb_slot]
        dep_pronoun = hit[pronoun_slot]

        context.processed(dep_verb, dep_pronoun)
        context.processed(dep_entity, dep_verb)

        oia_verb = oia_graph.add_words(dep_verb.position)
        oia_entity = oia_graph.add_words(dep_entity.position)
        oia_pronoun = oia_graph.add_words(dep_pronoun.position)

        oia_graph.add_mod(oia_verb, oia_entity)
        oia_graph.add_ref(oia_entity, oia_pronoun)
        oia_graph.add_argument(oia_verb, oia_pronoun, 1)
Beispiel #6
0
def be_adj_verb_phrase(dep_graph):
    """
    Merge a copula "be" with its ADJ/ADV head into a single VERB node.

    The pattern is ``adj/adv -[cop]-> be``.  A match is dropped when "be" is
    not one of the space-separated tokens of the copula's LEMMA, when the
    copula is located after the adjective (may be a question), or when the
    adjective is a ``DependencyGraphSuperNode`` flagged ``is_conj``.

    Before merging, if the adjective has a parent reached through a label
    containing "arg_con", the ADJ/ADV children of the first such parent that
    are reached the same way, differ from the adjective and have no ``cop``
    child of their own get "(be) " prefixed to their FORM and LEMMA.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    template = DependencyGraph()

    adj_slot = template.create_node(UPOS="ADJ|ADV")
    be_slot = template.create_node()  # contain the be verb

    template.add_node(adj_slot)
    template.add_node(be_slot)

    template.add_dependency(adj_slot, be_slot, r'cop')

    pairs = []

    for hit in dep_graph.match(template):

        predicate = hit[adj_slot]
        copula = hit[be_slot]

        if ("be" not in copula.LEMMA.split(" ")
                # copula after the adjective: may be question
                or copula.LOC > predicate.LOC
                or (isinstance(predicate, DependencyGraphSuperNode)
                    and predicate.is_conj)):
            continue

        pairs.append((copula, predicate))

    for copula, predicate in pairs:

        conj_heads = [
            parent for parent, label in dep_graph.parents(predicate)
            if "arg_con" in label
        ]

        if conj_heads:
            siblings = [
                child for child, label in dep_graph.children(conj_heads[0])
                if "arg_con" in label and child.UPOS in {"ADJ", "ADV"}
            ]

            for sibling in siblings:
                # coordinated adjectives lacking their own copula get an
                # implicit one spelled out in their surface form
                if sibling != predicate and not any(
                        "cop" in label
                        for _, label in dep_graph.children(sibling)):
                    sibling.FORM = "(be) " + sibling.FORM
                    sibling.LEMMA = "(be) " + sibling.LEMMA

        merged_verb = merge_dep_nodes([copula, predicate],
                                      UPOS="VERB",
                                      LOC=copula.LOC)
        dep_graph.replace_nodes([copula, predicate], merged_verb)
def secondary_predicate(dep_graph: DependencyGraph):
    """
    Detect an xcomp used as a secondary predicate and insert an implicit
    "(be)" so that it forms a predicate of its own.

    The pattern is ``pred -[xcomp]-> comp`` (comp's UPOS is not VERB),
    ``comp -[nsubj]-> subj`` and ``pred -[obj]-> subj``.  Two word orders are
    rewritten, all others are left alone:

    * ``pred < subj < comp``: if comp is ADJ/ADV it is merged with "(be)"
      into a new VERB node that inherits comp's children; otherwise a
      separate "(be)" VERB node is created just before comp (LOC - 0.5) and
      comp becomes its ``obj``.
    * ``comp < pred`` (comp fronted, e.g. "I can't tell you how ominous I
      found Bush's performance in that interview."): a separate "(be)" node
      is created and comp hangs below it as ``amod`` (ADJ/ADV) or ``obj``.

    In both cases the three matched edges are removed first and the "(be)"
    node becomes the ``obj`` of pred with subj as its ``nsubj``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    template = DependencyGraph()

    pred_slot = template.create_node()
    comp_slot = template.create_node(UPOS=r'(?!VERB\b)\b\w+')
    subj_slot = template.create_node()

    template.add_dependency(pred_slot, comp_slot, "xcomp")
    template.add_dependency(comp_slot, subj_slot, "nsubj")
    template.add_dependency(pred_slot, subj_slot, "obj")

    # the graph is rewritten while looping, so freeze the matches up front
    for hit in list(dep_graph.match(template)):

        pred = hit[pred_slot]
        comp = hit[comp_slot]
        subj = hit[subj_slot]

        subject_inside = pred.LOC < subj.LOC < comp.LOC
        comp_fronted = not subject_inside and comp.LOC < pred.LOC
        if not (subject_inside or comp_fronted):
            continue

        dep_graph.remove_dependency(pred, comp)
        dep_graph.remove_dependency(pred, subj)
        dep_graph.remove_dependency(comp, subj)

        comp_is_modifier = comp.UPOS in ("ADJ", "ADV")

        if subject_inside and comp_is_modifier:
            # fold the adjective/adverb itself into the new "(be) X" verb
            be_phrase = merge_dep_nodes(["(be)", comp],
                                        UPOS="VERB",
                                        LOC=comp.LOC)

            dep_graph.add_node(be_phrase)

            dep_graph.add_dependency(pred, be_phrase, "obj")
            dep_graph.add_dependency(be_phrase, subj, "nsubj")

            for child, l in list(dep_graph.children(comp)):
                dep_graph.remove_dependency(comp, child)
                dep_graph.add_dependency(be_phrase, child, l)

            dep_graph.remove_node(comp)
            continue

        # stand-alone implicit copula placed right before the complement
        implicit_be = dep_graph.create_node(FORM="(be)",
                                            LEMMA="(be)",
                                            UPOS="VERB",
                                            LOC=comp.LOC - 0.5)
        implicit_be.aux = True

        dep_graph.add_dependency(pred, implicit_be, "obj")
        dep_graph.add_dependency(implicit_be, subj, "nsubj")

        if comp_fronted and comp_is_modifier:
            comp_label = "amod"
        else:
            comp_label = "obj"
        dep_graph.add_dependency(implicit_be, comp, comp_label)
Beispiel #8
0
def multi_words_case(dep_graph: DependencyGraph):
    """
    Merge the words of a multi-word case marker into a single node.

    The pattern is ``noun -[x:y]-> x -[case]-> case``.  For each match whose
    case node is still in the graph, all offsprings of the direct ``case``
    children of ``x`` are gathered.  Matches that yield exactly one such node
    are skipped.  For every ``prefix:case`` relation between noun and x whose
    case part is a substring of the "_"-joined lemmas of the gathered nodes,
    the nodes (if all are still present) are merged into one node with the
    matched case node's UPOS and LOC, and the noun-x relation is replaced by
    ``prefix:<lemmas joined by spaces>``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """

    template = DependencyGraph()

    noun_slot = DependencyGraphNode()
    x_slot = DependencyGraphNode()
    case_slot = DependencyGraphNode()

    for slot in (noun_slot, x_slot, case_slot):
        template.add_node(slot)

    template.add_dependency(noun_slot, x_slot, r'\w*:\w*')
    template.add_dependency(x_slot, case_slot, r'\bcase\b')

    # matches are frozen because earlier merges may delete later case nodes
    for hit in list(dep_graph.match(template)):

        head = hit[noun_slot]
        dependent = hit[x_slot]
        matched_case = hit[case_slot]

        if not dep_graph.has_node(matched_case):
            continue

        direct_cases = [
            n for n, l in dep_graph.children(dependent,
                                             filter=lambda n, l: "case" == l)
        ]
        case_words = set()
        for direct_case in direct_cases:
            case_words.update(dep_graph.offsprings(direct_case))

        if len(case_words) == 1:
            continue

        case_words = sorted(case_words, key=lambda n: n.LOC)
        logger.debug("multi case discovered")
        for word in case_words:
            logger.debug(str(word))

        # Phase 1: find the relation prefixes whose case suffix is covered by
        # the collected words.  Sometimes the rel only contains one word.
        # Example:
        # that OBSF values within the extended trial balance may be misstated
        # due to data issues ( above and beyond existing conversations with
        # AA on model simplifications)
        accepted_prefixes = []
        for rel in dep_graph.get_dependency(head, dependent):
            if ":" not in rel:
                continue
            rel_str, case_str = rel.split(":")
            if case_str in "_".join(w.LEMMA for w in case_words):
                accepted_prefixes.append(rel_str)

        # Phase 2: merge.  After the first successful merge the words are gone
        # from the graph, so later prefixes fall through the has_node check.
        for rel_str in accepted_prefixes:

            logger.debug("we are merging:")
            for word in case_words:
                logger.debug(str(word))

            if not all(dep_graph.has_node(w) for w in case_words):
                continue

            merged_case = merge_dep_nodes(case_words,
                                          UPOS=matched_case.UPOS,
                                          LOC=matched_case.LOC)
            dep_graph.replace_nodes(case_words, merged_case)
            dep_graph.remove_dependency(head, dependent)

            lemma_phrase = " ".join(w.LEMMA for w in case_words)
            dep_graph.add_dependency(head, dependent,
                                     rel_str + ":" + lemma_phrase)
def process_conjunction(dep_graph: DependencyGraph, root: DependencyGraphNode):
    """
    Collapse the conjunction headed by ``root`` into one conjunction node.

    Steps performed, all of them mutating ``dep_graph`` in place:

    1. Collect the children of ``root`` attached through a ``conj*``
       relation.  A child that itself has a ``conj*`` child is processed
       recursively first and is represented by the conjunction node the
       recursive call returns.
    2. Sort ``root`` and those children by ``LOC`` and build the conjunction
       node with ``build_conjunction_node``.
    3. For every parent of ``root``: re-attach the parent either to the
       conjunction node (optionally suffixing the relation with a shared
       mark lemma) or to every component individually, then remove the
       ``conj*`` edges from ``root`` and link each component to the
       conjunction node with an ``arg_conj:<index>`` dependency.

    :param dep_graph: dependency graph, modified in place
    :param root: head of the conjunction; an ``AssertionError`` is raised if
        it has no ``conj*`` child
    :return: tuple ``(conj_node, parallel_components)`` - the new conjunction
        node and the LOC-sorted list of components it coordinates
    """
    # direct conj* children of the head
    conj_childs = [
        child for child, rels in dep_graph.children(
            root, filter=lambda n, l: l.startswith("conj"))
    ]

    assert conj_childs

    # the head itself is always one of the coordinated components
    parallel_components = [root]

    for child in conj_childs:

        # a child that heads its own conjunction is collapsed first
        is_nest = any(
            grand_rels.startswith("conj")
            for grand_sun, grand_rels in dep_graph.children(child))
        if is_nest:
            logger.debug("nested conj is found ")
            logger.debug(str(child))

            conj_node, parallel_nodes = process_conjunction(dep_graph, child)
            logger.debug("conj_node is created ")
            logger.debug(str(conj_node))

            # conj* edges from root to members of the nested conjunction are
            # redirected to the nested conjunction node
            for node in parallel_nodes:
                logger.debug("Containing nodes  ")
                logger.debug(str(node))
                rels = list(dep_graph.get_dependency(root, node))
                for rel in rels:
                    if rel.startswith("conj"):
                        logger.debug("remove dependency {0}".format(
                            (root.ID, node.ID, rel)))

                        dep_graph.remove_dependency(root, node, rel)
                        dep_graph.add_dependency(root, conj_node, rel)
            # from here on the nested conjunction node stands in for the child
            child = conj_node

        parallel_components.append(child)

    parallel_components.sort(key=lambda x: x.LOC)

    # NOTE: the block below is disabled code (merging of pure-noun
    # conjunctions into a single node); it is kept for reference only.
    # if all(n.UPOS in NOUN_UPOS for n in parallel_components):
    #
    #     logger.debug("Processing all noun conjunction")
    #
    #     is_pure_noun = True
    #
    #     merging_noun_nodes = []
    #     min_loc = 10000
    #     max_loc = -1
    #     for child in parallel_components:
    #         if isinstance(child, DependencyGraphNode):
    #             min_loc = min(min_loc, child.LOC)
    #             max_loc = max(min_loc, child.LOC)
    #         elif isinstance(child, DependencyGraphSuperNode):
    #             min_loc = min(min_loc, min([x.LOC for x in child.nodes]))
    #             max_loc = max(max_loc, max([x.LOC for x in child.nodes]))
    #         merging_noun_nodes.extend(dep_graph.offsprings(child))
    #
    #         logger.debug("Checking acl for {0}".format(child))
    #         for n, l in dep_graph.children(child):
    #             logger.debug(n)
    #             logger.debug("label {0}".format(l))
    #             if "acl" in l:
    #                 is_pure_noun = False
    #                 break
    #
    #     if is_pure_noun:
    #         merging_noun_nodes = [n for n in merging_noun_nodes if min_loc <= n.LOC <= max_loc]
    #         is_pure_noun = not any(n.UPOS in {"ADP", "VERB", "SCONJ", "AUX"} for n in merging_noun_nodes)
    #
    #     if is_pure_noun:
    #         # merged_noun_nodes.sort(key=lambda x: x.LOC)
    #         for node in merging_noun_nodes:
    #             logger.debug("merging {0}".format(node))
    #
    #         new_noun = merge_dep_nodes(merging_noun_nodes, UPOS=root.UPOS, LOC=root.LOC)
    #         dep_graph.replace_nodes(merging_noun_nodes, new_noun)
    #
    #         return new_noun, []

    # distinct parents of the head, in surface order
    root_parents = list(set(parent
                            for parent, rels in dep_graph.parents(root)))
    root_parents.sort(key=lambda x: x.LOC)

    # ic(list(map(str, root_parents)))

    conj_node, with_arg_palceholder = build_conjunction_node(
        dep_graph, root, root_parents, parallel_components)

    # indexed by parent ID below; each entry unpacks into
    # (prefix, shared_prefix, required_mark)
    relation_to_conj = get_relation_to_conj(dep_graph, root, root_parents,
                                            parallel_components)

    # per component: its children reached through a case / mark / cc label
    case_marks = dict()
    for index, node in enumerate(parallel_components):
        case_marks[node.ID] = [(n, l) for n, l in dep_graph.children(node)
                               if ("case" in l or "mark" in l or "cc" in l)]
    for key, values in case_marks.items():
        for v in values:
            logger.debug("case_marker = {} {} {}".format(
                key, v[0].ID, v[1].rels))

    logger.debug("relation_to_conj = {}".format(relation_to_conj))

    for parent in root_parents:
        # ic(parent)

        prefix, shared_prefix, required_mark = relation_to_conj[parent.ID]
        # Attach the parent to the conjunction node as a whole when the
        # relation is a core argument / clausal complement, when no mark is
        # required, or when all components require the same mark.
        if any(x in prefix for x in {"subj", "obj", "ccomp", "xcomp"}) \
                or not required_mark or len(set(required_mark)) == 1:

            for node in parallel_components:
                dep_graph.remove_dependency(parent, node)

            relation = prefix

            if required_mark and len(set(required_mark)) == 1:
                ## with same mark

                mark_lemma = list(set(required_mark))[0]

                relation += ":" + mark_lemma

                mark_node = find_mark(case_marks, parallel_components,
                                      mark_lemma)

                if mark_node:

                    mark_node, mark_rel = mark_node

                    # removing and re-adding the node drops all of its edges
                    dep_graph.remove_node(mark_node)
                    dep_graph.add_node(mark_node)  # clear the dependency

                    dep_graph.add_dependency(conj_node, mark_node, mark_rel)
                else:
                    logger.error("cannot find the mark node")

            dep_graph.add_dependency(parent, conj_node, relation)

        else:
            # components need different marks: keep one edge per component

            complete_missing_case_mark(dep_graph, root, root_parents,
                                       parallel_components, relation_to_conj,
                                       case_marks)

            if not required_mark:
                required_mark = [None] * len(parallel_components)

            for index, (node, mark) in enumerate(
                    zip(parallel_components, required_mark)):
                if mark:
                    rel = prefix + ":" + mark
                else:
                    rel = prefix

                # if rel.startswith("conj"):
                #    continue
                logger.debug("add dependency {0}".format(
                    (parent.ID, node.ID, rel)))

                dep_graph.add_dependency(parent, node, rel)

        # NOTE(review): this loop is nested inside ``for parent in
        # root_parents``, so it runs once per parent and not at all when
        # ``root`` has no parent - confirm that this is intended.
        for idx, node in enumerate(parallel_components):
            if node != root:
                rels = dep_graph.get_dependency(root, node)
                for rel in rels:
                    if rel.startswith("conj"):
                        dep_graph.remove_dependency(root, node)

            if with_arg_palceholder:
                index = idx + 1
            else:
                # a, but b, b should be the arg1 and a be the arg2
                index = len(parallel_components) - idx

            dep_graph.add_dependency(conj_node, node,
                                     "arg_conj:{0}".format(index))

    return conj_node, parallel_components
def build_conjunction_node(dep_graph: DependencyGraph, root, root_parents,
                           parallel_components):
    """
    Build the node that represents the connective of a conjunction.

    ``parallel_components`` is sorted by LOC in place.  For every adjacent
    pair the children of the right component that lie strictly between the
    two are scanned; connective words are collected (see the inline
    comments), falling back to the literal ``"AND"`` when none is found.

    With exactly one connective phrase that phrase is used as is; otherwise
    the phrases are interleaved with the placeholders ``{1}``, ``{2}``, ...
    A ``cc:preconj`` child of the first component is put in front and removed
    from the graph.  The pieces are merged into a node flagged ``is_conj``
    with the UPOS, FEATS and LOC of ``root``; the collected connective nodes
    are removed from the graph and the merged node is added.

    :param dep_graph: dependency graph, modified in place
    :param root: head of the conjunction, source of UPOS / FEATS / LOC
    :param root_parents: not used by this function
    :param parallel_components: coordinated nodes (sorted in place)
    :return: tuple ``(conj_node, uses_placeholders)``; the flag is False only
        when exactly one connective phrase was found
    """
    parallel_components.sort(key=lambda x: x.LOC)

    connective_words = {"and", "or", "but", "not", "as well as"}
    connectors = []  # one list of pieces per adjacent component pair

    for left, right in pairwise(parallel_components):

        between = []
        right_children = sorted(dep_graph.children(right),
                                key=lambda x: x[0].LOC)
        for n, l in right_children:

            if not left.LOC < n.LOC < right.LOC:
                continue

            # case / mark / cc child that is a CCONJ or whose lemma contains
            # one of the connective words (substring test on the lemma)
            linking_label = "case" in l or "mark" in l or "cc" in l
            if linking_label and (any(word in n.LEMMA
                                      for word in connective_words)
                                  or n.UPOS == "CCONJ"):
                between.append(n)

            if "punct" in l:
                between.append(n)

            # childless "so" / "also" adverbs count as connectives as well
            if "advmod" in l and any(word in n.LEMMA
                                     for word in {"so", "also"}):
                if not list(dep_graph.children(n)):
                    between.append(n)

        connectors.append(between if between else ["AND"])

    if len(connectors) == 1:
        # NOTE: this is the very same list object as connectors[0]; a preconj
        # inserted below is therefore also seen by the clean-up loop.
        phrase_parts = connectors[0]
        uses_placeholders = False
    else:
        uses_placeholders = True
        phrase_parts = ["{1}"]
        for slot, connector in enumerate(connectors, start=2):
            phrase_parts.extend(connector)
            phrase_parts.append("{{{0}}}".format(slot))

    first_children = sorted(dep_graph.children(parallel_components[0]),
                            key=lambda x: x[0].LOC,
                            reverse=True)
    for n, l in first_children:
        if l == "cc:preconj":
            phrase_parts.insert(0, n)
            dep_graph.remove_node(n)

    conj_node = merge_dep_nodes(
        phrase_parts,
        is_conj=True,
        UPOS=root.UPOS,
        FEATS=root.FEATS,
        LOC=root.LOC,
    )

    # the connective words now live inside conj_node; drop the originals
    for connector in connectors:
        for piece in connector:
            if isinstance(piece, DependencyGraphNode):
                dep_graph.remove_node(piece)

    dep_graph.add_node(conj_node)

    return conj_node, uses_placeholders
def oblique_with_prep(dep_graph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Turn an oblique with a preposition into a predicate, e.g.
    "cut X by a knife".

    The pattern is ``head -[obl*]-> oblique -[case|mark]-> prep``.  Pairs
    that ``oia_graph`` already relates are skipped.  The ``case`` / ``mark``
    children of the oblique are passed, together with the matched
    preposition, to ``continuous_component`` (defined elsewhere; presumably
    it returns the contiguous run of case words containing the preposition -
    TODO confirm).  The predicate word is the last node of that run whose
    lower-cased LEMMA is among the values of the head-oblique edge, or the
    last node of the run when none qualifies.  The head becomes argument 1
    (``mod=True``) and the oblique argument 2 of that predicate.

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph receiving the new nodes and edges
    :param context: conversion context (not used by this rule)
    :return: None
    """

    # cut X by a knife
    pattern = DependencyGraph()
    verb_node = DependencyGraphNode(UPOS="VERB|ADJ|ADV|NOUN|X|PROPN|PRON")
    # adj is for "has more on "
    # adv is for "south of XXXX"
    prep_node = DependencyGraphNode(UPOS=r"PRON|ADP|VERB|SCONJ|ADJ")
    # verb is for including/according, adj is for "prior to"

    oblique_node = DependencyGraphNode()
    pattern.add_node(verb_node)
    pattern.add_node(prep_node)
    pattern.add_node(oblique_node)
    pattern.add_dependency(verb_node, oblique_node, r'\bobl')
    pattern.add_dependency(oblique_node, prep_node, r"case|mark")

    for match in dep_graph.match(pattern):

        dep_prep_node = match[prep_node]
        dep_verb_node = match[verb_node]
        dep_oblique_node = match[oblique_node]

        if oia_graph.has_relation(dep_verb_node, dep_oblique_node):
            continue

        oblique_edge = dep_graph.get_dependency(dep_verb_node,
                                                dep_oblique_node)
        oblique_cases = oblique_edge.values()

        case_children = [
            x for x, l in dep_graph.children(
                dep_oblique_node,
                filter=lambda n, l: l == "case" or l == "mark")
        ]
        connected_case_nodes = continuous_component(case_children,
                                                    dep_prep_node)

        # Prefer the case word that the relation label itself names; when
        # several qualify the last one wins.
        head_node = None
        for node in connected_case_nodes:
            if node.LEMMA.lower() in oblique_cases:
                head_node = node

        # Compare against None explicitly so that the fallback never depends
        # on the truthiness of a node object.
        if head_node is None:
            head_node = connected_case_nodes[-1]

        pred_node = oia_graph.add_words(head_node.position)
        arg1_node = oia_graph.add_words(dep_verb_node.position)
        arg2_node = oia_graph.add_words(dep_oblique_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)