def obl_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Attach oblique dependents to their heads as modifiers in the OIA graph.

    Every head/dependent pair joined by an ``obl`` edge is considered; pairs
    for which ``oia_graph.has_relation(..., direct_link=False)`` already
    reports a relation are left untouched.

    :param dep_graph: dependency graph searched for ``obl`` edges
    :param oia_graph: OIA graph that receives the words and the mod edge
    :param context: conversion context (not used by this converter)
    :return: None
    """
    obl_pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    head_tpl = DependencyGraphNode()
    dependent_tpl = DependencyGraphNode()

    obl_pattern.add_nodes([head_tpl, dependent_tpl])
    obl_pattern.add_dependency(head_tpl, dependent_tpl, r'\bobl')

    for found in dep_graph.match(obl_pattern):
        dep_head, dep_dependent = found[head_tpl], found[dependent_tpl]

        already_related = oia_graph.has_relation(dep_head,
                                                 dep_dependent,
                                                 direct_link=False)
        if not already_related:
            oia_head = oia_graph.add_words(dep_head.position)
            oia_dependent = oia_graph.add_words(dep_dependent.position)

            # the oblique dependent modifies its head
            oia_graph.add_mod(oia_dependent, oia_head)
# Example #2
# 0
def aclwhose(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert possessive relative clauses such as
    "the person whose/who's cat is cute".

    The pattern is: noun -acl:relcl-> clause head -nsubj/obj/iobj->
    possessee -nmod:poss-> pronoun (``PronType=Int``).  For every match the
    four words are added to the OIA graph and linked with
    ``add_function(clause, noun)``, ``add_function(pronoun, possessee)`` and
    ``add_ref(pronoun, noun)``.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and edges
    :param context: conversion context (not used by this converter)
    :return: None; the OIA graph is updated in place
    """

    pattern = DependencyGraph()
    noun_tpl = DependencyGraphNode()  # person
    whose_tpl = DependencyGraphNode(FEATS={"PronType": "Int"})  # whose
    possessee_tpl = DependencyGraphNode()  # cat
    clause_tpl = DependencyGraphNode()  # cute

    pattern.add_nodes([noun_tpl, whose_tpl, possessee_tpl, clause_tpl])

    pattern.add_dependency(noun_tpl, clause_tpl, r'.*acl:relcl.*')
    # The alternation must be grouped: without the group the expression
    # parses as ".*nsubj" | "obj" | "iobj.*", so the surrounding ".*" only
    # applies to the first and last alternatives.
    pattern.add_dependency(clause_tpl, possessee_tpl,
                           r'.*(?:nsubj|obj|iobj).*')
    pattern.add_dependency(possessee_tpl, whose_tpl, r'.*nmod:poss.*')
    # NOTE: the pattern does not require a ref edge between the pronoun and
    # the noun.

    templates = [noun_tpl, whose_tpl, possessee_tpl, clause_tpl]

    for match in dep_graph.match(pattern):
        dep_nodes = [match[tpl] for tpl in templates]

        oia_noun, oia_whose, oia_possessee, oia_clause = [
            oia_graph.add_words(dep_node.position) for dep_node in dep_nodes
        ]

        oia_graph.add_function(oia_clause, oia_noun)
        oia_graph.add_function(oia_whose, oia_possessee)
        oia_graph.add_ref(oia_whose, oia_noun)
def adv_adj_modifier(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Record ``advmod`` dependents of adjectives as mod edges in the OIA graph.

    Matches an ``ADJ`` head whose ``advmod`` dependent is tagged ``ADV``,
    ``X`` or ``NOUN``.  Pairs for which ``oia_graph.has_relation`` already
    reports a relation are skipped.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and the mod edge
    :param context: conversion context (not used by this converter)
    :return: None
    """
    template = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    head_tpl = DependencyGraphNode(UPOS="ADJ")
    modifier_tpl = DependencyGraphNode(UPOS="ADV|X|NOUN")

    template.add_nodes([head_tpl, modifier_tpl])
    template.add_dependency(head_tpl, modifier_tpl, r'advmod')

    for found in dep_graph.match(template):
        dep_head, dep_modifier = found[head_tpl], found[modifier_tpl]

        if not oia_graph.has_relation(dep_head, dep_modifier):
            oia_head = oia_graph.add_words(dep_head.position)
            oia_modifier = oia_graph.add_words(dep_modifier.position)

            oia_graph.add_mod(oia_modifier, oia_head)
# Example #4
# 0
def negation(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Put every "not" and its head under a fresh ``SCOPE`` auxiliary node.

    Any dependency edge (label pattern ``\\w*``) from a head to a word whose
    lemma is "not" is matched.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the SCOPE node and arguments
    :param context: conversion context (not used by this converter)
    :return: None
    """
    template = DependencyGraph()

    negator_tpl = DependencyGraphNode(LEMMA="not")
    head_tpl = DependencyGraphNode()

    template.add_nodes([negator_tpl, head_tpl])
    template.add_dependency(head_tpl, negator_tpl, r'\w*')

    for found in dep_graph.match(template):
        dep_negator = found[negator_tpl]
        dep_head = found[head_tpl]

        scope = oia_graph.add_aux(label="SCOPE")

        oia_negator = oia_graph.add_words(dep_negator.position)
        oia_head = oia_graph.add_words(dep_head.position)

        # NOTE(review): both operands are registered with argument index 1;
        # confirm whether the head was meant to be argument 2.
        for operand in (oia_negator, oia_head):
            oia_graph.add_argument(scope, operand, 1)
# Example #5
# 0
def oblique_relative_clause(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert oblique relative clauses, e.g.
    "An announcement, in which he stated that ...".

    Pattern: antecedent -acl:relcl-> clause head, antecedent -ref-> relative
    pronoun (``PronType=Rel``), clause head -obl-> relative pronoun,
    relative pronoun -case-> case marker.  The case marker becomes the
    predicate with the clause head as argument 1 and the pronoun as
    argument 2; a ref edge is added from the pronoun to the antecedent.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and edges
    :param context: conversion context (not used by this converter)
    :return: None
    """
    template = DependencyGraph()
    antecedent_tpl = DependencyGraphNode()
    case_tpl = DependencyGraphNode()
    pronoun_tpl = DependencyGraphNode(FEATS={"PronType": "Rel"})
    clause_tpl = DependencyGraphNode()

    template.add_nodes([antecedent_tpl, case_tpl, pronoun_tpl, clause_tpl])

    template.add_dependency(antecedent_tpl, clause_tpl, r'acl:relcl\w*')
    template.add_dependency(antecedent_tpl, pronoun_tpl, r'ref')
    template.add_dependency(clause_tpl, pronoun_tpl, r'obl')
    template.add_dependency(pronoun_tpl, case_tpl, r'case')

    for found in dep_graph.match(template):
        dep_antecedent = found[antecedent_tpl]
        dep_case = found[case_tpl]
        dep_pronoun = found[pronoun_tpl]
        dep_clause = found[clause_tpl]

        oia_antecedent = oia_graph.add_words(dep_antecedent.position)
        oia_case = oia_graph.add_words(dep_case.position)
        oia_pronoun = oia_graph.add_words(dep_pronoun.position)
        oia_clause = oia_graph.add_words(dep_clause.position)

        oia_graph.add_argument(oia_case, oia_clause, 1)
        oia_graph.add_argument(oia_case, oia_pronoun, 2)
        oia_graph.add_ref(oia_pronoun, oia_antecedent)
def appositive_phrase(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert appositions such as "Trump, president of US, came".

    For each ``appos`` edge an ``APPOS`` auxiliary node is created with the
    head as argument 1 and the appositive as argument 2.  Nothing is added
    when either word comes back falsy from ``oia_graph.add_words``.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the APPOS node and arguments
    :param context: conversion context (not used by this converter)
    :return: None
    """
    template = DependencyGraph()

    head_tpl = DependencyGraphNode()
    appos_tpl = DependencyGraphNode()

    template.add_nodes([head_tpl, appos_tpl])
    template.add_dependency(head_tpl, appos_tpl, r'\w*appos\w*')

    for found in dep_graph.match(template):
        dep_head, dep_appos = found[head_tpl], found[appos_tpl]

        oia_appos = oia_graph.add_words(dep_appos.position)
        oia_head = oia_graph.add_words(dep_head.position)

        if not (oia_appos and oia_head):
            continue

        appos_pred = oia_graph.add_aux(label="APPOS")
        oia_graph.add_argument(appos_pred, oia_head, 1)
        oia_graph.add_argument(appos_pred, oia_appos, 2)
# Example #7
# 0
def separated_asas(dep_graph: DependencyGraph):
    """
    Merge split equality comparisons of the form "A is as X a C as B".

    Pattern: noun -amod-> adjective -advmod-> first "as" -advcl:as-> B
    -mark-> second "as", with the surface order
    first "as" < adjective < noun < second "as" < B.  All nodes located from
    the first "as" through the adjective, plus the second "as", are merged
    into one ``ADJ`` node positioned at the second "as"; the noun then gets
    an ``acl:<merged form>`` edge to B.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    template = DependencyGraph()

    adj_tpl = DependencyGraphNode(UPOS="ADJ")
    noun_tpl = DependencyGraphNode(UPOS="NOUN")
    first_as_tpl = DependencyGraphNode(FORM="as")
    second_as_tpl = DependencyGraphNode(FORM="as")
    target_tpl = DependencyGraphNode()

    template.add_nodes(
        [noun_tpl, adj_tpl, first_as_tpl, second_as_tpl, target_tpl])
    template.add_dependency(noun_tpl, adj_tpl, r'amod')
    template.add_dependency(adj_tpl, first_as_tpl, r'\w*advmod\w*')
    template.add_dependency(first_as_tpl, target_tpl, r'\w*advcl:as\w*')
    template.add_dependency(target_tpl, second_as_tpl, r'mark')

    # Phase 1: collect the rewrites without touching the graph.
    pending = []
    for found in dep_graph.match(template):
        noun = found[noun_tpl]
        adj = found[adj_tpl]
        first_as = found[first_as_tpl]
        second_as = found[second_as_tpl]
        target = found[target_tpl]

        if not (first_as.LOC < adj.LOC < noun.LOC < second_as.LOC <
                target.LOC):
            continue

        span = [
            node for node in dep_graph.nodes()
            if first_as.LOC <= node.LOC <= adj.LOC
        ]
        span.append(second_as)
        span = sorted(span, key=lambda node: node.LOC)

        merged = merge_dep_nodes(span, UPOS="ADJ", LOC=second_as.LOC)
        pending.append((span, merged, noun, target))

    # Phase 2: apply the collected rewrites.
    for span, merged, noun, target in pending:
        dep_graph.replace_nodes(span, merged)

        dep_graph.remove_dependency(merged, target)
        dep_graph.remove_dependency(noun, merged)

        dep_graph.add_dependency(noun, target, "acl:" + merged.FORM)
def acl_mod_verb(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Attach a verb that heads an ``acl`` clause to its noun as a modifier.

    This is called after adnominal_clause_mark, which means there is no mark.
    Pairs already flagged in ``context`` or already related in the OIA graph
    are skipped.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and the mod edge
    :param context: conversion context consulted via ``is_processed``
    :return: None
    :raises AssertionError: if the edge carries more than one ``acl:`` label
    """
    template = DependencyGraph()

    # ADJ is for the cases that "many/some" are abbrv of many X/some X,
    # representing NOUN; ADV is for the case of "here" in
    # "i am here thinking xxx".
    head_tpl = template.create_node(UPOS="NOUN|PRON|PROPN|ADJ|ADV|NUM")
    # AUX is for can/have, which omit the true verb.
    clause_tpl = template.create_node(UPOS="VERB|AUX")

    template.add_nodes([head_tpl, clause_tpl])
    template.add_dependency(head_tpl, clause_tpl, r'acl')

    for found in dep_graph.match(template):
        dep_head, dep_clause = found[head_tpl], found[clause_tpl]

        if (context.is_processed(dep_head, dep_clause)
                or oia_graph.has_relation(
                    dep_head, dep_clause, direct_link=False)):
            continue

        oia_clause = oia_graph.add_words(dep_clause.position)
        oia_head = oia_graph.add_words(dep_head.position)

        edge = dep_graph.get_dependency(dep_head, dep_clause)
        subtyped = [rel for rel in edge.rels if rel.startswith("acl:")]

        # At most one "acl:<x>" label is expected.  The subtype used to
        # select a predicate word; that path is disabled, so a plain
        # modifier edge is always produced.
        assert len(subtyped) <= 1

        oia_graph.add_mod(oia_clause, oia_head)
# Example #9
# 0
def amod_obl(dep_graph: DependencyGraph):
    """
    Fuse an adjective and the adposition of its oblique into one node,
    e.g. "more than", "successful by".

    Pattern: noun -amod-> adjective -obl:x-> oblique -case-> adposition,
    with surface order noun < adjective < adposition < oblique.  Matches are
    ignored when the adjective has more than one ``obl`` child or when the
    adposition's form is not among the values of the adjective/oblique
    dependency.  The adjective and adposition are merged into an ``ADP``
    node and the noun gets an ``nmod:<merged form>`` edge to the oblique.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    template = DependencyGraph()

    noun_tpl = DependencyGraphNode(UPOS=r"NOUN|PRON")
    adj_tpl = DependencyGraphNode(UPOS="ADJ")
    adp_tpl = DependencyGraphNode(UPOS="ADP")
    obl_tpl = DependencyGraphNode()

    template.add_nodes([noun_tpl, adj_tpl, adp_tpl, obl_tpl])
    template.add_dependency(noun_tpl, adj_tpl, r'amod')
    template.add_dependency(adj_tpl, obl_tpl, r'obl:\w+')
    template.add_dependency(obl_tpl, adp_tpl, r'case')

    # Phase 1: collect candidates without touching the graph.
    candidates = []
    for found in dep_graph.match(template):
        noun = found[noun_tpl]
        adj = found[adj_tpl]
        obl = found[obl_tpl]
        adp = found[adp_tpl]

        obl_children = list(
            dep_graph.children(adj, filter=lambda n, l: "obl" in l))

        # more than one oblique: similar in form to the one handled here,
        # but not the same construction
        if (len(obl_children) <= 1
                and adp.FORM in dep_graph.get_dependency(adj, obl).values()
                and noun.LOC < adj.LOC < adp.LOC < obl.LOC):
            candidates.append((noun, adj, obl, adp))

    # Phase 2: rewrite the graph.
    for noun, adj, obl, adp in candidates:
        fused = merge_dep_nodes([adj, adp], UPOS="ADP", LOC=adp.LOC)

        dep_graph.remove_dependency(noun, adj)
        dep_graph.remove_dependency(adj, obl)

        dep_graph.replace_nodes([adj, adp], fused)
        dep_graph.add_dependency(noun, obl, "nmod:" + fused.FORM)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Convert case-marked nominal modifiers (``nmod:x``), e.g.
    "the office of the chair" or "Istanbul in Turkey".

    For every parent -nmod-> child -case-> marker match whose parent and
    child are not already related in the OIA graph, the case marker becomes
    the predicate, the parent its argument 1 (added with ``mod=True``) and
    the child its argument 2.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and arguments
    :param context: conversion context (not used by this converter)
    :return: None
    """

    pattern = DependencyGraph()

    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    pattern.add_nodes([parent_node, child_node, case_node])

    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):

        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        # The predicate is always the case word itself.  This used to be
        # guarded by a comparison of the edge label against
        # "nmod:<lemma>" / "nmod:<form>" (e.g. vs/versus,
        # according/accord), but both outcomes built the very same node, so
        # the comparison and the dependency lookup feeding it were dropped.
        pred_node = oia_graph.add_words(dep_case_node.position)

        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
# Example #11
# 0
def such_that(dep_graph: DependencyGraph):
    """
    Fuse "such ... that" into a single marker, e.g. "such a high price that".

    Pattern: noun -det:predet-> "such" -advcl:that-> clause verb -mark->
    "that", with surface order such < noun < that < clause verb.  "such" and
    "that" are merged into one ``SCONJ`` node placed at "that"; the noun
    gets an ``advcl:<merged form>`` edge to the clause verb, the clause verb
    gets a ``mark`` edge to the merged node, and the two original words are
    removed.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    template = DependencyGraph()

    noun_tpl = DependencyGraphNode(UPOS="NOUN")
    such_tpl = DependencyGraphNode(FORM="such")
    clause_tpl = DependencyGraphNode(UPOS="VERB")
    that_tpl = DependencyGraphNode(FORM="that")

    template.add_nodes([noun_tpl, such_tpl, clause_tpl, that_tpl])
    template.add_dependency(noun_tpl, such_tpl, r'det:predet')
    template.add_dependency(such_tpl, clause_tpl, r'advcl:that')
    template.add_dependency(clause_tpl, that_tpl, r'mark')

    # Phase 1: collect candidates without touching the graph.
    candidates = []
    for found in dep_graph.match(template):
        noun = found[noun_tpl]
        such = found[such_tpl]
        clause = found[clause_tpl]
        that = found[that_tpl]

        if such.LOC < noun.LOC < that.LOC < clause.LOC:
            candidates.append((noun, such, clause, that))

    # Phase 2: rewrite the graph.
    for noun, such, clause, that in candidates:
        fused = merge_dep_nodes([such, that], UPOS="SCONJ", LOC=that.LOC)

        dep_graph.add_node(fused)
        dep_graph.add_dependency(noun, clause, "advcl:" + fused.FORM)
        dep_graph.add_dependency(clause, fused, "mark")

        for obsolete in (such, that):
            dep_graph.remove_node(obsolete)
def acl_mod_adjv(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Attach an adjective/adverb that heads an ``acl`` edge to its nominal
    head as a modifier.

    The head must be tagged ``NOUN``, ``PRON``, ``PROPN`` or ``NUM`` and the
    dependent ``ADJ`` or ``ADV``.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and the mod edge
    :param context: conversion context (not used by this converter)
    :return: None
    """
    template = DependencyGraph()

    head_tpl = DependencyGraphNode(UPOS="NOUN|PRON|PROPN|NUM")
    modifier_tpl = DependencyGraphNode(UPOS="ADJ|ADV")

    template.add_nodes([head_tpl, modifier_tpl])
    template.add_dependency(head_tpl, modifier_tpl, r'acl')

    for found in dep_graph.match(template):
        dep_head, dep_modifier = found[head_tpl], found[modifier_tpl]

        oia_head = oia_graph.add_words(dep_head.position)
        oia_modifier = oia_graph.add_words(dep_modifier.position)

        oia_graph.add_mod(oia_modifier, oia_head)
def adv_verb_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Convert ``advmod`` dependents of verb-like heads.

    The adv before the verb should be processed by verb_phrase; this
    converter handles the adv after the verb (e.g. "verb1 in order to
    verb2").  Three shapes are produced:

    * the modifier has exactly one ``obl`` child and that child has no
      ``case`` child: the modifier becomes a predicate with the head as
      argument 1 (``mod=True``) and the oblique as argument 2;
    * otherwise, if the modifier has a ``mark`` child: the first mark
      becomes the predicate with the head as argument 1 (``mod=True``) and
      the modifier as argument 2;
    * otherwise a plain mod edge from the modifier to the head.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and edges
    :param context: conversion context consulted via ``is_processed``
    :return: None
    """
    template = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    head_tpl = DependencyGraphNode(
        UPOS="VERB|NOUN|PROPN|AUX|PRON")  # aux is for be word
    modifier_tpl = DependencyGraphNode(UPOS="ADV|X|NOUN|ADJ|VERB")

    template.add_nodes([head_tpl, modifier_tpl])
    template.add_dependency(head_tpl, modifier_tpl, r'advmod')

    for found in dep_graph.match(template):
        dep_head, dep_modifier = found[head_tpl], found[modifier_tpl]

        if (context.is_processed(dep_head, dep_modifier)
                or oia_graph.has_relation(dep_head, dep_modifier)):
            continue

        obliques = [
            child for child, rel in dep_graph.children(
                dep_modifier, filter=lambda n, l: l.startswith("obl"))
        ]

        sole_oblique = None
        oblique_is_case_marked = False
        if len(obliques) == 1:
            sole_oblique = obliques[0]
            case_children = [
                child for child, rel in dep_graph.children(
                    sole_oblique, filter=lambda n, l: "case" in l)
            ]
            # if obl with case, let the oblique converter process it
            oblique_is_case_marked = bool(case_children)

        marks = [
            child for child, rel in dep_graph.children(
                dep_modifier, filter=lambda n, l: l.startswith("mark"))
        ]

        oia_head = oia_graph.add_words(dep_head.position)
        oia_modifier = oia_graph.add_words(dep_modifier.position)

        if sole_oblique and not oblique_is_case_marked:
            oia_oblique = oia_graph.add_words(sole_oblique.position)

            oia_graph.add_argument(oia_modifier, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_modifier, oia_oblique, 2)
        elif marks:
            oia_mark = oia_graph.add_words(marks[0].position)

            oia_graph.add_argument(oia_mark, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_mark, oia_modifier, 2)
        else:
            oia_graph.add_mod(oia_modifier, oia_head)
# Example #14
# 0
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                           context: UD2OIAContext):
    """
    Convert object-extracted/referred relative clauses, e.g.
    "the person that Andy knows".

    Pattern: entity -acl:relcl-> verb -subj-> subject.  Matches whose
    subject lemma is "what", "who", "which" or "that" are skipped, as are
    entity/verb pairs already flagged in ``context``.  For the remaining
    matches both edges are flagged as processed, the subject becomes
    argument 1 of the verb, an optional ``ref`` child of the entity located
    between the entity and the verb is linked in, the entity becomes
    argument 2 of the verb (``mod=True``), and every ``ccomp`` child of the
    verb becomes argument 3.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and edges
    :param context: conversion context; read via ``is_processed`` and
        updated via ``processed``
    :return: None
    :raises Exception: when a ref word without a case child is attached to
        the verb by a relation that neither contains "obj" nor equals
        "advmod"
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()

    pattern.add_nodes([verb_node, entity_node, subj_node])

    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):

        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # A relative pronoun in subject position is not an object-extracted
        # clause; this converter ignores it.
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        # Flag both edges before anything else; note that this happens even
        # when the has_relation check below bails out.
        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        # Accept only "ref" children of the entity that sit strictly between
        # the entity and the verb in surface order.
        def __valid_ref(n, l):
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node,
                                                          filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # the ref word with the largest LOC, i.e. the last one before
            # the verb
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            case_nodes.sort(key=lambda x: x.LOC)

            # Nothing more is done for the ref word when the verb has no
            # (truthy) dependency to it.
            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node,
                                           oia_verb_node,
                                           1,
                                           mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                else:

                    if "obj" in ref_relation:
                        oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                    elif ref_relation == "advmod":
                        oia_graph.add_mod(oia_ref_node, oia_verb_node)
                    else:
                        raise Exception(
                            "unknown relation: {}".format(ref_relation))

        # NOTE(review): the subject was already added as argument 1 above;
        # this second call repeats it - confirm whether add_argument is
        # idempotent or whether one of the two calls should go.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): `rels` is not read anywhere in this function.
        rels = dep_graph.get_dependency(dep_entity_node, dep_verb_node)

        # every clausal complement of the verb becomes its third argument
        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
# Example #15
# 0
def subject_relative_clause_loop(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert subject-extracted/referred relative clauses.

    Examples: "the person who is tall" / "that is killed" (with ref) and
    "the person waiting for the baby" (without ref).

    Two passes are made.  The first matches the two-node loop
    entity -acl:relcl-> clause -subj-> entity; it exists as a separate
    "loop" pattern because the match algorithm does not match part of a
    loop (see test_match for more detail).  The second matches
    entity -acl:relcl-> verb -subj-> what/who/which/that and flags both
    edges as processed in ``context``.

    :param dep_graph: dependency graph to search
    :param oia_graph: OIA graph that receives the words and edges
    :param context: conversion context, updated via ``processed``
    :return: None
    """
    # ---- pass 1: the entity is itself the subject of the clause ----
    loop_pattern = DependencyGraph()
    loop_entity_tpl = DependencyGraphNode()
    loop_clause_tpl = DependencyGraphNode()
    loop_pattern.add_node(loop_entity_tpl)
    loop_pattern.add_node(loop_clause_tpl)
    loop_pattern.add_dependency(loop_clause_tpl, loop_entity_tpl,
                                r'\w*subj\w*')
    loop_pattern.add_dependency(loop_entity_tpl, loop_clause_tpl,
                                r'\w*acl:relcl\w*')

    for found in dep_graph.match(loop_pattern):
        dep_entity = found[loop_entity_tpl]
        dep_clause = found[loop_clause_tpl]

        oia_clause = oia_graph.add_words(dep_clause.position)
        oia_entity = oia_graph.add_words(dep_entity.position)

        # "ref" children of the entity located strictly between the entity
        # and the clause head, in surface order
        refs = sorted(
            (child for child, rel in dep_graph.children(
                dep_entity,
                filter=lambda n, l:
                (l == "ref" and dep_entity.LOC < n.LOC < dep_clause.LOC))),
            key=lambda node: node.LOC)

        if not refs:
            # function and pred, seems we need another label
            oia_graph.add_argument(oia_clause, oia_entity, 1, mod=True)
            continue

        dep_ref = refs[-1]
        oia_ref = oia_graph.add_words(dep_ref.position)

        case_marks = sorted(
            (child for child, rel in dep_graph.children(
                dep_ref, filter=lambda n, l: "case" in l)),
            key=lambda node: node.LOC)

        if case_marks:
            # with which xxxx, the with will become the root pred
            oia_case = oia_graph.add_words(case_marks[-1].position)

            oia_graph.add_argument(oia_case, oia_clause, 1)
            oia_graph.add_argument(oia_case, oia_ref, 2)
        else:
            oia_graph.add_argument(oia_clause, oia_ref, 1)

        oia_graph.add_ref(oia_entity, oia_ref)

    # ---- pass 2: a relative pronoun is the subject of the clause ----
    pronoun_pattern = DependencyGraph()
    verb_tpl = DependencyGraphNode()
    entity_tpl = DependencyGraphNode()
    pronoun_tpl = DependencyGraphNode(LEMMA=r"what|who|which|that")

    pronoun_pattern.add_nodes([verb_tpl, entity_tpl, pronoun_tpl])

    pronoun_pattern.add_dependency(verb_tpl, pronoun_tpl, r'\w*subj\w*')
    pronoun_pattern.add_dependency(entity_tpl, verb_tpl, r'\w*acl:relcl\w*')

    for found in dep_graph.match(pronoun_pattern):
        dep_entity = found[entity_tpl]
        dep_verb = found[verb_tpl]
        dep_pronoun = found[pronoun_tpl]

        context.processed(dep_verb, dep_pronoun)
        context.processed(dep_entity, dep_verb)

        oia_verb = oia_graph.add_words(dep_verb.position)
        oia_entity = oia_graph.add_words(dep_entity.position)
        oia_pronoun = oia_graph.add_words(dep_pronoun.position)

        oia_graph.add_mod(oia_verb, oia_entity)
        oia_graph.add_ref(oia_entity, oia_pronoun)
        oia_graph.add_argument(oia_verb, oia_pronoun, 1)
# Example #16
# 0
def continuous_asas(dep_graph: DependencyGraph):
    """
    Merge contiguous "as X as" expressions, e.g. "as far as I know", into a
    single adposition node.

    Notes on the construction:

    * the first "as" is always the advmod of a following element, X, which
      is within the range of as ... as;
    * the second "as" is always the dependent of B;
    * B sometimes depends on the first "as" (pattern1) and sometimes on X
      (pattern2);
    * sometimes X has a head that is also within the range of as ... as.

    For every match with surface order first "as" < X < second "as" < B,
    all nodes located from the first "as" through X, plus the second "as",
    are merged into one ``ADP`` node placed at X.  B is then attached to the
    head of X with ``advcl:<merged form>`` plus a ``mark`` edge when that
    head is a ``VERB``, otherwise with ``obl:<merged form>`` plus a ``case``
    edge.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """

    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|PRON|PROPN")
    adv_node = DependencyGraphNode(UPOS="ADV|ADJ")
    as1_node = DependencyGraphNode(LEMMA="as")
    as2_node = DependencyGraphNode(LEMMA="as")
    verb2_node = DependencyGraphNode(UPOS="VERB|ADJ|NOUN|PROPN|PRON")
    # ADJ is for as soon as possible

    # pattern1: B (verb2) hangs off the first "as"
    pattern1 = DependencyGraph()
    pattern1.add_nodes([verb_node, adv_node, as1_node, as2_node, verb2_node])
    pattern1.add_dependency(verb_node, adv_node, r'advmod|amod')
    pattern1.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
    pattern1.add_dependency(as1_node, verb2_node, r'advcl:as|obl:as|advmod')
    pattern1.add_dependency(verb2_node, as2_node, r'mark|case')

    # pattern2: B (verb2) hangs off X (adv) instead
    pattern2 = DependencyGraph()
    pattern2.add_nodes([verb_node, adv_node, as1_node, as2_node, verb2_node])
    pattern2.add_dependency(verb_node, adv_node, r'advmod|amod')
    pattern2.add_dependency(adv_node, as1_node, r'\w*advmod\w*')
    pattern2.add_dependency(adv_node, verb2_node, r'advcl:as|obl:as|advmod')
    pattern2.add_dependency(verb2_node, as2_node, r'mark|case')

    # NOTE(review): as_as_pred is appended to below but is not read anywhere
    # in the visible code.
    as_as_pred = []
    # Both match sets are materialised before the loop starts, because the
    # loop body rewrites dep_graph.
    for match in list(dep_graph.match(pattern1)) + list(
            dep_graph.match(pattern2)):

        dep_verb_node = match[verb_node]
        dep_adv_node = match[adv_node]
        dep_as1_node = match[as1_node]
        dep_as2_node = match[as2_node]
        dep_verb2_node = match[verb2_node]

        # require the surface order: as1 < X < as2 < B
        if not (dep_as1_node.LOC < dep_adv_node.LOC < dep_as2_node.LOC <
                dep_verb2_node.LOC):
            continue

        as_as_pred.append((dep_as1_node, dep_as2_node, dep_adv_node,
                           dep_verb_node, dep_verb2_node))

        # everything from the first "as" through X, plus the second "as",
        # ordered by position
        pred = [
            node for node in dep_graph.nodes()
            if dep_as1_node.LOC <= node.LOC <= dep_adv_node.LOC
        ]
        pred.append(dep_as2_node)
        pred.sort(key=lambda x: x.LOC)
        head = dep_adv_node

        dep_asas_node = merge_dep_nodes(pred, UPOS="ADP", LOC=head.LOC)

        dep_graph.replace_nodes(pred, dep_asas_node)
        # drop the edges between the merged node and its neighbours before
        # the new edges are set below
        dep_graph.remove_dependency(dep_verb2_node, dep_asas_node)
        dep_graph.remove_dependency(dep_asas_node, dep_verb2_node)
        dep_graph.remove_dependency(dep_verb_node, dep_asas_node)

        if dep_verb_node.UPOS == "VERB":

            dep_graph.set_dependency(dep_verb_node, dep_verb2_node,
                                     "advcl:" + dep_asas_node.FORM)
            dep_graph.set_dependency(dep_verb2_node, dep_asas_node, "mark")
        else:
            dep_graph.set_dependency(dep_verb_node, dep_verb2_node,
                                     "obl:" + dep_asas_node.FORM)
            dep_graph.set_dependency(dep_verb2_node, dep_asas_node, "case")