def __init__(self, scenario, args):
     """\
     Initialize the block and create its lexicon.

     Delegates to Block.__init__ first; raises LoadingException if no
     language is set afterwards.
     """
     Block.__init__(self, scenario, args)
     language_missing = self.language is None
     if language_missing:
         raise LoadingException('Language must be defined!')
     # every instance gets its own Lexicon object
     self.lexicon = Lexicon()
Beispiel #2
0
class ImposeSubjPredAgr(ImposeAgreement):
    """
    Impose gender, number and person agreement of finite verbs
    (predicates) with their subjects.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """
    def __init__(self, scenario, args):
        "Constructor, checking the argument values"
        super(ImposeSubjPredAgr, self).__init__(scenario, args)
        self.lexicon = Lexicon()

    def should_agree(self, tnode):
        """\
        Find finite verbs, with/without a subject.

        Returns False if the formeme of the t-node does not match the
        finite verb pattern; otherwise returns the pair
        (verb a-node, subject a-node). The subject is whatever `first`
        yields for the effective children with the 'Sb' afun; it may be
        None (process_excepts handles that case).
        """
        # avoid everything except finite verbs
        if not re.match(r'v.+(fin|rc)$', tnode.formeme):
            return False
        anode = tnode.lex_anode
        asubj = first(lambda achild: achild.afun == 'Sb',
                      anode.get_echildren())
        return (anode, asubj)

    def process_excepts(self, tnode, match_nodes):
        """\
        Handle the special cases of this rule: subjectless verbs, reflexive
        passives, incongruent numerals and indefinite pronoun subjects.

        Returns True if the verb was handled (or deliberately skipped) here,
        False if the regular agreement should be imposed.
        """
        anode, asubj = match_nodes
        # subjectless verbs, reflexive passive and
        # incongruent numerals: 3.ps. sg. neut.
        if (asubj is None and
                (re.match(r'^((po|z|za)?dařit|(za)?líbit)$', anode.lemma) or
                 (tnode.gram_diathesis or tnode.voice) in
                 ['reflexive_diathesis', 'deagent'])) or \
                 (asubj and self.lexicon.is_incongruent_numeral(asubj.lemma)):
            anode.morphcat_gender = 'N'
            anode.morphcat_number = 'S'
            anode.morphcat_person = '3'
            return True
        # This will skip all verbs without subject
        if asubj is None:
            return True
        # Indefinite pronoun subjects
        if re.match(r'^((ně|ni|)kdo|kdokoliv?)$', asubj.lemma):
            anode.morphcat_gender = 'M'
            anode.morphcat_number = asubj.morphcat_number or 'S'
            anode.morphcat_person = '3'
            return True
        return False

    def impose(self, tnode, match_nodes):
        """\
        Impose the subject-predicate agreement on regular nodes.

        Copies gender, person and number from the subject a-node to the
        verb a-node; the person falls back to '3' unless it is '1', '2'
        or '3'. For coordinated subjects (except under 'nebo'), the verb
        is set to plural.
        """
        anode, asubj = match_nodes
        # Copy the categories from the subject to the predicate
        anode.morphcat_gender = asubj.morphcat_gender
        person = asubj.morphcat_person
        anode.morphcat_person = person if person in ['1', '2', '3'] else '3'
        anode.morphcat_number = asubj.morphcat_number
        # Correct for coordinated subjects: it is the predicate that must
        # become plural, the individual coordination member keeps its own
        # number (the original code overwrote the subject's number instead)
        if asubj.is_member and asubj.parent.lemma != 'nebo':
            anode.morphcat_number = 'P'
class GeneratePossessiveAdjectives(Block):
    """\
    Replace the lemma of a-nodes that should surface as possessive
    adjectives: the deep lemma equals the noun the adjective is derived
    from, so e.g. 'Čapek' becomes 'Čapkův' when the t-node carries a
    possessive formeme. If no possessive adjective is known for the noun,
    the noun is put into genitive instead.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """\
        Initialize the block, require a language and create the lexicon.
        """
        Block.__init__(self, scenario, args)
        language_missing = self.language is None
        if language_missing:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def load(self):
        "Load the possessive adjective dictionary from the data directory."
        self.lexicon.load_possessive_adj_dict(self.scenario.data_dir)

    def process_tnode(self, tnode):
        """\
        Update the lexical a-node of the given t-node if the t-node
        has a possessive formeme; do nothing otherwise.
        """
        # only 'n:poss' and 'adj:poss' formemes are of interest
        if not re.match(r'^(n|adj):poss$', tnode.formeme):
            return
        # pronouns are left untouched
        if tnode.mlayer_pos == 'P' or tnode.t_lemma == '#PersPron':
            return
        anode = tnode.lex_anode
        poss_adj_lemma = self.lexicon.get_possessive_adj_for(anode.lemma)
        if not poss_adj_lemma:
            # no possessive adjective available: fall back to
            # the noun in genitive
            tnode.formeme = 'n:2'
            anode.morphcat_case = '2'
            return
        # the possessive adjective exists, switch the a-node to it
        anode.lemma = poss_adj_lemma
        anode.morphcat_pos = 'A'
        anode.morphcat_subpos = '.'
        # gender and number are left open, to be filled in by agreement
        anode.morphcat_gender = '.'
        anode.morphcat_number = '.'
class GeneratePossessiveAdjectives(Block):
    """\
    Replace the lemma of a-nodes that should surface as possessive
    adjectives: the deep lemma equals the noun the adjective is derived
    from, so e.g. 'Čapek' becomes 'Čapkův' when the t-node carries a
    possessive formeme. If no possessive adjective is known for the noun,
    the noun is put into genitive instead.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """
    def __init__(self, scenario, args):
        """\
        Initialize the block, require a language and create the lexicon.
        """
        Block.__init__(self, scenario, args)
        language_missing = self.language is None
        if language_missing:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def load(self):
        "Load the possessive adjective dictionary from the data directory."
        self.lexicon.load_possessive_adj_dict(self.scenario.data_dir)

    def process_tnode(self, tnode):
        """\
        Update the lexical a-node of the given t-node if the t-node
        has a possessive formeme; do nothing otherwise.
        """
        # only 'n:poss' and 'adj:poss' formemes are of interest
        if not re.match(r'^(n|adj):poss$', tnode.formeme):
            return
        # pronouns are left untouched
        if tnode.mlayer_pos == 'P' or tnode.t_lemma == '#PersPron':
            return
        anode = tnode.lex_anode
        poss_adj_lemma = self.lexicon.get_possessive_adj_for(anode.lemma)
        if not poss_adj_lemma:
            # no possessive adjective available: fall back to
            # the noun in genitive
            tnode.formeme = 'n:2'
            anode.morphcat_case = '2'
            return
        # the possessive adjective exists, switch the a-node to it
        anode.lemma = poss_adj_lemma
        anode.morphcat_pos = 'A'
        anode.morphcat_subpos = '.'
        # gender and number are left open, to be filled in by agreement
        anode.morphcat_gender = '.'
        anode.morphcat_number = '.'
 def __init__(self, scenario, args):
     """\
     Initialize the block and create its lexicon.

     Delegates to Block.__init__ first; raises LoadingException if no
     language is set afterwards.
     """
     Block.__init__(self, scenario, args)
     language_missing = self.language is None
     if language_missing:
         raise LoadingException('Language must be defined!')
     # every instance gets its own Lexicon object
     self.lexicon = Lexicon()
Beispiel #6
0
class AddCoordPunct(Block):
    """
    Add comma to coordinated lists of 3 and more elements, as well as before
    some Czech coordination conjunctions ('ale', 'ani').

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """
    def __init__(self, scenario, args):
        "Constructor, just checking the argument values"
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_anode(self, anode):
        """\
        Add coordination punctuation to the given anode, if applicable.

        Only nodes with the 'Coord' afun that have at least one child
        are processed.
        """
        if anode.afun != 'Coord':
            return
        achildren = anode.get_children(ordered=True)
        if not achildren:
            return
        # add comma before certain conjunctions
        if self.lexicon.is_coord_conj(anode.lemma) == 'Y' and \
                self.is_at_clause_boundary(anode):
            acomma = self.add_comma_node(anode)
            acomma.shift_before_node(anode)
        # add comma in lists with multiple members (before every member
        # except the first one and the last one, which is connected with
        # the conjunction).
        # The ordered child list must be used here: skipping "the first"
        # member via [1:] only makes sense if the members are in order.
        # The list was taken before any comma was added; commas created
        # here carry no 'is_member' value, so they would be filtered out
        # anyway.
        preceding_members = [an for an in achildren
                             if an.is_member and an < anode]
        for aprec_member in preceding_members[1:]:
            acomma = self.add_comma_node(anode)
            acomma.shift_before_subtree(aprec_member)

    def add_comma_node(self, anode):
        "Add a comma AuxX node under the given node and return it."
        return anode.create_child(
            data={
                'form': ',',
                'lemma': ',',
                'afun': 'AuxX',
                'morphcat': {
                    'pos': 'Z'
                },
                'clause_number': 0
            })

    def is_at_clause_boundary(self, anode):
        """Return True if the given node is at a clause boundary (i.e. the
        nodes immediately before and after it belong to different clauses),
        False otherwise (also if one of the neighbors is missing)."""
        prev_node = anode.get_prev_node()
        next_node = anode.get_next_node()
        # always return a real boolean, not a node or None
        if not prev_node or not next_node:
            return False
        return prev_node.clause_number != next_node.clause_number
Beispiel #7
0
class AddCoordPunct(Block):
    """
    Add comma to coordinated lists of 3 and more elements, as well as before
    some Czech coordination conjunctions ('ale', 'ani').

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Constructor, just checking the argument values"
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_anode(self, anode):
        """\
        Add coordination punctuation to the given anode, if applicable.

        Only nodes with the 'Coord' afun that have at least one child
        are processed.
        """
        if anode.afun != 'Coord':
            return
        achildren = anode.get_children(ordered=True)
        if not achildren:
            return
        # add comma before certain conjunctions
        if self.lexicon.is_coord_conj(anode.lemma) == 'Y' and \
                self.is_at_clause_boundary(anode):
            acomma = self.add_comma_node(anode)
            acomma.shift_before_node(anode)
        # add comma in lists with multiple members (before every member
        # except the first one and the last one, which is connected with
        # the conjunction).
        # The ordered child list must be used here: skipping "the first"
        # member via [1:] only makes sense if the members are in order.
        # The list was taken before any comma was added; commas created
        # here carry no 'is_member' value, so they would be filtered out
        # anyway.
        preceding_members = [an for an in achildren
                             if an.is_member and an < anode]
        for aprec_member in preceding_members[1:]:
            acomma = self.add_comma_node(anode)
            acomma.shift_before_subtree(aprec_member)

    def add_comma_node(self, anode):
        "Add a comma AuxX node under the given node and return it."
        return anode.create_child(data={'form': ',', 'lemma': ',',
                                        'afun': 'AuxX',
                                        'morphcat': {'pos': 'Z'},
                                        'clause_number': 0})

    def is_at_clause_boundary(self, anode):
        """Return True if the given node is at a clause boundary (i.e. the
        nodes immediately before and after it belong to different clauses),
        False otherwise (also if one of the neighbors is missing)."""
        prev_node = anode.get_prev_node()
        next_node = anode.get_next_node()
        # always return a real boolean, not a node or None
        if not prev_node or not next_node:
            return False
        return prev_node.clause_number != next_node.clause_number
Beispiel #8
0
class AddSubconjs(AddAuxWords):
    """
    Add subordinate conjunction a-nodes according to formemes.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Constructor, just checking the argument values"
        # go through the direct base class (as AddClausalExpletives does)
        # instead of skipping it by calling Block.__init__ directly
        super(AddSubconjs, self).__init__(scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def get_aux_forms(self, tnode):
        """\
        Find subordinate conjunction nodes to be created.

        Returns the list of '_'-separated forms found between 'v:' and
        the last '+' of the formeme, or None if the formeme does not have
        this shape.
        """
        match = re.match(r'^v:(.+)\+', tnode.formeme)
        if not match:
            return None
        # obtain the surface forms of the conjunctions
        return match.group(1).split('_')

    def new_aux_node(self, anode, form):
        """\
        Create a subordinate conjunction node with the given
        conjunction form and parent.

        The new node is placed before the subtree of the parent and
        returned.
        """
        new_node = anode.create_child()
        # inflect 'aby' and 'kdyby' according to the number and person
        # of the parent node
        if form in ['aby', 'kdyby']:
            new_node.form = self.lexicon.inflect_conditional(
                form, anode.morphcat_number, anode.morphcat_person)
        else:
            new_node.form = form
        new_node.afun = 'AuxC'
        # the lemma is always the uninflected form
        new_node.lemma = form
        new_node.morphcat_pos = 'J'
        new_node.shift_before_subtree(anode)
        return new_node
Beispiel #9
0
class AddSubconjs(AddAuxWords):
    """
    Add subordinate conjunction a-nodes according to formemes.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """
    def __init__(self, scenario, args):
        "Constructor, just checking the argument values"
        # go through the direct base class (as AddClausalExpletives does)
        # instead of skipping it by calling Block.__init__ directly
        super(AddSubconjs, self).__init__(scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def get_aux_forms(self, tnode):
        """\
        Find subordinate conjunction nodes to be created.

        Returns the list of '_'-separated forms found between 'v:' and
        the last '+' of the formeme, or None if the formeme does not have
        this shape.
        """
        match = re.match(r'^v:(.+)\+', tnode.formeme)
        if not match:
            return None
        # obtain the surface forms of the conjunctions
        return match.group(1).split('_')

    def new_aux_node(self, anode, form):
        """\
        Create a subordinate conjunction node with the given
        conjunction form and parent.

        The new node is placed before the subtree of the parent and
        returned.
        """
        new_node = anode.create_child()
        # inflect 'aby' and 'kdyby' according to the number and person
        # of the parent node
        if form in ['aby', 'kdyby']:
            new_node.form = self.lexicon.inflect_conditional(
                form, anode.morphcat_number, anode.morphcat_person)
        else:
            new_node.form = form
        new_node.afun = 'AuxC'
        # the lemma is always the uninflected form
        new_node.lemma = form
        new_node.morphcat_pos = 'J'
        new_node.shift_before_subtree(anode)
        return new_node
Beispiel #10
0
class AddAuxVerbCompoundFuture(Block):
    """
    Insert the auxiliary of the compound future tense ('bude').

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Initialize the block, require a language and create the lexicon."
        Block.__init__(self, scenario, args)
        language_missing = self.language is None
        if language_missing:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_tnode(self, tnode):
        """\
        Create a compound future auxiliary for the given t-node if it is
        in future tense and its conjugated verb has no synthetic future.
        """
        # anything that is not future tense is skipped
        if tnode.gram_tense != 'post':
            return
        # so are non-processual verbs, unless they are modal
        if tnode.gram_aspect != 'proc' and tnode.gram_deontmod == 'decl':
            return
        aconj = tnode.get_deref_attr('wild/conjugated')
        # synthetic future verbs need no auxiliary
        # (this also rules out passives)
        if self.lexicon.has_synthetic_future(aconj.lemma):
            return
        # the auxiliary hangs under the conjugated verb, right before it
        anew_aux = aconj.create_child()
        anew_aux.shift_before_node(aconj)
        anew_aux.afun = 'AuxV'
        anew_aux.lemma = 'být'
        # the auxiliary takes over the conjugation,
        # the original verb becomes an infinitive
        anew_aux.morphcat, aconj.morphcat = \
            aconj.morphcat, {'pos': 'V', 'subpos': 'f'}
        anew_aux.morphcat_gender = '-'
        anew_aux.morphcat_tense = 'F'
        # the t-node now points to the auxiliary as the conjugated verb
        tnode.set_deref_attr('wild/conjugated', anew_aux)
        tnode.add_aux_anodes(anew_aux)
Beispiel #11
0
class AddAuxVerbConditional(Block):
    """
    Insert the conditional auxiliary ('by'/'bych').

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """
    def __init__(self, scenario, args):
        "Initialize the block, require a language and create the lexicon."
        Block.__init__(self, scenario, args)
        language_missing = self.language is None
        if language_missing:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_tnode(self, tnode):
        """\
        Create a conditional auxiliary for the given t-node if it is in
        conditional mood and its formeme contains neither 'aby' nor 'kdyby'.
        """
        # only conditional mood is of interest
        if tnode.gram_verbmod != 'cdn':
            return
        # nothing to add if the formeme contains 'aby'/'kdyby'
        if re.search(r'(aby|kdyby)', tnode.formeme):
            return
        aconj = tnode.get_deref_attr('wild/conjugated')
        # a conjugated auxiliary gets the new node as a sibling,
        # a conjugated normal verb gets it as a child
        ahost = aconj.parent if aconj.afun == 'AuxV' else aconj
        acdn = ahost.create_child()
        acdn.shift_before_node(aconj)
        acdn.lemma = 'být'
        acdn.afun = 'AuxV'
        acdn.morphcat_pos = 'V'
        acdn.morphcat_subpos = 'c'
        # the form follows the number and person of the conjugated verb
        acdn.form = self.lexicon.inflect_conditional(
            'by', aconj.morphcat_number, aconj.morphcat_person)
        # the original verb is switched to past tense
        aconj.morphcat_subpos = 'p'
        # register the auxiliary with the t-node
        tnode.add_aux_anodes(acdn)
class AddAuxVerbCompoundFuture(Block):
    """
    Insert the auxiliary of the compound future tense ('bude').

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """
    def __init__(self, scenario, args):
        "Initialize the block, require a language and create the lexicon."
        Block.__init__(self, scenario, args)
        language_missing = self.language is None
        if language_missing:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_tnode(self, tnode):
        """\
        Create a compound future auxiliary for the given t-node if it is
        in future tense and its conjugated verb has no synthetic future.
        """
        # anything that is not future tense is skipped
        if tnode.gram_tense != 'post':
            return
        # so are non-processual verbs, unless they are modal
        if tnode.gram_aspect != 'proc' and tnode.gram_deontmod == 'decl':
            return
        aconj = tnode.get_deref_attr('wild/conjugated')
        # synthetic future verbs need no auxiliary
        # (this also rules out passives)
        if self.lexicon.has_synthetic_future(aconj.lemma):
            return
        # the auxiliary hangs under the conjugated verb, right before it
        anew_aux = aconj.create_child()
        anew_aux.shift_before_node(aconj)
        anew_aux.afun = 'AuxV'
        anew_aux.lemma = 'být'
        # the auxiliary takes over the conjugation,
        # the original verb becomes an infinitive
        anew_aux.morphcat, aconj.morphcat = \
            aconj.morphcat, {'pos': 'V', 'subpos': 'f'}
        anew_aux.morphcat_gender = '-'
        anew_aux.morphcat_tense = 'F'
        # the t-node now points to the auxiliary as the conjugated verb
        tnode.set_deref_attr('wild/conjugated', anew_aux)
        tnode.add_aux_anodes(anew_aux)
class AddAuxVerbConditional(Block):
    """
    Insert the conditional auxiliary ('by'/'bych').

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Initialize the block, require a language and create the lexicon."
        Block.__init__(self, scenario, args)
        language_missing = self.language is None
        if language_missing:
            raise LoadingException("Language must be defined!")
        self.lexicon = Lexicon()

    def process_tnode(self, tnode):
        """\
        Create a conditional auxiliary for the given t-node if it is in
        conditional mood and its formeme contains neither 'aby' nor 'kdyby'.
        """
        # only conditional mood is of interest
        if tnode.gram_verbmod != "cdn":
            return
        # nothing to add if the formeme contains 'aby'/'kdyby'
        if re.search(r"(aby|kdyby)", tnode.formeme):
            return
        aconj = tnode.get_deref_attr("wild/conjugated")
        # a conjugated auxiliary gets the new node as a sibling,
        # a conjugated normal verb gets it as a child
        ahost = aconj.parent if aconj.afun == "AuxV" else aconj
        acdn = ahost.create_child()
        acdn.shift_before_node(aconj)
        acdn.lemma = "být"
        acdn.afun = "AuxV"
        acdn.morphcat_pos = "V"
        acdn.morphcat_subpos = "c"
        # the form follows the number and person of the conjugated verb
        acdn.form = self.lexicon.inflect_conditional(
            "by", aconj.morphcat_number, aconj.morphcat_person)
        # the original verb is switched to past tense
        aconj.morphcat_subpos = "p"
        # register the auxiliary with the t-node
        tnode.add_aux_anodes(acdn)
Beispiel #14
0
 def __init__(self, scenario, args):
     """\
     Initialize the block via the parent class constructor, require
     a language to be set and create the lexicon.
     """
     super(AddClausalExpletives, self).__init__(scenario, args)
     language_missing = self.language is None
     if language_missing:
         raise LoadingException('Language must be defined!')
     # every instance gets its own Lexicon object
     self.lexicon = Lexicon()
Beispiel #15
0
class AddClausalExpletives(AddAuxWords):
    """
    Insert the clausal expletive pronoun 'to' (possibly with a preposition)
    above subordinate clauses with 'že' whose parent verb asks for it.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Initialize the block, require a language and create the lexicon."
        super(AddClausalExpletives, self).__init__(scenario, args)
        language_missing = self.language is None
        if language_missing:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def get_aux_forms(self, tnode):
        """\
        Return the list of expletive forms to be added for the given
        t-node, or None if nothing should be added.
        """
        # only clauses with the bare conjunction 'že' qualify (other
        # formemes either have no 'že' or already include the expletive)
        if tnode.formeme != 'v:že+fin':
            return None
        # ask the lexicon whether the parent verb takes an expletive
        # TODO coordinations are not handled
        expletive = self.lexicon.has_expletive(tnode.parent.t_lemma)
        # the individual words are separated by underscores
        return expletive.split('_') if expletive else None

    def new_aux_node(self, anode, form):
        """\
        Create a node with the given form under the given a-node, place it
        before the subtree of the a-node and return it. The node is either
        the expletive pronoun itself or its preposition.
        """
        new_node = anode.create_child()
        if re.match(r'^t(o|oho|mu|om|ím)', form) is None:
            # anything else than a form of the pronoun is a preposition
            new_node.afun = 'AuxP'
            new_node.lemma = form
            new_node.morphcat_pos = 'R'
        else:
            # the expletive pronoun itself
            new_node.afun = 'Obj'
            new_node.lemma = 'ten'
            new_node.morphcat = {'pos': 'P', 'subpos': 'D',
                                 'gender': 'N', 'number': 'S'}
        new_node.form = form
        new_node.shift_before_subtree(anode)
        return new_node

    def postprocess(self, tnode, anode, aux_anodes):
        """\
        Rehang the new nodes and the conjunction: the first auxiliary
        node goes under the former parent of the conjunction, the others
        under the first one, the conjunction under the last one and
        the clause under the conjunction. Clause numbers of the new
        nodes are taken from the former parent of the conjunction.
        """
        # the conjunction 'že' and the node it has been hanging under
        aconj_ze = anode.parent.parent
        aparent = aconj_ze.parent
        # the first new node replaces the conjunction under its parent
        afirst_aux = aux_anodes[0]
        afirst_aux.parent = aparent
        afirst_aux.clause_number = aparent.clause_number
        # any further new nodes depend on the first one
        for aux in aux_anodes[1:]:
            aux.parent = afirst_aux
            aux.clause_number = aparent.clause_number
        # the conjunction goes under the last new node
        # and directly before the clause subtree
        aconj_ze.parent = aux_anodes[-1]
        aconj_ze.shift_before_subtree(anode)
        # finally, the clause depends on the conjunction
        anode.parent = aconj_ze

    def get_anode(self, tnode):
        "Return the a-node that is the root of the verbal a-subtree."
        # without a conjugated verb link, use the lexical a-node
        if not tnode.get_attr('wild/conjugated'):
            return tnode.lex_anode
        aconj = tnode.get_deref_attr('wild/conjugated')
        # a conjugated auxiliary is represented by its parent
        return aconj.parent if aconj.afun == 'AuxV' else aconj
Beispiel #16
0
class ImposeAttrAgr(ImposeAgreement):
    """
    Impose case, gender and number agreement of attributes with their
    governing nouns.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Constructor, checking the argument values"
        super(ImposeAttrAgr, self).__init__(scenario, args)
        self.lexicon = Lexicon()

    def should_agree(self, tnode):
        """\
        Find adjectives with a noun parent. Returns the a-layer nodes for the
        adjective and its parent, or False (if the formeme contains neither
        'attr' nor 'poss', the t-node has no lexical a-node, the parent
        a-node is the root, or the parent cannot be obtained).
        """
        if not re.search(r'(attr|poss)', tnode.formeme):
            return False
        anode = tnode.lex_anode
        if not anode:
            return False
        try:
            tnoun = tnode.get_eparents()[0]
            anoun = tnoun.lex_anode
            if anoun.is_root:
                return False
            return (anode, anoun)
        except Exception:
            # best effort: any failure to find the governing noun means
            # no agreement; unlike a bare except, this lets
            # KeyboardInterrupt and SystemExit through
            return False

    def process_excepts(self, tnode, match_nodes):
        """\
        Handle special cases for this rule: nic/něco, numerals.
        Returns True if the node was handled here, False otherwise.
        """
        anode, anoun = match_nodes
        if anoun.lemma in ['nic', 'něco']:
            # Case agreement, except in nominative and accusative
            # (and with no case set at all), which require genitive
            noun_case = anoun.morphcat_case
            anode.morphcat_case = noun_case \
                if noun_case and noun_case not in ['1', '4'] else '2'
            # Forced neutrum singular
            anode.morphcat_number = 'S'
            anode.morphcat_gender = 'N'
            return True
        numeral = self.lexicon.number_for(anoun.lemma)
        if numeral is not None and numeral > 1:
            # Force plural in numerals
            anode.morphcat_case = anoun.morphcat_case
            anode.morphcat_gender = anoun.morphcat_gender
            anode.morphcat_number = 'P'
            return True
        return False

    def impose(self, tnode, match_nodes):
        "Impose case, gender and number agreement on attributes."
        anode, anoun = match_nodes
        # Case agreement should take place every time
        anode.morphcat_case = anoun.morphcat_case
        # Gender and number: not for nouns
        if tnode.formeme != 'n:attr' or tnode.mlayer_pos != 'N':
            anode.morphcat_number = anoun.morphcat_number
            anode.morphcat_gender = anoun.morphcat_gender
Beispiel #17
0
 def __init__(self, scenario, args):
     """\
     Initialize the block via the parent class constructor
     and create the lexicon.
     """
     super(ImposeAttrAgr, self).__init__(scenario, args)
     # every instance gets its own Lexicon object
     self.lexicon = Lexicon()
Beispiel #18
0
class ImposeAttrAgr(ImposeAgreement):
    """
    Impose case, gender and number agreement of attributes with their
    governing nouns.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """
    def __init__(self, scenario, args):
        "Constructor, checking the argument values"
        super(ImposeAttrAgr, self).__init__(scenario, args)
        self.lexicon = Lexicon()

    def should_agree(self, tnode):
        """\
        Find adjectives with a noun parent. Returns the a-layer nodes for the
        adjective and its parent, or False (if the formeme contains neither
        'attr' nor 'poss', the t-node has no lexical a-node, the parent
        a-node is the root, or the parent cannot be obtained).
        """
        if not re.search(r'(attr|poss)', tnode.formeme):
            return False
        anode = tnode.lex_anode
        if not anode:
            return False
        try:
            tnoun = tnode.get_eparents()[0]
            anoun = tnoun.lex_anode
            if anoun.is_root:
                return False
            return (anode, anoun)
        except Exception:
            # best effort: any failure to find the governing noun means
            # no agreement; unlike a bare except, this lets
            # KeyboardInterrupt and SystemExit through
            return False

    def process_excepts(self, tnode, match_nodes):
        """\
        Handle special cases for this rule: nic/něco, numerals.
        Returns True if the node was handled here, False otherwise.
        """
        anode, anoun = match_nodes
        if anoun.lemma in ['nic', 'něco']:
            # Case agreement, except in nominative and accusative
            # (and with no case set at all), which require genitive
            noun_case = anoun.morphcat_case
            anode.morphcat_case = noun_case \
                if noun_case and noun_case not in ['1', '4'] else '2'
            # Forced neutrum singular
            anode.morphcat_number = 'S'
            anode.morphcat_gender = 'N'
            return True
        numeral = self.lexicon.number_for(anoun.lemma)
        if numeral is not None and numeral > 1:
            # Force plural in numerals
            anode.morphcat_case = anoun.morphcat_case
            anode.morphcat_gender = anoun.morphcat_gender
            anode.morphcat_number = 'P'
            return True
        return False

    def impose(self, tnode, match_nodes):
        "Impose case, gender and number agreement on attributes."
        anode, anoun = match_nodes
        # Case agreement should take place every time
        anode.morphcat_case = anoun.morphcat_case
        # Gender and number: not for nouns
        if tnode.formeme != 'n:attr' or tnode.mlayer_pos != 'N':
            anode.morphcat_number = anoun.morphcat_number
            anode.morphcat_gender = anoun.morphcat_gender
Beispiel #19
0
class ReverseNumberNounDependency(Block):
    """
    This block reverses the dependency of incongruent Czech numerals (5 and
    higher), hanging their parents under them in the a-tree.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """
    def __init__(self, scenario, args):
        "Constructor, checking the argument values"
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_ttree(self, ttree):
        "Rehang the numerals for the given t-tree & a-tree pair"
        for tnode in ttree.get_children():
            self.__process_subtree(tnode)

    def __process_subtree(self, tnode):
        "Process the subtree of the given node (the node first, then depth-first)"
        # solve the current node
        if tnode.is_coap_root():
            self.__process_coap_tnode(tnode)
        else:
            self.__process_plain_tnode(tnode)
        # recurse deeper
        for child in tnode.get_children():
            self.__process_subtree(child)

    def __process_plain_tnode(self, tnode):
        "Process a normal (non-coap) tnode"
        tnoun = tnode.parent
        # filter out cases where we don't need to do anything: lemma, case
        if tnoun < tnode or not self.__should_reverse(tnode.t_lemma):
            return
        noun_prep, noun_case = self.__get_prepcase(tnoun)
        if noun_case is None or noun_case not in ['1', '4']:
            return
        # make the switch
        self.__swap_anodes(tnode, tnoun)
        self.__update_formemes(tnode, tnoun, noun_prep, noun_case)
        # make the objects singular for Czech decimal numbers
        if re.match(r'^\d+[,.]\d+$', tnode.t_lemma):
            tnode.gram_number = 'sg'

    def __process_coap_tnode(self, tnode):
        "Process a coap root"
        # check if we have actually something to process
        tchildren = [
            tchild for tchild in tnode.get_children(ordered=1)
            if tchild.is_member
        ]
        if not tchildren:
            return
        # check whether the switch should apply to all children
        # (any() instead of a bare filter() call: a filter object is
        # always truthy on Python 3, which would skip every coap root)
        tnoun = tnode.parent
        if tnoun < tnode or any(
                not self.__should_reverse(tchild.t_lemma)
                for tchild in tchildren):
            return
        # check noun case
        noun_prep, noun_case = self.__get_prepcase(tnoun)
        if noun_case is None or noun_case not in ['1', '4']:
            return
        # switch the coap root with the noun
        self.__swap_anodes(tnode, tnoun)
        for tchild in tchildren:
            self.__update_formemes(tchild, tnoun, noun_prep, noun_case)
        # fix object number according to the last child
        if re.match(r'^\d+[,.]\d+$', tchildren[-1].t_lemma):
            tnode.gram_number = 'sg'

    def __update_formemes(self, tnumber, tnoun, noun_prep, noun_case):
        "Update the formemes to reflect the swap of the nodes"
        # merge number and noun prepositions
        # NOTE(review): the whole pattern is optional, so re.search can
        # match the empty string at position 0 and number_prep appears to
        # be None for any formeme that does not start with ':' -- confirm
        # whether the number preposition is ever meant to be captured
        number_prep = re.search(r'(?::(.*)\+)?', tnumber.formeme).group(1)
        if noun_prep and number_prep:
            preps = noun_prep + '_' + number_prep + '+'
        elif noun_prep or number_prep:
            preps = (noun_prep or number_prep) + '+'
        else:
            preps = ''
        # mark formeme origins for debugging
        tnoun.formeme_origin = 'rule-number_from_parent(%s : %s)' % \
                (tnoun.formeme_origin, tnoun.formeme)
        tnumber.formeme_origin = 'rule-number_genitive'
        # Change formemes:
        # number gets merged preposition + noun case, noun gets genitive
        tnumber.formeme = 'n:%s%s' % (preps, noun_case)
        tnoun.formeme = 'n:2'

    def __swap_anodes(self, tnumber, tnoun):
        "Swap the dependency between a number and a noun on the a-layer"
        # the actual swap (the noun a-node is taken to be the parent
        # of the lexical a-node of the number)
        anumber = tnumber.lex_anode
        anoun = anumber.parent
        anumber.parent = anoun.parent
        anoun.parent = anumber
        # fix is_member
        if anoun.is_member:
            anoun.is_member = False
            anumber.is_member = True
        # fix parenthesis
        if anoun.get_attr('wild/is_parenthesis'):
            anoun.set_attr('wild/is_parenthesis', False)
            anumber.set_attr('wild/is_parenthesis', True)

    def __get_prepcase(self, tnoun):
        """\
        Return the pair (preposition, case) of a noun formeme if the case
        is '1', '4' or 'X'; the preposition is None if the formeme has none.
        Returns (None, None) if the formeme does not have this shape or
        cannot be read.
        """
        try:
            return re.search(r'^n:(?:(.*)\+)?([14X])$', tnoun.formeme).groups()
        except Exception:
            # best effort: no match (or no usable formeme) means no case;
            # unlike a bare except, this lets KeyboardInterrupt and
            # SystemExit through
            return None, None

    def __should_reverse(self, lemma):
        """\
        Return True if the given lemma belongs to an incongruent numeral
        or is one of 'většina', 'menšina'.
        This is actually a hack only to allow for translation of
        the English words "most" and 'more'. Normally, the method
        is_incongruent_numeral should be used directly.
        """
        return bool(self.lexicon.is_incongruent_numeral(lemma) or
                    lemma in ['většina', 'menšina'])
Beispiel #20
0
 def __init__(self, scenario, args):
     """\
     Initialize the block via the parent class constructor
     and create the lexicon.
     """
     super(ImposeSubjPredAgr, self).__init__(scenario, args)
     # every instance gets its own Lexicon object
     self.lexicon = Lexicon()
Beispiel #21
0
class ImposeSubjPredAgr(ImposeAgreement):
    """
    Impose subject-predicate agreement: the finite verb takes over
    gender, number and person from its subject.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Constructor, checking the argument values"
        super(ImposeSubjPredAgr, self).__init__(scenario, args)
        self.lexicon = Lexicon()

    def should_agree(self, tnode):
        """\
        Find finite verbs, with/without a subject.

        Returns False if the formeme of the t-node is not that of
        a finite verb. Otherwise returns the pair (verb a-node, subject
        a-node), the subject being the first effective child of the verb
        with the afun 'Sb' (process_excepts expects None here if the verb
        has no such child -- NOTE(review): confirm against `first`).
        """
        # avoid everything except finite verbs
        if not re.match(r'v.+(fin|rc)$', tnode.formeme):
            return False
        anode = tnode.lex_anode
        asubj = first(lambda achild: achild.afun == 'Sb',
                      anode.get_echildren())
        return (anode, asubj)

    def process_excepts(self, tnode, match_nodes):
        """\
        Handle the cases where the verb does not simply copy the
        categories of its subject.

        Returns True if the verb has been dealt with here (or is to be
        left alone, as all verbs without a subject are), False if the
        regular agreement should be imposed.
        NOTE(review): how the return value is used is decided by
        ImposeAgreement, which is not visible here -- confirm.
        """
        anode, asubj = match_nodes
        # subjectless verbs, reflexive passive and
        # incongruent numerals: 3.ps. sg. neut.
        subjectless_neuter = asubj is None and (
            re.match(r'^((po|z|za)?dařit|(za)?líbit)$', anode.lemma) or
            (tnode.gram_diathesis or tnode.voice) in
            ['reflexive_diathesis', 'deagent'])
        if subjectless_neuter or \
                (asubj and self.lexicon.is_incongruent_numeral(asubj.lemma)):
            anode.morphcat_gender = 'N'
            anode.morphcat_number = 'S'
            anode.morphcat_person = '3'
            return True
        # This will skip all verbs without subject
        if asubj is None:
            return True
        # Indefinite pronoun subjects
        if re.match(r'^((ně|ni|)kdo|kdokoliv?)$', asubj.lemma):
            anode.morphcat_gender = 'M'
            anode.morphcat_number = asubj.morphcat_number or 'S'
            anode.morphcat_person = '3'
            return True
        return False

    def impose(self, tnode, match_nodes):
        """\
        Impose the subject-predicate agreement on regular nodes.

        Gender and number are copied from the subject to the verb; the
        person is copied only if it is '1', '2' or '3', otherwise the
        verb gets the 3rd person. A verb whose subject is a member of
        a coordination is set to plural, unless the coordination is
        headed by 'nebo'.
        """
        anode, asubj = match_nodes
        # Copy the categories from the subject to the predicate
        anode.morphcat_gender = asubj.morphcat_gender
        if asubj.morphcat_person in ['1', '2', '3']:
            anode.morphcat_person = asubj.morphcat_person
        else:
            anode.morphcat_person = '3'
        anode.morphcat_number = asubj.morphcat_number
        # Correct for coordinated subjects: it is the predicate that
        # becomes plural, the number of the subject itself must not change
        if asubj.is_member and asubj.parent.lemma != 'nebo':
            anode.morphcat_number = 'P'
Beispiel #22
0
class AddSubordClausePunct(AddClausalPunct):
    """
    Add commas separating subordinate clauses.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """\
        Constructor, just checking the argument values.

        Raises a LoadingException if no language has been set.
        """
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_atree(self, aroot):
        """\
        Add subordinate clause punctuation to the given sentence.

        All pairs of neighbouring nodes (in the order valid on entry) are
        examined and a comma is inserted between those that belong to
        different clauses, unless one of the exceptions below applies.
        """
        anodes = aroot.get_descendants(ordered=True)
        # examine all places between two nodes
        for (aleft, aright) in zip(anodes[:-1], anodes[1:]):
            # exclude all places where we don't want a comma
            # within the same clause
            if aleft.clause_number == aright.clause_number:
                continue
            # clause boundaries, such as brackets
            if aright.clause_number == 0:
                continue
            # some punctuation is here already
            if any(re.match(r'^[,:;.?!-]', an.lemma)
                   for an in (aleft, aright)):
                continue
            # coordinating conjunctions or nodes in clauses belonging
            # to the same coordination
            if any(self.lexicon.is_coord_conj(an.lemma)
                   for an in (aleft, aright)):
                continue
            if self.are_in_coord_clauses(aleft, aright):
                continue
            # left token is an opening quote or bracket
            if re.match(r'^[„(]', aleft.lemma):
                continue
            # right token is a closing bracket or quote followed by a period
            if aright.lemma == ')' or \
                    (aright.lemma == '“' and not aright.is_last_node() and
                     aright.get_next_node().lemma == '.'):
                continue
            # left token is a closing quote or bracket preceded by a comma
            # (which has been inserted in the last step); the node to look
            # at is the one before the LEFT token -- the one before the
            # right token is the left token itself
            if re.match(r'^[“)]', aleft.lemma) and \
                    not aleft.is_first_node() and \
                    aleft.get_prev_node().lemma == ',':
                continue
            # now we know we want to insert a comma
            acomma = self.insert_comma_between(aleft, aright)
            # move the comma if the left token marks
            # the end of an enquoted clause
            if self.is_clause_in_quotes(aleft):
                acomma.shift_before_node(aleft)
            # move the comma after clausal expletives in expression "poté co"
            if aright.lemma == 'poté':
                acomma.shift_after_node(aright)

    def are_in_coord_clauses(self, aleft, aright):
        """\
        Check if the given nodes are in two coordinated clauses, i.e.
        if their clauses share a non-root parent whose lemma is
        a coordinating conjunction according to the lexicon.
        """
        alparent = self.get_clause_parent(aleft)
        arparent = self.get_clause_parent(aright)
        return alparent == arparent and \
                not alparent.is_root and \
                self.lexicon.is_coord_conj(alparent.lemma)

    def get_clause_parent(self, anode):
        """Return the parent of the clause the given node belongs to;
        the result may be the root of the tree."""
        if anode.clause_number == 0:
            parent = anode
        else:
            parent = anode.get_clause_root().parent
        # climb over nested coordinations/appositions
        while parent.is_coap_root() and parent.is_member:
            parent = parent.parent
        return parent

    def insert_comma_between(self, aleft, aright):
        """Insert a comma node between these two nodes,
        find out where to hang it.

        The comma is hung under the clause root of the left node if that
        root lies deeper in the tree than the clause root of the right
        node, otherwise under the clause root of the right node.
        Returns the new comma node, placed right after the left node."""
        # find out the parent
        aleft_clause_root = aleft.get_clause_root()
        aright_clause_root = aright.get_clause_root()
        if aleft_clause_root.get_depth() > aright_clause_root.get_depth():
            ahigher_clause_root = aleft_clause_root
        else:
            ahigher_clause_root = aright_clause_root
        # insert the new node
        acomma = ahigher_clause_root.create_child(
            data={'form': ',', 'lemma': ',', 'afun': 'AuxX',
                  'morphcat': {'pos': 'Z'}, 'clause_number': 0})
        # shift the new node to its rightful place
        acomma.shift_after_node(aleft)
        return acomma
class AddClausalExpletives(AddAuxWords):
    """
    Add the clausal expletive pronoun 'to' (possibly with a preposition)
    to subordinate clauses introduced by 'že', wherever the lexicon lists
    an expletive for the parent verb.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """\
        Set up the block and its lexicon; a LoadingException is raised
        if no language has been set.
        """
        super(AddClausalExpletives, self).__init__(scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def get_aux_forms(self, tnode):
        """\
        Return the list of word forms making up the clausal expletive
        for the given t-node, or None if no expletive is to be added.
        """
        # only clauses with the conjunction 'že' are eligible (any
        # other formeme either needs no expletive or already has it)
        if tnode.formeme == 'v:že+fin':
            # TODO coordinations are not handled
            expletive = self.lexicon.has_expletive(tnode.parent.t_lemma)
            if expletive:
                # the individual words are joined by underscores
                return expletive.split('_')
        return None

    def new_aux_node(self, anode, form):
        """\
        Create a new child of the given a-node for one word of the
        expletive expression and place it before the subtree of `anode`.

        Forms of the pronoun 'to' become objects with the lemma 'ten',
        anything else is treated as a preposition.
        """
        aux = anode.create_child()
        is_pronoun = re.match(r'^t(o|oho|mu|om|ím)', form)
        if not is_pronoun:
            # preposition
            aux.afun = 'AuxP'
            aux.lemma = form
            aux.morphcat_pos = 'R'
        else:
            # the expletive itself
            aux.afun = 'Obj'
            aux.lemma = 'ten'
            aux.morphcat = {'pos': 'P', 'subpos': 'D',
                            'gender': 'N', 'number': 'S'}
        aux.form = form
        aux.shift_before_subtree(anode)
        return aux

    def postprocess(self, tnode, anode, aux_anodes):
        """\
        Rehang the new nodes and the conjunction: the first auxiliary
        goes under the parent of the conjunction, the remaining ones
        under the first one, the conjunction under the last auxiliary
        and `anode` under the conjunction. Clause numbers of the
        auxiliaries are taken from their new governor.

        The conjunction 'že' is taken to be the grandparent of `anode`.
        """
        conj = anode.parent.parent
        governor = conj.parent
        # the first auxiliary replaces the conjunction under its parent
        head_aux = aux_anodes[0]
        head_aux.parent = governor
        head_aux.clause_number = governor.clause_number
        # all other auxiliaries depend on the first one
        for dependent_aux in aux_anodes[1:]:
            dependent_aux.parent = head_aux
            dependent_aux.clause_number = governor.clause_number
        # the conjunction goes below the auxiliaries ...
        conj.parent = aux_anodes[-1]
        # ... is moved in front of the dependent clause ...
        conj.shift_before_subtree(anode)
        # ... and governs the dependent clause
        anode.parent = conj

    def get_anode(self, tnode):
        """\
        Return the a-node that is the root of the verbal a-subtree:
        the node referenced by 'wild/conjugated' (or its parent, if the
        referenced node is an auxiliary verb), or the lexical a-node
        if there is no such reference.
        """
        if not tnode.get_attr('wild/conjugated'):
            return tnode.lex_anode
        conjugated = tnode.get_deref_attr('wild/conjugated')
        return conjugated.parent if conjugated.afun == 'AuxV' else conjugated
Beispiel #24
0
class ReverseNumberNounDependency(Block):
    """
    This block reverses the dependency of incongruent Czech numerals (5 and
    higher), hanging their parents under them in the a-tree.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """\
        Constructor, checking the argument values.

        Raises a LoadingException if no language has been set.
        """
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_ttree(self, ttree):
        "Rehang the numerals for the given t-tree & a-tree pair"
        for tnode in ttree.get_children():
            self.__process_subtree(tnode)

    def __process_subtree(self, tnode):
        "Process the subtree of the given node"
        # solve the current node
        if tnode.is_coap_root():
            self.__process_coap_tnode(tnode)
        else:
            self.__process_plain_tnode(tnode)
        # recurse deeper
        for child in tnode.get_children():
            self.__process_subtree(child)

    def __process_plain_tnode(self, tnode):
        """\
        Process a normal (non-coap) tnode: if it is a numeral to be
        reversed which precedes its parent noun in nominative or
        accusative, swap the two on the a-layer and update the formemes.
        """
        tnoun = tnode.parent
        # filter out cases where we don't need to do anything: lemma, case
        if tnoun < tnode or not self.__should_reverse(tnode.t_lemma):
            return
        noun_prep, noun_case = self.__get_prepcase(tnoun)
        if noun_case not in ('1', '4'):
            return
        # make the switch
        self.__swap_anodes(tnode, tnoun)
        self.__update_formemes(tnode, tnoun, noun_prep, noun_case)
        # make the objects singular for Czech decimal numbers
        if re.match(r'^\d+[,.]\d+$', tnode.t_lemma):
            tnode.gram_number = 'sg'

    def __process_coap_tnode(self, tnode):
        """\
        Process a coap root: the swap is made only if ALL members of the
        coordination/apposition are numerals to be reversed.
        """
        # check if we have actually something to process
        tchildren = [tchild for tchild in tnode.get_children(ordered=1)
                     if tchild.is_member]
        if not tchildren:
            return
        # check whether the switch should apply to all children
        # (any() is used since the result of filter() is always true
        # on Python 3)
        tnoun = tnode.parent
        if tnoun < tnode or any(not self.__should_reverse(tchild.t_lemma)
                                for tchild in tchildren):
            return
        # check noun case
        noun_prep, noun_case = self.__get_prepcase(tnoun)
        if noun_case not in ('1', '4'):
            return
        # switch the coap root with the noun
        self.__swap_anodes(tnode, tnoun)
        for tchild in tchildren:
            self.__update_formemes(tchild, tnoun, noun_prep, noun_case)
        # fix object number according to the last child
        if re.match(r'^\d+[,.]\d+$', tchildren[-1].t_lemma):
            tnode.gram_number = 'sg'

    def __update_formemes(self, tnumber, tnoun, noun_prep, noun_case):
        """\
        Update the formemes to reflect the swap of the nodes: the number
        gets the (merged) prepositions and the original case of the noun,
        the noun gets plain genitive.
        """
        # merge number and noun prepositions; the preposition part must
        # not be optional in the pattern -- an all-optional pattern
        # matches the empty string at position 0 and never finds anything
        number_match = re.search(r':(.*)\+', tnumber.formeme)
        number_prep = number_match.group(1) if number_match else None
        if noun_prep and number_prep:
            preps = noun_prep + '_' + number_prep + '+'
        elif noun_prep or number_prep:
            preps = (noun_prep or number_prep) + '+'
        else:
            preps = ''
        # mark formeme origins for debugging
        tnoun.formeme_origin = 'rule-number_from_parent(%s : %s)' % \
                (tnoun.formeme_origin, tnoun.formeme)
        tnumber.formeme_origin = 'rule-number_genitive'
        # Change formemes:
        # number gets merged preposition + noun case, noun gets genitive
        tnumber.formeme = 'n:%s%s' % (preps, noun_case)
        tnoun.formeme = 'n:2'

    def __swap_anodes(self, tnumber, tnoun):
        """\
        Swap the dependency between a number and a noun on the a-layer.

        The noun a-node is taken to be the current a-layer parent of the
        lexical a-node of `tnumber`; `tnoun` itself is not consulted.
        """
        # the actual swap
        anumber = tnumber.lex_anode
        anoun = anumber.parent
        anumber.parent = anoun.parent
        anoun.parent = anumber
        # fix is_member
        if anoun.is_member:
            anoun.is_member = False
            anumber.is_member = True
        # fix parenthesis
        if anoun.get_attr('wild/is_parenthesis'):
            anoun.set_attr('wild/is_parenthesis', False)
            anumber.set_attr('wild/is_parenthesis', True)

    def __get_prepcase(self, tnoun):
        """\
        Return the pair (preposition, case) of a noun formeme of the
        shape 'n:[prep+]case', where the case is '1', '4' or 'X'; the
        preposition is None if there is none. Returns (None, None) for
        any other formeme or if the node has no usable formeme.
        """
        try:
            match = re.search(r'^n:(?:(.*)\+)?([14X])$', tnoun.formeme)
        except (AttributeError, TypeError):
            # missing formeme attribute, or the formeme is not a string
            return None, None
        if match is None:
            return None, None
        return match.groups()

    def __should_reverse(self, lemma):
        """\
        Return true if the given lemma belongs to an incongruent numeral.
        This is actually a hack only to allow for translation of
        the English words "most" and 'more'. Normally, the method
        is_incongruent_numeral should be used directly.
        """
        return bool(self.lexicon.is_incongruent_numeral(lemma) or
                    lemma in ['většina', 'menšina'])
class AddAppositionPunct(Block):
    """
    Set off Czech appositions (as in 'John, my best friend, ...')
    with commas.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """\
        Set up the block and its lexicon; a LoadingException is raised
        if no language has been set.
        """
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_tnode(self, tnode):
        """\
        Add punctuation a-nodes if the given t-node is an apposition.

        Two situations are covered: a node with the functor 'APPS' only
        gets a comma after its subtree; a noun attribute following its
        parent noun, whose lemma the lexicon knows as a personal role or
        a named entity label, is turned into an apposition on both
        layers and gets its commas.
        """
        tparent = tnode.parent
        # apposition already present on the t-layer: just the second comma
        if tnode.functor == 'APPS':
            closing = self.add_comma_node(tnode.lex_anode)
            closing.shift_after_subtree(tnode.lex_anode)
            return
        # otherwise we need a nominal attribute ...
        if tnode.formeme != 'n:attr' or tnode.gram_sempos != 'n.denot':
            return
        # ... that follows its parent, which is a noun as well ...
        if not (tparent < tnode and tparent.formeme.startswith('n:')):
            return
        # ... and is a personal role or a named entity label
        if not (self.lexicon.is_personal_role(tnode.t_lemma) or
                self.lexicon.is_named_entity_label(tnode.t_lemma)):
            return
        # build the apposition on the t-layer
        tgrandpa = tparent.parent
        tapp_data = {'functor': 'APPS', 't_lemma': ';', 'nodetype': 'coap'}
        tapp = tgrandpa.create_child(data=tapp_data)
        tapp.shift_before_subtree(tnode)
        tparent.parent = tapp
        tnode.parent = tapp
        # build the apposition on the a-layer
        # TODO hang under the apposition not only the lex_anode,
        # but also aux anodes (if they are above lex_anode).
        agrandpa = tgrandpa.lex_anode or tnode.lex_anode.root
        left_comma = self.add_comma_node(agrandpa)
        left_comma.afun = 'Apos'
        left_comma.shift_before_subtree(tnode.lex_anode)
        tnode.lex_anode.parent = left_comma
        tnode.lex_anode.is_member = True
        tparent.lex_anode.parent = left_comma
        tparent.lex_anode.is_member = True
        tapp.lex_anode = left_comma
        # the right comma is needed only if no punctuation follows
        if self.is_before_punct(tnode.lex_anode):
            return
        right_comma = self.add_comma_node(left_comma)
        right_comma.shift_after_subtree(tnode.lex_anode)
        tapp.add_aux_anodes(right_comma)

    def add_comma_node(self, aparent):
        "Create a comma a-node as a child of the given node and return it"
        comma_data = {'lemma': ',', 'form': ',', 'afun': 'AuxX'}
        return aparent.create_child(data=comma_data)

    def is_before_punct(self, anode):
        """\
        Tell whether the subtree of the given node is directly followed
        by a punctuation node; this also holds if nothing follows it.
        """
        subtree = anode.get_descendants(add_self=True, ordered=True)
        follower = subtree[-1].get_next_node()
        return not follower or re.match(r'[;.,?!„“‚‘"]', follower.lemma)
 def __init__(self, scenario, args):
     """\
     Constructor: pass the scenario and arguments on to the parent
     class constructor, check that a language has been set and create
     the lexicon used by this block.

     Raises a LoadingException if self.language is None.
     """
     super(AddClausalExpletives, self).__init__(scenario, args)
     # the block cannot work without a language
     if self.language is None:
         raise LoadingException('Language must be defined!')
     # a fresh Lexicon instance, kept on the block as self.lexicon
     self.lexicon = Lexicon()
Beispiel #27
0
class AddSubordClausePunct(AddClausalPunct):
    """
    Add commas separating subordinate clauses.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """\
        Constructor, just checking the argument values.

        Raises a LoadingException if no language has been set.
        """
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_atree(self, aroot):
        """\
        Add subordinate clause punctuation to the given sentence.

        All pairs of neighbouring nodes (in the order valid on entry) are
        examined and a comma is inserted between those that belong to
        different clauses, unless one of the exceptions below applies.
        """
        anodes = aroot.get_descendants(ordered=True)
        # examine all places between two nodes
        for (aleft, aright) in zip(anodes[:-1], anodes[1:]):
            # exclude all places where we don't want a comma
            # within the same clause
            if aleft.clause_number == aright.clause_number:
                continue
            # clause boundaries, such as brackets
            if aright.clause_number == 0:
                continue
            # some punctuation is here already
            if any(re.match(r'^[,:;.?!-]', an.lemma)
                   for an in (aleft, aright)):
                continue
            # coordinating conjunctions or nodes in clauses belonging
            # to the same coordination
            if any(self.lexicon.is_coord_conj(an.lemma)
                   for an in (aleft, aright)):
                continue
            if self.are_in_coord_clauses(aleft, aright):
                continue
            # left token is an opening quote or bracket
            if re.match(r'^[„(]', aleft.lemma):
                continue
            # right token is a closing bracket or quote followed by a period
            if aright.lemma == ')' or \
                    (aright.lemma == '“' and not aright.is_last_node() and
                     aright.get_next_node().lemma == '.'):
                continue
            # left token is a closing quote or bracket preceded by a comma
            # (which has been inserted in the last step); the node to look
            # at is the one before the LEFT token -- the one before the
            # right token is the left token itself
            if re.match(r'^[“)]', aleft.lemma) and \
                    not aleft.is_first_node() and \
                    aleft.get_prev_node().lemma == ',':
                continue
            # now we know we want to insert a comma
            acomma = self.insert_comma_between(aleft, aright)
            # move the comma if the left token marks
            # the end of an enquoted clause
            if self.is_clause_in_quotes(aleft):
                acomma.shift_before_node(aleft)
            # move the comma after clausal expletives in expression "poté co"
            if aright.lemma == 'poté':
                acomma.shift_after_node(aright)

    def are_in_coord_clauses(self, aleft, aright):
        """\
        Check if the given nodes are in two coordinated clauses, i.e.
        if their clauses share a non-root parent whose lemma is
        a coordinating conjunction according to the lexicon.
        """
        alparent = self.get_clause_parent(aleft)
        arparent = self.get_clause_parent(aright)
        return alparent == arparent and \
                not alparent.is_root and \
                self.lexicon.is_coord_conj(alparent.lemma)

    def get_clause_parent(self, anode):
        """Return the parent of the clause the given node belongs to;
        the result may be the root of the tree."""
        if anode.clause_number == 0:
            parent = anode
        else:
            parent = anode.get_clause_root().parent
        # climb over nested coordinations/appositions
        while parent.is_coap_root() and parent.is_member:
            parent = parent.parent
        return parent

    def insert_comma_between(self, aleft, aright):
        """Insert a comma node between these two nodes,
        find out where to hang it.

        The comma is hung under the clause root of the left node if that
        root lies deeper in the tree than the clause root of the right
        node, otherwise under the clause root of the right node.
        Returns the new comma node, placed right after the left node."""
        # find out the parent
        aleft_clause_root = aleft.get_clause_root()
        aright_clause_root = aright.get_clause_root()
        if aleft_clause_root.get_depth() > aright_clause_root.get_depth():
            ahigher_clause_root = aleft_clause_root
        else:
            ahigher_clause_root = aright_clause_root
        # insert the new node
        acomma = ahigher_clause_root.create_child(
            data={'form': ',', 'lemma': ',', 'afun': 'AuxX',
                  'morphcat': {'pos': 'Z'}, 'clause_number': 0})
        # shift the new node to its rightful place
        acomma.shift_after_node(aleft)
        return acomma