Beispiel #1
0
class ImposeSubjPredAgr(ImposeAgreement):
    """
    Impose subject-predicate agreement: the gender, number and person
    of a finite verb's a-node are set according to its subject.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Constructor, checking the argument values"
        super(ImposeSubjPredAgr, self).__init__(scenario, args)
        self.lexicon = Lexicon()

    def should_agree(self, tnode):
        """
        Find finite verbs, with/without a subject.

        Returns False if the formeme of the t-node does not match the
        finite verb pattern. Otherwise returns a pair (anode, asubj):
        the lexical a-node of the verb and the result of looking for
        an effective child with afun 'Sb' (the other methods of this
        class treat None as "no subject found").
        """
        # avoid everything except finite verbs
        if not re.match(r'v.+(fin|rc)$', tnode.formeme):
            return False
        anode = tnode.lex_anode
        asubj = first(lambda achild: achild.afun == 'Sb',
                      anode.get_echildren())
        return (anode, asubj)

    def process_excepts(self, tnode, match_nodes):
        """
        Handle the cases where the verb must not simply copy the
        categories of its subject.

        Returns True if the node has been dealt with here (including
        verbs without a subject, which are left untouched), and False
        if no special case applied.
        NOTE(review): presumably the base class calls impose() only
        when False is returned -- confirm in ImposeAgreement.
        """
        anode, asubj = match_nodes
        # subjectless verbs, reflexive passive and
        # incongruent numerals: 3.ps. sg. neut.
        if (asubj is None and
                (re.match(r'^((po|z|za)?dařit|(za)?líbit)$', anode.lemma) or
                 (tnode.gram_diathesis or tnode.voice) in
                 ['reflexive_diathesis', 'deagent'])) or \
                 (asubj and self.lexicon.is_incongruent_numeral(asubj.lemma)):
            anode.morphcat_gender = 'N'
            anode.morphcat_number = 'S'
            anode.morphcat_person = '3'
            return True
        # This will skip all remaining verbs without subject
        if asubj is None:
            return True
        # Indefinite pronoun subjects: masculine, 3rd person; the number
        # is taken from the pronoun, with singular as the fallback
        if re.match(r'^((ně|ni|)kdo|kdokoliv?)$', asubj.lemma):
            anode.morphcat_gender = 'M'
            anode.morphcat_number = asubj.morphcat_number or 'S'
            anode.morphcat_person = '3'
            return True
        return False

    def impose(self, tnode, match_nodes):
        """
        Impose the subject-predicate agreement on regular nodes.

        match_nodes is the (anode, asubj) pair produced by
        should_agree(); asubj must not be None here.
        """
        anode, asubj = match_nodes
        # Copy the categories from the subject to the predicate
        anode.morphcat_gender = asubj.morphcat_gender
        # any person value other than 1/2/3 (including unset) becomes 3
        person = asubj.morphcat_person
        anode.morphcat_person = person if person in ['1', '2', '3'] else '3'
        anode.morphcat_number = asubj.morphcat_number
        # Correct for coordinated subjects: the predicate is plural unless
        # the coordination head is 'nebo'; the subject itself keeps its
        # own number
        if asubj.is_member and asubj.parent.lemma != 'nebo':
            anode.morphcat_number = 'P'
class ReverseNumberNounDependency(Block):
    """
    This block reverses the dependency of incongruent Czech numerals (5 and
    higher), hanging their parents under them in the a-tree.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """
        Constructor, checking the argument values.

        Raises LoadingException if no language has been set.
        """
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_ttree(self, ttree):
        "Rehang the numerals for the given t-tree & a-tree pair"
        for tnode in ttree.get_children():
            self.__process_subtree(tnode)

    def __process_subtree(self, tnode):
        "Process the subtree of the given node (the node first, then depth-first)"
        # solve the current node
        if tnode.is_coap_root():
            self.__process_coap_tnode(tnode)
        else:
            self.__process_plain_tnode(tnode)
        # recurse deeper
        for child in tnode.get_children():
            self.__process_subtree(child)

    def __process_plain_tnode(self, tnode):
        """
        Process a normal (non-coap) tnode: if it is a numeral to be
        reversed and its parent has a nominative/accusative formeme,
        swap the two on the a-layer and update their formemes.
        """
        tnoun = tnode.parent
        # filter out cases where we don't need to do anything: lemma, case
        if tnoun < tnode or not self.__should_reverse(tnode.t_lemma):
            return
        noun_prep, noun_case = self.__get_prepcase(tnoun)
        # also covers noun_case == None (formeme did not match at all)
        if noun_case not in ['1', '4']:
            return
        # make the switch
        self.__swap_anodes(tnode, tnoun)
        self.__update_formemes(tnode, tnoun, noun_prep, noun_case)
        # set singular for decimal numbers (digits, comma or dot, digits)
        if re.match(r'^\d+[,.]\d+$', tnode.t_lemma):
            tnode.gram_number = 'sg'

    def __process_coap_tnode(self, tnode):
        """
        Process a coap root: the swap is made only if all of its members
        are numerals to be reversed; the coap root is then swapped
        with the parent noun and the formemes of all members are updated.
        """
        # check if we have actually something to process
        tchildren = [tchild for tchild in tnode.get_children(ordered=1)
                     if tchild.is_member]
        if not tchildren:
            return
        # check whether the switch should apply to all children
        # (any() instead of testing the result of filter(): a filter
        # object is always true on Python 3)
        tnoun = tnode.parent
        if tnoun < tnode or any(not self.__should_reverse(tchild.t_lemma)
                                for tchild in tchildren):
            return
        # check noun case
        noun_prep, noun_case = self.__get_prepcase(tnoun)
        # also covers noun_case == None (formeme did not match at all)
        if noun_case not in ['1', '4']:
            return
        # switch the coap root with the noun
        self.__swap_anodes(tnode, tnoun)
        for tchild in tchildren:
            self.__update_formemes(tchild, tnoun, noun_prep, noun_case)
        # fix number according to the last child (decimal number => sg)
        if re.match(r'^\d+[,.]\d+$', tchildren[-1].t_lemma):
            tnode.gram_number = 'sg'

    def __update_formemes(self, tnumber, tnoun, noun_prep, noun_case):
        """
        Update the formemes to reflect the swap of the nodes.

        The number gets 'n:' + merged prepositions + the original case
        of the noun, the noun gets 'n:2'. The formeme_origin of both
        nodes is overwritten for debugging.
        """
        # merge number and noun prepositions; the preposition part of
        # the number formeme is whatever stands between ':' and '+'
        # (the pattern must not be optional as a whole, otherwise
        # re.search always succeeds with an empty match at position 0)
        prep_match = re.search(r':(.*)\+', tnumber.formeme)
        number_prep = prep_match.group(1) if prep_match else None
        if noun_prep and number_prep:
            preps = noun_prep + '_' + number_prep + '+'
        elif noun_prep or number_prep:
            preps = (noun_prep or number_prep) + '+'
        else:
            preps = ''
        # mark formeme origins for debugging
        tnoun.formeme_origin = 'rule-number_from_parent(%s : %s)' % \
                (tnoun.formeme_origin, tnoun.formeme)
        tnumber.formeme_origin = 'rule-number_genitive'
        # Change formemes:
        # number gets merged preposition + noun case, noun gets genitive
        tnumber.formeme = 'n:%s%s' % (preps, noun_case)
        tnoun.formeme = 'n:2'

    def __swap_anodes(self, tnumber, tnoun):
        """
        Swap the dependency between a number and a noun on the a-layer.

        The noun a-node is taken to be the current parent of the
        number's lexical a-node; tnoun itself is not used here.
        """
        # the actual swap
        anumber = tnumber.lex_anode
        anoun = anumber.parent
        anumber.parent = anoun.parent
        anoun.parent = anumber
        # fix is_member: the new head takes over the membership
        if anoun.is_member:
            anoun.is_member = False
            anumber.is_member = True
        # fix parenthesis: the same for the parenthesis flag
        if anoun.get_attr('wild/is_parenthesis'):
            anoun.set_attr('wild/is_parenthesis', False)
            anumber.set_attr('wild/is_parenthesis', True)

    def __get_prepcase(self, tnoun):
        """\
        Return the pair (preposition, case) of a noun formeme of the form
        'n:[prep+]case', where case is '1', '4' or 'X'; the preposition
        is None if the formeme has none. Returns (None, None) if
        the formeme is missing or has a different shape.
        """
        try:
            match = re.search(r'^n:(?:(.*)\+)?([14X])$', tnoun.formeme)
        except (AttributeError, TypeError):
            # no node or no usable formeme string
            return None, None
        if match is None:
            return None, None
        return match.groups()

    def __should_reverse(self, lemma):
        """\
        Return true if the given lemma belongs to an incongruent numeral
        according to the lexicon, or if it is one of the two listed
        lemmas ('většina', 'menšina'). The original note describes this
        as a hack to allow for translation of the English words "most"
        and 'more'. Normally, the method is_incongruent_numeral should
        be used directly.
        """
        return bool(self.lexicon.is_incongruent_numeral(lemma) or
                    lemma in ['většina', 'menšina'])
Beispiel #3
0
class ImposeSubjPredAgr(ImposeAgreement):
    """
    Impose subject-predicate agreement: the gender, number and person
    of a finite verb's a-node are set according to its subject.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        "Constructor, checking the argument values"
        super(ImposeSubjPredAgr, self).__init__(scenario, args)
        self.lexicon = Lexicon()

    def should_agree(self, tnode):
        """
        Find finite verbs, with/without a subject.

        Returns False if the formeme of the t-node does not match the
        finite verb pattern. Otherwise returns a pair (anode, asubj):
        the lexical a-node of the verb and the result of looking for
        an effective child with afun 'Sb' (the other methods of this
        class treat None as "no subject found").
        """
        # avoid everything except finite verbs
        if not re.match(r'v.+(fin|rc)$', tnode.formeme):
            return False
        anode = tnode.lex_anode
        asubj = first(lambda achild: achild.afun == 'Sb',
                      anode.get_echildren())
        return (anode, asubj)

    def process_excepts(self, tnode, match_nodes):
        """
        Handle the cases where the verb must not simply copy the
        categories of its subject.

        Returns True if the node has been dealt with here (including
        verbs without a subject, which are left untouched), and False
        if no special case applied.
        NOTE(review): presumably the base class calls impose() only
        when False is returned -- confirm in ImposeAgreement.
        """
        anode, asubj = match_nodes
        # subjectless verbs, reflexive passive and
        # incongruent numerals: 3.ps. sg. neut.
        if (asubj is None and
                (re.match(r'^((po|z|za)?dařit|(za)?líbit)$', anode.lemma) or
                 (tnode.gram_diathesis or tnode.voice) in
                 ['reflexive_diathesis', 'deagent'])) or \
                 (asubj and self.lexicon.is_incongruent_numeral(asubj.lemma)):
            anode.morphcat_gender = 'N'
            anode.morphcat_number = 'S'
            anode.morphcat_person = '3'
            return True
        # This will skip all remaining verbs without subject
        if asubj is None:
            return True
        # Indefinite pronoun subjects: masculine, 3rd person; the number
        # is taken from the pronoun, with singular as the fallback
        if re.match(r'^((ně|ni|)kdo|kdokoliv?)$', asubj.lemma):
            anode.morphcat_gender = 'M'
            anode.morphcat_number = asubj.morphcat_number or 'S'
            anode.morphcat_person = '3'
            return True
        return False

    def impose(self, tnode, match_nodes):
        """
        Impose the subject-predicate agreement on regular nodes.

        match_nodes is the (anode, asubj) pair produced by
        should_agree(); asubj must not be None here.
        """
        anode, asubj = match_nodes
        # Copy the categories from the subject to the predicate
        anode.morphcat_gender = asubj.morphcat_gender
        # any person value other than 1/2/3 (including unset) becomes 3
        person = asubj.morphcat_person
        anode.morphcat_person = person if person in ['1', '2', '3'] else '3'
        anode.morphcat_number = asubj.morphcat_number
        # Correct for coordinated subjects: the predicate is plural unless
        # the coordination head is 'nebo'; the subject itself keeps its
        # own number
        if asubj.is_member and asubj.parent.lemma != 'nebo':
            anode.morphcat_number = 'P'
Beispiel #4
0
class ReverseNumberNounDependency(Block):
    """
    This block reverses the dependency of incongruent Czech numerals (5 and
    higher), hanging their parents under them in the a-tree.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """
        Constructor, checking the argument values.

        Raises LoadingException if no language has been set.
        """
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')
        self.lexicon = Lexicon()

    def process_ttree(self, ttree):
        "Rehang the numerals for the given t-tree & a-tree pair"
        for tnode in ttree.get_children():
            self.__process_subtree(tnode)

    def __process_subtree(self, tnode):
        "Process the subtree of the given node (the node first, then depth-first)"
        # solve the current node
        if tnode.is_coap_root():
            self.__process_coap_tnode(tnode)
        else:
            self.__process_plain_tnode(tnode)
        # recurse deeper
        for child in tnode.get_children():
            self.__process_subtree(child)

    def __process_plain_tnode(self, tnode):
        """
        Process a normal (non-coap) tnode: if it is a numeral to be
        reversed and its parent has a nominative/accusative formeme,
        swap the two on the a-layer and update their formemes.
        """
        tnoun = tnode.parent
        # filter out cases where we don't need to do anything: lemma, case
        if tnoun < tnode or not self.__should_reverse(tnode.t_lemma):
            return
        noun_prep, noun_case = self.__get_prepcase(tnoun)
        # also covers noun_case == None (formeme did not match at all)
        if noun_case not in ['1', '4']:
            return
        # make the switch
        self.__swap_anodes(tnode, tnoun)
        self.__update_formemes(tnode, tnoun, noun_prep, noun_case)
        # set singular for decimal numbers (digits, comma or dot, digits)
        if re.match(r'^\d+[,.]\d+$', tnode.t_lemma):
            tnode.gram_number = 'sg'

    def __process_coap_tnode(self, tnode):
        """
        Process a coap root: the swap is made only if all of its members
        are numerals to be reversed; the coap root is then swapped
        with the parent noun and the formemes of all members are updated.
        """
        # check if we have actually something to process
        tchildren = [
            tchild for tchild in tnode.get_children(ordered=1)
            if tchild.is_member
        ]
        if not tchildren:
            return
        # check whether the switch should apply to all children
        # (any() instead of testing the result of filter(): a filter
        # object is always true on Python 3)
        tnoun = tnode.parent
        if tnoun < tnode or any(not self.__should_reverse(tchild.t_lemma)
                                for tchild in tchildren):
            return
        # check noun case
        noun_prep, noun_case = self.__get_prepcase(tnoun)
        # also covers noun_case == None (formeme did not match at all)
        if noun_case not in ['1', '4']:
            return
        # switch the coap root with the noun
        self.__swap_anodes(tnode, tnoun)
        for tchild in tchildren:
            self.__update_formemes(tchild, tnoun, noun_prep, noun_case)
        # fix number according to the last child (decimal number => sg)
        if re.match(r'^\d+[,.]\d+$', tchildren[-1].t_lemma):
            tnode.gram_number = 'sg'

    def __update_formemes(self, tnumber, tnoun, noun_prep, noun_case):
        """
        Update the formemes to reflect the swap of the nodes.

        The number gets 'n:' + merged prepositions + the original case
        of the noun, the noun gets 'n:2'. The formeme_origin of both
        nodes is overwritten for debugging.
        """
        # merge number and noun prepositions; the preposition part of
        # the number formeme is whatever stands between ':' and '+'
        # (the pattern must not be optional as a whole, otherwise
        # re.search always succeeds with an empty match at position 0)
        prep_match = re.search(r':(.*)\+', tnumber.formeme)
        number_prep = prep_match.group(1) if prep_match else None
        if noun_prep and number_prep:
            preps = noun_prep + '_' + number_prep + '+'
        elif noun_prep or number_prep:
            preps = (noun_prep or number_prep) + '+'
        else:
            preps = ''
        # mark formeme origins for debugging
        tnoun.formeme_origin = 'rule-number_from_parent(%s : %s)' % \
                (tnoun.formeme_origin, tnoun.formeme)
        tnumber.formeme_origin = 'rule-number_genitive'
        # Change formemes:
        # number gets merged preposition + noun case, noun gets genitive
        tnumber.formeme = 'n:%s%s' % (preps, noun_case)
        tnoun.formeme = 'n:2'

    def __swap_anodes(self, tnumber, tnoun):
        """
        Swap the dependency between a number and a noun on the a-layer.

        The noun a-node is taken to be the current parent of the
        number's lexical a-node; tnoun itself is not used here.
        """
        # the actual swap
        anumber = tnumber.lex_anode
        anoun = anumber.parent
        anumber.parent = anoun.parent
        anoun.parent = anumber
        # fix is_member: the new head takes over the membership
        if anoun.is_member:
            anoun.is_member = False
            anumber.is_member = True
        # fix parenthesis: the same for the parenthesis flag
        if anoun.get_attr('wild/is_parenthesis'):
            anoun.set_attr('wild/is_parenthesis', False)
            anumber.set_attr('wild/is_parenthesis', True)

    def __get_prepcase(self, tnoun):
        """\
        Return the pair (preposition, case) of a noun formeme of the form
        'n:[prep+]case', where case is '1', '4' or 'X'; the preposition
        is None if the formeme has none. Returns (None, None) if
        the formeme is missing or has a different shape.
        """
        try:
            match = re.search(r'^n:(?:(.*)\+)?([14X])$', tnoun.formeme)
        except (AttributeError, TypeError):
            # no node or no usable formeme string
            return None, None
        if match is None:
            return None, None
        return match.groups()

    def __should_reverse(self, lemma):
        """\
        Return true if the given lemma belongs to an incongruent numeral
        according to the lexicon, or if it is one of the two listed
        lemmas ('většina', 'menšina'). The original note describes this
        as a hack to allow for translation of the English words "most"
        and 'more'. Normally, the method is_incongruent_numeral should
        be used directly.
        """
        return bool(self.lexicon.is_incongruent_numeral(lemma) or
                    lemma in ['většina', 'menšina'])