Ejemplo n.º 1
0
def pronominalise(np, *features):
    """Create a pronoun for the corresponding noun phrase. """
    # features can be: person, gender, subject|object (case),
    #   possessive determiner, possessive pronoun, reflexive
    get_log().info('Doing pronominalisation on {0}'.format(repr(np)))
    tmp = [x for x in features if str(Gender) == x[0]]
    if len(tmp) == 1:
        gender = tmp[0]
    else:
        gender = lexicon.guess_phrase_gender(np)
    all_features = list(features)
    if gender == Gender.epicene:
        all_features = [x for x in all_features if x != Number.singular]
        all_features.append(Number.plural)
    else:
        all_features.append(gender)
    all_features.extend(list(np._features.items()))
    get_log().debug('Phrase features for pronominalisation:\n\t{}'
                    .format(all_features))
    res = lexicon.pronoun_for_features(*all_features)
    get_log().debug('\tresult:{}'.format(res))
    return res
Ejemplo n.º 2
0
def optimise_ref_exp(phrase, context):
    """Replace anaphoric noun phrases with pronouns when possible. """
    # TODO: include Number in the dicision process (it vs they)
    # FIXME: Coordinated elements need some special attention
    result = copy(phrase)
    # test for selecting phrases taht can be processed
    test = lambda x: isinstance(x, NounPhrase) or isinstance(x, Coordination)
    # reverse so that we start with large phrases first (eg CC)
    get_log().debug('-='*40)
    get_log().debug('constituents:')
    for x in phrase.constituents():
        get_log().debug('\t {}'.format(' '.join(repr(x).split())))
    nps = [x for x in phrase.constituents() if test(x)]
    uttered = []
    processed_ids = set()
    for np in nps:
        replaced = False
        get_log().debug('current NP:\n{}'.format(np))
        gender = lexicon.guess_phrase_gender(np)
        get_log().debug('gender of NP: {}'.format(gender))
        number = lexicon.guess_phrase_number(np)
        get_log().debug('number of NP: {}'.format(number))
        if not np.has_feature('PERSON'):
            if context.is_last_speaker(np):
                person = Person.first
            else:
                person = Person.third
        else:
            person = ('PERSON', np.get_feature('PERSON'))
        phrases = [x for x in (context.np_stack + uttered)
            if lexicon.guess_phrase_gender(x) == gender]
#        get_log().debug('distractors of NP:\n\t{}'.format(distractors))
        if id(np) in processed_ids:
            get_log().debug('current NP: {} was already processed'.format(np))
            continue
#        if ((np in context.np_stack or np in uttered) and np == phrases[-1]):
        if (np in phrases[-1:]):
            # this np is the most salient so pronominalise it
            if isinstance(phrase, Clause):
                if id(np) == id(phrase.subj):
                    pronoun = pronominalise(np, gender, PronounUse.subjective, person)
                elif (np in phrase.subj.constituents() and
                      np in phrase.vp.constituents()):
                    pronoun = pronominalise(np, gender, PronounUse.reflexive, person)
                # TODO: implement -- possessive will be used if it is a complement of an NP?
#                elif any(id(np) in [id(x) for x in pp.constituents()]
#                            for pp in pps):
#                    pronoun = pronominalise(np, gender, PronounUse.possessive)
                elif (np in phrase.vp.constituents()):
                    pronoun = pronominalise(np, gender, PronounUse.objective, person)
                else:
                    pronoun = pronominalise(np, gender, PronounUse.subjective, person)
            else:
                pronoun = pronominalise(np, gender, PronounUse.subjective, person)
            get_log().debug('replacing {}:{} with {}'.format(id(np), np, pronoun))
            replace_element_with_id(result, id(np), pronoun)
            replaced = True
        # if you replace an element, remove all the subphrases from the list
        processed = [y for y in np.constituents()]
        processed_ids.update([id(x) for x in processed])
        unspec_np = deepcopy(np)
        unspec_np.spec = Element()
        uttered.append(unspec_np)
        if not replaced:
            # fix determiners in the processed NP
            optimise_determiner(np, phrases, context)
    context.add_sentence(phrase)
    return result