Exemple #1
0
def optimise_determiner(phrase, np_phrases, context):
    """Select the appropriate determiner for a noun phrase.

    The phrase is modified in place (its ``spec`` attribute is assigned)
    and also returned. Anything that is not a ``NounPhrase`` is returned
    untouched.

    Rules, applied in order:

    1. Proper nouns and pronouns get an empty specifier (``Element()``).
    2. If the only previously seen phrase with the same head is equal to
       this phrase, the definite article "the" is used.
    3. Otherwise, unless the phrase is guessed to be plural, an indefinite
       article is used: "an" before a, e, i, o, u (in either case),
       "a" before anything else.

    Plural phrases that match neither rule 1 nor rule 2 keep whatever
    specifier they already had.

    Args:
        phrase: the element whose determiner should be fixed.
        np_phrases: previously seen phrases used to look for distractors
            (phrases with the same head as ``phrase``).
        context: unused here; kept so existing callers keep working.

    Returns:
        The same ``phrase`` object that was passed in.
    """
    log = get_log()
    log.debug('Fixing determiners: {}'.format(phrase))
    if not isinstance(phrase, NounPhrase):
        log.debug('...not an NP')
        return phrase

    log.debug('NPs: {}'.format(' '.join(str(x) for x in np_phrases)))

    head = phrase.head
    # FIXME: this should look at all modifiers, not only the head
    distractors = [x for x in np_phrases
                   if hasattr(x, 'head') and head == x.head]
    log.debug('distractors: {}'
              .format(' '.join(str(x) for x in distractors)))

    if (head.has_feature('PROPER', 'true') or
            head.has_feature('cat', 'PRONOUN')):
        log.debug('...proper or pronoun')
        phrase.spec = Element()

    # Pronouns were handled above, so no further pronoun check is needed.
    elif len(distractors) == 1 and phrase in distractors:
        log.debug('...unpronominalised phrase that is last mentioned')
        phrase.spec = Word('the', 'DETERMINER')

    elif lexicon.guess_phrase_number(phrase) != Number.plural:
        log.debug('...indefinite')
        word = head.string
        # Compare case-insensitively so capitalised heads ("Apple") get
        # "an"; 'y' is left out because word-initial 'y' is almost always
        # a consonant sound ("a year", "a yard").
        if word and word[0].lower() in 'aeiou':
            phrase.spec = Word('an', 'DETERMINER')
        else:
            phrase.spec = Word('a', 'DETERMINER')
    return phrase
Exemple #2
0
def _pronoun_use_for(phrase, target):
    """Pick the PronounUse value for `target` inside `phrase`.

    Outside of a Clause the subjective form is always used. Inside a
    Clause: the subject itself is subjective; something found among the
    constituents of both the subject and the VP is reflexive; something
    found only among the VP constituents is objective; everything else
    is subjective.
    """
    if not isinstance(phrase, Clause):
        return PronounUse.subjective
    if target is phrase.subj:
        return PronounUse.subjective
    in_subject = target in phrase.subj.constituents()
    in_vp = target in phrase.vp.constituents()
    # TODO: implement -- possessive will be used if it is a complement
    # of an NP?
    if in_vp:
        return PronounUse.reflexive if in_subject else PronounUse.objective
    return PronounUse.subjective


def optimise_ref_exp(phrase, context):
    """Replace anaphoric noun phrases with pronouns when possible.

    Works on a shallow copy of `phrase`: every NounPhrase or Coordination
    constituent that is the last entry among the same-gender phrases seen
    so far (context.np_stack plus the ones already handled in this call)
    is swapped for a pronoun in the copy. Constituents that are not
    replaced get their determiner fixed via optimise_determiner instead.
    Finally `phrase` is registered with the context and the copy is
    returned.
    """
    # TODO: include Number in the decision process (it vs they)
    # FIXME: Coordinated elements need some special attention
    log = get_log()
    result = copy(phrase)

    log.debug('-='*40)
    log.debug('constituents:')
    for element in phrase.constituents():
        log.debug('\t {}'.format(' '.join(repr(element).split())))

    # only noun phrases and coordinations are candidates for processing
    candidates = [element for element in phrase.constituents()
                  if isinstance(element, (NounPhrase, Coordination))]

    uttered = []
    seen_ids = set()
    for target in candidates:
        log.debug('current NP:\n{}'.format(target))
        gender = lexicon.guess_phrase_gender(target)
        log.debug('gender of NP: {}'.format(gender))
        number = lexicon.guess_phrase_number(target)
        log.debug('number of NP: {}'.format(number))

        if target.has_feature('PERSON'):
            person = ('PERSON', target.get_feature('PERSON'))
        elif context.is_last_speaker(target):
            person = Person.first
        else:
            person = Person.third

        same_gender = [other for other in (context.np_stack + uttered)
                       if lexicon.guess_phrase_gender(other) == gender]

        if id(target) in seen_ids:
            log.debug('current NP: {} was already processed'.format(target))
            continue

        # the most recent same-gender phrase is the most salient one,
        # so it is the one that gets pronominalised
        pronominalised = target in same_gender[-1:]
        if pronominalised:
            use = _pronoun_use_for(phrase, target)
            pronoun = pronominalise(target, gender, use, person)
            log.debug('replacing {}:{} with {}'
                      .format(id(target), target, pronoun))
            replace_element_with_id(result, id(target), pronoun)

        # sub-phrases of a handled element must not be handled again
        subparts = list(target.constituents())
        seen_ids.update(map(id, subparts))

        bare = deepcopy(target)
        bare.spec = Element()
        uttered.append(bare)

        if not pronominalised:
            # fix determiners in the processed NP
            optimise_determiner(target, same_gender, context)

    context.add_sentence(phrase)
    return result