Ejemplo n.º 1
0
def get_words_with_tag(language, words, pos_tag, tagger, separator='_'):
    """
    Return the words that are tagged with the given POS tag.

    >>> get_words_with_tag('en', ['cat', 'little'], 'NN', 'pos_tag_stanford')
    [('cat', 'NN')]
    >>> get_words_with_tag('fr', [(u'jouer_VB'), (u'aimer_VB'), \\
    ... (u'peut-être_RB')], 'VB', 'pos_tag_melt')
    [(u'jouer', u'VB'), (u'aimer', u'VB')]

    :param language: language id, one of 'en', 'fr' or 'fi'; only consulted
                     when the words are not already tagged
    :param words: list of words, optionally tagged <word>_<tag>
    :param pos_tag: POS tag to select
    :param tagger: method to use for tagging (forwarded to the 'en' and 'fr'
                   taggers; the 'fi' tagger is called without it)
    :param separator: the character that separates the word and the tag
                      in the word list
    :return: list of the tagged tokens whose tag equals ``pos_tag``
    :raises ValueError: if the words need tagging and ``language`` is not
                        one of the supported ids
    """
    if not words:
        return []

    # Only the first word is inspected to decide whether the whole list
    # is already tagged.
    if separator not in words[0]:
        if language == 'en':
            tagged = tag_en.quick_pos_tag(words, tagger=tagger)
        elif language == 'fr':
            tagged = tag_fr.quick_pos_tag(words, tagger=tagger)
        elif language == 'fi':
            tagged = tag_fi.quick_pos_tag(words)
        else:
            # Previously this case fell through and failed later with an
            # UnboundLocalError on ``tagged``; fail early and explicitly.
            raise ValueError('unsupported language: %r' % (language,))
    else:
        tagged = helpers.strings_to_tuples(words, separator)

    # NOTE(review): presumably normalises the capitalisation of proper
    # nouns -- confirm against helpers.capitalize_nnp.
    tagged = helpers.capitalize_nnp(tagged)

    # Tokens without a tag component (length < 2) are skipped.
    return [token for token in tagged
            if len(token) > 1 and token[1] == pos_tag]
Ejemplo n.º 2
0
def get_words_with_tag(language, words, pos_tag, tagger, separator='_'):
    """
    Return the words that are tagged with the given POS tag.

    >>> get_words_with_tag('en', ['cat', 'little'], 'NN', 'pos_tag_stanford')
    [('cat', 'NN')]
    >>> get_words_with_tag('fr', [(u'jouer_VB'), (u'aimer_VB'), \\
    ... (u'peut-être_RB')], 'VB', 'pos_tag_melt')
    [(u'jouer', u'VB'), (u'aimer', u'VB')]

    :param language: language id, one of 'en', 'fr' or 'fi'; only consulted
                     when the words are not already tagged
    :param words: list of words, optionally tagged <word>_<tag>
    :param pos_tag: POS tag to select
    :param tagger: method to use for tagging (forwarded to the 'en' and 'fr'
                   taggers; the 'fi' tagger is called without it)
    :param separator: the character that separates the word and the tag
                      in the word list
    :return: list of the tagged tokens whose tag equals ``pos_tag``
    :raises ValueError: if the words need tagging and ``language`` is not
                        one of the supported ids
    """
    if not words:
        return []

    # Only the first word is inspected to decide whether the whole list
    # is already tagged.
    if separator not in words[0]:
        if language == 'en':
            tagged = tag_en.quick_pos_tag(words, tagger=tagger)
        elif language == 'fr':
            tagged = tag_fr.quick_pos_tag(words, tagger=tagger)
        elif language == 'fi':
            tagged = tag_fi.quick_pos_tag(words)
        else:
            # Previously this case fell through and failed later with an
            # UnboundLocalError on ``tagged``; fail early and explicitly.
            raise ValueError('unsupported language: %r' % (language,))
    else:
        tagged = helpers.strings_to_tuples(words, separator)

    # NOTE(review): presumably normalises the capitalisation of proper
    # nouns -- confirm against helpers.capitalize_nnp.
    tagged = helpers.capitalize_nnp(tagged)

    # Tokens without a tag component (length < 2) are skipped.
    return [token for token in tagged
            if len(token) > 1 and token[1] == pos_tag]