コード例 #1
0
ファイル: filter.py プロジェクト: fashandge/deja
def contain_verb(tag, verb):
    '''Tell whether some verb-tagged word in `tag` has base form `verb`.

    tag  -- iterable of (word, pos) pairs
    verb -- value compared against util.lemmatize(word, 'v')

    Only pairs whose pos starts with 'VB' are lemmatized; the scan
    stops at the first match. Returns True on a match, else False.
    '''
    return any(
        util.lemmatize(token, 'v') == verb
        for token, label in tag
        if label.startswith('VB')
    )
コード例 #2
0
ファイル: filter.py プロジェクト: fashandge/deja
def contain_query_words(sentence, query, query_pos='v'):
    '''Check that every word of `query` occurs in the lemmatized sentence.

    sentence  -- iterable of words; each one is passed through
                 util.lemmatize(word, query_pos)
    query     -- whitespace-separated keywords; a falsy query (or one
                 with no keywords) matches every sentence
    query_pos -- part-of-speech argument forwarded to util.lemmatize

    The keywords are compared as given (they are not lemmatized), so
    they are presumably expected to be base forms already.

    Returns True if all keywords are found, otherwise False.
    '''
    if not query:
        return True

    keywords = query.split()
    if not keywords:
        # Nothing to look for; skip the (possibly slow) lemmatization.
        return True

    # Lemmatizing is the expensive part, so do it exactly once for the
    # whole sentence instead of once per keyword, and never feed
    # already-lemmatized words back into the lemmatizer.
    lemmas = {util.lemmatize(word, query_pos) for word in sentence}
    return all(keyword in lemmas for keyword in keywords)
コード例 #3
0
ファイル: filter.py プロジェクト: fashandge/deja
def canonical_tagged_sentence(tokens, tags):
    '''Join the normalized forms of `tokens` into one space-separated str.

    tokens -- sequence of words
    tags   -- sequence of tags, paired with `tokens` via zip()

    Tokens whose tag is in the skip list below are dropped. Every
    other token is passed through util.lemmatize using the result of
    util.tag2wnpos(tag), and then:
      * any token tagged 'IN' becomes the literal 'IN'
      * "n't" becomes 'not'
      * "'s" with a tag starting with 'VB', and "'re", become 'be'
    '''
    skipped_tags = ('DT', 'PRP$', 'TO', '.', '$', ':', ',',
                    'POS', '``', "''", 'CD')

    canonical = []
    for word, pos in zip(tokens, tags):
        if pos not in skipped_tags:
            lemma = util.lemmatize(word, util.tag2wnpos(pos))
            if pos == 'IN':
                lemma = 'IN'
            elif lemma == "n't":
                lemma = 'not'
            elif ((lemma == "'s" and pos.startswith('VB'))
                  or lemma == "'re"):
                lemma = 'be'
            canonical.append(lemma)

    # Convert to str because pytable doesn't support unicode, in case
    # the result is later stored in a pytable.
    joined = ' '.join(canonical)
    return str(joined)