def correct_verbs(chunk):
    '''Correct plural/singular verb mistakes.
    >>> correct_verbs([('is', 'VBZ'), ('our', 'PRP$'), ('children', 'NNS'), ('learning', 'VBG')])
    [('are', 'VBP'), ('our', 'PRP$'), ('children', 'NNS'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('children', 'NNS'), ('is', 'VBZ'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('children', 'NNS'), ('are', 'VBP'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('child', 'NN'), ('were', 'VBD'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('child', 'NN'), ('was', 'VBD'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('child', 'NN'), ('is', 'VBZ'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('child', 'NN'), ('is', 'VBZ'), ('learning', 'VBG')]
    '''
    vbidx = first_chunk_index(chunk, lambda wt: wt[1].startswith('VB'))
    # if no verb found, do nothing
    if vbidx is None:
        return chunk

    verb, vbtag = chunk[vbidx]
    nnpred = lambda wt: wt[1].startswith('NN')
    # find the nearest noun to the right of the verb
    nnidx = first_chunk_index(chunk, nnpred, start=vbidx + 1)
    # if no noun found to the right, look to the left
    if nnidx is None:
        nnidx = first_chunk_index(chunk, nnpred, start=vbidx - 1, step=-1)
    # if no noun found at all, do nothing
    if nnidx is None:
        return chunk

    noun, nntag = chunk[nnidx]
    # look up the corrected verb form and swap it into the chunk
    if nntag.endswith('S'):
        chunk[vbidx] = plural_verb_forms.get((verb, vbtag), (verb, vbtag))
    else:
        chunk[vbidx] = singular_verb_forms.get((verb, vbtag), (verb, vbtag))

    return chunk
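The helpers first_chunk_index, plural_verb_forms, and singular_verb_forms are not shown in this listing. The following is a minimal sketch consistent with how they are called and with the doctests above; the exact implementation and mappings are assumptions, not the original module.

def first_chunk_index(chunk, pred, start=0, step=1):
    # scan the (word, tag) list from start in the given direction and
    # return the index of the first pair matching pred, else None
    end = len(chunk) if step > 0 else -1
    for i in range(start, end, step):
        if pred(chunk[i]):
            return i
    return None

# minimal verb-form mappings, just enough to satisfy the doctests above
plural_verb_forms = {('is', 'VBZ'): ('are', 'VBP'), ('was', 'VBD'): ('were', 'VBD')}
singular_verb_forms = {('are', 'VBP'): ('is', 'VBZ'), ('were', 'VBD'): ('was', 'VBD')}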
Example #3
from nltk.corpus import wordnet

def word_tag(tag):
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return ''
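A short usage sketch: word_tag converts Penn Treebank tags into the constants WordNetLemmatizer expects, with the empty string falling back to noun. The sample sentence is illustrative, and the relevant NLTK data packages (punkt, averaged_perceptron_tagger, wordnet) are assumed to be downloaded.

import nltk
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
tagged = nltk.pos_tag(nltk.word_tokenize("The striped bats were hanging on their feet"))
# lemmatize each word using its mapped WordNet part of speech
lemmas = [lemmatizer.lemmatize(word, word_tag(tag) or wordnet.NOUN) for word, tag in tagged]
print(lemmas)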
Example #4
import nltk
from nltk.stem import LancasterStemmer

def filter_pos(text, tagger):
    # split the words of text into stemmed nouns and stemmed verbs
    st = LancasterStemmer()
    tokens = nltk.word_tokenize(text)
    tagged = tagger.tag(tokens)
    nouns = list()
    verbs = list()
    for (word, tag) in tagged:
        if tag.startswith('N'):
            nouns.append(st.stem(word))
        elif tag.startswith('V'):
            verbs.append(st.stem(word))
    return nouns, verbs
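A usage sketch: the tagger choice and sample sentence are assumptions; any object with a .tag(tokens) method works, and the averaged_perceptron_tagger model must be downloaded.

from nltk.tag import PerceptronTagger

tagger = PerceptronTagger()  # pretrained NLTK tagger; any .tag(tokens) object works
nouns, verbs = filter_pos("The cats were chasing a mouse across the garden", tagger)
print(nouns, verbs)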
Example #5
def question_3():
    # build inverse tag bigrams and build a conditional frequency distribution
    inv_tag_bigrams = [(b, a) for (a, b) in nltk.bigrams(BROWN_TAGS)]
    tag_cfdist = nltk.probability.ConditionalFreqDist(inv_tag_bigrams)
    # accumulate the counts of all tags occurring before nouns
    tag_is_noun = lambda tag: tag.startswith('N')
    tags_before_noun_dict = collections.defaultdict(int)
    for tag in BROWN_TAGSET:
        if not tag_is_noun(tag):
            continue
        for (predecessor, count) in tag_cfdist[tag].items():
            tags_before_noun_dict[predecessor] += count
    # print the most common predecessors
    predecessors = sorted(tags_before_noun_dict, key=tags_before_noun_dict.get,
                          reverse=True)[:5]
    predecessors_counts = [tags_before_noun_dict[elem] for elem in predecessors]
    predecessors_pos = [tag_to_pos(elem) for elem in predecessors]
    predecessors_percentages = ['%.2f' % (100.0 * c / sum(predecessors_counts))
                                for c in predecessors_counts]
    print('Five most common tags before nouns')
    print(table(['Tag'] + predecessors, ['Count'] + predecessors_counts,
                ['%'] + predecessors_percentages,
                ['Part of Speech'] + predecessors_pos))
    print('Note that this result is very much in keeping with linguistic '
          'intuitions: one would expect articles, prepositions, adjectives, '
          'and other nouns to precede nouns.')
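This exercise snippet relies on module-level names that are not shown: BROWN_TAGS, BROWN_TAGSET, the pretty-printing helper table, and tag_to_pos (which appears later in this listing). A plausible setup for the corpus globals, assuming the Brown corpus is the data source:

import collections
import nltk
from nltk.corpus import brown

# flat sequence of Brown corpus tags, and the set of distinct tags
BROWN_TAGS = [tag for (word, tag) in brown.tagged_words()]
BROWN_TAGSET = set(BROWN_TAGS)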
Example #6
def pos_tags_to_wordnet_form(tagged_sent):
    '''
    takes a list of (token, tag) tuples and converts the tag to
    a wordnet friendly form

    Verb -> v, noun -> n, adverb -> r, adj -> a
    '''
    newtags = dict()
    for tok, tag in tagged_sent:
        if tag.startswith('V'):
            newtags[tok] = 'v'
        elif tag.startswith('N'):
            newtags[tok] = 'n'
        elif tag.startswith('J'):
            newtags[tok] = 'a'
        elif tag.startswith('RB'):
            newtags[tok] = 'r'
    return newtags
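A usage sketch: the sentence is illustrative, and the punkt and averaged_perceptron_tagger NLTK data are assumed to be available.

import nltk

tagged = nltk.pos_tag(nltk.word_tokenize("She quickly reads interesting books"))
# verbs map to 'v', nouns to 'n', adjectives to 'a', adverbs to 'r';
# tokens with other tags are simply omitted from the dict
print(pos_tags_to_wordnet_form(tagged))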
def swap_verb_phrase(chunk):
    '''Move modifier phrase after verb to front of chunk and drop the verb.
    >>> swap_verb_phrase([('the', 'DT'), ('book', 'NN'), ('was', 'VBD'), ('great', 'JJ')])
    [('great', 'JJ'), ('the', 'DT'), ('book', 'NN')]
    >>> swap_verb_phrase([('this', 'DT'), ('gripping', 'VBG'), ('book', 'NN'), ('is', 'VBZ'), ('fantastic', 'JJ')])
    [('fantastic', 'JJ'), ('this', 'DT'), ('gripping', 'VBG'), ('book', 'NN')]
    '''
    # find location of a conjugated verb (tag longer than 'VB', but not 'VBG')
    vbpred = lambda wt: wt[1] != 'VBG' and wt[1].startswith('VB') and len(wt[1]) > 2
    vbidx = first_chunk_index(chunk, vbpred)

    if vbidx is None:
        return chunk

    # everything after the verb moves to the front; the verb itself is dropped
    return chunk[vbidx + 1:] + chunk[:vbidx]
def swap_infinitive_phrase(chunk):
    '''Move subject to before the noun preceding the infinitive.
    >>> swap_infinitive_phrase([('book', 'NN'), ('of', 'IN'), ('recipes', 'NNS')])
    [('recipes', 'NNS'), ('book', 'NN')]
    >>> swap_infinitive_phrase([('tastes', 'VBZ'), ('like', 'IN'), ('chicken', 'NN')])
    [('tastes', 'VBZ'), ('like', 'IN'), ('chicken', 'NN')]
    >>> swap_infinitive_phrase([('delicious', 'JJ'), ('book', 'NN'), ('of', 'IN'), ('recipes', 'NNS')])
    [('delicious', 'JJ'), ('recipes', 'NNS'), ('book', 'NN')]
    '''
    # find a preposition other than 'like'
    inpred = lambda wt: wt[1] == 'IN' and wt[0] != 'like'
    inidx = first_chunk_index(chunk, inpred)

    if inidx is None:
        return chunk

    # find the noun immediately preceding the preposition
    nnpred = lambda wt: wt[1].startswith('NN')
    nnidx = first_chunk_index(chunk, nnpred, start=inidx, step=-1) or 0

    return chunk[:nnidx] + chunk[inidx + 1:] + chunk[nnidx:inidx]
def vbpred(wt):
    # predicate: a conjugated verb tag (longer than 'VB') that is not a gerund
    word, tag = wt
    return tag != 'VBG' and tag.startswith('VB') and len(tag) > 2
Example #13
def tag_to_pos(tag):
    """Make |tag| in the Brown Corpus Tagset more human-readable."""
    import string
    tag = tag.upper()
    if tag.startswith('NN'):
        return 'common noun'
    if tag.startswith('NP'):
        return 'proper noun'
    if tag.startswith('VB'):
        return 'verb'
    if tag.startswith('JJ'):
        return 'adjective'
    if tag in string.punctuation:
        return 'punctuation'
    if tag.startswith('PP'):
        return 'pronoun'
    if tag.startswith('RB'):
        return 'adverb'
    if tag.startswith('CC') or tag.startswith('CS'):
        return 'conjunction'
    if tag.startswith('CD'):
        return 'numeral'
    if tag.startswith('IN'):
        return 'preposition'
    if tag.startswith('AT'):
        return 'article'
    if tag.startswith('TO'):
        return 'infinitival to'
    return 'OTHER'
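A quick usage sketch; the tags below are illustrative Brown tagset examples.

for tag in ['NN', 'NP$', 'VBD', 'JJ', 'AT', 'IN', 'XYZ']:
    print(tag, '->', tag_to_pos(tag))
# NN -> common noun, NP$ -> proper noun, VBD -> verb, JJ -> adjective,
# AT -> article, IN -> preposition, XYZ -> OTHER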