Example #1
def fill_A_and_B(sentence, a, b):
    result = []
    for word,pos in tag(sentence, tokenize=True):
        if word == 'A':
            result.append(a)
        elif word == 'B':
            result.append(b)
        else:
            result.append(word)

    return ' '.join(result)
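A minimal usage sketch; the import line is an assumption, since these snippets do not show their own imports.
from pattern.text.en import tag   # assumed import; fill_A_and_B above relies on it

print(fill_A_and_B('A is better than B', 'poetry', 'prose'))
# -> 'poetry is better than prose'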
Example #2
def agg_character_count(poems, template):
    logging.info('Starting aggregator: agg_character_count')

    for poem in poems:
        # Count the noun tokens (excluding 'of') across all of the poem's character descriptions.
        n = 0
        for character in poem.characters:
            for word, pos in tag(character.text):
                if pos.startswith('N') and word != 'of':
                    n += 1
        if n > 0:
            template.character_count.append(n)

    logging.info('Aggregator finished: agg_character_count')
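The aggregator expects poem objects exposing .characters (each with .text) and a template with a .character_count list; a hedged sketch with hypothetical namedtuple stand-ins for those objects:
import logging
from collections import namedtuple
from pattern.text.en import tag   # assumed import used by agg_character_count

# Hypothetical stand-ins for the poem/template objects; the real classes are not shown in this example.
Character = namedtuple('Character', 'text')
Poem = namedtuple('Poem', 'characters')
Template = namedtuple('Template', 'character_count')

poems = [Poem(characters=[Character('the queen of hearts'), Character('a white rabbit')])]
template = Template(character_count=[])
agg_character_count(poems, template)
print(template.character_count)   # e.g. [3]: 'queen', 'hearts' and 'rabbit' are counted, 'of' is skipped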
Example #3
def pos_tag_text(text):
    def penn_to_wn_tags(pos_tag):
        if pos_tag.startswith('J'):
            return wordnet.ADJ
        elif pos_tag.startswith('V'):
            return wordnet.VERB
        elif pos_tag.startswith('N'):
            return wordnet.NOUN
        elif pos_tag.startswith('R'):
            return wordnet.ADV
        else:
            return None

    tagged_text = tag(text)
    tagged_lower_text = [(word.lower(), penn_to_wn_tags(pos_tag)) for word, pos_tag in tagged_text]
    return tagged_lower_text
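A hedged usage sketch, assuming the surrounding module imports pattern's tag and NLTK's wordnet (whose ADJ/VERB/NOUN/ADV constants are the single letters 'a', 'v', 'n' and 'r'):
from pattern.text.en import tag    # assumed import
from nltk.corpus import wordnet    # assumed import; provides wordnet.ADJ, .VERB, .NOUN, .ADV

print(pos_tag_text('The quick brown fox jumps'))
# roughly: [('the', None), ('quick', 'a'), ('brown', 'a'), ('fox', 'n'), ('jumps', 'v')]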
Example #4
def topicality_score(text):
    # loop through all nouns
    # (this should give some idea of what's being discussed)
    noun_count = 0
    related_nouns = 0
    for word, pos in tag(text):
        if pos in ['NN', 'NNS', 'NNP', 'NNPS']:
            noun_count += 1
            syn_possibility = wordnet.synsets(word, pos=NOUN)
            for essay_word in ESSAY_NOUNS:
                for synword in syn_possibility:
                    sim = wordnet.similarity(synword, essay_word)
                    if sim > SIMILARITY_THRESHOLD:
                        related_nouns += 1
                        break

    if noun_count > 0:
        return related_nouns / noun_count
    else:
        return 0
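topicality_score leans on module-level names this snippet does not define (ESSAY_NOUNS, SIMILARITY_THRESHOLD and NOUN); a hedged sketch of one possible setup with pattern's wordnet wrapper (the topic synsets and threshold below are illustrative assumptions, not the originals):
from pattern.text.en import tag, wordnet
from pattern.text.en.wordnet import NOUN    # assumed source of the NOUN constant

# Illustrative topic synsets and threshold; the real values are not shown in this example.
ESSAY_NOUNS = [wordnet.synsets('computer', pos=NOUN)[0],
               wordnet.synsets('software', pos=NOUN)[0]]
SIMILARITY_THRESHOLD = 0.3

print(topicality_score('The program crashed because the code had a bug.'))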
Example #6
def get_synset(phrase):
    synset = None
    for word, pos in tag(phrase):
        if pos.startswith('N') and word != 'of':
            # Try progressively less normalised forms of the noun until one yields a synset.
            for candidate in (singularize(lemmatise(word)), lemmatise(word),
                              singularize(word), word):
                try:
                    synset = wordnet.synsets(candidate)[0]
                    break
                except IndexError:
                    continue
            else:
                logging.error("Failed to find synset for '" + word + "'")
        elif pos == 'PRP':
            # Personal pronouns default to the generic 'living thing' synset.
            return wordnet.synsets('living thing')[0]

    return synset
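A hedged usage sketch; singularize ships with pattern, while lemmatise is presumably a helper defined elsewhere in the original module and is stubbed here as a no-op purely for illustration:
import logging
from pattern.text.en import tag, singularize, wordnet   # assumed imports

def lemmatise(word):
    # Stand-in for the module's own lemmatiser, which this example does not show.
    return word

print(get_synset('a cup of strong coffee'))   # roughly: the synset for 'coffee', the last noun in the phrase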
Example #7
def detect_line_tense(poem):
    poem_verb_set = []
    for line in poem:
        line_verb = ""
        if "'" in line:
            line = replace_contractions(line)
        for word, t in tag(line, tokenize=True):
            if t.startswith("V"):
                line_verb = str(word)
        poem_verb_set.append(line_verb)

    line_tenses = []
    for line_verb in poem_verb_set:
        if not line_verb:
            continue
        possible_tenses = []
        for tense in tenses(line_verb):
            possible_tenses.append(tense[0])
        try:
            line_tenses.append(detect_overall_tense(possible_tenses))
        except IndexError:
            line_tenses.append('')

    return line_tenses
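detect_line_tense builds on pattern's tenses() helper (plus replace_contractions and detect_overall_tense, which are not shown here); tenses() returns candidate (tense, person, number, mood, aspect) tuples, and only the first element is used above. A quick illustration:
from pattern.text.en import tenses
print(tenses('walked'))   # a list of (tense, person, number, mood, aspect) tuples for the form 'walked'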
Example #8
        my_model.save_model(checkpoint)
    elif not train:
        my_model.load_model(checkpoint)
        vocab = list(my_model.get_vocab().keys())

        w = open("extracted/lists/" + args.vocab_fname, "w+")
        with tqdm(total=len(vocab)) as pbar:
            for v in vocab:
                w.write(v + "\n")
                pbar.update(1)
        w.close()

        w = open("extracted/lists/vocab_word2vec_POS.txt", "w+")
        with tqdm(total=len(vocab)) as pbar:
            for v in vocab:
                # tag() returns a list of (word, POS) tuples, so stringify it before writing.
                w.write(str(tag(v)) + "\n")
                pbar.update(1)
        w.close()

        w = open("extracted/lists/" + checkpoint + "_parse.txt", "w+")
        with tqdm(total=len(vocab)) as pbar:
            for v in vocab:
                w.write(parse(str(v) + "\n") + "\n")
                pbar.update(1)
        w.close()

        # These names are in game_names, obtained in scrape.py from taking words before the pattern "is a * game"
        # The list is small and noisy. We will take a single popular game,
        # top100 = my_model.model.most_similar('Borderlands', topn=100)
        tops_adj = {
            'Borderlands': None,
Example #9
# coding=utf-8
'''
Created on Dec 10, 2015

@author: lm8212
'''
from pattern.web import Twitter
from pattern.text.en import tag
from pattern.vector import KNN, count

twitter, knn = Twitter(), KNN()

for i in range(1, 3):
    for tweet in twitter.search('#win OR #fail', start=i, count=100):
        s = tweet.text.lower()
        p = '#win' in s and 'WIN' or 'FAIL'
        v = tag(s)
        v = [word for word, pos in v if pos == 'JJ']  # JJ = adjective
        v = count(v)  # {'sweet': 1}
        if v:
            knn.train(v, type=p)

print(knn.classify('sweet potato burger'))
print(knn.classify('stupid autocorrect'))
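The feature vectors handed to knn.train() come from pattern.vector's count(), which turns a list of words into a {word: frequency} dictionary; a quick illustration (the printed dict is indicative):
from pattern.vector import count
print(count(['sweet', 'sweet', 'awesome']))   # e.g. {'sweet': 2, 'awesome': 1}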
Example #10
"""
pattern 适用于各种NLP任务
例如词类标注器、n-gram搜索、情感分析、WordNet和机器学习(例如向量空间建模、k均值聚类、朴素贝叶斯、KNN、SVM分类器)
"""
from pattern.text.en import tag
tweet = "I hope it is going good for you!"
tweet_1 = tweet.lower()
tweet_tags = tag(tweet_1)
print(tweet_tags)
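tag() returns a list of (token, Penn Treebank tag) pairs; a short follow-on sketch (an illustration added here, not part of the original) that keeps only the adjectives from the tagged tweet:
# Keep only adjective tokens (tags starting with 'JJ') from tweet_tags above.
adjectives = [word for word, pos in tweet_tags if pos.startswith('JJ')]
print(adjectives)   # e.g. ['good']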