def fill_A_and_B(sentence, a, b):
    """Return *sentence* with placeholder tokens 'A' and 'B' replaced.

    The sentence is POS-tagged (tokenizing on the way); the token 'A' is
    swapped for *a*, the token 'B' for *b*, every other token is kept,
    and the result is re-joined with single spaces.
    """
    substitutions = {'A': a, 'B': b}
    tokens = [substitutions.get(word, word)
              for word, pos in tag(sentence, tokenize=True)]
    return ' '.join(tokens)
def agg_character_count(poems, template):
    """Aggregate, per poem, the number of noun tokens across its characters.

    Every character's text is POS-tagged; tokens whose tag starts with
    'N' (excluding the literal word 'of') are counted.  A poem's count
    is recorded on the template only when it is positive.
    """
    logging.info('Starting aggregator: agg_character_count')
    for poem in poems:
        noun_total = sum(
            1
            for character in poem.characters
            for word, pos in tag(character.text)
            if pos.startswith('N') and word != 'of'
        )
        if noun_total > 0:
            template.character_count.append(noun_total)
    logging.info('Aggregator finished: agg_character_count')
def pos_tag_text(text):
    """POS-tag *text* and map Penn Treebank tags onto WordNet tags.

    Returns a list of (lowercased word, wordnet tag) pairs; words whose
    Penn tag has no WordNet counterpart are paired with None.
    """
    # Only the first letter of a Penn tag decides the WordNet class.
    penn_initial_to_wn = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }

    def penn_to_wn_tags(pos_tag):
        return penn_initial_to_wn.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag))
            for word, pos_tag in tag(text)]
def topicality_score(text):
    """Score how on-topic *text* is.

    The score is the ratio of "related" hits to noun tokens: for every
    noun token, each entry of ESSAY_NOUNS contributes one hit when any
    WordNet synset of the noun is more similar to it than
    SIMILARITY_THRESHOLD.  Returns 0 when the text contains no nouns.
    """
    # loop through all nouns
    # (this should give some idea of what's being discussed)
    noun_tags = ('NN', 'NNS', 'NNP', 'NNPS')
    noun_count = 0
    related_nouns = 0
    for word, pos in tag(text):
        if pos not in noun_tags:
            continue
        noun_count += 1
        synsets = wordnet.synsets(word, pos=NOUN)
        # NOTE(review): a single noun can score once per essay word it
        # matches, so related_nouns may exceed noun_count — presumably
        # intentional weighting; confirm with the original author.
        for essay_word in ESSAY_NOUNS:
            if any(wordnet.similarity(synword, essay_word) > SIMILARITY_THRESHOLD
                   for synword in synsets):
                related_nouns += 1
    return related_nouns / noun_count if noun_count > 0 else 0
def get_synset(phrase):
    """Return a WordNet synset for the head noun of *phrase*.

    Each noun token (except the word 'of') is looked up in several
    normalised forms, most-processed first; the synset of the LAST noun
    that resolves wins.  A personal pronoun short-circuits to the
    'living thing' synset.  Returns None when nothing resolves.
    """
    synset = None
    for word, pos in tag(phrase):
        if pos == 'PRP':
            # A pronoun stands in for an animate referent.
            return wordnet.synsets('living thing')[0]
        if not pos.startswith('N') or word == 'of':
            continue
        # Fall back through progressively less-normalised forms.
        candidates = (
            singularize(lemmatise(word)),
            lemmatise(word),
            singularize(word),
            word,
        )
        for candidate in candidates:
            matches = wordnet.synsets(candidate)
            if matches:
                synset = matches[0]
                break
        else:
            logging.error("Failed to find synset for '" + word + "'")
    return synset
def detect_line_tense(poem):
    """Detect the tense of each line of *poem* (an iterable of strings).

    For each line the LAST verb token is kept; lines without a verb are
    skipped entirely.  The candidate tenses of every kept verb are
    passed to detect_overall_tense(); an IndexError there records ''
    for that line.
    """
    last_verbs = []
    for raw_line in poem:
        # Expand contractions so the tagger sees full verb forms.
        line = replace_contractions(raw_line) if "'" in raw_line else raw_line
        verb = ""
        for token, pos in tag(line, tokenize=True):
            if pos.startswith("V"):
                verb = str(token)
        last_verbs.append(verb)

    line_tenses = []
    for verb in last_verbs:
        if not verb:
            continue
        candidate_tenses = [tense[0] for tense in tenses(verb)]
        try:
            line_tenses.append(detect_overall_tense(candidate_tenses))
        except IndexError:
            line_tenses.append('')
    return line_tenses
# NOTE(review): truncated fragment — it begins mid-`if/elif` (the `if train:`
# branch that this `elif` attaches to is not visible) and ends inside an
# unclosed dict literal (`tops_adj = {...`).  It appears to load/save a model
# checkpoint and write its vocabulary, the vocabulary's POS tags, and parses
# to files under extracted/lists/ — confirm against the full source before
# editing; it cannot be safely reformatted from this view.
my_model.save_model(checkpoint) elif not train: my_model.load_model(checkpoint) vocab = list(my_model.get_vocab().keys()) w = open("extracted/lists/" + args.vocab_fname, "w+") with tqdm(total=len(vocab)) as pbar: for v in vocab: w.write(v + "\n") pbar.update(1) w.close() w = open("extracted/lists/vocab_word2vec_POS.txt", "w+") with tqdm(total=len(vocab)) as pbar: for v in vocab: w.write(tag(v) + "\n") pbar.update(1) w.close() w = open("extracted/lists/" + checkpoint + "_parse.txt", "w+") with tqdm(total=len(vocab)) as pbar: for v in vocab: w.write(parse(str(v) + "\n") + "\n") pbar.update(1) w.close() # These names are in game_names, obtained in scrape.py from taking words before the pattern "is a * game" # The list is small and noisy. We will take a single popular game, # top100 = my_model.model.most_similar('Borderlands', topn=100) tops_adj = { 'Borderlands': None,
# coding=utf-8
'''
Created on Dec 10, 2015

@author: lm8212
'''
from pattern.web import Twitter
from pattern.text.en import tag
from pattern.vector import KNN, count

# Train a KNN classifier on the adjectives of #win / #fail tweets, then
# classify two unseen phrases as WIN or FAIL.
twitter, knn = Twitter(), KNN()

for i in range(1, 3):
    for tweet in twitter.search('#win OR #fail', start=i, count=100):
        s = tweet.text.lower()
        # FIX: the original `'#win' in s and 'WIN' or 'FAIL'` and/or idiom
        # is fragile (it silently misbehaves whenever the middle operand is
        # falsy); an explicit conditional expression is equivalent and safe.
        p = 'WIN' if '#win' in s else 'FAIL'
        v = tag(s)
        v = [word for word, pos in v if pos == 'JJ']  # JJ = adjective
        v = count(v)  # {'sweet': 1}
        if v:
            knn.train(v, type=p)

# FIX: print-as-statement is Python-2-only; with a single argument the
# function form produces identical output on Python 2 and 3.
print(knn.classify('sweet potato burger'))
print(knn.classify('stupid autocorrect'))
""" pattern 适用于各种NLP任务 例如词类标注器、n-gram搜索、情感分析、WordNet和机器学习(例如向量空间建模、k均值聚类、朴素贝叶斯、KNN、SVM分类器) """ from pattern.text.en import tag tweet = "I hope it is going good for you!" tweet_1 = tweet.lower() tweet_tags = tag(tweet_1) print(tweet_tags)