import subprocess

from textblob import Word
from textblob.wordnet import NOUN


def get_similar_words(cleaned_query_words, doc):
    """Expand the query with WordNet neighbours of terms from a Galago-indexed document."""
    global expanded_query_terms
    try:
        # Dump the document's term vector from the Galago index and keep that block.
        command = ("/home/sneha/phoenix/galago/galago-3.6-bin/bin/galago doc "
                   "--index=/phoenix/ir_code/galago-index-rb4/ --id=" + doc +
                   " --text=true --metadata=false --tokenize=true"
                   " | sed -n '/Term vector:/,/<TEXT>/p'")
        outl = subprocess.check_output(command, shell=True)
        out = outl.split('\n')
        word_array = list()
        for i in range(1, len(out) - 2):
            words = out[i].split()
            if len(words) > 0:
                word_array.append(words[-1])  # the term is the last column
        # Pre-compute the noun synsets of every query word.
        q_syn_list = list()
        for q in cleaned_query_words:
            q_wn = Word(q)
            q_syn_list.append(q_wn.get_synsets(NOUN))
        # Score each document term against the query synsets.
        for w in word_array:
            if w not in cleaned_query_words:
                w_wn = Word(w)
                w_syn = w_wn.get_synsets(NOUN)
                for q_syn in q_syn_list:
                    max_syn = 0
                    # Compare at most the two most frequent senses on each side.
                    for i in range(0, min(2, len(q_syn))):
                        for j in range(0, min(2, len(w_syn))):
                            syn = q_syn[i].path_similarity(w_syn[j])
                            if syn is not None:  # path_similarity can return None
                                max_syn = max(max_syn, syn)
                    if max_syn > 0.3:
                        expanded_query_terms[w] = max_syn
    except Exception:
        # Ignore documents that fail to dump or parse.
        pass
    print "Done processing " + doc
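The function reports its results through the module-level expanded_query_terms dict rather than a return value. A hypothetical driver, assuming the Galago index paths above exist; the dict initialisation and the document id are made up for illustration:

expanded_query_terms = {}
get_similar_words(["plant", "growth"], "WSJ870323-0001")  # hypothetical document id
print expanded_query_terms  # maps related document terms to their best path similarity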
from textblob import Word


def retrieve_synset_list_for_word(word, whole_synset=True, pos_tag=None):
    """Return the synset list for a word.

    Can return whole Synset objects or just their names, and can be
    limited to a specific POS tag.
    """
    blob_word = Word(word)
    if whole_synset:
        return blob_word.get_synsets(pos_tag)
    else:
        return [synset.name() for synset in blob_word.get_synsets(pos_tag)]
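A quick usage sketch, assuming the WordNet corpora that textblob relies on are installed:

from textblob.wordnet import NOUN

# Full Synset objects for the noun senses of "plant"...
retrieve_synset_list_for_word("plant", whole_synset=True, pos_tag=NOUN)
# ...or just their names, such as 'plant.n.01'
retrieve_synset_list_for_word("plant", whole_synset=False, pos_tag=NOUN)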
from textblob import Word


def _get_synsets(string, pos=None):
    word = Word(string)
    if pos is None:
        synsets = word.synsets
    else:
        synsets = word.get_synsets(pos)
    return synsets
from textblob import Word


def sentenceToFeatures(self, sentence):
    """Map each word's WordNet lemma names to known feature indices."""
    feat = []
    for word in sentence:
        werd = Word(word)
        # Flatten every synset's lemma names into one list of synonyms.
        syns = [name for w in werd.get_synsets() for name in w.lemma_names()]
        for syn in syns:
            try:
                feat.append(self.word_to_idx[syn])
            except KeyError:
                continue
    return list(set(feat))
def initialize(self, sentences):
    """Build the lemma-name -> feature-index vocabulary from all sentences."""
    self.max_feat_len = 0
    self.word_to_idx = {}
    idx = self.index_offset
    for sentence in sentences:
        syn_count = 0
        for word in sentence:
            werd = Word(word)
            syns = [name for w in werd.get_synsets() for name in w.lemma_names()]
            for syn in syns:
                syn_count += 1
                if syn not in self.word_to_idx:
                    self.word_to_idx[syn] = idx
                    idx += 1
        self.max_feat_len = max(self.max_feat_len, syn_count)
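sentenceToFeatures and initialize read like methods of a synset-based featurizer class. A hypothetical harness wiring them together; the SynsetFeaturizer name, the index_offset default, and the sample sentences are all assumptions:

class SynsetFeaturizer(object):
    def __init__(self, index_offset=0):
        self.index_offset = index_offset  # assumed starting feature id

    # Reuse the module-level functions above as methods.
    initialize = initialize
    sentenceToFeatures = sentenceToFeatures


sentences = [["plants", "need", "water"], ["banks", "lend", "money"]]
featurizer = SynsetFeaturizer()
featurizer.initialize(sentences)
print(featurizer.sentenceToFeatures(["plants", "grow"]))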
from textblob import Word


def synonym_finder(word, pos_tag, lch_threshold):
    # Note: lch_threshold is accepted but never used in this snippet.
    word = Word(word)
    for synset in word.get_synsets(pos=pos_tag):
        print(synset)
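A usage sketch; the 2.0 threshold is an arbitrary placeholder, since the parameter is unused above:

from textblob.wordnet import NOUN

synonym_finder("plant", NOUN, 2.0)  # prints each noun synset, e.g. Synset('plant.n.01')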
#!/usr/bin/python
from textblob import Word
from textblob.wordnet import NOUN

word = Word("plant")
print word.get_synsets(NOUN)
print("Usage: "+sys.argv[0]+" sentence target\nTranslates sentence into a (theoretically synonymous) new sentence with the same approximate sentiment as target (which may be a sentiment polarity value or a model sentence).") sys.exit() try: target=float(sys.argv[2]) except: target=TextBlob(sys.argv[2]).sentiment.polarity sentence=sys.argv[1] blob=TextBlob(sentence) options=[] optionSyns=[] for tag in blob.tags: word=Word(tag[0]) syns=[] try: syns=word.get_synsets(tagFormatConvert(tag[1])) except: syns=word.get_synsets() syns_clean=[tag[0]] singular=True if(tag[1][0]=="N"): singular=guessSingular(tag[0]) for syn in syns: lemmanames=syn.lemma_names() for name in lemmanames: if(name.find(".")>=0): name=name[:name.find(".")] if(not singular): name=Word(name).pluralize() name=name.replace("_", " ") syns_clean.append(name)
from textblob import Word


def get_synsets(concept_name):
    word = Word(concept_name)
    return word.get_synsets()
import random

import nltk
from textblob import TextBlob, Word

# The surrounding loop (omitted) reads a POS-tagset description file and
# fills tag_to_text_dict, one "TAG description..." entry per line:
tag_to_text_dict[line.split()[0]] = " ".join(line.split()[1:])

sentence = input("Enter your sentence : ")
parts_of_speech = nltk.pos_tag(nltk.word_tokenize(sentence))
print("\nThe parts of speech in your sentence are : \n")
for tup in parts_of_speech:
    if tup[0] != '.' and tup[0] != ',':
        print(tup[0] + " : " + tag_to_text_dict[tup[1]])

print("\nEnter a word you would like synonyms and antonyms for : ")
word = Word(input())
# Collect unique lemma names and their antonyms across all synsets.
synonyms = list(set([l.name() for syn in word.get_synsets() for l in syn.lemmas()]))
antonyms = list(set([ant.name() for syn in word.get_synsets()
                     for l in syn.lemmas() for ant in l.antonyms()]))
print(" Synonyms : " + ",".join(synonyms))
print(" Antonyms : " + ",".join(antonyms))

languages = [('ar', 'Arabic'), ('zh-CN', 'Chinese'), ('sk', 'Slovakian'), ('ja', 'Japanese')]
lang = random.choice(languages)
sentence = TextBlob(input("\nEnter a sentence you'd like to see translated to another language : "))
print("\nYour sentence translated to " + lang[1] + " is : " + str(sentence.translate(to=lang[0])))

print("\nEnter a word you'd like definitions for : ")
word2 = Word(input())
print("\nDefinitions: ")
# In[130]:

bank3 = bank_word.synsets[2]
bank3.lemma_names()
bank1.lemma_names() == bank3.lemma_names()


# In[131]:

bank1 == bank3


# In[132]:

from textblob.wordnet import NOUN
bank_word.get_synsets(NOUN)


# In[133]:

car_word = Word("car")
car_word.get_synsets(NOUN)


# In[134]:

car1 = car_word.get_synsets(NOUN)[0]
car1.definition()


# In[135]: