예제 #1
0
def get_similar_words(cleaned_query_words, doc):
    """Collect WordNet-similar words from one document into the global
    ``expanded_query_terms`` dict.

    The document's term vector is pulled from a galago index via a shell
    pipeline; every term not already among ``cleaned_query_words`` is
    compared (noun synsets, top two senses each) against each query word,
    and terms whose best path similarity exceeds 0.3 are recorded with
    that score.

    Args:
        cleaned_query_words: iterable of normalized query terms (strings).
        doc: galago document id to fetch.

    Side effects:
        Mutates the module-level ``expanded_query_terms`` dict and prints
        progress/error messages.
    """
    global expanded_query_terms
    try:
        # SECURITY NOTE: shell=True with a concatenated ``doc`` value is
        # injectable; ``doc`` must come from a trusted source. The shell
        # string is kept because the sed stage relies on shell piping.
        command = "/home/sneha/phoenix/galago/galago-3.6-bin/bin/galago doc --index=/phoenix/ir_code/galago-index-rb4/ --id=" + doc + " --text=true --metadata=false --tokenize=true | sed -n '/Term vector:/,/<TEXT>/p'"
        # NOTE(review): under Python 3 check_output returns bytes; this code
        # was written for Python 2 where split('\n') works directly.
        outl = subprocess.check_output(command, shell=True)
        out = outl.split('\n')

        # Skip the "Term vector:" header line and the two trailer lines;
        # the term itself is the last whitespace-separated field.
        word_array = []
        for line in out[1:len(out) - 2]:
            fields = line.split()
            if fields:
                word_array.append(fields[-1])

        # Pre-compute the noun synsets of every query word once.
        q_syn_list = [Word(q).get_synsets(NOUN) for q in cleaned_query_words]

        for w in word_array:
            if w in cleaned_query_words:
                continue
            w_syn = Word(w).get_synsets(NOUN)
            for q_syn in q_syn_list:
                max_syn = 0
                # Only the two most frequent senses of each word are compared.
                for q_sense in q_syn[:2]:
                    for w_sense in w_syn[:2]:
                        sim = q_sense.path_similarity(w_sense)
                        # path_similarity may return None for unrelated
                        # synsets; skipping None matches the old Py2
                        # max(0, None) behavior and avoids a Py3 TypeError.
                        if sim is not None:
                            max_syn = max(max_syn, sim)
                if max_syn > 0.3:
                    expanded_query_terms[w] = max_syn
    except Exception as exc:
        # Was a bare ``except: pass`` that silently hid every failure
        # (missing binary, bad doc id, coding errors). Keep best-effort
        # semantics but report what went wrong.
        print("Failed processing " + doc + ": " + str(exc))
    print("Done processing " + doc)
def retrieve_synset_list_for_word(word, whole_synset=True, pos_tag=None):
    """Look up the WordNet synsets of *word* via TextBlob.

    With *whole_synset* true (the default) the Synset objects themselves
    are returned; otherwise just their names. *pos_tag* optionally
    restricts the lookup to a single part of speech.
    """
    synsets = Word(word).get_synsets(pos_tag)
    if not whole_synset:
        return [synset.name() for synset in synsets]
    return synsets
예제 #3
0
    def _get_synsets(string, pos=None):
        """Return the WordNet synsets for *string*.

        With *pos* given, restrict the lookup to that part of speech via
        ``Word.get_synsets``; otherwise use the unrestricted
        ``Word.synsets`` property.
        """
        # The Word(...) construction was duplicated in both branches;
        # hoist it so the lookup logic reads as a single decision.
        word = Word(string)
        if pos is None:
            return word.synsets
        return word.get_synsets(pos)
예제 #4
0
    def sentenceToFeatures(self, sentence):
        """Translate *sentence* (a sequence of tokens) into the unique
        feature indices registered for each token's synset lemma-name
        lists; keys that were never indexed are silently skipped."""
        found = set()
        for token in sentence:
            for key in [s.lemma_names for s in Word(token).get_synsets()]:
                if key in self.word_to_idx:
                    found.add(self.word_to_idx[key])
        return list(found)
예제 #5
0
	def sentenceToFeatures(self, sentence):
		"""Return the de-duplicated feature indices of *sentence*, mapping
		each token's synset lemma-name lists through ``word_to_idx`` and
		ignoring keys absent from the vocabulary."""
		gathered = []
		for token in sentence:
			for key in [entry.lemma_names for entry in Word(token).get_synsets()]:
				try:
					gathered.append(self.word_to_idx[key])
				except KeyError:
					pass
		return list(set(gathered))
예제 #6
0
 def initialize(self, sentences):
     """Build the synset-key vocabulary over *sentences*.

     Every previously unseen lemma-name list gets the next index,
     starting from ``index_offset``; ``max_feat_len`` ends up as the
     largest number of synset keys produced by any single sentence.
     """
     self.max_feat_len = 0
     self.word_to_idx = {}
     next_idx = self.index_offset
     for sent in sentences:
         n_keys = 0
         for token in sent:
             keys = [s.lemma_names for s in Word(token).get_synsets()]
             for key in keys:
                 n_keys += 1
                 if key not in self.word_to_idx:
                     self.word_to_idx[key] = next_idx
                     next_idx += 1
         if n_keys > self.max_feat_len:
             self.max_feat_len = n_keys
예제 #7
0
	def initialize(self, sentences):
		"""Index every distinct synset key found in *sentences*, starting
		at ``index_offset``, and record in ``max_feat_len`` the largest
		per-sentence key count."""
		self.max_feat_len = 0
		self.word_to_idx = {}
		counter = self.index_offset
		for current in sentences:
			seen_in_sentence = 0
			for token in current:
				for key in (s.lemma_names for s in Word(token).get_synsets()):
					seen_in_sentence += 1
					if key not in self.word_to_idx:
						self.word_to_idx[key] = counter
						counter += 1
			self.max_feat_len = max(self.max_feat_len, seen_in_sentence)
예제 #8
0
def synonym_finder(word, pos_tag, lch_threshold):
    """Print every synset of *word* restricted to *pos_tag*.

    NOTE(review): ``lch_threshold`` is accepted but never used in this
    body; kept for interface compatibility with existing callers.
    """
    for synset in Word(word).get_synsets(pos=pos_tag):
        print(synset)
예제 #9
0
#!/usr/bin/python

from textblob import Word
from textblob.wordnet import NOUN
# Demo: fetch the noun synsets of "plant" via TextBlob's WordNet wrapper.
word = Word("plant")
# Python 2 print statement; shows the Synset list representation.
print word.get_synsets(NOUN)
예제 #10
0
	print("Usage: "+sys.argv[0]+" sentence target\nTranslates sentence into a (theoretically synonymous) new sentence with the same approximate sentiment as target (which may be a sentiment polarity value or a model sentence).")
	sys.exit()

# Parse the target sentiment: a float if the second CLI argument is
# numeric, otherwise the polarity of the model sentence given there.
try:
	target=float(sys.argv[2])
except:
	target=TextBlob(sys.argv[2]).sentiment.polarity
sentence=sys.argv[1]
blob=TextBlob(sentence)
options=[]
optionSyns=[]
# For every (token, POS-tag) pair, gather candidate replacement words.
for tag in blob.tags:
	word=Word(tag[0])
	syns=[]
	# Prefer synsets restricted to the converted POS tag; fall back to an
	# unrestricted lookup when tagFormatConvert raises (helper defined
	# elsewhere in this file).
	try:
		syns=word.get_synsets(tagFormatConvert(tag[1]))
	except:
		syns=word.get_synsets()
	# The original token is always the first candidate.
	syns_clean=[tag[0]]
	singular=True
	if(tag[1][0]=="N"):
		# guessSingular (defined elsewhere) decides noun number.
		singular=guessSingular(tag[0])
	for syn in syns:
		lemmanames=syn.lemma_names()
		for name in lemmanames:
			# Strip anything after the first "." in a lemma name.
			if(name.find(".")>=0):
				name=name[:name.find(".")]
			# Re-pluralize when the original token was plural.
			if(not singular):
				name=Word(name).pluralize()
			name=name.replace("_", " ")
			syns_clean.append(name)
예제 #11
0
파일: utils.py 프로젝트: vsoch/CogPheno
def get_synsets(concept_name):
    """Return all WordNet synsets TextBlob finds for *concept_name*."""
    return Word(concept_name).get_synsets()
	 tag_to_text_dict[line.split()[0]] = " ".join(line.split()[1:])


# Interactive demo: POS-tag a user sentence, then look up synonyms and
# antonyms for a word, translate a sentence into a random language, and
# prompt for a word whose definitions are printed below.
sentence = input("Enter your sentence : ")
parts_of_speech = nltk.pos_tag(nltk.word_tokenize(sentence))
print("\nThe parts of speech in your sentence are : \n")
for token, pos in parts_of_speech:
    # Punctuation tokens carry no useful POS description.
    if token != '.' and token != ',':
        print(token + " : " + tag_to_text_dict[pos])


print("\nEnter a word you would like synonyms and antonyms for : ")
word = Word(input())

# De-duplicate lemma names across all synsets of the word.
synonyms = list({lemma.name() for syn in word.get_synsets() for lemma in syn.lemmas()})
antonyms = list({ant.name() for syn in word.get_synsets() for lemma in syn.lemmas() for ant in lemma.antonyms()})

print(" Synonyms : " + ",".join(synonyms))
print(" Antonyms : " + ",".join(antonyms))

languages = [('ar', 'Arabic'), ('zh-CN', 'Chinese'), ('sk', 'Slovakian'), ('ja', 'Japanese')]
lang = random.choice(languages)

sentence = TextBlob(input("\nEnter a sentence you'd like to see translated to another language : "))
print("\nYour sentence translated to " + lang[1] + " is : " + str(sentence.translate(to=lang[0])))

print("\nEnter a word you'd like definitions for : ")
word2 = Word(input())

print("\nDefinitions: ")
예제 #13
0
# In[130]:

# Third sense of the out-of-view Word "bank"; compare its lemma list
# against bank1's (presumably an earlier sense — TODO confirm upstream).
bank3 = bank_word.synsets[2]
bank3.lemma_names()
bank1.lemma_names() == bank3.lemma_names()


# In[131]:

# Synset equality check between the two senses.
bank1 == bank3


# In[132]:

from textblob.wordnet import NOUN
# Restrict the lookup to noun synsets only.
bank_word.get_synsets(NOUN)


# In[133]:

car_word = Word("car")
car_word.get_synsets(NOUN)


# In[134]:

# First noun sense of "car" and its dictionary gloss.
car1 = car_word.get_synsets(NOUN)[0]
car1.definition()


# In[135]: