def get_message(message_parser):
  message_split =  message_parser.split("|")
  mobile_number = message_split[0]
  need_synonyms = ["require", "want", "motivation", "motive", "ask", "call for", "demand", "involve", "necessitate", "need", "postulate", "take", "indigence", "pauperism", "pauperization", "penury"]
  supply_synonyms = ["issue", "furnish", "provide", "render", "add", "append", "cater", "ply", "provision", "supplying", "afford", "yield", "commit", "consecrate", "dedicate", "devote", "spring", "springiness", "impart", "leave", "pass on", "ease up", "give way", "move over", "render", "feed", "generate", "return", "throw", "chip in", "contribute", "kick in", "grant", "pay", "break", "cave in", "collapse", "fall in", "founder", "hand", "pass", "reach", "turn over", "have", "hold", "make", "establish", "open", "apply", "gift", "present", "sacrifice"]
  tokens = nltk.word_tokenize(message_split[1])
  need = len(set(tokens) & set(need_synonyms)) > 0
  need_json = {"need": True} if need else {"supply": True}
  need_json.update({"number": mobile_number})
  tagged_tokens = nltk.pos_tag(tokens)
  for i in range(len(tagged_tokens)):
    if tagged_tokens[i][1] == 'CD':
      current_count = get_integer(tagged_tokens[i][0])
    elif  tagged_tokens[i][1] == 'DT':
      current_count = 1
    elif  tagged_tokens[i][1] in ['NNS','NN']:
      if tagged_tokens[i][0] in ["cups", "cup", "packets","packet","bottle", "bottles", "bundle","bundles","packages", "package", need_synonyms, supply_synonyms]:
          continue
      current_category = tagged_tokens[i][0]
      c = wn.synsets(current_category)
      food = wn.synset('food.n.01')
      water = wn.synset('water.n.01')
      food = food.wup_similarity(c[0])
      water = water.wup_similarity(c[0])
      current_category = "food" if food > water else "water"
      try:
        current_count
      except NameError:
        current_count = 1
      if current_count is None:
        current_count = 1
      print current_count
      need_json.update({current_category: current_count})
      current_count = None
  return need_json
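A minimal usage sketch, assuming nltk and the WordNet reader are imported as in the snippets below, that the incoming SMS uses the "number|text" format split on "|" above, and that get_integer is the project's own word-to-number helper for numeric tokens:

import nltk
from nltk.corpus import wordnet as wn

sample = "9876543210|need water and food"   # assumed "number|text" SMS format
print get_message(sample)                   # roughly {"need": True, "number": "9876543210", "water": 1, "food": 1}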
Example #2
def define(word, Webster, bestdef, changed, old_topic, new_topic):
	"""Defines a word, if desired by the user, and if the topic has changed."""
	import answer
	import ask  # ask/answer are this project's own I/O helper modules
	from nltk.corpus import wordnet
	if ((Webster != "") and (not changed)):	return (False, Webster)
	if (Webster == ""):
		answer.write("The word " + word + " was not defined under the topic " + old_topic + ".")
	else:
		asked = ask.getPlay("The word " + word + " was defined under the topic " + old_topic + " as " + Webster + ".\nDo you want this meaning to carry over to the new topic " + new_topic + "?  ")
		if yes(asked):
			return (False, Webster)
	undone = True
	dno = 1
	while (undone):
		if (dno == bestdef):	dno += 1
		string = word + ".n." + str(dno)
		try:
			if (dno < len(wordnet.synsets(word, pos = wordnet.NOUN))):
				asked = ask.getPlay("Does " + wordnet.synset(string).definition + " work for your usage of " + word + "?  ")
				undone = not yes(ask)
				newdef = wordnet.synset(string).definition
				dno += 1
			else:
				newdef = ask.getPlay("Then how would you define " + word + "?  ")
				undone = False
		except(Exception):
			newdef = ask.getPlay("How would you define " + word + "?  ")
			undone = False
	return (True, newdef)
def print_other_lexical_rel():
    good1 = wn.synset('good.a.01')
    wn.lemmas('good')
    print("Antonyms of 'good': " + str(good1.lemmas()[0].antonyms()))
    print("")
    print("Entailment of 'walk': " + str(wn.synset('walk.v.01').entailments()))
    print("")
Example #4
    def overlapCount(self, sentence):
        #set count to be one so we can guess in case there are no sentences with overlap
        count = 1

        sWiki = TextBlob(self.arrayToString(sentence))
        sVerbs = self.getVerbs(sWiki)

        #compare verbs for similarities and based on wordnet's similarity score
        #if they're exactly the same, they'll score 1
        for sverb in sVerbs:
            synv = wn.synset(sverb + '.v.01')
            for qverb in self.questionVerbs:
                synq = wn.synset(qverb + '.v.01')
                sim = synv.path_similarity(synq)
                # path_similarity can return None when the synsets share no path
                if sim is not None:
                    count += sim

        #remove stop words from sentence AFTER we've gotten POS tags
        s = self.removeStopWords(sentence)
        sLower = self.removeStopWords(sentence.lower())

        for word in self.qList:
             if word in s:
                 count += 1
             else:
                 if word.lower() in sLower:
                     count += 0.1
        return count
Example #5
	def ontoList(self, synset):
		# things to pick from
		if self.pos == 'v':
			ln = wn.synset(synset).lexname().split('.')[1]
			hyper = self.lemmatize(self.getHypernyms(synset))
			definition = self.getDefinition(synset)
			lemmas = self.lemmatize(self.getLemmas(synset))
			examples = self.getExamples(synset)
			strings = [string.replace("_", " ") for string in self.getFrameStrings(synset)]
			hypo = self.lemmatize(self.getHyponyms(synset))  
			ontologyList = [strings, ln, lemmas, examples, hypo, definition, hyper]
		else:
			ln = wn.synset(synset).lexname().split('.')[1]
			hyper = self.lemmatize(self.getHypernyms(synset))
			definition = self.getDefinition(synset)
			lemmas = self.lemmatize(self.getLemmas(synset))
			examples = self.getExamples(synset)
			hypo = self.lemmatize(self.getHyponyms(synset)) 
			ontologyList = [ln, lemmas, examples, hypo, definition, hyper]

		returnList = list()
		for o in ontologyList:
			if o:
				returnList.append(o)
		return returnList
def expand_queries(file):
    '''
    For each term in a query, takes the first synset of the word from wordnet and adds all synonyms of that synset
    '''
    file = open(file)
    for sentence in file:
        sentence = sentence.strip()
        if sentence.find('<text>') != -1:
            query = sentence[sentence.find('>')+1: sentence.rfind('<')]
            additions = ''
            updated_q = nltk.pos_tag(nltk.wordpunct_tokenize(query.lower()))
            full_q = query
            for word, pos in updated_q:
               if word not in stopwords.words('english'):
                   looking_for = str(word)+'.'+str(get_wordnet_pos(pos))+'.01'                   
                   synsets = wn.synsets(word)
                   if looking_for in str(synsets):
                       new_words = (wn.synset(looking_for).lemma_names) #was .definition
                       for new_word in new_words:
                           if new_word.lower() != word.lower():
                               full_q = full_q +' '+ str(new_word)
                   else:
                       if wn.morphy(word) != None:
                           word = wn.morphy(word)
                           looking_for = str(word)+'.'+str(get_wordnet_pos(pos))+'.01'
                           print str(looking_for) + ' THIS IS WORD'
                           synsets = wn.synsets(word)
                           if looking_for in str(synsets):
                               new_words = (wn.synset(looking_for).lemma_names) #was .definition
                               for new_word in new_words:
                                   if new_word.lower() != word.lower():
                                       full_q = full_q +' '+ str(new_word)
            print query + ' '+ full_q
def calculate_and_write_edge_weigthings_for_synsets(synset_filenames_dict, file_name):
  max_co_occurrence = calculate_max_co_occurrence(synset_filenames_dict)
  edge_weigthings_for_synsets = dict()
  how_many_added = 0
  how_many_done = 0
  how_many_to_do = len(synset_filenames_dict.keys()) * (len(synset_filenames_dict.keys())-1)
  write_edge_weightings_to_file(dict(), file_name)

  for synset1, filenames1 in synset_filenames_dict.iteritems():
    for synset2, filenames2 in synset_filenames_dict.iteritems():
      if synset1 < synset2:
        how_many_done += 1
        #if (synset1.name, synset2.name) not in similarity_histogram:
        similarity = wn.synset(synset1).lch_similarity(wn.synset(synset2))
        co_occurence = len(set(synset_filenames_dict[synset1]).intersection(set(synset_filenames_dict[synset2])))
        normalized_co_occurrence = co_occurence / float(max_co_occurrence)
        if similarity < 2.0:
          similarity = 0
        if normalized_co_occurrence < 0.4:
          normalized_co_occurrence = 0
        edge_weighting = similarity + 4*normalized_co_occurrence
        if edge_weighting != 0:
          edge_weigthings_for_synsets[(synset1, synset2)] = edge_weighting
          how_many_added += 1
        if how_many_added > 1000:
          print_status("Done with " + str(how_many_done) + " von " + str(how_many_to_do) + "\n")
          write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)
          edge_weigthings_for_synsets = dict()
          how_many_added = 0
  write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)
def probability(tokens, category, dictionary, total):   	  
	if category == "sense":
		total_score = 0
		dic = dictionary
		if len(tokens) == 0:
			return 0
		for token in tokens:
			for dict_sense in dic:
				score = wn.synset(token).path_similarity(wn.synset(dict_sense))
				if score is not None:
					total_score += score * dic[dict_sense]
		return (total_score/len(tokens))
	else:
		p = 0 
		dic = dictionary
		total_instances = total
		for token in tokens:
			if token in dic:
				token_prob = dic[token]
			else:
				token_prob = 0
			# smooth one out
			curr = token_prob/float(total_instances)
			p += curr
	
	return p
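A small illustrative call for the "sense" branch, assuming wn is nltk's WordNet reader; both the tokens and the dictionary keys are expected to be full WordNet sense names, with whatever per-sense weights the caller has accumulated:

from nltk.corpus import wordnet as wn

sense_weights = {"dog.n.01": 3, "animal.n.01": 1}
print(probability(["cat.n.01"], "sense", sense_weights, 0))   # weighted average path similarity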
Example #9
def process_verb(verb):
    verb = verb[:-1] # Remove newline char
    with open('youtube_setof_verbs.txt') as f:
        verb_dict = f.read()
    verb_dict = verb_dict.split('\n')
    
    max_score = 0
    finl_verb = (verb, '<>')
    verb_list = re.findall('[A-Z][^A-Z]*', verb)
    
    for prob_verb in verb_list:
        if prob_verb[len(prob_verb)-3:] == 'ing':
            prob_verb = prob_verb[:-3] # Remove 'ing' from verb
            if prob_verb.lower() == 'cutt':
                prob_verb = 'cut'
        if wn.synsets(prob_verb):
            try:
                v1 = wn.synset(prob_verb + '.v.01')
                for yout_verb in verb_dict:
                    if yout_verb != '':
                        # if wn.synsets(yout_verb):
                        v2 = wn.synset(yout_verb + '.v.01')
                        score = v1.wup_similarity(v2)
                        if score > max_score:
                            finl_verb = (prob_verb, yout_verb)
                            max_score = score
            except:
                finl_verb = (prob_verb, '<>')
                pass
                
    # print finl_verb, max_score
    return finl_verb[1]
Example #10
def process_subj(subj, flag):
    if flag == 1:
        with open('youtube_setof_subjects.txt') as f:
            subj_dict = f.read()
        subj_dict = subj_dict.split('\n')
    elif flag == 2:
        with open('youtube_setof_objects.txt') as f:
            obj_dict = f.read()
        subj_dict = obj_dict.split('\n')
    
    max_score = 0
    finl_subj = (subj, '<>')
    subj_list = subj.split(',')

    if len(subj_list) == 1:
        return subj
    for prob_subj in subj_list:
        prob_subj = prob_subj.strip()
        if wn.synsets(prob_subj):
            try:
                v1 = wn.synset(prob_subj + '.n.01')
                for yout_subj in subj_dict:
                    if yout_subj != '':
                        v2 = wn.synset(yout_subj + '.n.01')
                        score = v1.wup_similarity(v2)
                        if score > max_score:
                            finl_subj = (prob_subj, yout_subj)
                            max_score = score
            except:
                finl_subj = (prob_subj, '<>')
                pass
                
    # print finl_verb, max_score
    return (finl_subj[1])
def get_score(tags, groups):
  sscore = 0
  scount = 0
  illegal_word = 0

  if tags is not None:
    for g in groups:
      for x in tags:
        try:
          # print str(x.text),
          # check for a substring match, else calculate a WordNet similarity score
          if g in str(x.text).lower():
            sscore += 2.0
            scount += 1
          else:
            tag = wn.synset(str(x.text).lower() + '.n.01')
            group = wn.synset(g + '.n.01')
            sem = wn.path_similarity(group, tag)
            if sem >= 0.3:
              sscore += sem
              scount += 1
        except:
          illegal_word += 1
  if scount != 0:
    return sscore / scount
  else:
    return 0
Example #12
def preprocess_docs():
    stopwords = nltk.corpus.stopwords.words('english')
    corpus = list(filtered_corpus())
    counter = 0
    for train, topic, title, text in corpus:
        if counter % 10 == 0:
            print "%.2f %%\r" % (counter * 100.0 / len(corpus),),
            sys.stdout.flush()
        counter += 1
        text = [i for i in nltk.word_tokenize(title) if i.lower() not in stopwords]
        buf = []
        for word in text:
            synsets = wn.synsets(word)
            grain = []
            wheat = [] 
            for s in synsets:
                grain.append(s.path_similarity(wn.synset('grain.n.08')))
                wheat.append(s.path_similarity(wn.synset('wheat.n.02')))

            grain = [i for i in grain if i is not None]
            wheat = [i for i in wheat if i is not None]

            if len(grain) == 0:
                grain = 0
            else:
                grain = sum(grain) * 1.0 / len(grain)
            if len(wheat) == 0:
                wheat = 0
            else:
                wheat = sum(wheat) * 1.0 / len(wheat)
            buf.append((word, grain, wheat))
        yield train, topic, buf
    print ""
Example #13
def get_similar_words(word):
    lemmas_noun = hypernyms_noun = lemmas_verb = hypernyms_verb =[]
    try:
        lemmas_noun =  [str(lemma.name()) for lemma in wn.synset(word + '.n.01').lemmas()]    
    except WordNetError:
        pass

    try:
        hypernyms_noun = [str(lemma.name()).split('.')[0] for lemma in wn.synset(word + '.n.01').hypernyms()]    
    except WordNetError:
        pass

    if len(lemmas_noun) == 0 and len(hypernyms_noun) == 0:
        """
        Only try verbs if there are no similar nouns
        """
        try:
            lemmas_verb =  [str(lemma.name()) for lemma in wn.synset(word + '.v.01').lemmas()]    
        except WordNetError:
            pass

        try:
            hypernyms_verb = [str(lemma.name()).split('.')[0] for lemma in wn.synset(word + '.v.01').hypernyms()]    
        except WordNetError:
            pass
    
    similar_words = lemmas_noun + hypernyms_noun + lemmas_verb + hypernyms_verb
    # filter words which are not purely alphabets (there will be words with underscore)
    # this is because if we want to process such words like "domestic_animal", we have to 
    # implement 2-grams search which is not done here
    pattern = re.compile('^[a-zA-Z]+$')
    return filter(lambda x: pattern.match(x) and x != word, similar_words)
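A usage sketch; the helper assumes re, wn and WordNetError are imported, and under Python 3 filter() returns a lazy iterator, so wrap the result in list() to materialise it:

from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import WordNetError
import re

print(list(get_similar_words("dog")))   # single-word lemmas and hypernyms of dog.n.01, minus "dog" itself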
	def get_similarity(self,word1,word2):
		'''Compute word similarity using the WordNet lexical database'''
		'''
		print 'before stemmed:',word1
		print 'after stemmed:',wn.morphy(word1.lower())
		print 'before stemmed:',word2
		print 'after stemmed:',wn.morphy(word2.lower())
		'''
		#stemmed word
		if wn.morphy(word1.lower()) != None :
			word1 = wn.morphy(word1.lower())
		if wn.morphy(word2.lower()) != None :
			word2 = wn.morphy(word2.lower()) 
		word1_synsets = wn.synsets(word1)
		#print word1_synsets
		word2_synsets = wn.synsets(word2)
		#print word2_synsets
		sim = 0

		for syn1 in word1_synsets:
			w1 = wn.synset(syn1.name())
			for syn2 in word2_synsets:
				w2 = wn.synset(syn2.name())
				tmp = w1.path_similarity(w2)
				#print tmp,syn1.name(),syn2.name()
				if tmp > sim:
					sim = tmp
		return sim
def is_ingredient(word):
    """
    Return True if the word is an ingredient, False otherwise.

    >>> is_ingredient('milk')
    True
    >>> is_ingredient('blackberries')
    True
    >>> is_ingredient('Canada')
    False
    >>> is_ingredient('breakfast')
    False
    >>> is_ingredient('dish')
    False
    """
    reject_synsets = ['meal.n.01', 'meal.n.02', 'dish.n.02', 'vitamin.n.01']
    reject_synsets = set(wordnet.synset(w) for w in reject_synsets)
    accept_synsets = ['food.n.01', 'food.n.02']
    accept_synsets = set(wordnet.synset(w) for w in accept_synsets)
    for word_synset in wordnet.synsets(word, wordnet.NOUN):
        all_synsets = set(word_synset.closure(lambda s: s.hypernyms()))
        all_synsets.add(word_synset)
        for synset in reject_synsets:
            if synset in all_synsets:
                return False
        for synset in accept_synsets:
            if synset in all_synsets:
                return True
    return word in wordlists.ingredients
Example #16
def getSenseSimilarity(worda, wordb):
	"""
	find similarity between word senses of two words
	"""
	wordasynsets = wn.synsets(worda)
	wordbsynsets = wn.synsets(wordb)
	synsetnamea = [wn.synset(syns.name()) for syns in wordasynsets]
	synsetnameb = [wn.synset(syns.name()) for syns in wordbsynsets]

	for sseta, ssetb in [(sseta, ssetb) for sseta in synsetnamea for ssetb in synsetnameb]:
		pathsim = sseta.path_similarity(ssetb)
		wupsim = sseta.wup_similarity(ssetb)
		if pathsim != None:
			print "Path Sim Score: ", pathsim, " WUP Sim Score: ", wupsim, "\t", sseta.definition(), "\t", ssetb.definition()
Example #17
 def compare(self, word1, word2):
     tmp1 = wn.synsets(word1)[0].name()
     tmp2 = wn.synsets(word2)[0].name()
     w1 = wn.synset(tmp1)
     w2 = wn.synset(tmp2)
     val = w1.wup_similarity(w2)
     return val
def get_relative_similarity(a,b):
    '''
        Returns path similarity between two word a and b.
        Used for merging two clusters
    '''
    x=wn.synset("%s.n.01"%a)
    y=wn.synset("%s.n.01"%b)
    return x.path_similarity(y)
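A quick illustrative call, assuming wn is nltk's WordNet reader and that both words have a first noun sense:

from nltk.corpus import wordnet as wn

print(get_relative_similarity("dog", "cat"))   # path similarity of dog.n.01 vs cat.n.01
print(get_relative_similarity("dog", "car"))   # unrelated nouns score much lower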
Example #19
 def wsd(self,sent,target, tag=None):
     if tag is None:
         self.scoring(sent, target)
     else:
         self.scoring(sent, target,tag)
     sense = self.getGreedyBestSenses(10)
     print wordnet.synset(sense).definition()
     return sense
def similarity(word1, word2, tag):
    obj1 = wn.synset(word1 + "."+ tag+".01")
    obj2 = wn.synset(word2 + "."+ tag+".01")
    #print(obj1)
    brown_ic = wordnet_ic.ic('ic-brown.dat') 	# Information content
    semcor_ic = wordnet_ic.ic('ic-semcor.dat')
    value = obj1.res_similarity(obj2, brown_ic)
    return value
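A usage sketch; Resnik similarity needs an information-content corpus and both synsets must share the same part of speech (in NLTK's WordNet it is typically used with nouns or verbs), so under those assumptions a call looks like:

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

print(similarity("dog", "cat", "n"))   # IC-based score; unlike path similarity it is not bounded by 1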
def Sim2(text1, text2) :
    
    stop = stopwords.words('english')
    
    text1=regexpProcessing(text1)
    text2=regexpProcessing(text2)
    
    # convert both texts to lower case
    TEXT1=text1.strip()
    TEXT2=text2.strip()
    TEXT1=TEXT1.lower()
    TEXT2=TEXT2.lower()
    
    token1 = generateTokens(TEXT1)
    token2 = generateTokens(TEXT2)
    
    t1List=[]
    for tok1 in token1:
        word1 = Word(tok1)
        w1=word1.spellcheck()
        correctw=w1[0][0]
        confidence = w1[0][1]
        
        if (confidence > 0.8) and (correctw not in stop):
            t1List.append(correctw)
            
            
    t2List=[]
    for tok2 in token2:
        word2 = Word(tok2)
        w2=word2.spellcheck()
        correctw=w2[0][0]
        confidence = w2[0][1]
        
        if (confidence > 0.8) and (correctw not in stop):
            t2List.append(correctw)
    for i in range(len(TextItems)):        
        token = generateTokens(TextItems[i])
        tokenList.append(token)
        token = []
    # spell correction handled above via Word.spellcheck()

    # WordNet similarity example
    word1 = wn.synset('dog.n.01')
    word2 = wn.synset('cat.n.01')

    word1.path_similarity(word2)
    return CosineSimilarity
def senseRange(word, pos):
    '''Given an english word and its POS tag (as required for wordnet), return the number of senses that it has.'''
    sense = 0
    while True:
        try:
            # WordNet sense numbers start at 01, so probe sense + 1
            wn.synset("%s.%s.%02d" % (word, pos, sense + 1))
        except nltk.corpus.reader.wordnet.WordNetError:
            return sense
        else:
            sense += 1
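An illustrative call, assuming nltk and its WordNet reader are imported as wn:

import nltk
from nltk.corpus import wordnet as wn

print(senseRange("bank", "n"))     # number of noun senses WordNet lists for "bank"
print(senseRange("qwerty", "n"))   # 0 for a word WordNet does not know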
def buildCategoryForest(category):
  treeList = []
  hypo = lambda s:s.hyponyms()
  
  treeList.append(getEte2Tree(wn.synset('travel.n.01').tree(hypo)))
  treeList.append(getEte2Tree(wn.synset('travel.v.03').tree(hypo)))
  treeList.append(getEte2Tree(wn.synset('travel.v.04').tree(hypo)))
  treeList.append(getEte2Tree(wn.synset('travel.v.05').tree(hypo)))
  treeList.append(getEte2Tree(wn.synset('travel.v.06').tree(hypo)))
  return treeList
Example #24
 def get_threshold(w1, w2):
     if w1 == w2 or w1 == w2 + "s" or w1 == w2 + "es":
         return 1
     else:
         try:
             syn1 = wordnet.synset(w1 + ".n.01")
             syn2 = wordnet.synset(w2 + ".n.01")
             return syn1.wup_similarity(syn2)
         except WordNetError:
             return 0
Example #25
def findVerbRelatedToNoun(noun):
    threshold = random.uniform(0.2, 0.5)
    while 1:
        verb = random.choice(verbs)
        nounSynset = wn.synset(noun + ".n.01")
        verbSynset = wn.synset(verb + ".v.01")
        threshold -= 0.01

        if (nounSynset.path_similarity(verbSynset) > threshold or verbSynset.path_similarity(nounSynset) > threshold):
            return verb
def get_path_similarity_between_boy_and_dog():
    """
    Computes the path similarity between "boy" and "dog".

    Returns
    -------
    A float.
    """

    return wn.path_similarity(wn.synset('boy.n.01'), wn.synset('dog.n.01'))
 def determineSynonym(first, second):
     if len(wordnet.synsets(first)) == 0 or len(wordnet.synsets(second)) == 0:
         return 0
     else:
         w1 = wordnet.synset(wordnet.synsets(first)[0].name())
         w2 = wordnet.synset(wordnet.synsets(second)[0].name())
         similarity_ratio = w1.wup_similarity(w2)
         if similarity_ratio != None:
             return similarity_ratio
         else:
             return 0
Example #28
def findTwoRelatedNouns():
    threshold = 0.8
    while 1:
        noun1 = random.choice(nouns)
        noun2 = random.choice(nouns)
        word1 = wn.synset(noun1 + ".n.01")
        word2 = wn.synset(noun2 + ".n.01")
        sim = word1.wup_similarity(word2)
        if noun1 != noun2 and sim > threshold:
            print noun1 + " " + noun2 + " " + str(sim)
            return [noun1, noun2]
Example #29
def semantic_score(word1, word2):
    '''
    Semantic score between two words based on WordNet
    Returns: float (the semantic score between word1 and word2)
    '''
    try:
        w1 = wn.synset('%s.n.01'%(word1))
        w2 = wn.synset('%s.n.01'%(word2))
        return wn.path_similarity(w1,w2,simulate_root = False)
    except:
        return 0
 def __init__(self, train=False):
     self.tagger = PerceptronTagger()
     self.model = None
     # BOW: triangle, rectangle, circle, hand
     # verbs: draw, wave, rotate
     self.BOW = ['draw', 'wave', 'rotate', 'triangle', 'rectangle', 'circle', 'hand']
     self.VERBS = [wn.synset('draw.v.01'), wn.synset('wave.v.01'), wn.synset('rotate.v.01')]
     self.n_bow, self.n_verbs = len(self.BOW), len(self.VERBS)
     if train: self.train_svm()
     else: self.load_model()
     return
from nltk.corpus import wordnet

syns = wordnet.synsets("program")
print(syns)

print(syns[0].name())
print(syns[0].lemmas()[0].name())
print(syns[0].definition())

synonyms = []
antonyms = []

for syn in wordnet.synsets("good"):
    for l in syn.lemmas():
        synonyms.append(l.name())
        if l.antonyms():
            antonyms.append(l.antonyms()[0].name())

print(synonyms)
print(antonyms)

w1 = wordnet.synset("ship.n.01")
w2 = wordnet.synset("boat.n.01")
print(w1.wup_similarity(w2))
Example #32
annotation_file = './.data/command.annotations'
hypernyms_file = './.data/command.hypernyms'
hyponyms_file = './.data/command.hyponyms'

try:
    with open(annotation_file, 'r') as f:
        previous_synsets = json.loads(f.read())
except:
    previous_synsets = {}
confirmed_hyponyms = {}
confirmed_hypernyms = {}

sentence = "take move attack use examine"
annotations = train.annotate_synsets(sentence, previous_synsets)

with open(annotation_file, 'w') as f:
    f.write(json.dumps(annotations))

for word in annotations:
    word_sense = wn.synset(annotations[word]['name'])
    confirmed_hypernyms[word] = train.confirm_hyponyms(
        word, word_sense, do_hypernyms_instead=True)
    confirmed_hyponyms[word] = train.confirm_hyponyms(
        word, word_sense, do_hypernyms_instead=False)

with open(hypernyms_file, 'w') as f:
    f.write(json.dumps(confirmed_hypernyms))

with open(hyponyms_file, 'w') as f:
    f.write(json.dumps(confirmed_hyponyms))
Example #33
 def wordnet_entity_determiner(self,
                               subject,
                               tts_output,
                               userin=None,
                               user_prefix=None):
     # print subject
     entity_samples_map = {
         'PERSON': ['person', 'character', 'human', 'individual', 'name'],
         'NORP': ['nationality', 'religion', 'politics'],
         'FACILITY': ['building', 'airport', 'highway', 'bridge', 'port'],
         'ORG': ['company', 'agency', 'institution', 'university'],
         'GPE': ['country', 'city', 'state', 'address', 'capital'],
         'LOC': ['geography', 'mountain', 'ocean', 'river'],
         'PRODUCT': ['product', 'object', 'vehicle', 'food'],
         'EVENT': ['hurricane', 'battle', 'war', 'sport'],
         'WORK_OF_ART': ['art', 'book', 'song', 'painting'],
         'LANGUAGE': ['language', 'accent', 'dialect', 'speech'],
         'DATE': ['year', 'month', 'day'],
         'TIME': ['time', 'hour', 'minute'],
         'PERCENT': ['percent', 'rate', 'ratio', 'fee'],
         'MONEY': ['money', 'cash', 'salary', 'wealth'],
         'QUANTITY':
         ['measurement', 'amount', 'distance', 'height', 'population'],
         'ORDINAL': ['ordinal', 'first', 'second', 'third'],
         'CARDINAL': ['cardinal', 'number', 'amount', 'mathematics']
     }  # entity samples to use it in WordNet similarity
     # The subject must be decoded from utf-8 to unicode because spaCy only
     # supports unicode strings, self.nlp() handles all parsing
     doc = self.nlp(subject)
     subject = []  # empty list to hold the nouns in the subject string
     for word in doc:  # for each word in the subject string
         # if word.pos_ not in
         # ['PUNCT','SYM','X','CONJ','DET','ADP','SPACE']:
         if word.pos_ == 'NOUN':  # if word is a noun then
             subject.append(
                 word.text.lower())  # convert it to lowercase and append it
     entity_scores = {}  # empty dictionary to hold entity scores
     for entity, samples in entity_samples_map.items(
     ):  # iterate over the entity_samples_map
         entity_scores[entity] = 0  # initial score of the entity is 0
         for sample in samples:  # for each sample in the samples
             sample_wn = wn.synset(
                 sample + '.n.01')  # convert the sample to a WordNet noun
             for word in subject:  # for each word in the subject
                 try:
                     word_wn = wn.synset(
                         word +
                         '.n.01')  # convert the word to a WordNet noun
                     # calculate the similarity using WordNet
                     # path_similarity() and add it to the score of the
                     # entity
                     entity_scores[entity] += word_wn.path_similarity(
                         sample_wn)
                 except WordNetError:
                     userin.define(
                         [" "],
                         "NLP(WordNet) error. Unrecognized word: " + word)
                     userin.execute(0)
                     if not tts_output:
                         print("Sorry, " + user_prefix +
                               ". But I'm unable to understand the word '" +
                               word + "'.")
                     if tts_output:
                         userin.say(
                             "Sorry, " + user_prefix +
                             ". But I'm unable to understand the word '" +
                             word + "'.")
                     return 1
         # to calculate the average; divide the total by the amount of
         # samples
         entity_scores[entity] = entity_scores[entity] / len(samples)
     if not tts_output:
         print(sorted(entity_scores.items(), key=lambda x: x[1])[::-1]
               [:3])  # if not tts_output print the best 3 result
     result = sorted(
         entity_scores.items(),
         key=lambda x: x[1])[::-1][0][0]  # assign the best result
     if result == 'FACILITY':
         # currently, spaCy is incorrectly classifying many entities that
         # belongs to FACILITY as ORG. Because of that include ORG to the
         # return
         return [result, 'ORG']
     if result == 'PRODUCT':
         # currently, spaCy is incorrectly classifying many entities that
         # belongs to PRODUCT as ORG. Because of that include ORG to the
         # return
         return [result, 'ORG']
     # if no exception on above lines then return only one result but in an
     # array. For example ['PERSON']
     return [result]
def directHyponyms(termsSet):
    """Collect the immediate (direct) hyponyms of every synset in termsSet.

    :param: termsSet
    :return: immidiateHyponyms
    """
    immidiateHyponyms = set()
    for termSyn in termsSet:
        termHyponyms = set()
        termHyponyms = termSyn.hyponyms()
        immidiateHyponyms.update(termHyponyms)
    return immidiateHyponyms
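A brief usage sketch, assuming wn is nltk's WordNet reader; one expansion step from the key term used below returns its direct hyponyms:

first_level = directHyponyms({wn.synset('material.n.01')})
print(len(first_level), "direct hyponyms of material.n.01")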


"""taking the Similarity Degree (SD) and the Key term (d1)"""
keyTerm = wn.synset('material.n.01')
SD = 0.8
maxd1 = 0
"""finding the depth of the keyTerm (d1)
There are many paths (and therefore depths) to the ROOT, here we take the longest path as :var: maxPath"""
for path in keyTerm.hypernym_paths():
    d1 = len(path)
    if d1 > maxd1:
        maxd1 = d1
        maxPath = path

hyponymsSet = set()
print(maxd1, maxPath)
"""LCS floor"""
LCS_lower = math.ceil(SD / (2 - SD) * maxd1)
"""LCS ceiling"""
Example #35
def wn_synset2keys(synset):
    if isinstance(synset, str):
        synset = wn.synset(synset)
    return list(set([lemma.key() for lemma in synset.lemmas()]))
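A short usage sketch, assuming wn is nltk's WordNet reader; sense keys look like "dog%1:05:00::", one per lemma of the synset:

from nltk.corpus import wordnet as wn

print(wn_synset2keys('dog.n.01'))              # e.g. ['dog%1:05:00::', ...]
print(wn_synset2keys(wn.synset('dog.n.01')))   # a Synset object is accepted too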
Example #36
def main():

    #parse args
    global args
    args = parser.parse_args()
    if args.emb_file_name is None:
        raise NameError('args.emb_file_name is not specified')

    #GPU setting
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    cudnn.benchmark = True

    #import dataset
    embedding = torch.load(args.emb_dir + args.emb_file_name)
    print('EMBEDDING TYPE:', embedding['manifold'])
    n_emb_dims = embedding['embeddings'].shape[1]
    args.n_emb_dims = n_emb_dims
    print('NUM OF DIMENSIONS:', n_emb_dims)

    #change labels from synset names into imagenet format
    synset_list = [wn.synset(i) for i in embedding['objects']]
    offset_list = [wn.ss2of(j) for j in synset_list]
    embedding['objects'] = ['n' + i.split('-')[0] for i in offset_list]

    #load the CNN part of the model
    print("=>using pre-trained model '{}'".format(args.arch))
    orig_vgg = models.__dict__[args.arch](pretrained=True)

    #change the model to project into desired embedding space
    if embedding['manifold'] == 'poincare':
        model = PoincareEmbVGG(orig_vgg, args.n_emb_dims)
    elif embedding['manifold'] == 'euclidean':
        model = EuclidEmbVGG(orig_vgg, args.n_emb_dims)
    model.to(device, non_blocking=True)
    model.features = torch.nn.DataParallel(model.features)

    #load weights from training on 1K classes
    if os.path.isfile(args.saved_weights):
        print("=> loading checkpoint '{}'".format(args.saved_weights))
        checkpoint = torch.load(args.saved_weights)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(args.saved_weights,
                                                 checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.saved_weights))

    #data loading
    evaldir = '/mnt/fast-data15/datasets/imagenet/mammals'
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    eval_dataset = datasets.ImageFolder(
        evaldir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))

    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    #sort embedding to match image labels
    img_labels = eval_dataset.classes
    img2emb_idx = [embedding['objects'].index(i) for i in img_labels]
    emb_wgts = embedding['embeddings'][img2emb_idx]
    emb_wgts = emb_wgts.float().to(device, non_blocking=True)
    n_classes = emb_wgts.shape[0]

    #load 21k class distance matrix
    class_distance_mat = torch.load('class_dist_mat.pt').to(device,
                                                            non_blocking=True)
    class_distance_mat = class_distance_mat + torch.t(class_distance_mat)

    #trackers
    batch_time = AverageMeter('Time', ':6.3f')
    top5_pos_track = AverageMeter('Top5+', ':6.2f')
    top5_neg_track = AverageMeter('Top5-', ':6.2f')
    progress = ProgressMeter(len(eval_loader),
                             [batch_time, top5_pos_track, top5_neg_track],
                             prefix='Eval: ')

    #evaluate
    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(eval_loader):
            print(i)
            #if i <= 25329:
            #    continue
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            #compute output
            output = model(images)

            #evaluate
            preds = prediction(output, emb_wgts, 5, embedding['manifold'])
            target_dist_mat = class_distance_mat[target]
            top5_pos, top5_neg = calc_top5_pos_neg(preds, target_dist_mat)

            #track evaluation
            top5_pos_track.update(top5_pos, preds.shape[0])
            top5_neg_track.update(top5_neg, preds.shape[0])

            #measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                progress.display(i)

        print(
            ' * Top5+ {top5_pos_track.avg: .3f} Top5- {top5_neg_track.avg:.3f}'
            .format(top5_pos_track=top5_pos_track,
                    top5_neg_track=top5_neg_track))
Example #37
def _get_synset(synset_key):
    """
    The synset key is the unique name of the synset, this can be
    retrieved via synset.name()
    """
    return wn.synset(synset_key)
Example #38
from nltk.corpus import wordnet as wn

print(wn.synsets('motorcar'))
print(type(wn.synset('car.n.01').lemma_names))
print(wn.synset('tree.n.01').part_meronyms())
print(wn.synset('walk.v.01').entailments())
motorcar = wn.synset('car.n.01')
types_of_motorcar = motorcar.hyponyms()
print(types_of_motorcar[26])
print(wn.synset('baleen_whale.n.01').min_depth())

right = wn.synset('right_whale.n.01')
minke = wn.synset('minke_whale.n.01')
print(right.path_similarity(minke))
 def find_parent_of(x, ancestor=None):
     for lst in [[ele.name() for ele in hlst] for hlst in wn.synset(x).hypernym_paths()]:
         if ancestor in lst:
             return lst[-2]
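An illustrative call, assuming wn is nltk's WordNet reader and that the helper is available at module level; it returns x's direct hypernym on the first hypernym path that contains the given ancestor:

from nltk.corpus import wordnet as wn

print(find_parent_of('dog.n.01', ancestor='entity.n.01'))   # e.g. 'canine.n.02'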
Example #40
def get_main_verb(query):
    global verb_dict
    global last_main_verb

    tokens = word_tokenize(query)
    text = Text(tokens)
    tags = pos_tag(text)

    result = {}
    verbs = []
    temp_verbs = []

    for i in range(len(tags)):
        if "VB" in str(tags[i][1]):
            if tags[i][0] == 'i':
                continue
            try:
                suggestions = wordnet.synset(str(tags[i][0]) +
                                             ".v.01").lemma_names()
            except:
                suggestions = []

            exist = False
            for x in verb_dict.keys():
                if list(set(suggestions) & set(verb_dict[x])) or list(
                        set(suggestions) & set([x])) or tags[i][0] == x:
                    exist = True
                    break
            if exist:
                verbs.append(tags[i][0])
            else:
                last_main_verb = tags[i][0]
                continue

            result.update({"intent": verbs})

    if len(result.keys()) == 0:
        return {
            "message":
            "Sorry, try again with better keywords. No results found."
        }

    keys = api.keywords("text", query)["keywords"]
    keywords = [keys[0]["text"]]
    ctr = 0

    for key in keys:
        if ctr == 0:
            ctr += 1
            continue

        exist = False

        for x in verb_dict.keys():
            if key["text"] in verb_dict[x] or key["text"] == x:
                exist = True
                break

        if exist:
            keywords.append(key["text"])

    result.update({"keywords": keywords})

    return result
Example #41
from nltk.corpus import wordnet as wn
import requests

ingredients = "1 cup soy sauce (preferably low-sodium), 1 cup Coca-Cola, 1/4 cup toasted sesame oil, 1/4 cup hoisin sauce, 4 cloves garlic, chopped, 4 scallions, minced, 2 rib-eye steaks (bone-in or boneless), or other steak, such as sirloin"
tokens = nltk.word_tokenize(ingredients)
tagged = nltk.pos_tag(tokens)

word_list = []
for i in tagged:
    if (i[1] == 'NN' or i[1] == 'NNS'):
        word_list.append(i[0])
result = ' '.join(word_list)

#Output
'cup soy sauce low-sodium cup cup oil cup hoisin sauce cloves scallions steaks bone-in boneless steak sirloin'

food = wn.synset('food.n.02')
food_lexicon = list(
    set([
        w for s in food.closure(lambda s: s.hyponyms())
        for w in s.lemma_names()
    ]))
string = '1 inch-thick boneless shell steak'
string2 = '1 cup soy sauce (preferably low-sodium), 1 cup Coca-Cola, 1/4 cup toasted sesame oil, 1/4 cup hoisin sauce, 4 cloves garlic, chopped, 4 scallions, minced, 2 rib-eye steaks (bone-in or boneless), or other steak, such as sirloin'
words = string2.split()
for i in food_lexicon:
    if (result == i):
        print('I exist: ', i)
    # print(i)
# print(fuzz.partial_ratio(result, another_result))
# print(result)
Example #42
def f_oov(data):
    no, word, pos_tag, ne_tag = data
    synsets = wn.synsets(word.lower())  # look the word up without shadowing the built-in set
Example #43
from nltk.corpus import wordnet
print(
    wordnet.synset('advantage.n.01').path_similarity(
        wordnet.synset('advantage.n.01')))
Example #44
    parser = nltk.ChartParser(your_grammar) 
    sent = 'describe your work'.split() 
    print ('Sentence: ', sent )
    print ('') 
    print ('Parsing output')
    print (list(parser.parse(sent))) 
    
    from nltk.corpus import wordnet as wn 
    for i,j in enumerate(wn.synsets('man')): 
        print (j.lemma_names())     #[u'book', u'volume'] 
        print  (", ".join(j.lemma_names()))  #book, volume 
        print ('') 


    from nltk.corpus import wordnet as wn 
    book = wn.synset('book.n.02')  # 2nd sense 
    print ('Synonyms: ', book.lemma_names() )
    print ('' )
    print ('Hypernyms: ', book.hypernyms() )
    print ('' )
    print ('Hyponyms: ', book.hyponyms() )
    print ('' )
    print ('Holonyms: ', book.member_holonyms() )

    from nltk.corpus import wordnet as wn 
    #book = wn.synset('book.n.01')  # 1st sense 
    print ('Antonyms: ', wn.lemma('good.a.01.good').antonyms())
    print ('Antonyms: ', wn.lemma('slow.a.01.slow').antonyms())
    print ('Antonyms: ', wn.lemma('increase.v.01.increase').antonyms())
    print ('Antonyms: ', wn.lemma('boy.n.01.boy').antonyms())
    
Example #45
def use_wordnet(FreelingFolder, WordnetFolder):
    """
    Call Wordnet using NLTK to get the lexnames.
    Author: #cf
    """
    print("use_wordnet...")

    if not os.path.exists(WordnetFolder):
        os.makedirs(WordnetFolder)

    InPath = FreelingFolder + "*.xml"
    for File in glob.glob(InPath):

        LexErrCounter = collections.Counter()

        with open(File, "r") as InFile:
            Filename = os.path.basename(File)
            Text = InFile.read()
            Text = re.split("</token>", Text)
            NewText = ["<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<body>"]
            for Line in Text[0:-1]:
                Line = Line + "</token>"
                #print(Line)
                Word = re.findall("form=\"(.*?)\" ", Line)[0]
                #print(Word)
                Line = re.sub("</token>", Word + " </token>", Line)
                #print(Line)
                if "wn=" in Line:
                    #print(Line)
                    SynsetID = re.findall("wn=.*\"", Line)[0]
                    SynsetNumber = int(SynsetID[4:-3])
                    SynsetPOS = SynsetID[-2:-1]
                    #print(SynsetID, SynsetPOS, SynsetNumber)
                    SynsetAbbID = ""
                    try:
                        SynsetAbbID = wn._synset_from_pos_and_offset(
                            SynsetPOS, SynsetNumber)
                    except:
                        pass
                        #print("Error when trying to get synset name.")
                    SynsetAbbID = str(SynsetAbbID)
                    SynsetAbbID = SynsetAbbID[8:-2]
                    #print(SynsetAbbID)
                    Lexname = "XXX"
                    try:
                        Lexname = wn.synset(SynsetAbbID).lexname()
                    except:
                        #print("Error when trying to get lexname.")
                        LexErrCounter.update({"LexNameError": 1})
                    #print(Lexname)
                    Line = re.sub("(wn=.*) >", "\\1 lxn=\"" + Lexname + "\" >",
                                  Line)
                    #print(Line)
                    NewText.append(Line)
                elif "wn=" not in Line and "sentence" not in Line:
                    #print(Line)
                    Line = re.sub(" >", " wn=\"xxx\" lxn=\"xxx\" >", Line)
                    #print(Line)
                    NewText.append(Line)
                elif "sentence" in Line:
                    #print(Line)
                    Line = re.sub(" >", " wn=\"xxx\" lxn=\"xxx\" >", Line)
                    #print(Line)
                    NewText.append(Line)

            if LexErrCounter["LexNameError"] > 0:
                print(
                    str(LexErrCounter["LexNameError"]) +
                    " lexname(s) could not be found in " + str(Filename))
            NewText.append("</sentence>\n</body>")
            NewText = ''.join(NewText)
            with open(WordnetFolder + Filename, "w") as OutFile:
                OutFile.write(NewText)

    print("Done.")
import nltk
from nltk.corpus import wordnet as wn
import pandas as pd
import numpy as np
import statistics
import matplotlib.pyplot as plt
import seaborn as sns
import rpy2

# Assigning Variables to correct synsets found in 'master_definitions&synsets.py'
syn_airplane = wn.synset('airplane.n.01')
syn_ambulance = wn.synset('ambulance.n.01')
syn_angel = wn.synset('angel.n.01')
syn_ant = wn.synset('ant.n.01')
syn_anvil = wn.synset('anvil.n.01')
syn_apple = wn.synset('apple.n.01')
syn_arm = wn.synset('arm.n.01')
syn_asparagus = wn.synset('asparagus.n.01')
syn_axe = wn.synset('ax.n.01')
syn_backpack = wn.synset('backpack.n.01')
syn_banana = wn.synset('banana.n.01')
syn_bandage = wn.synset('bandage.n.01')
syn_barn = wn.synset('barn.n.01')
syn_baseball = wn.synset('baseball.n.01')
syn_basket = wn.synset('basket.n.01')
syn_basketball = wn.synset('basketball.n.01')
syn_bat = wn.synset('bat.n.01')
syn_bathtub = wn.synset('bathtub.n.01')
syn_beach = wn.synset('beach.n.01')
syn_bear = wn.synset('bear.n.01')
syn_beard = wn.synset('beard.n.01')
Example #47
            #print(str(key) + ", "+str(dict_syn[key]))

    
    max_path_sim_pair = max(dict_syn, key = dict_syn.get)
    max_Val = dict_syn[max_path_sim_pair]
    #print(max_Val)
    for entry in dict_syn:
        if dict_syn[entry] >= max_Val and entry not in max_pairs:
            if [entry[0], entry[1]] not in max_pairs and [entry[1], entry[0]] not in max_pairs:
                word_net_file.write(str(entry) +'\n')
            max_pairs.append([entry[0], entry[1]])
            max_pairs.append([entry[1], entry[0]])
    word_net_file.close()
    #print(max_pairs)
    #print(max_path_sim, dict_syn[max_path_sim])
    #[print() for pair in max_pairs for tup in pair if tup[0][0]]
    #print(dict_syn)

if __name__ == '__main__':
    if os.path.exists("wordnet.txt"):  
        word_net_file = open("wordnet.txt","w")
        word_net_file.write("")
        word_net_file.close()
    
    print_syn_lemmas_def('fish')
    print_lexical_rel(wn.synset('fish.n.01'))
    #print_syn_lemmas_def('fox') #w2
    print_path_similarity(wn.synset('fish.n.01'),wn.synset('fox.n.01'))
    print_highest_path_similarity(['dog.n.01', 'man.n.01', 'whale.n.01','bark.n.01', 'cat.n.01'])

Example #48
#   DESCRIPTION:		Builds a dataset of manually identified locations
#   REQUIREMENTS: 	json nltk shapely
#
# ******************************************************************

from nltk.corpus import wordnet as wn
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from shapely.geometry import shape, Point
import re, json

regex = re.compile('[^a-zA-Z ]')
stops = [word.encode('ascii') for word in stopwords.words("english")]
lemmatizer = WordNetLemmatizer()

food = wn.synset('food.n.01')
sport = wn.synset('sport.n.01')
park = wn.synset('park.n.01')

mcg = Point(-37.820018, 144.983460).buffer(0.001878)
rod_laver = Point(-37.821419, 144.979023).buffer(0.001878)
ami = Point(-37.825045, 144.983797).buffer(0.001878)
etihad = Point(-37.816477, 144.947622).buffer(0.001878)
stadiums = [mcg, rod_laver, ami, etihad]

vue = Point(-37.818486, 144.957499).buffer(0.000346)
matcha = Point(-37.866772, 144.978368).buffer(0.000171)
centre = Point(-37.816440, 144.965447).buffer(0.000355)
tattersalls = Point(-37.811890, 144.965556).buffer(0.000355)
lygon = Point(-37.783531, 144.969853).buffer(0.001908)
acland = Point(-37.868812, 144.979945).buffer(0.001448)
Example #49
 def words(self, synset_name, lang='eng'):
     res = []
     s = wn.synset(synset_name)
     for l in s.lemmas():
         res.append(WSLObject(self, l.name(), 'word', lang=lang))
     return res
Example #50
# -*- coding: utf-8 -*-
import matplotlib
matplotlib.use('TkAgg')
import nltk
'''
☼ Investigate the holonym-meronym relations for some nouns.
Remember that there are three kinds of holonym-meronym relation,
so you need to use:
member_meronyms(), part_meronyms(), substance_meronyms(),
member_holonyms(), part_holonyms(), and substance_holonyms().
'''

from nltk.corpus import wordnet as wn

print wn.synsets('tree')
print wn.synset('tree.n.01').member_meronyms()
print wn.synset('tree.n.01').part_meronyms()
print wn.synset('tree.n.01').substance_meronyms()
print wn.synset('tree.n.01').member_holonyms()
print wn.synset('tree.n.01').part_holonyms()
print wn.synset('tree.n.01').substance_holonyms()
Example #51
    for each_ele in set_of_n:
        ele_to_add = lesk(tweet, each_ele, 'n')
        if ele_to_add is not None:
            sync_set.add(ele_to_add)
    for each_ele in set_of_v:
        ele_to_add = lesk(tweet, each_ele, 'v')
        if ele_to_add is not None:
            sync_set.add(ele_to_add)
    for each_ele in set_of_adj:
        ele_to_add = lesk(tweet, each_ele, 'a')
        if ele_to_add is not None:
            sync_set.add(ele_to_add)

    for each_know in kb:
        sync_each_know = wordnet.synset(each_know)
        for each_sync in sync_set:
            similarity = each_sync.wup_similarity(sync_each_know)
            if similarity is not None:
                dict_kb[each_know] += similarity

    sorted_dict = sorted(dict_kb.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
    #print(sorted_dict)
    temp_string = ""
    for i in range(num_sem_words):
        dotted_word = sorted_dict[i][0]
        for j in range(len(dotted_word)):
            if dotted_word[j] == '.':
                temp_string += " " + dotted_word[:j]
Example #52
    def universality(self, w):
        return None

    def _universality(self, w):
        return None

    def shortest_path(self, w_in, w_out):
        spds = []
        for s_in in wn.synsets(w_in.name, lang=w_in.lang):
            for s_out in wn.synsets(w_out.name, lang=w_out.lang):
                spds.append(s_in.shortest_path_distance(s_out))
        return min(spds)

    def associativeness(self, w_in, w_out):
        k = 10  #under consideration
        sp = self.shortest_path(w_in, w_out)
        rel = self.relatedness(w_in, w_out)
        ass = rel * np.log((1 + sp) / k)
        return ass, rel, sp


if __name__ == '__main__':
    ln = LexicalNet()
    synset = wn.synset('hot.a.01')
    word = 'hot'
    lemma = 'hot.a.01:hot'
    synset = ln.to_WSLObj(synset)
    word = ln.to_WSLObj(word)
    lemma = ln.WSLObj('hot.a.02:hot', 'lemma', 'eng')
    print(word.vector(categ='chocolate'))
Example #53
import nltk
from nltk.corpus import wordnet as wn

woman = wn.synset('woman.n.02')
bed = wn.synset('bed.n.01')
print(woman.hypernyms())
woman_paths = woman.hypernym_paths()
for idx, path in enumerate(woman_paths):
    print('\n\nHypernym Path : ', idx + 1)
    for synset in path:
        print(synset.name(), ', ', end='')

types_of_beds = bed.hyponyms()
print('\n\nTypes of beds(hyponyms) : ', types_of_beds)
print()
print(
    sorted(
        set(lemma.name() for synset in types_of_beds
            for lemma in synset.lemmas())))
 def _find_similarity(self, word1, word2):
     word1 = wordnet.synset(word1 + ".n.01")
     word2 = wordnet.synset(word2 + ".n.01")
     return wordnet.wup_similarity(word1, word2)
Example #55
import nltk
from nltk.corpus import wordnet as wn

syn = wn.synsets('motorcar')

mcar = wn.synset('car.n.01')

name = mcar.name()
lena = mcar.lemma_names()
lem = mcar.lemmas()
defi = mcar.definition()
ex = mcar.examples()
hype = mcar.hypernyms()
hypo = mcar.hyponyms()

print("synset:",syn, "\nname:", name, "\nlemma name:", lena, "\nlemmas:", lem, "\ndefinition:", defi, "\nexample:", ex, "\nhypernym:", hype, "\nhyponym:", hypo)


'''
types_of_motorcar = mcar.hyponyms()
#types_of_motorcar[]

for synset in types_of_motorcar:
   # for lemma in synset.lemmas():
      print(sorted(synset.lemma_names()))


'''
'''
print(mcar.hypernyms())
paths = mcar.hypernym_paths()
Example #56
import nltk
import matplotlib
import matplotlib.pyplot
import networkx as nx
from nltk.corpus import wordnet as wn


def traverse(graph, start, node):
    graph.depth[node.name()] = node.shortest_path_distance(start)
    for child in node.hyponyms():
        graph.add_edge(node.name(), child.name())
        traverse(graph, start, child)


def hyponym_graph(start):
    G = nx.Graph()
    G.depth = {}
    traverse(G, start, start)
    return G


def graph_draw(graph):
    nx.draw_graphviz(graph,
                     node_size=[16 * graph.degree(n) for n in graph],
                     node_color=[graph.depth[n] for n in graph],
                     with_labels=False)
    matplotlib.pyplot.show()


dog = wn.synset('dog.n.01')
graph = hyponym_graph(dog)
graph_draw(graph)
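Note that nx.draw_graphviz was removed from newer NetworkX releases; under that assumption, a rough stand-in is to compute a layout explicitly and hand it to nx.draw:

def graph_draw_spring(graph):
    # spring_layout replaces the old graphviz layout
    pos = nx.spring_layout(graph)
    nx.draw(graph, pos,
            node_size=[16 * graph.degree(n) for n in graph],
            node_color=[graph.depth[n] for n in graph],
            with_labels=False)
    matplotlib.pyplot.show()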
def exercise5():
    print
    print "Exercise 5"
    print "For Bicycle, wheel"
    print "part_meronyms: ", wn.synset('bicycle.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset(
        'bicycle.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('bicycle.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('bicycle.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('bicycle.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset(
        'bicycle.n.01').substance_holonyms()
    print
    print "For Pen, nib"
    print "part_meronyms: ", wn.synset('pen.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset('pen.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('pen.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('pen.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('pen.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset('pen.n.01').substance_holonyms()
    print
    print "For Fish, fin"
    print "part_meronyms: ", wn.synset('fish.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset('fish.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('fish.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('fish.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('fish.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset('fish.n.01').substance_holonyms()
    print
    print "For Window, glass"
    print "part_meronyms: ", wn.synset('window.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset('window.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('window.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('window.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('window.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset('window.n.01').substance_holonyms()
    print
    print "For Laptop, touchpad"
    print "part_meronyms: ", wn.synset('laptop.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset('laptop.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('laptop.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('laptop.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('laptop.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset('laptop.n.01').substance_holonyms()
    print
    print "For Car, steering"
    print "part_meronyms: ", wn.synset('car.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset('car.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('car.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('car.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('car.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset('car.n.01').substance_holonyms()
    print
    print "For Chair, legs"
    print "part_meronyms: ", wn.synset('chair.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset('chair.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('chair.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('chair.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('chair.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset('chair.n.01').substance_holonyms()
    print
    print "For Book, pages"
    print "part_meronyms: ", wn.synset('book.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset('book.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('book.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('book.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('book.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset('book.n.01').substance_holonyms()
    print
    print "For Telephone, mic"
    print "part_meronyms: ", wn.synset('telephone.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset(
        'telephone.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('telephone.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('telephone.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('telephone.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset(
        'telephone.n.01').substance_holonyms()
    print
    print "For Bird, wings"
    print "part_meronyms: ", wn.synset('bird.n.01').part_meronyms()
    print "substance_meronyms: ", wn.synset('bird.n.01').substance_meronyms()
    print "member_meronyms: ", wn.synset('bird.n.01').member_meronyms()
    print "member_holonyms: ", wn.synset('bird.n.01').member_holonyms()
    print "part_holonyms: ", wn.synset('bird.n.01').part_holonyms()
    print "substance_holonyms: ", wn.synset('bird.n.01').substance_holonyms()
    print
Example #58
                    default="animal",
                    help="Synset to dump hyponyms of")
parser.add_argument("-l", "--list", action='store_true', help="List synsets")
parser.add_argument("-p",
                    "--plurals",
                    action='store_true',
                    help="Generate plurals",
                    default=False)
parser.add_argument("-t", "--title", action='store_true', help="Title Case")

args = parser.parse_args()

synsets = []

if '.' in args.synset:
    synset = wn.synset(args.synset)
    if not synset:
        print("Couldn't find synset with ID {}".format(args.synset))
        sys.exit(-1)
    synsets = [synset]
else:
    synsets = wn.synsets(args.synset)
    if not synsets:
        print("Couldn't find synsets matching {}".format(args.synset))
        sys.exit(-1)

if not synsets:
    print("No synsets found for {}".format(args.synset))
    sys.exit(-1)

if args.list:
Example #59
 labels = pickle.load(f)

ts= time.time()
tfidf_vectorizer = TfidfVectorizer( max_features=200000, stop_words='english',
                                 use_idf=True, tokenizer=tokenize_and_stem)

tfidf_matrix = tfidf_vectorizer.fit_transform(tweets) #fit the vectorizer to synopses

  
features_word = tfidf_vectorizer.get_feature_names()
print len(features_word)
semantic_features_word=[]  
for word in features_word : 
    for i in range(1, 6):  # WordNet sense numbers start at 1
        try:    
            for j in wn.synset(word+'.n.'+str(i)).lemma_names():
                semantic_features_word.append(j)
        except:
            pass
print len(semantic_features_word)
semantic_features_word = rem_reduncy(semantic_features_word)
print len(semantic_features_word)

terms=features_word+semantic_features_word
terms=rem_reduncy(terms)

tfidf_vectorizer = TfidfVectorizer( max_features=200000, stop_words='english', vocabulary=terms,
                                 use_idf=True, tokenizer=tokenize_and_stem)

tfidf_matrix = tfidf_vectorizer.fit_transform(tweets)
#print tfidf_matrix
Example #60
levelOfNode = {}


def get_hyponyms(synset, level):
    if (level == tot):
        levelOfNode[str(synset)] = level
        return
    if not str(synset) in network:
        network[str(synset)] = [str(s) for s in synset.hyponyms()]
        levelOfNode[str(synset)] = level
    for hyponym in synset.hyponyms():
        get_hyponyms(hyponym, level + 1)


mammal = wn.synset('mammal.n.01')
get_hyponyms(mammal, 0)
levelOfNode[str(mammal)] = 0

emb = {}

for a in network:
    for b in network[a]:
        emb[b] = np.random.uniform(low=-0.001, high=0.001, size=(2, ))
    emb[a] = np.random.uniform(low=-0.001, high=0.001, size=(2, ))

vocab = list(emb.keys())
random.shuffle(vocab)

for a in emb:
    if not a in network: