def unify_query(query):
    """Look up a WordNet noun synset for a query built from Word objects.

    param: query -- list of Word objects
    return: list of synset entries from wn (may be empty if nothing matched)
    """
    # Build the textual WordNet query from the Word objects.
    query = build_wn_query(query)
    print('wordnet query: {0}'.format(query))

    s = wordnet.synsets(singularize(query), pos=wordnet.NOUN)

    if len(s) == 0:
        # Fallback: the query may be a two-word compound noun,
        # e.g. "thrill ride" fails but "ride" does not, so retry with
        # the second word only.
        print('no entry for {0}..'.format(query))

        parts = query.split()
        # BUG FIX: guard against IndexError when the query is one word.
        if len(parts) > 1:
            s = wordnet.synsets(singularize(parts[1]), pos=wordnet.NOUN)
            if len(s) == 0:
                print('no entry for {0}'.format(parts[1]))

    return s
예제 #2
0
    def test_synset_is_physical_object(self):
        """A tree is a physical object; truth is abstract."""
        from extract import synset_is_physical_object

        physical = wordnet.synsets("tree")[0]
        self.assertTrue(synset_is_physical_object(physical))
        abstract = wordnet.synsets("truth")[0]
        self.assertFalse(synset_is_physical_object(abstract))
예제 #3
0
def special_similarity(word):
    """Score how close *word* is to the adjective 'special'.

    Returns 1.0 immediately if the word literally starts with "special".
    """
    if word.startswith("special"):
        return 1.0
    reference_senses = [
        wordnet.synsets('special', pos=wordnet.ADJECTIVE)[1],
        wordnet.synsets('special', pos=wordnet.ADJECTIVE)[3],
    ]
    return custom_similarity(word, reference_senses)
예제 #4
0
def test():
    """Ad-hoc smoke test for the wordnet helper functions in this module.

    NOTE(review): the bare `return` below makes everything after it dead
    code -- it looks deliberately left in for manual experimentation
    (comment out the `return` to run the rest).
    """
    word = 'walk'
    pos = 'VB'

    print(wordnet.synsets(word, pos))

    return

    # --- dead code below: only runs if the `return` above is removed ---
    hypers = get_words(word, pos, 'hyper', True, 10)
    # for hyper in hypers:
    #     print(hyper, hypers[hyper])
    print(hypers.keys())
    # print('fruit' in hypers)

    hypos = get_words(word, pos, 'hypo', True, 1)
    # for hypo in hypos:
    #     print(hypo, hypos[hypo])
    print(hypos.keys())

    print()
    print(get_synonyms('big', 'JJ'))
    print(get_antonyms('big', 'JJ'))
    print(get_similar('big', 'JJ'))
    print()
    print(get_synonyms('man', 'NN'))
    print(get_antonyms('man', 'NN'))
    print(get_similar('man', 'NN'))
    print()

    print(wordnet.synsets('go', pos='VB')[0])
    print(wordnet.synsets('nice', pos='JJ')[0])
    print(wordnet.synsets('well', pos='RB')[0])
    print(wordnet.synsets('musical instrument', pos="NN")[0])
예제 #5
0
def number_similarity(word):
    """WordNet similarity of *word* to selected noun senses of 'number'."""
    reference_senses = [
        wordnet.synsets('number')[0],
        wordnet.synsets('number')[1],
        wordnet.synsets('number')[4],
    ]
    return custom_similarity(word, reference_senses, pos=wordnet.NOUN)
예제 #6
0
def alphabet_similarity(word):
    """WordNet similarity of *word* to alphabet/character/letter senses."""
    reference_senses = [
        wordnet.synsets('alphabet')[0],
        wordnet.synsets('character')[-2],
        wordnet.synsets('letter')[1],
    ]
    return custom_similarity(word, reference_senses, pos=wordnet.NOUN)
예제 #7
0
def hypernym_search(text, search_word):
    """Find words in *text* whose first-synset hypernym chain contains any
    of the '|'-separated terms in *search_word*.

    Returns a set of matching word strings; an unknown term ends the
    search early (IndexError is swallowed, as in the original design).
    """
    try:
        from pattern.en import wordnet
    except:
        print(
            'Please install pattern: pip install https://github.com/clips/pattern/archive/development.zip'
        )
        sys.exit()

    matches = []
    try:
        for term in search_word.split('|'):
            term_pos = wordnet.synsets(term)[0].pos
            for candidate in re_search(text, term_pos):
                candidate_word = candidate[0].string
                candidate_synsets = wordnet.synsets(candidate_word)
                if not candidate_synsets:
                    continue
                ancestors = candidate_synsets[0].hypernyms(recursive=True)
                if any(term == a.senses[0] for a in ancestors):
                    matches.append(candidate_word)
    except IndexError:
        pass

    return set(matches)
예제 #8
0
def word_similarity(word1, word2):
	"""
	Similarity of 2 words as a score from 0 to 1, uses wordnet.

	Returns 0 when either word has no synsets or similarity fails.
	"""
	from pattern.en import wordnet
	try:
		a = wordnet.synsets(word1)[0]
		b = wordnet.synsets(word2)[0]
		return wordnet.similarity(a, b)
	except Exception:
		# was a bare `except:` -- don't swallow SystemExit/KeyboardInterrupt
		return 0
예제 #9
0
def get_related_noun_or_not(noun, d=True):
	"""Return a random hyponym/hypernym sense of *noun*, or *noun* itself.

	If *noun* has no synsets, retry once with its singular form; the *d*
	flag guards against further retries.
	"""
	w = wordnet.synsets(noun)
	if w:
		w = w[0]
		w1 = w.hyponyms()
		w2 = w.hypernyms()
		if w1 + w2:
			nw = random.choice(w1 + w2)
			if nw and nw.senses:
				return nw.senses[0]
	elif wordnet.synsets(singularize(noun)) and d:
		# BUG FIX: the False belongs to this call (d=False stops further
		# retries); it was being passed to singularize() as its pos argument.
		return get_related_noun_or_not(singularize(noun), False)
	return noun
예제 #10
0
def hypernym_search(text, search_word):
    """Collect words in *text* whose first synset has *search_word* among
    the sense names of its recursive hypernyms; returns a set."""
    found = []
    first_synset = wordnet.synsets(search_word)[0]
    for candidate in re_search(text, first_synset.pos):
        candidate_word = candidate[0].string
        candidate_synsets = wordnet.synsets(candidate_word)
        if not candidate_synsets:
            continue
        ancestors = candidate_synsets[0].hypernyms(recursive=True)
        if any(search_word == a.senses[0] for a in ancestors):
            found.append(candidate_word)
    return set(found)
예제 #11
0
def hypernym_search(text, search_word):
    """Return the set of words in *text* that are (transitive) hyponyms of
    *search_word*, judged by each word's first synset."""
    reference = wordnet.synsets(search_word)[0]
    candidates = re_search(text, reference.pos)
    result = set()
    for m in candidates:
        token = m[0].string
        token_synsets = wordnet.synsets(token)
        if len(token_synsets) == 0:
            continue
        chain = token_synsets[0].hypernyms(recursive=True)
        for ancestor in chain:
            if ancestor.senses[0] == search_word:
                result.add(token)
                break
    return result
예제 #12
0
def get_related_or_not(word, d=True, pos='NN'):
    """Return a random sense of *word* itself or a hyponym/hypernym of it.

    If *word* has no synsets, retry once with its singular form; the *d*
    flag guards against further retries.
    """
    w = wordnet.synsets(word, pos=pos)
    if w:
        w = w[0]
        w1 = w.hyponyms()
        w2 = w.hypernyms()
        if w1 + w2:
            nw = random.choice([w] + w1 + w2)
            if nw and nw.senses:
                return nw.senses[0]
    elif wordnet.synsets(singularize(word)) and d:
        # BUG FIX: False and pos were being passed to singularize() instead
        # of to the recursive call (d=False stops further retries).
        return get_related_or_not(singularize(word), False, pos)
    return word
예제 #13
0
def custom_similarity(word, synsets, pos=None):
    """Best WordNet similarity between *word* and any synset in *synsets*.

    Returns 0 when no similarity could be computed.
    """
    word = singularize(word.lower())
    similarities = []
    if pos:
        word_synsets = wordnet.synsets(word, pos=pos)
    else:
        word_synsets = wordnet.synsets(word)
    for i in synsets:
        for j in word_synsets:
            try:
                similarities.append(wordnet.similarity(i, j))
            except Exception:
                # Py3 fix: `except Exception, e` is Python-2-only syntax.
                # Some synset pairs have no common subsumer; skip them.
                pass
    # BUG FIX: the function built `similarities` but never returned anything
    # (callers such as special_similarity expect a numeric score).
    return max(similarities) if similarities else 0
예제 #14
0
파일: word_senses.py 프로젝트: rsteckel/EDA
def wordnet_potential_parent(word1, pos1, word2, pos2, min_sim=0.0):
    """Common-ancestor synsets of any sense pair of the two words.

    Returns a list of (ancestor_synset, similarity) tuples for pairs whose
    similarity exceeds *min_sim*.
    """
    senses1 = wn.synsets(word1, pos1)
    senses2 = wn.synsets(word2, pos2)

    results = []
    for a in senses1:
        for b in senses2:
            ancestor = wn.ancestor(a, b)
            if not ancestor:
                continue
            score = wn.similarity(a, b)
            if score > min_sim:
                results.append((ancestor, score))
    return results
예제 #15
0
def custom_similarity(word, synsets, pos=None):
    """Best WordNet similarity between *word* and any synset in *synsets*.

    Returns 0 when no similarity could be computed.
    """
    word = singularize(word.lower())
    similarities = []
    if pos:
        word_synsets = wordnet.synsets(word, pos=pos)
    else:
        word_synsets = wordnet.synsets(word)
    for i in synsets:
        for j in word_synsets:
            try:
                similarities.append(wordnet.similarity(i, j))
            except Exception:
                # Py3 fix: `except Exception, e` is Python-2-only syntax.
                # Some synset pairs have no common subsumer; skip them.
                pass
    # BUG FIX: the function built `similarities` but never returned anything
    # (callers expect a numeric score).
    return max(similarities) if similarities else 0
예제 #16
0
def lemma_is_geological_formation(lemma):
    """True if any non-proper noun synset of *lemma* is a geological formation."""
    synsets = wordnet.synsets(lemma, wordnet.NOUN)
    # If every synset is a proper noun, it cannot be a formation.
    if synsets and all(synset_is_proper(s) for s in synsets):
        return False
    common_synsets = (s for s in synsets if not synset_is_proper(s))
    return any(synset_is_geological_formation(s) for s in common_synsets)
예제 #17
0
def find_replacement(word, pos):
    """With 30% probability, swap *word* for a random BASIC word that has a
    synset for *pos*; otherwise keep *word*.

    Returns (word, replaced_flag).
    """
    shuffle(BASIC)
    try:
        w = next(w for w in BASIC if wordnet.synsets(w, pos=pos))
    except StopIteration:
        # was a bare `except:` -- only "no candidate found" is expected here
        return word, False
    return (w, True) if random() < 0.3 else (word, False)
예제 #18
0
def synonyms(lemma, pos=NOUN):
    """All WordNet synonyms of *lemma* (excluding *lemma* itself); may repeat."""
    collected = []
    for ss in wordnet.synsets(lemma, pos):
        collected.extend(s for s in ss.synonyms if s != lemma)
    return collected
예제 #19
0
  def validate(self, tag, media):
    """Validate an image *media* tagged *tag* using the Google Vision API.

    Returns: 1 -> good (keep image); 0 -> bad (discard image);
    -1 -> cannot validate (keep).
    """
    tag = singularize(tag).lower()
    synset = wordnet.synsets(tag, pos=NOUN)
    if not synset:
      return -1
    category = synset[0].lexname
    if self.VALIDATE_CATEGORY == 'all':
      pass
    elif category in self.VALIDATE_CATEGORY:
      pass
      # do not return yet
    else:
      return -1 # not all and cannot be validated

    img = requests.get(media)
    gImage = google.cloud.vision.types.Image(content=img.content)
    response = self.vision_client.label_detection(image=gImage)
    # BUG FIX: map() is lazy on Python 3, so the repeated `in labels`
    # membership tests below would consume/miss it -- materialize a list.
    labels = [d.description if d.score > 0.9 else None
              for d in response.label_annotations]
    # check if tag in the detected labels with a good probability (score)
    if tag in labels:
      return 1

    # compare synonyms
    synonyms = synset[0].synonyms
    # BUG FIX: the loop tested `tag` on every iteration instead of each
    # `synonym`, so the synonym comparison never actually happened.
    for synonym in synonyms:
      if singularize(synonym).lower() in labels:
        return 1
    return 0
def glossdef(token):
    """Collect the gloss (definition text) of every synset of every term.

    param: token -- iterable of term strings
    return: list of gloss strings
    """
    glosses = []
    for term in token:
        # NOTE: the original shadowed the `token` parameter with the inner
        # loop variable and assigned list.append's None result back to it.
        for synset in wordnet.synsets(term):
            glosses.append(synset.gloss)
    return glosses
예제 #21
0
def shift(noun):
    """ Returns a (random hyponym, description)-tuple for the given noun.
    """
    first_sense = wordnet.synsets(noun)[0]
    candidates = first_sense.hyponyms(recursive=True) or [first_sense]
    picked = choice(candidates)
    return [picked.synonyms[0], picked.gloss]
예제 #22
0
def shift(noun):
    """ Returns a (random hyponym, description)-tuple for the given noun.
    """
    sense = wordnet.synsets(noun)[0]
    hyponym = choice(sense.hyponyms(recursive=True) or [sense])
    return [hyponym.synonyms[0], hyponym.gloss]
예제 #23
0
def sentiment(content):
    """Crude sentiment score for *content* using SentiWordNet weights.

    Scores adjectives, verbs and adverbs; words later in a sentence and
    more subjective senses weigh more (idea from [Ohana, Tierney '09]).
    """
    if len(wordnet.sentiment) == 0:
        wordnet.sentiment.load()

    relevant_types = ['JJ', 'VB', 'RB'] #adjectives, verbs, adverbs
    score = 0
    sentences = split(parse(content))
    for sentence in sentences:
        for index, word in enumerate(sentence.words):
            if word.string != '' and word.type in relevant_types:
                try:
                    synset = wordnet.synsets(word.string, word.type)
                except KeyError:
                    #incorrect part of speech tag or not in wordnet, skip it
                    continue
                if not synset:
                    # BUG FIX: an empty synset list raised IndexError below
                    continue
                pos, neg, obj = synset[0].weight

                #weights concluding statements
                #idea from [Ohana, Tierney '09]
                documentpos = index / float(len(sentence.words))

                #weights more subjective statements
                subjscore = ((pos - neg) * (1 - obj))

                score = score + subjscore * documentpos
    return score
예제 #24
0
def lemma_is_natural(lemma):
    """True if any non-proper noun synset of *lemma* is a natural thing."""
    synsets = wordnet.synsets(lemma, wordnet.NOUN)
    # All-proper lemmas are never considered natural.
    if synsets and all(synset_is_proper(s) for s in synsets):
        return False
    common_synsets = (s for s in synsets if not synset_is_proper(s))
    return any(synset_is_natural(s) for s in common_synsets)
예제 #25
0
def get_random_word(t, ref=False):
   """Return a random word from a set filtering on lexname category if necessary"""

   word = None
   allowed = POS[t]['lexnames']
   if len(allowed):
      # Keep drawing until the word's first synset lexname is allowed.
      lexname = ''
      while lexname not in allowed:
         word = random.choice(POS[t]['words'])[0]
         matches = wordnet.synsets(get_singular(word), pos=t)
         if matches:
            lexname = matches[0].lexname
   else:
      # No lexname restrictions for this POS tag: any word will do.
      word = random.choice(POS[t]['words'])[0]

   # If required, prefix with an article
   if ref:
      word = referenced(word)

   return word.lower()
예제 #26
0
def sentiment(content):
    """Crude sentiment score for *content* using SentiWordNet weights.

    Scores adjectives, verbs and adverbs; words later in a sentence and
    more subjective senses weigh more (idea from [Ohana, Tierney '09]).
    """
    if len(wordnet.sentiment) == 0:
        wordnet.sentiment.load()

    relevant_types = ['JJ', 'VB', 'RB']  #adjectives, verbs, adverbs
    score = 0
    sentences = split(parse(content))
    for sentence in sentences:
        for index, word in enumerate(sentence.words):
            if word.string != '' and word.type in relevant_types:
                try:
                    synset = wordnet.synsets(word.string, word.type)
                except KeyError:
                    #incorrect part of speech tag or not in wordnet, skip it
                    continue
                if not synset:
                    # BUG FIX: an empty synset list raised IndexError below
                    continue
                pos, neg, obj = synset[0].weight

                #weights concluding statements
                #idea from [Ohana, Tierney '09]
                documentpos = index / float(len(sentence.words))

                #weights more subjective statements
                subjscore = ((pos - neg) * (1 - obj))

                score = score + subjscore * documentpos
    return score
예제 #27
0
def list_hyponyms(word):
    """Sense names of all recursive hyponyms of *word*'s first synset."""
    matches = wordnet.synsets(word)
    if not matches:
        return []
    return [h.senses[0] for h in matches[0].hyponyms(recursive=True)]
예제 #28
0
def list_hyponyms(word):
    """Sense names of all recursive hyponyms of *word*'s first synset."""
    result = []
    senses = wordnet.synsets(word)
    if senses:
        for hyponym in senses[0].hyponyms(recursive=True):
            result.append(hyponym.senses[0])
    return result
예제 #29
0
파일: expand.py 프로젝트: fbkarsdorp/tmi
def expand(term, limit=3):
    """Expand *term* with up to *limit* hypernym senses of its first noun synset.

    Returns a set of strings (always including *term* on success).
    """
    # EXPANSION IS ONLY DONE FOR FIRST SYNSET. WHY?
    try:
        hypernyms = WN.synsets(term, 'NN')[0].hypernyms(recursive=True)
        return {w.senses[0] for w in hypernyms[:limit]}.union({term})
    except IndexError:
        # BUG FIX: was `return {}`, an empty *dict* -- callers expect a set.
        return set()
예제 #30
0
def synonyms(lemma, pos=NOUN):
    """All WordNet synonyms of *lemma*, excluding *lemma* itself; may repeat."""
    return [synonym
            for synset in wordnet.synsets(lemma, pos)
            for synonym in synset.synonyms
            if synonym != lemma]
예제 #31
0
def lemma_is_natural(lemma):
    """True if some non-proper noun synset of *lemma* counts as natural."""
    senses = wordnet.synsets(lemma, wordnet.NOUN)
    if len(senses) > 0 and all(synset_is_proper(s) for s in senses):
        # Every sense is a proper noun: never natural.
        return False
    for s in senses:
        if not synset_is_proper(s) and synset_is_natural(s):
            return True
    return False
예제 #32
0
def lemma_is_geological_formation(lemma):
    """True if some non-proper noun synset of *lemma* is a geological formation."""
    senses = wordnet.synsets(lemma, wordnet.NOUN)
    if len(senses) > 0 and all(synset_is_proper(s) for s in senses):
        # Every sense is a proper noun: never a formation.
        return False
    for s in senses:
        if not synset_is_proper(s) and synset_is_geological_formation(s):
            return True
    return False
예제 #33
0
 def define_word(self, word):
     """Append a short parenthesised gloss to *word* when WordNet knows it."""
     senses = wordnet.synsets(word)
     if senses:
         gloss = senses[0].gloss
         # Keep only the text before the first ';' separator.
         semi = gloss.find(';')
         if semi > -1:
             gloss = gloss[:semi]
         word = word + " (comprising of " + gloss + ") "
     return word
예제 #34
0
def get_synonyms(word, pos):
    """ return a set of strings, lowercase unless proper noun """
    result = set()
    for ss in wordnet.synsets(word, pos):
        # Underscores in multi-word synonyms become spaces.
        result.update(syn.replace('_', ' ') for syn in ss.synonyms)
    return result
예제 #35
0
def wn_filter_pos(text, pos):
    """True if *text* has synsets for *pos* and none of their synonyms is
    capitalised (i.e. reject proper nouns).
    """
    synsets = wordnet.synsets(text, pos=pos)
    for s in synsets:
        for synonym in s.synonyms:
            if synonym[0].isupper():
                return False
    # BUG FIX: previously fell off the end returning None when synsets was
    # empty; now always returns a bool.
    return len(synsets) > 0
예제 #36
0
def wn_filter_pos(text, pos):
	"""True if *text* has synsets for *pos* and none of their synonyms is
	capitalised (i.e. reject proper nouns).
	"""
	synsets = wordnet.synsets(text, pos=pos)
	for s in synsets:
		for synonym in s.synonyms:
			if synonym[0].isupper():
				return False
	# BUG FIX: previously fell off the end returning None when synsets was
	# empty; now always returns a bool.
	return len(synsets) > 0
예제 #37
0
 def define_word(self, word):
     """Decorate *word* with its first WordNet gloss, if any."""
     matches = wordnet.synsets(word)
     if len(matches) > 0:
         gloss = matches[0].gloss
         # Truncate at the first ';' to keep the definition short.
         if gloss.find(";") > -1:
             gloss = gloss[: gloss.find(";")]
         word = word + " (comprising of " + gloss + ") "
     return word
예제 #38
0
파일: wordnet.py 프로젝트: Priya22/tweedr
def WordnetFeatures(token):
    """Sense names of hypernyms (up to depth 2) of *token*'s first synset."""
    matches = wordnet.synsets(token)
    if not matches:
        return []
    ancestors = matches[0].hypernyms(depth=2, recursive=True)
#   ancestors could also include hyponyms: matches[0].hyponyms(depth=2, recursive=True)
    return [a.senses[0] for a in ancestors]
예제 #39
0
def lemma_is_person(lemma):
    """True if *lemma* denotes a person according to its noun synsets."""
    senses = wordnet.synsets(lemma, wordnet.NOUN)
    # if ALL the synsets are proper, then it's a person!
    if senses and all(synset_is_proper(s) for s in senses):
        return True
    # otherwise, check ONLY the non-proper synsets
    common = (s for s in senses if not synset_is_proper(s))
    return any(synset_is_person(s) for s in common)
예제 #40
0
def WordnetFeatures(token):
    """Sense names of hypernyms (up to depth 2) of *token*'s first synset."""
    senses = wordnet.synsets(token)
    if len(senses) == 0:
        return []
    features = []
    for hyper in senses[0].hypernyms(depth=2, recursive=True):
        features.append(hyper.senses[0])
    return features
예제 #41
0
def lemma_is_person(lemma):
    """True if *lemma* denotes a person according to its noun synsets."""
    senses = wordnet.synsets(lemma, wordnet.NOUN)
    # if ALL the synsets are proper, then it's a person!
    if len(senses) > 0 and all(synset_is_proper(s) for s in senses):
        return True
    # otherwise, check ONLY the non-proper synsets
    for s in senses:
        if not synset_is_proper(s) and synset_is_person(s):
            return True
    return False
예제 #42
0
def max_ic(words, pos):
    """Synonym of the randomly-chosen synset with the highest information
    content among *words*; None if no word has synsets."""
    best = None
    all_senses = [wordnet.synsets(w, pos=pos) for w in words]
    chosen = [random.choice(senses) for senses in all_senses if senses]
    if chosen:
        scored = [(s.synonyms[0], s.ic) for s in chosen]
        if scored:
            best = max(scored, key=lambda pair: pair[1])[0]
    return best
예제 #43
0
def hypernym_search(text, search_word):
    """Find words in *text* whose first-synset hypernym chain contains any
    of the '|'-separated terms in *search_word*; returns a set.
    """
    try:
        from pattern.en import wordnet
    except:
        print('Please install pattern: pip install https://github.com/clips/pattern/archive/development.zip')
        sys.exit()

    output = []
    # BUG FIX (consistency with the sibling implementation): an unknown
    # search word made `wordnet.synsets(...)[0]` raise IndexError; swallow
    # it and return what was found so far.
    try:
        for search_word in search_word.split('|'):
            synset = wordnet.synsets(search_word)[0]
            pos = synset.pos
            possible_words = re_search(text, pos)
            for match in possible_words:
                word = match[0].string
                synsets = wordnet.synsets(word)
                if len(synsets) > 0:
                    hypernyms = synsets[0].hypernyms(recursive=True)
                    if any(search_word == h.senses[0] for h in hypernyms):
                        output.append(word)
    except IndexError:
        pass
    return set(output)
예제 #44
0
def get_similar(word, pos):
    """ return a set of strings, lowercase unless proper noun """
    result = set()
    for ss in wordnet.synsets(word, pos):
        similar_synsets = ss.similar()  # a list of synsets
        if not similar_synsets:
            continue
        for sim_ss in similar_synsets:
            result.update(syn.replace('_', ' ') for syn in sim_ss.synonyms)
    return result
예제 #45
0
def list_hypernyms(search_word):
    """Per-synset lists of recursive hypernym sense names for *search_word*."""
    try:
        from pattern.en import wordnet
    except:
        print('Please install pattern: pip install https://github.com/clips/pattern/archive/development.zip')
        sys.exit()

    return [[h.senses[0] for h in synset.hypernyms(recursive=True)]
            for synset in wordnet.synsets(search_word)]
예제 #46
0
def list_hypernyms(search_word):
    """One list of hypernym sense names per synset of *search_word*."""
    try:
        from pattern.en import wordnet
    except:
        print('Please install pattern: pip install https://github.com/clips/pattern/archive/development.zip')
        sys.exit()

    result = []
    for synset in wordnet.synsets(search_word):
        chain = synset.hypernyms(recursive=True)
        result.append([ancestor.senses[0] for ancestor in chain])
    return result
예제 #47
0
def getOccurencies(dico, lis):
    """For each chapter in *dico*, record offsets where words of *lis* occur,
    and build a synonym map for those words.

    param: dico -- {chapter_key: {"fulltext": str, ...}}
    param: lis  -- list of lowercase words to search for
    return: (dico, synlist) where dico[j]["occurencies"] maps a character
            offset to the matched word, and synlist maps each word of *lis*
            to a de-duplicated list of WordNet synonyms.
    """
    synlist = {}

    # Loop on chapters, j is the key, dico[j] is the dictionary of this
    # chapter with a child "fulltext"
    for j in dico:
        c = dico[j]["fulltext"].lower()
        wordpos = {}

        # NOTE(review): this matches raw substrings, so "art" also matches
        # inside "part" -- confirm whole-word matching is not required.
        for i in range(len(c)):
            for word in lis:
                if c[i:len(word) + i] == word:
                    wordpos[i] = word

        # Accumulate WordNet synonyms for each word of the list.
        # (Duplicates across chapters are removed at the end.)
        for l in lis:
            for k in wordnet.synsets(l):
                synlist.setdefault(l, []).extend(k.synonyms)

        # We update dico
        dico[j]["occurencies"] = wordpos

    # De-duplicate the synonyms collected for each word.
    for word in synlist:
        synlist[word] = list(set(synlist[word]))
    return dico, synlist
예제 #48
0
def list_hyponyms(word):
    """Sense names of all recursive hyponyms of *word*'s first synset."""
    try:
        from pattern.en import wordnet
    except:
        print('Please install pattern: pip install https://github.com/clips/pattern/archive/development.zip')
        sys.exit()

    matches = wordnet.synsets(word)
    if not matches:
        return []
    return [h.senses[0] for h in matches[0].hyponyms(recursive=True)]
예제 #49
0
def list_hyponyms(word):
    """List the first sense name of every recursive hyponym of *word*."""
    try:
        from pattern.en import wordnet
    except:
        print('Please install pattern: pip install https://github.com/clips/pattern/archive/development.zip')
        sys.exit()

    result = []
    senses = wordnet.synsets(word)
    if len(senses) > 0:
        for hyponym in senses[0].hyponyms(recursive=True):
            result.append(hyponym.senses[0])
    return result
예제 #50
0
def get_related(word, plural=False):
    """Co-hyponyms (siblings) of *word*, as (label, similarity) tuples
    sorted by similarity descending, keeping 0.5 < similarity < 1 and
    excluding labels that contain *word* itself."""
    senses = wordnet.synsets(word)
    if not senses:
        return []
    sense = senses[0]
    if not sense.hypernym:
        return []
    transform = pluralize if plural else (lambda x: x)
    scored = []
    for sibling in sense.hypernym.hyponyms():
        label = transform(' '.join(sibling[0].split('_')))
        scored.append((label, sibling.similarity(sense)))
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [pair for pair in scored
            if 0.5 < pair[1] < 1 and word not in pair[0]]
예제 #51
0
파일: word_senses.py 프로젝트: rsteckel/EDA
def parse_phrases(documents):
    for document in documents:
        ptree = parsetree(document, relations=True, lemmata=True)    
        
        for sentence in ptree:
            print i, sentence.string
            for phrase in sentence.phrases:
                for word in phrase.words:
                    if word.pos in WN_POS:
                        print i, phrase, word, word.pos, wn.synsets(word.lemma, word.pos)        
                    else:
                        print i, phrase, word, word.pos
            print '\n'
예제 #52
0
def all_synsets(word, pos=None):
    """All synsets of *word*, optionally restricted to one coarse POS tag.

    param: pos -- one of 'NOUN', 'VERB', 'ADJ', 'ADV', or None for all four
    return: flat list of synsets
    """
    # Renamed from `map`: don't shadow the builtin.
    pos_map = {
        'NOUN': wordnet.NOUN,
        'VERB': wordnet.VERB,
        'ADJ': wordnet.ADJECTIVE,
        'ADV': wordnet.ADVERB
        }
    if pos is None:
        pos_list = [wordnet.VERB, wordnet.ADJECTIVE, wordnet.NOUN, wordnet.ADVERB]
    else:
        pos_list = [pos_map[pos]]
    ret = []
    for p in pos_list:
        ret.extend(wordnet.synsets(word, pos=p))
    return ret
예제 #53
0
def test_wordnet():
    """Print WordNet information for a hard-coded word (Python 2 prints).

    NOTE(review): `word` is reassigned several times and only the last
    value ("MongoDb") is looked up -- the earlier assignments look like
    leftover experiments kept for quick toggling.
    """
    from pattern.en import wordnet
      
    word = "bird"
    word = "Java"
    word = "C++"
    word = "MongoDb"
    for s in wordnet.synsets(word) :
     
        print 'Definition:', s.gloss
        print '  Synonyms:', s.synonyms
        print ' Hypernyms:', s.hypernyms()
        print '  Hyponyms:', s.hyponyms()
        print '  Holonyms:', s.holonyms()
        print '  Meronyms:', s.meronyms()
예제 #54
0
def is_animate(lemma):
    """True if some non-proper noun synset of *lemma* has 'person' among
    the synonyms of its recursive hypernyms.

    NOTE: this "works" but is very eager to grant animacy even for words
    that have one synset with a person hypernym (things like "rock" count
    as "animate" because a "rock" can be a person that you depend on).
    """
    # (removed an unused `hypernyms` accumulator from the original)
    for synset in wordnet.synsets(lemma, pos=NOUN):
        # skip synsets that are proper nouns, as these are always animate!
        if any(s[0].isupper() for s in synset.synonyms):
            continue
        synonyms = []
        for s in synset.hypernyms(recursive=True):
            synonyms.extend(s.synonyms)
        if 'person' in synonyms:
            return True
    return False
예제 #55
0
def is_animate(lemma):
	"""True if some non-proper noun synset of *lemma* has 'person' among
	the synonyms of its recursive hypernyms.

	NOTE: this "works" but is very eager to grant animacy even for words
	that have one synset with a person hypernym (things like "rock" count
	as "animate" because a "rock" can be a person that you depend on).
	"""
	# (removed an unused `hypernyms` accumulator from the original)
	for synset in wordnet.synsets(lemma, pos=NOUN):
		# skip synsets that are proper nouns, as these are always animate!
		if any(s[0].isupper() for s in synset.synonyms):
			continue
		synonyms = []
		for s in synset.hypernyms(recursive=True):
			synonyms.extend(s.synonyms)
		if 'person' in synonyms:
			return True
	return False
예제 #56
0
def get_words(word, pos, hyper_hypo, recursive=False, depth=None):
    """Map related words (hypernyms or hyponyms) to their zipf frequency.

    hyper_hypo: 'hyper' collects hypernyms, 'hypo' collects hyponyms.
    return: dict {word string (underscores -> spaces): zipf frequency}
    """
    result = {}
    for ss in wordnet.synsets(word, pos):
        if hyper_hypo == 'hyper':
            related = ss.hypernyms(recursive, depth)
        elif hyper_hypo == 'hypo':
            related = ss.hyponyms(recursive, depth)
        else:
            related = []
        for synset in related:
            for syn in synset.synonyms:
                cleaned = syn.replace('_', ' ')
                result[cleaned] = zipf_frequency(cleaned, 'en')
    return result
예제 #57
0
def get_alternations(word, pos, synset_id, nlp, verbose=False):
    """ return all alternations """
    alternations = {}  # {word_str : simi_score}
    # find all co-hyponyms (alternations) of the requested synset
    for ss in wordnet.synsets(word, pos):
        if int(ss.id) != int(synset_id):
            continue
        for hyper in ss.hypernyms():
            for hypo in hyper.hyponyms():
                if hypo.pos != pos:
                    continue  # only want the same pos
                for synonym in hypo.synonyms:
                    if synonym.lower() == word.lower():
                        continue  # don't want ss here
                    if synonym in alternations:
                        continue
                    simi_score = nlp(word).similarity(nlp(synonym))
                    if simi_score > 0:
                        alternations[synonym] = simi_score
    if verbose:
        print('found {} alternations'.format(len(alternations)))
    return alternations
예제 #58
0
def get_tables(words):
   """Build a list of tables for the SQL statement from random words"""

   # http://wordnet.princeton.edu/man/lexnames.5WN.html
   # Only these noun categories are acceptable as table names.
   lexnames = ['noun.plant', 
               'noun.animal', 
               'noun.food', 
               'noun.shape',
               'noun.body',
               'noun.artifact',
               'noun.object'
              ]

   # Loop until we find a table name that is less than the MAX_TABLE_NAME_LENGTH and has a
   # noun category that is in the list of lexnames above
   tables = []
   for i in range(0, MAX_TABLES):
      lexname = ''

      # NOTE(review): this loop never terminates if `words` contains no
      # word whose first synset matches one of the lexnames above.
      while lexname not in lexnames:
         ((word, tag), f) = random.choice(words)
         word = word.lower()

         s = wordnet.synsets(word)

         if len(s):
            s = s[0]
            lexname = s.lexname

            # Reject words whose hypernym name is too long for a table name.
            if len(s.hypernym) > MAX_TABLE_NAME_LENGTH:
               lexname = ''
         else:
            lexname = ''

      # `s` is the synset left over from the last accepted loop iteration.
      tables.append(s)

   print '------------------------------------------------------------------------'
   print word
   print tables[0].hyponyms()
   print '------------------------------------------------------------------------'

   return tables
예제 #59
0
	def add_vocab(self,word):
		 # NOTE(review): this method looks scrape-damaged -- the indentation
		 # mixes tabs and spaces inconsistently (the later `elif` branches sit
		 # at a different column), so it is very likely syntactically invalid
		 # as-is and the branch structure below should be re-verified against
		 # the original source before use.
		 if word in self.synsets: 
			self.synsets[word]+=1 #count frequency
		 	if word in self.vocab:
				self.total_words+=1
			else:
				self.vocab[word] = word
				self.total_words+=1
			pass
		 # NOTE(review): a word already in self.vocab is counted again here,
		 # i.e. total_words can be incremented twice for the same call.
		 if word in self.vocab:
			self.total_words+= 1
			pass
                 elif not word in self.synsets:
                 	singular = singularize(word)
                        # fall back to the original word if the singular is unknown
                        if not wordnet.synsets(singular): singular = word
                        self.vocab[word] = singular
                        self.total_words+= 1
                 elif word in self.synsets:
                        self.vocab[word] = word
                        self.total_words+= 1