def determine_relation(syn1, syn2):
    """Describe how the word behind ``syn1`` relates to the word behind ``syn2``.

    Both arguments are sequences of WordNet synsets (only the first entry of
    each is used for the diagnostic output; the full sequences are handed to
    ``is_descendant``).

    Returns one of these strings:

    * ``"undetermined, entries not in wordnet"`` when either sequence is
      empty (or ``None``),
    * ``"is a verifiable fact"`` / ``"is a verifiable true over-generalization"``
      when ``is_descendant(syn1, syn2[, True])`` holds,
    * ``"is a verifiable falsehood"`` /
      ``"is a verifiable false over-generalization"`` when the same checks
      hold with the arguments swapped,
    * ``"figurative speech (two entries, with no roots)"`` otherwise.

    Side effect: prints the common ancestor and the similarity of the two
    leading synsets to stdout.
    """
    # Guard clause: without synsets on both sides nothing can be verified.
    # Truthiness (rather than len() > 0) also covers a None argument.
    if not syn1 or not syn2:
        return "undetermined, entries not in wordnet"

    first, second = syn1[0], syn2[0]

    # Diagnostic output. A single parenthesised argument prints identically
    # under the Python 2 print statement and the Python 3 print function.
    print('Common Ancestor: {0}'.format(wordnet.ancestor(first, second)))
    print('Similarity measure between synsets: {0}'.format(
        wordnet.similarity(first, second)))

    # The checks are ordered; the first one that holds decides the label.
    # NOTE(review): the third positional argument to is_descendant presumably
    # relaxes the check (the labels call it an "over-generalization") --
    # confirm against is_descendant's definition.
    if is_descendant(syn1, syn2):
        return "is a verifiable fact"
    if is_descendant(syn1, syn2, True):
        return "is a verifiable true over-generalization"
    if is_descendant(syn2, syn1):
        return "is a verifiable falsehood"
    if is_descendant(syn2, syn1, True):
        return "is a verifiable false over-generalization"
    return "figurative speech (two entries, with no roots)"
def word_similarity(word1, word2):
    """Return the WordNet similarity of two words, or 0 if it cannot be computed.

    Only the first synset returned for each word is compared. The original
    docstring describes the score as ranging from 0 to 1.

    Any ordinary failure -- for example a word with no synsets, which makes
    the ``[0]`` lookup raise -- yields 0 instead of propagating.
    """
    # Imported lazily, as in the original; an ImportError here is NOT
    # swallowed because it happens outside the try block.
    from pattern.en import wordnet

    try:
        first = wordnet.synsets(word1)[0]
        second = wordnet.synsets(word2)[0]
        return wordnet.similarity(first, second)
    except Exception:
        # Deliberate best-effort fallback. Catching Exception (not a bare
        # except) lets KeyboardInterrupt and SystemExit pass through.
        return 0
def custom_similarity(word, synsets, pos=None):
    """Score ``word`` against each synset in ``synsets`` using WordNet similarity.

    ``word`` is lower-cased and singularized, then looked up in WordNet
    (restricted to ``pos`` when one is given). Every pairing of a given
    synset with a synset of the word is scored; pairings whose similarity
    call raises are skipped.

    NOTE(review): the visible code collects the scores in a local list but
    never returns or otherwise uses it, so the function returns ``None``.
    The definition looks truncated -- confirm the intended return value
    (e.g. the list, its maximum, or its mean) before relying on this.
    """
    word = singularize(word.lower())

    if pos:
        word_synsets = wordnet.synsets(word, pos=pos)
    else:
        word_synsets = wordnet.synsets(word)

    similarities = []
    for candidate in synsets:
        for word_synset in word_synsets:
            try:
                similarities.append(wordnet.similarity(candidate, word_synset))
            except Exception:
                # Best-effort: a pair that cannot be scored is ignored.
                # `except Exception:` replaces the Python-2-only
                # `except Exception, e` form (the binding was unused).
                continue
def wordnet_potential_parent(word1, pos1, word2, pos2, min_sim=0.0):
    """Collect common ancestors shared by the senses of two words.

    Every synset of ``word1`` (looked up with ``pos1``) is paired with every
    synset of ``word2`` (looked up with ``pos2``). A pair contributes an
    ``(ancestor, similarity)`` tuple when ``wn.ancestor`` returns something
    truthy and the pair's similarity is strictly greater than ``min_sim``.

    Returns the list of such tuples, in pairing order (possibly empty).
    """
    first_senses = wn.synsets(word1, pos1)
    second_senses = wn.synsets(word2, pos2)

    candidates = []
    for sense_a in first_senses:
        for sense_b in second_senses:
            shared = wn.ancestor(sense_a, sense_b)
            if not shared:
                # No common ancestor: similarity is not even computed.
                continue
            score = wn.similarity(sense_a, sense_b)
            if score > min_sim:
                candidates.append((shared, score))
    return candidates
def try_similarity(w1, w2):
    """Return ``wordnet.similarity(w1, w2)``, or 0 if that call fails.

    Any ordinary exception raised by the similarity call is treated as
    "no similarity" and mapped to 0.
    """
    try:
        return wordnet.similarity(w1, w2)
    except Exception:
        # Best-effort fallback; Exception (rather than a bare except) keeps
        # KeyboardInterrupt and SystemExit from being silently swallowed.
        return 0
# NOTE(review): the statements down to `return selected_terms` read as the
# tail of a function whose `def` line is not visible in this view -- confirm
# the enclosing definition and its indentation.

# Run parse() (chunking disabled) over every sentence and strip the '/'
# characters from its output; store the result in a new column.
df['sentence-pos'] = df['sentence'].apply(lambda x: parse(x, chunks=False).replace('/', ''))
# Turn the processed sentences into a feature matrix.
X = vectorizer.fit_transform(df['sentence-pos'])
# Select k=100 features scored with chi2 against the 'bps' column.
# (presumably scikit-learn's SelectKBest / chi2 -- verify against the imports)
selector = SelectKBest(chi2, k=100)
# S (the reduced matrix) is not used again in the visible code.
S = selector.fit_transform(X, df['bps'].tolist())
fnames = vectorizer.get_feature_names()
# get_support(True) is used below as positions into fnames.
indices = selector.get_support(True)
# Map the selected positions back to their feature names.
selected_terms = [ fnames[i] for i in indices ]
return selected_terms
# Similarity demo: first adjective senses of 'tone' and 'curly', and the first
# sense of 'box' (any part of speech).
a = wordnet.synsets('tone', pos=wordnet.ADJECTIVE)[0]
b = wordnet.synsets('curly', pos=wordnet.ADJECTIVE)[0]
c = wordnet.synsets('box')[0]
# Print the common ancestor of a and b, then similarity of a with itself,
# with b, and with c.
print wordnet.ancestor(a, b)
print wordnet.similarity(a, a)
print wordnet.similarity(a, b)
print wordnet.similarity(a, c)
# NOTE(review): `s` here is whatever was assigned before this view begins;
# it is rebound to a synset immediately below.
print modality(s)
# wordnet: inspect the first synset returned for 'bird'.
s = wordnet.synsets('bird')[0]
print 'Definition:', s.gloss # Definition string.
print ' Synonyms:', s.synonyms # List of word forms (i.e., synonyms)
print ' Hypernyms:', s.hypernyms( ) # returns a list of parent synsets (i.e., more general). Synset (semantic parent).
# Same call with the recursive/depth keywords spelled out (printed under the same label).
print ' Hypernyms:', s.hypernyms(recursive=False, depth=None)
print ' Hyponyms:', s.hyponyms( ) # returns a list child synsets (i.e., more specific).
# Same call with the recursive/depth keywords spelled out (printed under the same label).
print ' Hyponyms:', s.hyponyms(recursive=False, depth=None)
print ' Holonyms:', s.holonyms( ) # List of synsets (of which this is a member).
print ' Meronyms:', s.meronyms() # List of synsets (members/parts).
print ' POS:', s.pos # Part-of-speech: NOUN | VERB | ADJECTIVE | ADVERB.
print ' Category:', s.lexname # Category string, or None.
print 'Info Cont.:', s.ic # Information Content (float).
print ' Antonym:', s.antonym # Synset (semantic opposite).
# NOTE(review): the label says 'Synsets' but the value is s.similar().
print ' Synsets:', s.similar() # List of synsets (similar adjectives/verbs).
# sense similarity: compare the first senses of 'cat', 'dog' and 'box'.
a = wordnet.synsets('cat')[0]
b = wordnet.synsets('dog')[0]
c = wordnet.synsets('box')[0]
# Common ancestor of a and b, then similarity of a with itself, b and c.
print wordnet.ancestor(a, b)
print wordnet.similarity(a, a)
print wordnet.similarity(a, b)
print wordnet.similarity(a, c)
# synset sentiment: print the `weight` attribute of the first adjective sense
# of each word. ADJECTIVE is a bare name that must already be in scope.
print wordnet.synsets('happy', ADJECTIVE)[0].weight
print wordnet.synsets('sad', ADJECTIVE)[0].weight
# NOTE(review): every call below is a bare expression statement whose result
# is discarded -- this looks like an interactive/notebook transcript; confirm
# before relying on it as a script.
# `taxonomy` is defined outside this view.
taxonomy.children('plant', recursive=False)
taxonomy.classify('bank')
# Mid-file import, kept where the original placed it.
from pattern.en import wordnet
# Compare the synset at index 4 for 'tone' with the first synset for 'color'.
a = wordnet.synsets('tone')[4]
b = wordnet.synsets('color')[0]
wordnet.similarity(a,b)
# a and b are rebound here from synsets to token lists that differ in one word.
a = ['this', 'is', 'a', 'test']
b = ['this', 'was', 'a', 'test']
# edit_distance receives the lists themselves; the two set-based distances
# receive set(a) and set(b). All three functions come from outside this view.
edit_distance(a, b)
jaccard_distance(set(a), set(b))
masi_distance(set(a), set(b))