def test_word_similarity():
    """Smoke-test WordNetSimilarity across its similarity entry points.

    Exercises concept-level similarity, English word similarity,
    monolingual (Spanish, Chinese) and cross-lingual word similarity.
    Each check only requires that the call returns something other than
    ``None``; the numbers in the trailing comments are reference outputs
    noted alongside the calls and are not asserted.
    """
    from sematch.semantic.similarity import WordNetSimilarity

    wns = WordNetSimilarity()
    dog_synsets = wns.word2synset('dog')
    cat_synsets = wns.word2synset('cat')

    # Concept similarity between the first synset of each word (Path).
    concept_path = wns.similarity(dog_synsets[0], cat_synsets[0], 'path')
    assert concept_path is not None  # 0.2

    # English word similarity (Li).
    english_li = wns.word_similarity('dog', 'cat', 'li')
    assert english_li is not None  # 0.449327301063

    # Spanish word similarity (Lin).
    spanish_lin = wns.monol_word_similarity('perro', 'gato', 'spa', 'lin')
    assert spanish_lin is not None  # 0.876800984373

    # Chinese word similarity (Wu & Palmer).
    chinese_wup = wns.monol_word_similarity('狗', '猫', 'cmn', 'wup')
    assert chinese_wup is not None  # 0.857142857143

    # Spanish-English word similarity (Resnik).
    spa_eng_res = wns.crossl_word_similarity(
        'perro', 'cat', 'spa', 'eng', 'res')
    assert spa_eng_res is not None  # 7.91166650904

    # Spanish-Chinese word similarity (Jiang & Conrath).
    spa_cmn_jcn = wns.crossl_word_similarity(
        'perro', '猫', 'spa', 'cmn', 'jcn')
    assert spa_cmn_jcn is not None  # 0.31023804699

    # Chinese-English word similarity (WPath).
    cmn_eng_wpath = wns.crossl_word_similarity(
        '狗', 'cat', 'cmn', 'eng', 'wpath')
    assert cmn_eng_wpath is not None  # 0.593666388463
def test_wordnet_similarity():
    """Check that each WordNetSimilarity call in the table returns a value.

    The cases run in order: one concept-level comparison, one English
    word comparison, two monolingual comparisons (Spanish, Chinese) and
    three cross-lingual comparisons. A case passes when the call's result
    is not ``None``. Trailing comments record reference outputs for the
    calls; they are informational and not asserted.
    """
    from sematch.semantic.similarity import WordNetSimilarity

    wns = WordNetSimilarity()
    dog = wns.word2synset('dog')
    cat = wns.word2synset('cat')

    # (callable, positional arguments) pairs, evaluated top to bottom.
    cases = (
        # Concept similarity between the first synsets, Path method.
        (wns.similarity, (dog[0], cat[0], 'path')),  # 0.2
        # English word similarity, Li method.
        (wns.word_similarity, ('dog', 'cat', 'li')),  # 0.449327301063
        # Spanish word similarity, Lin method.
        (wns.monol_word_similarity,
         ('perro', 'gato', 'spa', 'lin')),  # 0.876800984373
        # Chinese word similarity, Wu & Palmer method.
        (wns.monol_word_similarity,
         ('狗', '猫', 'cmn', 'wup')),  # 0.857142857143
        # Spanish and English word similarity, Resnik method.
        (wns.crossl_word_similarity,
         ('perro', 'cat', 'spa', 'eng', 'res')),  # 7.91166650904
        # Spanish and Chinese word similarity, Jiang & Conrath method.
        (wns.crossl_word_similarity,
         ('perro', '猫', 'spa', 'cmn', 'jcn')),  # 0.31023804699
        # Chinese and English word similarity, WPath method.
        (wns.crossl_word_similarity,
         ('狗', 'cat', 'cmn', 'eng', 'wpath')),  # 0.593666388463
    )

    for measure, arguments in cases:
        assert measure(*arguments) is not None
def test_wordsim_evaluation():
    """Run the word-similarity evaluation example and print each result.

    Prints the available dataset names, evaluates a WPath-based metric on
    the 'noun_simlex' dataset, runs the statistical comparison of 'wpath'
    against 'path' on that dataset, and finally evaluates a Spanish and an
    English-Spanish metric on the 'rg65_spanish' and 'rg65_EN-ES'
    datasets. Nothing is asserted; the function only exercises the calls.

    Output uses single-argument ``print(...)`` calls, which print the same
    text on Python 2 and Python 3.
    """
    from sematch.evaluation import WordSimEvaluation
    from sematch.semantic.similarity import WordNetSimilarity

    evaluation = WordSimEvaluation()
    print(evaluation.dataset_names())
    wns = WordNetSimilarity()

    # Define the similarity metric under evaluation.
    def wpath(x, y):
        # Third argument 0.8 is presumably the WPath weighting parameter
        # -- TODO confirm against word_similarity_wpath.
        return wns.word_similarity_wpath(x, y, 0.8)

    # Evaluate the similarity metric.
    print(evaluation.evaluate_metric('wpath', wpath, 'noun_simlex'))
    # Perform Steiger's Z significance test.
    print(evaluation.statistical_test('wpath', 'path', 'noun_simlex'))

    def wpath_es(x, y):
        # NOTE(review): despite the name, this uses the 'path' measure,
        # not 'wpath' -- confirm whether that is intended.
        return wns.monol_word_similarity(x, y, 'spa', 'path')

    def wpath_en_es(x, y):
        return wns.crossl_word_similarity(x, y, 'eng', 'spa', 'wpath')

    print(evaluation.evaluate_metric('wpath_es', wpath_es, 'rg65_spanish'))
    print(evaluation.evaluate_metric(
        'wpath_en_es', wpath_en_es, 'rg65_EN-ES'))
from sematch.semantic.similarity import WordNetSimilarity

# Script: compute the similarity of two words with several measures,
# print each score, then print an average over all measures but 'zhou'.
wn_sim = WordNetSimilarity()

w1, lang1 = 'gil', 'pol'
w2, lang2 = "sowa", 'pol'

# 'lch' appeared in an earlier, commented-out version of this list and is
# not computed here.
sim_types = ['path', 'wup', 'li', 'res', 'lin', 'jcn', 'wpath', 'zhou']

result = []
for method in sim_types:
    score = wn_sim.crossl_word_similarity(w1, w2, lang1, lang2, method)
    entry = {'name': method, 'sim': score}
    result.append(entry)
    print(entry)

# Look scores up by method name instead of by position in `result`.
sim_by_method = {entry['name']: entry['sim'] for entry in result}

# Average of the seven measures other than 'zhou'; the 'res' score is
# divided by 10 before it is added to the others.
avg = (sim_by_method['path']
       + sim_by_method['wup']
       + sim_by_method['li']
       + sim_by_method['res'] / 10
       + sim_by_method['lin']
       + sim_by_method['jcn']
       + sim_by_method['wpath']) / 7
print("average from other methods: " + str(avg))
from sematch.semantic.similarity import WordNetSimilarity

# Usage examples for WordNetSimilarity. The return values are discarded;
# the trailing comments record reference outputs for each call.
wns = WordNetSimilarity()

# English word pair, Li measure.
wns.word_similarity('dog', 'cat', 'li')  # 0.449327301063

# Spanish word pair, Lin measure.
wns.monol_word_similarity('perro', 'gato', 'spa', 'lin')  # 0.876800984373

# Chinese word pair, Wu & Palmer measure.
wns.monol_word_similarity('狗', '猫', 'cmn', 'wup')  # 0.857142857143

# Spanish word vs. English word, Resnik measure.
wns.crossl_word_similarity(
    'perro', 'cat', 'spa', 'eng', 'res')  # 7.91166650904

# Spanish word vs. Chinese word, Jiang & Conrath measure.
wns.crossl_word_similarity(
    'perro', '猫', 'spa', 'cmn', 'jcn')  # 0.31023804699

# Chinese word vs. English word, WPath measure.
wns.crossl_word_similarity(
    '狗', 'cat', 'cmn', 'eng', 'wpath')  # 0.593666388463