コード例 #1
0
def test_word_similarity():
    """Smoke-test the WordNetSimilarity entry points across several languages.

    Each check only requires the returned score to be something other than
    ``None``.  The numbers in the trailing comments are the values quoted by
    the original example; they are not asserted.
    """
    from sematch.semantic.similarity import WordNetSimilarity

    measure = WordNetSimilarity()
    dog_synsets = measure.word2synset('dog')
    cat_synsets = measure.word2synset('cat')

    # Concept-level similarity between the first synset of each word (Path).
    path_score = measure.similarity(dog_synsets[0], cat_synsets[0], 'path')
    assert path_score is not None  # 0.2

    # English word pair, Li method.
    li_score = measure.word_similarity('dog', 'cat', 'li')
    assert li_score is not None  # 0.449327301063

    # Spanish word pair, Lin method.
    lin_score = measure.monol_word_similarity('perro', 'gato', 'spa', 'lin')
    assert lin_score is not None  # 0.876800984373

    # Chinese word pair, Wu & Palmer method.
    wup_score = measure.monol_word_similarity('狗', '猫', 'cmn', 'wup')
    assert wup_score is not None  # 0.857142857143

    # Spanish / English word pair, Resnik method.
    res_score = measure.crossl_word_similarity(
        'perro', 'cat', 'spa', 'eng', 'res')
    assert res_score is not None  # 7.91166650904

    # Spanish / Chinese word pair, Jiang & Conrath method.
    jcn_score = measure.crossl_word_similarity(
        'perro', '猫', 'spa', 'cmn', 'jcn')
    assert jcn_score is not None  # 0.31023804699

    # Chinese / English word pair, WPath method.
    wpath_score = measure.crossl_word_similarity(
        '狗', 'cat', 'cmn', 'eng', 'wpath')
    assert wpath_score is not None  # 0.593666388463
コード例 #2
0
def test_wordnet_similarity():
    """Check that every WordNetSimilarity call in the example yields a score.

    A call passes when its result is not ``None``.  Trailing comments repeat
    the scores quoted by the original example and are informational only.
    """
    from sematch.semantic.similarity import WordNetSimilarity

    def require_score(score):
        # Same condition as an inline check: anything except None passes.
        assert score is not None

    wns = WordNetSimilarity()
    dog = wns.word2synset('dog')
    cat = wns.word2synset('cat')

    # Path method on the first synset of each word
    require_score(wns.similarity(dog[0], cat[0], 'path'))  # 0.2
    # Li method, English words
    require_score(wns.word_similarity('dog', 'cat', 'li'))  # 0.449327301063
    # Lin method, Spanish words
    require_score(
        wns.monol_word_similarity('perro', 'gato', 'spa', 'lin')
    )  # 0.876800984373
    # Wu & Palmer method, Chinese words
    require_score(
        wns.monol_word_similarity('狗', '猫', 'cmn', 'wup')
    )  # 0.857142857143
    # Resnik method, Spanish word against English word
    require_score(
        wns.crossl_word_similarity('perro', 'cat', 'spa', 'eng', 'res')
    )  # 7.91166650904
    # Jiang & Conrath method, Spanish word against Chinese word
    require_score(
        wns.crossl_word_similarity('perro', '猫', 'spa', 'cmn', 'jcn')
    )  # 0.31023804699
    # WPath method, Chinese word against English word
    require_score(
        wns.crossl_word_similarity('狗', 'cat', 'cmn', 'eng', 'wpath')
    )  # 0.593666388463
コード例 #3
0
def test_wordsim_evaluation():
    """Run the word-similarity evaluation example and print each result.

    Prints, in order: the available dataset names, the evaluation of a WPath
    metric on 'noun_simlex', the statistical test between 'wpath' and 'path'
    on 'noun_simlex', and the evaluations of a Spanish and an English-Spanish
    metric on 'rg65_spanish' and 'rg65_EN-ES'.  Nothing is asserted or
    returned.
    """
    from sematch.evaluation import WordSimEvaluation
    from sematch.semantic.similarity import WordNetSimilarity

    evaluation = WordSimEvaluation()
    # Single-argument print(...) behaves the same on Python 2 and Python 3;
    # the bare statement form is a SyntaxError on Python 3.
    print(evaluation.dataset_names())
    wns = WordNetSimilarity()

    # Define the similarity metric under evaluation.
    def wpath(x, y):
        return wns.word_similarity_wpath(x, y, 0.8)

    # Evaluate the similarity metric.
    print(evaluation.evaluate_metric('wpath', wpath, 'noun_simlex'))
    # Perform Steiger's Z significance test.
    print(evaluation.statistical_test('wpath', 'path', 'noun_simlex'))

    # NOTE(review): registered as 'wpath_es' but computed with the 'path'
    # measure -- confirm this is intended.
    def wpath_es(x, y):
        return wns.monol_word_similarity(x, y, 'spa', 'path')

    def wpath_en_es(x, y):
        return wns.crossl_word_similarity(x, y, 'eng', 'spa', 'wpath')

    print(evaluation.evaluate_metric('wpath_es', wpath_es, 'rg65_spanish'))
    print(evaluation.evaluate_metric('wpath_en_es', wpath_en_es, 'rg65_EN-ES'))
コード例 #4
0
from sematch.semantic.similarity import WordNetSimilarity

wn_sim = WordNetSimilarity()

# Word pair to compare; both words use the language code 'pol'.
w1, lang1 = 'gil', 'pol'
w2, lang2 = "sowa", 'pol'

# One {'name': ..., 'sim': ...} record per measure, in the order listed below.
# The earlier measure list also contained 'lch'; it is not run here.
result = []
for sim_type in ('path', 'wup', 'li', 'res', 'lin', 'jcn', 'wpath', 'zhou'):
    sim = wn_sim.crossl_word_similarity(w1, w2, lang1, lang2, sim_type)
    tmp = dict(name=sim_type, sim=sim)
    result.append(tmp)
    print(tmp)

# Average the first seven scores (everything before 'zhou').  The score at
# index 3 ('res') is divided by 10 before being added.  Terms are added
# strictly left to right, starting from the first score.
total = result[0]['sim']
for position in range(1, 7):
    term = result[position]['sim']
    if position == 3:
        term = term / 10
    total = total + term
avg = total / 7
print("average from other methods: " + str(avg))
コード例 #5
0
from sematch.semantic.similarity import WordNetSimilarity
wns = WordNetSimilarity()

# Each entry is (method name, positional arguments).  The trailing comment is
# the score quoted by the original example.  Calls run top to bottom and the
# returned values are discarded.
_EXAMPLE_CALLS = (
    # English word similarity, Li method
    ('word_similarity', ('dog', 'cat', 'li')),  # 0.449327301063
    # Spanish word similarity, Lin method
    ('monol_word_similarity', ('perro', 'gato', 'spa', 'lin')),  # 0.876800984373
    # Chinese word similarity, Wu & Palmer method
    ('monol_word_similarity', ('狗', '猫', 'cmn', 'wup')),  # 0.857142857143
    # Spanish / English word similarity, Resnik method
    ('crossl_word_similarity', ('perro', 'cat', 'spa', 'eng', 'res')),  # 7.91166650904
    # Spanish / Chinese word similarity, Jiang & Conrath method
    ('crossl_word_similarity', ('perro', '猫', 'spa', 'cmn', 'jcn')),  # 0.31023804699
    # Chinese / English word similarity, WPath method
    ('crossl_word_similarity', ('狗', 'cat', 'cmn', 'eng', 'wpath')),  # 0.593666388463
)

for _method_name, _arguments in _EXAMPLE_CALLS:
    getattr(wns, _method_name)(*_arguments)