# Part-of-speech suffixes tried, in this order, when looking a word up in the
# embedding model (its keys have the form ``<lemma>_<POS>``).
_POS_TAGS = ('NOUN', 'VERB', 'ADJ')

# Sizes of the two neighbourhoods that are scored.
_SMALL_TOPN = 3
_LARGE_TOPN = 10


def _nearest_neighbours(word_vectors, word, topn):
    """Return the ``topn`` neighbours of ``word`` for the first POS tag the model knows, else ``None``."""
    for tag in _POS_TAGS:
        try:
            return word_vectors.most_similar(word + '_' + tag, topn=topn)
        except KeyError:
            # This word/POS combination is not in the vocabulary; try the next tag.
            continue
    return None


def _count_neighbour_hits(word_vectors, pairs):
    """Return ``(hits_in_top3, hits_in_top10)`` summed over all ``key -> value`` pairs."""
    hits_small = 0
    hits_large = 0
    for key, value in pairs.items():
        # most_similar ranks neighbours by decreasing similarity, so the top 3
        # are the first three of the top 10: one query serves both counts.
        neighbours = _nearest_neighbours(word_vectors, key, _LARGE_TOPN)
        if neighbours is None:
            continue  # word unknown under every POS tag
        # Drop the ``_POS`` suffix so neighbours compare against the bare word.
        lemmas = [entry[0].split('_')[0] for entry in neighbours]
        # Every matching neighbour counts, so one pair can add more than one
        # hit when the same lemma appears under several POS tags.
        hits_small += lemmas[:_SMALL_TOPN].count(value)
        hits_large += lemmas.count(value)
    return hits_small, hits_large


def eval_rus(simlex=None, wordsim=None, word_vectors=None):
    """Count how often a word's paired word shows up among its nearest neighbours.

    For every ``key -> value`` pair of each dataset, ``key`` is looked up in
    the embedding model as ``key_NOUN``, then ``key_VERB``, then ``key_ADJ``
    (first one present wins; keys missing under all three tags are skipped).
    The POS suffix is stripped from each returned neighbour and the number of
    neighbours equal to ``value`` is added to the dataset's counters.

    Args:
        simlex: Mapping of word to paired word. Defaults to
            ``read_files.read_simlex_rus_file()``.
        wordsim: Mapping of word to paired word. Defaults to
            ``read_files.read_wordsim_rus_file()``.
        word_vectors: Object exposing ``most_similar(key, topn=...)`` that
            returns ``(key, score)`` pairs and raises ``KeyError`` for unknown
            keys. Defaults to ``api.load('word2vec-ruscorpora-300')``; pass an
            already loaded model to avoid loading it again.

    Returns:
        ``[simlex_top3, simlex_top10, wordsim_top3, wordsim_top10]`` hit
        counts. The top-10 counts include the hits found in the top 3.

    Raises:
        Any exception other than ``KeyError`` raised by ``most_similar`` is
        propagated instead of being silently treated as "word not found".
    """
    if simlex is None:
        simlex = read_files.read_simlex_rus_file()
    if wordsim is None:
        wordsim = read_files.read_wordsim_rus_file()
    if word_vectors is None:
        word_vectors = api.load('word2vec-ruscorpora-300')

    simlex_small, simlex_large = _count_neighbour_hits(word_vectors, simlex)
    wordsim_small, wordsim_large = _count_neighbour_hits(word_vectors, wordsim)
    return [simlex_small, simlex_large, wordsim_small, wordsim_large]
# -*- coding: utf-8 -*- import pymorphy2 import nltk import copy from wiki_ru_wordnet import WikiWordnet wikiwordnet = WikiWordnet() morph = pymorphy2.MorphAnalyzer() ################ def synonymscheck(inlist, keylist): corrans = 0 #Correct Answers (Верные соответствия) for i in inlist: synsets = wikiwordnet.get_synsets(i) for n in wikiwordnet.get_synsets(keylist[inlist.index(i)]): if n in synsets: corrans += 1 print(corrans, '/3') else: pass ############### def getlemma(inlist): corrans = 0 #Correct Answers (Верные соответствия) for i in inlist: synsets = wikiwordnet.get_synsets(i) for synset in wikiwordnet.get_synsets(i): print(synset) for word in synset.get_words():