Exemple #1
0
# Part-of-speech suffixes appended to a bare word, tried in this order.
_POS_TAGS = ('_NOUN', '_VERB', '_ADJ')


def _most_similar_any_pos(word_vectors, word, topn):
    """Return the `topn` nearest neighbours of `word` for the first usable POS tag.

    Each suffix in `_POS_TAGS` is appended to `word` in turn and looked up
    with `word_vectors.most_similar`. A `KeyError` (raised by the model for a
    key it does not contain) moves on to the next suffix.

    Returns:
        The list returned by `most_similar`, or None if every tagged form
        raised `KeyError`.
    """
    for tag in _POS_TAGS:
        try:
            return word_vectors.most_similar(word + tag, topn=topn)
        except KeyError:
            continue
    return None


def _count_neighbour_matches(word_vectors, pairs):
    """Count how often a pair's value appears among its key's nearest neighbours.

    Args:
        word_vectors: model exposing `most_similar(key, topn=...)`.
        pairs: mapping of word -> expected related word.

    Returns:
        Tuple `(top3, top10)`: number of matches within the first 3 and the
        first 10 neighbours respectively, summed over all pairs. Pairs whose
        key cannot be found under any POS tag are skipped.
    """
    top3 = 0
    top10 = 0
    for key, value in pairs.items():
        # One query is enough: the result is ordered by similarity, so the
        # top-3 neighbours are the first three of the top-10.
        neighbours = _most_similar_any_pos(word_vectors, key, topn=10)
        if neighbours is None:
            continue
        # Neighbour entries look like ('word_POS', score); compare the bare word.
        hits = [entry[0].split('_')[0] == value for entry in neighbours]
        top3 += sum(hits[:3])
        top10 += sum(hits)
    return top3, top10


def eval_rus():
    """Evaluate the 'word2vec-ruscorpora-300' vectors on the SimLex and WordSim pairs.

    Both datasets are loaded through `read_files` and are iterated with
    `.items()`, i.e. they are treated as word -> word mappings. For every pair
    the key is looked up in the model (as noun, then verb, then adjective) and
    the value is searched for among the key's nearest neighbours.

    Returns:
        A list `[simlex_top3, simlex_top10, wordsim_top3, wordsim_top10]` of
        match counts.
    """
    simlex = read_files.read_simlex_rus_file()
    wordsim = read_files.read_wordsim_rus_file()

    # evaluation of word vectors
    word_vectors = api.load('word2vec-ruscorpora-300')

    simlex_top3, simlex_top10 = _count_neighbour_matches(word_vectors, simlex)
    wordsim_top3, wordsim_top10 = _count_neighbour_matches(word_vectors, wordsim)

    return [simlex_top3, simlex_top10, wordsim_top3, wordsim_top10]
Exemple #2
0
# -*- coding: utf-8 -*-
import pymorphy2
import nltk
import copy
from wiki_ru_wordnet import WikiWordnet

# Module-level WikiWordnet instance shared by the synset lookups below.
wikiwordnet = WikiWordnet()
# pymorphy2 morphological analyzer, created once at import time
# (presumably used for lemmatization elsewhere in this file - TODO confirm).
morph = pymorphy2.MorphAnalyzer()


################
def synonymscheck(inlist, keylist):
    """Count synsets shared between each input word and its key word.

    The word at position `k` of `inlist` is compared with the word at
    position `k` of `keylist`: every synset of the key word that is also a
    synset of the input word counts as one correct answer. The running total
    is printed after each hit.

    Args:
        inlist: sequence of words to check.
        keylist: sequence of reference words, indexed in parallel with
            `inlist`.

    Returns:
        The total number of correct answers.

    Raises:
        IndexError: if `keylist` is shorter than `inlist`.
    """
    corrans = 0  # Correct answers (matching pairs found so far)
    # Pair by position rather than inlist.index(word): index() always returns
    # the first occurrence, which mispairs repeated words.
    for position, word in enumerate(inlist):
        synsets = wikiwordnet.get_synsets(word)
        for key_synset in wikiwordnet.get_synsets(keylist[position]):
            if key_synset in synsets:
                corrans += 1
                print(corrans, '/3')
    return corrans


###############
def getlemma(inlist):
    corrans = 0  #Correct Answers (Верные соответствия)
    for i in inlist:
        synsets = wikiwordnet.get_synsets(i)
        for synset in wikiwordnet.get_synsets(i):
            print(synset)
            for word in synset.get_words():