Example #1
0
    def get_gold_synsets(self, heb_word):
        """Return a uniform weight for every synset found for a Hebrew word.

        The word is first looked up with ``lang='heb'``. When that yields
        nothing, the word is translated and the translation is looked up
        without a language argument.

        Args:
            heb_word: value accepted by the ``HebrewString`` constructor.

        Returns:
            dict mapping each synset to ``1 / number_of_synsets``, or None
            when no synset is found (including when translation fails).
        """
        heb_word = HebrewString(heb_word)
        heb_text = heb_word.heb_ltrs()

        synsets = self.wordnet.synsets(heb_text, lang='heb')
        # Case heb_word does not appear in Hebrew Wordnet
        if not synsets:
            ts = Translator()
            # Pass the Hebrew-letter text, as the lookup above does, rather
            # than the HebrewString wrapper object.
            eng_word = ts.translate(heb_text)
            # A failed translation (None) must not reach the synset lookup;
            # treat it the same as "nothing found".
            if eng_word is None:
                synsets = []
            else:
                synsets = self.wordnet.synsets(eng_word)
            if not synsets:
                print("No real Synset has been found")
                return None

        prob = 1 / float(len(synsets))
        return {synset: prob for synset in synsets}
Example #2
0
 def __init__(self, word2vec_utilities):
     """Wire up the helper objects this instance relies on.

     Stores the supplied word2vec utilities object, the module-level
     ``wordnet`` object and a newly created ``Translator`` as attributes.
     """
     self.word2vec = word2vec_utilities
     self.wordnet = wordnet
     self.translator = Translator()
Example #3
0
class WordnetUtilities:
    """Synset lookups for Hebrew words.

    Every lookup tries ``self.wordnet.synsets(..., lang='heb')`` first and
    falls back to translating the word with ``self.translator`` and querying
    ``self.wordnet.synsets`` without a language argument.
    """

    def __init__(self, word2vec_utilities):
        """Store the collaborators used by the lookup methods.

        Args:
            word2vec_utilities: object exposing ``build_retriever(word)``.
        """
        self.translator = Translator()
        self.word2vec = word2vec_utilities
        self.wordnet = wordnet

    def get_word2vec_similar_synsets(self, heb_word,
                                     number_of_required_synsets):
        """Collect synsets of words that word2vec suggests for ``heb_word``.

        Suggestions are consumed in the order the retriever yields them.
        A suggestion whose ``eng_ltrs()`` form contains the query word's
        ``eng_ltrs()`` form is skipped. Each new synset of a suggestion is
        weighted ``similarity / k`` where ``k`` is the smaller of the number
        of synsets found for that suggestion and the number of synsets still
        missing at that point.

        Args:
            heb_word: value accepted by the ``HebrewString`` constructor.
            number_of_required_synsets: how many distinct synsets to gather.

        Returns:
            dict mapping synset to weight, or None when the retriever's
            initial ``get`` returns None. The dict holds fewer entries than
            requested when the retriever runs out of suggestions.
        """
        heb_word = HebrewString(heb_word)
        # The transliterated query never changes, so compute it once instead
        # of on every loop iteration.
        heb_word_eng = heb_word.eng_ltrs()
        retriever = self.word2vec.build_retriever(heb_word_eng)

        word2vec_suggestions = retriever.get(number_of_required_synsets)
        if word2vec_suggestions is None:
            return None

        similar_synsets = dict()

        while len(similar_synsets) < number_of_required_synsets:
            if not word2vec_suggestions:
                word2vec_suggestions = retriever.get_more()
                # NOTE(review): assumes get_more() signals exhaustion with an
                # empty list or None - confirm against the retriever. Without
                # this guard the pop() below fails on an empty refill.
                if not word2vec_suggestions:
                    break

            suggestion, similarity = word2vec_suggestions.pop(0)
            suggestion = HebrewString(suggestion)

            if heb_word_eng in suggestion.eng_ltrs():
                print("Passed over {0}".format(suggestion.heb_ltrs()))
                continue

            suggestion_synsets = self._get_suggestion_synsets(suggestion)
            if not suggestion_synsets:
                continue

            # Fixed before any synset of this suggestion is added, so all of
            # them share the same divisor.
            number_of_suggestion_synsets = min(
                len(suggestion_synsets),
                number_of_required_synsets - len(similar_synsets))
            for synset in suggestion_synsets:
                if len(similar_synsets) >= number_of_required_synsets:
                    break
                if synset not in similar_synsets:
                    similar_synsets[synset] = (similarity /
                                               number_of_suggestion_synsets)

        return similar_synsets

    def _get_suggestion_synsets(self, suggestion):
        """Look up synsets for one suggestion, translating when needed.

        Args:
            suggestion: object exposing ``heb_ltrs()``.

        Returns:
            The list from the ``lang='heb'`` lookup when it is non-empty;
            otherwise the list found for the translated text (possibly
            empty), or ``[]`` when the translator returns None.
        """
        heb_text = suggestion.heb_ltrs()

        suggestion_synsets = self.wordnet.synsets(heb_text, lang='heb')
        if suggestion_synsets:
            print("Found {0} in Hebrew WN".format(heb_text))
            return suggestion_synsets

        print("Couldn't find {0} in Hebrew WN".format(heb_text))
        translated_text = self.translator.translate(heb_text)
        if translated_text is None:
            print("Couldn't translate {0}".format(heb_text))
            return []

        print("Translated {0}".format(translated_text))
        suggestion_synsets = self.wordnet.synsets(translated_text)
        if suggestion_synsets:
            print("Found {0}({1}) in English WN".
                  format(heb_text, translated_text))
        else:
            print("Couldn't find {0}({1}) in English WN".
                  format(heb_text, translated_text))
        return suggestion_synsets

    def get_gold_synsets(self, heb_word):
        """Return a uniform weight for every synset found for a Hebrew word.

        Args:
            heb_word: value accepted by the ``HebrewString`` constructor.

        Returns:
            dict mapping each synset to ``1 / number_of_synsets``, or None
            when no synset is found (including when translation fails).
        """
        heb_word = HebrewString(heb_word)
        heb_text = heb_word.heb_ltrs()

        synsets = self.wordnet.synsets(heb_text, lang='heb')
        # Case heb_word does not appear in Hebrew Wordnet
        if not synsets:
            # Reuse the instance's translator and pass the Hebrew-letter
            # text, matching _get_suggestion_synsets.
            eng_word = self.translator.translate(heb_text)
            # A failed translation (None) must not reach the synset lookup;
            # treat it the same as "nothing found".
            if eng_word is None:
                synsets = []
            else:
                synsets = self.wordnet.synsets(eng_word)
            if not synsets:
                print("No real Synset has been found")
                return None

        prob = 1 / float(len(synsets))
        return {synset: prob for synset in synsets}