def _should_replace_with_letter(word, w2v_utilities, i, letter):
    """Decide whether placing ``letter`` at index ``i`` of ``word`` is useful.

    The candidate position is rejected when ``i`` has no right-hand
    neighbour inside ``word`` or when either neighbour of ``i`` already
    equals ``letter``.  Otherwise the letter is placed with
    ``_place_letter_at_index``, the result is passed through
    ``_remove_puncutation``, and a retriever is built for its
    ``eng_ltrs()`` form via ``w2v_utilities.build_retriever``.

    Args:
        word: Indexable sequence of characters being examined.
        w2v_utilities: Object exposing ``build_retriever(...)``.
        i: Index in ``word`` at which ``letter`` would be placed.
        letter: The character to place.

    Returns:
        bool: ``True`` when ``retriever.get(1)`` yields something other
        than ``None``; ``False`` when the position is rejected or the
        retriever returns ``None``.
    """
    # The right-hand neighbour word[i + 1] must exist.  The previous guard
    # (``i != len(word)``) still let ``i == len(word) - 1`` through, which
    # raised IndexError on the neighbour lookup below.
    if i + 1 >= len(word):
        return False

    # NOTE(review): for i == 0, word[i - 1] wraps around to the last
    # character -- presumably callers never pass 0; confirm.
    if word[i - 1] == letter or word[i + 1] == letter:
        return False

    new_word = _place_letter_at_index(word, letter, i)
    new_word = _remove_puncutation(new_word)
    heb_string = HebrewString(new_word)
    retriever = w2v_utilities.build_retriever(heb_string.eng_ltrs())

    return retriever.get(1) is not None
Exemple #2
0
    def get_gold_synsets(self, heb_word):
        """Return the synsets of ``heb_word`` with a uniform weight each.

        The word is wrapped in ``HebrewString`` and looked up through
        ``self.wordnet.synsets(..., lang='heb')``.  When that lookup is
        empty, the word is translated with ``Translator`` and the
        translation is looked up through the module-level ``wordnet``.

        Returns:
            A dict mapping every synset found to ``1 / <number of synsets>``,
            or ``None`` (after printing a message) when neither lookup
            finds any synset.
        """
        hebrew = HebrewString(heb_word)
        gold = self.wordnet.synsets(hebrew.heb_ltrs(), lang='heb')

        if len(gold) == 0:
            # Nothing under the Hebrew lookup: retry with the translation.
            translated = Translator().translate(hebrew)
            gold = wordnet.synsets(translated)  # @UndefinedVariable
            if len(gold) == 0:
                print("No real Synset has been found")
                return None

        # Every synset receives the same share of the total weight.
        share = 1 / float(len(gold))
        return {synset: share for synset in gold}
Exemple #3
0
    def get_word2vec_similar_synsets(self, heb_word,
                                     number_of_required_synsets):
        """Collect synsets of the word2vec suggestions for ``heb_word``.

        Suggestions are taken from a retriever built by
        ``self.word2vec.build_retriever`` and consumed in order; further
        batches are requested with ``retriever.get_more()`` when the
        current one runs out.  A suggestion whose ``eng_ltrs()`` contains
        the ``eng_ltrs()`` of ``heb_word`` is skipped, as is a suggestion
        for which ``self._get_suggestion_synsets`` returns nothing.

        Args:
            heb_word: The word to look up; wrapped in ``HebrewString``.
            number_of_required_synsets: Upper bound on the number of
                synsets to collect.

        Returns:
            ``None`` when the retriever's initial ``get`` returns ``None``.
            Otherwise a dict mapping each collected synset to the
            suggestion's similarity divided by the number of synsets that
            could be taken from that suggestion.  The dict may hold fewer
            than ``number_of_required_synsets`` entries (possibly none)
            when the retriever runs out of suggestions.
        """
        heb_word = HebrewString(heb_word)
        # Computed once and reused in the loop below.
        # NOTE(review): assumes eng_ltrs() is a pure accessor -- confirm.
        word_eng_ltrs = heb_word.eng_ltrs()
        retriever = self.word2vec.build_retriever(word_eng_ltrs)

        word2vec_suggestions = retriever.get(number_of_required_synsets)
        if word2vec_suggestions is None:
            return None

        similar_synsets = dict()

        def need_more_synsets():
            return len(similar_synsets) < number_of_required_synsets

        while need_more_synsets():
            if len(word2vec_suggestions) == 0:
                word2vec_suggestions = retriever.get_more()
                # No further suggestions: return what was gathered instead
                # of failing on pop(0) from an empty list (or on None).
                if not word2vec_suggestions:
                    break

            suggestion, similarity = word2vec_suggestions.pop(0)
            suggestion = HebrewString(suggestion)

            # Skip suggestions that contain the queried word itself.
            if word_eng_ltrs in suggestion.eng_ltrs():
                print("Passed over {0}".format(suggestion.heb_ltrs()))
                continue

            suggestion_synsets = self._get_suggestion_synsets(suggestion)
            if len(suggestion_synsets) == 0:
                continue

            # The similarity is split over the synsets this suggestion can
            # contribute, capped by how many are still needed.
            number_of_suggestion_synsets = min(len(suggestion_synsets),
                                               (number_of_required_synsets -
                                                len(similar_synsets)))
            while need_more_synsets() and len(suggestion_synsets) > 0:
                synset = suggestion_synsets.pop(0)
                # A synset already contributed by an earlier suggestion
                # keeps its first score.
                if synset not in similar_synsets:
                    similar_synsets[synset] = (similarity /
                                               number_of_suggestion_synsets)

        return similar_synsets