def _should_replace_with_letter(word, w2v_utilities, i, letter): if i != len(word) and word[i - 1] != letter and word[i+1] != letter: new_word = _place_letter_at_index(word, letter, i) new_word = _remove_puncutation(new_word) heb_string = HebrewString(new_word) retriever = w2v_utilities.build_retriever(heb_string.eng_ltrs()) return retriever.get(1) is not None
def get_word2vec_similar_synsets(self, heb_word, number_of_required_synsets):
    """Gather synsets of words that the word2vec retriever suggests for ``heb_word``.

    Suggestions are consumed one at a time as ``(word, similarity)`` pairs.
    A suggestion whose ``eng_ltrs()`` form contains the query's ``eng_ltrs()``
    form is skipped (and reported on stdout).  For each remaining suggestion,
    its synsets (from ``self._get_suggestion_synsets``) are added until the
    requested number of synsets has been collected.

    Args:
        heb_word: The query word; wrapped in ``HebrewString`` before use.
        number_of_required_synsets: How many distinct synsets to collect.

    Returns:
        ``None`` when the retriever's initial ``get`` returns ``None``.
        Otherwise a dict mapping each synset to the similarity of the
        suggestion that introduced it, divided by the number of synsets
        budgeted for that suggestion.  The dict may hold fewer entries than
        requested when the retriever runs out of suggestions.
    """
    heb_word = HebrewString(heb_word)
    # Hoisted: the same key is used for the retriever and for every
    # containment check in the loop below.
    query_key = heb_word.eng_ltrs()
    retriever = self.word2vec.build_retriever(query_key)
    word2vec_suggestions = retriever.get(number_of_required_synsets)
    if word2vec_suggestions is None:
        return None

    similar_synsets = {}

    def need_more_synsets():
        return len(similar_synsets) < number_of_required_synsets

    while need_more_synsets():
        if not word2vec_suggestions:
            word2vec_suggestions = retriever.get_more()
            if not word2vec_suggestions:
                # Retriever is exhausted: return what was found instead of
                # crashing on ``pop`` from an empty (or missing) list.
                break

        suggestion, similarity = word2vec_suggestions.pop(0)
        suggestion = HebrewString(suggestion)
        if query_key in suggestion.eng_ltrs():
            print("Passed over {0}".format(suggestion.heb_ltrs()))
            continue

        suggestion_synsets = self._get_suggestion_synsets(suggestion)
        if not suggestion_synsets:
            continue

        # Split the suggestion's similarity across the synsets it may still
        # contribute: all of its synsets, capped by the remaining demand.
        # Both operands are >= 1 here, so the divisor is never zero.
        number_of_suggestion_synsets = min(
            len(suggestion_synsets),
            number_of_required_synsets - len(similar_synsets),
        )
        score = similarity / number_of_suggestion_synsets

        # Iterate rather than pop so the list owned by
        # ``_get_suggestion_synsets`` is left untouched.
        for synset in suggestion_synsets:
            if not need_more_synsets():
                break
            if synset not in similar_synsets:
                similar_synsets[synset] = score

    return similar_synsets