def test_text_difficulty(self):
    """Post two German texts to the difficulty endpoint and check the verdicts.

    The first text is expected to come back as 'HARD' with an average score
    of 0.67 (rounded to two decimals), the second one as 'EASY'.
    """
    payload = (
        ' { "texts": [ '
        '{"content": "Der die das warum, wer nicht fragt bleibt bew\u00f6lkt!", "id": 1}, '
        '{"content": "Das ist ein Test.", "id": 2}], '
        '"difficulty_computer": "default" } '
    )

    # Fill the ranked-word cache inside an application context before
    # calling the endpoint.
    with zeeguu.app.app_context():
        RankedWord.cache_ranked_words()

    response = self.api_post(
        '/get_difficulty_for_text/de', payload, 'application/json')
    results = json.loads(response.data)['difficulties']
    hard_text, easy_text = results[0], results[1]

    assert round(hard_text['score_average'], 2) == 0.67
    assert hard_text['estimated_difficulty'] == 'HARD'
    assert easy_text['estimated_difficulty'] == 'EASY'
def context_words_with_rank(self):
    """Return the context words, other than the bookmarked word, that are ranked.

    The words come from ``self.split_words_from_context()``. Every occurrence
    of ``self.origin.word`` is removed from that list, and of the remaining
    words only those whose lower-cased form satisfies ``RankedWord.exists``
    for ``self.origin.language`` are returned (in their original casing).
    """
    candidates = self.split_words_from_context()

    # Drop every occurrence of the bookmarked word itself.
    while self.origin.word in candidates:
        candidates.remove(self.origin.word)

    return [
        candidate for candidate in candidates
        if RankedWord.exists(candidate.lower(), self.origin.language)
    ]
def find_or_create(cls, word, user, language):
    """Return the encounter based probability of ``word`` for ``user``.

    The word is lower-cased and looked up with ``RankedWord.find``. If an
    encounter based probability already exists for the user and that ranked
    word, its ``not_looked_up_counter`` is incremented and ``boost_prob()``
    is called on it. Otherwise the record is obtained through
    ``EncounterBasedProbability.find`` with ``DEFAULT_PROBABILITY``
    (presumably creating it -- see that method).

    :param word: the word that was encountered
    :param user: the user who encountered it
    :param language: the language used for the ranked word lookup
    :return: the encounter based probability object
    """
    rank = RankedWord.find(word.lower(), language)

    if not EncounterBasedProbability.exists(user, rank):
        return EncounterBasedProbability.find(
            user, rank, EncounterBasedProbability.DEFAULT_PROBABILITY)

    existing = EncounterBasedProbability.find(user, rank)
    existing.not_looked_up_counter += 1
    existing.boost_prob()
    return existing
def update_probabilities_for_word(word):
    """Recompute the probabilities stored for ``word`` and the current user.

    The current user is read from ``flask.g.user``. The steps are:

    1. The exercise based probability is recalculated for every bookmark the
       user has for this word and then set to the average of those values.
    2. If the word is ranked, the known word probability is updated: combined
       from the exercise based and encounter based probabilities when an
       encounter based probability exists, otherwise set to the exercise
       based probability alone.
    3. The session is committed.

    This is best effort: any exception is reported on the console (message
    plus traceback) and not propagated. The success message is printed only
    when the whole update, including the commit, went through.

    :param word: the user word whose probabilities are updated; its ``word``,
        ``language`` and ``id`` attributes are read
    :return: None
    """
    import traceback

    try:
        user = flask.g.user
        bookmarks_for_this_word = Bookmark.find_all_by_user_and_word(
            user, word)
        ex_prob = ExerciseBasedProbability.find(user, word)

        total_prob = 0
        for b in bookmarks_for_this_word:
            ex_prob.calculate_known_bookmark_probability(b)
            total_prob += float(ex_prob.probability)
        # With no bookmarks this raises ZeroDivisionError, which is reported
        # as a failed update by the handler below.
        ex_prob.probability = total_prob / len(bookmarks_for_this_word)

        if RankedWord.exists(word.word, word.language):
            ranked_word = RankedWord.find(word.word, word.language)
            if EncounterBasedProbability.exists(user, ranked_word):
                enc_prob = EncounterBasedProbability.find(user, ranked_word)
                known_word_prob = KnownWordProbability.find(
                    user, word, ranked_word)
                print("!known word prob before: " +
                      str(known_word_prob.probability))
                print("!ex_prob: " + str(ex_prob.probability))
                print("!enc_prob: " + str(enc_prob.probability))
                known_word_prob.probability = KnownWordProbability.calculateKnownWordProb(
                    ex_prob.probability, enc_prob.probability)
                print("!known word prob after: " +
                      str(known_word_prob.probability))
            else:
                known_word_prob = KnownWordProbability.find(
                    user, word, ranked_word)
                known_word_prob.probability = ex_prob.probability

        db.session.commit()
    except Exception:
        # Deliberately not re-raised, but the cause is shown instead of
        # being thrown away.
        print("failed to update probabilities for word with id: " +
              str(word.id))
        traceback.print_exc()
    else:
        print("!successfully updated probabilities for word with id: " +
              str(word.id))
def get_not_encountered_words(self):
    """List the ranked words for which the user has no known word probability.

    Starts from ``RankedWord.find_all(self.language)`` and removes the ranked
    word of every entry returned by
    ``KnownWordProbability.find_all_by_user_with_rank(self.user)``.

    :return: a list of dictionaries of the form ``{'word': <word>}``, one per
        remaining ranked word
    """
    remaining = RankedWord.find_all(self.language)

    for known in KnownWordProbability.find_all_by_user_with_rank(self.user):
        if known.ranked_word in remaining:
            remaining.remove(known.ranked_word)

    return [{'word': ranked.word} for ranked in remaining]
def text_difficulty(text, language, known_probabilities,
                    difficulty_computer='default',
                    rank_boundary=REFERENCE_VOCABULARY_SIZE):
    """
    Estimate the difficulty of a text from the difficulty of its words.

    :param text: text to analyse
    :param language: the learned language
    :param known_probabilities: the probabilities that the user knows
        individual words
    :param difficulty_computer: name of the algorithm used to compute the
        difficulty. Only "default" is implemented; the value is currently
        not used by this function
    :param rank_boundary: forwarded to word_difficulty; defaults to
        REFERENCE_VOCABULARY_SIZE
    :return: a dictionary with three items:
        1. score_average - average difficulty of the words in the text
        2. score_median - median difficulty of the words in the text
           (the upper of the two middle values for an even word count)
        3. estimated_difficulty - one of "EASY", "MEDIUM", "HARD"
    """
    # Calculate difficulty for each word
    word_difficulties = []
    for word in split_words_from_text(text):
        ranked_word = RankedWord.find_cache(word, language)
        word_difficulties.append(
            word_difficulty(known_probabilities, True, rank_boundary,
                            ranked_word, word))

    # If we can't compute the text difficulty, we estimate hard. The label
    # is the string callers get in every other case, not a number.
    if not word_difficulties:
        return dict(score_median=1,
                    score_average=1,
                    estimated_difficulty="HARD")

    # Average difficulty for text
    difficulty_average = sum(word_difficulties) / float(len(word_difficulties))

    # Median difficulty. Floor division gives the same index whether or not
    # "/" is true division, and it is always a valid index.
    word_difficulties.sort()
    center = len(word_difficulties) // 2
    difficulty_median = word_difficulties[center]

    return dict(score_median=difficulty_median,
                score_average=difficulty_average,
                estimated_difficulty=discrete_text_difficulty(
                    difficulty_average, difficulty_median))
def text_difficulty(text, language, known_probabilities, difficulty_computer = 'default', rank_boundary = REFERENCE_VOCABULARY_SIZE):
    """
    Estimate the difficulty of a text from the difficulty of its words.

    :param text: text to analyse
    :param language: the learned language
    :param known_probabilities: the probabilities that the user knows
        individual words
    :param difficulty_computer: name of the algorithm used to compute the
        difficulty. Only "default" is implemented; the value is currently
        not used by this function
    :param rank_boundary: forwarded to word_difficulty; defaults to
        REFERENCE_VOCABULARY_SIZE
    :return: a dictionary with three items:
        1. score_average - average difficulty of the words in the text
        2. score_median - median difficulty of the words in the text
           (the upper of the two middle values for an even word count)
        3. estimated_difficulty - one of "EASY", "MEDIUM", "HARD"
    """
    # Calculate difficulty for each word
    word_difficulties = []
    for word in split_words_from_text(text):
        ranked_word = RankedWord.find_cache(word, language)
        difficulty = word_difficulty(known_probabilities, True, rank_boundary, ranked_word, word)
        word_difficulties.append(difficulty)

    # If we can't compute the text difficulty, we estimate hard. The label
    # is the string callers get in every other case, not a number.
    if not word_difficulties:
        return dict(
            score_median=1,
            score_average=1,
            estimated_difficulty="HARD")

    # Average difficulty for text
    difficulty_average = sum(word_difficulties) / float(len(word_difficulties))

    # Median difficulty. Floor division gives the same index whether or not
    # "/" is true division, and it is always a valid index.
    word_difficulties.sort()
    center = len(word_difficulties) // 2
    difficulty_median = word_difficulties[center]

    difficulty_scores = dict(
        score_median=difficulty_median,
        score_average=difficulty_average,
        estimated_difficulty=discrete_text_difficulty(difficulty_average, difficulty_median))

    return difficulty_scores
def known_words_list(self):
    """List the distinct ranked words the user has marked as too easy.

    Collects the origin word of every bookmark of ``self.user`` whose
    ``check_is_latest_outcome_too_easy()`` is true, keeps the words for
    which ``RankedWord.exists`` holds in the language found for
    ``self.lang_code``, and removes duplicates.

    :return: a list of dictionaries of the form ``{'word': <word>}``; the
        order is that of a set and therefore not defined
    """
    language = Language.find(self.lang_code)

    too_easy_words = [
        bookmark.origin.word
        for bookmark in self.user.all_bookmarks()
        if bookmark.check_is_latest_outcome_too_easy()
    ]
    ranked_words = [
        candidate for candidate in too_easy_words
        if RankedWord.exists(candidate, language)
    ]

    return [{'word': unique_word} for unique_word in set(ranked_words)]
def calculate_probabilities_after_adding_a_bookmark(self, user,language): """ ML: This has to be refactored. It's a mess. The idea is: you've just added a bookmark. There are two things to do: 1. update the probabilities of the context words (they have been encountered, and not translated) 2. update the probabilities of the word itself - :param user: :param language: :return: """ # 1. computations for adding encounter based probability for the context words for word in self.context_words_with_rank(): enc_prob = EncounterBasedProbability.find_or_create(word, user, language) zeeguu.db.session.add(enc_prob) zeeguu.db.session.commit() user_word = None ranked_word = enc_prob.ranked_word if UserWord.exists(word,language): user_word = UserWord.find(word,language) if ExerciseBasedProbability.exists(user,user_word): #checks if exercise based probability exists for words in context ex_prob = ExerciseBasedProbability.find(user,user_word) known_word_prob = KnownWordProbability.find(user,user_word,ranked_word) known_word_prob.probability = known_word_prob.calculateKnownWordProb(ex_prob.probability, enc_prob.probability) #updates known word probability as exercise based probability already existed. else: if KnownWordProbability.exists(user, user_word,ranked_word): known_word_prob = KnownWordProbability.find(user,user_word,ranked_word) known_word_prob.probability = enc_prob.probability # updates known word probability as encounter based probability already existed else: known_word_prob = KnownWordProbability.find(user,user_word,ranked_word, enc_prob.probability) # new known word probability created as it did not exist zeeguu.db.session.add(known_word_prob) # 2. Update the probabilities of the word itself # 2.a) exercise based prob # ML: Should this thing change? 
# The ex based probability should probably not change after I add a bookmark # Commenting out the following lines: s # ex_prob = ExerciseBasedProbability.find(user, self.origin) # if ex_prob: # ex_prob.update_probability_after_adding_bookmark_with_same_word(self,user) # zeeguu.db.session.add(ex_prob) # 2.b) encounter based prob ranked_word = RankedWord.find(self.origin.word, language) if ranked_word: #checks if ranked_word exists for that looked up word if EncounterBasedProbability.exists(user, ranked_word): # checks if encounter based probability exists for that looked up word enc_prob = EncounterBasedProbability.find(user, ranked_word) enc_prob.word_has_just_beek_bookmarked() db.session.add(enc_prob) db.session.commit() # 2.c) update known word probability if it exists if KnownWordProbability.exists(user, self.origin,ranked_word): known_word_prob = KnownWordProbability.find(user,self.origin,ranked_word) known_word_prob.word_has_just_beek_bookmarked() db.session.add(known_word_prob) db.session.commit()
app.config["SQLALCHEMY_DATABASE_URI"] = db_connection_string else: # Ooops: we are not testing, and we don't have a DB configured! if not "SQLALCHEMY_DATABASE_URI" in app.config: print("No db configured. You probably have no config file...") exit() print("->> DB Connection String: " + app.config["SQLALCHEMY_DATABASE_URI"]) # getting rid of a warning in new version of SQLAlchemy app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False setup_db_connection() env = flask.ext.assets.Environment(app) env.cache = app.instance_path env.directory = os.path.join(app.instance_path, "gen") env.url = "/gen" env.append_path( os.path.join(os.path.dirname(os.path.abspath(__file__)), "static"), "/static") db.init_app(app) db.create_all(app=app) from zeeguu.model.ranked_word import RankedWord with app.app_context(): RankedWord.cache_ranked_words()
def find_rank(cls, word, language):
    """Look up ``word`` in ``language`` with ``RankedWord.find``.

    :param word: the word to look up
    :param language: the language to look the word up in
    :return: whatever ``RankedWord.find`` returns for that pair
    """
    rank = RankedWord.find(word, language)
    return rank
def calculate_probabilities_after_adding_a_bookmark(self, user, language): """ ML: This has to be refactored. It's a mess. The idea is: you've just added a bookmark. There are two things to do: 1. update the probabilities of the context words (they have been encountered, and not translated) 2. update the probabilities of the word itself - :param user: :param language: :return: """ # 1. computations for adding encounter based probability for the context words for word in self.context_words_with_rank(): enc_prob = EncounterBasedProbability.find_or_create( word, user, language) zeeguu.db.session.add(enc_prob) zeeguu.db.session.commit() user_word = None ranked_word = enc_prob.ranked_word if UserWord.exists(word, language): user_word = UserWord.find(word, language) if ExerciseBasedProbability.exists( user, user_word ): #checks if exercise based probability exists for words in context ex_prob = ExerciseBasedProbability.find(user, user_word) known_word_prob = KnownWordProbability.find( user, user_word, ranked_word) known_word_prob.probability = known_word_prob.calculateKnownWordProb( ex_prob.probability, enc_prob.probability ) #updates known word probability as exercise based probability already existed. else: if KnownWordProbability.exists(user, user_word, ranked_word): known_word_prob = KnownWordProbability.find( user, user_word, ranked_word) known_word_prob.probability = enc_prob.probability # updates known word probability as encounter based probability already existed else: known_word_prob = KnownWordProbability.find( user, user_word, ranked_word, enc_prob.probability ) # new known word probability created as it did not exist zeeguu.db.session.add(known_word_prob) # 2. Update the probabilities of the word itself # 2.a) exercise based prob # ML: Should this thing change? 
# The ex based probability should probably not change after I add a bookmark # Commenting out the following lines: s # ex_prob = ExerciseBasedProbability.find(user, self.origin) # if ex_prob: # ex_prob.update_probability_after_adding_bookmark_with_same_word(self,user) # zeeguu.db.session.add(ex_prob) # 2.b) encounter based prob ranked_word = RankedWord.find(self.origin.word, language) if ranked_word: #checks if ranked_word exists for that looked up word if EncounterBasedProbability.exists( user, ranked_word ): # checks if encounter based probability exists for that looked up word enc_prob = EncounterBasedProbability.find(user, ranked_word) enc_prob.word_has_just_beek_bookmarked() db.session.add(enc_prob) db.session.commit() # 2.c) update known word probability if it exists if KnownWordProbability.exists(user, self.origin, ranked_word): known_word_prob = KnownWordProbability.find( user, self.origin, ranked_word) known_word_prob.word_has_just_beek_bookmarked() db.session.add(known_word_prob) db.session.commit()