def get_lower_bound_percentage_of_extended_vocabulary(self):
     """
     Return which percentage of the 10000 top-ranked words the user
     probably knows.

     Only probably-known words that carry a ranked word with a rank of at
     most 10000 are counted; the count is then expressed as a percentage
     of 10000.

     :return: the percentage, rounded to two decimals
     """
     ranked_known_count = sum(
         1
         for known in KnownWordProbability.get_probably_known_words(self.user)
         if known.ranked_word is not None and known.ranked_word.rank <= 10000)
     return round(float(ranked_known_count) / 10000 * 100, 2)
Example #2
0
def get_difficulty_for_text(lang_code):
    """
    Compute the difficulty of each posted text for the current user.

    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the difficulty for. Each text consists of an array
        with the text itself as 'content' and an additional 'id' which gets roundtripped unchanged
    :param difficulty_computer (optional): calculate difficulty score using a specific algorithm
    :param rank_boundary (deprecated): upper boundary for word frequency rank (between 1 and 10'000)
    :param personalized (deprecated): by default we always compute the personalized difficulty

    For an example of what the Json data looks like, see
        ../tests/api_tests.py#test_txt_difficulty(self):

    :return: the string 'FAIL' when the language is unknown or the request
      body is not a json object with a 'texts' entry; otherwise a json
      result with the key 'difficulties', a json array which contains for
      each text:
      * estimated_difficulty - one of three: "EASY", "MEDIUM", "HARD"
      * id - identifies the text
      * [deprecated] score_average - average difficulty of the words in the text
      * [deprecated] score_median - median difficulty of the words in the text
    """
    language = Language.find(lang_code)
    if not language:
        return 'FAIL'

    data = request.get_json()

    # get_json() may hand back None (missing / non-JSON body) or a JSON
    # value that is not an object; a plain `'texts' in data` would then
    # raise a TypeError or do a substring / element test instead.
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'

    # The algorithm name is matched case-insensitively.
    difficulty_computer = data.get('difficulty_computer', 'default').lower()

    known_probabilities = KnownWordProbability.find_all_by_user_cached(
        flask.g.user)

    difficulties = []
    for text in data['texts']:
        difficulty = text_difficulty(
            text["content"],
            language,
            known_probabilities,
            difficulty_computer)
        # Roundtrip the caller-supplied id so results can be matched up.
        difficulty["id"] = text["id"]
        difficulties.append(difficulty)

    return json_result(dict(difficulties=difficulties))
def update_probabilities_for_word(word):
    """
    Recompute and commit the probabilities of ``word`` for the current user.

    The exercise-based probability is set to the average of the values
    obtained by calling ``calculate_known_bookmark_probability`` for every
    bookmark the current user (``flask.g.user``) has for this word. If a
    ranked word exists for the word, the known-word probability is then
    either combined with the encounter-based probability (when one exists)
    or set to the exercise-based probability.

    This is best-effort: any error is reported on stdout and swallowed.
    The success message is only printed when the update really went
    through.

    :param word: the user word to update; ``word.word``, ``word.language``
        and ``word.id`` are read
    :return: None
    """
    try:
        bookmarks_for_this_word = Bookmark.find_all_by_user_and_word(
            flask.g.user, word)

        ex_prob = ExerciseBasedProbability.find(flask.g.user, word)
        total_prob = 0
        for b in bookmarks_for_this_word:
            ex_prob.calculate_known_bookmark_probability(b)
            total_prob += float(ex_prob.probability)
        # With no bookmarks this raises ZeroDivisionError, which is
        # reported as a failed update below.
        ex_prob.probability = total_prob / len(bookmarks_for_this_word)

        if RankedWord.exists(word.word, word.language):
            ranked_word = RankedWord.find(word.word, word.language)
            if EncounterBasedProbability.exists(flask.g.user, ranked_word):
                enc_prob = EncounterBasedProbability.find(
                    flask.g.user, ranked_word)
                known_word_prob = KnownWordProbability.find(
                    flask.g.user, word, ranked_word)
                print("!known word prob before: " + str(
                    known_word_prob.probability))
                print("!ex_prob: " + str(ex_prob.probability))
                print("!enc_prob: " + str(enc_prob.probability))
                known_word_prob.probability = KnownWordProbability.calculateKnownWordProb(
                    ex_prob.probability, enc_prob.probability)
                print("!known word prob after: " + str(
                    known_word_prob.probability))
            else:
                known_word_prob = KnownWordProbability.find(
                    flask.g.user, word, ranked_word)
                known_word_prob.probability = ex_prob.probability

        db.session.commit()
    except Exception:
        # Deliberately broad: the update must never break the caller.
        # `Exception` (instead of a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        print("failed to update probabilities for word with id: " + str(
            word.id))
    else:
        # Only claim success when nothing above failed.
        print("!successfully updated probabilities for word with id: " + str(
            word.id))
 def get_not_encountered_words(self):
     """
     List the ranked words of ``self.language`` for which the user has no
     known-word probability with a rank.

     Note that matching ranked words are removed in place from the
     collection returned by ``RankedWord.find_all``.

     :return: a list of dicts, each with the single key 'word'
     """
     remaining_ranks = RankedWord.find_all(self.language)
     for known in KnownWordProbability.find_all_by_user_with_rank(self.user):
         if known.ranked_word in remaining_ranks:
             remaining_ranks.remove(known.ranked_word)
     return [{'word': ranked.word} for ranked in remaining_ranks]
def set_know_word_prob():
    """
    Create known-word probabilities from the stored encounter-based and
    exercise-based probabilities.

    Pass 1 walks all encounter-based probabilities: when an exercise-based
    probability exists for the same user and user word, both values are
    combined via ``KnownWordProbability.calculateKnownWordProb``; otherwise
    the encounter-based value is used as is (and no user word is attached).

    Pass 2 walks all exercise-based probabilities and, for those without an
    encounter-based counterpart, stores the exercise-based value directly.

    Every entry is committed right after it has been added to the session.
    """
    zeeguu.app.test_request_context().push()
    session = zeeguu.db.session
    session.commit()

    encounter_probs = EncounterBasedProbability.find_all()
    exercise_probs = ExerciseBasedProbability.find_all()

    # Pass 1: one entry per encounter-based probability.
    for enc in encounter_probs:
        owner = enc.user
        text = enc.ranked_word.word
        lang = enc.ranked_word.language
        user_word = (UserWord.find(text, lang)
                     if UserWord.exists(text, lang) else None)
        if ExerciseBasedProbability.exists(owner, user_word):
            ex = ExerciseBasedProbability.find(owner, user_word)
            combined = KnownWordProbability.calculateKnownWordProb(
                ex.probability, enc.probability)
            entry = KnownWordProbability.find(
                owner, user_word, enc.ranked_word, combined)
        else:
            entry = KnownWordProbability.find(
                owner, None, enc.ranked_word, enc.probability)
        session.add(entry)
        session.commit()

    # Pass 2: exercise-based probabilities that pass 1 did not cover.
    for ex in exercise_probs:
        owner = ex.user
        lang = ex.user_word.language
        text = ex.user_word.word
        ranked = (RankedWord.find(text, lang)
                  if RankedWord.exists(text, lang) else None)
        if EncounterBasedProbability.exists(owner, ranked):
            continue
        if not UserWord.exists(text, lang):
            continue
        user_word = UserWord.find(text, lang)
        session.add(
            KnownWordProbability(owner, user_word, ranked, ex.probability))
        session.commit()

    print('job3')
 def get_percentage_of_probably_known_bookmarked_words(self):
     """
     Return the share of the user's bookmarks that are probably known.

     Counts the user's probably-known words that have a user word attached
     and expresses that count as a percentage of the number of bookmarks
     returned by ``self.user.all_bookmarks()``.

     :return: the percentage rounded to two decimals, or 0 when the user
         has no bookmarks
     """
     high_known_word_prob_of_user = KnownWordProbability.get_probably_known_words(
         self.user)
     count_bookmarks_of_user = len(self.user.all_bookmarks())

     # Guard against dividing by zero. This replaces the obsolete `<>`
     # comparison, which is a syntax error on Python 3.
     if count_bookmarks_of_user == 0:
         return 0

     count_high_known_word_prob_of_user = sum(
         1 for prob in high_known_word_prob_of_user
         if prob.user_word is not None)
     return round(
         float(count_high_known_word_prob_of_user) /
         count_bookmarks_of_user * 100, 2)
Example #7
0
def get_difficulty_for_text(lang_code):
    """
    Compute the difficulty of each posted text for the current user.

    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the difficulty for. Each text consists of an array
        with the text itself as 'content' and an additional 'id' which gets roundtripped unchanged
    :param difficulty_computer (optional): calculate difficulty score using a specific algorithm
    :param rank_boundary (deprecated): upper boundary for word frequency rank (between 1 and 10'000)
    :param personalized (deprecated): by default we always compute the personalized difficulty

    For an example of what the Json data looks like, see
        ../tests/api_tests.py#test_txt_difficulty(self):

    :return: the string 'FAIL' when the language is unknown or the request
      body is not a json object with a 'texts' entry; otherwise a json
      result with the key 'difficulties', a json array which contains for
      each text:
      * estimated_difficulty - one of three: "EASY", "MEDIUM", "HARD"
      * id - identifies the text
      * [deprecated] score_average - average difficulty of the words in the text
      * [deprecated] score_median - median difficulty of the words in the text
    """
    language = Language.find(lang_code)
    if not language:
        return 'FAIL'

    data = request.get_json()

    # get_json() may hand back None (missing / non-JSON body) or a JSON
    # value that is not an object; a plain `'texts' in data` would then
    # raise a TypeError or do a substring / element test instead.
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'

    # The algorithm name is matched case-insensitively.
    difficulty_computer = data.get('difficulty_computer', 'default').lower()

    known_probabilities = KnownWordProbability.find_all_by_user_cached(
        flask.g.user)

    difficulties = []
    for text in data['texts']:
        difficulty = text_difficulty(text["content"], language,
                                     known_probabilities, difficulty_computer)
        # Roundtrip the caller-supplied id so results can be matched up.
        difficulty["id"] = text["id"]
        difficulties.append(difficulty)

    return json_result(dict(difficulties=difficulties))
    def get_probably_known_words(self):
        """
        List the words of ``self.language`` that the user probably knows.

        For each probably-known entry the ranked word is preferred; the
        user word is only used when the ranked word is missing or belongs
        to another language. Entries matching neither are skipped.

        :return: a list of dicts, each with the single key 'word'
        """
        # TODO: returning one-key dicts instead of plain words looks odd;
        # check what the callers actually need.
        known_words = []
        for prob in KnownWordProbability.get_probably_known_words(self.user):
            if (prob.ranked_word is not None
                    and prob.ranked_word.language == self.language):
                source = prob.ranked_word
            elif (prob.user_word is not None
                    and prob.user_word.language == self.language):
                source = prob.user_word
            else:
                continue
            known_words.append({'word': source.word})

        return known_words
Example #9
0
    def calculate_probabilities_after_adding_a_bookmark(self, user,language):
        """
        Update the word probabilities that are affected by a bookmark that
        has just been added.

        ML: This has to be refactored.
        It's a mess.

        There are two things to do:

          1. update the probabilities of the context words (they have been
          encountered, and not translated)

          2. update the probabilities of the word itself
          (``self.origin``)

        Changes are written through the database session; several
        intermediate commits happen along the way.

        :param user: the user whose probabilities are updated
        :param language: the language used to look up the context words
            and the bookmarked word
        :return: None
        """

        # 1. Encounter-based probability for every ranked context word.
        for word in self.context_words_with_rank():
            enc_prob = EncounterBasedProbability.find_or_create(word, user, language)
            zeeguu.db.session.add(enc_prob)
            zeeguu.db.session.commit()
            user_word = None
            ranked_word = enc_prob.ranked_word
            if UserWord.exists(word,language):
                user_word = UserWord.find(word,language)
                # Only when an exercise-based probability exists for this
                # context word is the known-word probability recomputed
                # from both values; otherwise nothing is updated here.
                if ExerciseBasedProbability.exists(user,user_word):
                    ex_prob = ExerciseBasedProbability.find(user,user_word)
                    known_word_prob = KnownWordProbability.find(user,user_word,ranked_word)
                    # NOTE(review): this change is not committed in this
                    # branch; it relies on a later commit - confirm.
                    known_word_prob.probability = known_word_prob.calculateKnownWordProb(ex_prob.probability, enc_prob.probability)
            else:
                # user_word is still None in this branch.
                if KnownWordProbability.exists(user, user_word,ranked_word):
                    known_word_prob = KnownWordProbability.find(user,user_word,ranked_word)
                    # Existing entry: take over the encounter-based value.
                    known_word_prob.probability = enc_prob.probability
                else:
                    # No entry yet: per the original author's note, find()
                    # called with a probability creates a new one.
                    known_word_prob = KnownWordProbability.find(user,user_word,ranked_word, enc_prob.probability)
                    zeeguu.db.session.add(known_word_prob)

        # 2. Update the probabilities of the word itself

        # 2.a) exercise based prob
        # ML: Should this thing change?
        # The ex based probability should probably not change after I add a bookmark
        # Commenting out the following lines:
        # ex_prob = ExerciseBasedProbability.find(user, self.origin)
        # if ex_prob:
        #     ex_prob.update_probability_after_adding_bookmark_with_same_word(self,user)
        #     zeeguu.db.session.add(ex_prob)

        # 2.b) encounter based prob
        # NOTE(review): from here on `db.session` is used instead of
        # `zeeguu.db.session` - presumably the same session; verify.
        ranked_word = RankedWord.find(self.origin.word, language)
        # Steps 2.b and 2.c only run when the looked-up word has a ranked word.
        if ranked_word:
            # Only an existing encounter-based probability is updated; none
            # is created for the bookmarked word here.
            if EncounterBasedProbability.exists(user, ranked_word):
                enc_prob = EncounterBasedProbability.find(user, ranked_word)
                enc_prob.word_has_just_beek_bookmarked()
                db.session.add(enc_prob)
                db.session.commit()

            # 2.c) update known word probability if it exists
            if KnownWordProbability.exists(user, self.origin,ranked_word):
                known_word_prob = KnownWordProbability.find(user,self.origin,ranked_word)
                known_word_prob.word_has_just_beek_bookmarked()
                db.session.add(known_word_prob)
                db.session.commit()
def get_content_from_url():
    """
    Fetch the article content of several urls in parallel.

    Json data:
    :param urls: json array that contains the urls to get the article content for. Each url consists of an array
        with the url itself as 'url' and an additional 'id' which gets roundtripped unchanged.
        For an example of what the Json data looks like, see
            ../tests/api_tests.py#test_content_from_url(self):

    :param timeout (optional): maximal time in seconds to wait for the results (defaults to 10)

    :param lang_code (optional): If the user sends along the language, then we compute the difficulty of the texts

    :return: the string 'FAIL' when the request body is not a json object
        with a 'urls' entry; otherwise a json result with the key
        'contents': a json array with the contents of the urls that
        responded within the timeout, as arrays with the key 'content' for
        the article content, the url of the main image as 'image' and the
        'id' parameter to identify the corresponding url. When a known
        'lang_code' was sent, each entry also carries a 'difficulty'.
    """
    data = request.get_json()

    # get_json() may hand back None (missing / non-JSON body) or a JSON
    # value that is not an object; a plain `'urls' in data` would then
    # raise a TypeError or do a substring / element test instead.
    if not isinstance(data, dict) or 'urls' not in data:
        return 'FAIL'
    urls = list(data['urls'])

    timeout = int(data['timeout']) if 'timeout' in data else 10

    # Workers push their results into this queue.
    queue = Queue.Queue()

    # Start one daemon worker thread per url to get the url contents.
    threads = []
    for url in urls:
        thread = threading.Thread(
            target=PageExtractor.worker,
            args=(url['url'], url['id'], queue))
        thread.daemon = True
        threads.append(thread)
        thread.start()

    # Wait for workers to finish until timeout. is_alive() is the
    # spelling that exists on both Python 2.6+ and Python 3.
    stop = time.time() + timeout
    while any(t.is_alive() for t in threads) and time.time() < stop:
        time.sleep(0.1)

    # Collect at most one result per url; workers that have not delivered
    # anything by now simply contribute nothing.
    contents = []
    for _ in urls:
        try:
            contents.append(queue.get_nowait())
        except Queue.Empty:
            pass

    # If the user sends along the language, then we can compute the difficulty
    if 'lang_code' in data:
        language = Language.find(data['lang_code'])
        if language is not None:
            print("got language")
            known_probabilities = KnownWordProbability.find_all_by_user_cached(
                flask.g.user)
            for each_content_dict in contents:
                each_content_dict["difficulty"] = text_difficulty(
                    each_content_dict["content"],
                    language,
                    known_probabilities)

    return json_result(dict(contents=contents))
Example #11
0
    def calculate_probabilities_after_adding_a_bookmark(self, user, language):
        """
        Update the word probabilities that are affected by a bookmark that
        has just been added.

        ML: This has to be refactored.
        It's a mess.

        There are two things to do:

          1. update the probabilities of the context words (they have been
          encountered, and not translated)

          2. update the probabilities of the word itself
          (``self.origin``)

        Changes are written through the database session; several
        intermediate commits happen along the way.

        :param user: the user whose probabilities are updated
        :param language: the language used to look up the context words
            and the bookmarked word
        :return: None
        """

        # 1. Encounter-based probability for every ranked context word.
        for word in self.context_words_with_rank():
            enc_prob = EncounterBasedProbability.find_or_create(
                word, user, language)
            zeeguu.db.session.add(enc_prob)
            zeeguu.db.session.commit()
            user_word = None
            ranked_word = enc_prob.ranked_word
            if UserWord.exists(word, language):
                user_word = UserWord.find(word, language)
                # Only when an exercise-based probability exists for this
                # context word is the known-word probability recomputed
                # from both values; otherwise nothing is updated here.
                if ExerciseBasedProbability.exists(
                        user, user_word
                ):
                    ex_prob = ExerciseBasedProbability.find(user, user_word)
                    known_word_prob = KnownWordProbability.find(
                        user, user_word, ranked_word)
                    # NOTE(review): this change is not committed in this
                    # branch; it relies on a later commit - confirm.
                    known_word_prob.probability = known_word_prob.calculateKnownWordProb(
                        ex_prob.probability, enc_prob.probability
                    )
            else:
                # user_word is still None in this branch.
                if KnownWordProbability.exists(user, user_word, ranked_word):
                    known_word_prob = KnownWordProbability.find(
                        user, user_word, ranked_word)
                    # Existing entry: take over the encounter-based value.
                    known_word_prob.probability = enc_prob.probability
                else:
                    # No entry yet: per the original author's note, find()
                    # called with a probability creates a new one.
                    known_word_prob = KnownWordProbability.find(
                        user, user_word, ranked_word, enc_prob.probability
                    )
                    zeeguu.db.session.add(known_word_prob)

        # 2. Update the probabilities of the word itself

        # 2.a) exercise based prob
        # ML: Should this thing change?
        # The ex based probability should probably not change after I add a bookmark
        # Commenting out the following lines:
        # ex_prob = ExerciseBasedProbability.find(user, self.origin)
        # if ex_prob:
        #     ex_prob.update_probability_after_adding_bookmark_with_same_word(self,user)
        #     zeeguu.db.session.add(ex_prob)

        # 2.b) encounter based prob
        # NOTE(review): from here on `db.session` is used instead of
        # `zeeguu.db.session` - presumably the same session; verify.
        ranked_word = RankedWord.find(self.origin.word, language)
        # Steps 2.b and 2.c only run when the looked-up word has a ranked word.
        if ranked_word:
            # Only an existing encounter-based probability is updated; none
            # is created for the bookmarked word here.
            if EncounterBasedProbability.exists(
                    user, ranked_word
            ):
                enc_prob = EncounterBasedProbability.find(user, ranked_word)
                enc_prob.word_has_just_beek_bookmarked()
                db.session.add(enc_prob)
                db.session.commit()

            # 2.c) update known word probability if it exists
            if KnownWordProbability.exists(user, self.origin, ranked_word):
                known_word_prob = KnownWordProbability.find(
                    user, self.origin, ranked_word)
                known_word_prob.word_has_just_beek_bookmarked()
                db.session.add(known_word_prob)
                db.session.commit()