Ejemplo n.º 1
0
def add_bookmark(user, original_language, original_word, translation_language,
                 translation_word, date, the_context, the_url, the_url_title):
    """Store a bookmark that links a word to its translation.

    The URL record is obtained through ``Url.find``, the context is wrapped
    in a ``Text``, and one ``UserWord`` is built for each side of the
    translation. All records plus the resulting ``Bookmark`` are added to the
    database session and committed; afterwards
    ``add_probability_to_existing_words_of_user`` is called with the new
    bookmark.
    """

    def build_user_word(word, language):
        # The ranked-word lookup uses the lower-cased form, while the
        # UserWord itself keeps the casing it was given. Words that are not
        # ranked get a rank of None.
        rank = None
        if RankedWord.exists(word.lower(), language):
            rank = UserWord.find_rank(word.lower(), language)
        return UserWord(word, language, rank)

    url = Url.find(the_url, the_url_title)
    # NOTE(review): the context text is tagged with translation_language,
    # not original_language -- confirm this is intended.
    text = Text(the_context, translation_language, url)

    origin = build_user_word(original_word, original_language)
    translation = build_user_word(translation_word, translation_language)

    session = zeeguu.db.session
    for record in (url, text, origin, translation):
        session.add(record)

    bookmark = Bookmark(origin, translation, user, text, date)
    session.add(bookmark)
    session.commit()

    add_probability_to_existing_words_of_user(user, bookmark,
                                              original_language)
Ejemplo n.º 2
0
def set_know_word_prob():
    """Build ``KnownWordProbability`` rows from both probability sources.

    First pass: for every encounter-based probability, look up the matching
    ``UserWord`` (if any). When an exercise-based probability exists for that
    user and word, the two values are combined with
    ``KnownWordProbability.calculateKnownWordProb``; otherwise the
    encounter-based value is used on its own.

    Second pass: for every exercise-based probability that has no
    encounter-based counterpart, a ``KnownWordProbability`` is created from
    the exercise-based value alone, provided the ``UserWord`` exists.

    Every row is added and committed individually.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()
    encounter_probs = EncounterBasedProbability.find_all()
    exercise_probs = ExerciseBasedProbability.find_all()

    for encounter in encounter_probs:
        user = encounter.user
        word = encounter.ranked_word.word
        language = encounter.ranked_word.language
        user_word = (UserWord.find(word, language)
                     if UserWord.exists(word, language) else None)
        if ExerciseBasedProbability.exists(user, user_word):
            exercise = ExerciseBasedProbability.find(user, user_word)
            combined = KnownWordProbability.calculateKnownWordProb(
                exercise.probability, encounter.probability)
            known = KnownWordProbability.find(
                user, user_word, encounter.ranked_word, combined)
        else:
            known = KnownWordProbability.find(
                user, None, encounter.ranked_word, encounter.probability)
        zeeguu.db.session.add(known)
        zeeguu.db.session.commit()

    for exercise in exercise_probs:
        user = exercise.user
        language = exercise.user_word.language
        word = exercise.user_word.word
        ranked_word = (RankedWord.find(word, language)
                       if RankedWord.exists(word, language) else None)
        # Already handled by the first pass.
        if EncounterBasedProbability.exists(user, ranked_word):
            continue
        if not UserWord.exists(word, language):
            continue
        user_word = UserWord.find(word, language)
        known = KnownWordProbability(
            user, user_word, ranked_word, exercise.probability)
        zeeguu.db.session.add(known)
        zeeguu.db.session.commit()

    # Parenthesised form prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("job3")
Ejemplo n.º 3
0
def add_bookmark(user, original_language, original_word,
                 translation_language, translation_word, date,
                 the_context, the_url, the_url_title):
    """Persist a bookmark for a translated word together with its context.

    Creates the ``Text`` for the context (attached to the URL returned by
    ``Url.find``), one ``UserWord`` per side of the translation and the
    ``Bookmark`` itself, commits them, and then calls
    ``add_probability_to_existing_words_of_user`` for the new bookmark.
    """
    url = Url.find(the_url, the_url_title)
    # NOTE(review): the text is created with translation_language rather
    # than original_language -- confirm this is intended.
    text = Text(the_context, translation_language, url)

    # A rank is only looked up when the lower-cased word is in the ranked
    # word list; otherwise the UserWord is created without one.
    origin_key = original_word.lower()
    origin_rank = (UserWord.find_rank(origin_key, original_language)
                   if RankedWord.exists(origin_key, original_language)
                   else None)
    origin = UserWord(original_word, original_language, origin_rank)

    translation_key = translation_word.lower()
    translation_rank = (
        UserWord.find_rank(translation_key, translation_language)
        if RankedWord.exists(translation_key, translation_language)
        else None)
    translation = UserWord(translation_word, translation_language,
                           translation_rank)

    for entity in (url, text, origin, translation):
        zeeguu.db.session.add(entity)
    new_bookmark = Bookmark(origin, translation, user, text, date)
    zeeguu.db.session.add(new_bookmark)

    zeeguu.db.session.commit()
    add_probability_to_existing_words_of_user(user, new_bookmark,
                                              original_language)
Ejemplo n.º 4
0
def set_default_encounter_based_prob():
    """Seed or update encounter-based probabilities from bookmark contexts.

    For every user and every language, the words of the context texts of the
    user's bookmarks (restricted to bookmarks whose origin word is in that
    language) are collected. Words the user bookmarked themselves are
    dropped; each remaining occurrence that is in the ranked word list either
    increments ``not_looked_up_counter`` on the existing
    ``EncounterBasedProbability`` or adds a new one obtained with the default
    probability of 0.5.

    New rows are committed as they are added, and a final commit persists
    counter increments that were made after the last insert.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()
    default_probability = 0.5
    languages = Language.all()
    users = User.find_all()
    for user in users:
        for lang in languages:
            # Set membership keeps the "was this word bookmarked?" test
            # cheap for users with many bookmarks.
            marked_words = set()
            context_words = []
            for bookmark in Bookmark.find_by_specific_user(user):
                if bookmark.origin.language == lang:
                    context_words.extend(
                        re.findall(r'(?u)\w+', bookmark.text.content))
                    marked_words.add(bookmark.origin.word)

            for word_known in context_words:
                if word_known in marked_words:
                    continue
                if not RankedWord.exists(word_known, lang):
                    continue
                rank = RankedWord.find(word_known, lang)
                if EncounterBasedProbability.exists(user, rank):
                    prob = EncounterBasedProbability.find(
                        user, rank, default_probability)
                    prob.not_looked_up_counter += 1
                else:
                    prob = EncounterBasedProbability.find(
                        user, rank, default_probability)
                    zeeguu.db.session.add(prob)
                    # Commit right away so a repeated occurrence of the same
                    # word takes the "exists" branch above.
                    zeeguu.db.session.commit()

    # Persist counter increments made since the last insert-triggered commit.
    zeeguu.db.session.commit()
    # Parenthesised form works as both the Python 2 statement and the
    # Python 3 function.
    print('job2')
Ejemplo n.º 5
0
def add_ranked_words_to_db(lang_code):
    """Add one ``RankedWord`` per entry of the filtered word list.

    The words come from ``filter_word_list(word_list(lang_code))``. Each one
    is stored lower-cased with its 1-based position in that sequence as the
    third constructor argument. Everything is committed in a single commit
    at the end.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()
    language = Language.find(lang_code)
    words = filter_word_list(word_list(lang_code))
    for position, word in enumerate(words, 1):
        zeeguu.db.session.add(RankedWord(word.lower(), language, position))
    zeeguu.db.session.commit()
Ejemplo n.º 6
0
    def test_text_difficulty(self):
        """Check the scores returned by ``/get_difficulty_for_text/de``.

        Posts two German texts with personalisation enabled and verifies
        that every score lies in [0, 1] and that each text, identified by
        its round-tripped ``id``, has the expected median and average.
        """
        data = json.dumps(dict(
            texts=[dict(content='Der die das warum, wer nicht fragt bleibt bewölkt!', id=1),
                   dict(content='Dies ist ein Test.', id=2)],
            personalized='true'))

        RankedWord.cache_ranked_words()

        rv = self.api_post('/get_difficulty_for_text/de', data, 'application/json')

        difficulties = json.loads(rv.data)['difficulties']
        for difficulty in difficulties:
            assert 0.0 <= difficulty['score_median'] <= 1.0
            assert 0.0 <= difficulty['score_average'] <= 1.0
            # Compare ids by value: "is" tests object identity and only
            # matches small ints as an interpreter implementation detail.
            if difficulty['id'] == 1:
                assert difficulty['score_median'] == 1.0
                assert round(difficulty['score_average'], 2) == 0.67
            elif difficulty['id'] == 2:
                assert difficulty['score_median'] == 1.0
                assert difficulty['score_average'] == 0.50075
Ejemplo n.º 7
0
def set_know_word_prob():
    """Derive ``KnownWordProbability`` entries for all users.

    Encounter-based probabilities are processed first. Each one is combined
    with the exercise-based probability of the same user and word when one
    exists (via ``KnownWordProbability.calculateKnownWordProb``), and used
    unchanged otherwise. Exercise-based probabilities that have no
    encounter-based counterpart are processed afterwards and yield a
    ``KnownWordProbability`` only when the ``UserWord`` exists.

    Each entry is added to the session and committed on its own.
    """
    zeeguu.app.test_request_context().push()
    session = zeeguu.db.session
    session.commit()
    enc_probs = EncounterBasedProbability.find_all()
    ex_probs = ExerciseBasedProbability.find_all()

    for enc_prob in enc_probs:
        user = enc_prob.user
        ranked_word = enc_prob.ranked_word
        word, language = ranked_word.word, ranked_word.language

        user_word = None
        if UserWord.exists(word, language):
            user_word = UserWord.find(word, language)

        # Pick the word reference and probability first, then do a single
        # find() call with whichever pair applies.
        if ExerciseBasedProbability.exists(user, user_word):
            ex_prob = ExerciseBasedProbability.find(user, user_word)
            owner_word = user_word
            probability = KnownWordProbability.calculateKnownWordProb(
                ex_prob.probability, enc_prob.probability)
        else:
            owner_word = None
            probability = enc_prob.probability

        known = KnownWordProbability.find(
            user, owner_word, ranked_word, probability)
        session.add(known)
        session.commit()

    for ex_prob in ex_probs:
        user = ex_prob.user
        language = ex_prob.user_word.language
        word = ex_prob.user_word.word

        ranked_word = None
        if RankedWord.exists(word, language):
            ranked_word = RankedWord.find(word, language)

        # Only exercise-based entries without an encounter-based counterpart
        # are handled here; the UserWord check runs only in that case.
        if (not EncounterBasedProbability.exists(user, ranked_word)
                and UserWord.exists(word, language)):
            user_word = UserWord.find(word, language)
            known = KnownWordProbability(
                user, user_word, ranked_word, ex_prob.probability)
            session.add(known)
            session.commit()

    # Same output under the Python 2 print statement and the Python 3
    # print function.
    print('job3')
Ejemplo n.º 8
0
def get_known_words(lang_code):
    """Return the current user's known words as a JSON response.

    A word counts as known when ``check_is_latest_outcome_too_easy()`` is
    true for one of the user's bookmarks and the bookmark's origin word
    exists as a ranked word for the requested language. The response body is
    a JSON list of ``{"word": ...}`` objects without duplicates.
    """
    language = Language.find(lang_code)
    too_easy_words = [
        bookmark.origin.word
        for bookmark in flask.g.user.all_bookmarks()
        if bookmark.check_is_latest_outcome_too_easy()
    ]

    ranked_known_words = []
    for candidate in too_easy_words:
        if RankedWord.exists(candidate, language):
            ranked_known_words.append(candidate)
            # A commit is issued after every accepted word.
            zeeguu.db.session.commit()

    payload = [{'word': word} for word in set(ranked_known_words)]
    return flask.Response(json.dumps(payload), status=200,
                          mimetype='application/json')
Ejemplo n.º 9
0
def get_known_words(lang_code):
    """Build a JSON response listing the words the current user knows.

    Candidate words are the origin words of the user's bookmarks for which
    ``check_is_latest_outcome_too_easy()`` holds. Candidates that are not
    ranked words of the requested language are dropped, and duplicates are
    removed before serialising to a list of ``{"word": ...}`` objects.
    """
    language = Language.find(lang_code)
    bookmarks = flask.g.user.all_bookmarks()

    candidates = []
    for bookmark in bookmarks:
        if not bookmark.check_is_latest_outcome_too_easy():
            continue
        candidates.append(bookmark.origin.word)

    ranked = []
    for candidate in candidates:
        if not RankedWord.exists(candidate, language):
            continue
        ranked.append(candidate)
        # The session is committed once per accepted word.
        zeeguu.db.session.commit()

    unique_words = list(set(ranked))
    body = json.dumps([dict(word=entry) for entry in unique_words])
    return flask.Response(body, status=200, mimetype='application/json')
Ejemplo n.º 10
0
def get_difficulty_for_text(lang_code):
    """
    Calculate difficulty scores for the texts sent in the JSON request body.

    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the difficulty for. Each text is an object
        with the text itself as 'content' and an additional 'id' which gets roundtripped unchanged
    :param personalized (optional): calculate difficulty score for a specific user? (Enabled by default;
        the strings 'false' and '0' or a JSON false disable it)
    :param rank_boundary (optional): upper boundary for word frequency rank (capped at 10'000)

    :return difficulties: json array, contains one object per text with the key 'score_median' for the median
        and 'score_average' for the average difficulty (values between 0 (easy) and 1 (hard)) and the 'id'
        parameter to identify the corresponding text. A text without any words gets 0.0 for both scores.
        The plain string 'FAIL' is returned when the language is unknown or the body has no 'texts' entry.
    """
    language = Language.find(lang_code)
    if language is None:
        return 'FAIL'

    data = flask.request.get_json()
    # get_json() may hand back something other than an object (for example
    # None for a JSON null body); treat that like a missing 'texts' entry.
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'
    texts = list(data['texts'])

    personalized = True
    if 'personalized' in data:
        personalized = data['personalized']
        # Only strings have lower(); JSON booleans and numbers are used by
        # their truth value directly.
        if hasattr(personalized, 'lower'):
            personalized = personalized.lower()
            if personalized in ('false', '0'):
                personalized = False

    rank_boundary = 10000.0
    if 'rank_boundary' in data:
        rank_boundary = min(float(data['rank_boundary']), 10000.0)

    user = flask.g.user
    known_probabilities = KnownWordProbability.find_all_by_user_cached(user)

    difficulties = []
    for text in texts:
        # Calculate difficulty for each word
        words = util.split_words_from_text(text['content'])
        words_difficulty = []
        for word in words:
            ranked_word = RankedWord.find_cache(word, language)

            word_difficulty = 1.0  # Value between 0 (easy) and 1 (hard)
            if ranked_word is not None:
                # Check if the user knows the word
                try:
                    # Value between 0 (unknown) and 1 (known)
                    known_probability = known_probabilities[word]
                except KeyError:
                    known_probability = None

                if personalized and known_probability is not None:
                    word_difficulty -= float(known_probability)
                elif ranked_word.rank <= rank_boundary:
                    # Value between 0 (rare) and 1 (frequent)
                    word_frequency = (
                        rank_boundary - (ranked_word.rank - 1)
                    ) / rank_boundary
                    word_difficulty -= word_frequency

            words_difficulty.append(word_difficulty)

        if words_difficulty:
            # Median difficulty for text. Floor division picks the upper of
            # the two middle elements for an even count, on Python 2 and 3.
            words_difficulty.sort()
            difficulty_median = words_difficulty[len(words_difficulty) // 2]

            # Average difficulty for text
            difficulty_average = sum(words_difficulty) / float(
                len(words_difficulty))
        else:
            # No words at all: nothing to index or divide by.
            difficulty_median = 0.0
            difficulty_average = 0.0

        difficulties.append(
            dict(score_median=difficulty_median,
                 score_average=difficulty_average,
                 id=text['id']))

    response = json.dumps(dict(difficulties=difficulties))

    return flask.Response(response, status=200, mimetype='application/json')
Ejemplo n.º 11
0
    if os.environ.get("ZEEGUU_TESTING"):
        db_name = "zeeguu_test"
        if os.environ.get("ZEEGUU_PERFORMANCE_TESTING"):
            db_name = "zeeguu_performance_test"
        db_connection_string += mysql_hostname + "/" + db_name
        app.config["SQLALCHEMY_DATABASE_URI"] = db_connection_string
    else:
        #  Ooops: we are not testing, and we don't have a DB configured!
        if not "SQLALCHEMY_DATABASE_URI" in app.config:
            print "No db configured. You probably have no config file..."
            exit()

    print "->>  DB Connection String: " + app.config["SQLALCHEMY_DATABASE_URI"]


# Module-level application setup. The statements below run at import time
# and depend on their order.

# Presumably fills in app.config["SQLALCHEMY_DATABASE_URI"] before the db
# object is bound to the app further down -- confirm against the definition.
setup_db_connection()

# Asset environment for the app: cache under the instance path, generated
# files in <instance_path>/gen, served under the "/gen" URL.
# NOTE(review): the flask.ext.* import namespace was removed in Flask 1.0 --
# confirm the pinned Flask version before upgrading.
env = flask.ext.assets.Environment(app)
env.cache = app.instance_path
env.directory = os.path.join(app.instance_path, "gen")
env.url = "/gen"
# Register the "static" directory next to this file under the "/static" URL.
env.append_path(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "static"),
    "/static")

# Bind the database object to the app, then run create_all for that app.
db.init_app(app)
db.create_all(app=app)

# Imported here instead of at the top of the module -- presumably to avoid a
# circular import with zeeguu.model; confirm before moving it.
from zeeguu.model import RankedWord
# Fill the ranked-word cache once at start-up, inside an application context.
with app.app_context():
    RankedWord.cache_ranked_words()
Ejemplo n.º 12
0
def get_difficulty_for_text(lang_code):
    """
    Score how difficult each submitted text is, optionally for the current user.

    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the difficulty for. Each text is an object
        with the text itself as 'content' and an additional 'id' which gets roundtripped unchanged
    :param personalized (optional): calculate difficulty score for a specific user? (Enabled by default;
        the strings 'false' and '0' or a JSON false disable it)
    :param rank_boundary (optional): upper boundary for word frequency rank (capped at 10'000)

    :return difficulties: json array, contains one object per text with the key 'score_median' for the median
        and 'score_average' for the average difficulty (values between 0 (easy) and 1 (hard)) and the 'id'
        parameter to identify the corresponding text. A text without any words gets 0.0 for both scores.
        The plain string 'FAIL' is returned when the language is unknown or the body has no 'texts' entry.
    """
    language = Language.find(lang_code)
    if language is None:
        return 'FAIL'

    data = flask.request.get_json()
    # A body that is not a JSON object (e.g. None for a JSON null) cannot
    # contain 'texts'; answer 'FAIL' instead of raising on the lookup.
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'
    texts = list(data['texts'])

    personalized = True
    if 'personalized' in data:
        personalized = data['personalized']
        # Strings are compared case-insensitively; any other JSON value
        # (true/false, numbers) is used by its truth value.
        if hasattr(personalized, 'lower'):
            personalized = personalized.lower()
            if personalized in ('false', '0'):
                personalized = False

    rank_boundary = 10000.0
    if 'rank_boundary' in data:
        rank_boundary = min(float(data['rank_boundary']), 10000.0)

    user = flask.g.user
    known_probabilities = KnownWordProbability.find_all_by_user_cached(user)

    difficulties = []
    for text in texts:
        # Calculate difficulty for each word
        words = util.split_words_from_text(text['content'])
        words_difficulty = []
        for word in words:
            ranked_word = RankedWord.find_cache(word, language)

            word_difficulty = 1.0  # Value between 0 (easy) and 1 (hard)
            if ranked_word is not None:
                # Check if the user knows the word
                try:
                    # Value between 0 (unknown) and 1 (known)
                    known_probability = known_probabilities[word]
                except KeyError:
                    known_probability = None

                if personalized and known_probability is not None:
                    word_difficulty -= float(known_probability)
                elif ranked_word.rank <= rank_boundary:
                    # Value between 0 (rare) and 1 (frequent)
                    word_frequency = (rank_boundary - (ranked_word.rank - 1)) / rank_boundary
                    word_difficulty -= word_frequency

            words_difficulty.append(word_difficulty)

        if not words_difficulty:
            # A text without words has no median or average to compute.
            difficulty_median = 0.0
            difficulty_average = 0.0
        else:
            # Median difficulty for text; "//" keeps the index an int on
            # Python 2 and 3 (upper middle element for an even count).
            words_difficulty.sort()
            difficulty_median = words_difficulty[len(words_difficulty) // 2]

            # Average difficulty for text
            difficulty_average = sum(words_difficulty) / float(len(words_difficulty))

        difficulties.append(dict(score_median=difficulty_median,
                                 score_average=difficulty_average,
                                 id=text['id']))

    response = json.dumps(dict(difficulties=difficulties))

    return flask.Response(response, status=200, mimetype='application/json')
Ejemplo n.º 13
0
    if os.environ.get("ZEEGUU_TESTING"):
        db_name = "zeeguu_test"
        if os.environ.get("ZEEGUU_PERFORMANCE_TESTING"):
            db_name = "zeeguu_performance_test"
        db_connection_string += mysql_hostname+"/"+db_name
        app.config["SQLALCHEMY_DATABASE_URI"] = db_connection_string
    else:
        #  Ooops: we are not testing, and we don't have a DB configured!
        if not "SQLALCHEMY_DATABASE_URI" in app.config:
            print "No db configured. You probably have no config file..."
            exit()

    print "->>  DB Connection String: " + app.config["SQLALCHEMY_DATABASE_URI"]


# Module-level application setup, executed at import time; the statements
# depend on their order.

# Presumably sets app.config["SQLALCHEMY_DATABASE_URI"] so the db object can
# be bound to the app below -- confirm against the definition.
setup_db_connection()

# Asset environment: cache in the instance path, output directory
# <instance_path>/gen, exposed under the "/gen" URL.
# NOTE(review): flask.ext.* was removed in Flask 1.0 -- confirm the pinned
# Flask version before upgrading.
env = flask.ext.assets.Environment(app)
env.cache = app.instance_path
env.directory = os.path.join(app.instance_path, "gen")
env.url = "/gen"
# Add the "static" directory that sits next to this file, under "/static".
env.append_path(os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "static"
), "/static")

# Bind the database object to the app, then run create_all for that app.
db.init_app(app)
db.create_all(app=app)

# Late import -- presumably to avoid a circular import with zeeguu.model;
# confirm before moving it to the top of the file.
from zeeguu.model import RankedWord
# Populate the ranked-word cache once, inside an application context.
with app.app_context():
    RankedWord.cache_ranked_words()