def create_minimal_test_db(db):
    """
    Reset the schema and populate it with a small set of test fixtures.

    Creates five languages, one test user, the exercise outcomes and
    sources, two German bookmarks for that user and five randomly
    generated exercises attached to those bookmarks. Also sets the
    module-level TEST_BOOKMARKS_COUNT and commits the session.

    :param db: database handle; its ``session`` is used directly and the
        handle itself is passed to ``drop_current_tables`` / ``add_bookmark``
    """
    global TEST_BOOKMARKS_COUNT

    drop_current_tables(db)

    # Languages (constructed and added in the same order as before)
    german = Language("de", "German")
    english = Language("en", "English")
    dutch = Language("nl", "Dutch")
    spanish = Language("es", "Spanish")
    french = Language("fr", "French")
    db.session.add_all([english, german, dutch, spanish, french])

    # The single test user: learns German, native English
    test_user = User(TEST_EMAIL, "Mircea", TEST_PASS, german, english)
    db.session.add(test_user)

    outcome_labels = ("Show solution", "Retry", "Correct", "Wrong", "Typo",
                      "Too easy")
    outcomes = [ExerciseOutcome(label) for label in outcome_labels]
    db.session.add_all(outcomes)

    sources = [ExerciseSource(label) for label in ("Recognize", "Translate")]
    db.session.add_all(sources)

    # Both bookmarks come from the same page and share the same timestamp
    bookmark_time = datetime.datetime(2011, 1, 1, 1, 1, 1)
    page_url = "http://www.derkleineprinz-online.de/text/2-kapitel/"
    page_title = "Der Kleine Prinz - Kapitel 2"
    bookmarks = [
        add_bookmark(db, test_user, german, "Schaf", english, "sheep",
                     bookmark_time, "Bitte... zeichne mir ein Schaf!",
                     page_url, page_title),
        add_bookmark(db, test_user, german, "sprang", english, "jumped",
                     bookmark_time, "Ich sprang auf die Fusse.",
                     page_url, page_title),
    ]

    # Five random exercises; the random draws happen in the order
    # source, outcome, solving speed, bookmark.
    for _ in range(5):
        picked_source = random.choice(sources)
        picked_outcome = random.choice(outcomes)
        solving_speed = random.randint(500, 5000)
        exercise = Exercise(picked_outcome, picked_source, solving_speed,
                            datetime.datetime.now())
        random.choice(bookmarks).add_new_exercise(exercise)

    TEST_BOOKMARKS_COUNT = 2
    db.session.commit()
def __init__(self, email, name, password, learned_language=None,
             native_language=None, invitation_code=None):
    """
    Initialise the user's basic fields.

    :param email: stored as ``self.email``
    :param name: stored as ``self.name``
    :param password: handed to ``self.update_password``
    :param learned_language: falls back to ``Language.default_learned()``
        when falsy
    :param native_language: falls back to
        ``Language.default_native_language()`` when falsy
    :param invitation_code: stored as ``self.invitation_code``
    """
    self.email = email
    self.name = name
    self.update_password(password)

    if learned_language:
        self.learned_language = learned_language
    else:
        self.learned_language = Language.default_learned()

    if native_language:
        self.native_language = native_language
    else:
        self.native_language = Language.default_native_language()

    self.invitation_code = invitation_code
def find_or_create(cls, session, user, _origin: str, _origin_lang: str,
                   _translation: str, _translation_lang: str, _context: str,
                   _url: str, _url_title: str, article_id: int):
    """
    Return the bookmark for this user, word and context, creating it if
    it does not exist. If it exists, its translation is **updated**.

    The bookmark is added to the session and the session is committed
    before returning.

    :param session: database session used for all lookups and the commit
    :param user: owner of the bookmark
    :param _origin: the bookmarked word
    :param _origin_lang: language code of the bookmarked word
    :param _translation: the translation of the word
    :param _translation_lang: language code of the translation
    :param _context: the text surrounding the word
    :param _url: not used by this method; the url is taken from the
        article identified by ``article_id``
    :param _url_title: title passed to ``Url.find_or_create``
    :param article_id: id of the article the context belongs to; the
        lookup uses ``.one()``
    :return: the found or newly created bookmark
    """
    origin_lang = Language.find_or_create(_origin_lang)
    translation_lang = Language.find_or_create(_translation_lang)

    origin = UserWord.find_or_create(session, _origin, origin_lang)

    article = Article.query.filter_by(id=article_id).one()
    url = Url.find_or_create(session, article.url.as_string(), _url_title)

    context = Text.find_or_create(session, _context, origin_lang, url,
                                  article)

    translation = UserWord.find_or_create(session, _translation,
                                          translation_lang)

    try:
        # try to find this bookmark
        bookmark = Bookmark.find_by_user_word_and_text(user, origin, context)
    except sqlalchemy.orm.exc.NoResultFound:
        bookmark = cls(origin, translation, user, context, datetime.now())
    else:
        # it already exists: only the translation is refreshed
        bookmark.translation = translation

    session.add(bookmark)
    session.commit()

    return bookmark
def get_possible_translations(from_lang_code, to_lang_code):
    """
    Return the alternative translations for the posted word.

    The word to translate is read from the mandatory form field 'word'.

    :param from_lang_code: language code of the word
    :param to_lang_code: language code to translate into
    :return: json object whose 'translations' entry is a list of
        dictionaries, each with the keys 'translation_id', 'translation'
        and 'likelihood'. The likelihood starts at 1.0 and decreases by
        0.01 for every further alternative.
    """
    word = request.form['word']

    # Only the alternatives are used; the main translation is discarded.
    _, alternatives = zeeguu.api.translation_service.translate_from_to(
        word, from_lang_code, to_lang_code)

    # NOTE(review): the translations are looked up under the *source*
    # language (from_lang_code), not to_lang_code -- confirm this is intended.
    lan = Language.find(from_lang_code)

    translations_json = []
    likelihood = 1.0
    for translation in alternatives:
        wor = UserWord.find(translation, lan)
        zeeguu.db.session.add(wor)
        # committed before wor.id is read below (presumably so that the id
        # is assigned -- confirm)
        zeeguu.db.session.commit()
        translations_json.append(dict(translation_id=wor.id,
                                      translation=translation,
                                      likelihood=likelihood))
        likelihood -= 0.01

    return json_result(dict(translations=translations_json))
def __init__(self, user, lang_code=None):
    """
    Bind the object to a user and a language.

    :param user: stored as ``self.user``
    :param lang_code: language code to use; when falsy, the user's
        ``learned_language_id`` is used instead
    """
    self.user = user
    self.lang_code = lang_code if lang_code else self.user.learned_language_id
    self.language = Language.find(self.lang_code)
def start_following_feed():
    """
    Start following a feed for which the client provides all the metadata.

    This is useful for the cases where badly formed feeds can't be parsed
    by feedparser. The metadata is read from the form field 'feed_info',
    a json object with the keys 'image', 'language', 'url', 'title' and
    'description'.

    :return: "OK"
    """
    # json.loads takes no positional encoding argument on Python 3
    # (it raises TypeError), so none is passed.
    feed_info = json.loads(request.form.get('feed_info', ''))

    image_url = feed_info["image"]
    language = Language.find(feed_info["language"])
    url_string = feed_info["url"]
    title = feed_info["title"]
    description = feed_info["description"]

    url = Url.find(url_string)
    zeeguu.db.session.add(url)
    # Important to commit this url first; otherwise we end up creating
    # two domains with the same name for both the urls...
    zeeguu.db.session.commit()

    feed_image_url = Url.find(image_url)

    feed_object = RSSFeed.find_or_create(url, title, description,
                                         feed_image_url, language)
    feed_registration = RSSFeedRegistration.find_or_create(flask.g.user,
                                                           feed_object)

    zeeguu.db.session.add_all([feed_image_url, feed_object,
                               feed_registration])
    zeeguu.db.session.commit()

    return "OK"
def get_possible_translations(from_lang_code, to_lang_code):
    """
    Return the alternative translations for the posted word.

    The word to translate is read from the mandatory form field 'word'.

    :param from_lang_code: language code of the word
    :param to_lang_code: language code to translate into
    :return: json object whose 'translations' entry is a list of
        dictionaries, each with the keys 'translation_id', 'translation'
        and 'likelihood'. The likelihood starts at 1.0 and decreases by
        0.01 for every further alternative.
    """
    word = request.form['word']

    # Only the alternatives are used; the main translation is discarded.
    _, alternatives = zeeguu.api.translation_service.translate_from_to(
        word, from_lang_code, to_lang_code)

    # NOTE(review): the translations are looked up under the *source*
    # language (from_lang_code), not to_lang_code -- confirm this is intended.
    lan = Language.find(from_lang_code)

    translations_json = []
    likelihood = 1.0
    for translation in alternatives:
        wor = UserWord.find(translation, lan)
        zeeguu.db.session.add(wor)
        # committed before wor.id is read below (presumably so that the id
        # is assigned -- confirm)
        zeeguu.db.session.commit()
        translations_json.append(dict(translation_id=wor.id,
                                      translation=translation,
                                      likelihood=likelihood))
        likelihood -= 0.01

    return json_result(dict(translations=translations_json))
def find_or_create(cls, session, user: User, _url, _title: str, _language):
    """
    Retrieve the object for this user and url, creating it when missing.

    On creation the new object is added to the session and committed.
    If anything goes wrong while creating, the session is rolled back and
    the lookup is repeated (another writer may have created the row in
    the meantime).

    :param session: database session used for creation
    :param user: owner of the object
    :param _url: url string, resolved through ``Url.find_or_create``
    :param _title: title used for the url and the new object
    :param _language: language code, resolved through ``Language.find``
    :return: the existing or newly created object
    """
    language = Language.find(_language)
    url = Url.find_or_create(session, _url, _title)

    def lookup():
        return cls.query.filter_by(user=user, url=url).one()

    try:
        return lookup()
    except NoResultFound:
        pass

    try:
        created = cls(user, url, _title, language)
        session.add(created)
        session.commit()
        return created
    except Exception:
        print("seems we avoided a race condition")
        session.rollback()
        return lookup()
def set_default_encounter_based_prob():
    """
    Initialise / update the encounter based probabilities of all users.

    For every user and language, the words occurring in the contexts of
    the user's bookmarks that the user did NOT bookmark are collected.
    For each of those that is a ranked word, the matching
    EncounterBasedProbability gets its ``not_looked_up_counter``
    incremented when it already exists; otherwise it is obtained via
    ``EncounterBasedProbability.find`` with the default probability 0.5
    and added to the session.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()
    default_probability = 0.5
    languages = Language.all()
    users = User.find_all()

    for user in users:
        for lang in languages:
            # a set gives constant-time membership tests below
            # (assumes the bookmarked words are hashable strings)
            marked_words_of_user_in_text = set()
            words_of_all_bookmarks_content = []
            for bookmark in Bookmark.find_by_specific_user(user):
                if bookmark.origin.language == lang:
                    words_of_all_bookmarks_content.extend(
                        re.findall(r'(?u)\w+', bookmark.text.content))
                    marked_words_of_user_in_text.add(bookmark.origin.word)

            for word_known in words_of_all_bookmarks_content:
                if word_known in marked_words_of_user_in_text:
                    continue
                if not RankedWord.exists(word_known, lang):
                    continue
                rank = RankedWord.find(word_known, lang)
                if EncounterBasedProbability.exists(user, rank):
                    prob = EncounterBasedProbability.find(
                        user, rank, default_probability)
                    prob.not_looked_up_counter += 1
                else:
                    prob = EncounterBasedProbability.find(
                        user, rank, default_probability)
                    zeeguu.db.session.add(prob)
                # NOTE(review): the original indentation was lost; the commit
                # is assumed to run once per ranked word -- confirm.
                zeeguu.db.session.commit()

    # call form prints the same text on Python 2 and Python 3
    print('job2')
def cache_ranked_words(cls):
    """
    Rebuild ``cls.ranked_words_cache`` from scratch.

    The cache maps ``<language id>_<word>`` to the ranked word object, for
    every language returned by ``Language.all()`` and every ranked word
    returned by ``cls.find_all`` for that language.
    """
    cache = {}
    cls.ranked_words_cache = cache
    for language in Language.all():
        for entry in cls.find_all(language):
            cache[language.id + '_' + entry.word] = entry
def set_default_encounter_based_prob():
    """
    Initialise / update the encounter based probabilities of all users.

    For every user and language, the words occurring in the contexts of
    the user's bookmarks that the user did NOT bookmark are collected.
    For each of those that is a ranked word, the matching
    EncounterBasedProbability gets its ``not_looked_up_counter``
    incremented when it already exists; otherwise it is obtained via
    ``EncounterBasedProbability.find`` with the default probability 0.5
    and added to the session.
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()
    default_probability = 0.5
    languages = Language.all()
    users = User.find_all()

    for user in users:
        for lang in languages:
            # a set gives constant-time membership tests below
            # (assumes the bookmarked words are hashable strings)
            marked_words_of_user_in_text = set()
            words_of_all_bookmarks_content = []
            for bookmark in Bookmark.find_by_specific_user(user):
                if bookmark.origin.language == lang:
                    words_of_all_bookmarks_content.extend(
                        re.findall(r'(?u)\w+', bookmark.text.content))
                    marked_words_of_user_in_text.add(bookmark.origin.word)

            for word_known in words_of_all_bookmarks_content:
                if word_known in marked_words_of_user_in_text:
                    continue
                if not RankedWord.exists(word_known, lang):
                    continue
                rank = RankedWord.find(word_known, lang)
                if EncounterBasedProbability.exists(user, rank):
                    prob = EncounterBasedProbability.find(
                        user, rank, default_probability)
                    prob.not_looked_up_counter += 1
                else:
                    prob = EncounterBasedProbability.find(
                        user, rank, default_probability)
                    zeeguu.db.session.add(prob)
                # NOTE(review): the original indentation was lost; the commit
                # is assumed to run once per ranked word -- confirm.
                zeeguu.db.session.commit()

    # call form prints the same text on Python 2 and Python 3
    print('job2')
def create_account():
    """
    Render the account creation page (GET) or create the account (POST).

    On POST the form fields 'password', 'email', 'name', 'code',
    'language' and 'native_language' are read. The invitation code must
    be one of the accepted codes and name, email and password must be
    present. On success the user is stored, logged in via the flask
    session and redirected to "account.my_account"; in every other case
    a message is flashed and the creation page is rendered again.
    """
    # A cool way of passing the arguments to the flask template
    template_arguments = dict(
        languages=Language.all(),
        native_languages=Language.native_languages(),
        default_learned=Language.default_learned(),
    )

    # GET
    if flask.request.method == "GET":
        return flask.render_template("create_account.html",
                                     **template_arguments)

    # POST
    form = flask.request.form
    password = form.get("password", None)
    email = form.get("email", None)
    name = form.get("name", None)
    code = form.get("code", None)
    language = Language.find(form.get("language", None))
    native_language = Language.find(form.get("native_language", None))

    if code not in ("Kairo", "unibe", "rug", "42"):
        flash("Invitation code is not recognized. Please contact us.")
    elif password is None or email is None or name is None:
        flash("Please enter your name, email address, and password")
    else:
        try:
            zeeguu.db.session.add(
                User(email, name, password, language, native_language))
            zeeguu.db.session.commit()
            user = User.authorize(email, password)
            flask.session["user"] = user.id
            return flask.redirect(flask.url_for("account.my_account"))
        except ValueError:
            flash("Username could not be created. Please contact us.")
        except sqlalchemy.exc.IntegrityError:
            flash(email + " is already in use. Please select a different email.")
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are not swallowed
            flash("Something went wrong. Please contact us.")
        finally:
            # runs on success too, after the commit above
            zeeguu.db.session.rollback()

    return flask.render_template("create_account.html", **template_arguments)
def available_languages():
    """
    :return: json with language codes for the supported languages.
        e.g. ["en", "fr", "de", "it", "no", "ro"]
    """
    # A real list is needed: on Python 3 map() returns an iterator,
    # which json.dumps cannot serialize.
    available_language_codes = [
        language.id for language in Language.available_languages()
    ]
    return json.dumps(available_language_codes)
def available_native_languages():
    """
    :return: json with language codes for the supported native languages.
        e.g. ["en", "fr", "de", "it", "no", "ro"]
    """
    # A real list is needed: on Python 3 map() returns an iterator,
    # which json.dumps cannot serialize.
    available_language_codes = [
        language.id for language in Language.native_languages()
    ]
    return json.dumps(available_language_codes)
def setUp(self):
    """Prepare the fixtures shared by the tests: a user and German."""
    # The superclass prepares the DB before each of the tests
    super(Dbtest, self).setUp()

    # Common fixtures; the test user must already exist in the DB
    self.mir = test_user = User.find("*****@*****.**")
    assert test_user
    self.de = Language.find("de")
def bookmark_with_context(from_lang_code, to_lang_code, word_str, url_str,
                          title_str, context_str, translation_str):
    """
    Look up the bookmark for a word/context pair of the current user and
    return its id; a new bookmark is created only when none is found.

    :param from_lang_code: language code of the word and its context
    :param to_lang_code: language code of the translation
    :param word_str: the bookmarked word
    :param url_str: url of the page the context comes from
    :param title_str: title of that page
    :param context_str: the text surrounding the word
    :param translation_str: translation of the word
    :return: the bookmark id, as a string
    """
    db_session = zeeguu.db.session

    source_language = Language.find(from_lang_code)
    target_language = Language.find(to_lang_code)

    origin_word = UserWord.find(word_str, source_language)

    page_url = Url.find(url_str, title_str)
    db_session.add(page_url)
    db_session.commit()

    text = Text.find_or_create(context_str, source_language, page_url)
    db_session.add(text)
    db_session.commit()

    translated_word = UserWord.find(translation_str, target_language)

    try:
        matches = Bookmark.find_all_by_user_word_and_text(
            flask.g.user, origin_word, text)
        bookmark = matches[0]
        # TODO: Think about updating the date of this bookmark, or maybe
        # creating a duplicate; otherwise, in the history this translation
        # will not be visible!
    except Exception:
        # any failure above (including an empty result) leads to a new one
        bookmark = Bookmark(origin_word, translated_word, flask.g.user, text,
                            datetime.now())

    db_session.add(bookmark)
    bookmark.calculate_probabilities_after_adding_a_bookmark(
        flask.g.user, bookmark.origin.language)
    db_session.commit()

    return str(bookmark.id)
def create_anonymous(cls, uuid, password, learned_language_code=None,
                     native_language_code=None):
    """
    Build (but do not persist) an anonymous user.

    :param uuid: used as the user's name and as the local part of the
        generated email address
    :param password: password of the new user
    :param learned_language_code: optional code of the learned language
    :param native_language_code: optional code of the native language
    :return: the new, unsaved user object
    """
    def resolve(language_code):
        # None stays None; a code for which the lookup raises
        # NoResultFound also ends up as None
        if language_code is None:
            return None
        try:
            return Language.find_or_create(language_code)
        except NoResultFound:
            return None

    # since the DB must have an email we generate a fake one
    fake_email = uuid + cls.ANONYMOUS_EMAIL_DOMAIN

    learned_language = resolve(learned_language_code)
    native_language = resolve(native_language_code)

    # Adding default exercises for anonymous users used to happen here
    # (zeeguu.temporary.default_words.default_bookmarks); it is disabled.
    return cls(fake_email, uuid, password,
               learned_language=learned_language,
               native_language=native_language)
def test_languages_exists(self):
    """A language produced by LanguageRule can be found by its code."""
    expected = LanguageRule().random

    try:
        found = Language.find(expected.code)
    except NoResultFound:
        assert False, "No Language found in database"

    # code first, then name: same evaluation order as a chained `and`
    assert expected.code == found.code
    assert expected.name == found.name
def add_ranked_word_to_db(lang_code):
    """
    Add the filtered test word list of a language as ranked words.

    Each word is lower-cased and gets its 1-based position in the
    filtered list as rank.

    :param lang_code: code of the language whose word list is imported
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    language = Language.find(lang_code)
    words = filter_word_list(test_word_list(lang_code))
    for line_number, word in enumerate(words, 1):
        zeeguu.db.session.add(
            RankedWord(word.lower(), language, line_number))

    # NOTE(review): the original indentation was lost; a single commit
    # after the loop is assumed -- confirm.
    zeeguu.db.session.commit()
def __create_new_language(cls, language_id):
    """
    Create and save the language with the given id.

    :param language_id: key into ``cls.languages``, which provides the
        language name
    :return: the new Language, after it was passed to ``cls.save``
    :raises KeyError: when ``cls.languages`` has no name for
        ``language_id``; the missing id is carried in the exception
    """
    language_name = cls.languages.get(language_id)
    if language_name is None:
        # include the offending id so the failure is diagnosable
        raise KeyError(language_id)

    new_language = Language(language_id, language_name)
    cls.save(new_language)
    return new_language
def bookmark_with_context(from_lang_code, to_lang_code, word_str, url_str,
                          title_str, context_str, translation_str):
    """
    Look up the bookmark for a word/context pair of the current user and
    return its id; a new bookmark is created only when none is found.

    :param from_lang_code: language code of the word and its context
    :param to_lang_code: language code of the translation
    :param word_str: the bookmarked word
    :param url_str: url of the page the context comes from
    :param title_str: title of that page
    :param context_str: the text surrounding the word
    :param translation_str: translation of the word
    :return: the bookmark id, as a string
    """
    db_session = zeeguu.db.session

    source_language = Language.find(from_lang_code)
    target_language = Language.find(to_lang_code)

    origin_word = UserWord.find(word_str, source_language)

    page_url = Url.find(url_str, title_str)
    db_session.add(page_url)
    db_session.commit()

    text = Text.find_or_create(context_str, source_language, page_url)
    db_session.add(text)
    db_session.commit()

    translated_word = UserWord.find(translation_str, target_language)

    try:
        matches = Bookmark.find_all_by_user_word_and_text(
            flask.g.user, origin_word, text)
        bookmark = matches[0]
        # TODO: Think about updating the date of this bookmark, or maybe
        # creating a duplicate; otherwise, in the history this translation
        # will not be visible!
    except Exception:
        # any failure above (including an empty result) leads to a new one
        bookmark = Bookmark(origin_word, translated_word, flask.g.user, text,
                            datetime.now())

    db_session.add(bookmark)
    bookmark.calculate_probabilities_after_adding_a_bookmark(
        flask.g.user, bookmark.origin.language)
    db_session.commit()

    return str(bookmark.id)
def get_learned_bookmarks(lang):
    """
    Return the learned bookmarks of the current user for a language.

    :param lang: language code
    :return: json list of dictionaries with the keys 'id', 'origin'
        (the bookmarked word) and 'text' (the content of its context)
    """
    language = Language.find(lang)
    estimator = SethiKnowledgeEstimator(flask.g.user, language.id)

    bk_list = []
    for bookmark in estimator.learned_bookmarks():
        bk_list.append({
            'id': bookmark.id,
            'origin': bookmark.origin.word,
            'text': bookmark.text.content,
        })

    return json_result(bk_list)
def get_difficulty_for_text(lang_code):
    """
    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the
        difficulty for. Each text consists of an array with the text itself
        as 'content' and an additional 'id' which gets roundtripped unchanged
    :param difficulty_computer (optional): calculate difficulty score using
        a specific algorithm
    :param rank_boundary (deprecated): upper boundary for word frequency
        rank (between 1 and 10'000)
    :param personalized (deprecated): by default we always compute the
        personalized difficulty

    For an example of how the Json data looks like, see
    ../tests/api_tests.py#test_txt_difficulty(self):

    :return difficulties: json array, which contains for each text:
      * estimated_difficulty - one of three: "EASY", "MEDIUM", "HARD"
      * id - identifies the text
      * [deprecated] score_average - average difficulty of the words in the text
      * [deprecated] score_median - median difficulty of the words in the text

    Returns the string 'FAIL' when the language is unknown, or when the
    request body is not a json object with a 'texts' entry.
    """
    language = Language.find(lang_code)
    if not language:
        return 'FAIL'

    data = request.get_json()
    # get_json() may return None (no json body) or a non-object value
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'

    texts = list(data['texts'])

    difficulty_computer = 'default'
    if 'difficulty_computer' in data:
        difficulty_computer = data['difficulty_computer'].lower()

    user = flask.g.user
    known_probabilities = KnownWordProbability.find_all_by_user_cached(user)

    difficulties = []
    for text in texts:
        difficulty = text_difficulty(text["content"], language,
                                     known_probabilities, difficulty_computer)
        # roundtrip the client's id so it can match results to texts
        difficulty["id"] = text["id"]
        difficulties.append(difficulty)

    return json_result(dict(difficulties=difficulties))
def get_learned_bookmarks(lang):
    """
    Return the learned bookmarks of the current user for a language.

    :param lang: language code
    :return: json list of dictionaries with the keys 'id', 'origin'
        (the bookmarked word) and 'text' (the content of its context)
    """
    language = Language.find(lang)
    estimator = SethiKnowledgeEstimator(flask.g.user, language.id)

    bk_list = []
    for bookmark in estimator.learned_bookmarks():
        bk_list.append({
            'id': bookmark.id,
            'origin': bookmark.origin.word,
            'text': bookmark.text.content,
        })

    return json_result(bk_list)
def __init__(self, email, name, password, learned_language=None,
             native_language=None, invitation_code=None, cohort=None):
    """
    Initialise the user's basic fields and register the learned language.

    :param email: stored as ``self.email``
    :param name: stored as ``self.name``
    :param password: handed to ``self.update_password``
    :param learned_language: falls back to ``Language.default_learned()``
        when falsy
    :param native_language: falls back to
        ``Language.default_native_language()`` when falsy
    :param invitation_code: stored as ``self.invitation_code``
    :param cohort: stored as ``self.cohort``
    """
    self.email = email
    self.name = name
    self.update_password(password)

    if learned_language:
        self.learned_language = learned_language
    else:
        self.learned_language = Language.default_learned()

    if native_language:
        self.native_language = native_language
    else:
        self.native_language = Language.default_native_language()

    self.invitation_code = invitation_code
    self.cohort = cohort

    # Add the learned language to the user languages with reading_news set
    # to True, so that the user has articles in the reader when opening it
    # for the first time.
    from zeeguu.model import UserLanguage
    if learned_language:
        reading_language = learned_language
    else:
        reading_language = Language.default_learned()
    # the instance is not kept here (presumably it attaches itself to the
    # user through the ORM relationship -- confirm)
    UserLanguage(self, reading_language, reading_news=True)
def get_difficulty_for_text(lang_code):
    """
    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the
        difficulty for. Each text consists of an array with the text itself
        as 'content' and an additional 'id' which gets roundtripped unchanged
    :param difficulty_computer (optional): calculate difficulty score using
        a specific algorithm
    :param rank_boundary (deprecated): upper boundary for word frequency
        rank (between 1 and 10'000)
    :param personalized (deprecated): by default we always compute the
        personalized difficulty

    For an example of how the Json data looks like, see
    ../tests/api_tests.py#test_txt_difficulty(self):

    :return difficulties: json array, which contains for each text:
      * estimated_difficulty - one of three: "EASY", "MEDIUM", "HARD"
      * id - identifies the text
      * [deprecated] score_average - average difficulty of the words in the text
      * [deprecated] score_median - median difficulty of the words in the text

    Returns the string 'FAIL' when the language is unknown, or when the
    request body is not a json object with a 'texts' entry.
    """
    language = Language.find(lang_code)
    if not language:
        return 'FAIL'

    data = request.get_json()
    # get_json() may return None (no json body) or a non-object value
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'

    texts = list(data['texts'])

    difficulty_computer = 'default'
    if 'difficulty_computer' in data:
        difficulty_computer = data['difficulty_computer'].lower()

    user = flask.g.user
    known_probabilities = KnownWordProbability.find_all_by_user_cached(user)

    difficulties = []
    for text in texts:
        difficulty = text_difficulty(text["content"], language,
                                     known_probabilities, difficulty_computer)
        # roundtrip the client's id so it can match results to texts
        difficulty["id"] = text["id"]
        difficulties.append(difficulty)

    return json_result(dict(difficulties=difficulties))
def test_same_text_does_not_get_created_multiple_Times(self):
    """Bookmarking the same word in the same context twice reuses the text."""
    context = u'Die kleine Jägermeister'
    endpoint = '/translate_and_bookmark/de/en'

    # NOTE(review): the original indentation was lost; everything below is
    # assumed to run inside the app context -- confirm.
    with zeeguu.app.app_context():
        url = Url.find('http://mir.lu/stories/german/jagermeister',
                       "Die Kleine Jagermeister (Mircea's Stories)")
        source_language = Language.find('de')

        form_data = {
            'url': url.as_string(),
            'context': context,
            'word': "Die",
        }

        self.api_post(endpoint, form_data)
        first_text = Text.find_or_create(context, source_language, url)

        self.api_post(endpoint, form_data)
        second_text = Text.find_or_create(context, source_language, url)

        assert (first_text == second_text)
def start_following_feeds():
    """
    A user can start following multiple feeds at once.

    The feeds are passed as the post parameter :feeds: which contains a
    json list with URLs for the feeds to be followed. Every url is parsed
    with feedparser; the feed is created when it is not known yet, and the
    current user is registered for it.

    :return: "OK"
    """
    db_session = zeeguu.db.session

    for urlString in json.loads(request.form.get('feeds', '')):
        feed = feedparser.parse(urlString).feed

        feed_image_url_string = feed.image["href"] if "image" in feed else ""
        lan = (Language.find(two_letter_language_code(feed))
               if "language" in feed else None)

        url = Url.find(urlString)
        db_session.add(url)
        # Important to commit this url first; otherwise we end up creating
        # two domains with the same name for both the urls...
        db_session.commit()

        feed_object = RSSFeed.find_by_url(url)
        if not feed_object:
            feed_image_url = Url.find(feed_image_url_string)
            # NOTE(review): the fallback title is the Url object, not the
            # url string -- confirm this is intended.
            title = feed.title if "title" in feed else url
            # NOTE(review): feed.description is read unconditionally,
            # unlike image/language/title -- confirm it is always present.
            feed_object = RSSFeed.find_or_create(
                url, title, feed.description, feed_image_url, lan)
            db_session.add_all([feed_image_url, feed_object])
            db_session.commit()

        feed_registration = RSSFeedRegistration.find_or_create(
            flask.g.user, feed_object)
        db_session.add(feed_registration)
        db_session.commit()

    return "OK"
def known_words_list(self):
    """
    List the distinct ranked words the user marked as too easy.

    A word qualifies when the latest exercise outcome of one of the
    user's bookmarks is "too easy" and the word is a ranked word in the
    language identified by ``self.lang_code``.

    :return: list of dictionaries of the form ``{'word': <word>}``, one
        per distinct word, in no particular order
    """
    language = Language.find(self.lang_code)

    too_easy_words = [
        bookmark.origin.word
        for bookmark in self.user.all_bookmarks()
        if bookmark.check_is_latest_outcome_too_easy()
    ]
    ranked_too_easy_words = [
        word for word in too_easy_words if RankedWord.exists(word, language)
    ]

    # the set removes the duplicates
    return [{'word': word} for word in set(ranked_too_easy_words)]
def get_interesting_reading_languages():
    """
    'Interesting languages' are defined as languages the user isn't
    subscribed to already and thus might subscribe to.

    :return: a json list with the available languages that are not among
        the current user's reading languages; every entry is the
        dictionary produced by the language's ``as_dictionary()``
    """
    all_languages = Language.available_languages()
    learned_languages = UserLanguage.all_reading_for_user(flask.g.user)

    interesting_languages = [
        lan.as_dictionary()
        for lan in all_languages
        if lan not in learned_languages
    ]

    return json_result(interesting_languages)
def get_learnability_for_text(lang_code):
    """
    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the
        learnability for. Each text consists of an array with the text
        itself as 'content' and an additional 'id' which gets roundtripped
        unchanged

    For an example of how the Json data looks like, see
    ../tests/api_tests.py#test_text_learnability(self):

    :return learnabilities: json array, contains the learnabilities as
        arrays with the key 'score' for the learnability value (percentage
        of words from the text that the user is currently learning), the
        'count' of the learned words in the text and the 'id' parameter to
        identify the corresponding text

    Returns the string 'FAIL' when the language is unknown, or when the
    request body is not a json object with a 'texts' entry.
    """
    user = flask.g.user

    language = Language.find(lang_code)
    if language is None:
        return 'FAIL'

    data = request.get_json()
    # get_json() may return None (no json body) or a non-object value
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'

    learnabilities = []
    for text in data['texts']:
        e = SethiKnowledgeEstimator(user)
        # NOTE(review): the whole text entry (not text['content']) is
        # passed on -- confirm text_learnability expects that.
        count, learnability = text_learnability(
            text, e.words_being_learned(language))
        learnabilities.append(
            dict(score=learnability, count=count, id=text['id']))

    return json_result(dict(learnabilities=learnabilities))
def add_ranked_word_to_db(lang_code, word_list_file, number_of_words):
    """
    Adds the ranks of the words to the DB.

    The words are read from the file, de-duplicated based on case and cut
    to the first ``number_of_words`` entries; the rank of a word is its
    1-based position in that list. Existing ranked words get their rank
    updated, the others are created.

    :param lang_code: code of the language the words belong to
    :param word_list_file: file the word list is read from
    :param number_of_words: maximum number of words to import
    :return: None
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    print("Looking for language ..." + lang_code)
    language = Language.find(lang_code)

    # call form prints the same text on Python 2 and Python 3
    print("-> Starting to import words in the DB")

    word_list = remove_duplicates_based_on_case(
        read_words_from_file(word_list_file))
    word_list = word_list[0:number_of_words]

    for rank, word in enumerate(word_list, start=1):
        if RankedWord.exists(word, language):
            ranked_word = RankedWord.find(word, language)
            ranked_word.rank = rank
        else:
            ranked_word = RankedWord(word, language, rank)
        # NOTE(review): the original indentation was lost; the add is
        # assumed to apply to both branches -- confirm.
        zeeguu.db.session.add(ranked_word)
        # the progress helper has always received the incremented counter
        print_progress_stats(rank + 1, word)

    # Commit everything at once - twice as fast as committing after every word
    zeeguu.db.session.commit()
    print('-> Done importing the ranked words in the DB')

    print('-> Updating word ranks for words already in the DB...')
    update_existing_word_ranks(language)
def get_learnability_for_text(lang_code):
    """
    URL parameters:
    :param lang_code: the language of the text

    Json data:
    :param texts: json array that contains the texts to calculate the
        learnability for. Each text consists of an array with the text
        itself as 'content' and an additional 'id' which gets roundtripped
        unchanged

    For an example of how the Json data looks like, see
    ../tests/api_tests.py#test_text_learnability(self):

    :return learnabilities: json array, contains the learnabilities as
        arrays with the key 'score' for the learnability value (percentage
        of words from the text that the user is currently learning), the
        'count' of the learned words in the text and the 'id' parameter to
        identify the corresponding text

    Returns the string 'FAIL' when the language is unknown, or when the
    request body is not a json object with a 'texts' entry.
    """
    user = flask.g.user

    language = Language.find(lang_code)
    if language is None:
        return 'FAIL'

    data = request.get_json()
    # get_json() may return None (no json body) or a non-object value
    if not isinstance(data, dict) or 'texts' not in data:
        return 'FAIL'

    learnabilities = []
    for text in data['texts']:
        e = SethiKnowledgeEstimator(user)
        # NOTE(review): the whole text entry (not text['content']) is
        # passed on -- confirm text_learnability expects that.
        count, learnability = text_learnability(
            text, e.words_being_learned(language))
        learnabilities.append(
            dict(score=learnability, count=count, id=text['id']))

    return json_result(dict(learnabilities=learnabilities))
def set_default_exercise_based_prob():
    """
    Recompute the exercise based probabilities of all users.

    For every user and every word of every language for which an
    ExerciseBasedProbability exists, the probability is recalculated for
    each of the user's bookmarks of that word and then set to the average
    of those values (left unchanged when there are no bookmarks).
    """
    zeeguu.app.test_request_context().push()
    zeeguu.db.session.commit()

    users = User.find_all()
    languages = Language.all()

    for user in users:
        for language in languages:
            for word in UserWord.find_by_language(language):
                if not ExerciseBasedProbability.exists(user, word):
                    continue

                prob = ExerciseBasedProbability.find(user, word)
                bookmarks = Bookmark.find_all_by_user_and_word(user, word)

                total_prob = 0
                for bookmark in bookmarks:
                    prob.calculate_known_bookmark_probability(bookmark)
                    total_prob += float(prob.probability)

                if bookmarks:
                    prob.probability = total_prob / len(bookmarks)

                # NOTE(review): the original indentation was lost; the
                # commit is assumed to run once per processed word -- confirm.
                zeeguu.db.session.commit()

    print('job1')
def modify_user_language():
    """
    This endpoint is for modifying a user language.

    It tries to find the user_language, and otherwise create it. It then
    sets all the parameters given. The form field 'language_id' is
    mandatory; 'language_reading', 'language_exercises' and
    'language_level' are optional integers and are ignored when missing
    or not parseable as an integer.

    :return: "OK" in case of success
    """
    def optional_int(field_name):
        # Missing / non-numeric fields count as "not provided". Only the
        # errors int() raises for bad input are caught (previously a bare
        # except hid every error).
        try:
            return int(request.form.get(field_name, ''))
        except (TypeError, ValueError):
            return None

    language_id = int(request.form.get('language_id', ''))
    language_reading = optional_int('language_reading')
    language_exercises = optional_int('language_exercises')
    language_level = optional_int('language_level')

    language_object = Language.find_by_id(language_id)
    user_language = UserLanguage.find_or_create(session, flask.g.user,
                                                language_object)

    if language_reading is not None:
        user_language.reading_news = language_reading
        recompute_recommender_cache_if_needed(flask.g.user, session)
    if language_exercises is not None:
        user_language.doing_exercises = language_exercises
    if language_level is not None:
        user_language.declared_level = language_level

    session.add(user_language)
    session.commit()

    return "OK"
def find_or_create(cls, session, _url: str, language=None, sleep_a_bit=False):
    """
    Return the article for this url. If not found, download and extract
    all the required info for this article, then store it.

    :param session: database session used to store the new article
    :param _url: url of the article; it is canonicalised first
    :param language: language of the article; when falsy it is taken from
        the downloaded page's meta language, or detected from the text
        when that is empty
    :param sleep_a_bit: when true, sleep between 3 and 33 seconds after a
        download, to be gentle with the remote servers
    :return: the found or newly created article; after a database error
        the result of looking the article up again, or None when all 10
        retries failed
    :raises Exception: when the downloaded article has no text
    """
    # time is needed on the error path as well, so it is imported
    # unconditionally (it used to be imported only when sleep_a_bit was
    # set, leaving the name unbound in the handler below)
    import time
    from random import randint

    from zeeguu.model import Url, Article, Language
    import newspaper

    url = Url.extract_canonical_url(_url)

    try:
        found = cls.find(url)
        if found:
            return found

        art = newspaper.Article(url=url)
        art.download()
        art.parse()

        if art.text == '':
            raise Exception("Newspaper got empty article from: " + url)

        if sleep_a_bit:
            print("GOT: " + url)
            sleep_time = randint(3, 33)
            print(f"sleeping for {sleep_time}s... so we don't annoy our friendly servers")
            time.sleep(sleep_time)

        if not language:
            if art.meta_lang == '':
                art.meta_lang = detect(art.text)
                zeeguu.log(f"langdetect: {art.meta_lang} for {url}")
            language = Language.find_or_create(art.meta_lang)

        # Create new article and save it to DB
        url_object = Url.find_or_create(session, url)

        new_article = Article(
            url_object,
            art.title,
            ', '.join(art.authors),
            art.text[0:32000],  # any article longer than this will be truncated...
            art.summary,
            None,
            None,
            language
        )
        session.add(new_article)
        session.commit()

        return new_article

    # A tuple is required to catch both types: `A or B` evaluates to A only.
    except (sqlalchemy.exc.IntegrityError, sqlalchemy.exc.DatabaseError):
        # Most likely somebody else stored the same article concurrently:
        # roll back and try to read it, a limited number of times.
        for i in range(10):
            try:
                session.rollback()
                u = cls.find(url)
                print("Found article by url after recovering from race")
                return u
            except Exception:
                print("Exception of second degree in article..." + str(i))
                time.sleep(0.3)

        return None
def get_content_from_url():
    """
    Json data:
    :param urls: json array that contains the urls to get the article
        content for. Each url consists of an array with the url itself as
        'url' and an additional 'id' which gets roundtripped unchanged.
        For an example of how the Json data looks like, see
        ../tests/api_tests.py#test_content_from_url(self):
    :param timeout (optional): maximal time in seconds to wait for the
        results (defaults to 10)
    :param lang_code (optional): If the user sends along the language,
        then we compute the difficulty of the texts

    :return contents: json array, contains the contents of the urls that
        responded within the timeout as arrays with the key 'content' for
        the article content, the url of the main image as 'image' and the
        'id' parameter to identify the corresponding url.
        The string 'FAIL' is returned when no 'urls' are given.
    """
    data = request.get_json()
    queue = Queue.Queue()

    if 'urls' not in data:
        return 'FAIL'
    urls = list(data['urls'])

    timeout = int(data['timeout']) if 'timeout' in data else 10

    # Start one daemon worker thread per url; the workers receive the
    # shared queue as their last argument
    threads = []
    for url in urls:
        worker = threading.Thread(target=PageExtractor.worker,
                                  args=(url['url'], url['id'], queue))
        worker.daemon = True
        threads.append(worker)
        worker.start()

    # Wait for the workers to finish, but not longer than the timeout
    deadline = time.time() + timeout
    while time.time() < deadline and any(t.isAlive() for t in threads) is True:
        time.sleep(0.1)

    # Collect at most one result per url; stragglers are simply left out
    contents = []
    for _ in xrange(len(urls)):
        try:
            contents.append(queue.get_nowait())
        except Queue.Empty:
            pass

    # If the user sends along the language, then we can compute the difficulty
    if 'lang_code' in data:
        language = Language.find(data['lang_code'])
        if language is not None:
            print("got language")
            known_probabilities = KnownWordProbability.find_all_by_user_cached(
                flask.g.user)
            for each_content_dict in contents:
                each_content_dict["difficulty"] = text_difficulty(
                    each_content_dict["content"], language,
                    known_probabilities)

    return json_result(dict(contents=contents))
def __init__(self, email, username, password, learned_language=None,
             native_language=None):
    """
    Initialise the user's basic fields.

    :param email: stored as ``self.email``
    :param username: stored as ``self.name``
    :param password: handed to ``self.update_password``
    :param learned_language: falls back to ``Language.default_learned()``
        when falsy
    :param native_language: falls back to
        ``Language.default_native_language()`` when falsy
    """
    self.email = email
    self.name = username
    self.update_password(password)

    if learned_language:
        self.learned_language = learned_language
    else:
        self.learned_language = Language.default_learned()

    if native_language:
        self.native_language = native_language
    else:
        self.native_language = Language.default_native_language()
def set_learned_language(self, code):
    """
    Set the learned language from a language code.

    :param code: language code, resolved through ``Language.find``
    """
    language = Language.find(code)
    self.learned_language = language
def set_native_language(self, code):
    """
    Set the native language from a language code.

    :param code: language code, resolved through ``Language.find``
    """
    language = Language.find(code)
    self.native_language = language
def get_not_encountered_words(lang_code):
    """
    Return the words the current user has not encountered yet.

    :param lang_code: code of the language to report on
    :return: json_result of the user's ``get_not_encountered_words`` for
        that language
    """
    user = flask.g.user
    language = Language.find(lang_code)
    words = user.get_not_encountered_words(language)
    return json_result(words)