def json_serializable_dict(self, with_context=True, with_title=False):
    """
    Serialize this bookmark into a plain dict suitable for a JSON response.

    :param with_context: include the surrounding text content under 'context'
    :param with_title: look up and include the source article's title
    :return: dict with the bookmark's words, languages, stats and metadata
    """
    try:
        translation_word = self.translation.word
    except AttributeError as e:
        translation_word = ''
        zeeguu.log(
            f"Exception caught: for some reason there was no translation for {self.id}"
        )
        print(str(e))

    word_info = Word.stats(self.origin.word, self.origin.language.code)

    learned_datetime = str(self.learned_time.date()) if self.learned else ''
    created_day = "today" if self.time.date() == datetime.now().date() else ''

    bookmark_title = ""
    if with_title:
        try:
            bookmark_title = self.text.article.title
        except Exception as e:
            print(e)
            print(
                f"could not find article title for bookmark with id: {self.id}"
            )

    serialized = {
        "id": self.id,
        "to": translation_word,
        "from": self.origin.word,
        "from_lang": self.origin.language.code,
        "to_lang": self.translation.language.code,
        "title": bookmark_title,
        "url": self.text.url.as_string(),
        "origin_importance": word_info.importance,
        "learned_datetime": learned_datetime,
        # rank 100000 is the "unranked" sentinel; callers get '' instead
        "origin_rank": word_info.rank if word_info.rank != 100000 else '',
        "starred": self.starred if self.starred is not None else False,
        "article_id": self.text.article_id if self.text.article_id else '',
        "created_day": created_day,
        # human readable stuff...
        "time": self.time.strftime(JSON_TIME_FORMAT),
    }

    if with_context:
        serialized['context'] = self.text.content
    return serialized
def translate_and_bookmark(from_lang_code, to_lang_code):
    """
    @deprecated
    This should be deprecated and /get_possible_translations used instead
    However, it is still used by the zeeguu chrome extension.

    This expects in the post parameter the following:
        - word (to translate)
        - context (surrounding paragraph of the original word )
        - url (of the origin)
        - title (of the origin page)

    /get_possible_translations has very similar behavior, only that
    it focuses on returning the possible alternative translations

    :param from_lang_code: language code of the word being translated
    :param to_lang_code: language code to translate into
    :return: json with bookmark_id and translation, or a fallback tuple
             when context minimization fails
    """
    word_str = unquote_plus(request.form['word'])

    url_str = request.form.get('url', '')
    title_str = request.form.get('title', '')
    context_str = request.form.get('context', '')

    try:
        minimal_context, query = minimize_context(
            context_str, from_lang_code, word_str)

        translator = Translator(from_lang_code, to_lang_code)
        translations = translator.translate(query).translations
        best_guess = translations[0]["translation"]

        bookmark = Bookmark.find_or_create(db_session, flask.g.user,
                                           word_str, from_lang_code,
                                           best_guess, to_lang_code,
                                           minimal_context, url_str,
                                           title_str)
    except ValueError as e:
        zeeguu.log(
            f"minimize context failed {e}on: {context_str} x {from_lang_code} x {word_str} "
        )
        # BUG FIX: this used to `return context_str, query`, but `query` is
        # only bound by the minimize_context() call that just raised, so the
        # handler itself crashed with a NameError. Fall back to the raw word,
        # which is what the query would have been derived from.
        return context_str, word_str

    return json_result(dict(bookmark_id=bookmark.id, translation=best_guess))
def article_opened(session, value, user):
    """
    Record that `user` opened the article identified by `value`.

    The url that comes from the zeeguu event logger might be the zeeguu
    reader url, which is of the form
    https://www.zeeguu.unibe.ch/read/article?articleLanguage=de&articleURL=<original url>
    so we extract only the part after the last 'articleURL='.
    """
    url = value.split('articleURL=')[-1]

    article = Article.find_or_create(session, url)

    user_article = UserArticle.find(user, article)
    if not user_article:
        user_article = UserArticle.find_or_create(session, user, article,
                                                  opened=datetime.now())
    # refresh the opened timestamp on every open event
    user_article.opened = datetime.now()

    session.add(user_article)
    session.commit()
    log(f"{user_article}")
def get_subscribed_filters():
    """
    A user might be subscribed to multiple filters at once.
    This endpoint returns them as a list.

    :return: a json list with filters for which the user is registered;
     every filter in this list is a dictionary with the following info:
                id = unique id of the topic;
                title = <unicode string>
    """
    subscribed = []
    for topic_filter in TopicFilter.all_for_user(flask.g.user):
        try:
            subscribed.append(topic_filter.topic.as_dictionary())
        except Exception as e:
            # a broken topic reference should not break the whole list
            zeeguu.log(str(e))
    return json_result(subscribed)
def get_subscribed_topics():
    """
    A user might be subscribed to multiple topics at once.
    This endpoint returns them as a list.

    :return: a json list with feeds for which the user is registered;
     every feed in this list is a dictionary with the following info:
                id = unique id of the topic;
                title = <unicode string>
    """
    as_dicts = []
    for subscription in TopicSubscription.all_for_user(flask.g.user):
        try:
            as_dicts.append(subscription.topic.as_dictionary())
        except Exception as e:
            # skip subscriptions whose topic cannot be serialized
            zeeguu.log(str(e))
    return json_result(as_dicts)
def cleanup_non_content_bits(text: str):
    """
    Strip known non-content fragments that newspaper sometimes
    leaves behind in article.text.

    :param text: raw extracted article text
    :return: the text with the known fragments removed
    """
    leftover_fragments = ("\nAdvertisement\n", "\ntrue\n")

    cleaned = text
    for fragment in leftover_fragments:
        cleaned = cleaned.replace(fragment, "")

    if cleaned != text:
        zeeguu.log("clean")

    return cleaned
def get_subscribed_searches():
    """
    A user might be subscribed to multiple searches at once.
    This endpoint returns them as a list.

    :return: a json list with searches for which the user is registered;
     every search in this list is a dictionary with the following info:
                id = unique id of the search;
                search_keywords = <unicode string>
    """
    result = []
    for subscription in SearchSubscription.all_for_user(flask.g.user):
        try:
            result.append(subscription.search.as_dictionary())
        except Exception as e:
            # one bad search record should not break the whole listing
            zeeguu.log(str(e))
    return json_result(result)
def get_filtered_searches():
    """
    A user might be subscribed to multiple search filters at once.
    This endpoint returns them as a list.

    :return: a json list with searches for which the user is registered;
     every search in this list is a dictionary with the following info:
                id = unique id of the topic;
                search_keywords = <unicode string>
    """
    result = []
    for search_filter in SearchFilter.all_for_user(flask.g.user):
        try:
            result.append(search_filter.search.as_dictionary())
        except Exception as e:
            # a filter pointing at a missing search is logged and skipped
            zeeguu.log(str(e))
    return json_result(result)
def unfilter_search():
    """
    A user can unsubscribe from the search with a given ID.

    :return: "OK" on success, an error string otherwise
    """
    search_id = int(request.form.get('search_id', ''))

    try:
        filter_to_delete = SearchFilter.with_search_id(search_id,
                                                       flask.g.user)
        session.delete(filter_to_delete)

        search_to_delete = Search.find_by_id(search_id)
        session.delete(search_to_delete)

        session.commit()
        recompute_recommender_cache_if_needed(flask.g.user, session)
    except Exception as e:
        zeeguu.log(str(e))
        return "OOPS. SEARCH AIN'T THERE IT SEEMS (" + str(e) + ")"

    return "OK"
def create_default_bookmarks(session, user, language_code):
    """
    Create the default starter bookmarks for a user in a given language.

    :param session: DB session used by Bookmark.find_or_create
    :param user: the user to create bookmarks for
    :param language_code: language the default bookmark data is keyed by
    :return: the created bookmarks, or [] when loading fails
    """
    created = []

    try:
        print("trying to load default bookmarks for " + str(user.name))
        for entry in bookmark_data[language_code]:
            # entry layout: [0]=word, [1]=translation, [2]=context, [3]=url
            new_bookmark = Bookmark.find_or_create(session, user,
                                                   entry[0], language_code,
                                                   entry[1], "en",
                                                   entry[2], entry[3],
                                                   "Zeeguu Exercises")
            created.append(new_bookmark)
    except Exception:
        zeeguu.log(
            "could not load default bookmarks for {0}".format(language_code))
        return []

    return created
def update_bookmark_priority(cls, db, user):
    """
    Update all bookmark priorities of one user.

    :param db: The connection to the database
    :param user: The user object
    """
    try:
        fit_for_study = user.all_bookmarks_fit_for_study()
        zeeguu.log(f"{len(fit_for_study)} bookmarks fit for study")
        if not fit_for_study:
            return

        # tuple(0=bookmark, 1=exercise)
        pairs = [cls._get_exercise_of_bookmark(bookmark)
                 for bookmark in fit_for_study]

        max_iterations = max(
            pair.exercise.id if pair.exercise is not None else 0
            for pair in pairs)

        exercises_and_priorities = [
            cls._calculate_bookmark_priority(pair, max_iterations)
            for pair in pairs
        ]

        # might not be needed, but just to be safe
        with db.session.no_autoflush:
            for each in exercises_and_priorities:
                entry = BookmarkPriorityARTS.find_or_create(
                    each.bookmark, each.priority)
                entry.priority = each.priority
                db.session.add(entry)

        db.session.commit()
    except Exception as e:
        db.session.rollback()
        print('Error during updating bookmark priority')
        print(e)
        print(traceback.format_exc())
def update_learned_status(self, session):
    """
    To call when something happened to the bookmark,
    that requires it's "learned" status to be updated.

    :param session: DB session; the bookmark is added to it when learned
    :return:
    """
    is_learned, learned_time = self.is_learned_based_on_exercise_outcomes(
        True)

    exercise_log = self.sorted_exercise_log()
    exercise_log_summary = ' '.join(
        exercise.short_string_summary() for exercise in exercise_log)

    if not is_learned:
        zeeguu.log(
            f"Log: {exercise_log_summary}: bookmark {self.id} not learned yet."
        )
        return

    zeeguu.log(
        f"Log: {exercise_log_summary}: bookmark {self.id} learned!")
    self.learned_time = learned_time
    self.learned = True
    session.add(self)
def get_user_articles_sources_languages(user, limit=1000):
    """
    Collect articles for all of the user's reading languages.

    :param user: the user for which the articles should be fetched
    :param limit: the amount of articles for each source or language
                  (NOTE(review): not applied anywhere in this body — confirm
                  whether the callee is expected to cap results)
    :return: a list of articles based on the parameters
    """
    all_articles = []

    for language in UserLanguage.all_reading_for_user(user):
        log(f'Getting articles for {language}')
        articles_for_language = language.get_articles(most_recent_first=True)
        all_articles += articles_for_language
        log(f'Added {len(articles_for_language)} articles for {language}')

    return all_articles
def bookmarks_to_study(bookmark_count):
    """
    Returns a number of <bookmark_count> bookmarks that
    are recommended for this user to study.

    :param bookmark_count: maximum number of bookmarks to return
    :return: json list of serialized bookmarks
    """
    int_count = int(bookmark_count)
    to_study = flask.g.user.bookmarks_to_study(int_count)

    if not to_study:
        # We might be in the situation of the priorities never having been
        # computed since the user never did an exercise, and currently only
        # then are priorities recomputed; thus, in this case, we try to
        # update, and maybe this will solve the problem
        zeeguu.log(
            "recomputing bookmark priorities since there seem to be no bookmarks to study"
        )
        BookmarkPriorityUpdater.update_bookmark_priority(
            zeeguu.db, flask.g.user)
        to_study = flask.g.user.bookmarks_to_study(int_count)

    as_json = [bookmark.json_serializable_dict() for bookmark in to_study]
    return json_result(as_json)
def json_serializable_dict(self, with_context=True):
    """
    Serialize this bookmark into a plain dict for JSON responses.

    :param with_context: include the surrounding text content under 'context'
    :return: dict with the bookmark's words, languages and word statistics
    """
    try:
        translation_word = self.translation.word
    except AttributeError as e:
        translation_word = ''
        zeeguu.log(
            f"Exception caught: for some reason there was no translation for {self.id}"
        )
        print(str(e))

    word_stats = Word.stats(self.origin.word, self.origin.language.code)

    serialized = {
        "id": self.id,
        "to": translation_word,
        "from": self.origin.word,
        "from_lang": self.origin.language.code,
        "to_lang": self.translation.language.code,
        "title": self.text.url.title,
        "url": self.text.url.as_string(),
        "origin_importance": word_stats.importance,
    }

    if with_context:
        serialized['context'] = self.text.content
    return serialized
def get_possible_translations(from_lang_code, to_lang_code):
    """
    Returns a list of possible translations in :param to_lang_code
    for :param word in :param from_lang_code.

    You must also specify the :param context, :param url, and :param title
    of the page where the word was found.

    The context is the sentence.

    :return: json array with translations
    """
    context_str = request.form.get('context', '')
    url = request.form.get('url', '')
    # url = url.split('articleURL=')[-1]
    zeeguu.log(f"url before being saved: {url}")
    word_str = request.form['word']
    title_str = request.form.get('title', '')

    minimal_context, query = minimize_context(
        context_str, from_lang_code, word_str)

    # NOTE(review): the to_lang_code parameter is overwritten here with the
    # user's native language — confirm callers expect this.
    to_lang_code = flask.g.user.native_language.code
    zeeguu.log(f'translating to... {to_lang_code}')

    translator = Translator(from_lang_code, to_lang_code)
    zeeguu.log(f"Query to translate is: {query}")
    translations = translator.translate(query).translations

    # translators talk about quality, but our users expect likelihood.
    # rename the key in the dictionary
    for each in translations:
        each['likelihood'] = each.pop("quality")
        each['source'] = each.pop('service_name')

    best_guess = translations[0]["translation"]

    Bookmark.find_or_create(db_session, flask.g.user,
                            word_str, from_lang_code,
                            best_guess, to_lang_code,
                            minimal_context, url, title_str)

    return json_result(dict(translations=translations))
def download_from_feed(feed: RSSFeed, session, limit=1000):
    """
    Download new articles from an RSS feed and save them to the DB.

    Session is needed because this saves stuff to the DB.

    last_crawled_time is useful because otherwise there would be a lot of
    time wasted trying to retrieve the same articles, especially the ones
    which can't be retrieved, so they won't be cached.

    :param feed: the RSSFeed to crawl
    :param session: DB session used to persist articles and the feed state
    :param limit: maximum number of articles downloaded in one run
    """
    zeeguu.log(feed)
    downloaded = 0
    skipped = 0
    skipped_due_to_low_quality = dict()
    skipped_already_in_db = 0

    last_retrieval_time_from_DB = None
    last_retrieval_time_seen_this_crawl = None

    if feed.last_crawled_time:
        last_retrieval_time_from_DB = feed.last_crawled_time
        zeeguu.log(f"last retrieval time from DB = {last_retrieval_time_from_DB}")

    for feed_item in feed.feed_items():

        if downloaded >= limit:
            break

        try:
            url = _url_after_redirects(feed_item['url'])
        except requests.exceptions.TooManyRedirects:
            # BUG FIX: previously logged `url`, which is unbound on the first
            # iteration (and stale on later ones) because the assignment above
            # is exactly what raised; log the feed item's own url instead.
            zeeguu.log(f"Too many redirects for: {feed_item['url']}")
            continue

        try:
            this_article_time = datetime.strptime(feed_item['published'],
                                                  SIMPLE_TIME_FORMAT)
            this_article_time = this_article_time.replace(tzinfo=None)
        except Exception:
            # was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # still propagate while any parse failure is skipped as before
            zeeguu.log(f"can't get time from {url}: {feed_item['published']}")
            continue

        # anything older than the last crawl was already seen; skip it
        if last_retrieval_time_from_DB:
            if this_article_time < last_retrieval_time_from_DB:
                skipped += 1
                continue

        title = feed_item['title']
        summary = feed_item['summary']

        art = model.Article.find(url)

        # track the newest publication time seen, to persist as the
        # feed's last_crawled_time at the end of the run
        if (not last_retrieval_time_seen_this_crawl) or (
                this_article_time > last_retrieval_time_seen_this_crawl):
            last_retrieval_time_seen_this_crawl = this_article_time

        if art:
            skipped_already_in_db += 1
        else:
            try:
                art = newspaper.Article(url)
                art.download()
                art.parse()

                cleaned_up_text = cleanup_non_content_bits(art.text)

                quality_article = sufficient_quality(
                    art, skipped_due_to_low_quality)

                if quality_article:
                    # NOTE(review): this import appears unused below; kept in
                    # case importing the module has registration side effects
                    # — confirm and remove if not.
                    from zeeguu.language.difficulty_estimator_factory import DifficultyEstimatorFactory

                    # Create new article and save it to DB
                    new_article = zeeguu.model.Article(
                        Url.find_or_create(session, url),
                        title,
                        ', '.join(art.authors),
                        cleaned_up_text,
                        summary,
                        this_article_time,
                        feed,
                        feed.language
                    )
                    session.add(new_article)
                    session.commit()
                    downloaded += 1

                    add_topics(new_article, session)
                    add_searches(title, url, new_article, session)

                    try:
                        session.commit()
                    except Exception as e:
                        zeeguu.log(f'{LOG_CONTEXT}: Something went wrong when committing words/topic to article: {e}')

            except Exception:
                import sys
                ex = sys.exc_info()[0]
                zeeguu.log(f" {LOG_CONTEXT}: Failed to create zeeguu.Article from {url}\n{str(ex)}")

    zeeguu.log(f' Skipped due to time: {skipped} ')
    zeeguu.log(f' Downloaded: {downloaded}')
    zeeguu.log(f' Low Quality: {skipped_due_to_low_quality}')
    zeeguu.log(f' Already in DB: {skipped_already_in_db}')

    if last_retrieval_time_seen_this_crawl:
        feed.last_crawled_time = last_retrieval_time_seen_this_crawl
    session.add(feed)
    session.commit()
def test_languages_exists(self):
    # Smoke test: logging a non-ASCII message must not raise.
    non_ascii_message = "tüst"
    zeeguu.log(non_ascii_message)