def query_performance(mysql, index, size, content, topics, unwanted_topics, user_topics, unwanted_user_topics):
    """Run the same search against Elasticsearch and MySQL full-text and dump
    both result sets to CSV for comparison."""
    # hard-coded English; id 5 presumably matches the DB row for "en" — TODO confirm
    language = Language("en", "English")
    language.id = 5

    # build elasticsearch query
    elastic_body = build_elastic_query(
        size, content, topics, unwanted_topics,
        user_topics, unwanted_user_topics, language, 100, 0)

    # build Mysql query
    fulltext_query = mysql_fulltext_query(
        mysql, size, content, topics, unwanted_topics,
        user_topics, unwanted_user_topics, language, 100, 0)

    # Elasticsearch results
    response = es.search(index=index, body=elastic_body)
    for hit in response['hits']['hits']:
        source = hit['_source']
        write_results_to_csv(
            "elastic", source['title'], source['content'],
            source['published_time'])

    # Mysql results
    for row in fulltext_query.all():
        write_results_to_csv(
            "mysql_full_text", row.title, row.content, row.published_time)
def system_languages():
    """Return a dict with the learnable and the native languages known to
    the system, each as a list of {name, code} dicts."""
    def as_dicts(languages):
        # project a list of Language objects onto plain dicts
        return [dict(name=each.name, code=each.code) for each in languages]

    return {
        "learnable_languages": as_dicts(Language.available_languages()),
        "native_languages": as_dicts(Language.native_languages()),
    }
def query_performance(mysql, index, size_of_index, size, content, topics, unwanted_topics, user_topics, unwanted_user_topics):
    """Benchmark the same search on Elasticsearch (20 runs), MySQL full-text
    (10 runs) and the base MySQL query (10 runs); the average timing of each
    is written to CSV, tagged with the index size."""
    # hard-coded English; id 5 presumably matches the DB row for "en" — TODO confirm
    language = Language("en", "English")
    language.id = 5

    elastic_query_body = build_elastic_query(
        size, content, topics, unwanted_topics,
        user_topics, unwanted_user_topics, language, 100, 0)
    mysql_query_full_text = mysql_fulltext_query(
        mysql, size, content, topics, unwanted_topics,
        user_topics, unwanted_user_topics, language, 100, 0)
    mysql_query_old = base_mysql_query(
        mysql, size, content, topics, unwanted_topics,
        user_topics, unwanted_user_topics, language, 100, 0)

    # Elasticsearch: 20 timed runs
    timings = []
    returned_counts = []
    for _ in range(20):
        started = timer()
        res = es.search(index=index, body=elastic_query_body)
        returned_counts.append(len(res['hits'].get('hits')))
        timings.append(timer() - started)
    write_results_to_csv(size_of_index + " elastic",
                         average_time_in_ms(timings), size)

    # MySQL Full Text: 10 timed runs
    timings = []
    returned_counts = []
    for _ in range(10):
        started = timer()
        rows = mysql_query_full_text.all()
        returned_counts.append(len(rows))
        timings.append(timer() - started)
    write_results_to_csv(size_of_index + " MySQL Full Text",
                         average_time_in_ms(timings), size)

    # MySQL Base Version: 10 timed runs
    timings = []
    returned_counts = []
    for _ in range(10):
        started = timer()
        rows = mysql_query_old.all()
        returned_counts.append(len(rows))
        timings.append(timer() - started)
    write_results_to_csv(size_of_index + " MySQL Base Version",
                         average_time_in_ms(timings), size)
def get_interesting_topics():
    """
    Get a list of interesting topics for the user's reading languages.

    Interesting topics are for now defined as:
        - the topic is not followed yet
        - the topic is not in the filters list
        - there are articles with that topic in the language

    :return: json list of topic dictionaries
    """
    user = flask.g.user

    already_filtered = [each.topic for each in TopicFilter.all_for_user(user)]
    already_subscribed = [
        each.topic for each in TopicSubscription.all_for_user(user)
    ]

    # gather the localized topics of every language the user reads
    localized = []
    for language in Language.all_reading_for_user(user):
        localized.extend(LocalizedTopic.all_for_language(language))

    topic_data = [
        loc.topic.as_dictionary()
        for loc in localized
        if loc.topic not in already_filtered
        and loc.topic not in already_subscribed
    ]
    return json_result(topic_data)
def __init__(self, email, name, password,
             learned_language=None, native_language=None,
             invitation_code=None, cohort=None):
    """Create a user; the password is stored via update_password, and the
    system default languages are used when none are given."""
    self.email = email
    self.name = name
    self.update_password(password)

    # fall back to the system defaults when no language is supplied
    self.learned_language = learned_language or Language.default_learned()
    self.native_language = (
        native_language or Language.default_native_language()
    )

    self.invitation_code = invitation_code
    self.cohort = cohort
def update_cohort(cohort_id):
    """
    Change details of a specified cohort.
    Expects form fields: inv_code, name, language_code (or the deprecated
    language_id), declared_level_min, declared_level_max.
    """
    if not has_permission_for_cohort(cohort_id):
        flask.abort(401)

    try:
        form = request.form
        cohort = Cohort.query.filter_by(id=cohort_id).one()

        cohort.inv_code = form.get("inv_code")
        cohort.name = form.get("name")

        # language_id is deprecated; language_code takes precedence
        code = form.get("language_code") or form.get("language_id")
        cohort.language_id = Language.find(code).id

        cohort.declared_level_min = form.get("declared_level_min")
        cohort.declared_level_max = form.get("declared_level_max")

        db.session.commit()
        return "OK"
    except ValueError:
        flask.abort(400)
        return "ValueError"
    except sqlalchemy.exc.IntegrityError:
        flask.abort(400)
        return "IntegrityError"
def _reading_preferences_hash(user):
    """
    Compute the ArticlesCache hash of a user's current reading preferences
    (topic filters, topic subscriptions, search filters, search
    subscriptions and reading languages). Retrieved in one place since it
    is needed several times.

    :param user:
    :return: articles_hash: ArticlesHash
    """
    filters = [each.topic for each in TopicFilter.all_for_user(user)]
    topics = [each.topic for each in TopicSubscription.all_for_user(user)]
    languages = Language.all_reading_for_user(user)
    search_filters = [each.search for each in SearchFilter.all_for_user(user)]
    searches = [each.search for each in SearchSubscription.all_for_user(user)]

    return ArticlesCache.calculate_hash(
        user, topics, filters, searches, search_filters, languages
    )
def set_learned_language_level(self, language_code: str, level: str, session=None):
    """Set the CEFR level of the given learned language for this user;
    the UserLanguage row is created on demand and added to the session
    when one is given."""
    from zeeguu_core.model import UserLanguage

    target_language = Language.find_or_create(language_code)
    user_language = UserLanguage.find_or_create(session, self, target_language)
    user_language.cefr_level = int(level)
    if session:
        session.add(user_language)
def recompute_for_topics_and_languages():
    """Warm the article caches for every topic and every available language."""
    from zeeguu_core.model import Topic, Language

    for topic in Topic.get_all_topics():
        topic.all_articles()

    for language in Language.available_languages():
        language.get_articles()
def create_own_cohort():
    """
    Create a cohort in the database.
    Requires form input: inv_code, name, language_code (language_id is
    deprecated but still accepted), max_students.
    """
    if not _is_teacher(flask.g.user.id):
        flask.abort(401)

    form = request.form
    inv_code = form.get("inv_code")
    name = form.get("name")
    # language_id is deprecated and kept for backwards compatibility;
    # language_code takes precedence
    language_code = form.get("language_code") or form.get("language_id")
    if name is None or inv_code is None or language_code is None:
        flask.abort(400)

    # NOTE(review): substring match against str(language) — confirm this is
    # the intended way of validating a code
    code_allowed = any(
        language_code in str(code)
        for code in Language.available_languages()
    )
    if not code_allowed:
        flask.abort(400)

    language = Language.find_or_create(language_code)
    teacher_id = flask.g.user.id
    max_students = form.get("max_students")
    if int(max_students) < 1:
        flask.abort(400)

    try:
        cohort = Cohort(inv_code, name, language, max_students)
        db.session.add(cohort)
        db.session.commit()
        _link_teacher_cohort(teacher_id, cohort.id)
        return "OK"
    except ValueError:
        flask.abort(400)
        return "ValueError"
    except sqlalchemy.exc.IntegrityError:
        flask.abort(400)
        return "IntegrityError"
def available_native_languages():
    """
    :return: json with the language codes of the supported native
             languages; currently only english...
             e.g. ["en", "fr", "de", "it", "no", "ro"]
    """
    codes = [language.code for language in Language.native_languages()]
    return json.dumps(codes)
def available_languages():
    """
    :return: json with the language codes of the supported learnable
             languages, e.g. ["en", "fr", "de", "it", "no", "ro"]
    """
    codes = [language.code for language in Language.available_languages()]
    return json.dumps(codes)
def available_native_languages():
    """
    :return: json with the language codes for the supported native
             languages; currently only english...
             e.g. ["en", "fr", "de", "it", "no", "ro"]
    """
    return json.dumps(
        [each.code for each in Language.native_languages()]
    )
def available_languages():
    """
    :return: json with the language codes for the supported languages,
             e.g. ["en", "fr", "de", "it", "no", "ro"]
    """
    return json.dumps(
        [each.code for each in Language.available_languages()]
    )
def cleanup_all_articles_in_language(language_code):
    """Strip non-content bits from every article in the given language;
    changed articles are printed and the whole batch is committed once."""
    language_id = Language.find(language_code).id

    for article in Article.query.filter_by(language_id=language_id).all():
        cleaned = cleanup_non_content_bits(article.content)
        if cleaned != article.content:
            article.content = cleaned
            zeeguu_core.db.session.add(article)
            print(article.title + "\n\n")

    zeeguu_core.db.session.commit()
def create_anonymous(cls, uuid, password, learned_language_code=None, native_language_code=None):
    """
    Create an anonymous user identified by a uuid.

    :param uuid:
    :param password:
    :param learned_language_code:
    :param native_language_code:
    :return: the new (unsaved) user
    """
    # since the DB must have an email we generate a fake one from the uuid
    fake_email = uuid + cls.ANONYMOUS_EMAIL_DOMAIN

    def _resolve(code):
        # map a language code to a Language, tolerating unknown codes
        if code is None:
            return None
        try:
            return Language.find_or_create(code)
        except NoResultFound:
            return None

    return cls(
        fake_email,
        uuid,
        password,
        learned_language=_resolve(learned_language_code),
        native_language=_resolve(native_language_code),
    )
def create_own_cohort():
    '''
    Create a cohort in the database.
    Requires form input: inv_code, name, language_id, max_students.
    '''
    if not _is_teacher(flask.g.user.id):
        flask.abort(401)

    inv_code = request.form.get("inv_code")
    name = request.form.get("name")
    language_id = request.form.get("language_id")
    if name is None or inv_code is None or language_id is None:
        flask.abort(400)

    # NOTE(review): substring match against str(language) — confirm this is
    # the intended way of validating a code
    if not any(language_id in str(code)
               for code in Language.available_languages()):
        flask.abort(400)

    language = Language.find_or_create(language_id)
    teacher_id = flask.g.user.id
    max_students = request.form.get("max_students")
    if int(max_students) < 1:
        flask.abort(400)

    try:
        cohort = Cohort(inv_code, name, language, max_students)
        db.session.add(cohort)
        db.session.commit()
        _link_teacher_cohort(teacher_id, cohort.id)
        return "OK"
    except ValueError:
        flask.abort(400)
        return "ValueError"
    except sqlalchemy.exc.IntegrityError:
        flask.abort(400)
        return "IntegrityError"
def upload_own_text():
    """Create a new article from user-provided title/content/language and
    return the new article's id as a string."""
    # discard any stale state left over from a previous request
    db_session.rollback()

    language = Language.find_or_create(request.form.get("language", ""))
    title = request.form.get("title", "")
    content = request.form.get("content", "")

    article_id = Article.create_from_upload(
        db_session, title, content, flask.g.user, language
    )
    return str(article_id)
def cache_articles(code):
    """Warm the per-topic and per-language article caches; guarded by the
    private API code."""
    if code != zeeguu_core.app.config.get("PRIVATE_API_CODE"):
        return "Nope"

    from zeeguu_core.model import Topic, Language

    for topic in Topic.get_all_topics():
        topic.all_articles()
    for language in Language.available_languages():
        language.get_articles()

    return "OK"
def _get_past_translation(word: str, from_lang_code: str, to_lang_code: str, context: str, user: User = None):
    """
    Look up a previously-made translation of `word` (as seen in `context`)
    from `from_lang_code` into `to_lang_code`.

    :param user: when given, only that user's bookmarks are considered
    :return: the translated word, or None when nothing matches or any
             lookup fails (failures are reported via capture_exception)
    """
    try:
        from_language = Language.find(from_lang_code)
        to_language = Language.find(to_lang_code)

        try:
            origin_word = UserWord.find(word, from_language)
            text = Text.query.filter_by(content=context).one()
        except NoResultFound:
            return None

        # BUGFIX: the original repeated the origin_id condition twice
        query = (
            Bookmark.query
            .join(UserWord, UserWord.id == Bookmark.translation_id)
            .filter(
                UserWord.language_id == to_language.id,
                Bookmark.origin_id == origin_word.id,
                Bookmark.text_id == text.id,
            )
        )

        if user:
            query = query.filter(Bookmark.user_id == user.id)

        # prioritize older users
        # BUGFIX: order_by returns a new query; the original discarded it,
        # so the ordering was never applied
        query = query.order_by(Bookmark.user_id.asc())

        # evaluate once instead of running the query twice
        first_match = query.first()
        if first_match:
            return first_match.translation.word
        return None

    except Exception as e:
        capture_exception(e)
        return None
def update_own_text(id):
    """Overwrite language, content and title of the article with the
    given id and commit the change."""
    updated_language = Language.find_or_create(request.form.get("language", ""))
    updated_content = request.form.get("content", "")
    updated_title = request.form.get("title", "")

    article = Article.query.filter(Article.id == id).one()
    article.language = updated_language
    article.content = updated_content
    article.title = updated_title

    db_session.add(article)
    db_session.commit()
    return "OK"
def cache_articles(code):
    """Warm the per-topic and per-language article caches; guarded by the
    private API code. Prints a line when done."""
    if code != zeeguu_core.app.config.get("PRIVATE_API_CODE"):
        return "Nope"

    from zeeguu_core.model import Topic, Language

    for topic in Topic.get_all_topics():
        topic.all_articles()
    for language in Language.available_languages():
        language.get_articles()

    print("done caching articles!")
    return "OK"
def __init__(self):
    """Build the default RSS feed fixture plus a second, German (Spiegel)
    feed, saving both."""
    super().__init__()

    self.rss_feed = self._create_model_object()
    self.feed = self.rss_feed
    self.save(self.rss_feed)

    german = Language.find_or_create('de')
    spiegel_url = Url.find_or_create(self.db.session, url_spiegel_rss)
    self.feed1 = RSSFeed.find_or_create(self.db.session,
                                        spiegel_url,
                                        "", "",
                                        icon_name_spiegel,
                                        language=german)
    self.save(self.feed1)
def article_recommendations_for_user(user, count):
    """
    Retrieve :param count: articles which are equally distributed over all
    the feeds to which the :param user: is registered.
    Fails if no language is selected.
    :return: list of article-info dicts
    """
    # Temporary fix for the experiment of Gabriel
    AIKI_USERS_COHORT_ID = 109
    if user.cohort_id == AIKI_USERS_COHORT_ID:
        return CohortArticleMap.get_articles_info_for_cohort(user.cohort)

    import zeeguu_core

    user_languages = Language.all_reading_for_user(user)
    if not user_languages:
        # NOTE(review): returns a Language rather than article info —
        # confirm that callers expect this
        return [user.learned_language]

    pref_hash = _reading_preferences_hash(user)
    _recompute_recommender_cache_if_needed(user, zeeguu_core.db.session)

    # Two fast calls to /articles/recommended may race inside
    # _recompute_recommender_cache and leave duplicates in the DB; since
    # this path is being sunset for elastic search it is not worth fixing
    # the race — we simply de-duplicate here with a set.
    candidates = set(ArticlesCache.get_articles_for_hash(pref_hash, count))
    candidates = [
        article for article in candidates
        if not article.broken and article.published_time
    ]
    by_date = SortedList(candidates, lambda x: x.published_time)

    return [
        UserArticle.user_article_info(user, article)
        for article in reversed(by_date)
    ]
def _find_articles_for_user(user):
    """
    Collect all the articles associated with the user's topic and search
    subscriptions across the user's reading languages.

    :param user:
    :return: the subscribed articles
    """
    reading_languages = Language.all_reading_for_user(user)
    topic_subs = TopicSubscription.all_for_user(user)
    search_subs = SearchSubscription.all_for_user(user)

    return _filter_subscribed_articles(
        search_subs, topic_subs, reading_languages, user
    )
def upload_articles(cohort_id):
    """
    Upload articles for a cohort from the JSON body of a POST request.
    Each entry must provide: title, authors, content, summary,
    language_code.
    """
    if not has_permission_for_cohort(cohort_id):
        flask.abort(401)

    try:
        for entry in json.loads(request.data):
            # synthetic URL since uploaded articles have no real source
            synthetic_url = Url("userarticle/{}".format(uuid.uuid4().hex))
            article = Article(
                synthetic_url,
                entry["title"],
                entry["authors"],
                entry["content"],
                entry["summary"],
                datetime.now(),
                None,  # rss feed
                Language.find(entry["language_code"]),
            )
            db.session.add(article)
            db.session.flush()
            db.session.refresh(article)

            mapping = CohortArticleMap(Cohort.find(cohort_id), article)
            db.session.add(mapping)
            db.session.commit()
        return "OK"
    except ValueError:
        flask.abort(400)
        return "ValueError"
def upload_articles(cohort_id):
    '''
    Upload articles for a cohort from the JSON body of a POST request.
    Each entry must provide: title, authors, content, summary,
    language_code.
    '''
    if not has_permission_for_cohort(cohort_id):
        flask.abort(401)

    try:
        for entry in json.loads(request.data):
            # synthetic URL since uploaded articles have no real source
            synthetic_url = Url('userarticle/{}'.format(uuid.uuid4().hex))
            article = Article(
                synthetic_url,
                entry['title'],
                entry['authors'],
                entry['content'],
                entry['summary'],
                datetime.now(),
                None,  # rss feed
                entry_language := Language.find(entry['language_code'])
            )
            db.session.add(article)
            db.session.flush()
            db.session.refresh(article)

            mapping = CohortArticleMap(Cohort.find(cohort_id), article)
            db.session.add(mapping)
            db.session.commit()
        return 'OK'
    except ValueError:
        flask.abort(400)
        return 'ValueError'
def tts():
    """Synthesize speech for the posted text in the posted language;
    returns the path of the generated (or cached) audio file, or ""
    when no text was posted."""
    import zeeguu_core
    from zeeguu_core.model import UserWord, Language

    db_session = zeeguu_core.db.session

    text_to_pronounce = request.form.get("text", "")
    language_id = request.form.get("language_id", "")

    if not text_to_pronounce:
        return ""

    user_word = UserWord.find_or_create(
        db_session, text_to_pronounce, Language.find_or_create(language_id))

    audio_path = _file_name_for_user_word(user_word, language_id)

    # synthesize only once; afterwards the cached file is reused
    if not os.path.isfile(DATA_FOLDER + audio_path):
        _save_speech_to_file(user_word, language_id, audio_path)

    return audio_path
def upload_articles(cohort_id):
    '''
    Upload articles for a cohort from the JSON body of a POST request.
    Each article entry must provide: title, authors, content, summary,
    language_code.
    '''
    if not has_permission_for_cohort(cohort_id):
        flask.abort(401)

    try:
        for article_data in json.loads(request.data):
            # synthetic URL since uploaded articles have no real source
            url = Url('userarticle/{}'.format(uuid.uuid4().hex))
            language = Language.find(article_data['language_code'])

            new_article = Article(
                url,
                article_data['title'],
                article_data['authors'],
                article_data['content'],
                article_data['summary'],
                datetime.now(),
                None,  # rss feed
                language)
            db.session.add(new_article)
            db.session.flush()
            db.session.refresh(new_article)

            cohort = Cohort.find(cohort_id)
            db.session.add(CohortArticleMap(cohort, new_article))
            db.session.commit()
        return 'OK'
    except ValueError:
        flask.abort(400)
        return 'ValueError'
def set_learned_language(self, language_code, session=None):
    """
    Switch this user's learned language, enabling reading and exercises
    for it and disabling them for every other language of this user.

    :param language_code: code of the new learned language
    :param session: when given, modified rows are added to it
    """
    self.learned_language = Language.find(language_code)

    from zeeguu_core.model import UserLanguage

    # disable the exercises and reading for all the other languages
    # BUGFIX: the original filtered on `User.id == self.id`, which does not
    # constrain UserLanguage at all (implicit cross join) and would have
    # matched every user's rows — filter on the owning user instead.
    # TODO(review): confirm the FK column is named `user_id`
    all_other_languages = (
        UserLanguage.query
        .filter(UserLanguage.user_id == self.id)
        .filter(UserLanguage.doing_exercises == True)
        .all()
    )
    for each in all_other_languages:
        each.doing_exercises = False
        each.reading_news = False
        if session:
            session.add(each)

    language = UserLanguage.find_or_create(session, self,
                                           self.learned_language)
    language.reading_news = True
    language.doing_exercises = True
    if session:
        session.add(language)
def _get_user_articles_sources_languages(user, limit=1000):
    """
    Fetch the most recent articles for each of the user's current reading
    languages and return them as one list.

    :param user: the user for which the articles should be fetched
    :param limit: intended cap per source/language — NOTE(review): currently
                  unused by this implementation
    :return: a list of articles
    """
    collected = []
    for language in Language.all_reading_for_user(user):
        info(f"Getting articles for {language}")
        articles_for_language = language.get_articles(most_recent_first=True)
        collected.extend(articles_for_language)
        info(f"Added {len(articles_for_language)} articles for {language}")
    return collected
def article_recommendations_for_user(user, count):
    """
    Retrieve :param count: articles which are equally distributed over all
    the feeds to which the :param user: is registered.
    Fails if no language is selected.
    :return: list of article-info dicts
    """
    # Temporary fix for the experiment of Gabriel
    AIKI_USERS_COHORT_ID = 109
    if user.cohort_id == AIKI_USERS_COHORT_ID:
        return CohortArticleMap.get_articles_info_for_cohort(user.cohort)

    import zeeguu_core

    user_languages = Language.all_reading_for_user(user)
    if not user_languages:
        # NOTE(review): returns a Language rather than article info —
        # confirm that callers expect this
        return [user.learned_language]

    reading_pref_hash = _reading_preferences_hash(user)
    _recompute_recommender_cache_if_needed(user, zeeguu_core.db.session)

    # BUGFIX: two fast calls to /articles/recommended may race inside
    # _recompute_recommender_cache and leave duplicate rows in the cache;
    # de-duplicate with a set here (same mitigation as the sibling
    # implementation of this function) instead of fixing the race, since
    # this path is being sunset for elastic search.
    all_articles = set(
        ArticlesCache.get_articles_for_hash(reading_pref_hash, count))
    all_articles = [
        each for each in all_articles
        if (not each.broken and each.published_time)
    ]
    all_articles = SortedList(all_articles, lambda x: x.published_time)

    return [
        UserArticle.user_article_info(user, article)
        for article in reversed(all_articles)
    ]
def article_search_for_user(user, count, search_terms):
    """
    Handles searching. Gathers the relational preference data from the
    database and uses it to query elasticsearch for relevant articles.

    :param user:
    :param count: max amount of articles to return
    :param search_terms: the inputed search string by the user
    :return: articles
    """
    user_languages = Language.all_reading_for_user(user)
    per_language_article_count = count / len(user_languages)

    final_article_mix = []
    for language in user_languages:
        print(f"language: {language}")

        # 0. Ensure appropriate difficulty
        declared_level_min, declared_level_max = user.levels_for(language)
        lower_bounds = declared_level_min * 10
        upper_bounds = declared_level_max * 10

        # 1. Unwanted user topics (search filters)
        # ========================================
        unwanted_user_topics = [
            each.search.keywords for each in SearchFilter.all_for_user(user)
        ]
        print(f"keywords to exclude: {unwanted_user_topics}")

        # 2. Topics to exclude / filter out
        # =================================
        topics_to_exclude = [
            each.topic.title for each in TopicFilter.all_for_user(user)
        ]
        print(f"topics to exclude: {topics_to_exclude}")

        # 3. Topics subscribed, and thus to include
        # =========================================
        topic_subscriptions = TopicSubscription.all_for_user(user)
        topics_to_include = [
            subscription.topic.title
            for subscription in TopicSubscription.all_for_user(user)
        ]
        print(f"topics to include: {topic_subscriptions}")

        # 4. Wanted user topics (search subscriptions)
        # ============================================
        wanted_user_topics = [
            each.search.keywords
            for each in SearchSubscription.all_for_user(user)
        ]
        print(f"keywords to include: {wanted_user_topics}")

        # build the query using elastic_query_builder
        query_body = build_elastic_query(
            per_language_article_count,
            search_terms,
            _list_to_string(topics_to_include),
            _list_to_string(topics_to_exclude),
            _list_to_string(wanted_user_topics),
            _list_to_string(unwanted_user_topics),
            language,
            upper_bounds,
            lower_bounds,
        )

        es = Elasticsearch(ES_CONN_STRING)
        res = es.search(index=ES_ZINDEX, body=query_body)
        hit_list = res["hits"].get("hits")
        final_article_mix.extend(_to_articles_from_ES_hits(hit_list))

    # convert to article_info and return
    return [
        UserArticle.user_article_info(user, article)
        for article in final_article_mix
        if article is not None
    ]
# Script: re-tag articles with the localized topics of their language,
# committing every 1000 processed articles.
import zeeguu_core
from zeeguu_core.model import Article, Language, LocalizedTopic

session = zeeguu_core.db.session

counter = 0

languages = Language.available_languages()
# override: currently only re-tagging Danish articles
languages = [Language.find('da')]

for language in languages:
    articles = (
        Article.query
        .filter(Article.language == language)
        .order_by(Article.id.desc())
        .all()
    )
    loc_topics = LocalizedTopic.all_for_language(language)
    total_articles = len(articles)

    for article in articles:
        counter += 1
        print(f"{article.title}")
        print(f"{article.url.as_string()}")
        for loc_topic in loc_topics:
            if loc_topic.matches_article(article):
                article.add_topic(loc_topic.topic)
                print(f" #{loc_topic.topic_translated}")
        print("")
        session.add(article)

        if counter % 1000 == 0:
            # BUGFIX: the original computed (100 * counter / total) / 100,
            # printing a 0..1 fraction while labelling it as a percentage
            percentage = 100 * counter / total_articles
            # (also fixed the garbled "dorticles"/"Comitting" log text)
            print(f"{counter} articles done ({percentage}%). last article id: {article.id}. Committing... ")
            session.commit()
# NOTE(review): articles processed after the last multiple of 1000 are not
# committed here — confirm a final commit follows elsewhere
# Interactive helper: collects feed metadata from the console (with
# defaults detected from the feed itself) and registers a new RSSFeed.

# NOTE(review): rebinding `test_feed` shadows the test_feed() function —
# it cannot be called again after this line
test_feed = test_feed(_feed_url)

# each prompt falls back to the value detected from the feed
feed_name = input(f"Feed name (Enter for: {test_feed.title}): ") or test_feed.title
print(f'= {feed_name}')

icon_name = input(
    "Icon name to be found in resources folder (e.g. 20min.png): ")
print(f'= {icon_name}')

description = input(f'Description (Enter for: {test_feed.description}): ') or test_feed.description
print(f'= {description}')

_language = input("Language code (e.g. en): ")
print(f'= {_language}')

feed_url = Url.find_or_create(zeeguu_core.db.session, _feed_url)
language = Language.find_or_create(_language)

rss_feed = RSSFeed.find_or_create(zeeguu_core.db.session,
                                  feed_url,
                                  feed_name, description,
                                  icon_name=icon_name,
                                  language=language)

# echo what was stored
print("Done: ")
print(rss_feed.title)
print(rss_feed.description)
print(rss_feed.language_id)
print(rss_feed.url.as_string())
def set_native_language(self, code):
    """Set this user's native language to the Language matching `code`."""
    self.native_language = Language.find(code)