Example #1
0
def user_article():
    """

        called user_article because it returns info about the article
        but also the user-specific data relative to the article

        takes article_id as URL (query string) argument; it must be
        the integer id of an existing article

        responds with 400 when article_id is missing or is not an integer
        and with 404 when no article has that id

    :return: json_result of UserArticle.user_article_info for the current
        user (flask.g.user) and the article, requested with_content=True

    """

    raw_article_id = request.args.get("article_id", "")
    if not raw_article_id:
        flask.abort(400)

    try:
        article_id = int(raw_article_id)
    except ValueError:
        # a malformed id is a client error; without this the request
        # would end in an unhandled exception (HTTP 500)
        flask.abort(400)

    # one_or_none() returns None when there is no match instead of raising,
    # so an unknown id can be reported as "not found"
    article = Article.query.filter_by(id=article_id).one_or_none()
    if article is None:
        flask.abort(404)

    return json_result(
        UserArticle.user_article_info(flask.g.user, article,
                                      with_content=True))
def article_search_for_user(user, count, search):
    """

    Retrieve the articles matching :param search: that are also among the
    articles available to :param user: through their sources and languages.

    A first pass intersects the search results with a pool fetched with a
    limit of 2500; when that leaves fewer than 5 articles the intersection
    is redone against the pool fetched without an explicit limit.

    :return: user_article_info for the :param count: most recently
        published matches, newest first

    """

    limited_pool = _get_user_articles_sources_languages(user, 2500)
    matches = _get_articles_for_search_term(search)

    def _restricted_to(pool):
        # keep the search-result order, drop whatever is not in the pool
        allowed = set(pool)
        return [hit for hit in matches if hit in allowed]

    if matches is None:
        selected = []
    else:
        selected = _restricted_to(limited_pool)
        if len(selected) < 5:
            # too few hits in the limited pool: retry with the default pool
            selected = _restricted_to(
                _get_user_articles_sources_languages(user))

    # newest first, so that the slice below keeps the most recent ones
    newest_first = sorted(selected,
                          key=lambda hit: hit.published_time,
                          reverse=True)

    infos = []
    for article in newest_first[:count]:
        infos.append(UserArticle.user_article_info(user, article))
    return infos
Example #3
0
def get_user_article_info():
    """

        expects one parameter: url (read from the submitted form data)

        responds with 400 when url is missing or empty

    :return: json dictionary with info, i.e. the json_result of
        UserArticle.user_article_info for the current user (flask.g.user)
        and the article found or created for the url

    """

    url = str(request.form.get("url", ""))
    if not url:
        # without a url there is nothing to look up; reject the request
        # instead of handing an empty string to Article.find_or_create
        flask.abort(400)

    article = Article.find_or_create(db_session, url)

    return json_result(UserArticle.user_article_info(flask.g.user, article))
Example #4
0
def article_recommendations_for_user(user, count):
    """

            Retrieve the recommended articles for :param user, newest first.

            At most :param count articles are requested from the articles
            cache for the user's reading preferences hash; duplicates,
            articles marked broken and articles without a published_time
            are dropped.

            Users in the AIKI cohort get the articles mapped to their
            cohort instead.

            If no reading language is selected, a one-element list holding
            user.learned_language is returned.

    :return: list of UserArticle.user_article_info results

    """

    # Temporary fix for the experiment of Gabriel
    aiki_cohort_id = 109
    if user.cohort_id == aiki_cohort_id:
        return CohortArticleMap.get_articles_info_for_cohort(user.cohort)

    import zeeguu_core

    if not Language.all_reading_for_user(user):
        # NOTE(review): this yields a language rather than article info;
        # presumably callers treat it as the "no language" case -- confirm
        return [user.learned_language]

    prefs_hash = _reading_preferences_hash(user)
    _recompute_recommender_cache_if_needed(user, zeeguu_core.db.session)

    # Two quick calls to /articles/recommended can race inside the cache
    # recomputation and leave duplicate rows in the db. The cache is being
    # sunset in favour of elastic search, so rather than fixing the race
    # the duplicates are simply collapsed here.
    unique_cached = set(
        ArticlesCache.get_articles_for_hash(prefs_hash, count))

    usable = []
    for candidate in unique_cached:
        if candidate.broken or not candidate.published_time:
            continue
        usable.append(candidate)

    def _published(article):
        return article.published_time

    oldest_first = SortedList(usable, _published)

    infos = []
    for article in reversed(oldest_first):
        infos.append(UserArticle.user_article_info(user, article))
    return infos
def more_like_this_article(user, count, article_id):
    """
    Given an article ID, find more articles like that one via
    Elasticsearch's "more_like_this" query.

    :param user: user for whom the article info is prepared
    :param count: passed to build_more_like_this_query as the number of
        articles to ask for
    :param article_id: id of the article to find similar articles for
    :return: list of UserArticle.user_article_info results for the hits;
        empty when the article cannot be found

    """
    source_article = Article.find_by_id(article_id)
    if source_article is None:
        # NOTE(review): assumes find_by_id yields None for an unknown id --
        # confirm; there is nothing to compare against in that case
        return []

    query_body = build_more_like_this_query(count, source_article.content,
                                            source_article.language)

    es = Elasticsearch(ES_CONN_STRING)
    res = es.search(index=ES_ZINDEX, body=query_body)  # execute search
    hit_list = res["hits"].get("hits")

    # TODO need to make sure either that the searched-on article is always
    #  a part of the list or that it is never there.
    #  it could be used to show on website: you searched on X, here is what
    #  we found related to X

    similar_articles = _to_articles_from_ES_hits(hit_list)

    # Skip hits that did not resolve to an article (None entries), the same
    # way article_search_for_user does; presumably these are documents in
    # the index without a matching article -- confirm against
    # _to_articles_from_ES_hits.
    return [
        UserArticle.user_article_info(user, similar)
        for similar in similar_articles if similar is not None
    ]
Example #6
0
def article_recommendations_for_user(user, count):
    """

            Retrieve up to :param count recommended articles for
            :param user from the articles cache, newest first.

            Duplicate cache entries, articles marked broken and articles
            without a published_time are left out.

            Users in the AIKI cohort get the articles mapped to their
            cohort instead.

            If no reading language is selected, a one-element list holding
            user.learned_language is returned.

    :return: list of UserArticle.user_article_info results

    """

    # Temporary fix for the experiment of Gabriel
    AIKI_USERS_COHORT_ID = 109
    if user.cohort_id == AIKI_USERS_COHORT_ID:
        return CohortArticleMap.get_articles_info_for_cohort(user.cohort)

    import zeeguu_core

    user_languages = Language.all_reading_for_user(user)
    if not user_languages:
        return [user.learned_language]

    reading_pref_hash = _reading_preferences_hash(user)
    _recompute_recommender_cache_if_needed(user, zeeguu_core.db.session)

    # Concurrent requests can race while the recommender cache is being
    # recomputed and leave the same article cached more than once; collapse
    # such duplicates so that no article is recommended twice.
    unique_articles = set(
        ArticlesCache.get_articles_for_hash(reading_pref_hash, count))

    readable_articles = [
        each for each in unique_articles
        if (not each.broken and each.published_time)
    ]
    # SortedList orders ascending by published_time; iterate it in reverse
    # to serve the newest articles first
    by_published_time = SortedList(readable_articles,
                                   lambda x: x.published_time)

    return [
        UserArticle.user_article_info(user, article)
        for article in reversed(by_published_time)
    ]
def article_search_for_user(user, count, search_terms):
    """
    Handles searching.
    Reads the user's stored preferences (topic subscriptions and filters,
    search subscriptions and filters, declared levels per language) from the
    database and uses them, together with the search terms, to query
    elasticsearch once per reading language of the user.

    :param user: user on whose behalf the search is run
    :param count: max amount of articles to return; it is split evenly
        (integer division) over the user's reading languages
    :param search_terms: the search string entered by the user
    :return: list of UserArticle.user_article_info results for the articles
        found; empty when the user has no reading languages

    """

    user_languages = Language.all_reading_for_user(user)
    if not user_languages:
        # nothing to search in; this also avoids dividing by zero below
        return []

    # whole number of articles per language; true division would hand a
    # float (e.g. 3.33) to the query builder
    per_language_article_count = count // len(user_languages)

    # The preferences below depend only on the user, not on the language,
    # so they are collected once instead of once per language.

    # 1. Unwanted user topics
    # ==============================
    unwanted_user_topics = [
        search_filter.search.keywords
        for search_filter in SearchFilter.all_for_user(user)
    ]
    print(f"keywords to exclude: {unwanted_user_topics}")

    # 2. Topics to exclude / filter out
    # =================================
    topics_to_exclude = [
        each.topic.title for each in TopicFilter.all_for_user(user)
    ]
    print(f"topics to exclude: {topics_to_exclude}")

    # 3. Topics subscribed, and thus to include
    # =========================================
    topics_to_include = [
        subscription.topic.title
        for subscription in TopicSubscription.all_for_user(user)
    ]
    print(f"topics to include: {topics_to_include}")

    # 4. Wanted user topics
    # =========================================
    wanted_user_topics = [
        subscription.search.keywords
        for subscription in SearchSubscription.all_for_user(user)
    ]
    print(f"keywords to include: {wanted_user_topics}")

    # one client is enough for all the per-language searches
    es = Elasticsearch(ES_CONN_STRING)

    final_article_mix = []
    for language in user_languages:
        print(f"language: {language}")

        # 0. Ensure appropriate difficulty: the user's declared levels for
        # this language, scaled by 10, are the bounds given to the query
        declared_level_min, declared_level_max = user.levels_for(language)
        lower_bounds = declared_level_min * 10
        upper_bounds = declared_level_max * 10

        # build the query using elastic_query_builder
        query_body = build_elastic_query(
            per_language_article_count,
            search_terms,
            _list_to_string(topics_to_include),
            _list_to_string(topics_to_exclude),
            _list_to_string(wanted_user_topics),
            _list_to_string(unwanted_user_topics),
            language,
            upper_bounds,
            lower_bounds,
        )

        res = es.search(index=ES_ZINDEX, body=query_body)

        hit_list = res["hits"].get("hits")
        final_article_mix.extend(_to_articles_from_ES_hits(hit_list))

    # convert to article_info and return; hits that did not resolve to an
    # article (None entries) are skipped
    return [
        UserArticle.user_article_info(user, article)
        for article in final_article_mix if article is not None
    ]