Example #1
0
def article_id():
    """
    Returns the id of the article at the given URL, creating the
    article first if it does not already exist.

    Takes the target URL as the ``url`` query argument.
    NOTE: the url should be encoded with quote_plus (Python) and
    encodeURIComponent (Javascript).

    :return: JSON dict with the article id
    """

    target_url = request.args.get("url", "")
    if not target_url:
        flask.abort(400)

    try:
        found = Article.find_or_create(db_session, target_url)
        return json_result(dict(article_id=found.id))
    except Exception as e:
        from sentry_sdk import capture_exception

        # Report to Sentry and the app log, then answer with a 500.
        capture_exception(e)
        zeeguu.core.log(e)
        flask.abort(500)
def get_cohorts_for_article(article_id):
    """
    Return a JSON list of all the cohorts this article belongs to.
    """
    article = Article.find_by_id(article_id)
    cohorts = CohortArticleMap.get_cohorts_for_article(article)
    return json.dumps(cohorts)
Example #3
0
    def __init__(self, real=False):
        """Build the rule's article: a real one fetched from a live URL
        when ``real`` is true, otherwise a randomly generated one."""
        super().__init__()

        if not real:
            self.article = self._create_model_object()
            self.save(self.article)
        else:
            self.article = Article.find_or_create(
                ArticleRule.db.session, url_diesel_fahrverbote)
def teacher_texts():
    """
    Return JSON info for every text owned by the current teacher.
    """
    own_articles = Article.own_texts_for_user(flask.g.user)
    return json.dumps(
        [each.article_info_for_teacher() for each in own_articles]
    )
Example #5
0
def upload_own_text():
    """
    Create an article for the current user from the posted ``language``,
    ``content`` and ``title`` form fields.

    :return: the new article id as a string
    """
    # Discard any half-finished transaction state before writing.
    db_session.rollback()

    posted_language = Language.find_or_create(request.form.get("language", ""))
    posted_content = request.form.get("content", "")
    posted_title = request.form.get("title", "")

    created_id = Article.create_from_upload(
        db_session, posted_title, posted_content, flask.g.user, posted_language
    )

    return str(created_id)
Example #6
0
    def _create_model_object(self):
        """Build a random Article, retrying until one that is not yet
        in the database is produced."""
        fake = self.faker
        # Keep the faker call order stable so the random sequence is unchanged.
        title = " ".join(fake.text().split()[:4])
        authors = fake.name()
        content = fake.text()
        summary = fake.text()
        published = datetime.now() - timedelta(minutes=randint(0, 7200))
        feed = RSSFeedRule().feed
        language = LanguageRule().random
        link = UrlRule().url

        candidate = Article(link, title, authors, content, summary,
                            published, feed, language)

        # Re-roll if this random article already exists.
        if self._exists_in_db(candidate):
            return self._create_model_object()

        return candidate
def add_article_to_cohort():
    """
    Adds the article given by the ``article_id`` form field to the
    cohort given by the ``cohort_id`` form field.

    Idempotent: if the mapping already exists, nothing is added.
    Requires teacher permission on the cohort.

    :return: "OK"
    """
    # NOTE: the original docstring ("Gets all the articles of this
    # teacher") was copy-pasted from another endpoint and was wrong.

    cohort = Cohort.find(request.form.get("cohort_id"))

    check_permission_for_cohort(cohort.id)

    article = Article.find_by_id(request.form.get("article_id"))

    # Only create the mapping if it is not there yet.
    if not CohortArticleMap.find(cohort.id, article.id):
        new_mapping = CohortArticleMap(cohort, article, datetime.now())
        db.session.add(new_mapping)
        db.session.commit()

    return "OK"
def delete_article_from_cohort():
    """
    Removes the article given by the ``article_id`` form field from the
    cohort given by the ``cohort_id`` form field.

    Requires teacher permission on the cohort.

    :return: "OK" on success, or a 401 error when the article is not
        mapped to the cohort
    """
    # NOTE: the original docstring ("Gets all the articles of this
    # teacher") was copy-pasted from another endpoint and was wrong.

    cohort = Cohort.find(request.form.get("cohort_id"))

    check_permission_for_cohort(cohort.id)

    article = Article.find_by_id(request.form.get("article_id"))

    mapping = CohortArticleMap.find(cohort.id, article.id)
    if not mapping:
        return make_error(401, "That article does not belong to the cohort!")

    db.session.delete(mapping)
    db.session.commit()
    return "OK"
def upload_articles(cohort_id):
    """
    Uploads articles for a cohort with input from a POST request.

    The request body is a JSON list of article dicts with keys
    ``title``, ``authors``, ``content``, ``summary`` and
    ``language_code``.

    :return: "OK" on success; aborts with 400 on malformed JSON
    """
    check_permission_for_cohort(cohort_id)

    try:
        # The cohort is the same for every uploaded article, so look it
        # up once instead of once per loop iteration (fix: was inside
        # the loop).
        cohort = Cohort.find(cohort_id)

        for article_data in json.loads(request.data):
            url = Url("userarticle/{}".format(uuid.uuid4().hex))
            language = Language.find(article_data["language_code"])

            new_article = Article(
                url,
                article_data["title"],
                article_data["authors"],
                article_data["content"],
                article_data["summary"],
                datetime.now(),
                None,  # rss feed
                language,
            )

            db.session.add(new_article)
            # Flush + refresh so the article receives its id before the
            # cohort mapping references it.
            db.session.flush()
            db.session.refresh(new_article)

            new_cohort_article_map = CohortArticleMap(
                cohort, new_article, datetime.now())
            db.session.add(new_cohort_article_map)

        db.session.commit()
        return "OK"
    except ValueError:
        # flask.abort raises, so nothing after it can run; the old
        # `return "ValueError"` was unreachable and has been removed.
        flask.abort(400)
Example #10
0
def more_like_this_article(user, count, article_id):
    """
    Given an article ID, find more articles like that one via
    Elasticsearch's "more_like_this" method.
    """
    source = Article.find_by_id(article_id)

    query_body = build_more_like_this_query(
        count, source.content, source.language)

    es_client = Elasticsearch(ES_CONN_STRING)
    response = es_client.search(index=ES_ZINDEX, body=query_body)
    hits = response["hits"].get("hits")

    # TODO need to make sure either that the searched on article is always a part of the list \
    #  or that it is never there.
    #  it could be used to show on website; you searched on X, here is what we found related to X

    similar_articles = _to_articles_from_ES_hits(hits)
    return [
        UserArticle.user_article_info(user, each)
        for each in similar_articles
    ]
    def user_article_info(cls,
                          user: User,
                          article: Article,
                          with_content=False,
                          with_translations=True):
        """
        Build the article-info dict for ``article`` enriched with the
        user-specific fields: starred / opened / liked and, optionally,
        the user's translations in this article.
        """
        from zeeguu.core.model import Bookmark

        # Start from the generic article info.
        info = article.article_info(with_content=with_content)

        interaction = UserArticle.find(user, article)

        if not interaction:
            # No interaction record yet: report defaults for everything.
            info["starred"] = False
            info["opened"] = False
            info["liked"] = None
            info["translations"] = []
            return info

        info["starred"] = interaction.starred is not None
        info["opened"] = interaction.opened is not None
        info["liked"] = interaction.liked
        if interaction.starred:
            info["starred_time"] = datetime_to_json(interaction.starred)

        if with_translations:
            bookmarks = Bookmark.find_all_for_user_and_article(user, article)
            info["translations"] = [
                b.serializable_dictionary() for b in bookmarks
            ]

        return info
Example #12
0
    def _find_article_in_value_or_extra_data(self, db_session):
        """
        Finds or creates an article_id.

        return: articleID or NONE

        NOTE: When the article cannot be downloaded anymore,
        either because the article is no longer available or the
        newspaper.parser() fails
        """
        # Only article-interaction events carry an article reference.
        if self.event not in ALL_ARTICLE_INTERACTION_ACTIONS:
            return None

        # The URL lives either directly in value or in the extra data.
        if self.value.startswith("http"):
            url = self.value
        else:
            url = self.find_url_in_extra_data()

        if not url:
            return None

        return Article.find_or_create(db_session, url, sleep_a_bit=True).id
Example #13
0
def _to_articles_from_ES_hits(hits):
    articles = []
    for hit in hits:
        articles.append(Article.find_by_id(hit.get("_id")))
    return articles
 def test_find_or_create(self):
     """The article created from the URL gets a FK difficulty computed."""
     self.new_art = Article.find_or_create(session,
                                           url_formation_professionnelle)
     assert self.new_art.fk_difficulty
 def test_load_article_without_language_information(self):
     """find_or_create still works when the page lacks language metadata."""
     created = Article.find_or_create(session, url_plane_crashes)
     assert created
# Migration script: attach each Text row to its Article.  When the text's
# URL carries an `articleID=` query parameter we resolve by id, otherwise
# we look the article up by its canonical URL.
# NOTE(review): `texts` and `not_found` are presumably initialized earlier
# in this file (only `found` is set here) — confirm against the full file.
found = 1

for text in texts:

    # print(text.article_id)
    if not text.article:

        article_id = None
        # Extract the id from URLs of the form ...articleID=<id>
        if 'articleID' in text.url.as_canonical_string():
            article_id = text.url.as_canonical_string().split("articleID=")[-1]
            # print(f'extracted id: {article_id}')

        if article_id:
            article = Article.query.filter_by(id=article_id).one()
        else:
            # No id in the URL: fall back to lookup by canonical URL.
            article = Article.find(text.url.as_canonical_string())
            # print(text.url.as_canonical_string())

        if not article:
            not_found += 1
            print(f'not found: {not_found}')
        else:
            found += 1
            # Link the text to the article and stage it for commit.
            text.article = article
            zeeguu.core.db.session.add(text)

            # print(text)
            # print(article)
            # print(text.url.as_string())
            # print(text.article.url.as_string())
            print(f'found: {found}')
Example #17
0
 def _exists_in_db(obj):
     """True iff an equivalent article is already in the database."""
     return Article.exists(obj)
Example #18
0
def own_texts():
    """JSON list with the info of every text owned by the current user."""
    infos = [
        article.article_info()
        for article in Article.own_texts_for_user(flask.g.user)
    ]
    return json_result(infos)
Example #19
0
def articles_correlations():
    """
    For each language under analysis, build a per-article dataframe of
    interaction counts (opened / translated / spoken / liked / closed)
    plus static article features, then print Spearman correlations for
    a fixed list of variable pairs.

    Fixes: ``DataFrame.append`` (removed in pandas 2.0) replaced with
    ``pd.concat``; the eight copy-pasted correlation blocks collapsed
    into a data-driven loop (same print order as before).
    """
    articles_df = pd.DataFrame(columns=[
        "id", "lang", "difficulty", "word_count", "title_length", "opened",
        "translated", "spoken", "liked", "closed"
    ])

    # Event name -> dataframe column it increments.
    event_to_column = {
        "UMR - OPEN ARTICLE": "opened",
        "UMR - TRANSLATE TEXT": "translated",
        "UMR - SPEAK TEXT": "spoken",
        "UMR - LIKE ARTICLE": "liked",
        "UMR - ARTICLE CLOSED": "closed",
    }

    all_users = User.find_all()
    print(len(all_users))

    for reading_language in languages_to_analyze:
        print("\nLANGUAGE:", reading_language)
        language_id = Language.find(reading_language).id

        for user in tqdm(all_users):
            if user.learned_language_id != language_id:
                continue
            for event in UserActivityData.find(user):
                article_id = event.article_id
                if not article_id:
                    continue
                article_data = Article.find_by_id(article_id)
                if article_data.language_id != language_id:
                    continue

                # First time we see this article: add its base row.
                if not (articles_df['id'] == article_id).any():
                    row = {
                        "id": article_id,
                        "lang": article_data.language_id,
                        "difficulty": article_data.fk_difficulty,
                        "word_count": article_data.word_count,
                        "title_length": len(article_data.title.split()),
                        "opened": 0,
                        "translated": 0,
                        "spoken": 0,
                        "liked": 0,
                        "closed": 0,
                    }
                    # DataFrame.append was removed in pandas 2.0.
                    articles_df = pd.concat(
                        [articles_df, pd.DataFrame([row])],
                        ignore_index=True)

                column = event_to_column.get(event.event)
                if column:
                    articles_df.loc[articles_df.id == article_id,
                                    column] += 1

        print("Articles:", len(articles_df))

        correlation_variables = [
            "word_count", "difficulty", "liked", "translated", "spoken",
            "opened", "closed", "title_length"
        ]
        # (x, y) index pairs into correlation_variables, in the exact
        # order the correlations were printed originally.
        pairs = [(0, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 1),
                 (5, 7)]
        for x_i, y_i in pairs:
            x_name = correlation_variables[x_i]
            y_name = correlation_variables[y_i]
            spearman_corr = stats.spearmanr(articles_df[x_name],
                                            articles_df[y_name])
            print(x_name, y_name, spearman_corr[0], spearman_corr[1])
def get_one_translation(from_lang_code, to_lang_code):
    """
    Returns the single best translation for the posted word in its context,
    creating a Bookmark that records it.

    POST params: ``word`` (mandatory), ``url``, ``title``, ``context``,
    ``articleID`` (optional; when absent the article is found/created
    from ``url``).

    To think about:
    - it would also make sense to separate translation from
    logging; or at least, allow for situations where a translation
    is not associated with an url... or?
    - jul 2021 - Bjorn would like to have the possibility of getting
    a translation without an article; can be done; allow for the
    articleID to be empty; what would be the downside of that?
    - hmm. maybe he can simply work with get_multiple_translations

    :return: json array with translations
    """

    word_str = request.form["word"].strip(punctuation_extended)
    url = request.form.get("url")
    title_str = request.form.get("title", "")
    context = request.form.get("context", "")
    article_id = request.form.get("articleID", None)

    if not article_id:
        # the url comes from elsewhere not from the reader, so we find or create the article
        article = Article.find_or_create(db_session, url)
        article_id = article.id

    # Trim the context down to the part relevant for the word before querying.
    minimal_context, query = minimize_context(context, from_lang_code,
                                              word_str)

    # if we have an own / teacher translation that is our first "best guess"
    # ML: TODO: word translated in the same text / articleID / url should still be considered
    # as an own translation; currently only if the "context" is the same counts;
    # which means that translating afslore in a previous paragraph does not count
    best_guess = get_own_past_translation(flask.g.user, word_str,
                                          from_lang_code, to_lang_code,
                                          context)
    if best_guess:
        # A past own translation is treated as certain.
        likelihood = 1
        source = "Own past translation"
    else:

        # Fall back to the external translation service; ask for one result.
        translations = get_next_results(
            {
                "from_lang_code": from_lang_code,
                "to_lang_code": to_lang_code,
                "url": url,
                "word": word_str,
                "title": title_str,
                "query": query,
                "context": minimal_context,
            },
            number_of_results=1,
        ).translations

        best_guess = translations[0]["translation"]
        likelihood = translations[0].pop("quality")
        source = translations[0].pop("service_name")

    # Record the translation event as a bookmark for the user.
    bookmark = Bookmark.find_or_create(
        db_session,
        flask.g.user,
        word_str,
        from_lang_code,
        best_guess,
        to_lang_code,
        minimal_context,
        url,
        title_str,
        article_id,
    )

    print(bookmark)

    return json_result({
        "translation": best_guess,
        "bookmark_id": bookmark.id,
        "source": source,
        "likelihood": likelihood,
    })
import zeeguu.core
from zeeguu.core.model import Article, UserArticle
from zeeguu.core.model.starred_article import StarredArticle

# Migration script: convert every legacy StarredArticle row into an
# Article + UserArticle pair, preserving the original starred date.
session = zeeguu.core.db.session

for sa in StarredArticle.query.all():
    try:
        article = Article.find_or_create(session, sa.url.as_string())
        ua = UserArticle.find_or_create(session,
                                        sa.user,
                                        article,
                                        starred=sa.starred_date)
        # Commit per row so one bad URL does not abort the whole run.
        session.add(ua)
        session.commit()
        print(f'{sa.starred_date} x {ua.user.name} x {ua.article.title}')
    except Exception as ex:
        # Best-effort import: log the failing URL and keep going.
        print(f'could not import {sa.url.as_string()}')
        print(ex)
def contribute_translation(from_lang_code, to_lang_code):
    """
    User contributes a translation they think is appropriate for
    a given :param word in :param from_lang_code in a given :param context.

    The :param translation is in :param to_lang_code.

    Together with the two words and the textual context, you must submit
    also the :param url, :param title of the page where the original
    word and context occurred.

    :return: in case of success, the bookmark_id and main translation
    """

    # All these POST params are mandatory
    word = unquote_plus(request.form["word"])
    translation = request.form["translation"]
    raw_url = request.form.get("url", "")
    context = request.form.get("context", "")
    title = request.form.get("title", "")
    # when a translation is added by hand, the servicename_translation is None
    # thus we set it to MANUAL
    service = request.form.get("servicename_translation", "MANUAL")

    def _canonical_url_of(aid):
        # Canonical URL of the article with the given id.
        return Article.query.filter_by(id=aid).one().url.as_canonical_string()

    if "articleID" in raw_url:
        article_id = raw_url.split("articleID=")[-1]
        url = _canonical_url_of(article_id)
    elif "articleURL" in raw_url:
        article_id = None
        url = raw_url.split("articleURL=")[-1]
    elif "article?id=" in raw_url:
        article_id = raw_url.split("article?id=")[-1]
        url = _canonical_url_of(article_id)
    else:
        # the url comes from elsewhere not from the reader, so we find or create the article
        url = raw_url
        article_id = Article.find_or_create(db_session, url).id

    # Optional POST param
    selected_from_predefined_choices = request.form.get(
        "selected_from_predefined_choices", "")

    minimal_context, _ = minimize_context(context, from_lang_code, word)

    bookmark = Bookmark.find_or_create(
        db_session,
        flask.g.user,
        word,
        from_lang_code,
        translation,
        to_lang_code,
        minimal_context,
        url,
        title,
        article_id,
    )

    # Inform apimux about translation selection
    contribute_trans({
        "word_str": word,
        "translation_str": translation,
        "url": url,
        "context_size": len(context),
        "service_name": service,
    })

    return json_result(dict(bookmark_id=bookmark.id))