コード例 #1
0
def user_article_info(user: User,
                      article: Article,
                      with_content=False,
                      with_translations=True):
    """
    Build the info dictionary of an article, enriched with the state
    that the given user has for it.

    :param user: the user whose interaction with the article is looked up
    :param article: the article to describe
    :param with_content: forwarded to ``article.article_info``
    :param with_translations: when true (and the user has prior info for
        the article) the user's bookmarks for the article url are added
        under the 'translations' key
    :return: the dictionary returned by ``article.article_info`` with the
        extra keys 'starred', 'opened', 'liked' and possibly 'translations'
    """
    from zeeguu_core.model import UserArticle

    interaction = UserArticle.find(user, article)
    info = article.article_info(with_content=with_content)

    if interaction:
        # starred / opened are reported as booleans: set vs. not set
        info['starred'] = interaction.starred is not None
        info['opened'] = interaction.opened is not None
        info['liked'] = interaction.liked

        if with_translations:
            bookmarks = Bookmark.find_all_for_user_and_url(user, article.url)
            info['translations'] = [
                bookmark.serializable_dictionary() for bookmark in bookmarks
            ]
    else:
        # the user never interacted with this article: report defaults
        info['starred'] = False
        info['opened'] = False
        info['liked'] = False
        info['translations'] = []

    return info
コード例 #2
0
def article_id():
    """
    Return the id of the article at a given URL; the article is looked
    up with ``Article.find_or_create``.

    The URL is taken from the ``url`` query argument.
    NOTE: the url should be encoded with quote_plus (Python) or
    encodeURIComponent (Javascript).

    Aborts with 400 when no url is given, and with 500 when finding or
    creating the article fails (the exception is sent to Sentry and
    logged first).

    :return: json result holding the article id
    """
    requested_url = request.args.get("url", "")
    if not requested_url:
        flask.abort(400)

    try:
        found_or_new = Article.find_or_create(db_session, requested_url)
        payload = dict(article_id=found_or_new.id)
        return json_result(payload)
    except Exception as error:
        # imported lazily, only on the failure path
        from sentry_sdk import capture_exception

        capture_exception(error)
        zeeguu_core.log(error)
        flask.abort(500)
コード例 #3
0
    def user_article_info(cls,
                          user: User,
                          article: Article,
                          with_content=False,
                          with_translations=True):
        """
        Describe an article from the point of view of a user.

        Starts from ``article.article_info`` and adds the keys 'starred',
        'opened' and 'liked'. The 'translations' key is added when the
        user has no stored info for the article (empty list) or when
        ``with_translations`` is true (the user's bookmarks for the
        article url).

        :param user: the user whose article info is looked up
        :param article: the article to describe
        :param with_content: forwarded to ``article.article_info``
        :param with_translations: whether to include the user's bookmarks
        :return: the enriched info dictionary
        """
        from zeeguu_core.model import Bookmark

        # The generic article info is the base of the result
        info = article.article_info(with_content=with_content)
        stored = UserArticle.find(user, article)

        if stored:
            info['starred'] = stored.starred is not None
            info['opened'] = stored.opened is not None
            info['liked'] = stored.liked

            if with_translations:
                bookmarks = Bookmark.find_all_for_user_and_url(
                    user, article.url)
                info['translations'] = [
                    bookmark.serializable_dictionary()
                    for bookmark in bookmarks
                ]
        else:
            # Nothing stored for this (user, article) pair: defaults
            info['starred'] = False
            info['opened'] = False
            info['liked'] = False
            info['translations'] = []

        return info
コード例 #4
0
ファイル: article.py プロジェクト: mircealungu/Zeeguu-API
    def create_from_upload(cls, session, title, content, uploader, language):
        """
        Store an uploaded text as a new Article and return its id.

        Only title, content, language and uploader are provided; all the
        other positional arguments of the Article constructor are None.

        :param session: the db session used to add and commit the article
        :return: the id of the newly committed article
        """
        uploaded = Article(None, title, None, content, None, None, None,
                           language, uploader)

        session.add(uploaded)
        session.commit()

        # the id is read after the commit
        return uploaded.id
コード例 #5
0
def get_cohorts_for_article(article_id):
    """
    Return, as a JSON string, the cohorts of the article with the given id.

    :param article_id: id used to look the article up
    :return: json dump of ``CohortArticleMap.get_cohorts_for_article``
    """
    cohorts = CohortArticleMap.get_cohorts_for_article(
        Article.find_by_id(article_id))
    return json.dumps(cohorts)
コード例 #6
0
    def __init__(self, real=False):
        """
        Prepare ``self.article``.

        :param real: when true, the article is obtained with
            ``Article.find_or_create`` from ``url_diesel_fahrverbote``;
            otherwise a model object is created locally and saved.
        """
        super().__init__()

        if not real:
            self.article = self._create_model_object()
            self.save(self.article)
            return

        self.article = Article.find_or_create(ArticleRule.db.session,
                                              url_diesel_fahrverbote)
コード例 #7
0
def get_possible_translations(from_lang_code, to_lang_code):
    """

        Returns a list of possible translations in :param to_lang_code
        for the posted word in :param from_lang_code.

        Expected POST form fields:
        - word (mandatory): the word to translate
        - context: the sentence where the word was found
        - url: the url of the page where the word was found
        - title: the title of that page

        As a side effect, the first translation (when there is one) is
        saved as a bookmark for the current user.

        :return: json with the list of translations under 'translations'

    """
    data = {"from_lang_code": from_lang_code, "to_lang_code": to_lang_code}
    data["context"] = request.form.get('context', '')
    url = request.form.get('url', '')
    data["url"] = url
    article_id = None
    if 'articleID' in url:
        # reader url that carries the article id: resolve the real url
        article_id = url.split('articleID=')[-1]
        url = Article.query.filter_by(id=article_id).one().url.as_canonical_string()
    elif 'articleURL' in url:
        # reader url that carries the article url; article_id stays None
        url = url.split('articleURL=')[-1]
    else:
        # the url comes from elsewhere not from the reader, so we find or create the article
        article = Article.find_or_create(db_session, url)
        article_id = article.id
    zeeguu_core.log(f"url before being saved: {url}")
    word_str = request.form['word']
    data["word"] = word_str
    title_str = request.form.get('title', '')
    data["title"] = title_str

    zeeguu_core.log(f'translating to... {data["to_lang_code"]}')
    minimal_context, query = minimize_context(
        data["context"], data["from_lang_code"], data["word"])
    zeeguu_core.log(f"Query to translate is: {query}")
    data["query"] = query
    translations = get_all_translations(data).translations
    zeeguu_core.log(f"Got translations: {translations}")

    # translators talk about quality, but our users expect likelihood.
    # rename the key in the dictionary
    for t in translations:
        t['likelihood'] = t.pop("quality")
        t['source'] = t.pop('service_name')

    # Only bookmark when there is at least one translation; indexing an
    # empty result used to raise IndexError and fail the whole request.
    if len(translations) > 0:
        best_guess = translations[0]["translation"]

        Bookmark.find_or_create(db_session, flask.g.user,
                                word_str, from_lang_code,
                                best_guess, to_lang_code,
                                minimal_context, url, title_str, article_id)

    return json_result(dict(translations=translations))
コード例 #8
0
def translate_and_bookmark(from_lang_code, to_lang_code):
    """

        @deprecated
        This should be deprecated and /get_possible_translations used instead
        However, it is still used by the zeeguu chrome extension.

        This expects in the post parameter the following:
        - word (to translate)
        - context (surrounding paragraph of the original word )
        - url (of the origin)
        - title (of the origin page)

        /get_possible_translations has very similar behavior, only that
          it focuses on returning the possible alternative translations

        Aborts with 400 when a ValueError is raised while minimizing the
        context, translating or bookmarking.

    :param from_lang_code: language code of the word
    :param to_lang_code: language code of the translation
    :return: json with the bookmark id and the best-guess translation
    """

    data = {"from_lang_code": from_lang_code, "to_lang_code": to_lang_code}
    word_str = unquote_plus(request.form['word'])
    data["word"] = word_str
    url_str = request.form.get('url', '')
    data["url"] = url_str

    title_str = request.form.get('title', '')
    data["title"] = title_str
    context_str = request.form.get('context', '')
    data["context"] = context_str

    # the url comes from elsewhere not from the reader, so we find or create the article
    article = Article.find_or_create(db_session, url_str)
    article_id = article.id

    try:

        minimal_context, query = minimize_context(
            data["context"], data["from_lang_code"], data["word"])
        data["query"] = query
        translations = get_all_translations(data).translations

        best_guess = translations[0]["translation"]

        bookmark = Bookmark.find_or_create(db_session, flask.g.user,
                                           word_str, from_lang_code,
                                           best_guess, to_lang_code,
                                           minimal_context, url_str, title_str, article_id)
    except ValueError as e:
        zeeguu_core.log(f"minimize context failed {e}on: {context_str} x {from_lang_code} x {word_str} ")
        # The handler used to `return context_str, query`: that tuple is
        # not a valid response for this endpoint, and `query` is not even
        # bound when minimize_context itself raised. Report a bad request.
        flask.abort(400)

    return json_result(dict(
        bookmark_id=bookmark.id,
        translation=best_guess))
コード例 #9
0
def teacher_texts():
    """
    Return, as a JSON string, the teacher-oriented info of every text
    owned by the current user.
    """
    own_texts = Article.own_texts_for_user(flask.g.user)
    return json.dumps([text.article_info_for_teacher() for text in own_texts])
コード例 #10
0
def upload_own_text():
    """
    Create an article from the posted form fields ``language``,
    ``content`` and ``title`` (each defaults to the empty string) with
    the current user as uploader.

    :return: the id of the new article, as a string
    """
    # the session is rolled back before anything else is done
    db_session.rollback()

    form = request.form
    language = Language.find_or_create(form.get("language", ""))
    title = form.get("title", "")
    content = form.get("content", "")

    return str(
        Article.create_from_upload(db_session, title, content, flask.g.user,
                                   language))
コード例 #11
0
def contribute_translation(from_lang_code, to_lang_code):
    """
    Save a translation that the user considers appropriate for a word
    seen in a given context.

    POST form fields:
    - word (mandatory): the original word, in from_lang_code
    - translation (mandatory): the translation, in to_lang_code
    - url, context, title: where the word occurred (default: '')
    - servicename_translation: name of the translation service
      (default: 'MANUAL')
    - selected_from_predefined_choices: optional

    :return: json with the id of the bookmark that was found or created
    """
    form = request.form

    # 'word' and 'translation' are read with [] so they must be present
    word_str = unquote_plus(form['word'])
    translation_str = form['translation']
    url = form.get('url', '')
    context_str = form.get('context', '')
    title_str = form.get('title', '')
    # a translation added by hand comes without a service name,
    # hence the MANUAL default
    service_name = form.get('servicename_translation', 'MANUAL')

    article_id = None
    if 'articleID' in url:
        # the url carries an article id: resolve the article's own url
        article_id = url.split('articleID=')[-1]
        matching_article = Article.query.filter_by(id=article_id).one()
        url = matching_article.url.as_canonical_string()
    elif 'articleURL' in url:
        # the url carries the article url; article_id stays None
        url = url.split('articleURL=')[-1]
    else:
        # the url does not come from the reader: find or create the article
        article_id = Article.find_or_create(db_session, url).id

    # Optional POST param (read but not used further in this function)
    selected_from_predefined_choices = form.get(
        'selected_from_predefined_choices', '')

    minimal_context, _ = minimize_context(context_str, from_lang_code,
                                          word_str)

    bookmark = Bookmark.find_or_create(
        db_session, flask.g.user,
        word_str, from_lang_code,
        translation_str, to_lang_code,
        minimal_context, url, title_str, article_id)

    # Inform apimux about translation selection
    contribute_trans({
        "word_str": word_str,
        "translation_str": translation_str,
        "url": url,
        "context_size": len(context_str),
        "service_name": service_name,
    })

    return json_result(dict(bookmark_id=bookmark.id))
コード例 #12
0
def get_user_article_info():
    """
    Return the current user's info about the article at a given url.

    Expects one POST form parameter: url (defaults to the empty string).
    The article is obtained with ``Article.find_or_create``.

    :return: json dictionary with info
    """
    article_url = str(request.form.get("url", ""))
    article = Article.find_or_create(db_session, article_url)
    info = UserArticle.user_article_info(flask.g.user, article)

    return json_result(info)
コード例 #13
0
def delete_articles_older_than(DAYS, print_progress_for_every_article=False):
    """
    Delete the articles older than DAYS days that are not referenced,
    together with their ArticlesCache lines.

    Deletions are committed every time the running article counter is a
    multiple of BATCH_COMMIT_SIZE; an IntegrityError rolls the session
    back and processing goes on with the next article.

    NOTE(review): no commit is issued after the loop in this function, so
    deletions made after the last batch boundary presumably rely on the
    caller committing -- confirm.

    :param DAYS: age threshold, in days
    :param print_progress_for_every_article: print one line per article
    """
    print(f"Finding articles older than {DAYS} days...")
    all_articles = Article.all_older_than(days=DAYS)
    print(f" ... article count: {len(all_articles)}")

    referenced_in_this_batch = 0
    deleted = []
    for i, each in enumerate(all_articles, start=1):
        if print_progress_for_every_article:
            print(f"#{i} -- ID: {each.id}")

        # referenced articles are kept; they are only counted
        if is_the_article_referenced(each, True):
            referenced_in_this_batch += 1
            continue

        try:
            cache_lines = ArticlesCache.query.filter_by(
                article_id=each.id).all()
            for each_cache_line in cache_lines:
                print(
                    f"... ID: {each.id} deleting also cache line: {each_cache_line}"
                )
                dbs.delete(each_cache_line)

            deleted.append(each.id)
            dbs.delete(each)

            # commit once per batch and restart the per-batch counter
            if i % BATCH_COMMIT_SIZE == 0:
                print(
                    f"Keeping {referenced_in_this_batch} articles from the last {BATCH_COMMIT_SIZE} batch..."
                )
                dbs.commit()
                print(
                    f"... the rest of {BATCH_COMMIT_SIZE-referenced_in_this_batch} are now deleted!!!"
                )
                referenced_in_this_batch = 0

        except sqlalchemy.exc.IntegrityError:
            traceback.print_exc()
            dbs.rollback()

    print(f'Deleted: {deleted}')
コード例 #14
0
    def _create_model_object(self):
        """
        Build an Article filled with fake data.

        When ``self._exists_in_db`` reports the generated article as
        already existing, a new one is generated recursively.

        :return: an Article that is not reported as existing in the db
        """
        # title: the first four words of a fake text
        title = " ".join(self.faker.text().split()[:4])
        authors = self.faker.name()
        content = self.faker.text()
        summary = self.faker.text()
        # published at a random moment within the last 7200 minutes
        minutes_ago = timedelta(minutes=randint(0, 7200))
        published = datetime.now() - minutes_ago
        rss_feed = RSSFeedRule().feed
        language = LanguageRule().random
        url = UrlRule().url

        candidate = Article(url, title, authors, content, summary, published,
                            rss_feed, language)

        if not self._exists_in_db(candidate):
            return candidate

        return self._create_model_object()
コード例 #15
0
def add_article_to_cohort():
    """
    Add an article to a cohort, unless the mapping already exists.

    Reads ``cohort_id`` and ``article_id`` from the posted form and
    aborts with 401 when the permission check for the cohort fails.

    :return: "OK"
    """
    cohort = Cohort.find(request.form.get("cohort_id"))
    if not has_permission_for_cohort(cohort.id):
        flask.abort(401)

    article = Article.find_by_id(request.form.get("article_id"))

    existing_mapping = CohortArticleMap.find(cohort.id, article.id)
    if existing_mapping:
        # nothing to do: the article is already in the cohort
        return "OK"

    db.session.add(CohortArticleMap(cohort, article))
    db.session.commit()
    return "OK"
コード例 #16
0
def delete_article_from_cohort():
    """
    Remove an article from a cohort.

    Reads ``cohort_id`` and ``article_id`` from the posted form and
    aborts with 401 when the permission check for the cohort fails.

    :return: "OK" when the mapping was deleted; otherwise the result of
        ``make_error`` with code 401
    """
    cohort = Cohort.find(request.form.get("cohort_id"))
    if not has_permission_for_cohort(cohort.id):
        flask.abort(401)

    article = Article.find_by_id(request.form.get("article_id"))
    mapping = CohortArticleMap.find(cohort.id, article.id)

    if not mapping:
        return make_error(401, "That article does not belong to the cohort!")

    db.session.delete(mapping)
    db.session.commit()
    return "OK"
コード例 #17
0
def upload_articles(cohort_id):
    """
    Uploads articles for a cohort with input from a POST request.

    The request body is parsed as JSON and iterated; every item must
    provide the keys "title", "authors", "content", "summary" and
    "language_code". Each item becomes a new Article (with a generated
    "userarticle/<uuid>" url and the current time as publishing time)
    that is mapped to the cohort. Everything is committed at the end.

    Aborts with 401 when the permission check for the cohort fails and
    with 400 when the payload is not valid JSON or an item lacks a key.

    :param cohort_id: id of the cohort that receives the articles
    :return: "OK"
    """
    if not has_permission_for_cohort(cohort_id):
        flask.abort(401)
    try:
        # the cohort does not depend on the article: look it up once
        cohort = Cohort.find(cohort_id)

        for article_data in json.loads(request.data):
            url = Url("userarticle/{}".format(uuid.uuid4().hex))
            title = article_data["title"]
            authors = article_data["authors"]
            content = article_data["content"]
            summary = article_data["summary"]
            published_time = datetime.now()
            language_code = article_data["language_code"]
            language = Language.find(language_code)

            new_article = Article(
                url,
                title,
                authors,
                content,
                summary,
                published_time,
                None,  # rss feed
                language,
            )

            db.session.add(new_article)
            # flush + refresh before the article is used in the mapping
            db.session.flush()
            db.session.refresh(new_article)

            new_cohort_article_map = CohortArticleMap(cohort, new_article)

            db.session.add(new_cohort_article_map)
        db.session.commit()
        return "OK"
    except (ValueError, KeyError):
        # ValueError: body is not valid JSON; KeyError: a field is missing.
        # flask.abort raises, so nothing can be returned after it.
        flask.abort(400)
コード例 #18
0
def upload_articles(cohort_id):
    '''
        Uploads articles for a cohort with input from a POST request.

        The request body is parsed as JSON and iterated; every item must
        provide the keys 'title', 'authors', 'content', 'summary' and
        'language_code'. Each item becomes a new Article (with a
        generated 'userarticle/<uuid>' url and the current time as
        publishing time) that is mapped to the cohort. Everything is
        committed at the end.

        Aborts with 401 when the permission check for the cohort fails
        and with 400 when the payload is not valid JSON or an item lacks
        a key.

        :param cohort_id: id of the cohort that receives the articles
        :return: 'OK'
    '''
    if not has_permission_for_cohort(cohort_id):
        flask.abort(401)
    try:
        # the cohort does not depend on the article: look it up once
        cohort = Cohort.find(cohort_id)

        for article_data in json.loads(request.data):
            url = Url('userarticle/{}'.format(uuid.uuid4().hex))
            title = article_data['title']
            authors = article_data['authors']
            content = article_data['content']
            summary = article_data['summary']
            published_time = datetime.now()
            language_code = article_data['language_code']
            language = Language.find(language_code)

            new_article = Article(
                url,
                title,
                authors,
                content,
                summary,
                published_time,
                None,  # rss feed
                language)

            db.session.add(new_article)
            # flush + refresh before the article is used in the mapping
            db.session.flush()
            db.session.refresh(new_article)

            new_cohort_article_map = CohortArticleMap(cohort, new_article)

            db.session.add(new_cohort_article_map)
        db.session.commit()
        return 'OK'
    except (ValueError, KeyError):
        # ValueError: body is not valid JSON; KeyError: a field is missing.
        # flask.abort raises, so nothing can be returned after it.
        flask.abort(400)
コード例 #19
0
ファイル: article.py プロジェクト: mircealungu/Zeeguu-API
def article_id():
    """
    Return the id of the article at a given URL; the article is looked
    up with ``Article.find_or_create``.

    The URL is taken from the 'url' query argument.
    NOTE: the url should be encoded with quote_plus (Python) or
    encodeURIComponent (Javascript).

    Aborts with 400 when no url is given, and with 500 (after logging
    the exception) when finding or creating the article fails.

    :return: json result holding the article id
    """
    requested_url = request.args.get('url', '')
    if not requested_url:
        flask.abort(400)

    try:
        found_or_new = Article.find_or_create(db_session, requested_url)
        payload = dict(article_id=found_or_new.id)
        return json_result(payload)
    except Exception as error:
        zeeguu_core.log(error)
        flask.abort(500)
コード例 #20
0
def more_like_this_article(user, count, article_id):
    """
    Find articles similar to the one with the given id, using a query
    built by ``build_more_like_this_query`` and run on Elasticsearch.

    :param user: the user for whom the article infos are computed
    :param count: forwarded to ``build_more_like_this_query``
    :param article_id: id of the article to compare with
    :return: list with one ``UserArticle.user_article_info`` per hit
    """
    source_article = Article.find_by_id(article_id)
    query_body = build_more_like_this_query(count,
                                            source_article.content,
                                            source_article.language)

    es_client = Elasticsearch(ES_CONN_STRING)
    search_result = es_client.search(index=ES_ZINDEX, body=query_body)
    hits = search_result["hits"].get("hits")

    # TODO need to make sure either that the searched on article is always
    #  a part of the list or that it is never there.
    #  it could be used to show on website: you searched on X, here is what
    #  we found related to X

    similar_articles = _to_articles_from_ES_hits(hits)
    return [
        UserArticle.user_article_info(user, similar)
        for similar in similar_articles
    ]
コード例 #21
0
    def _find_article_in_value_or_extra_data(self, db_session):
        """
        Find or create the article this event refers to.

        Only events listed in ALL_ARTICLE_INTERACTION_ACTIONS are
        considered. The url is ``self.value`` when it starts with "http",
        otherwise whatever ``find_url_in_extra_data`` returns.

        :return: the article id, or None when the event is not an article
            interaction or no url could be determined
        """
        if self.event not in ALL_ARTICLE_INTERACTION_ACTIONS:
            return None

        if self.value.startswith("http"):
            url = self.value
        else:
            url = self.find_url_in_extra_data()

        if not url:
            return None

        article = Article.find_or_create(db_session, url, sleep_a_bit=True)
        return article.id
コード例 #22
0
found = 1

# Attach an article to every text that does not have one yet.
for text in texts:

    # texts that already have an article are left alone
    if text.article:
        continue

    canonical_url = text.url.as_canonical_string()

    # urls containing 'articleID' carry the article id after "articleID="
    article_id = None
    if 'articleID' in canonical_url:
        article_id = canonical_url.split("articleID=")[-1]

    if article_id:
        article = Article.query.filter_by(id=article_id).one()
    else:
        article = Article.find(canonical_url)

    if article:
        found += 1
        text.article = article
        zeeguu_core.db.session.add(text)
        print(f'found: {found}')
    else:
        not_found += 1
        print(f'not found: {not_found}')
コード例 #23
0
 def _exists_in_db(obj):
     """Return what ``Article.exists`` reports for *obj*."""
     already_stored = Article.exists(obj)
     return already_stored
コード例 #24
0
# Script fragment: reports which of the articles older than DAYS days
# must be kept, because a user has info about them or because their url
# shows up in the user activity data.

deleted = []

# Step 1: collect every url mentioned in the extra data of user activity.
print("1. finding urls in activity data...")
all_urls = set()
all_activity_data = UserActivityData.query.all()
for each in all_activity_data:
    url = each.find_url_in_extra_data()
    if url:
        all_urls.add(url)
print(f" ... url count: {len(all_urls)}")

#

# Step 2: load the candidate articles.
print(f"2. finding articles older than {DAYS} days...")
all_articles = Article.all_older_than(days=DAYS)
print(f" ... article count: {len(all_articles)}")

i = 0
for each in all_articles:
    i += 1
    # info: the UserArticle entries found for this article
    info = UserArticle.find_by_article(each)
    url_found = each.url.as_string() in all_urls

    # Either condition means the article is reported as not deletable.
    if info or url_found:
        if info:
            print(f"WON'T DELETE info! {each.id} {each.title}")
            for ainfo in info:
                print(ainfo.user_info_as_string())
        if url_found:
            print(f"WON'T DELETE url_found! {each.id} {each.title}")
コード例 #25
0
def _to_articles_from_ES_hits(hits):
    """
    Map Elasticsearch hits to articles.

    :param hits: iterable of hits; each hit's "_id" is used as article id
    :return: list with the result of ``Article.find_by_id`` for every hit,
        in the same order
    """
    return [Article.find_by_id(hit.get("_id")) for hit in hits]
コード例 #26
0
def own_texts():
    """Return the article info of every own text of the current user as json."""
    texts = Article.own_texts_for_user(flask.g.user)
    infos = [text.article_info() for text in texts]
    return json_result(infos)
コード例 #27
0
def get_one_translation(from_lang_code, to_lang_code):
    """
    Return a single translation for the posted word.

    Addresses some of the problems with get_next_translations:
    - it should be separated in get_first and get_alternatives
    - alternatively it can be get one and get all

    To think about:
    - it would also make sense to separate translation from logging;
      or at least, allow for situations where a translation is not
      associated with an url... or?

    POST form fields: word (mandatory), url, title, context.

    The user's own translation is returned right away when there is one.
    Otherwise one result is requested from the translators and, when one
    comes back, it is saved as a bookmark.

    :return: json array with translations
    """
    form = request.form
    word_str = form["word"]
    url = form.get("url")
    title_str = form.get("title", "")
    context = form.get("context", "")

    minimal_context, query = minimize_context(context, from_lang_code,
                                              word_str)

    own = own_translation(flask.g.user, word_str, from_lang_code,
                          to_lang_code, minimal_context)
    if own:
        return json_result(dict(translations=own))

    request_data = {
        "from_lang_code": from_lang_code,
        "to_lang_code": to_lang_code,
        "url": url,
        "word": word_str,
        "title": title_str,
        "query": query,
        "context": minimal_context,
    }
    translations = get_next_results(request_data,
                                    number_of_results=1).translations

    # do we really need this?
    # translators talk about quality, but our users expect likelihood:
    # 'quality' is renamed, 'service_name' is copied to 'source'
    for each in translations:
        each["likelihood"] = each.pop("quality")
        each["source"] = each["service_name"]

    if "article?id=" in url:
        # the url carries the article id: resolve the article's own url
        article_id = url.split("article?id=")[-1]
        matching_article = Article.query.filter_by(id=article_id).one()
        url = matching_article.url.as_canonical_string()
    else:
        # the url does not come from the reader: find or create the article
        article_id = Article.find_or_create(db_session, url).id

    if translations:
        Bookmark.find_or_create(
            db_session,
            flask.g.user,
            word_str,
            from_lang_code,
            translations[0]["translation"],  # best guess
            to_lang_code,
            minimal_context,
            url,
            title_str,
            article_id,
        )

    return json_result(dict(translations=translations))
コード例 #28
0
def get_next_translations(from_lang_code, to_lang_code):
    """
    Return a list of possible translations in to_lang_code for a word in
    from_lang_code.

    POST form fields: word (mandatory), context (the sentence), url,
    title, numberOfResults (default -1), service (a service to exclude),
    currentTranslation (a result to exclude, compared in lower case) and
    articleID.

    On the first call for a word (no service excluded) an own or
    crowdsourced translation is returned directly when there is one, and
    the first translator result is saved as a bookmark.

    :return: json array with translations
    """
    form = request.form

    context = form.get("context", "")
    url = form.get("url", "")
    number_of_results = int(form.get("numberOfResults", -1))

    service_name = form.get("service", "")
    exclude_services = [service_name] if service_name != "" else []

    current_translation = form.get("currentTranslation", "")
    exclude_results = (
        [current_translation.lower()] if current_translation != "" else []
    )

    # data["url"] keeps the url exactly as it was posted
    data = {
        "from_lang_code": from_lang_code,
        "to_lang_code": to_lang_code,
        "context": context,
        "url": url,
    }

    article_id = form.get("articleID", None)
    if article_id is None:
        if "articleID" in url:
            # the url carries the article id: resolve the article's own url
            article_id = url.split("articleID=")[-1]
            matching_article = Article.query.filter_by(id=article_id).one()
            url = matching_article.url.as_canonical_string()
        elif "articleURL" in url:
            # the url carries the article url; article_id stays None
            url = url.split("articleURL=")[-1]
        else:
            # the url does not come from the reader: find or create the article
            article_id = Article.find_or_create(db_session, url).id
    zeeguu_core.log(f"url before being saved: {url}")

    word_str = form["word"]
    data["word"] = word_str
    title_str = form.get("title", "")
    data["title"] = title_str

    zeeguu_core.log(f'translating to... {data["to_lang_code"]}')
    minimal_context, query = minimize_context(
        data["context"], data["from_lang_code"], data["word"]
    )
    zeeguu_core.log(f"Query to translate is: {query}")
    data["query"] = query

    first_call_for_this_word = not exclude_services

    if first_call_for_this_word:
        known = own_or_crowdsourced_translation(
            flask.g.user, word_str, from_lang_code, to_lang_code, minimal_context
        )
        if known:
            return json_result(dict(translations=known))

    translations = get_next_results(
        data,
        exclude_services=exclude_services,
        exclude_results=exclude_results,
        number_of_results=number_of_results,
    ).translations

    # translators talk about quality, but our users expect likelihood:
    # 'quality' is renamed, 'service_name' is copied to 'source'
    for each in translations:
        each["likelihood"] = each.pop("quality")
        each["source"] = each["service_name"]

    if first_call_for_this_word and len(translations) > 0:
        Bookmark.find_or_create(
            db_session,
            flask.g.user,
            word_str,
            from_lang_code,
            translations[0]["translation"],  # best guess
            to_lang_code,
            minimal_context,
            url,
            title_str,
            article_id,
        )

    return json_result(dict(translations=translations))
コード例 #29
0
    def find_or_create(cls,
                       session,
                       _url: str,
                       language=None,
                       sleep_a_bit=False):
        """

            Find the article with the given url. If not found, download
            it, extract all the required info and save it as a new article.

        :param session: db session used to save the new article
        :param _url: the url of the article; its canonical form is used
        :param language: language of the article; when missing it is taken
            from the downloaded page or detected from the text
        :param sleep_a_bit: when true, sleep between 3 and 33 seconds
            after the download
        :return: the found or newly created article; None when a database
            error occurred and the article could not be found in any of
            the 10 recovery attempts
        :raises Exception: when the downloaded article has no text
        """
        # imported up-front: the error-recovery path below needs `time`
        # even when sleep_a_bit is false
        import time
        from random import randint

        from zeeguu_core.model import Url, Article, Language
        import newspaper

        url = Url.extract_canonical_url(_url)

        try:
            found = cls.find(url)
            if found:
                return found

            art = newspaper.Article(url=url)
            art.download()
            art.parse()

            if art.text == '':
                raise Exception("Newspaper got empty article from: " + url)

            if sleep_a_bit:
                print("GOT: " + url)
                sleep_time = randint(3, 33)
                print(
                    f"sleeping for {sleep_time}s... so we don't annoy our friendly servers"
                )
                time.sleep(sleep_time)

            if not language:
                if art.meta_lang == '':
                    art.meta_lang = detect(art.text)
                    zeeguu_core.log(f"langdetect: {art.meta_lang} for {url}")
                language = Language.find_or_create(art.meta_lang)

            # Create new article and save it to DB
            url_object = Url.find_or_create(session, url)

            new_article = Article(
                url_object,
                art.title,
                ', '.join(art.authors),
                # any article longer than this will be truncated...
                art.text[0:32000],
                art.summary,
                None,
                None,
                language)
            session.add(new_article)

            session.commit()

            return new_article
        except (sqlalchemy.exc.IntegrityError, sqlalchemy.exc.DatabaseError):
            # `except A or B` only ever caught A; a tuple catches both.
            # A failure here is treated as a race with another writer, so
            # roll back and look the article up again, a few times.
            for i in range(10):
                try:
                    session.rollback()
                    u = cls.find(url)
                    print("Found article by url after recovering from race")
                    return u
                except Exception:
                    print("Exception of second degree in article..." + str(i))
                    time.sleep(0.3)

            # all the recovery attempts failed
            return None
コード例 #30
0
 def test_load_article_without_language_information(self):
     """An article can be found or created from url_plane_crashes."""
     loaded = Article.find_or_create(session, url_plane_crashes)
     assert loaded
コード例 #31
0
 def test_find_or_create(self):
     """The article obtained with find_or_create has a truthy fk_difficulty."""
     article = Article.find_or_create(session, url_formation_professionnelle)
     self.new_art = article
     assert article.fk_difficulty