Exemple #1
0
def user_article():
    """
    Return info about an article together with the current user's
    data relative to that article.

    The article is identified by the ``url`` query-string argument.
    NOTE: the url should be encoded with quote_plus (Python) or
    encodeURIComponent (Javascript).

    This is not perfectly RESTful: the url is meant to be the URI of
    the article, but when it is put in the path apache decodes it
    before it gets here, so for now it is passed as an argument.

    The request is aborted with HTTP 400 when no url is given.

    :return: json as prepared by content_recommender.mixed_recommender.user_article_info

    """
    requested_url = request.args.get('url', '')
    if not requested_url:
        flask.abort(400)

    requested_article = Article.find_or_create(db_session, requested_url)
    info = user_article_info(flask.g.user,
                             requested_article,
                             with_content=True)

    return json_result(info)
Exemple #2
0
def user_article_update():
    """
    Update info about this (user x article) pair.

    The form data can provide:
        - url: the article to update (required)
        - liked=True|1|False|0
        - starred -ibidem-

    A flag that is absent from the form is left untouched. A flag that
    is present counts as true only when its text is "True" or "1".

    The request is aborted with HTTP 400 when url is missing or empty,
    instead of handing an unusable url to Article.find_or_create.

    :return: the plain string "OK"

    """

    url = request.form.get('url')
    if not url:
        flask.abort(400)

    starred = request.form.get('starred')
    liked = request.form.get('liked')

    article = Article.find_or_create(db_session, url)
    user_article = UserArticle.find_or_create(db_session, flask.g.user,
                                              article)

    if starred is not None:
        user_article.set_starred(starred in ["True", "1"])

    if liked is not None:
        user_article.set_liked(liked in ["True", "1"])

    db_session.commit()

    return "OK"
Exemple #3
0
def user_article_info(user: User,
                      article: Article,
                      with_content=False,
                      with_translations=True):
    """
    Combine the article's own info with what is stored for this user.

    Starts from ``article.article_info(with_content=...)`` and adds the
    keys 'starred', 'opened' and 'liked'.

    Without a UserArticle for the pair, the three flags are False and
    'translations' is an empty list. With one, 'starred' and 'opened'
    tell whether the stored attribute is set (not None), 'liked' is
    copied as stored, and 'translations' is only added when
    ``with_translations`` is true.

    :param user: the user whose data is looked up
    :param article: the article to describe
    :param with_content: forwarded to article.article_info
    :param with_translations: whether to add the user's bookmarks for
        the article url, each as its serializable_dictionary()
    :return: the info dictionary
    """
    from zeeguu.model import UserArticle

    stored = UserArticle.find(user, article)
    info = article.article_info(with_content=with_content)

    if not stored:
        defaults = (('starred', False),
                    ('opened', False),
                    ('liked', False),
                    ('translations', []))
        for key, value in defaults:
            info[key] = value
        return info

    info['starred'] = stored.starred is not None
    info['opened'] = stored.opened is not None
    info['liked'] = stored.liked

    if not with_translations:
        return info

    bookmarks = Bookmark.find_all_for_user_and_url(user, article.url)
    info['translations'] = [
        bookmark.serializable_dictionary() for bookmark in bookmarks
    ]
    return info
Exemple #4
0
    def __init__(self, real=False):
        """
        Prepare ``self.article``.

        :param real: when true, the article for TEST_URL is obtained
            through Article.find_or_create; otherwise an article is
            built with _create_model_object and handed to self.save
        """
        super().__init__()

        if not real:
            self.article = self._create_model_object()
            self.save(self.article)
            return

        session = ArticleRule.db.session
        self.article = Article.find_or_create(session, TEST_URL)
def article_feedback(session, value, extra_data):
    """
    Record a "broken" vote for the article an event refers to.

    The article is found or created in every case; the vote is only
    cast, added and committed when "not_finished_for_broken" occurs in
    ``extra_data``.

    :param session: database session used for the lookup and the commit
    :param value: event value holding the article url
    :param extra_data: event payload that is searched for the marker
    """
    # The event logger may send the zeeguu reader url, of the form
    # https://www.zeeguu.unibe.ch/read/article?articleLanguage=de&articleURL=https://www.nzz.ch/wissenschaft/neandertaler-waren-kuenstler-ld.1358862
    # so only what follows the last 'articleURL=' is kept; a value
    # without that marker is used as it is.
    _, _, article_url = value.rpartition('articleURL=')
    article = Article.find_or_create(session, article_url)

    if "not_finished_for_broken" not in extra_data:
        return

    article.vote_broken()
    session.add(article)
    session.commit()
def article_liked(session, value, user, like_value):
    """
    Store whether ``user`` liked the article an event refers to.

    :param session: database session used for lookups, add and commit
    :param value: event value holding the article url, possibly wrapped
        in a zeeguu reader url
    :param user: the user whose (user x article) record is updated
    :param like_value: value assigned to the record's ``liked`` attribute
    """
    # the url that comes from zeeguu event logger
    # might be the zeeguu url: which is of the form
    # https://www.zeeguu.unibe.ch/read/article?articleLanguage=de&articleURL=https://www.nzz.ch/wissenschaft/neandertaler-waren-kuenstler-ld.1358862
    # thus we extract only the last part
    url = value.split('articleURL=')[-1]

    article = Article.find_or_create(session, url)

    # UserArticle.find can come back empty when the user has no record
    # for this article yet, which made the assignment below fail; create
    # the record on demand so that a like can be the first interaction.
    ua = UserArticle.find_or_create(session, user, article)
    ua.liked = like_value
    session.add(ua)
    session.commit()
    log(f"{ua}")
Exemple #7
0
def get_user_article_info():
    """
    Return info about an article and the current user's data for it.

    expects one form parameter: url

    The request is aborted with HTTP 400 when url is missing or empty,
    rather than asking Article.find_or_create to fetch an empty address.

    :return: json dictionary with info

    """

    url = str(request.form.get('url', ''))
    if not url:
        flask.abort(400)

    article = Article.find_or_create(db_session, url)

    return json_result(user_article_info(flask.g.user, article))
Exemple #8
0
    def _create_model_object(self):
        """
        Build a randomly filled Article that _exists_in_db does not
        report as already present.

        Every attempt draws a fresh title (first four words of a faker
        text), author name, content, summary, a publication time up to
        7200 minutes in the past, and a new feed, language and url from
        their rules. Attempts repeat until one is not in the db.

        :return: the new Article
        """
        while True:
            title_words = self.faker.text().split()[:4]
            title = " ".join(title_words)
            authors = self.faker.name()
            content = self.faker.text()
            summary = self.faker.text()

            now = datetime.now()
            published = now - timedelta(minutes=randint(0, 7200))

            rss_feed = RSSFeedRule().feed
            language = LanguageRule().random
            url = UrlRule().url

            candidate = Article(url, title, authors, content, summary,
                                published, rss_feed, language)

            if not self._exists_in_db(candidate):
                return candidate
Exemple #9
0
def add_bookmark(db, user, original_language, original_word,
                 translation_language, translation_word, date, the_context,
                 the_url, the_url_title):
    """
    Create, store and return a Bookmark for a translated word.

    Finds or creates, in this order: the Url, the Article for that
    url, the Text for the context, and the two UserWords (original and
    translation). The Bookmark built from them is added to
    ``db.session`` and committed.

    :return: the new Bookmark
    """
    session = db.session

    url_object = Url.find_or_create(session, the_url, the_url_title)
    article_url = url_object.as_string()
    article = Article.find_or_create(session, article_url)

    context_text = Text.find_or_create(session, the_context,
                                       translation_language, url_object,
                                       article)

    origin_word = UserWord.find_or_create(session, original_word,
                                          original_language)
    translated_word = UserWord.find_or_create(session, translation_word,
                                              translation_language)

    bookmark = Bookmark(origin_word, translated_word, user, context_text,
                        date)
    db.session.add(bookmark)
    db.session.commit()

    return bookmark
    def _find_article_in_value_or_extra_data(self, db_session):
        """
            Finds or creates the article this event refers to and
            returns its id.

            Only events listed in ALL_ARTICLE_INTERACTION_ACTIONS are
            considered. The url is ``self.value`` when that starts with
            'http', otherwise whatever find_url_in_extra_data() returns.

            NOTE: When the article cannot be downloaded anymore,
            either because the article is no longer available or the
            newspaper parser fails, Article.find_or_create is presumably
            the place that reports it (TODO confirm); this method does
            not catch anything itself.

            :param db_session: session handed to Article.find_or_create
            :return: the article id, or None when the event is not an
                article interaction, no url is found, or find_or_create
                comes back without an article

        """

        if self.event not in ALL_ARTICLE_INTERACTION_ACTIONS:
            return None

        if self.value.startswith('http'):
            url = self.value
        else:
            url = self.find_url_in_extra_data()

        if not url:
            return None

        article = Article.find_or_create(db_session,
                                         url,
                                         sleep_a_bit=True)

        # find_or_create may return None; honour the "id or None" contract
        # instead of failing on the attribute access
        if article is None:
            return None

        return article.id
Exemple #11
0
 def _exists_in_db(obj):
     """Return what Article.exists reports for ``obj``."""
     already_stored = Article.exists(obj)
     return already_stored
Exemple #12
0
# Attach an article to every text that does not have one yet.
# NOTE(review): ``texts`` and ``not_found`` are not set in this part of
# the script -- presumably they are defined earlier; verify.
found = 1

for text in texts:

    if text.article:
        continue

    canonical_url = text.url.as_canonical_string()

    # urls that mention 'articleID' carry the id after 'articleID='
    article_id = None
    if 'articleID' in canonical_url:
        article_id = canonical_url.split("articleID=")[-1]

    if article_id:
        # one_or_none() gives None for an unknown id so that the
        # "not found" branch below is reached; one() would raise instead
        article = Article.query.filter_by(id=article_id).one_or_none()
    else:
        article = Article.find(canonical_url)

    if not article:
        not_found += 1
        print(f'not found: {not_found}')
    else:
        found += 1
        text.article = article
        zeeguu.db.session.add(text)
        print(f'found: {found}')
Exemple #13
0
# Keys of the (url, user) combinations that were already handled, so that
# each combination is processed only once.
visited_url_user_pairs = []

# NOTE(review): the handler that belongs to the ``try`` below is not part
# of this excerpt.
for bookmark in Bookmark.query.all():
    try:

        # keep only what follows the last 'articleURL=' in the text's url
        urlcrop = str(bookmark.text.url).split('articleURL=')[-1]

        # plain concatenation of url and user name, used as the lookup key
        url_user_hash = urlcrop + bookmark.user.name

        if url_user_hash in visited_url_user_pairs:
            continue

        visited_url_user_pairs.append(url_user_hash)

        article = Article.find_or_create(session, urlcrop, bookmark.text.language)

        # activity entries of this user for 'UMR - LIKE ARTICLE' whose
        # 'title' equals the title of the bookmark's url
        likes = UserActivityData.find(bookmark.user, extra_filter='title', extra_value=str(bookmark.text.url.title), event_filter='UMR - LIKE ARTICLE')
        Nlikes = len(likes)
        #print(sa.url)
        # cut the url just before "xtor=RSS"; the -1 also drops the
        # character that precedes it
        url_end = urlcrop.find("xtor=RSS")
        if url_end < 0:
            url = str(urlcrop)
        else:
            url = str(urlcrop)[:url_end-1]

        # time of the latest 'UMR - OPEN ARTICLE' entry for that url, or
        # None when there is none
        last_opened_act = UserActivityData.find(bookmark.user, extra_filter='articleURL', extra_value=url, event_filter='UMR - OPEN ARTICLE', only_latest=True)
        if last_opened_act is None:
            last_opened = None
        else:
            last_opened = last_opened_act.time
Exemple #14
0
 def test_load_article_without_language_information(self):
     """find_or_create called with a url only must yield a truthy article."""
     cnn_url = 'https://edition.cnn.com/2018/03/12/asia/kathmandu-plane-crash/index.html'
     loaded = Article.find_or_create(session, cnn_url)
     assert loaded
Exemple #15
0
 def test_find_or_create(self):
     """The article obtained for SOME_ARTICLE_URL must have a truthy fk_difficulty."""
     created = Article.find_or_create(session, SOME_ARTICLE_URL)
     self.new_art = created
     assert self.new_art.fk_difficulty
Exemple #16
0
    def find_or_create(cls, session, _url:str, language=None, sleep_a_bit=False):
        """

            Return the article stored for the given url. If not found,
            download and extract all the required info for this
            article, save it and return the new article.

        :param session: database session used to add and commit the
            new article, and to roll back after a database error
        :param _url: address of the article; it is reduced to its
            canonical form before anything else happens
        :param language: language to store with a new article; when
            falsy, the page's meta_lang is used, or the language
            detected from the text when meta_lang is empty
        :param sleep_a_bit: when true, wait a random 3 to 33 seconds
            after a download
        :return: the found or newly created article; None when the
            lookup after a database error finds nothing or keeps failing
        :raises Exception: when the downloaded article has no text
        """
        # imported up front: the recovery loop at the end needs ``time``
        # even when sleep_a_bit is false
        import time
        from random import randint

        from zeeguu.model import Url, Article, Language
        import newspaper

        url = Url.extract_canonical_url(_url)

        try:
            found = cls.find(url)
            if found:
                return found

            art = newspaper.Article(url=url)
            art.download()
            art.parse()

            if art.text == '':
                raise Exception("Newspaper got empty article from: " + url)

            if sleep_a_bit:
                print("GOT: " + url)
                sleep_time = randint(3, 33)
                print(f"sleeping for {sleep_time}s... so we don't annoy our friendly servers")
                time.sleep(sleep_time)

            if not language:
                if art.meta_lang == '':
                    art.meta_lang = detect(art.text)
                    zeeguu.log(f"langdetect: {art.meta_lang} for {url}")
                language = Language.find_or_create(art.meta_lang)

            # Create new article and save it to DB
            url_object = Url.find_or_create(session, url)

            new_article = Article(
                url_object,
                art.title,
                ', '.join(art.authors),
                art.text[0:32000],  # any article longer than this will be truncated...
                art.summary,
                None,
                None,
                language
            )
            session.add(new_article)

            session.commit()

            return new_article
        except (sqlalchemy.exc.IntegrityError, sqlalchemy.exc.DatabaseError):
            # ``except A or B`` only ever catches A; the tuple catches both.
            # Another writer probably stored the same article in the
            # meantime: roll back and look it up again, up to ten times.
            for attempt in range(10):
                try:
                    session.rollback()
                    recovered = cls.find(url)
                    print("Found article by url after recovering from race")
                    return recovered
                except Exception:
                    print("Exception of second degree in article..." + str(attempt))
                    time.sleep(0.3)

            return None
Exemple #17
0
# Delete the articles older than DAYS that no user has info about and
# whose url does not occur in any activity data; their ids are collected
# in ``deleted``.
deleted = []

print("1. finding urls in activity data...")
all_urls = set()
all_activity_data = UserActivityData.query.all()
for activity in all_activity_data:
    activity_url = activity.find_url_in_extra_data()
    if not activity_url:
        continue
    all_urls.add(activity_url)
print(f" ... url count: {len(all_urls)}")

#

print(f"2. finding articles older than {DAYS} data...")
all_articles = Article.all_older_than(days=DAYS)
print(f" ... article count: {len(all_articles)}")
for old_article in all_articles:
    user_infos = UserArticle.find_by_article(old_article)
    url_found = old_article.url.as_string() in all_urls

    # nothing refers to the article: it can go
    if not (user_infos or url_found):
        deleted.append(old_article.id)
        dbs.delete(old_article)
        continue

    # otherwise report every reason for keeping it
    if user_infos:
        print(f"WON'T DELETE info! {old_article.id} {old_article.title}")
        for user_info in user_infos:
            print(user_info.user_info_as_string())
    if url_found:
        print(f"WON'T DELETE url_found! {old_article.id} {old_article.title}")
import zeeguu
from zeeguu.model import Article, UserArticle
from zeeguu.model.starred_article import StarredArticle

session = zeeguu.db.session

# Turn every StarredArticle into a UserArticle whose ``starred`` is the
# original starred date. Each one is committed on its own, so a failure
# only affects the article at hand.
for sa in StarredArticle.query.all():
    try:
        article = Article.find_or_create(session, sa.url.as_string())
        ua = UserArticle.find_or_create(session,
                                        sa.user,
                                        article,
                                        starred=sa.starred_date)
        session.add(ua)
        session.commit()
        print(f'{sa.starred_date} x {ua.user.name} x {ua.article.title}')
    except Exception as ex:
        print(f'could not import {sa.url.as_string()}')
        print(ex)
        # a failed flush or commit leaves the session unusable until it
        # is rolled back; without this every later article would fail too
        session.rollback()