コード例 #1
0
    def test_fetchable(self):
        """Exercise late / fetchable feed listing and the expiry correction.

        The scenario goes through four stages: every feed is late at start,
        ``list_fetchable`` hands feeds out with a fresh ``last_retrieved``,
        an epoch ``expires`` written through the controller gets corrected,
        and the late count flips around the expiry boundary.
        """
        fctrl = FeedController()
        total = fctrl.read().count()
        unix = datetime(1970, 1, 1).replace(tzinfo=timezone.utc)
        count = 0
        # Stage 1: every feed must be listed as late, with both timestamps
        # still equal to the Unix epoch.
        for fd in fctrl.list_late():
            count += 1
            self.assertEqual(unix, fd.last_retrieved)
            self.assertEqual(unix, fd.expires)
        self.assertEqual(total, count)

        # Stage 2: feeds returned as fetchable must carry a last_retrieved
        # within the last second, while expires is expected to be untouched.
        fetchables = fctrl.list_fetchable()
        now = utc_now()
        for fd in fetchables:
            self.assert_in_range(now - timedelta(seconds=1), fd.last_retrieved,
                                 now)
            self.assertEqual(unix, fd.expires)
        self.assert_late_count(
            0, "no late feed to report because all just fetched")
        # Stage 3: writing an epoch expiry through the controller must end up
        # stored as roughly now + conf.feed.min_expires (one second tolerance).
        fctrl.update({}, {'expires': unix})
        now = utc_now()
        for fd in fctrl.read():  # expires should be corrected
            self.assert_in_range(
                now + timedelta(seconds=conf.feed.min_expires - 1), fd.expires,
                now + timedelta(seconds=conf.feed.min_expires + 1))

        # Stage 4: last_retrieved is pushed further back than min_expires so
        # that only the expires value decides whether a feed is late.
        lr_not_matter = timedelta(seconds=conf.feed.min_expires + 10)
        self.update_all_no_ctrl(expires=utc_now() - timedelta(seconds=1),
                                last_retrieved=utc_now() - lr_not_matter)
        self.assert_late_count(total, "all feed just expired")
        self.update_all_no_ctrl(expires=utc_now() + timedelta(seconds=1))
        self.assert_late_count(
            0, "all feed will expire in a second, none are expired")
コード例 #2
0
ファイル: main.py プロジェクト: jaesivsm/JARR
def metrics_users_long_term():
    """Publish the number of long term users on the ``USER`` metric.

    Users are read with ``is_active=True``, a ``last_connection__ge``
    threshold of ``conf.feed.stop_fetch`` days before now and a
    ``date_created__lt`` threshold one day further in the past. The
    resulting count is set on the ``status='long_term'`` label.
    """
    logger.debug('Counting long term users')
    # Read the clock once so both thresholds derive from the same instant.
    now = utc_now()
    threshold_connection = now - timedelta(days=conf.feed.stop_fetch)
    threshold_created = now - timedelta(days=conf.feed.stop_fetch + 1)
    long_term = UserController().read(is_active=True,
                                      last_connection__ge=threshold_connection,
                                      date_created__lt=threshold_created)
    USER.labels(status='long_term').set(long_term.count())
コード例 #3
0
 def test_extract_max_age():
     """Check that ``max-age`` from ``cache-control`` drives ``expires``.

     The same expectation is asserted again once an ``expires`` header is
     present, so the extracted value must still follow ``max-age``.
     """
     max_age = conf.feed.max_expires / 2
     cache_control = 'garbage max-age=%d garbage' % max_age
     headers = {'cache-control': cache_control}

     extracted = extract_feed_info(headers)['expires']
     assert_in_range(extracted, utc_now() + timedelta(seconds=max_age))

     # Adding an explicit expires header must not change the outcome.
     headers['expires'] = rfc_1123_utc(delta=timedelta(hours=12))
     extracted = extract_feed_info(headers)['expires']
     assert_in_range(extracted, utc_now() + timedelta(seconds=max_age))
コード例 #4
0
def update_slow_metrics():
    """Refresh the ``USER`` metric for the any, active and long_term labels.

    ``any`` counts every user, ``active`` the active users whose last
    connection passes the ``conf.feed.stop_fetch`` days threshold, and
    ``long_term`` adds a creation date threshold one day further back.
    """
    users = UserController()
    USER.labels(status='any').set(users.read().count())

    connected_since = utc_now() - timedelta(days=conf.feed.stop_fetch)
    created_before = utc_now() - timedelta(days=conf.feed.stop_fetch + 1)
    # Filters shared by the two remaining counts.
    recent = {'is_active': True, 'last_connection__ge': connected_since}

    active_users = users.read(**recent)
    USER.labels(status='active').set(active_users.count())

    long_term_users = users.read(**recent, date_created__lt=created_before)
    USER.labels(status='long_term').set(long_term_users.count())
コード例 #5
0
ファイル: abstract.py プロジェクト: dmitri-saharov-layer/JARR
    def set_feed_error(self, error=None, parsed_feed=None):
        """Record a failed fetch on the feed and persist it.

        The error count is incremented, the error message stored, and
        ``expires`` is reset to ``None`` so the controller recomputes it.

        Args:
            error: the error encountered, stored as ``str(error)`` if truthy.
            parsed_feed: mapping read for its ``'bozo_exception'`` entry
                when no ``error`` is given.

        Returns:
            Whatever ``FeedController().update`` returns for this feed.
        """
        error_count = self.feed.error_count + 1
        # Default to an empty message: called with neither an error nor a
        # parsed feed, the name would otherwise be unbound further down.
        last_error = ''
        if error:
            last_error = str(error)
        elif parsed_feed:
            last_error = str(parsed_feed.get('bozo_exception', ''))
        # Only feeds already above the error threshold are worth a warning.
        if self.feed.error_count > conf.feed.error_threshold:
            level = logging.WARNING
        else:
            level = logging.DEBUG
        logger.log(level, "%r: fetching feed error'd; error count -> %r",
                   self.feed, error_count)
        logger.debug("%r: last error details %r", self.feed, last_error)
        now = utc_now()
        info = {
            'error_count': error_count,
            'last_error': last_error,
            'user_id': self.feed.user_id,
            'last_retrieved': now,
            'expires': None
        }  # forcing compute by controller

        FEED_FETCH.labels(feed_type=self.feed.feed_type.value,
                          result='error').inc()
        return FeedController().update({'id': self.feed.id}, info)
コード例 #6
0
ファイル: feed_test.py プロジェクト: hanakhry/JARR
    def test_ListFeedResource_get(self):
        """Check the feed listing: auth, per-user isolation, date format.

        Anonymous access must answer 401, the two users must share no feed
        id, and ``last_retrieved`` must serialize to the same ISO string
        whatever timezone flavour was written.
        """
        self.assertStatusCode(401, self.jarr_client('get', 'feeds'))

        listing_u1 = self.jarr_client('get', 'feeds', user='******').json
        listing_u2 = self.jarr_client('get', 'feeds', user='******').json
        ids_u1 = {item['id'] for item in listing_u1}
        ids_u2 = [item['id'] for item in listing_u2]
        self.assertFalse(ids_u1.intersection(ids_u2))

        # testing time formatting
        feed = self.jarr_client('get', 'feeds', user='******').json[0]
        now = utc_now()
        # Same instant written as aware UTC, naive, and shifted to UTC+12.
        written_values = (
            now,
            now.replace(tzinfo=None),
            now.astimezone(timezone(timedelta(hours=12))),
        )
        for written in written_values:
            FeedController().update({'id': feed['id']},
                                    {'last_retrieved': written})
            payload = self._get(feed['id'], 'user1')
            self.assertEqual(payload['last_retrieved'], now.isoformat())
コード例 #7
0
ファイル: feed.py プロジェクト: jaesivsm/JARR
    def list_late(self, limit=0):
        """Yield the active feeds that are due for a new fetch.

        Candidates come from ``get_active_feed()`` and are narrowed with
        filters built by ``_to_filters``:

        * ``last_retrieved`` earlier than now minus
          ``conf.feed.min_expires`` seconds, and
        * an ``__or__`` group of two alternatives: ``expires`` not None and
          earlier than now, or ``last_retrieved`` not None and earlier
          than now minus ``conf.feed.max_expires`` seconds.

        How these filters combine in SQL is decided by ``_to_filters``.
        Rows are ordered by ``Feed.expires``; a truthy ``limit`` caps how
        many are returned.
        """
        now = utc_now()
        retrieved_before = now - timedelta(seconds=conf.feed.min_expires)
        stale_before = now - timedelta(seconds=conf.feed.max_expires)

        already_expired = {'expires__lt': now, 'expires__ne': None}
        not_fetched_for_long = {'last_retrieved__lt': stale_before,
                                'last_retrieved__ne': None}
        filters = self._to_filters(
            last_retrieved__lt=retrieved_before,
            __or__=[already_expired, not_fetched_for_long])

        query = self.get_active_feed().filter(*filters)
        query = query.order_by(Feed.expires)
        if limit:
            query = query.limit(limit)
        yield from query
コード例 #8
0
ファイル: abstract.py プロジェクト: goddess5321/JARR
 def template_article(self):
     """Return the base article dict shared by every built article.

     It carries the feed, category and user ids taken from ``self.feed``
     plus the current time as ``retrieved_date``.
     """
     feed = self.feed
     return dict(feed_id=feed.id,
                 category_id=feed.category_id,
                 user_id=feed.user_id,
                 retrieved_date=utc_now())
コード例 #9
0
def _extract_max_age(headers, feed_info):
    """Set ``feed_info['expires']`` from the ``cache-control`` max-age.

    Nothing happens when the header has no ``max-age``. Any failure while
    parsing is logged and otherwise ignored, leaving ``feed_info`` as is.
    """
    cache_control = headers.get('cache-control', '')
    if 'max-age' not in cache_control:
        return
    try:
        seconds = int(MAX_AGE_RE.search(cache_control).group(1))
        feed_info['expires'] = utc_now() + timedelta(seconds=seconds)
    except Exception:
        # Best effort: a malformed header must not break the extraction.
        logger.exception("something went wrong while parsing max-age")
コード例 #10
0
ファイル: article.py プロジェクト: dmitri-saharov-layer/JARR
 def count_by_user_id(self, **filters):
     """Return a ``{user_id: article count}`` dict.

     Articles are narrowed by ``filters`` (through ``_to_filters``) and
     joined to users that are active and connected in the last 30 days.
     """
     conn_max = utc_now() - timedelta(days=30)
     counts = session.query(Article.user_id, func.count(Article.id))
     counts = counts.filter(*self._to_filters(**filters))
     counts = counts.join(User).filter(User.is_active.__eq__(True),
                                       User.last_connection >= conn_max)
     return dict(counts.group_by(Article.user_id).all())
コード例 #11
0
ファイル: cluster_test.py プロジェクト: goddess5321/JARR
    def test_similarity_clustering(self):
        """Check cluster lookup by similarity against mocked candidates.

        With a small candidate set no cluster is expected for the matching
        article; with a larger one the clustered candidate's cluster is
        expected. An article sharing little vocabulary must get no cluster.
        """
        cluster_conf = {'tfidf_min_score': 0.6, 'tfidf_min_sample_size': 10}
        user = Mock(cluster_conf=cluster_conf)
        category = Mock(cluster_conf=cluster_conf)
        feed = Mock(cluster_conf=cluster_conf, user=user, category=category)
        cluster = Mock()

        def gen_articles(factor):
            # One candidate tied to `cluster`, followed by the four other
            # candidates repeated `factor` times (`*` binds before `+`).
            return [Mock(simple_vector={'Sarkozy': 1, 'garb': 1, 'justice': 1},
                         feed=feed, cluster=cluster)] \
                 + [Mock(feed=feed,
                        simple_vector={'Sark': 1, 'garbge': 1, 'vote': 1}),
                    Mock(feed=feed,
                        simple_vector={'Sark': 1, 'garbae': 1, 'debat': 1}),
                    Mock(feed=feed,
                        simple_vector={'Sark': 1, 'garbag': 1, 'blague': 1}),
                    Mock(feed=feed,
                        simple_vector={'Sark': 1, 'garage': 1, 'chans': 1})] \
                            * factor

        ccontr = ClusterController()
        # 1 + 4 * 2 = 9 candidates, below the tfidf_min_sample_size of 10.
        ccontr._get_query_for_clustering = Mock(return_value=gen_articles(2))

        # Shares 'garb' and 'justice' with the clustered candidate.
        matching_article = Mock(simple_vector={
            'Morano': 1,
            'garb': 1,
            'justice': 1
        },
                                date=utc_now(),
                                lang='fr',
                                feed=feed)

        self.assertIsNone(ccontr._get_cluster_by_similarity(matching_article))
        # 1 + 4 * 100 = 401 candidates: the cluster is now expected.
        ccontr._get_query_for_clustering = Mock(return_value=gen_articles(100))
        self.assertEqual(ccontr._get_cluster_by_similarity(matching_article),
                         cluster)

        # Shares only 'Sark' with the unclustered candidates.
        solo_article = Mock(simple_vector={
            'Sark': 1,
            'fleur': 1
        },
                            date=utc_now(),
                            lang='fr',
                            feed=feed)
        self.assertNotEqual(cluster,
                            ccontr._get_cluster_by_similarity(solo_article))
        self.assertIsNone(ccontr._get_cluster_by_similarity(solo_article))
コード例 #12
0
ファイル: feed.py プロジェクト: jaesivsm/JARR
 def get_active_feed(self, **filters):
     """Return a query on feeds that are still worth fetching.

     Feeds must have the active status and an error count passing the
     ``error_count__lt=conf.feed.error_max`` filter. When
     ``conf.feed.stop_fetch`` is truthy the query is also joined to users
     that are active and connected within that many days.
     """
     filters['error_count__lt'] = conf.feed.error_max
     query = self.read(status=FeedStatus.active, **filters)
     if not conf.feed.stop_fetch:
         return query
     last_conn = utc_now() - timedelta(days=conf.feed.stop_fetch)
     owner_is_around = (User.is_active.__eq__(True),
                        User.last_connection >= last_conn)
     return query.join(User).filter(*owner_is_around)
コード例 #13
0
def populate_db():
    """Fill the database with fixture users, categories, feeds and articles.

    Creates one admin/API user plus ``user1`` and ``user2``. For each of
    two iterations and each of those two users it creates three feeds (the
    first one without category), each holding three articles with two tags.
    The session is then committed and pending articles are clusterized.
    """
    fcontr = FeedController()
    ccontr = CategoryController()
    UserController().create(
        **{
            'is_admin': True,
            'is_api': True,
            'cluster_enabled': False,
            'login': '******',
            'password': '******'
        })
    # NOTE(review): the email literal below has no %s placeholder as shown
    # here, so the % formatting would raise TypeError; it looks redacted,
    # confirm against the real source.
    user1, user2 = [
        UserController().create(login=name,
                                cluster_enabled=False,
                                email="*****@*****.**" % name,
                                password=name) for name in ["user1", "user2"]
    ]

    for iteration in range(2):
        # Restarts at each iteration, so article links repeat across them.
        article_total = 0

        for user in (user1, user2):
            for iter_cat in range(3):
                # iter_cat == 0 yields a feed (and articles) without category.
                cat_id = None
                if iter_cat:
                    cat_id = ccontr.create(user_id=user.id,
                                           name=to_name(
                                               user, iteration, iter_cat)).id
                feed_id = fcontr.create(
                    link="feed%d%d" % (iteration, iter_cat),
                    user_id=user.id,
                    category_id=cat_id,
                    title=to_name(user, iteration, iter_cat, iter_cat)).id
                for iter_art in range(3):
                    # The same generated name serves as entry id and title.
                    entry = to_name(user, iteration, iter_cat, iter_cat,
                                    iter_art)

                    tags = [
                        to_name(user, iteration, iter_cat, iter_cat, iter_art,
                                str(i)) for i in range(2)
                    ]
                    article_total += 1
                    # Articles of the second iteration are dated one second
                    # later than those of the first.
                    ArticleController().create(
                        entry_id=entry,
                        link='http://test.te/%d' % article_total,
                        feed_id=feed_id,
                        user_id=user.id,
                        tags=tags,
                        category_id=cat_id,
                        title=entry,
                        date=utc_now() + timedelta(seconds=iteration),
                        content="content %d" % article_total)

    session.commit()
    session.flush()
    ClusterController().clusterize_pending_articles()
コード例 #14
0
ファイル: auth.py プロジェクト: jaesivsm/JARR
 def get():
     """Issue a new API token for the currently authenticated identity.

     The user's ``last_connection`` is refreshed and its
     ``renew_password_token`` cleared before the token is returned.
     """
     jwt = current_app.extensions["jwt"]
     identity_id = current_identity.id
     user = UserController(identity_id).get(id=identity_id)
     token = jwt.jwt_encode_callback(user).decode("utf8")

     controller = UserController(user.id)
     controller.update({"id": user.id},
                       {"last_connection": utc_now(),
                        "renew_password_token": ""})

     SERVER.labels(method="get", uri="/auth/refresh", result='2XX').inc()
     header_value = "%s %s" % (conf.auth.jwt_header_prefix, token)
     return {"access_token": header_value}, 200
コード例 #15
0
ファイル: feed.py プロジェクト: jaesivsm/JARR
 def list_fetchable(self, limit=0):
     """Return the late feeds after marking them as retrieved now.

     The lateness of each feed (seconds since its last retrieval) is
     observed on ``FEED_LATENESS``, except for feeds whose last retrieval
     equals ``UNIX_START``. All returned feeds then get ``last_retrieved``
     updated to the current time in a single update.
     """
     now = utc_now()
     feeds = list(self.list_late(limit))
     if not feeds:
         return feeds
     for feed in feeds:
         if feed.last_retrieved != UNIX_START:
             histogram = FEED_LATENESS.labels(feed_type=feed.feed_type.value)
             histogram.observe((now - feed.last_retrieved).total_seconds())
     late_ids = [feed.id for feed in feeds]
     self.update({'id__in': late_ids}, {'last_retrieved': now})
     return feeds
コード例 #16
0
ファイル: feed.py プロジェクト: jaesivsm/JARR
    def __update_default_expires(self, feed, attrs):
        """Normalize ``attrs['expires']`` in place for the given feed.

        The value is defaulted, parsed and clamped between now +
        ``conf.feed.min_expires`` and now + ``conf.feed.max_expires``, then
        possibly adjusted from the number of recently retrieved articles.
        The outcome is logged and observed on ``FEED_EXPIRES`` together
        with the name of the method that produced it. Returns nothing.
        """
        now = utc_now()
        min_delta = timedelta(seconds=conf.feed.min_expires)
        max_delta = timedelta(seconds=conf.feed.max_expires)
        min_expires = now + min_delta
        max_expires = now + max_delta
        # `method` records which rule decided the final value; it is used
        # as a metric label and in the log line at the end.
        method = 'from header'
        feed_type = getattr(feed.feed_type, 'value', '')
        if attrs['expires'] is None:
            attrs['expires'] = max_expires
            method = 'defaulted to max'
        try:
            # Anything that is not a datetime is parsed with dateutil.
            if not isinstance(attrs['expires'], datetime):
                attrs['expires'] = dateutil.parser.parse(attrs['expires'])
            # NOTE(review): because of the elif chain, a naive datetime gets
            # UTC attached but skips the max/min clamping below - confirm
            # this is intended.
            if not attrs['expires'].tzinfo:
                method = 'from header added tzinfo'
                attrs['expires'] = attrs['expires'].replace(
                    tzinfo=timezone.utc)
            elif max_expires < attrs['expires']:
                method = 'from header max limited'
                attrs['expires'] = max_expires
                logger.debug("%r expiring too late, forcing expire in %ds",
                             feed, conf.feed.max_expires)
            elif attrs['expires'] < min_expires:
                method = 'from header min limited'
                attrs['expires'] = min_expires
                logger.debug("%r expiring too early, forcing expire in %ds",
                             feed, conf.feed.min_expires)
        except Exception:
            # Any parsing or comparison failure falls back to the maximum.
            attrs['expires'] = max_expires
            method = 'defaulted to max'

        # Articles of this feed retrieved within max_delta * SPAN_FACTOR.
        art_count = self.__actrl.read(feed_id=feed.id,
                                      retrieved_date__gt=now -
                                      max_delta * SPAN_FACTOR).count()
        if not art_count and method == 'from header min limited':
            # No recent article and an expiry clamped up to the minimum:
            # wait twice the minimum delay instead.
            attrs['expires'] = now + 2 * min_delta
            method = 'no article, twice min time'
        elif art_count:
            # The more recent articles, the sooner the proposed expiry.
            proposed_expires = now + max_delta / art_count / SPAN_FACTOR
            if min_expires < proposed_expires < attrs['expires']:
                attrs['expires'] = proposed_expires
                method = 'computed'
            if proposed_expires < min_expires:
                method = 'many articles, set to min expire'
                attrs['expires'] = min_expires
        exp_s = (attrs['expires'] - now).total_seconds()
        logger.info('%r : %d articles, expiring in %ds (%s)', feed, art_count,
                    exp_s, method)
        FEED_EXPIRES.labels(method=method, feed_type=feed_type).observe(exp_s)
コード例 #17
0
ファイル: abstract.py プロジェクト: goddess5321/JARR
 def construct(self, entry):
     """Build ``self.article`` from a feed entry.

     The article starts from ``template_article()``; with a falsy entry
     it is left at that. Otherwise each field is filled from its
     ``extract_*`` method, the date falling back to the current time when
     its extraction raises.
     """
     self.article = self.template_article()
     if not entry:
         return
     self.article['entry_id'] = self.extract_id(entry)
     try:
         published = self.extract_date(entry)
     except Exception:
         published = utc_now()
     self.article['date'] = published
     # Remaining fields map one-to-one onto an extract_<field> method.
     for field in ('title', 'tags', 'link', 'content', 'lang', 'comments'):
         extractor = getattr(self, 'extract_' + field)
         self.article[field] = extractor(entry)
コード例 #18
0
ファイル: auth.py プロジェクト: jaesivsm/JARR
 def post():
     """Authenticate the posted login / password and return an API token.

     On success the user's ``last_connection`` is refreshed and its
     ``renew_password_token`` cleared. Both outcomes increment the
     ``SERVER`` metric under the same ``uri`` label.

     Raises:
         Forbidden: when the authentication callback returns no user.
     """
     attrs = login_parser.parse_args()
     jwt = current_app.extensions["jwt"]
     user = jwt.authentication_callback(attrs["login"], attrs["password"])
     if not user:
         # Use the same uri label as the success path ("/auth"); the
         # failure path used to report "auth", splitting the endpoint
         # across two label values.
         SERVER.labels(method="post", uri="/auth", result='4XX').inc()
         raise Forbidden()
     access_token = jwt.jwt_encode_callback(user).decode("utf8")
     UserController(user.id).update({"id": user.id},
                                    {"last_connection": utc_now(),
                                     "renew_password_token": ""})
     SERVER.labels(method="post", uri="/auth", result='2XX').inc()
     return {"access_token": "%s %s" % (conf.auth.jwt_header_prefix,
                                        access_token)}, 200
コード例 #19
0
 def get():
     """Render the ``opml.xml`` template for the current user.

     The template receives the user, its categories indexed by id, its
     feeds and the current time. Every ``OK_GET_HEADERS`` entry is copied
     onto the response.
     """
     user_id = current_identity.id
     user = UserController(user_id).get(id=user_id)
     categories = {}
     for cat in CategoryController(user_id).read():
         categories[cat.id] = cat
     rendered = render_template('opml.xml',
                                user=user,
                                categories=categories,
                                feeds=FeedController(user_id).read(),
                                now=utc_now())
     response = make_response(rendered)
     for header, content in OK_GET_HEADERS.items():
         response.headers[header] = content
     return response
コード例 #20
0
    def clean_feed(self, response, **info):
        """Reset the error state of the feed after a fetch and persist it.

        Error counters are cleared, ``last_retrieved`` is set to now and
        ``expires`` to None, then overridden by whatever
        ``extract_feed_info`` finds in the response. When the response went
        through a 301 or 308 redirect to a different URL, the feed link is
        replaced by the final URL.
        """
        info.update(error_count=0, last_error=None,
                    last_retrieved=utc_now(), expires=None)
        info.update(extract_feed_info(response.headers, response.text))

        if (response.history
                and self.feed.link != response.url
                and any(hop.status_code in {301, 308}
                        for hop in response.history)):
            logger.warning('%r: feed moved from %r to %r', self.feed,
                           self.feed.link, response.url)
            info['link'] = response.url
        if info:
            controller = FeedController(self.feed.user_id)
            controller.update({'id': self.feed.id}, info)
コード例 #21
0
ファイル: manager.py プロジェクト: hanakhry/JARR
def reset_feeds():
    """Reschedule all active feeds evenly over ``conf.feed.max_expires``.

    Every active feed gets its ``etag`` and ``last_modified`` emptied, its
    ``last_retrieved`` reset to the Unix epoch, and an ``expires`` value
    spread from now over the next ``conf.feed.max_expires`` seconds.
    Does nothing when there is no active feed.
    """
    fcontr = FeedController(ignore_context=True)
    now = utc_now()
    feed_ids = [
        row[0]
        for row in fcontr.get_active_feed().with_entities(fcontr._db_cls.id)
    ]
    if not feed_ids:
        # Nothing to reschedule; also avoids dividing by zero below.
        return

    step = timedelta(seconds=conf.feed.max_expires / len(feed_ids))
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    for i, feed_id in enumerate(feed_ids):
        fcontr.update(
            {'id': feed_id}, {
                'etag': '',
                'last_modified': '',
                'last_retrieved': epoch,
                'expires': now + i * step
            })
コード例 #22
0
ファイル: abstract.py プロジェクト: jaesivsm/JARR
 def construct(self, entry):
     """Build ``self.article`` from a feed entry.

     The article starts from ``template_article()``; with a falsy entry
     it is left at that. Fields come from the ``extract_*`` methods, the
     date falling back to the current time when its extraction raises and
     the language going through ``clean_lang``. When a link is present,
     its hash is stored and any content goes through ``clean_urls``.
     """
     self.article = self.template_article()
     if not entry:
         return
     self.article['entry_id'] = self.extract_id(entry)
     try:
         published = self.extract_date(entry)
     except Exception:
         published = utc_now()
     self.article['date'] = published
     # These fields map one-to-one onto an extract_<field> method.
     for field in ('title', 'tags', 'link', 'content'):
         self.article[field] = getattr(self, 'extract_' + field)(entry)
     self.article['lang'] = clean_lang(self.extract_lang(entry))
     self.article['comments'] = self.extract_comments(entry)

     link = self.article.get('link')
     if not link:
         return
     self.article['link_hash'] = self.to_hash(link)
     content = self.article.get('content')
     if content:
         self.article['content'] = clean_urls(content, link)
コード例 #23
0
    def test_scheduler(self):
        """Check which tasks two consecutive ``scheduler()`` runs trigger.

        The first run must schedule one feed processing per feed and
        nothing else. After two feeds are marked for deletion and one reset
        to the epoch, the second run must add exactly one feed cleaning.
        """
        scheduler()
        UserController().update({}, {'last_connection': utc_now()})
        fctrl = FeedController()

        epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
        # First run: one process task per feed, no clustering, no cleaning.
        self.assertEqual(fctrl.read().count(),
                         self.process_feed_patch.apply_async.call_count)
        self.assertEqual(0, self.clusteriser_patch.apply_async.call_count)
        self.assertEqual(0, self.feed_cleaner_patch.apply_async.call_count)
        feed1, feed2, feed3 = list(FeedController().read().limit(3))
        FeedController().update({'id__in': [feed1.id, feed3.id]},
                                {'status': 'to_delete'})
        # Resetting both timestamps makes feed2 fetchable again.
        FeedController().update({'id': feed2.id}, {
            'last_retrieved': epoch,
            'expires': epoch
        })
        self.assertEqual(1, len(list(fctrl.list_fetchable())))
        scheduler()
        # Mock call counts are cumulative: the process count is unchanged
        # by the second run, which only adds one cleaner task.
        self.assertEqual(fctrl.read().count(),
                         self.process_feed_patch.apply_async.call_count)
        self.assertEqual(0, self.clusteriser_patch.apply_async.call_count)
        self.assertEqual(1, self.feed_cleaner_patch.apply_async.call_count)
コード例 #24
0
ファイル: oauth.py プロジェクト: jaesivsm/JARR
    def process_ids(cls, social_id, username, email):  # pragma: no cover
        """Log in, or sign up, the user matching an OAuth identity.

        The user is looked up by its ``<provider>_identity``; when missing
        and ``conf.oauth.allow_signup`` is set, it is created. Its
        ``last_connection`` is refreshed and an access token returned.
        Each outcome increments the ``SERVER`` metric.

        Raises:
            UnprocessableEntity: when ``social_id`` is None.
            BadRequest: when the user is unknown and signup is not allowed.
        """

        labels = {"method": "get", "uri": "/oauth/callback/" + cls.provider}
        if social_id is None:
            SERVER.labels(result="4XX", **labels).inc()
            raise UnprocessableEntity('No social id, authentication failed')
        ucontr = UserController()
        try:
            user = ucontr.get(**{'%s_identity' % cls.provider: social_id})
        except NotFound:
            user = None
        if not user and not conf.oauth.allow_signup:
            SERVER.labels(result="4XX", **labels).inc()
            raise BadRequest('Account creation is not allowed through OAuth.')
        if not user:
            # Keep the provider's username as login when it is still free.
            if username and not ucontr.read(login=username).count():
                login = username
            else:
                # NOTE(review): the literal below has no placeholder as
                # shown here, so the % formatting would raise TypeError; it
                # looks redacted, confirm against the real source.
                login = '******' % (cls.provider, username or social_id)
            user = ucontr.create(
                **{
                    '%s_identity' % cls.provider: social_id,
                    'login': login,
                    'email': email
                })
        ucontr.update({"id": user.id}, {
            "last_connection": utc_now(),
            "renew_password_token": ""
        })
        jwt_ext = current_app.extensions['jwt']
        access_token = jwt_ext.jwt_encode_callback(user).decode('utf8')
        SERVER.labels(result="2XX", **labels).inc()
        return {
            "access_token":
            "%s %s" % (conf.auth.jwt_header_prefix, access_token)
        }, 200
コード例 #25
0
 def test_fetching_anti_herding_mech_utcplustwelve(self):
     """Run the anti herding scenario with a current time in UTC+12."""
     utc_plus_twelve = timezone(timedelta(hours=12))
     shifted_now = utc_now().astimezone(utc_plus_twelve)
     self._test_fetching_anti_herding_mech(shifted_now)
コード例 #26
0
 def extract_date(entry):
     """Return the entry's ``date_published`` converted to UTC.

     Falls back to the current time when the entry has no such value.
     """
     published = entry.get('date_published')
     if not published:
         return utc_now()
     parsed = dateutil.parser.parse(published)
     return parsed.astimezone(timezone.utc)
コード例 #27
0
ファイル: user.py プロジェクト: goddess5321/JARR
 def list_active(self):
     """Return active users connected in the last ``conf.feed.stop_fetch`` days."""
     now = utc_now()
     connected_since = now - timedelta(days=conf.feed.stop_fetch)
     recent_filters = {'is_active': True,
                       'last_connection__ge': connected_since}
     return self.read(**recent_filters)
コード例 #28
0
 def _filter_unclustered(*fields):
     """Return a query of ``fields`` over articles without cluster.

     Only articles of active users connected in the last
     ``conf.feed.stop_fetch`` days are kept.
     """
     conn_max = utc_now() - timedelta(days=conf.feed.stop_fetch)
     unclustered = session.query(*fields).filter(
         Article.cluster_id.__eq__(None))
     owner_conditions = (User.id == Article.user_id,
                         User.is_active.__eq__(True),
                         User.last_connection >= conn_max)
     return unclustered.join(User).filter(*owner_conditions)
コード例 #29
0
 def test_fetching_anti_herding_mech_utctimezone(self):
     """Run the anti herding scenario with the current time in UTC."""
     current_utc = utc_now()
     self._test_fetching_anti_herding_mech(current_utc)
コード例 #30
0
ファイル: main.py プロジェクト: jaesivsm/JARR
def metrics_users_active():
    """Publish the number of active users on the ``USER`` metric.

    Users are read with ``is_active=True`` and a ``last_connection__ge``
    threshold of ``conf.feed.stop_fetch`` days before now; the count is
    set on the ``status='active'`` label.
    """
    logger.debug('Counting active users')
    connected_since = utc_now() - timedelta(days=conf.feed.stop_fetch)
    recent_filters = {'is_active': True,
                      'last_connection__ge': connected_since}
    active_users = UserController().read(**recent_filters)
    USER.labels(status='active').set(active_users.count())