Example #1
    def to_string(cls, results):
        """ Transform results into strings.

        Results must be the value returned
        by a call to :meth:`classify`.
        """

        errors = results.get(u'error_types')

        output = (
            u'>> %s error types: %s distinct on %s instances, '
            u'computed in %s\n' %
            (cls.__name__[:-15], len(errors), results.get(u'seen_objects'),
             naturaldelta(results.get(u'duration'))))

        stored = results.get('stored_instances')

        output += u'\n'.join(u'%s: %s' % (k, v)
                             for k, v in sorted(errors.items(),
                                                key=operator.itemgetter(1),
                                                reverse=True)) + u'\n\n'

        output += u'>> to get them, stored by error kind:\n'

        output += u'\n'.join(u'results.get("stored_instances").get("%s")' % s
                             for s in stored)

        return output
Example #2
    def to_string(cls, results):
        """ Transform results into strings.

        Results must be the value returned
        by a call to :meth:`classify`.
        """

        errors = results.get(u'error_types')

        output = (u'>> %s error types: %s distinct on %s instances, '
                  u'computed in %s\n' % (
                      cls.__name__[:-15],
                      len(errors),
                      results.get(u'seen_objects'),
                      naturaldelta(results.get(u'duration'))))

        stored = results.get('stored_instances')

        output += u'\n'.join(u'%s: %s' % (k, v) for k, v in sorted(
                             errors.items(),
                             key=operator.itemgetter(1),
                             reverse=True)) + u'\n\n'

        output += u'>> to get them, stored by error kind:\n'

        output += u'\n'.join(u'results.get("stored_instances").get("%s")'
                             % s for s in stored)

        return output
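
Note: in the two snippets above, naturaldelta presumably comes from the
humanize package. A minimal standalone sketch of the call pattern, assuming
that import:

from datetime import timedelta

from humanize import naturaldelta

# naturaldelta accepts either a number of seconds or a timedelta and
# returns a rough human-readable duration, with no "ago"/"in" wording.
print(naturaldelta(42))                     # e.g. '42 seconds'
print(naturaldelta(timedelta(minutes=90)))  # e.g. 'an hour'
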
Example #3
    def reading_time_abstracted(self):

        rtm = self.reading_time

        if rtm is None:
            return u''

        inum = 1
        icon = u'∎'  # u'<i class="icon-time"></i>'
        tmpl = _(u'<span class="popover-top" data-toggle="tooltip" '
                 u'title="Reading time: {0}">{1}</span>')
        time = naturaldelta(timedelta(seconds=rtm * 60))

        if rtm > 8:
            inum = 4

        elif rtm > 3:
            inum = 3

        elif rtm > 1:
            inum = 2

        elif rtm == 0:
            # HEADS UP: patch/hack; non-breakable spaces everywhere.
            time = _(u'very quick (<1 min)')

        return tmpl.format(time, inum * icon)
Example #4
    def reading_time_abstracted(self):

        rtm = self.reading_time

        if rtm is None:
            return u''

        inum = 1
        icon = u'∎'  # u'<i class="icon-time"></i>'
        tmpl = _(u'<span class="popover-top" data-toggle="tooltip" '
                 u'title="Reading time: {0}">{1}</span>')
        time = naturaldelta(timedelta(seconds=rtm * 60))

        if rtm > 8:
            inum = 4

        elif rtm > 3:
            inum = 3

        elif rtm > 1:
            inum = 2

        elif rtm == 0:
            # HEADS UP: patch/hack; non-breakable spaces everywhere.
            time = _(u'very quick (<1 min)')

        return tmpl.format(time, inum * icon)
Example #5
    def fetch_interval_display(self, obj):
        """ FILL ME, pep257. """

        if obj.is_active:
            with django_language():
                return naturaldelta(obj.fetch_interval)

        return u'—'
Example #6
    def fetch_interval_display(self, obj):
        """ FILL ME, pep257. """

        if obj.closed:
            return u'—'

        with django_language():
            return naturaldelta(obj.fetch_interval)
Example #7
        def format_quota(quota):
            if quota['remaining'] is None:
                return u' (no quota information)'

            if quota['remaining']:
                return u'; quota: %s call(s) remaining' % quota['remaining']

            else:
                return u'; quota exhausted, reset in %s' % (
                    naturaldelta(now() - quota['reset'].replace(tzinfo=utc)))
Example #8
def clean_obsolete_redis_keys():
    """ Call in turn all redis-related cleaners. """

    start_time = pytime.time()

    if today() <= (config.GR_END_DATE + timedelta(days=1)):
        clean_gri_keys()

    LOGGER.info(u'clean_obsolete_redis_keys(): finished in %s.',
                naturaldelta(pytime.time() - start_time))
Example #9
    def reading_time_display(self):

        rtm = self.reading_time

        if rtm is None:
            return u''

        if rtm == 0:
            return _(u'a quick read')

        return _(u'{0} read').format(naturaldelta(timedelta(seconds=rtm * 60)))
Example #10
        def format_quota(quota):
            if quota['remaining'] is None:
                return u' (no quota information)'

            if quota['remaining']:
                return u'; quota: %s call(s) remaining' % quota['remaining']

            else:
                return u'; quota exhausted, reset in %s' % (
                    naturaldelta(now() - quota['reset'].replace(tzinfo=utc))
                )
Example #11
    def reading_time_display(self):

        rtm = self.reading_time

        if rtm is None:
            return u''

        if rtm == 0:
            return _(u'a quick read')

        return _(u'{0} read').format(naturaldelta(timedelta(seconds=rtm * 60)))
Example #12
    def run(self):
        """ Run the import. """

        #
        # NOTE: we don't care if the import was already running, finished,
        #       whatever. This class is able to recover and re-run itself
        #       over and over without doing bad things in the database.
        #

        is_retrying = self.status == IMPORT_STATUS.RETRY

        self.status = IMPORT_STATUS.RUNNING
        self.date_started = now()
        self.save()

        try:
            return self.run_internal()

        except:
            LOGGER.exception(u'User import %s failed', self.id)

            if is_retrying:
                message_user(self.user,
                             _(u'Your import #{0} failed to run after a '
                               u'retry. Please review it before relaunching '
                               u'it manually again.').format(self.id),
                             constants.ERROR)

                self.status = IMPORT_STATUS.FAILED

            else:
                countdown = randrange(1800, 3600)
                delta_cd = naturaldelta(timedelta(seconds=countdown))

                message_user(self.user,
                             _(u'Your import #{0} failed to run. It will '
                               u'be automatically retried in {1}').format(
                                 self.id, delta_cd),
                             constants.WARNING)

                globals()['userimport_run_task'].apply_async(
                    (self.id, ), countdown=countdown)

                self.status = IMPORT_STATUS.RETRY

            self.save()
Example #13
def feed_toggle_is_active(request, feed_id):
    """ Toggle a feed active or not (open or closed) from anywhere.

    If called via ajax, send a message to the user and call the notification
    template.
    """

    feed = BaseFeed.objects.get(id=feed_id)

    LOGGER.info('%s %s', feed.is_active, feed)

    if feed.is_active:
        feed.close(u'Closed manually by {0}'.format(request.user.username))

        messages.warning(
            request,
            _(u'Closed {0} <strong>{1}</strong>.').format(
                feed._meta.verbose_name, feed.name),
            extra_tags='safe'
        )

    else:
        feed.reopen()

        messages.success(
            request,
            _(u'Re-opened {0} <strong>{1}</strong>. '
              u'Next fetch in {2}.').format(
                feed._meta.verbose_name,
                feed.name,
                naturaldelta(feed.fetch_interval)),
            extra_tags='safe'
        )

    if request.is_ajax():
        return render(request, 'snippets/feed/is-active-toggle.html')

    # Standard request (admin or POST)

    if request.user.is_staff_or_superuser_and_enabled:
        fallback_referer = reverse('admin:index')
    else:
        fallback_referer = reverse('home')

    return HttpResponseRedirect(request.META.get('HTTP_REFERER',
                                fallback_referer))
Example #14
def feed_toggle_is_active(request, feed_id):
    """ Toggle a feed active or not (open or closed) from anywhere.

    If called via ajax, send a message to the user and call the notification
    template.
    """

    feed = BaseFeed.objects.get(id=feed_id)

    LOGGER.info('%s %s', feed.is_active, feed)

    if feed.is_active:
        feed.close(u'Closed manually by {0}'.format(request.user.username))

        messages.warning(request,
                         _(u'Closed {0} <strong>{1}</strong>.').format(
                             feed._meta.verbose_name, feed.name),
                         extra_tags='safe')

    else:
        feed.reopen()

        messages.success(request,
                         _(u'Re-opened {0} <strong>{1}</strong>. '
                           u'Next fetch in {2}.').format(
                               feed._meta.verbose_name, feed.name,
                               naturaldelta(feed.fetch_interval)),
                         extra_tags='safe')

    if request.is_ajax():
        return render(request, 'snippets/feed/is-active-toggle.html')

    # Standard request (admin or POST)

    if request.user.is_staff_or_superuser_and_enabled:
        fallback_referer = reverse('admin:index')
    else:
        fallback_referer = reverse('home')

    return HttpResponseRedirect(
        request.META.get('HTTP_REFERER', fallback_referer))
Example #15
def feed_distribution_by_last_fetch_display(results=None):
    """ Display feeds by last_fetch. """

    if results is None:
        results = feed_distribution_by_last_fetch()

    meta = results.get('meta')

    output = u''

    for loop_count in xrange(meta.get('loop_count')):
        feeds, count, percent, lower_value, upper_value, avg_fi = \
            results.get(loop_count)

        output += u'%s feeds (%.1f%%) fetched ' % (count, float(percent))

        if lower_value is None:
            output += u'less than %s ago' % naturaldelta(upper_value)

        elif upper_value is None:
            output += u'more than %s ago' % naturaldelta(lower_value)

        else:
            output += u'between %s and %s ago' % (naturaldelta(lower_value),
                                                  naturaldelta(upper_value))

        output += (u', avg fetch interval: %s' % naturaldelta(avg_fi) +
                   (u' — in results[%s]\n' % loop_count))

    if meta['fetched_feeds'] == meta['open_feeds_count']:
        output += u'\n>>> All open feeds are being fetched.'
    else:
        output += (u'%s total feeds fetched, out of %s open feeds.\n'
                   u'[computed in %s]') % (meta['fetched_feeds'],
                                           meta['open_feeds_count'],
                                           naturaldelta(meta['duration']))

    return results, output
Example #16
def feed_distribution_by_last_fetch_display(results=None):
    """ Display feeds by last_fetch. """

    if results is None:
        results = feed_distribution_by_last_fetch()

    meta = results.get('meta')

    output = u''

    for loop_count in xrange(meta.get('loop_count')):
        feeds, count, percent, lower_value, upper_value, avg_fi = \
            results.get(loop_count)

        output += u'%s feeds (%.1f%%) fetched ' % (count, float(percent))

        if lower_value is None:
            output += u'less than %s ago' % naturaldelta(upper_value)

        elif upper_value is None:
            output += u'more than %s ago' % naturaldelta(lower_value)

        else:
            output += u'between %s and %s ago' % (naturaldelta(lower_value),
                                                  naturaldelta(upper_value))

        output += (u', avg fetch interval: %s' % naturaldelta(avg_fi) +
                   (u' — in results[%s]\n' % loop_count))

    if meta['fetched_feeds'] == meta['open_feeds_count']:
        output += u'\n>>> All open feeds are being fetched.'
    else:
        output += (u'%s total feeds fetched, out of %s open feeds.\n'
                   u'[computed in %s]') % (
            meta['fetched_feeds'], meta['open_feeds_count'],
            naturaldelta(meta['duration']))

    return results, output
Example #17
def global_feeds_checker():
    """ Check all RSS feeds and their dependants. Close them if needed.

    No parameter.
    """
    def pretty_print_feed(feed):

        return (u'- %s,\n'
                u'    - admin url: http://%s%s\n'
                u'    - public url: %s\n'
                u'    - %s\n'
                u'    - reason: %s\n'
                u'    - last error: %s') % (
                    feed,
                    settings.SITE_DOMAIN,
                    reverse('admin:%s_%s_change' %
                            (feed._meta.app_label, feed._meta.module_name),
                            args=[feed.id]),

                    # Only RSS/Atom feeds have a URL…
                    feed.url if hasattr(feed, 'url') else '(NO URL)',
                    (u'closed on %s' % feed.date_closed)
                    if feed.date_closed else u'(no closing date)',
                    feed.closed_reason
                    or u'none (or manually closed from the admin interface)',
                    feed.errors[0]
                    if len(feed.errors) else u'(no error recorded)')

    def pretty_print_feed_list(feed_list):

        return '\n\n'.join(pretty_print_feed(feed) for feed in feed_list)

    dtnow = now()
    limit_days = config.FEED_CLOSED_WARN_LIMIT
    closed_limit = dtnow - timedelta(days=limit_days)
    closed_tested = 0
    reopened_list = []

    # ———————————————————————————————— See if old closed feeds can be reopened.

    old_closed_feeds = BaseFeed.objects.filter(is_active=False).filter(
        date_closed__lt=closed_limit)

    for feed in old_closed_feeds:
        # check all closed feeds monthly, on their closing date anniversary.
        if feed.date_closed.day == dtnow.day:
            if feed.check_old_closed():
                reopened_list.append(feed)
            closed_tested += 1

    # ——————————————————————————————————————————— Report recently closed feeds.

    recently_closed_feeds = BaseFeed.objects.filter(is_active=False).filter(
        Q(date_closed=None) | Q(date_closed__gte=closed_limit))

    if not recently_closed_feeds.exists():
        LOGGER.info(
            u'No feed was closed in the last %s days; %s already-closed '
            u'feeds were checked for a possible return to life, of which '
            u'%s were reopened.', limit_days, closed_tested,
            len(reopened_list))
        return

    count = recently_closed_feeds.count()

    mail_managers(
        _(u'Reminder: {0} feed(s) closed in last '
          u'{1} day(s), {2} automatically reopened').format(
              count, limit_days, len(reopened_list)),
        FEED_CHECK_TEMPLATE_TXT.format(
            feed_list=pretty_print_feed_list(recently_closed_feeds),
            closed_tested=closed_tested,
            reopened_count=len(reopened_list),
            reopened_list=pretty_print_feed_list(reopened_list)),
    )

    start_time = pytime.time()

    # Close the feeds, but only after sending the mail,
    # so that the initial reason is displayed at least
    # once to a real human.
    for feed in recently_closed_feeds:
        if feed.date_closed is None:
            feed.close('Automatic close by periodic checker task')

    LOGGER.info('Closed %s feeds in %s.', count,
                naturaldelta(pytime.time() - start_time))
Example #18
def global_duplicates_checker(limit=None, force=False):
    """ Check that duplicate articles have no more Reads anywhere.

    Fix it if not, and update all counters accordingly.

    :param limit: integer, the maximum number of duplicates to check.
        Default: none.
    :param force: boolean, default ``False``, allows bypassing and
        reacquiring the lock.
    """

    if config.CHECK_DUPLICATES_DISABLED:
        LOGGER.warning(u'Duplicates check disabled in configuration.')
        return

    # This task runs once a day. Acquire the lock for just a
    # little more time to avoid over-parallelized runs.
    my_lock = RedisExpiringLock('check_all_duplicates', expire_time=3600 * 25)

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(u'Forcing duplicates check…')

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'global_duplicates_checker() is already '
                           u'locked, aborting.')
            return

    if limit is None:
        limit = config.CHECK_DUPLICATES_LIMIT

    start_time = pytime.time()
    duplicates = BaseItem.objects.duplicate()

    total_dupes_count = duplicates.count()
    total_reads_count = 0
    processed_dupes = 0
    done_dupes_count = 0
    purged_dupes_count = 0

    purge_after_weeks_count = max(1, config.CHECK_DUPLICATES_PURGE_AFTER_WEEKS)
    purge_after_weeks_count = min(52, purge_after_weeks_count)

    purge_before_date = now() - timedelta(days=purge_after_weeks_count * 7)

    LOGGER.info(
        u'Done counting (took %s of pure SQL joy), starting procedure.',
        naturaldelta(pytime.time() - start_time))

    with benchmark(u"Check {0}/{1} duplicates".format(limit or u'all',
                                                      total_dupes_count)):

        try:
            for duplicate in duplicates.iterator():
                reads = duplicate.reads.all()

                processed_dupes += 1

                if reads.exists():
                    done_dupes_count += 1
                    reads_count = reads.count()
                    total_reads_count += reads_count

                    LOGGER.info(
                        u'Duplicate %s #%s still has %s reads, fixing…',
                        duplicate._meta.model.__name__, duplicate.id,
                        reads_count)

                    duplicate.duplicate_of.register_duplicate(
                        duplicate,
                        force=duplicate.duplicate_status ==
                        DUPLICATE_STATUS.FINISHED)

                if duplicate.duplicate_status == DUPLICATE_STATUS.FINISHED:
                    #
                    # TODO: check we didn't pick up new dependencies via
                    #       race conditions between the moment the duplicate
                    #       was marked duplicate and now.

                    if duplicate.date_created < purge_before_date:
                        try:
                            with transaction.atomic():
                                duplicate.delete()
                        except:
                            LOGGER.exception(
                                u'Exception while deleting '
                                u'duplicate %s #%s',
                                duplicate._meta.model.__name__, duplicate.id)

                        purged_dupes_count += 1
                        LOGGER.info(u'Purged duplicate %s #%s from database.',
                                    duplicate._meta.model.__name__,
                                    duplicate.id)

                elif duplicate.duplicate_status in (
                        DUPLICATE_STATUS.NOT_REPLACED,
                        DUPLICATE_STATUS.FAILED):
                    # Something went wrong, perhaps the
                    # task was purged before being run.
                    duplicate.duplicate_of.register_duplicate(duplicate)
                    done_dupes_count += 1

                elif duplicate.duplicate_status is None:
                    # Something went very wrong. If the article is a known
                    # duplicate, its status field should have been set to
                    # at least NOT_REPLACED.
                    duplicate.duplicate_of.register_duplicate(duplicate)
                    done_dupes_count += 1

                    LOGGER.error(
                        u'Corrected duplicate %s #%s found with no '
                        u'status.', duplicate._meta.model.__name__,
                        duplicate.id)

                if limit and processed_dupes >= limit:
                    break

        finally:
            my_lock.release()

    LOGGER.info(
        u'global_duplicates_checker(): %s/%s duplicates processed '
        u'(%.2f%%; limit: %s), %s corrected (%.2f%%), '
        u'%s purged (%.2f%%); %s reads altered.', processed_dupes,
        total_dupes_count, processed_dupes * 100.0 / total_dupes_count, limit
        or u'none', done_dupes_count,
        (done_dupes_count * 100.0 /
         processed_dupes) if processed_dupes else 0.0, purged_dupes_count,
        (purged_dupes_count * 100.0 /
         processed_dupes) if processed_dupes else 0.0, total_reads_count)
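
Note: most of the periodic tasks above guard themselves with the same
acquire-or-force dance around RedisExpiringLock. A minimal sketch of that
pattern, using a hypothetical in-memory stand-in for the lock
(RedisExpiringLock itself is project-specific and not shown here):

class FakeExpiringLock(object):

    """ Hypothetical in-memory stand-in for RedisExpiringLock. """

    def __init__(self):
        self.held = False

    def acquire(self):
        if self.held:
            return False

        self.held = True
        return True

    def release(self):
        self.held = False


def acquire_or_abort(lock, force=False):
    """ Return True if the task may run, mirroring the pattern above. """

    if lock.acquire():
        return True

    if force:
        # Steal the lock, exactly as the tasks above do when force=True.
        lock.release()
        lock.acquire()
        return True

    # A previous run still holds the lock: abort instead of piling up.
    return False
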
Example #19
def refresh_all_mongo_feeds(limit=None, force=False):
    u""" Refresh all MongoEngine feeds (RSS).

    .. note:: this task should vanish when
        MongoDB → PostgreSQL migration is done.
    """

    if config.FEED_FETCH_DISABLED:
        # Do not raise any .retry(), this is a scheduled task.
        LOGGER.warning(u'Feed refresh disabled in configuration.')
        return

    # Be sure two refresh operations don't overlap, but don't hold the
    # lock too long if something goes wrong. In production conditions
    # as of 20130812, refreshing all feeds takes only a moment:
    # [2013-08-12 09:07:02,028: INFO/MainProcess] Task
    #    oneflow.core.tasks.refresh_all_mongo_feeds succeeded in 1.99886608124s.
    my_lock = RedisExpiringLock(
        'refresh_all_mongo_feeds',
        expire_time=config.FEED_GLOBAL_REFRESH_INTERVAL * 180 - 1)

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(_(u'Forcing all feed refresh…'))

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'refresh_all_mongo_feeds() is already '
                           u'locked, aborting.')
            return

    feeds = MongoFeed.objects.filter(closed__ne=True, is_internal__ne=True)

    if limit:
        feeds = feeds.limit(limit)

    # No need for caching and cluttering CPU/memory for a one-shot thing.
    feeds.no_cache()

    with benchmark('refresh_all_mongo_feeds()'):

        try:
            count = 0
            mynow = now()

            for feed in feeds:

                if feed.refresh_lock.is_locked():
                    LOGGER.info(u'Feed %s already locked, skipped.', feed)
                    continue

                interval = timedelta(seconds=feed.fetch_interval)

                if feed.last_fetch is None:

                    mongo_feed_refresh_task.delay(feed.id)

                    LOGGER.info(
                        u'Launched immediate refresh of feed %s which '
                        u'has never been refreshed.', feed)

                elif force or feed.last_fetch + interval < mynow:

                    how_late = feed.last_fetch + interval - mynow
                    how_late = how_late.days * 86400 + how_late.seconds

                    countdown = 0
                    mongo_feed_refresh_task.delay(feed.id, force)

                    LOGGER.info(
                        u'%s refresh of feed %s %s (%s late).',
                        u'Scheduled randomized' if countdown else u'Launched',
                        feed, u' in {0}'.format(naturaldelta(countdown))
                        if countdown else u'in the background',
                        naturaldelta(how_late))
                    count += 1

        finally:
            # HEADS UP: see core.tasks.refresh_all_feeds() for note.
            # my_lock.release()
            pass

        LOGGER.info(u'Launched %s refreshes out of %s feed(s) checked.', count,
                    feeds.count())
Example #20
def refresh_all_feeds(limit=None, force=False):
    u""" Refresh all feeds (RSS/Mail/Twitter…). """

    if config.FEED_FETCH_DISABLED:
        # Do not raise any .retry(), this is a scheduled task.
        LOGGER.warning(u'Feed refresh disabled in configuration.')
        return

    # As FEED_GLOBAL_REFRESH_INTERVAL is dynamically modifiable,
    # we should re-evaluate it each time we run.
    this_round_expire_time = (
        config.FEED_GLOBAL_REFRESH_INTERVAL * 60
        - config.FEED_GLOBAL_REFRESH_INTERVAL
    )

    # Be sure two refresh operations don't overlap, but don't hold the
    # lock too long if something goes wrong. In production conditions
    # as of 20130812, refreshing all feeds takes only a moment:
    # [2013-08-12 09:07:02,028: INFO/MainProcess] Task
    #       oneflow.core.tasks.refresh_all_feeds succeeded in 1.99886608124s.
    #
    my_lock = RedisExpiringLock(
        REFRESH_ALL_FEEDS_LOCK_NAME,
        expire_time=this_round_expire_time
    )

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(_(u'Forcing all feed refresh…'))

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'refresh_all_feeds() is already locked, aborting.')
            return

    # This should bring us a Polymorphic Query to refresh all feeds types.
    feeds = BaseFeed.objects.filter(is_active=True,
                                    is_internal=False).order_by(
                                        'date_last_fetch')

    if limit:
        feeds = feeds[:limit]

    with benchmark('refresh_all_feeds()'):

        try:
            count = 0
            mynow = now()

            for feed in feeds:

                if feed.refresh_lock.is_locked():
                    # The refresh task launched before its expiration, and is
                    # still [long] running while we want to launch another.
                    # Avoid that: the new one would exit immediately because
                    # date_last_fetch is too recent.
                    LOGGER.debug(u'Feed %s already locked, skipped.', feed)
                    continue

                if feed.date_last_fetch is None:

                    basefeed_refresh_task.apply_async(
                        args=(feed.id, ),

                        # in `this_round_expire_time`, we will relaunch it
                        # anyway, so don't clutter the queue with double work.
                        expire=this_round_expire_time,
                    )

                    LOGGER.info(u'Launched immediate refresh of feed %s which '
                                u'has never been refreshed.', feed)
                    count += 1
                    continue

                if feed.fetch_interval > 86399:
                    interval_days = feed.fetch_interval / 86400
                    interval_seconds = feed.fetch_interval - (
                        interval_days * 86400)

                    interval = timedelta(days=interval_days,
                                         seconds=interval_seconds)

                else:
                    interval = timedelta(seconds=feed.fetch_interval)

                if force or feed.date_last_fetch + interval < mynow:

                    how_late = feed.date_last_fetch + interval - mynow
                    how_late = how_late.days * 86400 + how_late.seconds

                    late = feed.date_last_fetch + interval < mynow

                    basefeed_refresh_task.apply_async(
                        args=(feed.id, ),
                        kwargs={'force': force},
                        expire=this_round_expire_time,
                    )

                    LOGGER.info(u'Launched refresh of feed %s (%s %s).',
                                feed, naturaldelta(how_late),
                                u'late' if late else u'earlier')
                    count += 1

        finally:
            # HEADS UP: in case the system is overloaded and feeds refresh()
            #           tasks don't complete fast enough, the current task
            #           will overload it even more. Thus, we intentionally
            #           don't release the lock, to keep over-relaunched
            #           global tasks from flooding the refresh queue with
            #           useless double, triple, N-fold individual tasks.
            #
            # my_lock.release()
            pass

        LOGGER.info(u'Launched %s refreshes out of %s feed(s) checked.',
                    count, feeds.count())
Example #21
def reprocess_failed_articles(failed=None,
                              expiry=None,
                              limit=None,
                              force=False,
                              reprocessing_type=None):
    u""" Reprocess articles that failed absolutization.

    In case there was a temporary error, this could lead to more good articles.
    """

    if config.ARTICLE_REPROCESSING_DISABLED:
        # Do not raise any .retry(), this is a scheduled task.
        LOGGER.warning(u'Articles reprocess disabled in configuration.')
        return

    if failed is None:
        raise RuntimeError(u'Need a queryset of failed items to reprocess.')

    # TODO: as the celery task expires,
    # the lock is probably not needed anymore.

    my_lock = RedisExpiringLock('reprocess_failed_articles_' + str(expiry),
                                expire_time=expiry)

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(u'Forcing failed articles reprocessing…')

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'reprocess_failed_articles() is already locked, '
                           u'aborting.')
            return

    failed_count = failed.count()

    with benchmark((u'Reprocess_failed_articles(expiry=%s): %s '
                    u' processing chains relaunched.') %
                   (naturaldelta(expiry), failed_count)):

        try:
            for article in failed.iterator():

                if reprocessing_type is None:
                    article.url_error = None
                    article.save()

                    article_post_create_task.apply(args=(article.id, ),
                                                   kwargs={'apply_now': True})

                elif reprocessing_type == 'standard':
                    article.process()

        finally:
            # HEADS UP: in case the system is overloaded, we intentionally
            #           don't release the lock, to keep over-relaunched
            #           global tasks from flooding the queue with useless
            #           double, triple, N-fold individual tasks.
            #
            # my_lock.release()
            pass
Example #22
def reprocess_failed_articles(failed=None, expiry=None,
                              limit=None, force=False,
                              reprocessing_type=None):
    u""" Reprocess articles that failed absolutization.

    In case there was a temporary error, this could lead to more good articles.
    """

    if config.ARTICLE_REPROCESSING_DISABLED:
        # Do not raise any .retry(), this is a scheduled task.
        LOGGER.warning(u'Articles reprocess disabled in configuration.')
        return

    if failed is None:
        raise RuntimeError(u'Need a queryset of failed items to reprocess.')

    # TODO: as the celery task expires,
    # the lock is probably not needed anymore.

    my_lock = RedisExpiringLock(
        'reprocess_failed_articles_' + str(expiry),
        expire_time=expiry
    )

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(u'Forcing failed articles reprocessing…')

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'reprocess_failed_articles() is already locked, '
                           u'aborting.')
            return

    failed_count = failed.count()

    with benchmark((u'Reprocess_failed_articles(expiry=%s): %s '
                   u' processing chains relaunched.')
                   % (naturaldelta(expiry), failed_count)):

        try:
            for article in failed.iterator():

                if reprocessing_type is None:
                    article.url_error = None
                    article.save()

                    article_post_create_task.apply(args=(article.id, ),
                                                   kwargs={'apply_now': True})

                elif reprocessing_type == 'standard':
                    article.process()

        finally:
            # HEADS UP: in case the system is overloaded, we intentionally
            #           don't release the lock, to keep over-relaunched
            #           global tasks from flooding the queue with useless
            #           double, triple, N-fold individual tasks.
            #
            # my_lock.release()
            pass
Example #23
def import_google_reader_begin(user_id, access_token):

    auth = OAuth2Method(settings.GOOGLE_OAUTH2_CLIENT_ID,
                        settings.GOOGLE_OAUTH2_CLIENT_SECRET)
    auth.authFromAccessToken(access_token)
    reader = GoogleReader(auth)

    django_user, mongo_user = get_user_from_dbs(user_id)
    username = django_user.username

    try:
        user_infos = reader.getUserInfo()

    except TypeError:
        LOGGER.exception(u'Could not start Google Reader import for user %s.',
                         username)
        # Don't refresh, it's now done by a dedicated periodic task.
        # If we failed, it means the problem is quite serious.
        #       import_google_reader_trigger(user_id, refresh=True)
        return

    GR_MAX_FEEDS = config.GR_MAX_FEEDS

    LOGGER.info(u'Starting Google Reader import for user %s.', username)

    gri = GoogleReaderImport(user_id)

    # take note of user information now that we have it.
    gri.start(user_infos=user_infos)

    reader.buildSubscriptionList()

    total_reads, reg_date = reader.totalReadItems(without_date=False)
    total_starred, star1_date = reader.totalStarredItems(without_date=False)
    total_feeds = len(reader.feeds) + 1  # +1 for 'starred'

    gri.reg_date(pytime.mktime(reg_date.timetuple()))
    gri.star1_date(pytime.mktime(star1_date.timetuple()))
    gri.total_reads(total_reads)
    gri.total_starred(total_starred)

    LOGGER.info(
        u'Google Reader import for user %s: %s feed(s) and %s read '
        u'article(s) to go…', username, total_feeds, total_reads)

    if total_feeds > GR_MAX_FEEDS and not settings.DEBUG:
        mail_admins(
            'User {0} has more than {1} feeds: {2}!'.format(
                username, GR_MAX_FEEDS, total_feeds),
            u"\n\nThe GR import will be incomplete.\n\n"
            u"Just for you to know…\n\n")

    # We launch the starred feed import first. Launching it after the
    # standard feeds would delay it until the world's end.
    reader.makeSpecialFeeds()
    starred_feed = reader.getSpecialFeed(ReaderUrl.STARRED_LIST)
    import_google_reader_starred.apply_async((user_id, username, starred_feed),
                                             queue='low')

    processed_feeds = 1
    feeds_to_import = []

    for gr_feed in reader.feeds[:GR_MAX_FEEDS]:

        try:
            feed = create_feed(gr_feed, mongo_user)

        except Feed.DoesNotExist:
            LOGGER.exception(
                u'Could not create feed “%s” for user %s, '
                u'skipped.', gr_feed.title, username)
            continue

        processed_feeds += 1
        feeds_to_import.append((user_id, username, gr_feed, feed))

        LOGGER.info(u'Imported feed “%s” (%s/%s) for user %s…', gr_feed.title,
                    processed_feeds, total_feeds, username)

    # We need to clamp the total, else the task won't finish in
    # the case where the user has more feeds than allowed.
    #
    gri.total_feeds(min(processed_feeds, GR_MAX_FEEDS))

    for feed_args in feeds_to_import:
        import_google_reader_articles.apply_async(feed_args, queue='low')

    LOGGER.info(
        u'Imported %s/%s feeds in %s. Articles import already '
        u'started with limits: date: %s, %s waves of %s articles, '
        u'max articles: %s, reads: %s, starred: %s.', processed_feeds,
        total_feeds, naturaldelta(now() - gri.start()),
        naturaltime(max([gri.reg_date(), GR_OLDEST_DATE])),
        config.GR_WAVE_LIMIT, config.GR_LOAD_LIMIT, config.GR_MAX_ARTICLES,
        total_reads, total_starred)
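
Note: the final log line above mixes naturaldelta (a plain duration) with
naturaltime (a moment expressed relative to now). Assuming both come from the
humanize package, the difference looks like this:

from datetime import datetime, timedelta

from humanize import naturaldelta, naturaltime

one_hour = timedelta(hours=1)

print(naturaldelta(one_hour))                  # e.g. 'an hour'
print(naturaltime(datetime.now() - one_hour))  # e.g. 'an hour ago'
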
Example #24
def refresh_all_feeds(limit=None, force=False):
    u""" Refresh all feeds (RSS/Mail/Twitter…). """

    if config.FEED_FETCH_DISABLED:
        # Do not raise any .retry(), this is a scheduled task.
        LOGGER.warning(u'Feed refresh disabled in configuration.')
        return

    # As FEED_GLOBAL_REFRESH_INTERVAL is dynamically modifiable,
    # we should re-evaluate it each time we run.
    this_round_expire_time = (config.FEED_GLOBAL_REFRESH_INTERVAL * 60 -
                              config.FEED_GLOBAL_REFRESH_INTERVAL)

    # Be sure two refresh operations don't overlap, but don't hold the
    # lock too long if something goes wrong. In production conditions
    # as of 20130812, refreshing all feeds takes only a moment:
    # [2013-08-12 09:07:02,028: INFO/MainProcess] Task
    #       oneflow.core.tasks.refresh_all_feeds succeeded in 1.99886608124s.
    #
    my_lock = RedisExpiringLock(REFRESH_ALL_FEEDS_LOCK_NAME,
                                expire_time=this_round_expire_time)

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(_(u'Forcing all feed refresh…'))

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'refresh_all_feeds() is already locked, aborting.')
            return

    # This should bring us a Polymorphic Query to refresh all feeds types.
    feeds = BaseFeed.objects.filter(
        is_active=True, is_internal=False).order_by('date_last_fetch')

    if limit:
        feeds = feeds[:limit]

    with benchmark('refresh_all_feeds()'):

        try:
            count = 0
            mynow = now()

            for feed in feeds:

                if feed.refresh_lock.is_locked():
                    # The refresh task launched before its expiration, and is
                    # still [long] running while we want to launch another.
                    # Avoid that: the new one would exit immediately because
                    # date_last_fetch is too recent.
                    LOGGER.debug(u'Feed %s already locked, skipped.', feed)
                    continue

                if feed.date_last_fetch is None:

                    basefeed_refresh_task.apply_async(
                        args=(feed.id, ),

                        # in `this_round_expire_time`, we will relaunch it
                        # anyway, so don't clutter the queue with double work.
                        expire=this_round_expire_time,
                    )

                    LOGGER.info(
                        u'Launched immediate refresh of feed %s which '
                        u'has never been refreshed.', feed)
                    count += 1
                    continue

                if feed.fetch_interval > 86399:
                    interval_days = feed.fetch_interval / 86400
                    interval_seconds = feed.fetch_interval - (interval_days *
                                                              86400)

                    interval = timedelta(days=interval_days,
                                         seconds=interval_seconds)

                else:
                    interval = timedelta(seconds=feed.fetch_interval)

                if force or feed.date_last_fetch + interval < mynow:

                    how_late = feed.date_last_fetch + interval - mynow
                    how_late = how_late.days * 86400 + how_late.seconds

                    late = feed.date_last_fetch + interval < mynow

                    basefeed_refresh_task.apply_async(
                        args=(feed.id, ),
                        kwargs={'force': force},
                        expire=this_round_expire_time,
                    )

                    LOGGER.info(u'Launched refresh of feed %s (%s %s).', feed,
                                naturaldelta(how_late),
                                u'late' if late else u'earlier')
                    count += 1

        finally:
            # HEADS UP: in case the system is overloaded and feeds refresh()
            #           tasks don't complete fast enough, the current task
            #           will overload it even more. Thus, we intentionally
            #           don't release the lock, to keep over-relaunched
            #           global tasks from flooding the refresh queue with
            #           useless double, triple, N-fold individual tasks.
            #
            # my_lock.release()
            pass

        LOGGER.info(u'Launched %s refreshes out of %s feed(s) checked.', count,
                    feeds.count())
Example #25
def global_feeds_checker():
    """ Check all RSS feeds and their dependants. Close them if needed.

    No parameter.
    """

    def pretty_print_feed(feed):

        return (u'- %s,\n'
                u'    - admin url: http://%s%s\n'
                u'    - public url: %s\n'
                u'    - %s\n'
                u'    - reason: %s\n'
                u'    - last error: %s') % (
                    feed,

                    settings.SITE_DOMAIN,

                    reverse('admin:%s_%s_change' % (
                        feed._meta.app_label,
                        feed._meta.module_name),
                        args=[feed.id]),

                    # Only RSS/Atom feeds have a URL…
                    feed.url if hasattr(feed, 'url') else '(NO URL)',

                    (u'closed on %s' % feed.date_closed)
                    if feed.date_closed
                    else u'(no closing date)',

                    feed.closed_reason or
                    u'none (or manually closed from the admin interface)',

                    feed.errors[0]
                    if len(feed.errors)
                    else u'(no error recorded)')

    def pretty_print_feed_list(feed_list):

        return '\n\n'.join(
            pretty_print_feed(feed)
            for feed in feed_list
        )

    dtnow         = now()
    limit_days    = config.FEED_CLOSED_WARN_LIMIT
    closed_limit  = dtnow - timedelta(days=limit_days)
    closed_tested = 0
    reopened_list = []

    # ———————————————————————————————— See if old closed feeds can be reopened.

    old_closed_feeds = BaseFeed.objects.filter(is_active=False).filter(
        date_closed__lt=closed_limit)

    for feed in old_closed_feeds:
        # check all closed feeds monthly, on their closing date anniversary.
        if feed.date_closed.day == dtnow.day:
            if feed.check_old_closed():
                reopened_list.append(feed)
            closed_tested += 1

    # ——————————————————————————————————————————— Report recently closed feeds.

    recently_closed_feeds = BaseFeed.objects.filter(is_active=False).filter(
        Q(date_closed=None) | Q(date_closed__gte=closed_limit))

    if not recently_closed_feeds.exists():
        LOGGER.info(u'No feed was closed in the last %s days; %s '
                    u'already-closed feeds were checked for a possible '
                    u'return to life, of which %s were reopened.',
                    limit_days, closed_tested,
                    len(reopened_list))
        return

    count = recently_closed_feeds.count()

    mail_managers(_(u'Reminder: {0} feed(s) closed in last '
                    u'{1} day(s), {2} automatically reopened').format(
                        count, limit_days, len(reopened_list)),
                  FEED_CHECK_TEMPLATE_TXT.format(
        feed_list=pretty_print_feed_list(recently_closed_feeds),
        closed_tested=closed_tested,
        reopened_count=len(reopened_list),
        reopened_list=pretty_print_feed_list(reopened_list)),
    )

    start_time = pytime.time()

    # Close the feeds, but only after sending the mail,
    # so that the initial reason is displayed at least
    # once to a real human.
    for feed in recently_closed_feeds:
        if feed.date_closed is None:
            feed.close('Automatic close by periodic checker task')

    LOGGER.info('Closed %s feeds in %s.', count,
                naturaldelta(pytime.time() - start_time))
Example #26
def global_duplicates_checker(limit=None, force=False):
    """ Check that duplicate articles have no more Reads anywhere.

    Fix it if not, and update all counters accordingly.

    :param limit: integer, the maximum number of duplicates to check.
        Default: none.
    :param force: boolean, default ``False``, allows bypassing and
        reacquiring the lock.
    """

    if config.CHECK_DUPLICATES_DISABLED:
        LOGGER.warning(u'Duplicates check disabled in configuration.')
        return

    # This task runs once a day. Acquire the lock for just a
    # little more time to avoid over-parallelized runs.
    my_lock = RedisExpiringLock('check_all_duplicates', expire_time=3600 * 25)

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(u'Forcing duplicates check…')

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'global_duplicates_checker() is already '
                           u'locked, aborting.')
            return

    if limit is None:
        limit = config.CHECK_DUPLICATES_LIMIT

    start_time = pytime.time()
    duplicates = BaseItem.objects.duplicate()

    total_dupes_count  = duplicates.count()
    total_reads_count  = 0
    processed_dupes    = 0
    done_dupes_count   = 0
    purged_dupes_count = 0

    purge_after_weeks_count = max(1, config.CHECK_DUPLICATES_PURGE_AFTER_WEEKS)
    purge_after_weeks_count = min(52, purge_after_weeks_count)

    purge_before_date = now() - timedelta(days=purge_after_weeks_count * 7)

    LOGGER.info(u'Done counting (took %s of pure SQL joy), starting procedure.',
                naturaldelta(pytime.time() - start_time))

    with benchmark(u"Check {0}/{1} duplicates".format(limit or u'all',
                   total_dupes_count)):

        try:
            for duplicate in duplicates.iterator():
                reads = duplicate.reads.all()

                processed_dupes += 1

                if reads.exists():
                    done_dupes_count  += 1
                    reads_count        = reads.count()
                    total_reads_count += reads_count

                    LOGGER.info(u'Duplicate %s #%s still has %s reads, fixing…',
                                duplicate._meta.model.__name__,
                                duplicate.id, reads_count)

                    duplicate.duplicate_of.register_duplicate(
                        duplicate, force=duplicate.duplicate_status
                        == DUPLICATE_STATUS.FINISHED)

                if duplicate.duplicate_status == DUPLICATE_STATUS.FINISHED:
                    #
                    # TODO: check we didn't pick up new dependencies via
                    #       race conditions between the moment the duplicate
                    #       was marked duplicate and now.

                    if duplicate.date_created < purge_before_date:
                        try:
                            with transaction.atomic():
                                duplicate.delete()
                        except:
                            LOGGER.exception(u'Exception while deleting '
                                             u'duplicate %s #%s',
                                             duplicate._meta.model.__name__,
                                             duplicate.id)

                        purged_dupes_count += 1
                        LOGGER.info(u'Purged duplicate %s #%s from database.',
                                    duplicate._meta.model.__name__,
                                    duplicate.id)

                elif duplicate.duplicate_status in (
                    DUPLICATE_STATUS.NOT_REPLACED,
                        DUPLICATE_STATUS.FAILED):
                    # Something went wrong, perhaps the
                    # task was purged before being run.
                    duplicate.duplicate_of.register_duplicate(duplicate)
                    done_dupes_count += 1

                elif duplicate.duplicate_status is None:
                    # Something went very wrong. If the article is a known
                    # duplicate, its status field should have been set to
                    # at least NOT_REPLACED.
                    duplicate.duplicate_of.register_duplicate(duplicate)
                    done_dupes_count += 1

                    LOGGER.error(u'Corrected duplicate %s #%s found with no '
                                 u'status.', duplicate._meta.model.__name__,
                                 duplicate.id)

                if limit and processed_dupes >= limit:
                    break

        finally:
            my_lock.release()

    LOGGER.info(u'global_duplicates_checker(): %s/%s duplicates processed '
                u'(%.2f%%; limit: %s), %s corrected (%.2f%%), '
                u'%s purged (%.2f%%); %s reads altered.',

                processed_dupes, total_dupes_count,
                processed_dupes * 100.0 / total_dupes_count,

                limit or u'none',

                done_dupes_count,
                (done_dupes_count * 100.0 / processed_dupes)
                if processed_dupes else 0.0,

                purged_dupes_count,
                (purged_dupes_count * 100.0 / processed_dupes)
                if processed_dupes else 0.0,

                total_reads_count)
Example #27
def refresh_all_mongo_feeds(limit=None, force=False):
    u""" Refresh all MongoEngine feeds (RSS).

    .. note:: this task should vanish when
        MongoDB → PostgreSQL migration is done.
    """

    if config.FEED_FETCH_DISABLED:
        # Do not raise any .retry(), this is a scheduled task.
        LOGGER.warning(u'Feed refresh disabled in configuration.')
        return

    # Be sure two refresh operations don't overlap, but don't hold the
    # lock too long if something goes wrong. In production conditions
    # as of 20130812, refreshing all feeds takes only a moment:
    # [2013-08-12 09:07:02,028: INFO/MainProcess] Task
    #    oneflow.core.tasks.refresh_all_mongo_feeds succeeded in 1.99886608124s.
    my_lock = RedisExpiringLock(
        'refresh_all_mongo_feeds',
        expire_time=config.FEED_GLOBAL_REFRESH_INTERVAL * 180 - 1

    )

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(_(u'Forcing all feed refresh…'))

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'refresh_all_mongo_feeds() is already '
                           u'locked, aborting.')
            return

    feeds = MongoFeed.objects.filter(closed__ne=True, is_internal__ne=True)

    if limit:
        feeds = feeds.limit(limit)

    # No need for caching and cluttering CPU/memory for a one-shot thing.
    feeds.no_cache()

    with benchmark('refresh_all_mongo_feeds()'):

        try:
            count = 0
            mynow = now()

            for feed in feeds:

                if feed.refresh_lock.is_locked():
                    LOGGER.info(u'Feed %s already locked, skipped.', feed)
                    continue

                interval = timedelta(seconds=feed.fetch_interval)

                if feed.last_fetch is None:

                    mongo_feed_refresh_task.delay(feed.id)

                    LOGGER.info(u'Launched immediate refresh of feed %s which '
                                u'has never been refreshed.', feed)

                elif force or feed.last_fetch + interval < mynow:

                    how_late = feed.last_fetch + interval - mynow
                    how_late = how_late.days * 86400 + how_late.seconds

                    countdown = 0
                    mongo_feed_refresh_task.delay(feed.id, force)

                    LOGGER.info(u'%s refresh of feed %s %s (%s late).',
                                u'Scheduled randomized'
                                if countdown else u'Launched',
                                feed,
                                u' in {0}'.format(naturaldelta(countdown))
                                if countdown else u'in the background',
                                naturaldelta(how_late))
                    count += 1

        finally:
            # HEADS UP: see core.tasks.refresh_all_feeds() for note.
            # my_lock.release()
            pass

        LOGGER.info(u'Launched %s refreshes out of %s feed(s) checked.',
                    count, feeds.count())