Example #1
    def mark_usable(self, commit=True, verbose=True):
        """ Mark the account usable and clear error. """

        if verbose:
            LOGGER.info(u'%s is now considered usable.', self)

        if self.is_usable:
            start_task = False

        else:
            start_task = True

        self.date_last_conn = now()
        self.conn_error = None
        self.is_usable = True

        if commit:
            self.save()

        try:
            usable_start_task = self._meta.model.usable_start_task

        except:
            pass

        else:
            if start_task:
                usable_start_task.delay(self.id)
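A minimal sketch of the same guard written with ``getattr`` instead of a bare ``except`` (the helper name is hypothetical; ``usable_start_task`` is expected to be attached to the model by ``register_task_method()``, as other snippets below suggest):

def delay_usable_start_task(account):
    """ Launch the model's usable_start_task, if one was registered. """

    # When register_task_method() did not run, the attribute simply
    # does not exist on the model class; getattr() covers that case
    # without swallowing unrelated exceptions.
    task = getattr(account._meta.model, 'usable_start_task', None)

    if task is not None:
        task.delay(account.id)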
Example #2
    def mark_all_read(self, latest_displayed_read=None):

        if self.unread_items_count == 0:
            return

        # count = self.unread_items_count

        # self.unread_items_count = 0

        # for folder in self.folders:
        #     folder.unread_items_count -= count

        # self.user.unread_items_count -= count

        # Marking all read is not a database-friendly operation,
        # thus it's run via a task to be able to return now immediately,
        # with cache numbers updated.
        #
        # HEADS UP: this task name will be registered later
        # by the register_task_method() call.
        globals()['subscription_mark_all_read_in_database_task'].delay(
            self.id,
            now() if latest_displayed_read is None
            #
            # TRICK: we use self.user.reads for 2 reasons:
            #       - avoid importing `Read`, which would create a loop.
            #       - in case of a folder/global initiated mark_all_read(),
            #         the ID can be one of a read in another subscription
            #         and in this case, self.reads.get() will fail.
            #
            else latest_displayed_read.date_created)
Example #3
File: base.py Project: 1flow/1flow
    def older_than_two_weeks(self):
        """ Return items created more than 14 days ago. """

        two_week_delta = timedelta(days=14)
        two_weeks_before = now() - two_week_delta

        return self.filter(date_created__lte=two_weeks_before)
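These date-window helpers are typically defined on a custom QuerySet and exposed through a manager so they can be chained. A minimal sketch, assuming a hypothetical ``Item`` model with a ``date_created`` field (not part of the excerpts above):

from datetime import timedelta

from django.db import models
from django.utils.timezone import now


class ItemQuerySet(models.QuerySet):

    def older_than_two_weeks(self):
        """ Return items created more than 14 days ago. """

        return self.filter(date_created__lte=now() - timedelta(days=14))


class Item(models.Model):

    date_created = models.DateTimeField(default=now)

    # as_manager() exposes the helpers on Item.objects, so they chain:
    # Item.objects.older_than_two_weeks().count()
    objects = ItemQuerySet.as_manager()

    class Meta:
        app_label = 'core'  # assumption: any installed app label works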
Example #4
    def older_than_one_day(self):
        """ Return items created more than 24 hours ago. """

        one_day_delta = timedelta(days=1)
        one_day_before = now() - one_day_delta

        return self.filter(date_created__lte=one_day_before)
Example #5
File: base.py Project: 1flow/1flow
    def mark_usable(self, commit=True, verbose=True):
        """ Mark the account usable and clear error. """

        if verbose:
            LOGGER.info(u'%s is now considered usable.', self)

        if self.is_usable:
            start_task = False

        else:
            start_task = True

        self.date_last_conn = now()
        self.conn_error = None
        self.is_usable = True

        if commit:
            self.save()

        try:
            usable_start_task = self._meta.model.usable_start_task

        except:
            pass

        else:
            if start_task:
                usable_start_task.delay(self.id)
Example #6
    def older_than_one_week(self):
        """ Return items created more than 7 days ago. """

        one_week_delta = timedelta(days=7)
        one_week_before = now() - one_week_delta

        return self.filter(date_created__lte=one_week_before)
Example #7
File: base.py Project: 1flow/1flow
    def older_than_one_month(self):
        """ Return items created more than 31 days ago. """

        one_month_delta = timedelta(days=31)
        one_month_before = now() - one_month_delta

        return self.filter(date_created__lte=one_month_before)
Example #8
    def older_than_one_month(self):
        """ Return items created more than 31 days ago. """

        one_month_delta = timedelta(days=31)
        one_month_before = now() - one_month_delta

        return self.filter(date_created__lte=one_month_before)
Example #9
    def older_than_two_weeks(self):
        """ Return items created more than 14 days ago. """

        two_week_delta = timedelta(days=14)
        two_weeks_before = now() - two_week_delta

        return self.filter(date_created__lte=two_weeks_before)
Example #10
File: base.py Project: 1flow/1flow
    def older_than_one_day(self):
        """ Return items created more than 24 hours ago. """

        one_day_delta = timedelta(days=1)
        one_day_before = now() - one_day_delta

        return self.filter(date_created__lte=one_day_before)
Example #11
File: base.py Project: 1flow/1flow
    def older_than_one_week(self):
        """ Return items created more than 7 days ago. """

        one_week_delta = timedelta(days=7)
        one_week_before = now() - one_week_delta

        return self.filter(date_created__lte=one_week_before)
Example #12
File: read.py Project: 1flow/1flow
    def mark_read(self):
        """ Mark a read as read and update cached descriptors. """

        self.is_read = True
        self.date_read = now()
        self.save()

        self.is_read_changed()
Example #13
    def running_old(self):
        """ An import running for too long is probably crashed.

        But we didn't notice it, or celery crashed. Whatever.
        """

        return self.status == IMPORT_STATUS.RUNNING \
            and self.date_started < (now() - timedelta(seconds=21600))
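A hedged sketch of a watchdog built around ``running_old()``; ``UserImport`` and ``LOGGER`` are assumed names, while ``IMPORT_STATUS`` and ``run()`` appear in other snippets below:

def requeue_crashed_imports():
    """ Re-run imports that look crashed (hypothetical periodic task). """

    for user_import in UserImport.objects.filter(
            status=IMPORT_STATUS.RUNNING):

        if user_import.running_old():
            LOGGER.warning(u'Import %s has been running for more than '
                           u'6 hours, re-running it.', user_import.id)
            user_import.run()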
Example #14
    def error(self, message, commit=True, last_fetch=False):
        """ Take note of an error.

        If the maximum number of errors is reached, close the feed and
        return ``True``; else just return ``False``.

        :param last_fetch: as a commodity, set this to ``True`` if you
            want this method to update the :attr:`last_fetch` attribute
            with the value of ``now()`` (UTC). Default: ``False``.

        :param commit: as in any other Django DB-related method, set
            this to ``False`` if you don't want this method to call
            ``self.save()``. Default: ``True``.
        """

        LOGGER.error(u'Error on feed %s: %s.', self, message)

        error_message = u'{0} @@{1}'.format(message, now().isoformat())

        # Put the errors more recent first.
        self.errors.insert(0, error_message)

        if last_fetch:
            self.date_last_fetch = now()

        retval = False

        if len(self.errors) >= config.FEED_FETCH_MAX_ERRORS:
            if self.is_active:
                self.close(u'Too many errors on the feed. Last was: %s' %
                           self.errors[0],
                           commit=False)

                # LOGGER.critical(u'Too many errors on feed %s, closed.', self)

            # Keep only the most recent errors.
            self.errors = self.errors[:config.FEED_FETCH_MAX_ERRORS]

            retval = True

        if commit:
            self.save()

        statsd.incr('feeds.refresh.global.errors')

        return retval
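A hedged usage sketch for ``error()``; only its signature is taken from the code above, while the surrounding helper and the ``fetcher`` callable are assumptions:

def fetch_with_error_tracking(feed, fetcher):
    """ Run fetcher(feed), recording failures on the feed itself. """

    try:
        fetcher(feed)

    except Exception as exc:
        # error() returns True when the feed has just been closed
        # because config.FEED_FETCH_MAX_ERRORS was reached.
        return feed.error(u'Fetch failed: {0}'.format(exc), last_fetch=True)

    return False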
Example #15
    def mark_read(self):
        """ Mark a read as read and update cached descriptors. """

        self.is_read = True
        self.date_read = now()
        self.save()

        self.is_read_changed()
Example #16
    def created_previous_hour(self):
        """ Return items created between 61 and 120 minutes inclusive. """

        one_hour_delta = timedelta(seconds=3600)
        one_hour_before = now() - one_hour_delta
        two_hours_before = one_hour_before - one_hour_delta

        return self.filter(date_created__lte=one_hour_before,
                           date_created__gte=two_hours_before)
Example #17
    def update_last_fetch(self):
        """ Allow to customize the last fetch datetime.

        This method exists to be overridden by “under development” classes,
        to allow not updating the last_fetch attribute and keep fetching
        data forever on development machines.
        """

        self.date_last_fetch = now()
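The docstring describes an override hook; a minimal sketch of the kind of development-only subclass it enables (``RssAtomFeed`` and the proxy setup are assumptions):

class DevelopmentFeed(RssAtomFeed):
    """ Hypothetical feed variant used on development machines only. """

    class Meta:
        proxy = True

    def update_last_fetch(self):
        # Never record the fetch date: the feed always looks stale,
        # so it is fetched again on every refresh run.
        pass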
Example #18
File: base.py Project: 1flow/1flow
    def created_previous_month(self):
        """ Return items created between 32 and 62 days inclusive. """

        one_month_delta = timedelta(days=31)
        one_month_before = now() - one_month_delta
        two_months_before = one_month_before - one_month_delta

        return self.filter(date_created__lte=one_month_before,
                           date_created__gte=two_months_before)
Example #19
    def older_than_delta(self, custom_timedelta):
        """ Return items created more than :param:`delta` ago.

        :param delta: a python :class:`~datetime.timedelta` object.
        """

        custom_delta_before = now() - custom_timedelta

        return self.filter(date_created__lt=custom_delta_before)
Example #20
File: base.py Project: 1flow/1flow
    def created_previous_week(self):
        """ Return items created between 8 and 14 days inclusive. """

        one_week_delta = timedelta(days=7)
        one_week_before = now() - one_week_delta
        two_weeks_before = one_week_before - one_week_delta

        return self.filter(date_created__lte=one_week_before,
                           date_created__gte=two_weeks_before)
Example #21
File: base.py Project: 1flow/1flow
    def created_previous_day(self):
        """ Return items created between 25 and 48 hours inclusive. """

        one_day_delta = timedelta(days=1)
        one_day_before = now() - one_day_delta
        two_days_before = one_day_before - one_day_delta

        return self.filter(date_created__lte=one_day_before,
                           date_created__gte=two_days_before)
Example #22
File: base.py Project: 1flow/1flow
    def created_previous_hour(self):
        """ Return items created between 61 and 120 minutes inclusive. """

        one_hour_delta = timedelta(seconds=3600)
        one_hour_before = now() - one_hour_delta
        two_hours_before = one_hour_before - one_hour_delta

        return self.filter(date_created__lte=one_hour_before,
                           date_created__gte=two_hours_before)
Example #23
    def created_previous_day(self):
        """ Return items created between 25 and 48 hours inclusive. """

        one_day_delta = timedelta(days=1)
        one_day_before = now() - one_day_delta
        two_days_before = one_day_before - one_day_delta

        return self.filter(date_created__lte=one_day_before,
                           date_created__gte=two_days_before)
Example #24
    def created_previous_month(self):
        """ Return items created between 32 and 62 days inclusive. """

        one_month_delta = timedelta(days=31)
        one_month_before = now() - one_month_delta
        two_months_before = one_month_before - one_month_delta

        return self.filter(date_created__lte=one_month_before,
                           date_created__gte=two_months_before)
Example #25
File: base.py Project: 1flow/1flow
    def older_than_delta(self, custom_timedelta):
        """ Return items created more than :param:`delta` ago.

        :param delta: a python :class:`~datetime.timedelta` object.
        """

        custom_delta_before = now() - custom_timedelta

        return self.filter(date_created__lt=custom_delta_before)
Example #26
    def created_previous_week(self):
        """ Return items created between 8 and 14 days inclusive. """

        one_week_delta = timedelta(days=7)
        one_week_before = now() - one_week_delta
        two_weeks_before = one_week_before - one_week_delta

        return self.filter(date_created__lte=one_week_before,
                           date_created__gte=two_weeks_before)
Example #27
        def format_quota(quota):
            if quota['remaining'] is None:
                return u' (no quota information)'

            if quota['remaining']:
                return u'; quota: %s call(s) remaining' % quota['remaining']

            else:
                return u'; quota exhausted, reset in %s' % (
                    naturaldelta(now() - quota['reset'].replace(tzinfo=utc)))
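For reference, a sketch of the quota mappings ``format_quota()`` expects, one per branch (keys inferred from the code above, values made up):

from datetime import datetime

quota_unknown   = {'remaining': None, 'reset': None}
quota_available = {'remaining': 42,   'reset': None}
quota_exhausted = {'remaining': 0,
                   # naive UTC datetime: format_quota() adds tzinfo itself.
                   'reset': datetime(2015, 1, 1, 12, 30)}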
Example #28
def create_tweet_from_id(tweet_id, feeds=None, origin=None):
    """ From a Tweet ID, create a 1flow tweet via the REST API.


    https://dev.twitter.com/rest/reference/get/statuses/show/%3Aid

    .. todo:: use http://celery.readthedocs.org/en/latest/reference/celery.contrib.batches.html  # NOQA
        to bulk get statuses and not exhaust the API Quota.
    """

    raise NotImplementedError('Needs a full review / redesign for tweets.')

    if feeds is None:
        feeds = []

    elif not hasattr(feeds, '__iter__'):
        feeds = [feeds]

    # TODO: find tweet publication date while fetching content…
    # TODO: set Title during fetch…

    try:
        new_tweet, created = Tweet.create_tweet(
            url=tweet_id.replace(' ', '%20'),
            title=_(u'Imported item from {0}').format(clean_url(tweet_id)),
            feeds=feeds,
            origin=ORIGINS.WEBIMPORT)

    except:
        # NOTE: duplication handling is already
        # taken care of in Tweet.create_tweet().
        LOGGER.exception(u'Tweet creation from URL %s failed.', tweet_id)
        return None, False

    mutualized = created is None

    if created or mutualized:
        for feed in feeds:
            feed.recent_items_count += 1
            feed.all_items_count += 1

    ze_now = now()

    for feed in feeds:
        feed.latest_item_date_published = ze_now

        # Even if the tweet wasn't created, we need to create reads.
        # In the case of a mutualized tweet, it will be fetched only
        # once, but all subscribers of all feeds must be connected to
        # it to be able to read it.
        for subscription in feed.subscriptions.all():
            subscription.create_read(new_tweet, verbose=created)

    # Don't forget the parentheses, else we return ``False`` every time.
    return new_tweet, created or (None if mutualized else False)
Example #29
    def refresh_must_abort(self, force=False, commit=True):
        """ Returns ``True`` if one or more abort conditions is met.
            Checks the feed cache lock, the ``last_fetch`` date, etc.
        """

        if not self.is_active:
            LOGGER.info(u'%s %s: is currently inactive, refresh aborted.',
                        self._meta.verbose_name, self.id)
            return True

        if self.is_internal:
            LOGGER.info(u'%s %s: being internal, no need to refresh.',
                        self._meta.verbose_name, self.id)
            return True

        if config.FEED_FETCH_DISABLED:
            # we do not raise .retry() because the global refresh
            # task will call us again anyway at next global check.
            LOGGER.info(u'%s %s: refresh disabled by configuration.',
                        self._meta.verbose_name, self.id)
            return True

        try:
            if self.refresh_must_abort_internal():
                return True

        except AttributeError:
            pass

        # ————————————————————————————————————————————————  Try to acquire lock

        if not self.refresh_lock.acquire():
            if force:
                LOGGER.warning(u'%s %s: forcing refresh unlocking.',
                               self._meta.verbose_name, self.id)
                self.refresh_lock.release()
                self.refresh_lock.acquire()

            else:
                LOGGER.info(u'%s %s: refresh already locked, aborting.',
                            self._meta.verbose_name, self.id)
                return True

        if self.date_last_fetch is not None and self.date_last_fetch >= (
                now() - timedelta(seconds=self.fetch_interval)):
            if force:
                LOGGER.warning(
                    u'%s %s: forcing refresh despite recently '
                    u'fetched.', self._meta.verbose_name, self.id)
            else:
                LOGGER.info(u'%s %s: last refresh too recent, aborting.',
                            self._meta.verbose_name, self.id)
                return True

        return False
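A hedged sketch of a call site: when ``refresh_must_abort()`` returns ``False`` it has acquired ``refresh_lock``, so the caller must release it; the ``refresh()`` method name is an assumption:

def refresh_feed(feed, force=False):
    """ Hypothetical refresh task body built around refresh_must_abort(). """

    if feed.refresh_must_abort(force=force):
        # Inactive, internal, globally disabled, already locked,
        # or fetched too recently: nothing to do.
        return

    try:
        feed.refresh()

    finally:
        # The lock was acquired inside refresh_must_abort().
        feed.refresh_lock.release()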
Example #30
        def format_quota(quota):
            if quota['remaining'] is None:
                return u' (no quota information)'

            if quota['remaining']:
                return u'; quota: %s call(s) remaining' % quota['remaining']

            else:
                return u'; quota exhausted, reset in %s' % (
                    naturaldelta(now() - quota['reset'].replace(tzinfo=utc))
                )
Example #31
File: tweet.py Project: 1flow/1flow
def create_tweet_from_id(tweet_id, feeds=None, origin=None):
    """ From a Tweet ID, create a 1flow tweet via the REST API.


    https://dev.twitter.com/rest/reference/get/statuses/show/%3Aid

    .. todo:: use http://celery.readthedocs.org/en/latest/reference/celery.contrib.batches.html  # NOQA
        to bulk get statuses and not exhaust the API Quota.
    """

    raise NotImplementedError('Needs a full review / redesign for tweets.')

    if feeds is None:
        feeds = []

    elif not hasattr(feeds, '__iter__'):
        feeds = [feeds]

    # TODO: find tweet publication date while fetching content…
    # TODO: set Title during fetch…

    try:
        new_tweet, created = Tweet.create_tweet(
            url=tweet_id.replace(' ', '%20'),
            title=_(u'Imported item from {0}').format(clean_url(tweet_id)),
            feeds=feeds, origin=ORIGINS.WEBIMPORT)

    except:
        # NOTE: duplication handling is already
        # taken care of in Tweet.create_tweet().
        LOGGER.exception(u'Tweet creation from URL %s failed.', tweet_id)
        return None, False

    mutualized = created is None

    if created or mutualized:
        for feed in feeds:
            feed.recent_items_count += 1
            feed.all_items_count += 1

    ze_now = now()

    for feed in feeds:
        feed.latest_item_date_published = ze_now

        # Even if the tweet wasn't created, we need to create reads.
        # In the case of a mutualized tweet, it will be fetched only
        # once, but all subscribers of all feeds must be connected to
        # it to be able to read it.
        for subscription in feed.subscriptions.all():
            subscription.create_read(new_tweet, verbose=created)

    # Don't forget the parentheses, else we return ``False`` every time.
    return new_tweet, created or (None if mutualized else False)
Example #32
def toggle(request, klass, oid, key):
    """ Toggle any object property, given its a boolean on the DB side. """

    #
    # TODO: push notifications on error to the user.
    #

    try:
        obj = get_object_or_404(globals()[klass], id=oid)

    except:
        LOGGER.exception(u'Oops in toggle! Model “%s” not imported?', klass)
        return HttpResponseTemporaryServerError()

    if obj.user != request.user:
        return HttpResponseForbidden(u'Not owner')

    try:
        new_value = not getattr(obj, key)
        setattr(obj, key, new_value)

    except:
        msg = (u'Unable to toggle %s of %s', key, obj)
        LOGGER.exception(*msg)
        return HttpResponseTemporaryServerError(msg[0] % msg[1:])

    else:
        if key.startswith('is_'):
            date_attr = 'date_' + key[3:]

            if hasattr(obj, date_attr):
                # LOGGER.info(u'%s %s: set %s to NOW.',
                #             obj._meta.verbose_name, obj.id, date_attr)
                setattr(obj, date_attr, now() if new_value else None)

        try:
            getattr(obj, key + '_changed')()

        except AttributeError:
            pass

        except:
            LOGGER.exception(
                u'Unhandled exception while running '
                u'%s.%s_changed() on %s.', obj.__class__.__name__, key, obj)

        obj.save()

    if request.is_ajax():
        return HttpResponse(u'DONE.')

    else:
        return HttpResponseRedirect(
            request.META.get('HTTP_REFERER', reverse('home')))
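A plausible URL wiring for this view, Django 1.x style; the pattern itself is an assumption, only the view signature comes from the code above:

from django.conf.urls import url

urlpatterns = [
    url(r'^toggle/(?P<klass>\w+)/(?P<oid>\d+)/(?P<key>\w+)/$',
        toggle, name='toggle'),
]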
Example #33
def toggle(request, klass, oid, key):
    """ Toggle any object property, given its a boolean on the DB side. """

    #
    # TODO: push notifications on error to the user.
    #

    try:
        obj = get_object_or_404(globals()[klass], id=oid)

    except:
        LOGGER.exception(u'Oops in toggle! Model “%s” not imported?', klass)
        return HttpResponseTemporaryServerError()

    if obj.user != request.user:
        return HttpResponseForbidden(u'Not owner')

    try:
        new_value = not getattr(obj, key)
        setattr(obj, key, new_value)

    except:
        msg = (u'Unable to toggle %s of %s', key, obj)
        LOGGER.exception(*msg)
        return HttpResponseTemporaryServerError(msg[0] % msg[1:])

    else:
        if key.startswith('is_'):
            date_attr = 'date_' + key[3:]

            if hasattr(obj, date_attr):
                # LOGGER.info(u'%s %s: set %s to NOW.',
                #             obj._meta.verbose_name, obj.id, date_attr)
                setattr(obj, date_attr, now() if new_value else None)

        try:
            getattr(obj, key + '_changed')()

        except AttributeError:
            pass

        except:
            LOGGER.exception(u'Unhandled exception while running '
                             u'%s.%s_changed() on %s.',
                             obj.__class__.__name__, key, obj)

        obj.save()

    if request.is_ajax():
        return HttpResponse(u'DONE.')

    else:
        return HttpResponseRedirect(request.META.get('HTTP_REFERER',
                                    reverse('home')))
Example #34
    def reopen(self, message=None, verbose=True, commit=True):
        """ Reopen the feed, clearing errors, date closed, etc. """

        self.errors = []
        self.is_active = True
        self.date_closed = now()
        self.closed_reason = u'Reopen on %s' % now().isoformat()

        if commit:
            self.save()

        statsd.gauge('feeds.counts.open', 1, delta=True)

        if verbose:
            if message is None:
                LOGGER.info(u'%s %s: %sre-opened.', self._meta.verbose_name,
                            self.id, u'' if commit else u'temporarily ')

            else:
                LOGGER.info(u'%s %s: %s', self._meta.verbose_name, self.id,
                            message)
Example #35
    def close(self, reason=None, commit=True):
        """ Close the feed with or without a reason. """

        self.is_active = False
        self.date_closed = now()
        self.closed_reason = reason or _(u'NO REASON GIVEN')

        if commit:
            self.save()

        statsd.gauge('feeds.counts.open', -1, delta=True)

        LOGGER.warning(u'%s %s: closed with reason “%s”.',
                       self._meta.verbose_name, self.id, self.closed_reason)
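A short maintenance sketch pairing ``close()`` and ``reopen()``; the helper names and the queryset filter are illustrative, while ``BaseFeed`` appears in the checker snippets below:

def close_provider_feeds(feeds_to_close):
    """ Close a batch of feeds, e.g. when their provider goes down. """

    for feed in feeds_to_close:
        feed.close(u'Provider shut down')


def reopen_provider_feeds():
    """ Reopen those feeds once the provider is back online. """

    for feed in BaseFeed.objects.filter(is_active=False,
                                        closed_reason=u'Provider shut down'):
        feed.reopen(u'Provider is back online.')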
Example #36
    def mark_unusable(self, message, args=(), exc=None, commit=True):
        """ Mark account unsable with date & message, log exception if any. """

        if exc is not None:
            if args:
                message = message % args

            message = u'{0} ({1})'.format(message, unicode(exc))
            LOGGER.exception(u'%s unusable: %s', self, message)

        self.date_last_conn = now()
        self.conn_error = message
        self.is_usable = False

        if commit:
            self.save()
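A hedged sketch showing how ``mark_usable()`` and ``mark_unusable()`` pair up around a connection test; ``connect`` is a hypothetical callable:

def test_account_connection(account, connect):
    """ Try to connect an account, updating its usability flags. """

    try:
        connect(account)

    except Exception as exc:
        account.mark_unusable(u'Connection test for %s failed',
                              args=(account, ), exc=exc)
        return False

    account.mark_usable()
    return True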
Example #37
File: base.py Project: 1flow/1flow
    def mark_unusable(self, message, args=(), exc=None, commit=True):
        """ Mark account unsable with date & message, log exception if any. """

        if exc is not None:
            if args:
                message = message % args

            message = u'{0} ({1})'.format(message, unicode(exc))
            LOGGER.exception(u'%s unusable: %s', self, message)

        self.date_last_conn = now()
        self.conn_error = message
        self.is_usable = False

        if commit:
            self.save()
Example #38
    def run(self):
        """ Run the import. """

        #
        # NOTE: we don't care if the import was already running, finished,
        #       whatever. This class is able to recover and re-run itself
        #       over and over without doing bad things in the database.
        #

        is_retrying = self.status == IMPORT_STATUS.RETRY

        self.status = IMPORT_STATUS.RUNNING
        self.date_started = now()
        self.save()

        try:
            return self.run_internal()

        except:
            LOGGER.exception(u'User import %s failed.', self.id)

            if is_retrying:
                message_user(self.user,
                             _(u'Your import #{0} failed to run after a '
                               u'retry. Please review it before relaunching '
                               u'it manually again.').format(self.id),
                             constants.ERROR)

                self.status = IMPORT_STATUS.FAILED

            else:
                countdown = randrange(1800, 3600)
                delta_cd = naturaldelta(timedelta(seconds=countdown))

                message_user(self.user,
                             _(u'Your import #{0} failed to run. It will '
                               u'be automatically retried in {1}.').format(
                                 self.id, delta_cd),
                             constants.WARNING)

                globals()['userimport_run_task'].apply_async(
                    (self.id, ), countdown=countdown)

                self.status = IMPORT_STATUS.RETRY

            self.save()
Example #39
    def run_internal(self):
        """ Import dirty work. """

        self._import_validator_ = URLValidator()
        self._import_to_create_ = set()
        self._import_created_   = {
            'feeds': [],
            'articles': []
        }
        self._import_failed_ = []

        all_in_one = False

        for importer in self.importers:
            if importer():
                all_in_one = True
                break

        if not all_in_one:
            urls = self.urls.splitlines()

            for url in urls:
                self.validate_url(url)

            for url in self._import_to_create_:
                self.import_from_one_url(url)

        self.results = {
            'created': self._import_created_,
            'failed': self._import_failed_,
        }

        if self._import_created_['articles'] or self._import_created_['feeds']:
            self.status = IMPORT_STATUS.FINISHED

        elif self._import_failed_:
            self.status = IMPORT_STATUS.FAILED

        self.date_finished = now()
        self.save()
Example #40
    def repair_missing_authors_migration_201411(cls):

        # from oneflow.core.tasks.migration import vacuum_analyze

        articles = Article.objects.filter(
            authors=None,
            date_created__gt=datetime(2014, 10, 31))

        count = articles.count()
        done = 0

        LOGGER.info(u'Starting repairing %s missing authors @%s', count, now())

        with benchmark(u'Fix missing authors on rel-DB fetched content…'):

            for article in articles:
                article.postprocess_original_data(force=True)

                # if done % 25000 == 0:
                #     vacuum_analyze()

                done += 1
Example #41
File: poke.py Project: 1flow/1flow
    def create_poke(cls, sender, recipients, message=None, attachments=None):
        """ Create a poke in all required feeds, with all required links. """

        if message is None and not bool(attachments):
            raise RuntimeError(u'Poke message and attachments cannot both '
                               u'be empty. You have to send something, '
                               u'we are not on fesse-bouc!')

        # BaseItem must have a name.
        poke_name = uuid.uuid4().hex

        poke = cls(
            name=poke_name,
            slug=poke_name,
            user=sender,
            message=message,
            is_restricted=True,
            date_published=now(),
            origin=ORIGINS.INTERNAL,
        )

        # This HAS to succeed, thus no try/except
        poke.save()

        if attachments is None:
            attachments = []

        poke.attachments.add(*attachments)

        poke.set_sender(sender)

        LOGGER.info(u'Created poke %s from %s.', poke.id, sender)

        poke.send_to_recipients(set(recipients))

        return poke, True
Example #42
    def create_poke(cls, sender, recipients, message=None, attachments=None):
        """ Create a poke in all required feeds, with all required links. """

        if message is None and not bool(attachments):
            raise RuntimeError(u'Poke message and attachments cannot both '
                               u'be empty. You have to send something, '
                               u'we are not on fesse-bouc!')

        # BaseItem must have a name.
        poke_name = uuid.uuid4().hex

        poke = cls(
            name=poke_name,
            slug=poke_name,
            user=sender,
            message=message,
            is_restricted=True,
            date_published=now(),
            origin=ORIGINS.INTERNAL,
        )

        # This HAS to succeed, thus no try/except
        poke.save()

        if attachments is None:
            attachments = []

        poke.attachments.add(*attachments)

        poke.set_sender(sender)

        LOGGER.info(u'Created poke %s from %s.', poke.id, sender)

        poke.send_to_recipients(set(recipients))

        return poke, True
Example #43
def global_feeds_checker():
    """ Check all RSS feeds and their dependants. Close them if needed.

    No parameter.
    """

    def pretty_print_feed(feed):

        return (u'- %s,\n'
                u'    - admin url: http://%s%s\n'
                u'    - public url: %s\n'
                u'    - %s\n'
                u'    - reason: %s\n'
                u'    - last error: %s') % (
                    feed,

                    settings.SITE_DOMAIN,

                    reverse('admin:%s_%s_change' % (
                        feed._meta.app_label,
                        feed._meta.module_name),
                        args=[feed.id]),

                    # Only RSS/Atom feeds have an URL…
                    feed.url if hasattr(feed, 'url') else '(NO URL)',

                    (u'closed on %s' % feed.date_closed)
                    if feed.date_closed
                    else u'(no closing date)',

                    feed.closed_reason or
                    u'none (or manually closed from the admin interface)',

                    feed.errors[0]
                    if len(feed.errors)
                    else u'(no error recorded)')

    def pretty_print_feed_list(feed_list):

        return '\n\n'.join(
            pretty_print_feed(feed)
            for feed in feed_list
        )

    dtnow         = now()
    limit_days    = config.FEED_CLOSED_WARN_LIMIT
    closed_limit  = dtnow - timedelta(days=limit_days)
    closed_tested = 0
    reopened_list = []

    # ———————————————————————————————— See if old closed feeds can be reopened.

    old_closed_feeds = BaseFeed.objects.filter(is_active=False).filter(
        date_closed__lt=closed_limit)

    for feed in old_closed_feeds:
        # check all closed feeds monthly, on their closing date anniversary.
        if feed.date_closed.day == dtnow.day:
            if feed.check_old_closed():
                reopened_list.append(feed)
            closed_tested += 1

    # ——————————————————————————————————————————— Report recently closed feeds.

    recently_closed_feeds = BaseFeed.objects.filter(is_active=False).filter(
        Q(date_closed=None) | Q(date_closed__gte=closed_limit))

    if not recently_closed_feeds.exists():
        LOGGER.info(u'No feed was closed in the last %s days; %s already '
                    u'closed feeds were checked for a possible comeback, '
                    u'of which %s were reopened.', limit_days, closed_tested,
                    len(reopened_list))
        return

    count = recently_closed_feeds.count()

    mail_managers(_(u'Reminder: {0} feed(s) closed in last '
                    u'{1} day(s), {2} automatically reopened').format(
                        count, limit_days, len(reopened_list)),
                  FEED_CHECK_TEMPLATE_TXT.format(
        feed_list=pretty_print_feed_list(recently_closed_feeds),
        closed_tested=closed_tested,
        reopened_count=len(reopened_list),
        reopened_list=pretty_print_feed_list(reopened_list)),
    )

    start_time = pytime.time()

    # Close the feeds, but after sending the mail,
    # So that initial reason is displayed at least
    # once to a real human.
    for feed in recently_closed_feeds:
        if feed.date_closed is None:
            feed.close('Automatic close by periodic checker task')

    LOGGER.info('Closed %s feeds in %s.', count,
                naturaldelta(pytime.time() - start_time))
Example #44
def go(limit=None, all_hint=None):
    """ Do the dirty things, fast. """

    TO_CLEAN = ('url', 'content', )

    URL_CLEANING_QUERY = """
UPDATE core_article SET {0}_error = NULL
WHERE core_article.baseitem_ptr_id IN (
    SELECT baseitem_ptr_id
    FROM core_article
    WHERE {0}_error = ''
    LIMIT {1}
);
"""

    COUNT_QUERY = """
SELECT COUNT(*)
FROM core_article
WHERE {0}_error = '';
"""

    def one_line(a_string):
        return re.sub(u'  +', u' ', u' '.join(a_string.splitlines()))

    if limit is None:
        limit = 10000

    if all_hint is None:
        all_hint = 7000000

    LOGGER.info(u'Starting to fix the world @ %s', now())

    with benchmark(u'Fix everything'):

        for to_clean in TO_CLEAN:

            done = 0

            with benchmark(u'Fixing %s' % to_clean):
                while True:
                    do_whatever_SQL(
                        one_line(URL_CLEANING_QUERY).format(
                            to_clean, limit
                        ),
                        [],
                        u'Fixing %s, round %s' % (to_clean, done)
                    )

                    done += 1

                    # if done % 10 == 0:
                    #     vacuum_analyze('at %s' % (done * 50000))

                    if done > (all_hint / limit):
                        count = do_whatever_SQL(
                            one_line(COUNT_QUERY).format(to_clean),
                            [],
                            u'Counting things',
                            commit=False
                        )
                        if count == 0:
                            break

                    time.sleep(20)
Example #45
    def check_reads(self,
                    items=None,
                    extended_check=False,
                    force=False,
                    commit=True):
        """ Also available as a task for background execution. """

        in_the_past = combine(
            today() - timedelta(days=config.SUBSCRIPTIONS_ITEMS_UNREAD_DAYS),
            time(0, 0, 0))

        my_now = now()

        counters = CheckReadsCounter()

        def create_read_for_item(item, params):

            _, created = self.create_read(item, verbose=False, **params)

            if created:
                counters.missing += 1

                if params.get('is_read', False):
                    counters.reads += 1

                else:
                    counters.unreads += 1

            elif created is False:
                counters.rechecked += 1

                if extended_check:
                    try:
                        item.activate_reads()

                    except:
                        LOGGER.exception(
                            u'Problem while activating reads '
                            u'of item #%s in Subscription '
                            u'#%s.check_reads(), continuing '
                            u'check.', item.id, self.id)

            else:
                counters.failed += 1

        # ——————————————————————————————————————————————— First, check articles
        # We can order them by date and connect reads in the same order.

        if items is None:
            on_items = self.feed.good_items.article().order_by(
                'Article___date_published')

        else:
            on_items = items.article().order_by('Article___date_published')

        for item in on_items.filter(Article___date_published__lt=in_the_past):

            # We reconnect the user to the whole feed history, but marking
            # old articles auto read, else there could be too much to read.
            create_read_for_item(
                item, {
                    'is_read': True,
                    'is_auto_read': True,
                    'date_read': my_now,
                    'date_auto_read': my_now,
                })

        for item in on_items.filter(
                Q(Article___date_published__gte=in_the_past)
                | Q(Article___date_published=None)):

            # default parameters, reads will be unread.
            create_read_for_item(item, {})

        # ——————————————————————————————————————————————————— Then, other items
        # Do the same, but based on the date_created

        if items is None:
            on_items = self.feed.good_items.not_instance_of(Article)

        else:
            on_items = items.not_instance_of(Article)

        for item in on_items.filter(date_updated__lt=in_the_past):

            # We reconnect the user to the whole feed history, but marking
            # old items auto read, else there could be too much to read.
            create_read_for_item(
                item, {
                    'is_read': True,
                    'is_auto_read': True,
                    'date_read': my_now,
                    'date_auto_read': my_now,
                })

        for item in on_items.filter(date_updated__gte=in_the_past):

            # default parameters, reads will be unread.
            create_read_for_item(item, {})

        for item in on_items:
            create_read_for_item(item, {})

        # —————————————————————————————————————————————————— Update descriptors

        if counters.missing or counters.rechecked:
            #
            # TODO: don't recompute everything, just
            #    add or subtract the changed counts.
            #
            self.compute_cached_descriptors(all=True, unread=True)

            for folder in self.folders.all():
                folder.compute_cached_descriptors(all=True, unread=True)

        LOGGER.info(
            u'Checked subscription #%s. '
            u'%s/%s non-existing/re-checked, '
            u'%s/%s read/unread and %s not created.', self.id,
            counters.missing, counters.rechecked, counters.reads,
            counters.unreads, counters.failed)

        return counters
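``CheckReadsCounter`` is used above but not shown; a minimal compatible container could look like this (a sketch, not the project's actual class):

class CheckReadsCounter(object):
    """ Plain counters for check_reads() bookkeeping. """

    def __init__(self):
        self.missing = 0    # reads that had to be created
        self.rechecked = 0  # reads that already existed
        self.reads = 0      # created already read (old items)
        self.unreads = 0    # created unread (recent items)
        self.failed = 0     # reads that could not be created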
Example #46
File: mongo.py Project: 1flow/1flow
def refresh_all_mongo_feeds(limit=None, force=False):
    u""" Refresh all MongoEngine feeds (RSS).

    .. note:: this task should vanish when
        MongoDB → PostgreSQL migration is done.
    """

    if config.FEED_FETCH_DISABLED:
        # Do not raise any .retry(), this is a scheduled task.
        LOGGER.warning(u'Feed refresh disabled in configuration.')
        return

    # Be sure two refresh operations don't overlap, but don't hold the
    # lock too long if something goes wrong. In production conditions
    # as of 20130812, refreshing all feeds takes only a moment:
    # [2013-08-12 09:07:02,028: INFO/MainProcess] Task
    #    oneflow.core.tasks.refresh_all_mongo_feeds succeeded in 1.99886608124s.
    my_lock = RedisExpiringLock(
        'refresh_all_mongo_feeds',
        expire_time=config.FEED_GLOBAL_REFRESH_INTERVAL * 180 - 1,
    )

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(_(u'Forcing all feed refresh…'))

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'refresh_all_mongo_feeds() is already '
                           u'locked, aborting.')
            return

    feeds = MongoFeed.objects.filter(closed__ne=True, is_internal__ne=True)

    if limit:
        feeds = feeds.limit(limit)

    # No need for caching and cluttering CPU/memory for a one-shot thing.
    feeds.no_cache()

    with benchmark('refresh_all_mongo_feeds()'):

        try:
            count = 0
            mynow = now()

            for feed in feeds:

                if feed.refresh_lock.is_locked():
                    LOGGER.info(u'Feed %s already locked, skipped.', feed)
                    continue

                interval = timedelta(seconds=feed.fetch_interval)

                if feed.last_fetch is None:

                    mongo_feed_refresh_task.delay(feed.id)

                    LOGGER.info(u'Launched immediate refresh of feed %s which '
                                u'has never been refreshed.', feed)

                elif force or feed.last_fetch + interval < mynow:

                    how_late = feed.last_fetch + interval - mynow
                    how_late = how_late.days * 86400 + how_late.seconds

                    countdown = 0
                    mongo_feed_refresh_task.delay(feed.id, force)

                    LOGGER.info(u'%s refresh of feed %s %s (%s late).',
                                u'Scheduled randomized'
                                if countdown else u'Launched',
                                feed,
                                u' in {0}'.format(naturaldelta(countdown))
                                if countdown else u'in the background',
                                naturaldelta(how_late))
                    count += 1

        finally:
            # HEADS UP: see core.tasks.refresh_all_feeds() for note.
            # my_lock.release()
            pass

        LOGGER.info(u'Launched %s refreshes out of %s feed(s) checked.',
                    count, feeds.count())
Example #47
def global_duplicates_checker(limit=None, force=False):
    """ Check that duplicate articles have no more Reads anywhere.

    Fix it if not, and update all counters accordingly.

    :param limit: integer, the maximum number of duplicates to check.
        Default: none.
    :param force: boolean, default ``False``; allows bypassing and
        re-acquiring the lock.
    """

    if config.CHECK_DUPLICATES_DISABLED:
        LOGGER.warning(u'Duplicates check disabled in configuration.')
        return

    # This task runs one a day. Acquire the lock for just a
    # little more time to avoid over-parallelized runs.
    my_lock = RedisExpiringLock('check_all_duplicates', expire_time=3600 * 25)

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(u'Forcing duplicates check…')

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'global_duplicates_checker() is already '
                           u'locked, aborting.')
            return

    if limit is None:
        limit = config.CHECK_DUPLICATES_LIMIT

    start_time = pytime.time()
    duplicates = BaseItem.objects.duplicate()

    total_dupes_count = duplicates.count()
    total_reads_count = 0
    processed_dupes = 0
    done_dupes_count = 0
    purged_dupes_count = 0

    purge_after_weeks_count = max(1, config.CHECK_DUPLICATES_PURGE_AFTER_WEEKS)
    purge_after_weeks_count = min(52, purge_after_weeks_count)

    purge_before_date = now() - timedelta(days=purge_after_weeks_count * 7)

    LOGGER.info(
        u'Done counting (took %s of pure SQL joy), starting procedure.',
        naturaldelta(pytime.time() - start_time))

    with benchmark(u"Check {0}/{1} duplicates".format(limit or u'all',
                                                      total_dupes_count)):

        try:
            for duplicate in duplicates.iterator():
                reads = duplicate.reads.all()

                processed_dupes += 1

                if reads.exists():
                    done_dupes_count += 1
                    reads_count = reads.count()
                    total_reads_count += reads_count

                    LOGGER.info(
                        u'Duplicate %s #%s still has %s reads, fixing…',
                        duplicate._meta.model.__name__, duplicate.id,
                        reads_count)

                    duplicate.duplicate_of.register_duplicate(
                        duplicate,
                        force=duplicate.duplicate_status ==
                        DUPLICATE_STATUS.FINISHED)

                if duplicate.duplicate_status == DUPLICATE_STATUS.FINISHED:
                    #
                    # TODO: check we didn't get some new race-condition
                    #       dependencies between the moment the duplicate
                    #       was marked duplicate and now.

                    if duplicate.date_created < purge_before_date:
                        try:
                            with transaction.atomic():
                                duplicate.delete()
                        except:
                            LOGGER.exception(
                                u'Exception while deleting '
                                u'duplicate %s #%s',
                                duplicate._meta.model.__name__, duplicate.id)

                        purged_dupes_count += 1
                        LOGGER.info(u'Purged duplicate %s #%s from database.',
                                    duplicate._meta.model.__name__,
                                    duplicate.id)

                elif duplicate.duplicate_status in (
                        DUPLICATE_STATUS.NOT_REPLACED,
                        DUPLICATE_STATUS.FAILED):
                    # Something went wrong, perhaps the
                    # task was purged before being run.
                    duplicate.duplicate_of.register_duplicate(duplicate)
                    done_dupes_count += 1

                elif duplicate.duplicate_status is None:
                    # Something went very wrong. If the article is a known
                    # duplicate, its status field should have been set to
                    # at least NOT_REPLACED.
                    duplicate.duplicate_of.register_duplicate(duplicate)
                    done_dupes_count += 1

                    LOGGER.error(
                        u'Corrected duplicate %s #%s found with no '
                        u'status.', duplicate._meta.model.__name__,
                        duplicate.id)

                if limit and processed_dupes >= limit:
                    break

        finally:
            my_lock.release()

    LOGGER.info(
        u'global_duplicates_checker(): %s/%s duplicates processed '
        u'(%.2f%%; limit: %s), %s corrected (%.2f%%), '
        u'%s purged (%.2f%%); %s reads altered.', processed_dupes,
        total_dupes_count, processed_dupes * 100.0 / total_dupes_count, limit
        or u'none', done_dupes_count,
        (done_dupes_count * 100.0 /
         processed_dupes) if processed_dupes else 0.0, purged_dupes_count,
        (purged_dupes_count * 100.0 /
         processed_dupes) if processed_dupes else 0.0, total_reads_count)
Example #48
def global_feeds_checker():
    """ Check all RSS feeds and their dependants. Close them if needed.

    No parameter.
    """
    def pretty_print_feed(feed):

        return (u'- %s,\n'
                u'    - admin url: http://%s%s\n'
                u'    - public url: %s\n'
                u'    - %s\n'
                u'    - reason: %s\n'
                u'    - last error: %s') % (
                    feed,
                    settings.SITE_DOMAIN,
                    reverse('admin:%s_%s_change' %
                            (feed._meta.app_label, feed._meta.module_name),
                            args=[feed.id]),

                    # Only RSS/Atom feeds have an URL…
                    feed.url if hasattr(feed, 'url') else '(NO URL)',
                    (u'closed on %s' % feed.date_closed)
                    if feed.date_closed else u'(no closing date)',
                    feed.closed_reason
                    or u'none (or manually closed from the admin interface)',
                    feed.errors[0]
                    if len(feed.errors) else u'(no error recorded)')

    def pretty_print_feed_list(feed_list):

        return '\n\n'.join(pretty_print_feed(feed) for feed in feed_list)

    dtnow = now()
    limit_days = config.FEED_CLOSED_WARN_LIMIT
    closed_limit = dtnow - timedelta(days=limit_days)
    closed_tested = 0
    reopened_list = []

    # ———————————————————————————————— See if old closed feeds can be reopened.

    old_closed_feeds = BaseFeed.objects.filter(is_active=False).filter(
        date_closed__lt=closed_limit)

    for feed in old_closed_feeds:
        # check all closed feeds monthly, on their closing date anniversary.
        if feed.date_closed.day == dtnow.day:
            if feed.check_old_closed():
                reopened_list.append(feed)
            closed_tested += 1

    # ——————————————————————————————————————————— Report recently closed feeds.

    recently_closed_feeds = BaseFeed.objects.filter(is_active=False).filter(
        Q(date_closed=None) | Q(date_closed__gte=closed_limit))

    if not recently_closed_feeds.exists():
        LOGGER.info(
            u'No feed was closed in the last %s days; %s already '
            u'closed feeds were checked for a possible comeback, '
            u'of which %s were reopened.', limit_days, closed_tested,
            len(reopened_list))
        return

    count = recently_closed_feeds.count()

    mail_managers(
        _(u'Reminder: {0} feed(s) closed in last '
          u'{1} day(s), {2} automatically reopened').format(
              count, limit_days, len(reopened_list)),
        FEED_CHECK_TEMPLATE_TXT.format(
            feed_list=pretty_print_feed_list(recently_closed_feeds),
            closed_tested=closed_tested,
            reopened_count=len(reopened_list),
            reopened_list=pretty_print_feed_list(reopened_list)),
    )

    start_time = pytime.time()

    # Close the feeds, but after sending the mail,
    # So that initial reason is displayed at least
    # once to a real human.
    for feed in recently_closed_feeds:
        if feed.date_closed is None:
            feed.close('Automatic close by periodic checker task')

    LOGGER.info('Closed %s feeds in %s.', count,
                naturaldelta(pytime.time() - start_time))
Example #49
File: stats.py Project: 1flow/1flow
def feed_distribution_by_last_fetch():
    """ compute and group feeds by last_fetch delta from now. """

    start_time = pytime.time()

    # open_feeds = Feed.objects(Q(closed=False) | Q(closed__exists=False))
    open_feeds_count = Feed.objects.filter(closed__ne=True).count()

    lower_value   = None
    loop_count    = 0
    fetched_feeds = 0
    delta_lengths = (
        timedelta(seconds=config.FEED_FETCH_DEFAULT_INTERVAL / 6),
        timedelta(seconds=config.FEED_FETCH_DEFAULT_INTERVAL / 2),
        timedelta(seconds=config.FEED_FETCH_DEFAULT_INTERVAL),
        timedelta(seconds=config.FEED_FETCH_DEFAULT_INTERVAL * 2),
        timedelta(seconds=config.FEED_FETCH_DEFAULT_INTERVAL * 6),
        timedelta(seconds=config.FEED_FETCH_DEFAULT_INTERVAL * 12),
        timedelta(days=1),
        timedelta(days=2),
        timedelta(days=3),
        timedelta(days=4),
        timedelta(days=5),
        timedelta(days=6),
        timedelta(days=7),
        timedelta(days=10),
        None
    )

    results = {}

    for delta in delta_lengths:

        upper_value = (now() - delta) if delta else None

        if lower_value is None:
            kwargs = {'last_fetch__gt': upper_value}

        elif upper_value is None:
            kwargs = {'last_fetch__lte': lower_value}

        else:
            kwargs = {'last_fetch__lte': lower_value,
                      'last_fetch__gt': upper_value}

        feeds   = Feed.objects(**kwargs)
        count   = feeds.count()
        percent = float(count * 100.0 / open_feeds_count)
        avg_fi  = sum(f.fetch_interval for f in feeds) * 1.0 / (count or 1.0)

        results[loop_count] = [
            feeds,
            count,
            percent,
            lower_value,
            upper_value,
            avg_fi,
        ]

        fetched_feeds += count
        lower_value = upper_value
        loop_count += 1

    results['meta'] = {'fetched_feeds': fetched_feeds,
                       'open_feeds_count': open_feeds_count,
                       'duration': pytime.time() - start_time,
                       'loop_count': loop_count}

    return results
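A small rendering sketch for the returned structure (purely illustrative; the key layout is taken from the code above):

def print_feed_distribution(results):
    """ Pretty-print the dict built by feed_distribution_by_last_fetch(). """

    meta = results['meta']

    for index in range(meta['loop_count']):
        feeds, count, percent, lower, upper, avg_fi = results[index]

        print('bucket %2d: %6d feeds (%5.2f%%), avg interval %.0fs, '
              'window %s -> %s' % (index, count, percent,
                                   avg_fi, lower, upper))

    print('%s feeds in %s buckets out of %s open, computed in %.2fs' % (
        meta['fetched_feeds'], meta['loop_count'],
        meta['open_feeds_count'], meta['duration']))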
Example #50
File: base.py Project: 1flow/1flow
    def recently_usable(self):
        """ Return True if the account has been tested/connected recently. """

        return self.is_usable and (
            now() - self.date_last_conn
            < timedelta(seconds=self.config_account_refresh_period))
Example #51
    def __consume_items(self, api_path, parameters=None, backfilling=False):
        """ Consume tweets from a stream (public/user).

        This is an internal method, called from :meth:`consume`.
        """

        def format_quota(quota):
            if quota['remaining'] is None:
                return u' (no quota information)'

            if quota['remaining']:
                return u'; quota: %s call(s) remaining' % quota['remaining']

            else:
                return u'; quota exhausted, reset in %s' % (
                    naturaldelta(now() - quota['reset'].replace(tzinfo=utc))
                )

        def backfill_if_needed(old_latest, max_id):
            """ See if we need to backfill, or not. """

            # If we already have a latest_id, it means we connected
            # before. Thus, we check if a backfill is needed between
            # previous session and now. If latest recorded and current
            # are different, we could eventually have missed something.
            # → BACKFILL.
            #
            # If we don't have a latest_id, it's our first
            # connection ever. Backfilling is for history, and
            # has already been launched by consume(), to not
            # wait for a hypothetical first item in low-traffic
            # streams. → NO ACTION

            if old_latest and old_latest < max_id:
                globals()[
                    'twitterfeed_backfill_task'
                ].apply_async(
                    args=(self.id, ),
                    kwargs={
                        'since_id': old_latest,
                        'max_id': max_id - 1,
                    }
                )

        LOGGER.info(u'%s: starting consume() %sloop on %s(%s)',
                    self,
                    u'for backfilling ' if backfilling else u'',
                    api_path,
                    u'' if parameters is None
                    else ', '.join(u'{0}: {1}'.format(k, v)
                                   for k, v in parameters.items()))

        # We create it here to have it in scope to get quota at the end.
        result = None

        if parameters is None:
            parameters = {}

        exit_loop = False

        max_rewind_range = config.TWITTER_BACKFILL_ALLOWED_REWIND_RANGE
        max_rewind_range_as_dt_from_now = (
            now() - timedelta(days=max_rewind_range * 7))

        infinite_count = 0
        all_processed = 0
        cur_processed = 0

        old_latest = self.latest_id
        last_item = None

        if self.account.exists():
            twitter_account = self.account.order_by('?').first()

        else:
            twitter_account = self.user.accounts.twitter().order_by('?').first()

        if twitter_account is None:
            self.close(u'No more account to run this Twitter feed!')
            return

        LOGGER.info(u'%s: consuming via account %s.', self, twitter_account)

        if not backfilling:
            self.update_last_fetch()
            self.save()

        with twitter_account as tweetapi:
            while True:
                LOGGER.debug(u'%s: %s (loop #%s)…', self,
                             u'backfilling' if backfilling else u'consuming',
                             infinite_count)
                infinite_count += 1

                try:
                    logging.disable(logging.CRITICAL)

                    try:
                        if parameters:
                            result = tweetapi.request(api_path, parameters)
                        else:
                            result = tweetapi.request(api_path)

                    finally:
                        logging.disable(logging.NOTSET)

                    if result.get_rest_quota()['remaining'] == 0:
                        LOGGER.error(u'%s: quota exhausted, exiting to '
                                     u'postpone processing.', self)
                        break

                    for item in result.get_iterator():

                        processed, exit_loop = self.__handle_one_item(
                            item, backfilling=backfilling)

                        if processed:
                            if cur_processed == 0 and not backfilling:
                                # At the first received item while streaming,
                                # we need to check if backfill is needed.
                                backfill_if_needed(old_latest, item['id'])

                            cur_processed += 1

                        if backfilling:
                            # Backfilling doesn't touch the lock.
                            continue

                        if config.FEED_FETCH_TWITTER_DISABLED:
                            LOGGER.warning(
                                u'%s: exiting because '
                                u'config.FEED_FETCH_TWITTER_DISABLED is '
                                u'now true.', self)
                            exit_loop = True

                        last_item = item

                        if exit_loop:
                            break

                    if backfilling and max_rewind_range:
                        if last_item \
                            and twitter_datetime(last_item['created_at']) \
                                < max_rewind_range_as_dt_from_now:
                            self.backfill_completed = max_rewind_range
                            self.save()

                            LOGGER.info(u'%s: backfilled to the maximum '
                                        u'allowed.', self)
                            break

                    if cur_processed == 0:

                        if backfilling:
                            # Twitter did not send us any new data while
                            # we were backfilling for full history. We won't
                            # get any data further in the past: we just
                            # hit the 800/3200 limit.
                            if parameters.get('since_id', None) is None:
                                LOGGER.info(u'%s: reached end of available '
                                            u'data on the Twitter side.',
                                            self)
                                self.backfill_completed = 0
                                self.save()

                        else:
                            # We got out of the loop without getting any new
                            # item. Just bail out, else we will keep polling
                            # Twitter again and again, exhausting our REST
                            # API quota, hitting duplicates in our database.
                            LOGGER.info(u'%s: no new item in stream.', self)

                        break

                    else:
                        # We got out of the loop with max items (200)
                        # reached, or at least more than 0. Try to
                        # {fore,back}fill again to fill the gap. If there
                        # is nothing left, the next call will return 0 and
                        # we will stop; else, the process will continue.
                        # Only if we were already at the end will it cost
                        # us an API call for nothing.
                        if backfilling:
                            parameters['max_id'] = self.oldest_id - 1

                        else:
                            parameters['since_id'] = self.latest_id

                    if not backfilling:
                        self.update_last_fetch()
                        self.save()

                        # TODO: this creates a race condition. We should
                        # just re-acquire the lock with the celery current
                        # task ID, but it's not available in the current
                        # scope (we are a method called from the task, not
                        # the task itself)…
                        self.refresh_lock.release()

                        if not self.refresh_lock.acquire():
                            LOGGER.critical(u'%s: could not re-acquire '
                                            u'our own lock, abruptly '
                                            u'terminating stream '
                                            u'consumption.', self)
                            exit_loop = True

                except KeyboardInterrupt:
                    LOGGER.warning(u'Interrupting stream consumption '
                                   u'at user request.')
                    break

                except SoftTimeLimitExceeded:
                    # This should happen only on streaming APIs.
                    LOGGER.info(u'%s: time limit reached, terminating '
                                u'to let things flow.', self)

                    if self.can_continue_consuming():
                        if not backfilling:

                            # Update the last fetch date so the global
                            # refresh task does not relaunch us while we
                            # are already running.
                            self.update_last_fetch()
                            self.save()

                            # Relaunch immediately if a worker is available,
                            # so as not to lose any tweets on a prolix feed.
                            globals()['twitterfeed_consume_task'].delay(self.id)

                    else:
                        LOGGER.warning(u'%s: not active anymore, exiting.',
                                       self)

                    break

                except TwitterRequestError as e:
                    # https://dev.twitter.com/overview/api/response-codes
                    if e.status_code in (420, 429):
                        LOGGER.error(u'%s: API rate exceeded (%s) while %s, '
                                     u'exiting loop to throttle down.', self,
                                     e.status_code, u'backfilling'
                                     if backfilling else u'consuming')
                        statsd.incr('api.twitter.messages.rate_exceeded')

                    else:
                        LOGGER.error(u'%s: Twitter error %s while %s %s, '
                                     u'exiting.', self, unicode(e),
                                     u'backfilling' if backfilling
                                     else u'consuming', api_path)

                    exit_loop = True

                except Exception:
                    #
                    # TODO: handle network errors, set last_fetch,
                    #       last TID, and exit for a while if relevant.
                    #       Else, just continue and let the stream flow.
                    #
                    LOGGER.exception(u'%s: exception in loop #%s after '
                                     u'having consumed %s item(s), '
                                     u're-starting…', self,
                                     infinite_count, cur_processed)

                    statsd.incr('api.twitter.items.exception')

                all_processed += cur_processed
                cur_processed = 0

                if exit_loop:
                    break
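
A minimal sketch of the since_id / max_id windowing the loop above relies on, assuming only a `request(path, parameters)` callable that returns tweet dicts newest-first; the function name and signature are illustrative, not part of the original code.

def fetch_window(request, api_path, since_id=None, max_id=None,
                 page_size=200):
    """ Yield tweets between since_id (exclusive) and max_id (inclusive).

    Pages backwards through the window by moving max_id just below the
    oldest tweet seen, mirroring the parameters['max_id'] /
    parameters['since_id'] updates in the consume loop above.
    """

    parameters = {'count': page_size}

    if since_id is not None:
        parameters['since_id'] = since_id

    if max_id is not None:
        parameters['max_id'] = max_id

    while True:
        tweets = request(api_path, parameters)

        if not tweets:
            # Nothing left in the window: the gap is filled.
            return

        for tweet in tweets:
            yield tweet

        # Tweets come back newest-first; continue below the oldest one.
        parameters['max_id'] = tweets[-1]['id'] - 1

Called with since_id=old_latest and max_id=current_max - 1, such a generator covers exactly the gap the backfill task above is asked to fill.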
Example #52
0
    def recently_usable(self):
        """ Return True if the account has been tested/connected recently. """

        return self.is_usable and (now() - self.date_last_conn < timedelta(
            seconds=self.config_account_refresh_period))
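
The same freshness test written as a standalone helper, to make the window arithmetic explicit; passing `date_last_conn` and the refresh period in seconds as plain arguments, and treating now() as Django's timezone.now(), are assumptions made for illustration only.

from datetime import timedelta

from django.utils.timezone import now


def recently_usable(is_usable, date_last_conn, refresh_period_seconds):
    """ Return True if the account was successfully connected recently. """

    if not is_usable or date_last_conn is None:
        return False

    return now() - date_last_conn < timedelta(seconds=refresh_period_seconds)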
Example #53
0
def archive_articles(limit=None):
    """ Archive articles that pollute the production database. """

    raise NotImplementedError('REVIEW for RELDB.')

    # cf. https://docs.djangoproject.com/en/dev/topics/db/multi-db/#selecting-a-database-to-delete-from  # NOQA

    counts = {
        'duplicates': 0,
        'orphaned': 0,
        'bad_articles': 0,
        'archived_dupes': 0,
    }

    if limit is None:
        limit = config.ARTICLE_ARCHIVE_BATCH_SIZE

    with no_dereference(Article) as ArticleOnly:
        if config.ARTICLE_ARCHIVE_OLDER_THAN > 0:
            older_than = now() - timedelta(
                days=config.ARTICLE_ARCHIVE_OLDER_THAN)

            duplicates = ArticleOnly.objects(
                duplicate_of__ne=None,
                date_published__lt=older_than).limit(limit)
            orphaned   = ArticleOnly.objects(
                orphaned=True,
                date_published__lt=older_than).limit(limit)

        else:
            duplicates = ArticleOnly.objects(duplicate_of__ne=None
                                             ).limit(limit)
            orphaned   = ArticleOnly.objects(orphaned=True).limit(limit)

    duplicates.no_cache()
    orphaned.no_cache()

    counts['duplicates'] = duplicates.count()
    counts['orphaned']   = orphaned.count()

    if counts['duplicates']:
        current = 0
        LOGGER.info(u'Archiving of %s duplicate article(s) started.',
                    counts['duplicates'])

        with benchmark('Archiving of %s duplicate article(s)'
                       % counts['duplicates']):
            for article in duplicates:
                archive_article_one_internal(article, counts)
                current += 1
                if current % 50 == 0:
                    LOGGER.info(u'Archived %s/%s duplicate articles so far.',
                                current, counts['duplicates'])

    if counts['orphaned']:
        current = 0
        LOGGER.info(u'Archiving of %s orphaned article(s) started.',
                    counts['orphaned'])

        with benchmark('Archiving of %s orphaned article(s)'
                       % counts['orphaned']):
            for article in orphaned:
                archive_article_one_internal(article, counts)
                current += 1
                if current % 50 == 0:
                    LOGGER.info(u'Archived %s/%s orphaned articles so far.',
                                current, counts['orphaned'])

    if counts['duplicates'] or counts['orphaned']:
        synchronize_statsd_articles_gauges(full=True)

        LOGGER.info(u'%s already-archived and %s bad articles were found '
                    u'during the operation.', counts['archived_dupes'],
                    counts['bad_articles'])

    else:
        LOGGER.info(u'No article to archive.')
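
The body above still queries MongoEngine and raises NotImplementedError pending the RELDB review; below is a rough sketch, assuming a Django `Article` model with `duplicate_of`, `orphaned` and `date_published` fields, of how the same selection could look with the ORM. The helper name and signature are made up.

from datetime import timedelta

from django.utils.timezone import now


def select_archivable(article_model, limit, older_than_days=0):
    """ Return (duplicates, orphaned) querysets, each capped at `limit`. """

    duplicates = article_model.objects.filter(duplicate_of__isnull=False)
    orphaned = article_model.objects.filter(orphaned=True)

    if older_than_days > 0:
        older_than = now() - timedelta(days=older_than_days)

        duplicates = duplicates.filter(date_published__lt=older_than)
        orphaned = orphaned.filter(date_published__lt=older_than)

    return duplicates[:limit], orphaned[:limit]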
Example #54
0
def global_duplicates_checker(limit=None, force=False):
    """ Check that duplicate articles have no more Reads anywhere.

    Fix it if not, and update all counters accordingly.

    :param limit: integer, the maximum number of duplicates to check.
        Default: none.
    :param force: boolean, default ``False``; allows bypassing and
        re-acquiring the lock.
    """

    if config.CHECK_DUPLICATES_DISABLED:
        LOGGER.warning(u'Duplicates check disabled in configuration.')
        return

    # This task runs once a day. Acquire the lock for just a
    # little longer to avoid over-parallelized runs.
    my_lock = RedisExpiringLock('check_all_duplicates', expire_time=3600 * 25)

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(u'Forcing duplicates check…')

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'global_duplicates_checker() is already '
                           u'locked, aborting.')
            return

    if limit is None:
        limit = config.CHECK_DUPLICATES_LIMIT

    start_time = pytime.time()
    duplicates = BaseItem.objects.duplicate()

    total_dupes_count  = duplicates.count()
    total_reads_count  = 0
    processed_dupes    = 0
    done_dupes_count   = 0
    purged_dupes_count = 0

    purge_after_weeks_count = max(1, config.CHECK_DUPLICATES_PURGE_AFTER_WEEKS)
    purge_after_weeks_count = min(52, purge_after_weeks_count)

    purge_before_date = now() - timedelta(days=purge_after_weeks_count * 7)

    LOGGER.info(u'Done counting (took %s of pure SQL joy), starting procedure.',
                naturaldelta(pytime.time() - start_time))

    with benchmark(u"Check {0}/{1} duplicates".format(limit or u'all',
                   total_dupes_count)):

        try:
            for duplicate in duplicates.iterator():
                reads = duplicate.reads.all()

                processed_dupes += 1

                if reads.exists():
                    done_dupes_count  += 1
                    reads_count        = reads.count()
                    total_reads_count += reads_count

                    LOGGER.info(u'Duplicate %s #%s still has %s reads, fixing…',
                                duplicate._meta.model.__name__,
                                duplicate.id, reads_count)

                    duplicate.duplicate_of.register_duplicate(
                        duplicate, force=duplicate.duplicate_status
                        == DUPLICATE_STATUS.FINISHED)

                if duplicate.duplicate_status == DUPLICATE_STATUS.FINISHED:
                    #
                    # TODO: check we didn't get some new race-condition
                    #       dependencies between the moment the duplicate
                    #       was marked as such and now.

                    if duplicate.date_created < purge_before_date:
                        try:
                            with transaction.atomic():
                                duplicate.delete()
                        except:
                            LOGGER.exception(u'Exception while deleting '
                                             u'duplicate %s #%s',
                                             duplicate._meta.model.__name__,
                                             duplicate.id)

                        purged_dupes_count += 1
                        LOGGER.info(u'Purged duplicate %s #%s from database.',
                                    duplicate._meta.model.__name__,
                                    duplicate.id)

                elif duplicate.duplicate_status in (
                    DUPLICATE_STATUS.NOT_REPLACED,
                        DUPLICATE_STATUS.FAILED):
                    # Something went wrong; perhaps the
                    # task was purged before being run.
                    duplicate.duplicate_of.register_duplicate(duplicate)
                    done_dupes_count += 1

                elif duplicate.duplicate_status is None:
                    # Something went very wrong. If the article is a known
                    # duplicate, its status field should have been set to
                    # at least NOT_REPLACED.
                    duplicate.duplicate_of.register_duplicate(duplicate)
                    done_dupes_count += 1

                    LOGGER.error(u'Corrected duplicate %s #%s found with no '
                                 u'status.', duplicate._meta.model.__name__,
                                 duplicate.id)

                if limit and processed_dupes >= limit:
                    break

        finally:
            my_lock.release()

    LOGGER.info(u'global_duplicates_checker(): %s/%s duplicates processed '
                u'(%.2f%%; limit: %s), %s corrected (%.2f%%), '
                u'%s purged (%.2f%%); %s reads altered.',

                processed_dupes, total_dupes_count,
                (processed_dupes * 100.0 / total_dupes_count)
                if total_dupes_count else 0.0,

                limit or u'none',

                done_dupes_count,
                (done_dupes_count * 100.0 / processed_dupes)
                if processed_dupes else 0.0,

                purged_dupes_count,
                (purged_dupes_count * 100.0 / processed_dupes)
                if processed_dupes else 0.0,

                total_reads_count)
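
The acquire / force-release / re-acquire dance around RedisExpiringLock reappears in the next example; here is a minimal sketch of that pattern, using only the acquire() and release() calls shown above (the helper name is made up).

def acquire_or_abort(lock, force=False, logger=None):
    """ Return True if we hold the lock and may proceed, False otherwise. """

    if lock.acquire():
        return True

    if force:
        # Steal the lock: release whatever is held, then take it ourselves.
        lock.release()
        lock.acquire()
        return True

    if logger is not None:
        # A previous run has not finished (or did not release): bail out.
        logger.warning(u'Task is already locked, aborting.')

    return False

With such a helper, both tasks would reduce their preamble to a single `if not acquire_or_abort(my_lock, force, LOGGER): return`.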
Example #55
0
def refresh_all_feeds(limit=None, force=False):
    u""" Refresh all feeds (RSS/Mail/Twitter…). """

    if config.FEED_FETCH_DISABLED:
        # Do not raise any .retry(), this is a scheduled task.
        LOGGER.warning(u'Feed refresh disabled in configuration.')
        return

    # As FEED_GLOBAL_REFRESH_INTERVAL is dynamically modifiable,
    # we should re-evaluate it each time we run.
    this_round_expire_time = (
        config.FEED_GLOBAL_REFRESH_INTERVAL * 60
        - config.FEED_GLOBAL_REFRESH_INTERVAL
    )

    # Be sure two refresh operations don't overlap, but don't hold the
    # lock too long if something goes wrong. In production conditions
    # as of 20130812, refreshing all feeds takes only a moment:
    # [2013-08-12 09:07:02,028: INFO/MainProcess] Task
    #       oneflow.core.tasks.refresh_all_feeds succeeded in 1.99886608124s.
    #
    my_lock = RedisExpiringLock(
        REFRESH_ALL_FEEDS_LOCK_NAME,
        expire_time=this_round_expire_time
    )

    if not my_lock.acquire():
        if force:
            my_lock.release()
            my_lock.acquire()
            LOGGER.warning(_(u'Forcing all feed refresh…'))

        else:
            # Avoid running this task over and over again in the queue
            # if the previous instance did not yet terminate. Happens
            # when scheduled task runs too quickly.
            LOGGER.warning(u'refresh_all_feeds() is already locked, aborting.')
            return

    # This should bring us a Polymorphic Query to refresh all feeds types.
    feeds = BaseFeed.objects.filter(is_active=True,
                                    is_internal=False).order_by(
                                        'date_last_fetch')

    if limit:
        feeds = feeds[:limit]

    with benchmark('refresh_all_feeds()'):

        try:
            count = 0
            mynow = now()

            for feed in feeds:

                if feed.refresh_lock.is_locked():
                    # The refresh task launched before its expiration is
                    # still [long-]running while we want to launch another.
                    # Avoid doing so: the new one would exit immediately
                    # because date_last_fetch is too recent.
                    LOGGER.debug(u'Feed %s already locked, skipped.', feed)
                    continue

                if feed.date_last_fetch is None:

                    basefeed_refresh_task.apply_async(
                        args=(feed.id, ),

                        # In `this_round_expire_time` seconds we will
                        # relaunch it anyway, so don't clutter the queue
                        # with double work.
                        expires=this_round_expire_time,
                    )

                    LOGGER.info(u'Launched immediate refresh of feed %s which '
                                u'has never been refreshed.', feed)
                    count += 1
                    continue

                if feed.fetch_interval > 86399:
                    interval_days = feed.fetch_interval / 86400
                    interval_seconds = feed.fetch_interval - (
                        interval_days * 86400)

                    interval = timedelta(days=interval_days,
                                         seconds=interval_seconds)

                else:
                    interval = timedelta(seconds=feed.fetch_interval)

                if force or feed.date_last_fetch + interval < mynow:

                    how_late = feed.date_last_fetch + interval - mynow
                    how_late = how_late.days * 86400 + how_late.seconds

                    late = feed.date_last_fetch + interval < mynow

                    basefeed_refresh_task.apply_async(
                        args=(feed.id, ),
                        kwargs={'force': force},
                        expires=this_round_expire_time,
                    )

                    LOGGER.info(u'Launched refresh of feed %s (%s %s).',
                                feed, naturaldelta(how_late),
                                u'late' if late else u'earlier')
                    count += 1

        finally:
            # HEADS UP: in case the system is overloaded and feed refresh()
            #           tasks don't complete fast enough, the current task
            #           will overload it even more. Thus, we intentionally
            #           don't release the lock, to avoid over-relaunched
            #           global tasks feeding the refresh queue with useless
            #           double-triple-Nth individual tasks.
            #
            # my_lock.release()
            pass

        LOGGER.info(u'Launched %s refreshes out of %s feed(s) checked.',
                    count, feeds.count())
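
A condensed sketch of the per-feed decision above: a feed that has never been fetched is refreshed immediately, otherwise it is refreshed once date_last_fetch + fetch_interval has passed (or when force is set). The function name and signature are illustrative only. Note that timedelta(seconds=...) already normalizes values above 86400, so the explicit days/seconds split above is purely cosmetic.

from datetime import timedelta


def should_refresh(date_last_fetch, fetch_interval_seconds, reference_now,
                   force=False):
    """ Return (refresh, seconds_late) for one feed.

    `seconds_late` is positive when the feed is overdue, negative when a
    forced refresh happens before its due date, and None when the feed
    has never been fetched.
    """

    if date_last_fetch is None:
        # Never refreshed: launch immediately.
        return True, None

    due_date = date_last_fetch + timedelta(seconds=fetch_interval_seconds)
    seconds_late = (reference_now - due_date).total_seconds()

    return force or seconds_late > 0, seconds_late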
Example #56
0
    def guess_and_import_wallabag(self):
        """ Try to import a JSON export file from wallabag. """

        try:
            wallabag_json = json.loads(self.urls)

        except:
            return False

        try:
            first_object = wallabag_json[0]

        except:
            return False

        for attr_name in (
            "0", "1", "2", "3", "4", "5", "6",
            "content", "id", "is_fav", "is_read",
            "title", "url", "user_id",
        ):
            if attr_name not in first_object:
                return False

        message_user(self.user,
                     _(u'Wallabag JSON export format detected.'),
                     constants.INFO)

        for wallabag_object in wallabag_json:

            url = wallabag_object['url']

            if self.validate_url(url):
                article = self.import_from_one_url(
                    url,
                    origin=ORIGINS.WALLABAG
                )

                if article is None:
                    # article was not created, we
                    # already have it in the database.
                    article = Article.objects.get(url=url)

                # Now comes the wallabag-specific part of the import,
                # e.g. getting back as much user meta-data as possible
                # into 1flow.

                article_needs_save = False
                article_needs_convert = False

                title = wallabag_object.get('title', None)

                if title:
                    article.name       = title
                    article_needs_save = True

                content = wallabag_object['content']

                if content:
                    article.content       = content
                    article.content_type  = CONTENT_TYPES.HTML
                    article_needs_save    = True
                    article_needs_convert = True

                if article_needs_save:
                    article.save()

                if article_needs_convert:
                    article.convert_to_markdown()

                read = article.reads.get(
                    subscriptions=self.user.user_subscriptions.imported_items)

                # About parsing dates:
                # http://stackoverflow.com/q/127803/654755
                # http://stackoverflow.com/a/18150817/654755

                read_needs_save = False

                if wallabag_object.get('is_fav', False):
                    read.is_starred = True
                    read_needs_save = True

                    # This information is not in wallabag.
                    read.date_starred = now()

                if wallabag_object.get('is_read', False):
                    read.is_read = True
                    read_needs_save = True

                    # This information is not in wallabag.
                    read.date_read = now()

                if read_needs_save:
                    read.save()

        return True
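
For reference, a minimal sketch of the kind of wallabag export record the detection loop above expects, plus the key check as a standalone helper; the field values below are made up, only the key names matter.

WALLABAG_SAMPLE = [{
    "0": "", "1": "", "2": "", "3": "", "4": "", "5": "", "6": "",
    "id": 1,
    "user_id": 1,
    "url": "http://example.com/some-article",
    "title": "Some article",
    "content": "<p>Saved HTML content</p>",
    "is_read": "1",
    "is_fav": "0",
}]


def looks_like_wallabag_export(data):
    """ Return True if `data` exposes the keys checked in the import above. """

    try:
        first = data[0]

    except (IndexError, KeyError, TypeError):
        return False

    required = ("0", "1", "2", "3", "4", "5", "6",
                "content", "id", "is_fav", "is_read",
                "title", "url", "user_id")

    return all(key in first for key in required)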