Exemple #1
0
    def skip_incremental(self, *criteria):
        """Perform an incremental check on a set of criteria.

        This can be used to execute a part of a crawler only once per an
        interval (which is specified by the crawler's ``expire`` setting, in
        days). If the operation has already been performed (and should thus
        be skipped), this will return ``True``. If the operation needs to be
        executed, the key is tagged via ``self.set_tag`` and the returned
        value will be ``False``.

        When ``self.incremental`` is falsy, or no key can be derived from
        ``criteria``, nothing is tagged and ``False`` is returned.
        """
        if not self.incremental:
            return False

        # this is pure convenience, and will probably backfire at some point.
        key = make_key(criteria)
        if key is None:
            return False

        # this is used to re-run parts of a scrape after a certain interval,
        # e.g. half a year, or a year
        since = None
        if self.crawler.expire > 0:
            # The cutoff has to lie in the past (now minus ``expire`` days).
            # Subtracting a negative timedelta would push it into the future.
            # NOTE(review): presumably ``Tag.exists`` only counts tags recorded
            # at or after ``since`` -- confirm against its implementation.
            since = datetime.utcnow() - timedelta(days=self.crawler.expire)

        if Tag.exists(self.crawler, key, since=since):
            return True
        self.set_tag(key, None)
        return False
Exemple #2
0
    def skip_incremental(self, *criteria):
        """Tell the caller whether an operation can be skipped as already done.

        A key is built from ``criteria`` with ``make_key``. If a tag for that
        key already exists for this crawler, ``True`` is returned (skip the
        operation). Otherwise ``self.set_tag(key, None)`` is called to record
        it and ``False`` is returned (run the operation).

        ``False`` is also returned, without tagging anything, when
        ``self.incremental`` is falsy or when ``make_key`` yields ``None``.
        """
        # Only bother deriving a key when incremental mode is switched on.
        tag_key = make_key(*criteria) if self.incremental else None
        if tag_key is None:
            return False

        if not Tag.exists(self.crawler, tag_key):
            # First time these criteria are seen: remember them for next time.
            self.set_tag(tag_key, None)
            return False
        return True
Exemple #3
0
 def check_tag(self, key):
     """Report whether a tag stored under ``key`` exists for this crawler.

     Delegates to ``Tag.exists`` and hands back its result unchanged.
     """
     found = Tag.exists(self.crawler, key)
     return found