def skip_incremental(self, *criteria):
    """Perform an incremental check on a set of criteria.

    This can be used to execute a part of a crawler only once per an
    interval (which is specified by the ``expire`` setting). If the
    operation has already been performed (and should thus be skipped),
    this will return ``True``. If the operation needs to be executed,
    the returned value will be ``False``.

    When the operation needs to be executed and a key could be built
    from ``criteria``, a tag is stored for that key via ``set_tag`` so
    that the next call with the same criteria is skipped.

    :param criteria: values that together identify the operation.
    :returns: ``True`` if the operation should be skipped, ``False`` if
        it should run (always ``False`` when ``self.incremental`` is
        falsy or no key can be built).
    """
    if not self.incremental:
        return False

    # this is pure convenience, and will probably backfire at some point.
    # NOTE(review): the criteria tuple is passed as a single argument here
    # (not unpacked) - confirm make_key expects one sequence.
    key = make_key(criteria)
    if key is None:
        return False

    # this is used to re-run parts of a scrape after a certain interval,
    # e.g. half a year, or a year
    since = None
    if self.crawler.expire > 0:
        # The cut-off has to lie ``expire`` days in the PAST. The previous
        # code subtracted a negative timedelta, which produced a moment in
        # the future, so no stored tag could ever be recent enough.
        # Presumably Tag.exists treats ``since`` as a lower bound on the
        # tag's timestamp - verify against Tag.
        since = datetime.utcnow() - timedelta(days=self.crawler.expire)

    if Tag.exists(self.crawler, key, since=since):
        return True
    self.set_tag(key, None)
    return False
def skip_incremental(self, *criteria):
    """Decide whether an operation identified by ``criteria`` can be skipped.

    Useful for running a part of a crawler only once. The answer is
    ``True`` when the operation was already performed (a tag for the
    derived key exists) and ``False`` when it still has to run. In the
    latter case, provided a key could be derived, a tag is stored via
    ``set_tag`` so the following call with the same criteria is skipped.

    ``False`` is also returned, without touching any tag, when
    ``self.incremental`` is falsy or when no key can be derived.
    """
    if self.incremental:
        # this is pure convenience, and will probably backfire at some point.
        tag_key = make_key(*criteria)
        if tag_key is not None:
            if Tag.exists(self.crawler, tag_key):
                return True
            # First time this key is seen: remember it for later runs.
            self.set_tag(tag_key, None)
    return False
def check_tag(self, key):
    """Return the result of ``Tag.exists`` for ``key`` on this crawler."""
    tag_present = Tag.exists(self.crawler, key)
    return tag_present