def execute(self, job):
    """Run a throwaway test job: log its echo payload, then loop
    `count` times, sleeping `sleep` seconds per iteration."""
    job_name = job.get('name')
    repeat = job.get('count', 1)
    pause = job.get('sleep', 1)
    payload = job.get('echo')
    WARN("TestExecutor %s start [%s]" % (job_name, payload))
    INFO(payload)
    for idx in range(repeat):
        INFO(payload)
        WARN("TestExecutor %s sleeping (%d/%d)..." % (job_name, idx, repeat))
        sleep_utils.sleep(pause)
    WARN("TestExecutor %s done" % job_name)
def process_exception(self, request, exception, crawler):
    """Retry a failed request up to self.max_retry times.

    Tracks the attempt count in request['__retry__'].  Returns the
    request (counter incremented, after self.retry_sleep seconds) to
    signal a retry, or None once the retry limit is reached.
    """
    if '__retry__' not in request:
        request['__retry__'] = 0
    retry = request['__retry__']
    if retry < self.max_retry:
        # fix: message previously said "%s<=%s" although the branch is
        # only taken when retry < max_retry; now matches the comparison
        ERROR('retry num: %s<%s:\nexception:\n%s\nrequest:\n%s' %
              (retry, self.max_retry, exception, request))
        request['__retry__'] += 1
        sleep_utils.sleep(self.retry_sleep, 'RetryPlugin.retry')
        return request
    else:
        ERROR('max retry limit reached %s>=%s:\nexception:\n%s\nrequest:\n%s' %
              (retry, self.max_retry, exception, request))
def process_response(self, request, response, crawler):
    """Store freshly fetched responses in the cache (keyed by URL) and
    throttle after each cache miss, skipping the very first one."""
    if response.cached:
        return
    # remember content + headers with a fetch timestamp for later reuse
    self.cache[request.url] = {
        'content': response.content,
        'headers': dict(response.headers),
        'time': datetime.now(),
    }
    pause = self.sleep_if_no_cache
    # don't sleep for first response
    if pause is not None and not self._first:
        sleep_utils.sleep(pause, 'QdbCachePlugin: no_cache')
    self._first = False
def process_response(self, request, response, crawler):
    """Mirror uncached raw responses to disk and throttle cache misses.

    When the request carries a 'raw_file' flag the response is tagged
    with the target path; fresh (uncached) content is written to
    request['full_target'] and followed by an optional sleep.
    """
    if request.get('raw_file'):
        response['raw_file'] = True
        response['raw_file_name'] = request['full_target']
    if not response.cached and request.get('full_target'):
        # write raw content to file; write_to_file ensures path exists
        write_to_file(response.content, request['full_target'], zip=self.zip)
        pause = self.sleep_if_no_cache
        # don't sleep for first response (unless configured to)
        if pause is not None and (self.sleep_first_time or not self._first):
            sleep_utils.sleep(pause, 'FileCachePlugin: no_cache')
        self._first = False
def f():
    """Yield lines from every non-directory entry in the zip archive at
    `filename` (closure variable from the enclosing scope).

    Best-effort: any error is logged via ERROR and followed by a short
    sleep, after which the generator simply ends.
    """
    try:
        DEBUG_('[zipfile] open')
        root = zipfile.ZipFile(filename, "r")
        try:
            for name in root.namelist():
                # skip directory entries
                if name.endswith('/'):
                    continue
                h = root.open(name)
                try:
                    for line in h:
                        yield line
                finally:
                    # fix: close the member handle even if iteration
                    # raises or the consumer abandons the generator
                    h.close()
        finally:
            DEBUG_('[zipfile] close')
            root.close()
    except Exception as e:  # fix: was py2-only `except Exception, e:`
        ERROR(e)
        sleep(3, 'read_lines_from_file')
def generate_tasks(self, job):
    """Yield tasks while the current time is inside the job's working
    interval.

    Sleeps `sleep` seconds between task rounds inside the interval,
    `wait_sleep` seconds while outside it, and `check_sleep` seconds
    (defaulting to `wait_sleep`) before the interval has started.
    Terminates once the interval has ended.
    """
    sleep = job.get('sleep', 5)
    wait_sleep = job.get('wait_sleep', 5)
    # fix: `check_sleep` was referenced below but never defined, so any
    # call before the interval start raised NameError; read it from the
    # job, falling back to wait_sleep
    check_sleep = job.get('check_sleep', wait_sleep)
    interval = TimeIntervalList(intervals=job.get('intervals'), none_is_all=True)
    while True:
        now = time.time()
        if interval.before_start(now):
            DEBUG("sleep before start")
            sleep_utils.sleep(check_sleep)
        elif interval.after_end(now):
            DEBUG("break after end")
            break
        else:
            if interval.include(now):
                for t in self.tasks(job):
                    yield t
                DEBUG("sleep inside working interval")
                sleep_utils.sleep(sleep)
            else:
                DEBUG("sleep outside working interval")
                sleep_utils.sleep(wait_sleep)
def process_exception(self, request, exception, crawler):
    """Back off for self.exception_sleep seconds whenever the crawler
    reports an exception; never consumes or retries the request."""
    pause = self.exception_sleep
    sleep_utils.sleep(pause, "SleepPlugin.process_exception")
def process_response(self, request, response, crawler):
    """Pause for self.sleep seconds after every response, acting as a
    simple fixed-rate throttle."""
    pause = self.sleep
    sleep_utils.sleep(pause, "SleepPlugin.process_response")