def _run(self):
    """Main loop of the warc writer thread.

    Pulls recorded urls off ``self.recorded_url_q`` and hands the ones that
    pass ``self._filter_accepts`` to ``self.writer_pool``.  The loop keeps
    going until ``self.stop`` is set *and* the queue has been drained, then
    closes the writers.

    Unexpected errors are logged and the loop is restarted after a short
    pause.  Only ``Exception`` subclasses are treated this way, so
    ``SystemExit`` and ``KeyboardInterrupt`` propagate instead of being
    swallowed and retried.
    """
    while not self.stop.is_set():
        try:
            self.name = 'WarcWriterThread(tid={})'.format(warcprox.gettid())
            while True:
                try:
                    if self.stop.is_set():
                        # Shutting down: report drain progress every 50 urls.
                        qsize = self.recorded_url_q.qsize()
                        if qsize % 50 == 0:
                            self.logger.info("%s urls left to write", qsize)

                    # The timeout lets the loop notice self.stop and idle
                    # periods even when nothing is being queued.
                    recorded_url = self.recorded_url_q.get(
                            block=True, timeout=0.5)
                    self.idle = None
                    if self._filter_accepts(recorded_url):
                        if self.dedup_db:
                            warcprox.dedup.decorate_with_dedup_info(
                                    self.dedup_db, recorded_url,
                                    base32=self.options.base32)
                        records = self.writer_pool.write_records(recorded_url)
                        self._final_tasks(recorded_url, records)

                    # try to release resources in a timely fashion
                    if (recorded_url.response_recorder
                            and recorded_url.response_recorder.tempfile):
                        recorded_url.response_recorder.tempfile.close()
                except queue.Empty:
                    if self.stop.is_set():
                        # Stop requested and nothing left to write.
                        break
                    # Record when the thread went idle and give the writer
                    # pool a chance to roll over (presumably idle warcs).
                    self.idle = time.time()
                    self.writer_pool.maybe_idle_rollover()

            self.logger.info('WarcWriterThread shutting down')
            self.writer_pool.close_writers()
        except Exception:
            # Was a bare ``except:``, which also trapped SystemExit and
            # KeyboardInterrupt and kept the thread alive regardless.
            self.logger.critical(
                    "WarcWriterThread will try to continue after unexpected error",
                    exc_info=True)
            time.sleep(0.5)
def __init__(self, request, client_address, server):
    """Label the current thread, set handler defaults, then defer to the base.

    The thread name records the thread id, the UTC start time and the
    client's address.  The socket timeout comes from ``self._socket_timeout``.
    Everything is set up before the base initializer is invoked (assuming
    ``http_server`` is the stdlib http server module, that call is what
    actually services the request).
    """
    tid = warcprox.gettid()
    started = datetime.datetime.utcnow().isoformat()
    # Index rather than unpack: only the first two items of the address are
    # used, whatever its length.
    client_host = client_address[0]
    client_port = client_address[1]
    label = 'MitmProxyHandler(tid={},started={},client={}:{})'.format(
        tid, started, client_host, client_port)
    threading.current_thread().name = label

    self.is_connect = False
    self._headers_buffer = []
    request.settimeout(self._socket_timeout)

    http_server.BaseHTTPRequestHandler.__init__(
        self, request, client_address, server)
def _run(self):
    """Main loop of the warc writer thread.

    Pulls recorded urls off ``self.recorded_url_q``, writes the ones that
    pass ``self._filter_accepts`` through ``self.writer_pool`` and always
    calls ``self._final_tasks`` with the resulting records (an empty list
    for filtered-out urls).  The loop runs until ``self.stop`` is set *and*
    the queue has been drained, then calls ``self._shutdown()``.

    Error handling:
        * ``OSError`` with errno ``ENOSPC`` (no space left on device) is
          treated as fatal: the thread shuts down and calls ``sys.exit(1)``.
        * Any other ``Exception`` is logged and the loop restarts after a
          0.5 second pause.
    """
    import errno  # local import: only needed to name the ENOSPC constant

    self.name = '%s(tid=%s)' % (self.name, warcprox.gettid())
    while not self.stop.is_set():
        try:
            while True:
                try:
                    if self.stop.is_set():
                        # Shutting down: report drain progress every 50 urls.
                        qsize = self.recorded_url_q.qsize()
                        if qsize % 50 == 0:
                            self.logger.info(
                                    "%s urls left to write", qsize)

                    # The timeout lets the loop notice self.stop and idle
                    # periods even when nothing is being queued.
                    recorded_url = self.recorded_url_q.get(
                            block=True, timeout=0.5)
                    records = []
                    self.idle = None
                    if self._filter_accepts(recorded_url):
                        if self.dedup_db:
                            warcprox.dedup.decorate_with_dedup_info(
                                    self.dedup_db, recorded_url,
                                    base32=self.options.base32)
                        records = self.writer_pool.write_records(
                                recorded_url)

                    self._final_tasks(recorded_url, records)

                    # try to release resources in a timely fashion
                    if (recorded_url.response_recorder
                            and recorded_url.response_recorder.tempfile):
                        recorded_url.response_recorder.tempfile.close()

                    self.writer_pool.maybe_idle_rollover()
                except queue.Empty:
                    if self.stop.is_set():
                        # Stop requested and nothing left to write.
                        break
                    self.idle = time.time()
                    # Also check for rollover while the queue is idle.
                    # Previously the check only ran after a url had been
                    # processed, so it never happened on a quiet queue.
                    self.writer_pool.maybe_idle_rollover()

            self.logger.info('WarcWriterThread shutting down')
            self._shutdown()
        except Exception as e:
            if isinstance(e, OSError) and e.errno == errno.ENOSPC:
                # OSError: [Errno 28] No space left on device
                self.logger.critical(
                        'shutting down due to fatal problem: %s: %s',
                        e.__class__.__name__, e)
                self._shutdown()
                # Raises SystemExit from this thread; it is not caught by
                # the surrounding handler, so the loop ends here.
                sys.exit(1)

            self.logger.critical(
                    'WarcWriterThread will try to continue after unexpected '
                    'error', exc_info=True)
            time.sleep(0.5)
def _wrap_process_url(self, recorded_url):
    """Run ``self._process_url(recorded_url)``, optionally under cProfile.

    On the first call in a given thread (tracked through
    ``self.thread_local.name_set``) the current thread is renamed to
    ``WarcWriterThread(tid=...)``.

    When ``self.options.profile`` is truthy, a per-thread ``cProfile.Profile``
    is created lazily, registered in ``self.thread_profilers`` under the
    thread's ident, and enabled only for the duration of the call.  The
    profiler is disabled even if ``_process_url`` raises, so an exception
    does not leave profiling switched on for unrelated code in this thread.

    Any exception from ``_process_url`` propagates to the caller.
    """
    if not getattr(self.thread_local, 'name_set', False):
        threading.current_thread().name = (
                'WarcWriterThread(tid=%s)' % warcprox.gettid())
        self.thread_local.name_set = True

    if not self.options.profile:
        self._process_url(recorded_url)
        return

    # Imported lazily so cProfile is only loaded when profiling is requested.
    import cProfile
    if not hasattr(self.thread_local, 'profiler'):
        self.thread_local.profiler = cProfile.Profile()
        tid = threading.current_thread().ident
        self.thread_profilers[tid] = self.thread_local.profiler

    profiler = self.thread_local.profiler
    profiler.enable()
    try:
        self._process_url(recorded_url)
    finally:
        # Always stop profiling, including when _process_url raises.
        profiler.disable()
def _run(self):
    """Write queued recorded urls to warc files until asked to stop.

    Each url taken from ``self.recorded_url_q`` is written through
    ``self.writer_pool`` if ``self._filter_accepts`` approves it, and
    ``self._final_tasks`` is then called with whatever records were written
    (``[]`` when the url was filtered out).  Once ``self.stop`` is set the
    remaining queue contents are drained before ``self._shutdown()`` runs.

    A "no space left on device" ``OSError`` is fatal: the thread shuts down
    and calls ``sys.exit(1)``.  Every other ``Exception`` is logged, and
    after half a second the loop carries on.
    """
    import errno  # local import: only needed to name the ENOSPC constant

    self.name = '%s(tid=%s)' % (self.name, warcprox.gettid())
    while not self.stop.is_set():
        try:
            while True:
                try:
                    if self.stop.is_set():
                        # Draining on shutdown; log progress every 50 urls.
                        qsize = self.recorded_url_q.qsize()
                        if qsize % 50 == 0:
                            self.logger.info("%s urls left to write", qsize)

                    # Bounded wait so stop requests and idleness get noticed.
                    recorded_url = self.recorded_url_q.get(block=True, timeout=0.5)
                    records = []
                    self.idle = None
                    if self._filter_accepts(recorded_url):
                        if self.dedup_db:
                            warcprox.dedup.decorate_with_dedup_info(self.dedup_db,
                                    recorded_url, base32=self.options.base32)
                        records = self.writer_pool.write_records(recorded_url)

                    self._final_tasks(recorded_url, records)

                    # try to release resources in a timely fashion
                    if recorded_url.response_recorder and recorded_url.response_recorder.tempfile:
                        recorded_url.response_recorder.tempfile.close()

                    self.writer_pool.maybe_idle_rollover()
                except queue.Empty:
                    if self.stop.is_set():
                        # Nothing left to write and a stop was requested.
                        break
                    self.idle = time.time()
                    # The rollover check used to run only after a url was
                    # processed, so it was skipped whenever the queue was
                    # empty; run it on the idle path as well.
                    self.writer_pool.maybe_idle_rollover()

            self.logger.info('WarcWriterThread shutting down')
            self._shutdown()
        except Exception as e:
            if isinstance(e, OSError) and e.errno == errno.ENOSPC:
                # OSError: [Errno 28] No space left on device
                self.logger.critical(
                        'shutting down due to fatal problem: %s: %s',
                        e.__class__.__name__, e)
                self._shutdown()
                # SystemExit raised here escapes this handler and ends the
                # thread's loop.
                sys.exit(1)

            self.logger.critical(
                    'WarcWriterThread will try to continue after unexpected '
                    'error', exc_info=True)
            time.sleep(0.5)
def __init__(self, request, client_address, server):
    """Name the handling thread, initialise handler state, call the base init.

    The current thread is renamed so that it shows its thread id, UTC start
    time and the client's host and port.  The request socket gets a fixed
    60 second timeout before control passes to
    ``http_server.BaseHTTPRequestHandler.__init__``.
    """
    name_template = 'MitmProxyHandler(tid={},started={},client={}:{})'
    name_fields = (
        warcprox.gettid(),
        datetime.datetime.utcnow().isoformat(),
        client_address[0],
        client_address[1],
    )
    handler_thread = threading.current_thread()
    handler_thread.name = name_template.format(*name_fields)

    self.is_connect = False
    self._headers_buffer = []

    # XXX what value should this have?
    request.settimeout(60)

    http_server.BaseHTTPRequestHandler.__init__(
        self, request, client_address, server)