class Network(object):
    """Perform HTTP requests concurrently through a thread pool.

    Each request is sent on its own ``requests.Session``.  On success the
    request's 'response' hook fires inside ``Session.send``; on failure the
    hook is invoked manually with a ``FakeResponse(400)`` so callers always
    observe a response-like object.
    """

    def __init__(self, concurrency, timeout):
        # concurrency: number of worker threads in the pool.
        # timeout: per-request timeout (seconds) passed to Session.send.
        self._executor = ThreadPoolExecutor(concurrency)
        self._timeout = timeout

    def _request(self, request):
        """Send one request; on any error, feed a FakeResponse to the
        request's 'response' hook instead of letting the worker die."""
        try:
            session = requests.Session()
            prepared = session.prepare_request(request)
            # Success path: the 'response' hook runs inside send(), so the
            # return value does not need to be kept.
            session.send(prepared, timeout=self._timeout)
        except Exception as exc:
            logger.warning('Exception {}: {}'.format(type(exc), exc))
            # BUG FIX: a plain ``requests.Request`` has no ``kwargs``
            # attribute -- the old unguarded lookup raised AttributeError
            # inside this handler and the callback never ran.  Fall back
            # to the standard ``hooks`` mapping (same dual lookup as the
            # newer Network implementation in this file).
            try:
                callback = request.kwargs['hooks']['response']
            except AttributeError:
                callback = request.hooks['response'][0]
            response = FakeResponse(400, 'No Response')
            callback(response)

    def perform_requests(self, requests):
        # NOTE: the parameter shadows the ``requests`` module; the module
        # is not referenced in this method, and the name is kept for
        # backward compatibility with keyword callers.
        return self._executor.map(self._request, requests)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Do not wait: in-flight requests are abandoned on context exit.
        self._executor.shutdown(wait=False)
class Network(object):
    """Send HTTP requests concurrently through a bounded thread pool.

    Requests are submitted via :meth:`perform_requests`, a generator that
    throttles submissions so that no more than ``concurrency`` futures are
    in flight at once.  All requests share one ``requests.Session``.
    """

    def __init__(self, concurrency, timeout, ui=None):
        # concurrency: max simultaneous in-flight requests (also the
        #   thread-pool size).
        # timeout: per-request timeout in seconds for Session.send.
        # ui: optional logger-like object; falls back to module logger.
        self._executor = ThreadPoolExecutor(concurrency)
        self._timeout = timeout
        self.session = requests.Session()
        self._ui = ui or logger
        self.futures = []  # futures not yet observed as done
        self.concurrency = concurrency

    def _request(self, request):
        """Send one request; report failures through the response hook.

        On success the 'response' hook fires inside ``Session.send``.  A
        read timeout only emits a warning; any other exception triggers
        the request's response hook with ``FakeResponse(400)`` so the
        consumer still receives a response-like object.
        """
        prepared = self.session.prepare_request(request)
        try:
            self.session.send(prepared, timeout=self._timeout)
        except requests.exceptions.ReadTimeout:
            self._ui.warning(textwrap.dedent("""The server did not send any data in the allotted amount of time. You might want to decrease the "--n_concurrent" parameters or increase "--timeout" parameter. """))
        except Exception as exc:
            self._ui.debug('Exception {}: {}'.format(type(exc), exc))
            # Hooks may live in two places depending on how the request
            # object was built: try kwargs first, then the standard
            # ``hooks`` mapping of requests.Request.
            try:
                callback = request.kwargs['hooks']['response']
            except AttributeError:
                callback = request.hooks['response'][0]
            response = FakeResponse(400, 'No Response')
            callback(response)

    def perform_requests(self, requests):
        """Generator: submit every request, yielding once per submission.

        Submission blocks (polling each 0.1 s) while ``concurrency``
        futures are already in flight; after the last submission the
        method drains until every outstanding future has completed.

        NOTE(review): the parameter shadows the ``requests`` module
        (unused inside this method) -- kept for interface compatibility.
        """
        for r in requests:
            while True:
                # Drop finished futures, then submit if a slot is free.
                self.futures = [i for i in self.futures if not i.done()]
                if len(self.futures) < self.concurrency:
                    self.futures.append(
                        self._executor.submit(self._request, r))
                    break
                else:
                    sleep(0.1)
            yield  # lets the caller interleave work per submission
        # wait for all batches to finish before returning
        while self.futures:
            f_len = len(self.futures)
            self.futures = [i for i in self.futures if not i.done()]
            if f_len != len(self.futures):
                self._ui.debug('Waiting for final requests to finish. '
                               'remaining requests: {}'
                               ''.format(len(self.futures)))
            sleep(0.1)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Abandon in-flight work immediately on context exit.
        self._executor.shutdown(wait=False)
# Memory-profiling driver (Python 2): walks the stackoverflow/ dumps and
# fetches matching GitHub data for each answerer via process_user().
h = hpy()  # guppy heap-profiler entry point
executor = ThreadPoolExecutor(max_workers=THREADS)
thread = None
for subdirs, dirs, files in os.walk('stackoverflow/'):
    i = 0
    for filename in files:
        # Filenames look like '<username>.json'; strip the extension.
        username = filename[:-5]
        github_filename = 'github/{}.csv'.format(username)
        # Remove stale partial output left by an interrupted run.
        if os.path.isfile('{}.tmp'.format(github_filename)):
            os.remove('{}.tmp'.format(github_filename))
        # Skip users whose GitHub data was already fetched.
        if os.path.isfile(github_filename):
            print u"skip {}".format(username)
            continue
        f = codecs.open('stackoverflow/{}'.format(filename), 'r', 'utf-8')
        data = json.load(f)
        f.close()
        fullname = data['answerer']['name']
        # Thread-pool dispatch disabled while profiling the memory of a
        # single synchronous call:
        # if i % (THREADS * 2) == 0:
        #     if thread:
        #         thread.result()
        #     thread = executor.submit(process_user, username, fullname)
        # else:
        #     executor.submit(process_user, username, fullname)
        print u"put in thread pool user '{}'".format(username)
        process_user(username, fullname)
        i += 1
        # NOTE(review): heap dump + exit after the FIRST unprocessed
        # user -- the loops never complete and executor.shutdown() below
        # is unreachable.  Presumably intentional for one-shot profiling;
        # confirm before reusing this script for a full run.
        print h.heap()
        sys.exit(0)
executor.shutdown(wait=True)
class Pool(object):
    """Abstract bounded thread pool with periodic throughput reporting.

    Subclasses implement handleError() and agregate() to consume task
    failures and results.  NOTE: uses the Python 2 ``__metaclass__`` hook
    and pokes at ThreadPoolExecutor's private ``_work_queue`` to bound
    the pending-task queue -- not a public API.
    """
    __metaclass__ = ABCMeta

    class debugger(threading.Thread):
        """Background thread printing pool throughput every `interval` s."""

        def __init__(self, pool, interval = 5):
            self.pool = pool
            self.interval = interval
            threading.Thread.__init__(self)

        def start(self):
            # Snapshot time/counter baselines so both mean and
            # instantaneous speeds can be derived in debug().
            self._running = True
            self.startTime = time.time()
            self.lastTime = time.time()
            self.lastNumber = 0
            self.numberAtStart = self.pool.processed
            threading.Thread.start(self)

        def stop(self):
            # Cooperative stop: run() exits after its current sleep.
            self._running = False

        def debug(self):
            # meanSpeed: tasks/sec since start(); instantSpeed: tasks/sec
            # since the previous debug() call.  NOTE(review): the first
            # call happens immediately after start(), so the elapsed-time
            # denominators can be very small (inflated speeds).
            meanSpeed = (self.pool.processed - self.numberAtStart) / (time.time() - self.startTime)
            instantSpeed = (self.pool.processed - self.lastNumber) / (time.time() - self.lastTime)
            print "%s Threads: %s Remaining: %s Speed: %s / %s Done: %s" % (
                ("["+self.pool.name+"]").ljust(15),
                str(self.pool.maxWorkers).ljust(4),
                str(self.pool.getQueueSize()).ljust(3),
                ("%.2f" % instantSpeed).ljust(9),
                ("%.2f" % meanSpeed).ljust(9),
                str(self.pool.processed)
            )
            self.lastTime = time.time()
            self.lastNumber = self.pool.processed

        def run(self):
            while(self._running):
                self.debug()
                time.sleep(self.interval)

    def __init__(self, maxWorkers, queueSize):
        self.maxWorkers = maxWorkers
        self._pool = ThreadPoolExecutor(max_workers=maxWorkers)
        # HACK: cap the executor's internal queue so submissions back up
        # once queueSize tasks are pending (private attribute).
        self._pool._work_queue.maxsize = queueSize
        #self._pool = ProcessPoolExecutor(max_workers=20)
        #self._pool._work_ids.maxsize = 2
        self.processed = 0  # completed-task counter, read by debugger
        self.debugger = self.__class__.debugger(self)
        self.debugger.start()

    def getQueueSize(self):
        # Private-API peek at the number of queued (unstarted) tasks.
        return self._pool._work_queue.qsize()
        #return self._pool._work_ids.qsize()*self.maxWorkers

    @property
    def name(self):
        # Reported in the debugger's status line.
        return self.__class__.__name__

    def submit(self, task, *args, **kwargs):
        """Run task(*args, **kwargs) on the pool, routing the outcome to
        handleError() (on exception) or agregate() (on success)."""
        def handleSubmit():
            try:
                result = task(*args, **kwargs)
            except Exception as e:
                self.handleError(task, e)
            else:
                self.agregate(task, result)
            # NOTE(review): '+=' is not atomic across threads; counter is
            # only used for debug output, so small races are tolerated.
            self.processed += 1
        self._pool.submit(handleSubmit)

    def waitAndShutdown(self):
        # Block until all queued tasks finish, then stop the stats thread.
        self._pool.shutdown(wait = True)
        self.debugger.stop()

    @abstractmethod
    def handleError(self, task, e):
        # Subclass hook: called with the task callable and the exception.
        pass

    @abstractmethod
    def agregate(self, task, result):
        # Subclass hook: called with the task callable and its result.
        pass