Example #1
0
class Network(object):
    """Send HTTP requests concurrently through a thread pool.

    Intended for use as a context manager; the pool is shut down
    (without waiting for in-flight work) on exit.
    """

    def __init__(self, concurrency, timeout):
        # concurrency: number of worker threads in the pool.
        # timeout: per-request timeout (seconds) forwarded to Session.send.
        self._executor = ThreadPoolExecutor(concurrency)
        self._timeout = timeout

    def _request(self, request):
        """Send one request; never raises to the pool.

        On success returns the real response.  On any failure, logs the
        exception and feeds a synthetic 400 ``FakeResponse`` into the
        request's own response hook, then returns that fake response.
        """
        session = None
        try:
            session = requests.Session()
            prepared = session.prepare_request(request)
            # Fix: return the response instead of discarding it, so
            # perform_requests yields responses rather than Nones.
            return session.send(prepared, timeout=self._timeout)
        except Exception as exc:
            # Boundary handler: log, then report through the hook.
            logger.warning('Exception {}: {}'.format(type(exc), exc))
            # The hook may live in kwargs or in .hooks depending on how
            # the Request object was built (matches the sibling class).
            try:
                callback = request.kwargs['hooks']['response']
            except AttributeError:
                callback = request.hooks['response'][0]
            response = FakeResponse(400, 'No Response')
            callback(response)
            return response
        finally:
            # Fix: the original leaked the Session (never closed).
            if session is not None:
                session.close()

    def perform_requests(self, requests):
        """Lazily map ``_request`` over *requests* on the pool; returns
        an iterator of responses in input order."""
        return self._executor.map(self._request, requests)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Do not block on outstanding requests when leaving the context.
        self._executor.shutdown(wait=False)
Example #2
0
class Network(object):
    """Throttled concurrent HTTP sender backed by a thread pool.

    At most ``concurrency`` requests are in flight at once; progress and
    errors are reported through ``ui`` (falling back to the module
    logger).  Usable as a context manager; the pool is shut down without
    waiting on exit.
    """

    def __init__(self, concurrency, timeout, ui=None):
        # A single shared Session serves every request.
        self.session = requests.Session()
        self._executor = ThreadPoolExecutor(concurrency)
        self._timeout = timeout
        self._ui = ui or logger
        self.concurrency = concurrency
        self.futures = []

    def _request(self, request):
        """Send one request; warn on read timeout, and on any other
        error push a fake 400 response into the request's response hook."""
        prepared = self.session.prepare_request(request)
        try:
            self.session.send(prepared, timeout=self._timeout)
        except requests.exceptions.ReadTimeout:
            message = textwrap.dedent("""The server did not send any data
in the allotted amount of time.
You might want to decrease the "--n_concurrent" parameters
or
increase "--timeout" parameter.
""")
            self._ui.warning(message)

        except Exception as exc:
            self._ui.debug('Exception {}: {}'.format(type(exc), exc))
            # The response hook may live in kwargs or in .hooks depending
            # on how the Request object was constructed.
            try:
                callback = request.kwargs['hooks']['response']
            except AttributeError:
                callback = request.hooks['response'][0]
            callback(FakeResponse(400, 'No Response'))

    def perform_requests(self, requests):
        """Generator: submit each request, never exceeding ``concurrency``
        in-flight futures, yielding once per submission; finally blocks
        (polling) until every outstanding future has completed."""
        for request in requests:
            submitted = False
            while not submitted:
                # Drop completed futures, then submit if a slot is free.
                self.futures = [f for f in self.futures if not f.done()]
                if len(self.futures) < self.concurrency:
                    self.futures.append(
                        self._executor.submit(self._request, request))
                    submitted = True
                else:
                    sleep(0.1)
            yield
        #  wait for all batches to finish before returning
        while self.futures:
            before = len(self.futures)
            self.futures = [f for f in self.futures if not f.done()]
            if len(self.futures) != before:
                self._ui.debug('Waiting for final requests to finish. '
                               'remaining requests: {}'
                               ''.format(len(self.futures)))
            sleep(0.1)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Leave without waiting for in-flight requests.
        self._executor.shutdown(wait=False)
    # NOTE(review): this span is the body of a definition whose header is
    # not visible in this chunk.  Python 2 code (bare ``print`` statements,
    # ``u"..."`` literals).
    # Guppy heap profiler handle, used to dump heap stats below.
    h = hpy()
    executor = ThreadPoolExecutor(max_workers=THREADS)
    thread = None
    # Walk the scraped StackOverflow data: one JSON file per user.
    for subdirs, dirs, files in os.walk('stackoverflow/'):
        i = 0
        for filename in files:
            username = filename[:-5]  # strip the 5-char ".json" suffix
            github_filename = 'github/{}.csv'.format(username)
            # Remove stale temp output left by an interrupted run.
            if os.path.isfile('{}.tmp'.format(github_filename)):
                os.remove('{}.tmp'.format(github_filename))
            # Skip users whose GitHub CSV has already been produced.
            if os.path.isfile(github_filename):
                print u"skip {}".format(username)
                continue
            f = codecs.open('stackoverflow/{}'.format(filename), 'r', 'utf-8')
            data = json.load(f)
            f.close()
            fullname = data['answerer']['name']
            # Commented-out experiment: dispatch users through the pool,
            # joining on every (THREADS * 2)-th submission.
            #            if i % (THREADS * 2) == 0:
            #                if thread:
            #                    thread.result()
            #                thread = executor.submit(process_user, username, fullname)
            #            else:
            #                executor.submit(process_user, username, fullname)
            print u"put in thread pool user '{}'".format(username)

            # NOTE(review): despite the message above, the call is currently
            # synchronous — the pool is bypassed.
            process_user(username, fullname)
            i += 1
            # Debug instrumentation: print heap usage, then abort after the
            # first processed user.
            print h.heap()
            sys.exit(0)
    executor.shutdown(wait=True)
Example #4
0
class Pool(object):
    """Abstract thread pool with a bounded work queue and a background
    thread that periodically prints throughput statistics.

    Subclasses must implement ``handleError`` and ``agregate`` (sic).
    Python 2 code (``__metaclass__``, ``print`` statements).
    """
    __metaclass__ = ABCMeta
    class debugger(threading.Thread):
        # Background thread: prints pool throughput every ``interval``
        # seconds until stop() is called.
        def __init__(self, pool, interval = 5):
            self.pool = pool
            self.interval = interval
            threading.Thread.__init__(self)

        def start(self):
            # Capture baselines for mean/instant speed, then launch.
            self._running = True
            self.startTime = time.time()
            self.lastTime = time.time()
            self.lastNumber = 0
            self.numberAtStart = self.pool.processed
            threading.Thread.start(self)

        def stop(self):
            # Cooperative stop; run() re-checks the flag each cycle.
            self._running = False

        def debug(self):
            # Items/sec since start() (mean) and since the previous tick
            # (instant).
            # NOTE(review): if called within clock resolution of start(),
            # the elapsed time could be 0.0 — potential ZeroDivisionError.
            meanSpeed = (self.pool.processed - self.numberAtStart) / (time.time() - self.startTime)
            instantSpeed = (self.pool.processed - self.lastNumber) / (time.time() - self.lastTime)
            print "%s Threads: %s Remaining: %s Speed: %s / %s Done: %s" % (
                ("["+self.pool.name+"]").ljust(15),
                str(self.pool.maxWorkers).ljust(4),
                str(self.pool.getQueueSize()).ljust(3),
                ("%.2f" % instantSpeed).ljust(9),
                ("%.2f" % meanSpeed).ljust(9),
                str(self.pool.processed)
            )
            self.lastTime = time.time()
            self.lastNumber = self.pool.processed

        def run(self):
            while(self._running):
                self.debug()
                time.sleep(self.interval)

    def __init__(self, maxWorkers, queueSize):
        self.maxWorkers = maxWorkers
        self._pool = ThreadPoolExecutor(max_workers=maxWorkers)
        # HACK: pokes the executor's private _work_queue so submit()
        # blocks once queueSize tasks are pending — relies on internals.
        self._pool._work_queue.maxsize = queueSize
        #self._pool = ProcessPoolExecutor(max_workers=20)
        #self._pool._work_ids.maxsize = 2

        self.processed = 0
        # Shadows the nested ``debugger`` class attribute with an instance
        # and starts the stats thread immediately on construction.
        self.debugger = self.__class__.debugger(self)
        self.debugger.start()

    def getQueueSize(self):
        # Private-API peek at pending (not yet running) task count.
        return self._pool._work_queue.qsize()
        #return self._pool._work_ids.qsize()*self.maxWorkers


    @property
    def name(self):
        # Tag shown in the debugger's output line.
        return self.__class__.__name__

    def submit(self, task, *args, **kwargs):
        # Wrap the task so failures go to handleError and results to
        # agregate; ``processed`` is bumped either way.
        # NOTE(review): ``self.processed += 1`` is not atomic across
        # worker threads, so the counter may drift under contention.
        def handleSubmit():
            try:
                result = task(*args, **kwargs)
            except Exception as e:
                self.handleError(task, e)
            else:
                self.agregate(task, result)
            self.processed += 1

        self._pool.submit(handleSubmit)

    def waitAndShutdown(self):
        # Drain all queued work, then stop the stats thread.
        self._pool.shutdown(wait = True)
        self.debugger.stop()

    @abstractmethod
    def handleError(self, task, e):
        # Subclass hook: receives the task and the raised exception.
        pass

    @abstractmethod
    def agregate(self, task, result):
        # Subclass hook: receives the task and its successful result.
        pass