Example #1
def main():
    """主函数"""
    p = ThreadPool(settings.THREAD_NUM)
    p.run(verify, (
        settings.BASE_URL,
        p,
    ), callback=callback(p, ))
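
The project's ThreadPool.run here takes a worker function, an argument tuple, and a completion callback (note that callback(p, ) is evaluated immediately, so it presumably returns the actual callback). Below is a minimal sketch of the same fan-out-with-callback pattern using the standard library's concurrent.futures; it is not the project's ThreadPool, and verify, on_done, and the literal URL are placeholders standing in for the project's own objects.

# Hedged sketch: standard-library equivalent of the pattern above, for
# comparison only; verify/on_done are placeholders, not the project's code.
from concurrent.futures import ThreadPoolExecutor

def verify(base_url):
    # stands in for the project's verify()
    return "checked %s" % base_url

def on_done(future):
    # roughly the role of whatever callback(p) is presumed to return
    print(future.result())

with ThreadPoolExecutor(max_workers=4) as pool:      # THREAD_NUM analogue
    future = pool.submit(verify, "http://example.com")  # BASE_URL analogue
    future.add_done_callback(on_done)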
Example #2
File: pools.py Project: fferner/modrana
    def startBatch(self, batch, **kwargs):
        """Process a batch"""
        with self._mutex:
            if self._running:
                log.debug("can't start another batch - already running")
            else:
                self._running = True
                self._batch = batch

                self._pool = ThreadPool(name=self.name,
                                        maxThreads=self._maxThreads())

                # start the loading thread
                t = threads.ModRanaThread(name=self.name+"Loader", target=self._loadItems)
                self._loaderName = threads.threadMgr.add(t)
Example #3
File: pools.py Project: fferner/modrana
class BatchPool(object):
    def __init__(self, name=None):
        if name is None:
            self._name = _getBatchPoolName()
        else:
            self._name = name
        self._batch = set()
        self._doneCount = 0
        self._running = False
        self._shutdown = False
        self._mutex = threading.RLock()
        self._loaderName = None
        self._pool = None
        self.batchDone = Signal()

    def startBatch(self, batch, **kwargs):
        """Process a batch"""
        with self._mutex:
            if self._running:
                log.debug("can't start another batch - already running")
            else:
                self._running = True
                self._batch = batch

                self._pool = ThreadPool(name=self.name,
                                        maxThreads=self._maxThreads())

                # start the loading thread
                t = threads.ModRanaThread(name=self.name+"Loader", target=self._loadItems)
                self._loaderName = threads.threadMgr.add(t)

    @property
    def name(self):
        return self._name

    @property
    def batchSize(self):
        return len(self._batch)

    @property
    def done(self):
        with self._mutex:
            return self._doneCount

    @property
    def running(self):
        with self._mutex:
            return self._running

    def _loadItems(self):
        self._processBatch()
        # At this point we have either run out of work, in which case
        # we tell the pool to shut down once all queued work is processed
        # (now=False), or the batch pool has been explicitly shut down
        # (self._shutdown is True), in which case we tell the pool to shut
        # down as quickly as possible (now=True)
        # note: "async" became a reserved word in Python 3.7, so this keyword
        # argument only parses on older interpreters
        self._pool.shutdown(now=self._shutdown, join=True, async=False,
                            callback=self._stoppedCallback)
        log.info("%s loader done", self.name)

    def _handleItemWrapper(self, item):
        self._handleItem(item)
        # one item processed
        with self._mutex:
            self._doneCount += 1

    def stop(self):
        """Stop processing as soon as possible"""
        # tell the loading thread to shutdown
        with self._mutex:
            if self._running:
                self._shutdown = True

    def _stoppedCallback(self):
        """Called from a thread once the thread pool is fully stopped"""
        # let's misuse the callback thread a bit and make it wait for the
        # loader to finish
        self._shutdown = True

        # now we can report we are no longer running & ready for new batch
        with self._mutex:
            # also cleanup so we are back to initial state
            self._cleanup()
            self._running = False
            self._shutdown = False

        # trigger the batch done signal
        self.batchDone()

    def _cleanup(self):
        self._batch = []
        self._doneCount = 0
        self._loaderName = None
        self._pool = None

    # subclassing interface
    def _processBatch(self):
        pass

    def _handleItem(self, item):
        pass

    def _maxThreads(self):
        return 5
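
The three methods above form the subclassing interface: a concrete pool overrides _processBatch() to feed items to workers, _handleItem() to process one item, and _maxThreads() to cap the pool size. Below is a minimal sketch of such a subclass, assuming the BatchPool class above is importable; DemoBatchPool is an invented name, and the inline loop in _processBatch merely stands in for handing items to self._pool, whose exact submission API is not shown in this snippet.

# Hedged sketch of the subclassing interface; DemoBatchPool is invented
# for illustration and does not come from the modrana source.
class DemoBatchPool(BatchPool):
    def _processBatch(self):
        # A real subclass would hand each item to self._pool here; the
        # submission method of modrana's ThreadPool is not shown above,
        # so items are handled inline purely for illustration.
        for item in list(self._batch):
            if self._shutdown:
                break
            self._handleItemWrapper(item)

    def _handleItem(self, item):
        print("handled", item)

    def _maxThreads(self):
        return 2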
Example #4
File: crawler.py Project: lun0522/ProjectX
def download(bound, shared):
    begin, end = bound
    pool = ThreadPool(shared["num_thread"], fetch, shared["urls"][begin: end], shared)
    pool.join()
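
For comparison, the same slice-and-fan-out can be expressed with the standard library's concurrent.futures. The sketch below is not the project's ThreadPool, and fetch() is a placeholder for the crawler's own fetch function.

# Hedged sketch: equivalent fan-out with the standard library, not the
# project's ThreadPool; fetch() is a stand-in for the crawler's fetch.
from concurrent.futures import ThreadPoolExecutor

def fetch(url, shared):
    print("fetching", url)

def download_stdlib(bound, shared):
    begin, end = bound
    with ThreadPoolExecutor(max_workers=shared["num_thread"]) as pool:
        for url in shared["urls"][begin:end]:
            pool.submit(fetch, url, shared)
    # leaving the with-block waits for all workers, like pool.join()

download_stdlib((0, 2), {"num_thread": 2,
                         "urls": ["http://example.com/a", "http://example.com/b"]})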