Code example #1
File: concurrency.py Project: bossjones/ultron8
def is_green_pool_free(pool):
    """
    Return True if the provided green pool is free, False otherwise.
    """
    if CONCURRENCY_LIBRARY == "eventlet":
        # eventlet's GreenPool.free() returns the number of open slots,
        # not a boolean, so coerce it
        return pool.free() > 0
    elif CONCURRENCY_LIBRARY == "gevent":
        return not pool.full()
    else:
        raise ValueError("Unsupported concurrency library")
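A minimal usage sketch (hypothetical; assumes CONCURRENCY_LIBRARY = "eventlet" as in the module above, and an illustrative pool size of 2):

import eventlet

pool = eventlet.GreenPool(size=2)
print(is_green_pool_free(pool))  # True: both slots are still free

# GreenPool.spawn() claims a slot immediately, even before the
# greenthread actually runs
pool.spawn(eventlet.sleep, 1)
pool.spawn(eventlet.sleep, 1)
print(is_green_pool_free(pool))  # False: the pool is saturated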
Code example #2
def crawler(u):
    '''A very simple pooled gevent web crawler'''
    global crawled

    # Crawl the page, print the status
    response = requests.get(u)
    print(response.status_code, u)

    # Extract some links to follow
    for link in re.findall('<a href="(http.*?)"', response.text):

        # Limit to 10 pages (ignores links when the pool is already full)
        if crawled < 10 and not pool.full():
            crawled += 1
            pool.spawn(crawler, link)
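The crawler above relies on module-level state it does not show. A hedged bootstrap sketch (the pool size, seed URL, and monkey-patching step are assumptions, not part of the original snippet):

from gevent import monkey
monkey.patch_all()  # patch sockets before importing requests (assumed setup)

import re

import gevent.pool
import requests

pool = gevent.pool.Pool(10)  # illustrative pool size
crawled = 0                  # module-level counter incremented by crawler()

pool.spawn(crawler, 'http://example.com/')  # placeholder seed URL
pool.join()                                 # wait for the whole crawl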
Code example #3
File: scraper.py Project: prdx23/evolvo
def scrape_base_url():
    global data
    startTime = datetime.now()
    tree = html.fromstring(session.get(base_url).text)

    # Queue a (callable, kwargs) job for every link that has text
    for x in tree.xpath('//div[@class="st-text"]//td/a'):
        if x.xpath('./text()'):
            queue.put_nowait((parse_comp, {
                'url': domain + x.xpath('./@href')[0],
                'name': x.xpath('./text()')[0],
            }))

    while not queue.empty() and not pool.full():
        for x in range(0, min(queue.qsize(), pool.free_count())):
            t = queue.get_nowait()
            pool.spawn(t[0], t[1])  # spawn() already adds and starts the greenlet
    pool.join()
    print('Time Taken : ', datetime.now() - startTime)
    with open('data.json', 'w') as fp:
        json.dump(data, fp)
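The drain loop in scrape_base_url() is a recurring gevent idiom (the final example on this page uses the same loop). A standalone sketch of just that pattern, with a placeholder worker and job list:

import gevent
import gevent.pool
from gevent.queue import Queue

pool = gevent.pool.Pool(5)
queue = Queue()

def worker(job):
    gevent.sleep(0.1)  # placeholder for real work

for job in range(20):
    queue.put(job)

# Spawn workers until the queue is drained or the pool saturates
while not queue.empty() and not pool.full():
    for _ in range(min(queue.qsize(), pool.free_count())):
        pool.spawn(worker, queue.get_nowait())
pool.join()

Note that scheduling stops the first time the pool saturates; whatever is still queued at that point is never spawned, a quirk this sketch inherits from the examples.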
Code example #4
File: greenlets_concurrency.py Project: adgaudio/Bin
def green_producer(iterator, map_func, queue, consumer_instance, pool_size,
                   consumer_args=(), consumer_kwargs={}):
    """A map function:

    Map pool.spawn(func, iterated_element) on iterator
    until the gevent pool is full or the iterator is exhausted.

    Put the return value (a gevent.event.AsyncResult) of each spawned func call
    into the queue.

    Whenever the pool is full, switch control to the consumer_instance greenlet.

    When control switches back to this instance, fill up pool again.
    """
    log.debug('initializing producer. This will only appear once per greenlet')
    consumer_instance.switch(*consumer_args, **consumer_kwargs)
    pool = gevent.pool.Pool(pool_size)  # honor the pool_size argument
    for elem in iterator:
        log.debug("Queued job for elem {elem}".format(**locals()))
        queue.put(pool.spawn(map_func, elem))
        if pool.full():
            consumer_instance.switch()
    pool.join()  # wait for every spawned greenlet to finish
Code example #5
File: webhook_worker.py Project: troybots/omnibot
def wait_available(pool, pool_name):
    statsd = stats.get_statsd_client()
    if pool.full():
        statsd.incr('%s.pool.full' % pool_name)
        pool.wait_available()
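A hedged sketch of how a worker loop might drive wait_available() (the pool, handler, and queue are illustrative; it assumes the module context above, including the stats client):

import gevent.pool

pool = gevent.pool.Pool(20)

def handle(event):
    pass  # placeholder: process one webhook event

def run(event_queue):
    while True:
        wait_available(pool, 'webhook')  # block until a slot frees up
        pool.spawn(handle, event_queue.get())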
Code example #6
File: messaging_worker.py Project: notbrain/scalr
def _spawn_gm_worker(self):
    global pool
    if not pool.full():
        gm_worker = _GMWorker(self.parent)
        gm_worker.register_task('message.send', send)
        pool.add(gevent.spawn(gm_worker.work, time_to_work=20))
Code example #7
    def _download_res(
            self,
            filepath,
            rate,
            uploading=True,
            callback=None,
            cb_kwargs=None):
        try:
            peers = self.get_peers()
            self._record_get_peer_ts()
            peers_num = len(peers)
            count = 0

            # just get resource size
            while True:
                ip, port = peers[count]
                logger.info('get resource size')
                try:
                    ret = self._requests_session.get(
                        "{protocol}{ip}:{port}/?{res}"
                        .format(
                            protocol=PROTOCOL, ip=ip,
                            port=port,
                            res=urlencode({'res_url': self.upload_res_url})),
                        stream=True,
                        headers={"Range": "bytes=0-0"},
                        timeout=1)

                    if ret.ok:
                        # e.g. "Content-Range: bytes 0-0/17" -> total size 17
                        content_range = ret.headers.get("Content-Range")
                        res_length = int(content_range.split('/')[-1])
                        break
                    else:
                        logger.warn(
                            'get piece from ip: %s port: %s error, code: %s ' %
                            (ip, port, ret.status_code))
                        count += 1
                        self.del_from_tracker(ip=ip, peer_port=port)
                except ConnectionError:
                    logger.warn(
                        'get piece from ip: %s port: %s error ConnectionError'
                        % (ip, port))
                    count += 1
                    self.del_from_tracker(ip=ip, peer_port=port)
                except Timeout:
                    logger.warn(
                        'get piece from ip: %s port: %s error Timeout' %
                        (ip, port))
                    count += 1
                    self.del_from_tracker(ip=ip, peer_port=port)
                finally:
                    if count >= peers_num:
                        logger.warn("No peers avaliable")
                        peers = self.get_peers()
                        peers_num = len(peers)
                        count = 0

            logger.info('%s is size of %s' %
                        (self.upload_res_url, sizeof_fmt_human(res_length)))

            self.piece_file = PieceFile(res_length, filepath)

            pool_work_num = 15
            pool_q_size = pool_work_num * 2
            pool = gevent.pool.Pool(pool_work_num)
            self.start_ready_upload_thread()

            if rate:
                self.download_rate = rate
            else:
                rate = self.download_rate

            if rate:
                self.token_bucket = TokenBucket(rate)

            while self.piece_file.has_unalloc():
                args_list = list()
                for peer in peers:
                    if peer not in self._peer_in_conn:
                        args_list.append((peer, None))
                for args in args_list[:pool_q_size]:
                    pool.apply_async(self._download_piece_thread, *args)
                # wait for a free slot before scheduling more work
                while pool.full():
                    gevent.sleep(0.2)

                if not self.piece_file.has_empty():
                    pool.join()

                logger.debug(
                    'test get_empty_block: %s' %
                    self.piece_file.get_empty_piece())

                logger.debug('peer in connection:  %s' % self._peer_in_conn)
                if self.piece_file.has_unalloc():
                    try:
                        tv = self._get_last_get_peer_tv()
                        if tv < GET_PEER_INTERVAL:
                            gevent.sleep(GET_PEER_INTERVAL - tv)
                        g = gevent.spawn(self.get_peers)
                        peers = g.get()
                        self._record_get_peer_ts()
                    except NoPeersFound:
                        # if pool.workRequests:
                        if pool_work_num - pool.free_count() > 0:
                            # some remained piece maybe on the way
                            pool.join()
                            if self.piece_file.has_unalloc():
                                tv = self._get_last_get_peer_tv()
                                if tv < GET_PEER_INTERVAL:
                                    gevent.sleep(GET_PEER_INTERVAL - tv)
                                g = gevent.spawn(self.get_peers)
                                peers = g.get()
                                self._record_get_peer_ts()
                            else:
                                break
                        else:
                            logger.error("no worker running, and get no peers")
                            raise
                else:
                    break

            logger.info('File is complete, size: %s' %
                        self.piece_file.get_real_filesize())

        except NoPeersFound:
            if self.fallback:
                logger.info('Use fallback way to get resource')
                try:
                    res_length = get_res_length(self.res_url)
                except ConnectionError:
                    raise OriginURLConnectError(self.res_url)
                logger.info(
                    'get resource length %s' %
                    sizeof_fmt_human(res_length))
                if not self.piece_file:
                    self.piece_file = PieceFile(res_length, filepath)

                self.start_ready_upload_thread()
                http_download_to_piecefile(
                        self.res_url, self.piece_file)
            else:
                self.deactivate()
                raise

        # self.piece_file.tofile()
        self.res_is_downloaded = True
        if callback:
            logger.info('Run callback')
            callback(**cb_kwargs)
Code example #8
File: webhook_worker.py Project: dschaller/osscla
def wait_available(pool, pool_name):
    statsd = stats.get_statsd_client()
    if pool.full():
        statsd.incr('%s.pool.full' % pool_name)
        pool.wait_available()
    return not STATE['shutdown']
Code example #9
        if result != '':
            print('Found [%s][%s] in %s' % (result, link, tag))
            a += 1
            json_dict.setdefault(tag, []).append((result, link, imgs[i]))

r = session.get(url)
tree = html.fromstring(r.text)
a_tags = tree.xpath('//li[@class="menu-item"]//a')
tags = [(x.xpath('.//@href'), repr(x.xpath('.//text()'))) for x in a_tags]

for t in tags:
    url = t[0]
    result = regex.findall(t[1])
    # print url, result
    # scrape(url[0], result[0])
    queue.put((url[0], result[0]))


while not queue.empty() and not pool.full():
    for x in range(0, min(queue.qsize(), pool.free_count())):
        pool.spawn(worker)
pool.join()
print(a)
print('Time Taken : ', datetime.now() - start_time)
with open('data.json', 'w') as fp:
    json.dump(json_dict, fp)