Example #1
def main(args):
    nsxchecker = NsxChecker(args.controller, args.password, args.quiet)
    nsxchecker.lswitch = nsxchecker.get_lswitch(args.network)
    nsxchecker.lports = nsxchecker.get_lports(nsxchecker.lswitch)
    nsxchecker.macs = [lport['mac'] for lport in nsxchecker.lports]
    if len(nsxchecker.lports) <= 1:
        print("No ports to check!")
        return 0
    if not nsxchecker.quiet:
        print("%s ports on this network." % len(nsxchecker.lports))
    if not nsxchecker.lports:  # unreachable: the <= 1 check above already returned
        if not nsxchecker.quiet:
            print("Nothing found!")
        quit()
    if args.full:
        out_string = "A full check will run %s src/dest checks."
        print(out_string % int(math.pow(len(nsxchecker.lports), 2)))
        pool.map(nsxchecker.check_port_full, nsxchecker.lports)
    else:
        pool.map(nsxchecker.check_port, nsxchecker.lports)
    if not nsxchecker.quiet:
        print "-" * 40

    fail_percent = percentage(nsxchecker.fail, nsxchecker.total)
    success_percent = percentage(nsxchecker.success, nsxchecker.total)
    if success_percent > 0:
        out_string = "%s percent successful (network ID: %s)"
        print(out_string % (success_percent, args.network))
    if fail_percent > 0:
        out_string = "%s percent failed (network ID: %s)"
        print(out_string % (fail_percent, args.network))
    return 0
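The snippet relies on module-level names it never defines (math, pool, percentage); a minimal sketch of plausible stand-ins, all assumptions rather than the original source:

import math
import gevent.pool

pool = gevent.pool.Pool(50)  # pool size is a guess

def percentage(part, whole):
    # plain percent value, e.g. percentage(5, 10) == 50.0
    return 100.0 * part / whole if whole else 0.0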
Example #3
def start(self):
    offset = 1
    time.sleep(1)
    pool = ThreadPool(THREAD_NUM)
    while self.run:
        offsets = [i + offset for i in range(10)]
        pool.map(self.get_a_list, offsets)
        offset += 10
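The imports and constant this method relies on are not shown; a plausible setup (ThreadPool could equally come from multiprocessing.dummy):

import time
from multiprocessing.pool import ThreadPool

THREAD_NUM = 10  # size is a guess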
Example #4
def main():
    exchanges = [
        'bitbankcc',
        'bitfinex',
        'bitflyer',
        'bitmex',
        'coincheck',
        'kraken',
        'quoinex',
        'zaif',
    ]

    clients = {}  # type: Dict[str, ClientBase]
    pool = gevent.pool.Pool()
    for ex in exchanges:
        clients[ex] = getattr(coinapi, ex).Client()  # type: ClientBase

    def get_balance(key: str):
        balances = {}
        for k, v in clients[key].balance().items():
            balances[C_MAP.get(k, k)] = v
        return key, balances

    def get_tick(tick: str):
        instrument, ex = TICKER_MAP[tick]
        return instrument, clients[ex].tick(instrument)

    ticks = dict(pool.map(get_tick, list(TICKER_MAP)))
    balances = dict(pool.map(get_balance, exchanges))
    fx_rates = get_fxrates()

    currency_totals = defaultdict(lambda: dict(qty=.0, jpy=.0))
    totals = {}
    for ex, balance in sorted(balances.items()):
        ex_totals = defaultdict(lambda: dict(qty=.0, jpy=.0))
        for k, v in sorted(balance.items()):
            q = v['total']
            if not q:
                continue
            if k in TICKER_MAP:
                rate = ticks['{}/JPY'.format(k)]['bid']
            else:
                rate = fx_rates['{}/JPY'.format(k)]
            jpy = q * rate
            ex_totals[k]['qty'] += q
            ex_totals[k]['jpy'] += jpy
            ex_totals['_total']['jpy'] += jpy
            currency_totals[k]['qty'] += q
            currency_totals[k]['jpy'] += jpy
        print('# {}'.format(ex))
        pprint(dict(ex_totals))
        totals[ex] = ex_totals['_total']['jpy']

    print('# currency')
    pprint(dict(currency_totals))
    totals['_total'] = sum(totals.values())
    print('# jpy')
    pprint(dict(totals))
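C_MAP and TICKER_MAP are module-level constants the snippet does not show; from how they are indexed above, they plausibly look like this (values are illustrative guesses):

from typing import Dict, Tuple

# exchange-specific symbol -> canonical symbol
C_MAP = {'xbt': 'BTC'}  # type: Dict[str, str]
# currency -> (instrument quoted against JPY, exchange that quotes it)
TICKER_MAP = {'BTC': ('BTC/JPY', 'bitflyer')}  # type: Dict[str, Tuple[str, str]]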
Example #5
def process_season(self, season):
    self.uploader.set_season_endpoint(season)
    pool = gevent.pool.Pool(5)
    game_date_url_list = self.fetch_game_list_for_season(season)
    pool.map(self._fetch_parse_and_upload, game_date_url_list)
    pool.join()
    if len(self.uploader.fetch_keys()) != len(self.all_games):
        raise Exception('Some games were not uploaded for season %s!'
                        % season)
Example #6
def main():
    authors = getRequestName()
    proxies_list = getProxies()
    session = getSession(len(proxies_list) + 5)
    fout = open('result.txt', 'a+', encoding='utf-8')
    pool = gevent.pool.Pool(len(proxies_list))
    lock = gevent.lock.Semaphore()
    func = functools.partial(multiprocess, lock, session, proxies_list, fout)

    pool.map(func, enumerate(authors))
Example #7
def multiprocess_upload(QueueClass, queue_name, tasks, parallel=True):
  if parallel is True:
    parallel = mp.cpu_count()
  elif parallel <= 0:
    raise ValueError("parallel must be a positive number or True (all cpus). Got: " + str(parallel))

  if parallel == 1:
    soloprocess_upload(QueueClass, queue_name, tasks)
    return 

  def capturing_soloprocess_upload(*args, **kwargs):
    try:
      soloprocess_upload(*args, **kwargs)
    except Exception as err:
      print(err)
      error_queue.put(err)

  uploadfn = partial(
    capturing_soloprocess_upload, QueueClass, queue_name
  )
  tasks = _scatter(tasks, parallel)

  # This is a hack to get dill to pickle dynamically
  # generated classes. This is an important use case
  # for when we create iterators with generator __iter__
  # functions on demand.

  # https://github.com/uqfoundation/dill/issues/56

  try:
    task = next(item for item in tasks if item is not None)
  except StopIteration:
    return 

  cls_module = task.__class__.__module__
  task.__class__.__module__ = '__main__'

  with pathos.pools.ProcessPool(parallel) as pool:
    pool.map(uploadfn, tasks)

  task.__class__.__module__ = cls_module

  if not error_queue.empty():
    errors = []
    while not error_queue.empty():
      err = error_queue.get()
      if err is not StopIteration:
        errors.append(err)
    if len(errors):
      raise Exception(errors)
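The error_queue referenced above is never defined in the snippet; presumably it is a module-level multiprocessing queue along these lines (an assumption):

import multiprocessing as mp

# shared queue onto which capturing_soloprocess_upload pushes worker exceptions
error_queue = mp.Queue()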
Example #8
def loop_task(redis_task_list_name):
    _wait_run_task_num = count_task(redis_task_list_name)
    logging.info("existing tasks: %s" % _wait_run_task_num)
    pool = multiprocessing.Pool(VIDEO_TO_SENTENCE_THREADS)
    logging.info("created multiprocessing pool: %s" % VIDEO_TO_SENTENCE_THREADS)
    _all_task = [
        get_task(redis_task_list_name) for _ in range(_wait_run_task_num)
    ]
    logging.info("collected tasks, feeding pool")
    pool.map(single_video_translate_task, _all_task)
    logging.info("added %s tasks to pool" % _wait_run_task_num)
    pool.close()
    pool.join()
    logging.info("all tasks done: %s" % datetime.datetime.now())
    return _wait_run_task_num
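count_task and get_task are not shown; given the redis_task_list_name parameter, they are presumably thin wrappers over a Redis list, roughly (an assumption):

import redis

_redis = redis.StrictRedis()

def count_task(redis_task_list_name):
    # number of pending tasks on the list
    return _redis.llen(redis_task_list_name)

def get_task(redis_task_list_name):
    # pop one task off the list
    return _redis.rpop(redis_task_list_name)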
Example #9
def subpool_map(pool_size, func, iterable):
    """ Starts a Gevent pool and run a map. Takes care of setting current_job and cleaning up. """

    if not pool_size:
        return [func(*args) for args in iterable]

    counter = itertools_count()

    current_job = get_current_job()

    def inner_func(*args):
        next(counter)
        if current_job:
            set_current_job(current_job)
        ret = func(*args)
        if current_job:
            set_current_job(None)
        return ret

    start_time = time.time()
    pool = gevent.pool.Pool(size=pool_size)
    ret = pool.map(inner_func, iterable)
    pool.join(raise_error=True)
    total_time = time.time() - start_time

    log.debug("SubPool ran %s greenlets in %0.6fs" % (counter, total_time))

    return ret
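A hedged usage sketch for subpool_map above (fetch_length and the url list are made up; the module-level helpers it uses, such as get_current_job and log, are assumed importable). Note that with a nonzero pool_size each item is passed to func as-is, while the pool_size=0 fallback calls func(*args) and therefore expects the iterable to yield argument tuples:

def fetch_length(url):
    return len(url)  # placeholder work

urls = ["https://example.com/a", "https://example.com/b"]
results = subpool_map(2, fetch_length, urls)  # one result per url, in order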
Example #11
def start_listen_gnsq_map(cls, topic, callback):
    import gevent.pool
    import gnsq
    from gnsq.contrib.queue import QueueHandler
    consumer = gnsq.Consumer(
        # nsqd_tcp_addresses=['192.168.5.134:4150'],
        lookupd_http_addresses=['192.168.5.134:4161'],
        # lookupd_http_addresses=['127.0.0.1:4161'],
        topic=topic,
        channel='channel1',
        max_in_flight=16,
    )
    queue = QueueHandler(maxsize=16)
    consumer.on_message.connect(queue)
    consumer.start(block=False)
    pool = gevent.pool.Pool(16)
    pool.map(callback, queue)
Example #12
def fuzz_top_level(self):
    print('[*] TOP-LEVEL DOMAIN FUZZING...')
    with open("./dict/top-level.dict") as file_handle:
        content_dict = file_handle.read().split('\n')
    if content_dict[-1] == '':
        del content_dict[-1]
    pool = gevent.pool.Pool(self.threads)
    data = pool.map(self.get_ip, content_dict)
Example #13
def run(self):
    """Entry point."""
    self.get_city_info()
    # print(self.city_info)
    for city_name, city_url in self.city_info.items():
        # or restrict to: ['郑州', '北京', '上海', '广州', '深圳']
        if '-zhaopin' not in city_url:
            city_url = city_url.rstrip('/') + '-zhaopin/'
        response = self.get_response(url=city_url, method='GET')
        html = etree.HTML(response.text)
        district_name = html.xpath(
            '//div[@data-type="district"]/a[position()>1]/text()')
        item = [{
            'city_name': city_name,
            'district': name,
            'keyword': 'python'
        } for name in district_name]
        print(item)
        pool = gevent.pool.Pool(size=1)
        pool.map(self.get_job_info, item)
Example #14
    def archive_articles(self):
        log.info('downloading news from {name}'.format(**self.__dict__))
        pool = gevent.pool.Pool(self.session.max_connections)
        pool.map(self.save_article, self._article_number_generator())

        r = self.compress_and_sort_index()
        if not r:
            clean_up(self.name, self.item.identifier, self.date)
            return

        self.state[self.name] = max(self.articles_archived)
        local_state_fname = '{identifier}_state.json'.format(**self.__dict__)
        with open(local_state_fname, 'w') as fp:
            json.dump(self.state, fp)

        ## Item is ready to upload, remove lock.
        mbox_fname = '{name}.{date}.mbox.gz'.format(**self.__dict__)
        mbox_lck_fname = mbox_fname + '.lck'
        shutil.move(mbox_lck_fname, mbox_fname)
        log.info('archived and indexed {0} '
                 'articles from {1}'.format(len(self.articles_archived), self.name))
Example #15
def parallel_call(
    function: Callable,
    args: Any,
    repeatable_args: Optional[Union[Tuple[Any, ...], List[Any]]] = None,
    fold_list: bool = False,
    fold_dict: bool = False,
    force_starmap: bool = False,
    pool_size: int = 32,
) -> Any:
    """
    Execute a function in parallel
    :param function: Function to execute
    :param args: Args to pass to the function
    :param repeatable_args: Repeatable args to pass with the original args
    :param fold_list: Compress the results into a 1D list
    :param fold_dict: Compress the results into a single dictionary
    :param force_starmap: Force system to use Starmap over normal selection process
    :param pool_size: How large the gevent pool should be
    :return: Results from execution, with modifications if desired
    """
    pool = gevent.pool.Pool(pool_size)

    if repeatable_args:
        extra_args_rep = [itertools.repeat(arg) for arg in repeatable_args]
        results = pool.map(lambda g_args: function(*g_args),
                           zip(args, *extra_args_rep))
    elif force_starmap:
        results = pool.map(lambda g_args: function(*g_args), args)
    else:
        results = pool.map(function, args)

    if fold_list:
        return list(itertools.chain.from_iterable(results))

    if fold_dict:
        return dict(collections.ChainMap(*results))

    return results
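A hedged usage sketch for parallel_call; fetch_price is hypothetical. repeatable_args zips a repeated "JPY" next to each ticker, and fold_dict merges the per-call dicts through collections.ChainMap:

def fetch_price(ticker, currency):
    return {ticker + '/' + currency: 1.0}  # placeholder

# each call becomes fetch_price(ticker, "JPY"); results merge into one dict
merged = parallel_call(fetch_price, ["BTC", "ETH"],
                       repeatable_args=("JPY",), fold_dict=True)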
Example #16
def subpool_map(pool_size, func, iterable):
    """ Starts a Gevent pool and run a map. Takes care of setting current_job and cleaning up. """

    if not pool_size:
        return [func(*args) for args in iterable]

    counter = itertools_count()

    current_job = get_current_job()

    def inner_func(*args):
        """ As each call to 'func' will be done in a random greenlet of the subpool, we need to
            register their IDs with set_current_job() to make get_current_job() calls work properly
            inside 'func'.
        """
        next(counter)
        if current_job:
            set_current_job(current_job)

        try:
            ret = func(*args)
        except Exception as exc:
            trace = traceback.format_exc()
            log.error("Error in subpool: %s \n%s" % (exc, trace))
            raise

        if current_job:
            set_current_job(None)
        return ret

    def inner_iterable():
        """ This will be called inside the pool's main greenlet, which ID also needs to be registered """
        if current_job:
            set_current_job(current_job)

        for x in iterable:
            yield x

        if current_job:
            set_current_job(None)

    start_time = time.time()
    pool = gevent.pool.Pool(size=pool_size)
    ret = pool.map(inner_func, inner_iterable())
    pool.join(raise_error=True)
    total_time = time.time() - start_time

    log.debug("SubPool ran %s greenlets in %0.6fs" % (counter, total_time))

    return ret
Example #18
def get_keyword(url):
    req = requests.get(url, headers=HEADERS)
    html = req.text
    soup = bs(html, 'html.parser')
    keywords = soup.select(
        '#PM_ID_ct > div.header > div.section_navbar > div.area_hotkeyword.PM_CL_realtimeKeyword_base > div.ah_list.PM_CL_realtimeKeyword_list_base > ul > li > .ah_a'
    )
    answer = {'url': [], 'rank': [], 'word': [], 'related': []}
    for keyword in keywords:
        answer['url'].append(keyword.get('href'))
        answer['rank'].append(keyword.select('.ah_r')[0].get_text())
        answer['word'].append(keyword.select('.ah_k')[0].get_text())

    answer['related'] = list(pool.map(get_relateds, keywords))

    return answer
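The snippet depends on module-level names it does not define (HEADERS, pool, bs, and a get_relateds helper assumed to live alongside it); a plausible setup, all assumptions:

import requests
import gevent.pool
from bs4 import BeautifulSoup as bs

HEADERS = {'User-Agent': 'Mozilla/5.0'}  # a guess
pool = gevent.pool.Pool(10)              # size is a guess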
Example #19
def crawl(url, pool_count=1000):
    """Takes a url and prints to stdout a list of static assets and links
    on each linked page in the domain as a JSON object

    {
    "Link: {
        "Links": [],
        "Static Assets": [

        ]
    }
}

    """
    consider_links = []
    seen_links = set()
    sitemap = {}
    pool = gevent.pool.Pool(pool_count)
    consider_links.append(url)

    while len(consider_links) > 0:

        results = pool.map(makeConn, consider_links)
        seen_links.update(consider_links)
        del consider_links[:]
        for c in results:
            if c is None:
                continue

            assets = c.getAssets()
            seen_links.update(assets["Static"])
            sitemap[c.parent_link] = {"Static Assets": assets[
                                      "Static"], "Links": assets["Links"]}

            for link in assets["Consider_Links"]:

                if link not in seen_links:
                    consider_links.append(link)

    return sitemap
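A hedged usage sketch; makeConn and its getAssets() are assumed to be defined elsewhere in the module:

import json

sitemap = crawl("https://example.com", pool_count=100)
print(json.dumps(sitemap, indent=4))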
Example #20
def reqNgram(pattern_list):
    log("hey")

    chunks = [connect(pattern_list[x:x + 10])
              for x in range(0, len(pattern_list), 10)]

    pool = gevent.pool.Pool(5)
    results = pool.map(runQuery, chunks)
    big_dict = {}
    for dic in results:
        for d in dic:
            log(d)

            if "(ALL)" in d:
                d = d.replace("(ALL)", "")

            if d in big_dict:
                big_dict[d] += dic[d]
            else:
                big_dict[d] = dic[d]

    return big_dict
Example #21
gevent.monkey.patch_all()

import sys
import requests
import gevent
import gevent.pool
import random
import json


def consume(id):
    r = requests.get('http://127.0.0.1:5000/stream/' + str(id), stream=True)
    for line in r.iter_lines(10):
        try:
            obj = json.loads(line)
        except ValueError:
            print(line)
            raise

        if random.randint(0, 99) <= 1:
            print(obj)


POOL_SIZE = 1000

if __name__ == '__main__':

    pool = gevent.pool.Pool(POOL_SIZE)
    # pool.map() returns results, not greenlets; spawn + joinall keeps the intent
    greenlets = [pool.spawn(consume, i) for i in range(POOL_SIZE)]
    gevent.joinall(greenlets)
Example #22
def getBzhanInfoByKeywords(keyword='曾艳芬'):
    """
    保存所有封面,返回值是一个json,结构为[{"imgURL": imgURL, "shareURL": flash地址, "title": 标题, 'up': UP主},……]
    :type keywords: str
    :return: json
    """
    k = urlencode({'keyword': keyword})
    baseURL = 'http://search.bilibili.com/ajax_api/video?%s&order=totalrank&page=' % k

    if not os.path.exists('img'):
        os.mkdir('img')

    # pre-fetch one page to learn the page count
    preNum = 1
    preR = requests.get(baseURL + str(preNum))
    prerResult = preR.json()
    numPages = int(prerResult['numPages'])
    pool = gevent.pool.Pool(20)

    def downloadImg(imgURL):
        """
        :type imgURL: str
        :return: None
        """
        r = requests.get(imgURL)
        index = imgURL.index('archive') + 8
        saveName = str(imgURL[index:])
        with open(os.path.join('img', str(saveName)), 'wb') as f:
            f.write(r.content)

    def getInfoFromLi(li):
        soupTemp = BeautifulSoup(str(li), "html.parser")
        avURL = str(soupTemp.a['href'])
        _avIndex = avURL.index('av')
        avCode = avURL[_avIndex + 2:-1]
        shareURL = 'http://static.hdslb.com/miniloader.swf?aid=%s&page=1' % avCode
        title = soupTemp.img['title']
        imgURL = soupTemp.img['src']
        upSpan = soupTemp.find_all('span')[-1]
        try:
            up = BeautifulSoup(str(upSpan), "html.parser").a.string
        except AttributeError:
            up = 'None'
        return {
            'shareURL': shareURL,
            'title': title,
            'imgURL': imgURL,
            'up': up
        }

    def getInfoFromHTML(html):
        soup = BeautifulSoup(html, "html.parser")
        info = soup.find_all('li')
        return info

    result = []
    urls = []

    for i in range(1, numPages + 1):
        url = baseURL + str(i)
        urls.append(url)
    print('parsing......')
    rs = pool.map(requests.get, urls)

    for r in rs:
        html = r.json()['html']
        lis = getInfoFromHTML(html)
        for li in lis:
            temp = getInfoFromLi(li)
            result.append(temp)
            try:
                # pool.spawn(downloadImg(temp['imgURL']))
                print(temp['imgURL'])
            except Exception:
                print('error with %s' % temp['imgURL'])

    print('ok!!!!!')
    pool.join()

    return json.dumps(result)
Example #23
N = 1
PoolNum = 500

for i in range(N):
    print('starting try %d' % i)
    #for status, data in jobs:
    #    print(status, data[:10])
    #tic(urllibT, 'urllib.request')

    #jobs = [download_requests(url) for url in urls]
    #for status, data in jobs:
    #    print(status, data[:10])
    #tic(requestsT, 'requests')

    pool = gevent.pool.Pool(PoolNum)
    data = pool.map(download_requests, urls)
    #for status, text in data:
    #    print(status, text[:10])
    #tic(requestsT, 'requests with gevent.pool')

    print "gevent...."
    jobs = [gevent.spawn(download_requests, url) for url in urls]
    gevent.joinall(jobs)
    #for i in jobs:
    #    print(i.value[0], i.value[1][:10])
    #tic(requestsT, 'requests with gevent.spawn')
'''
import matplotlib.pyplot as plt  
x = list(range(1, N+1))  
plt.plot(x, urllibL, label='urllib')  
plt.plot(x, requestsL, label='requests')  
Example #24
# -*- coding: utf-8 -*-

from gevent import monkey, spawn, joinall, pool
monkey.patch_all()  # automatically switch greenlets on blocking calls


def run_task(num):
    print("得到数字%s, 平方为%s" % (num, pow(num, 2)))


if __name__ == '__main__':
    pool = pool.Pool(2)
    nums = [i for i in range(5)]
    # map() feeds each value from the second argument into the function
    # passed as the first argument
    pool.map(run_task, nums)
Example #25
    count += len([x for x in gf if x]) - 1
    Q.put(count)


def get_index_urls(item):
    for f in item.iter_files():
        if f.format == "Comma-Separated Values GZ":
            yield "http://archive.org/download/{0}/{1}".format(item.identifier, f.name)


if __name__ == "__main__":
    identifier = sys.argv[-1]
    item = get_item(identifier)

    pool = gevent.pool.Pool(40)
    pool.map(get_gzip_file_from_url, get_index_urls(item))
    _imagecount = 0

    while not Q.empty():
        _imagecount += Q.get()

    if _imagecount == int(item.metadata.get("imagecount", 0)):
        sys.stdout.write("{0} - imagecount is up to date\n".format(item.identifier))
        sys.exit(0)

    md = dict(imagecount=_imagecount)
    r = item.modify_metadata(md)
    if r.status_code == 200:
        sys.stdout.write("{0} - imagecount is up to date\n".format(item.identifier))
        sys.exit(0)
Example #26
    else:
    # dst = RedirectDst(dst_host, dst_port)
    # dst.send(cmd)
    print('redirect: ', cmd)


class RedirectDst(object):
    def __init__(self, host, port):
        self.host = host
        self.port = port
        self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.s.connect((host, port))

    def send(self, data):
        self.s.send(data)


if __name__ == '__main__':
    _, host, port, dst_host, dst_port = sys.argv

    port = int(port)
    cluster = RedisCluster.from_node(host, port)
    masters = [r for r in cluster.redis_list if r.is_master()]
    fakeslaves = [FakeSlave(m.ip, m.port) for m in masters]
    fakeslaves = [
        FakeSlave(m.ip, m.port) for m in masters
        if m.ip == host and m.port == port
    ][:1]
    print(fakeslaves)
    pool = gevent.pool.Pool(len(fakeslaves))
    pool.map(lambda s: s.loop(), fakeslaves)
Example #27
        return ret

    def inner_iterable():
        """ This will be called inside the pool's main greenlet, which ID also needs to be registered """
        if current_job:
            set_current_job(current_job)

        for x in iterable:
            yield x

        if current_job:
            set_current_job(None)

    start_time = time.time()
    pool = gevent.pool.Pool(size=pool_size)
    ret = pool.map(inner_func, inner_iterable())
    pool.join(raise_error=True)
    total_time = time.time() - start_time

    log.debug("SubPool ran %s greenlets in %0.6fs" % (counter, total_time))

    return ret


def subpool_imap(pool_size, func, iterable, flatten=False, unordered=False, buffer_size=None):
    """ Generator version of subpool_map. Should be used with unordered=True for optimal performance """

    if not pool_size:
        for args in iterable:
            yield func(*args)
Example #28
    '''

    resp = requests.get(url2)
    global i
    i += 1
    print(i)
    data = resp.text
    print('%d bytes received from %s. %s' % (len(data), url2,
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))))
    #print
'''
gevent.joinall([
        gevent.spawn(f, 'http://www.wsgjp.com.cn/'),
        gevent.spawn(f, 'http://www.wsgjp.com.cn/'),
        gevent.spawn(f, 'http://www.wsgjp.com.cn/'),


])
'''
#urls=["http://www.wsgjp.com.cn/"]*300
urls=["http://dba.wsgjp.com.cn/"]*300
print "pool"
pool = gevent.pool.Pool(300)
data = pool.map(f, urls)



print "no pool"
jobs = [gevent.spawn(f, url) for url in urls]
gevent.joinall(jobs)

print(i)
Example #29
            downloaded = []

            if tries > 0:
                print('--- Retry attempt {0} of {1} ---'.format(tries, retries))
            # Remove existing files
            if args.overwrite is None:
                new_imdb_profiles = list(filter(functools.partial(_not(profile_exists), args.outdir),
                                                imdb_profiles))
            else:
                new_imdb_profiles = imdb_profiles

            if len(new_imdb_profiles) == 0:
                # Nothing to download
                break

            results = pool.map(download_imdb_csv, new_imdb_profiles)
            pool.join()
            for response, profile, error in results:
                try:
                    if error is not None:
                        raise Exception(error)

                    # Save
                    savefile = os.path.join(args.outdir, '{0}.csv'.format(profile['username']))
                    with open(savefile, 'wb') as saveto:
                        saveto.write(response)
                    downloaded.append(profile)
                except Exception as e:
                    failed.append(profile)

            tries += 1
Example #30
              (count, tv_type, item_movie_info['title']))
        count += 1

        if count >= 100000:
            return True

    def craw(self, tv_type, page):
        if page is not None:
            self.craw_dianyinggang_tv(tv_type, page)
        return


print(movie_type)

obj_spider = SpiderMain()
# data to be put on the queue
all_items = obj_spider.get_all_craw_url_item_url(movie_type)

pool = gevent.pool.Pool(30)

requestsT = startTimer()
"""
for item in all_items:
    pool.add(gevent.spawn(obj_spider.craw_dianyinggang_tv, item))
pool.join()
"""
data = pool.map(obj_spider.craw_dianyinggang_tv, all_items)
print("gevent spawn request using time: ", ticT(requestsT))

print("exit all threads\n")
Example #31
def mvthread(purl, sem, result):
    global errthread
    with sem:
        source = getpagedata(purl)
        if type(source) == str:
            mytree = lxml.etree.HTML(source)
            # the page comes in two layouts
            li_list = mytree.xpath(
                '//ul[@class=\"panel\"]//li[@class=\"yk-col4 mr1\"]')
            if len(li_list) <= 5:
                li_list = mytree.xpath(
                    '//div[@class=\"yk-row\"]//div[@class=\"yk-col4 \"]')
            kind = mytree.xpath(
                '//div[@class=\"item noborder\"]/ul/li[@class=\"current\"]/span/text()'
            )
            if len(kind) <= 5:
                kind = mytree.xpath(
                    '//div[@class=\"item border\"]/ul/li[@class=\"current\"]/span/text()'
                )
            print('---kind.type--->>>--%s' % kind)
            # use a coroutine pool to cap how many greenlets run at once
            pool = gevent.pool.Pool(15)
            urlist = []
            for li in li_list:
                cover = li.xpath('.//div[@class=\"p-thumb\"]/img/@src')[0]
                num = li.xpath(
                    './/ul[@class=\"p-info pos-bottom\"]//span[@class=\"p-time \"]/span/text()'
                )
                if len(num) == 0:
                    num = li.xpath(
                        './/ul[@class=\"p-info pos-bottom\"]//span[@class=\"p-time hover-hide\"]/span/text()'
                    )[0]
                else:
                    num = num[0]
                title = li.xpath(
                    './/ul[@class=\"info-list\"]/li[@class=\"title\"]/a/text()'
                )[0]
                link = 'http:' + li.xpath(
                    './/ul[@class=\"info-list\"]/li[@class=\"title\"]/a/@href'
                )[0]
                aclist = li.xpath(
                    './/ul[@class=\"info-list\"]/li[@class=\"actor\"]/a/text()'
                )
                if len(aclist) == 0:
                    aclist = li.xpath(
                        './/ul[@class=\"info-list\"]/li[@class=\"actor\"]/text()'
                    )
                actor = li.xpath(
                    './/ul[@class=\"info-list\"]/li[@class=\"actor\"]/em/text()'
                )
                if len(actor) != 0:
                    actor = actor[0]
                else:
                    actor = 'None'
                for i in range(len(aclist)):
                    if i != len(aclist) - 1:
                        actor += aclist[i]
                        actor += '、'
                    else:
                        actor += aclist[i]
                doc = [kind[0], title, cover, actor, num, link, '']
                urlist.append((link, doc, result))
            print('------>>>-- parsed -->>-----', urlist)
            # coroutine pool
            pool.map(getdetail, urlist)
        else:
            errthread.append((purl, sem, result))
Example #32
    # TODO: fix this hack, which is used to skip groups only containing
    # a single level in their hierarchy. These groups conflict with items
    # from the historical usenet collection!
    if '.' not in group:
        return

    g = NewsGroup(group, session=sesh, logging_level='INFO', ia_sync=True)

    # Check for new articles.
    count = int(g.last) - int(g.first)
    if count <= 0:
        log.info('no new articles found for {0}'.format(group))
        return

    g.archive_articles()


# ________________________________________________________________________________________
if __name__ == '__main__':
    global sesh
    sesh = GiganewsSession()

    news_list = [x.split()[0] for x in open('giganews_listfile.txt')]
    random.shuffle(news_list)
    pool = gevent.pool.Pool(16)
    pool.map(archive_group, news_list)
    #for g in news_list:
    #    print g
    #    archive_group(g)
Example #33
        return ret

    def inner_iterable():
        """ This will be called inside the pool's main greenlet, which ID also needs to be registered """
        if current_job:
            set_current_job(current_job)

        for x in iterable:
            yield x

        if current_job:
            set_current_job(None)

    start_time = time.time()
    pool = gevent.pool.Pool(size=pool_size)
    ret = pool.map(inner_func, inner_iterable())
    pool.join(raise_error=True)
    total_time = time.time() - start_time

    log.debug("SubPool ran %s greenlets in %0.6fs" % (counter, total_time))

    return ret


def subpool_imap(pool_size,
                 func,
                 iterable,
                 flatten=False,
                 unordered=False,
                 buffer_size=None):
    """ Generator version of subpool_map. Should be used with unordered=True for optimal performance """
Example #34
coroutine pool implementation
'''
from gevent import monkey
monkey.patch_all()
from gevent import pool
from urllib import request
import time


# task run by each coroutine
def run_task(url):
    print('Visit --> %s' % url)
    try:
        re = request.urlopen(url)
        data = re.read()
        print('%d bytes received from %s .' % (len(data), url))
    except BaseException as e:
        print(e)
    return 'url:%s ---> finished' % url


if __name__ == '__main__':
    # coroutine pool approach
    pool = pool.Pool(2)
    urls = [
        'https://github.com', 'https://www.python.org/',
        'https://www.baidu.com'
    ]
    results = pool.map(run_task, urls)
    print(results)
Example #36
def go():
    pool = gevent.pool.Pool(20)
    pool.map(put_shell, [i for i in range(40)])
Example #37

def f(x):
    r = 0
    for k in range(1, K + 2):
        r += x**(1 / k**1.5)
    return r


if __name__ == '__main__':
    # multiprocessing.pool test
    print('multiprocessing.pool.Pool:\n')
    start = time.time()
    l = []
    pool = Pool(3)
    for num, result in zip(NUMBERS, pool.map(f, NUMBERS)):
        l.append(result)
    print(len(l))
    print('COST: {}'.format(time.time() - start))

    # multiprocessing.pool.Pool.apply_async test
    print('multiprocessing.pool.Pool.async:\n')
    start = time.time()
    l = []
    po = Pool(3)
    tt = [po.apply_async(f, (i,)) for i in NUMBERS]  # args must be a tuple
    print(len(tt), type(tt[0]))
    print('COST: {}'.format(time.time() - start))
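    # added note: apply_async returns immediately, so the COST above excludes
    # the actual work; collecting results would time it, e.g. [t.get() for t in tt]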

    # concurrent.futures.ProcessPoolExecutor without chunksize test
    print('ProcessPoolExecutor without chunksize:\n')
Example #38
def gevent_vmap(func, iterable, *iterables, max_workers=10):
    pool = gevent.pool.Pool(max_workers)
    return pool.map(vcall(func), *concatv((iterable, ), *iterables))
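gevent_vmap leans on two helpers it never defines; a minimal sketch of plausible stand-ins, assuming concatv behaves like toolz.concatv and vcall unpacks argument tuples:

from toolz import concatv  # chains the given sequences end to end

def vcall(func):
    # wrap func so it unpacks an argument tuple into a positional call
    return lambda args: func(*args)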