def start():
    sql = 'select # from # where #'
    urls = get_url_from_db(sql)     # fetch the URL list from the database
    pool = gevent.pool.Pool(4)      # set the concurrency level
    for url in urls:
        pool.add(gevent.spawn(get_page_content, url))
    pool.join()
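All of the snippets below share this same core pattern: build a gevent.pool.Pool with a fixed size, spawn one greenlet per work item into it, and join. A minimal, self-contained sketch of the pattern, assuming gevent is installed (the fetch_one helper and the URL list are hypothetical):

from gevent import monkey
monkey.patch_all()  # patch blocking I/O so greenlets yield cooperatively

import gevent
import gevent.pool
import urllib.request

def fetch_one(url):
    # hypothetical worker: each greenlet blocks on network I/O,
    # yielding to the others while it waits
    return urllib.request.urlopen(url).read()

pool = gevent.pool.Pool(4)  # at most 4 greenlets run at once
for url in ['https://example.com/page/%d' % i for i in range(10)]:
    pool.add(gevent.spawn(fetch_one, url))
pool.join()  # block until every greenlet has finished

Note that gevent.spawn() starts the greenlet immediately and pool.add() only blocks the caller once the pool is full; pool.spawn(fetch_one, url) combines the two steps and waits for a free slot before starting, which bounds concurrency more strictly.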
def run(self):
    self.db_manager = util.DBManager(self.config['connections']['mysql'])
    db = self.db_manager.get_db()
    while True:
        # XXX next try-except block force-loads the tables
        try:
            db.messages
            db.servers
            db.server_properties
            break
        except db_exc.OperationalError:
            LOG.error(util.exc_info())
            time.sleep(5)
            LOG.debug('Reconnecting...')
    try:
        global pool
        persistent_gm_workers = [_GMWorker(self) for i in range(2)]
        for wrk in persistent_gm_workers:
            wrk.register_task('message.send', send)
            pool.add(gevent.spawn(wrk.work))
        gevent.spawn(heartbeat, 10, self)
        pool.join()
    except:
        LOG.critical(util.exc_info())
        sys.exit(1)
def _find_wildcards(self):
    """
    Queries some random non-existent records to reduce false positives.
    Returns True if the process can continue, otherwise False.
    """
    wildcard_count = self.options.wildcard_tests
    if wildcard_count < 1:
        return True
    total_queries = len(self.domains) * wildcard_count
    LOG.info("Eliminating wildcard responses (%d tests)", total_queries)
    is_ok = False
    # Set up pool and progress
    pool = gevent.pool.Pool(self.options.concurrency)
    if self.progress:
        self.progress.start(total_queries)
    self.finished = 0
    try:
        for domain in self.domains:
            LOG.debug("Checking wildcard domain: %s", domain)
            names = [rand_name() for _ in range(0, wildcard_count)]
            for name in names:
                pool.add(gevent.spawn(self._test_wildcard, domain, name))
        is_ok = True
    except KeyboardInterrupt:
        print("Ctrl+C caught... stopping")
    pool.join()
    if self.progress:
        self.progress.finish()
    return is_ok
def fetch_pages_con():
    pool = gevent.pool.Pool(16)
    gs = []
    for i in range(pages):
        g = gevent.spawn(fetch_page, i)
        gs.append(g)
        pool.add(g)
    pool.join()
def join_raffle(self, room_id):
    params = {
        'roomid': room_id,
    }
    pool = gevent.pool.Pool(len(self))
    for each_record in self:
        pool.add(gevent.spawn(each_record._join_raffle, params=params))
    pool.join()
def generator():
    import gevent.local
    lo = gevent.local.local()
    lo.tid = 1
    with store.begin(write=True):
        Host.by_unchecked.find()
    with store.begin():
        for host in Host.by_unchecked.values(max=1000):
            pool.wait_available()
            pool.add(gevent.spawn(worker, host))
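The pool.wait_available() call above is the backpressure half of the pattern: it blocks the producing greenlet until the pool has a free slot, so a large result set is never materialized as thousands of pending greenlets at once. A minimal sketch of just that mechanism (worker and the item range are hypothetical stand-ins):

import gevent
import gevent.pool

def worker(item):
    gevent.sleep(0.1)  # stand-in for real per-item work

pool = gevent.pool.Pool(10)
for item in range(1000):
    pool.wait_available()                # block until a slot frees up
    pool.add(gevent.spawn(worker, item))
pool.join()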
def join_small_tv(self, room_id, tv_id):
    params = {
        'roomid': room_id,
        'raffleId': tv_id,
        '_': int(time.time() * 100)
    }
    pool = gevent.pool.Pool(len(self))
    for each_record in self:
        pool.add(gevent.spawn(each_record._join_small_tv, params=params))
    pool.join()
def startBruteforce(sClient, pool, wordlist):
    for word in open(wordlist, "r"):
        pool.add(
            pool.apply_async(
                isSecretSaslValid,
                args=(sClient, word.strip(), "sparkSaslUser", True),
                callback=checkResult,
            ))
    pool.join(timeout=30)
    pool.kill()
    whine("Could not find the secret", "warn")
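This snippet bounds the whole brute-force run rather than individual attempts: join(timeout=30) returns after 30 seconds whether or not the greenlets are done, and kill() then terminates any that are still running. A stripped-down sketch of that timeout-and-kill pattern (attempt is a hypothetical stand-in for the SASL check):

import gevent
import gevent.pool

def attempt(word):
    gevent.sleep(60)  # stand-in for one slow validation attempt

pool = gevent.pool.Pool(50)
for word in ('alpha', 'beta', 'gamma'):
    pool.add(gevent.spawn(attempt, word))

pool.join(timeout=30)  # give the whole batch 30 seconds
pool.kill()            # then terminate whatever is still running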
def run(args):
    if args.download:
        resolvers = download_resolvers()
    else:
        resolvers = load_resolvers(args.resolvers)
    random.shuffle(resolvers)

    pool = gevent.pool.Pool(args.concurrency)

    bar = progressbar.ProgressBar(redirect_stdout=True, redirect_stderr=True)
    for resolver in bar(resolvers):
        pool.add(gevent.spawn(check_resolver, args, resolver))
    pool.join()
def import_samples(self, sample_list):
    # Filter out known/duplicate samples
    len_before = len(sample_list)
    sample_list = self.database.only_unknown(sample_list)
    logging.info("Importing %d malware samples (%d duplicates)",
                 len(sample_list), len_before - len(sample_list))
    pool = gevent.pool.Pool(self.opts.concurrency)
    for idx, sample in enumerate(sample_list):
        pool.add(gevent.spawn(self.import_sample, sample))
        if idx % 10 == 0:
            self.database.commit()
    pool.join()
    self.database.commit()
def run(self):
    if not self._find_wildcards():
        return
    pool = gevent.pool.Pool(self.options.concurrency)
    namegen = DNSTesterGenerator(self, self.domains, self.names)
    LOG.info("Starting DNS brute force (%d tests)", namegen.total)
    self.finished = 0
    if self.progress:
        self.progress.start(namegen.total)
    try:
        for tester in namegen.all():
            pool.add(gevent.spawn(tester.run))
    except KeyboardInterrupt:
        print("Ctrl+C caught... stopping")
    pool.join()
    if self.progress:
        self.progress.finish()
def fetch_used(self, name, lst, expanded=False):
    limit = self.api.api_request_limit
    pool = gevent.pool.Pool()
    blocks = splitblocks(lst, limit)
    self.count_total += len(blocks)
    for bl in blocks:
        pool.add(self._refcall_noinc(self.fetch_used_block, name, bl, expanded))
    pool.join()

    if conf.noedits:
        return

    items = self.title2latest.items()
    self.title2latest = {}
    self.count_total += len(items)
    for title, rev in items:
        self._refcall_noinc(self.get_edits, title, rev)
def run(self):
    if self.beanstalk:
        generator = self.beanstalk.get_workgenerator(self)
    else:
        generator = ListWorkGenerator(self)

    pool = gevent.pool.Pool(self.options.concurrency)
    self.finished = 0
    if self.progress:
        self.progress.start(generator.total)
    try:
        for worker in generator.getall():
            pool.add(gevent.spawn(worker.run))
    except KeyboardInterrupt:
        print("Ctrl+C caught... stopping")
    pool.join()
    if self.progress:
        self.progress.finish()
def crawl(start_url, concurrency_level, visited_link_limit):
    """
    Main crawling function. Uses a pool of greenlets to get the job done.

    :param start_url: URL to start crawling from
    :param concurrency_level: number of concurrent downloads
    :param visited_link_limit: maximum number of links to crawl
    :return: None
    """
    print('start crawling from %s' % start_url)
    print('concurrency level: %s, visited link limit: %s'
          % (concurrency_level, visited_link_limit))

    # init our pending links with start_url
    pending_links.append(start_url)

    pool = gevent.pool.Pool(concurrency_level)

    # limit number of visited links, just for testing purposes
    while len(visited_links) < visited_link_limit and (
            len(pending_links) > 0 or len(crawlin_links) > 0):
        # if there is nothing more to schedule, wait for the current
        # jobs to complete and try again
        if not pending_links:
            pool.join()
            continue
        link = pending_links.pop(0)
        crawlin_links.add(link)
        pool.wait_available()
        pool.add(gevent.spawn(crawl_one, link))
        # print('%s - current visited: %s' % (threading.currentThread(), visited_links))

    pool.join()
    # print('%s - visited links: %s' % (threading.currentThread(), visited_links))
    # print('%s - pending links: %s' % (threading.currentThread(), pending_links))
    print('Done. %s links visited.' % len(visited_links))
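Assuming the module-level pending_links list and the visited_links/crawlin_links sets the function manipulates are defined alongside crawl_one, invoking the crawler is a one-liner, e.g.:

if __name__ == '__main__':
    crawl('https://example.com', concurrency_level=4, visited_link_limit=100)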
# encoding: utf-8
from db.dbHandle import dbHandle
from pages_parsing.listPagesParse import listParse
from gevent import monkey
monkey.patch_socket()
# from multiprocessing.dummy import Pool as ThreadPool
import gevent.pool
import gevent

if __name__ == "__main__":
    dbHandle = dbHandle()
    sql = 'select * from positions where hasSpider=0'
    link_list = dbHandle.dbQueryLinks(sql)
    # pool = ThreadPool(4)  # 4 cores
    pool = gevent.pool.Pool(2)
    for i in link_list:
        pool.add(gevent.spawn(listParse, i, dbHandle))
    pool.join()  # wait for all greenlets to finish before the main module continues
    i += 1
    print(i)
except Exception as e:
    print(traceback.format_exc())

# allow up to 64 greenlets at a time; this is more than the concurrency
# of the http client, but that isn't a problem since the client has its
# own connection pool.
pool = gevent.pool.Pool(64)
tasks = TaskSchedule.get_init_raw('item', 31, limit=10000)
time1 = time.time()
for item in tasks:
    p_id = item['key']
    pool.add(gevent.spawn(get_pro, '', p_id))
    # get_pro(http, p_id)
pool.join()
# http.close()
time2 = time.time()

with futures.ThreadPoolExecutor(max_workers=64) as executor:
    future_to_worker = {}
    for item in tasks:
        future_to_worker[executor.submit(get_pro, '', item['key'])] = item
    for future in futures.as_completed(future_to_worker):
        ts = future_to_worker[future]
        try:
            data = future.result()
        except Exception as exc:
def _spawn_gm_worker(self):
    global pool
    if not pool.full():
        gm_worker = _GMWorker(self.parent)
        gm_worker.register_task('message.send', send)
        pool.add(gevent.spawn(gm_worker.work, time_to_work=20))