def __init__(self, url, parameter, HTTPClients, ClientConnectionPool, task=None):
    if task is not None:
        self.celeryTask = task
        self.celeryTaskId = task.request.id
    else:
        self.celeryTask = None
    self.parameter = parameter
    self.url = url
    self.numberHTTPClients = HTTPClients
    self.numberClientConnectionPool = ClientConnectionPool
    self.http = HTTPClient.from_url(URL(url), concurrency=self.numberClientConnectionPool)
    self.clientPool = gevent.pool.Pool(self.numberHTTPClients)
    self.workQueue = JoinableQueue()
    self.resultList = {}
    self.workQueueMax = 0
    self.workQueueDone = 0
    self.countRequests = 0
    self.status_codes = {}
    self.status_codes_count = {}
    self.meta = {}
    self.greenletList = {}
    self.initAdditionalStructures()
    self.progressMeta = None
    self.exitFlag = False
    self.pauseRequests = False
def __init__(self, scheme, create_devices=True, write_data=True,
             start_date="2000-01-01T00:00:00Z", end_date="2014-12-31T00:00:00Z",
             pool_size=3):
    self.scheme = scheme
    self.create_devices = create_devices
    self.should_write_data = write_data
    self.start_date = start_date
    self.end_date = end_date
    self.tdb = TDBClient(scheme.db_key, scheme.db_key, scheme.db_secret,
                         base_url=scheme.db_baseurl)
    iq_endpoint = HTTPEndpoint(scheme.iq_baseurl, scheme.iq_key, scheme.iq_secret)
    self.tiq = TIQClient(iq_endpoint)
    self.queue = JoinableQueue()
    self.lock = Lock()
    self.dp_count = 0
    self.req_count = 0
    self.dp_reset = time.time()
    for i in range(pool_size):
        gevent.spawn(self.worker)
def test_service_failure():
    "Service() should handle custom callback failures"

    # Given the following service
    class MyService(Service):
        def __init__(self, result_queue=None):
            super(MyService, self).__init__(
                callback=self.run,
                result_queue=result_queue,
            )

        def run(self, package, sender_data):
            raise ValueError("I don't want to do anything")

    queue = JoinableQueue()
    service = MyService(result_queue=queue)

    # When I queue a package to be processed by my service and start the
    # service with 1 concurrent worker
    service.queue('gherkin==0.1.0', 'main')
    service.consume()
    service.pool.join()  # Ensure we finish spawning the greenlet

    # Then I see that no package was processed
    queue.qsize().should.equal(0)

    # And that the list of failed packages was updated
    service.failed_queue[0][0].should.equal('gherkin==0.1.0')
    service.failed_queue[0][1].should.be.a(ValueError)
    service.failed_queue[0][1].message.should.equal("I don't want to do anything")
class Dispatcher(gevent.Greenlet):
    """
    The Dispatcher class handles routing communications to and from the
    Gateway. It implements an Actor interface as made popular by Erlang.
    """

    def __init__(self):
        self._gw_inbox = JoinableQueue()
        super().__init__()

    def _run(self):
        while True:
            try:
                event = self._gw_inbox.get(block=False)
                # Dispatch the event back to interface
                self._gw_inbox.task_done()
            finally:
                gevent.sleep(1)

    @property
    def gw_inbox(self):
        """
        This is the inbox for the Gateway. It's not accessible outside the
        class methods.

        :return: None
        """
        return None

    @gw_inbox.setter
    def gw_inbox(self, message):
        self._gw_inbox.put(message)
def __init__(self, *args, **kwargs):
    super(InterceptedStreamsMixin, self).__init__(*args, **kwargs)
    self.stdout_callbacks = []
    self.stdout_queue = JoinableQueue()
    self.stderr_callbacks = []
    self.stderr_queue = JoinableQueue()
def test_service():
    "Service() should implement the basic needs of an async service"

    # Given the following service
    class MyService(Service):
        def __init__(self, my_mock, result_queue=None):
            self.my_mock = my_mock
            super(MyService, self).__init__(
                callback=self.run,
                result_queue=result_queue,
            )

        def run(self, package, sender_data):
            self.my_mock.ran = package

    my_mock = Mock()
    queue = JoinableQueue()
    service = MyService(my_mock, result_queue=queue)

    # When I queue a package to be processed by my service and start the
    # service with 1 concurrent worker
    service.queue('gherkin==0.1.0', 'main')
    service.consume()

    # Then I see that the package processed
    package = queue.get()
    package.should.equal('gherkin==0.1.0')
    my_mock.ran.should.equal('gherkin==0.1.0')
def handle():
    connection = create_postgresql_connection()

    cursor = connection.cursor()
    cursor.execute("BEGIN;")
    cursor.execute("DELETE FROM core_ratequery;")
    cursor.execute("COMMIT;")
    cursor.close()

    queue = JoinableQueue()
    event = Event()

    age_ids = age_map(connection).values() + [None]
    sex_ids = sex_map(connection).values() + [None]
    education_ids = education_map(connection).values() + [None]
    province_ids = province_map(connection).values() + [None]

    cursor = connection.cursor()
    cursor.execute("SELECT DISTINCT cycle FROM core_microdata;")
    cycles = [row[0] for row in cursor]
    cursor.close()

    greenlets = []
    for i in range(50):
        gv = gevent.spawn(worker, queue, event)
        greenlets.append(gv)

    combs = itertools.product(age_ids, sex_ids, province_ids, education_ids, cycles)
    for c in combs:
        queue.put(c)

    queue.join()
    event.set()
    gevent.joinall(greenlets)
def __init__(self, url, dic, threads=100, report_db=False, keepalive=None, table_name=None):
    self.url = url
    parts = urlparse(url)
    self.scheme, self.host, self.port = parts.scheme, parts.hostname, parts.port
    if not self.port:
        self.port = 443 if self.scheme == 'https' else 80
    self.keepalive = keepalive
    try:
        instance = HehReq(self.host, int(self.port), self.scheme, self.keepalive)
    except Exception as e:
        logging.error('Init exception for %s: %s' % (self.url, e))
        return
    if not keepalive:
        self.keepalive = instance.detect_keepalive()
    if self.keepalive == 0:
        logging.error('Keep-Alive value for %s appears to be 0, check the connection' % url)
        return
    logging.warning('Calculated Keep-Alive for %s: %s' % (url, self.keepalive))
    self.report_db = report_db
    if report_db:
        self.table = table_name
        self.sql_conn(report_db)
    self.queue = JoinableQueue()
    # split the wordlist into Keep-Alive sized chunks and enqueue each chunk
    [self.queue.put(dic[i:i + self.keepalive]) for i in xrange(0, len(dic), self.keepalive)]
    [spawn(self.worker) for _ in xrange(threads)]
    self.queue.join()
class Speaker(gevent.Greenlet):
    RATE = 44100

    def __init__(self, rcv):
        gevent.Greenlet.__init__(self)
        self.rcv = rcv
        PA = pyaudio.PyAudio()
        self.pa = PA.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=self.RATE,
            output=True
        )
        self.queue = JoinableQueue()

    def _run(self):
        print 'spk_on'
        while True:
            try:
                buf = self.rcv.queue.get()
            except gevent.queue.Empty:
                buf = '\0'
                ## print '.',
            self.pa.write(buf)
            time.sleep(0.0001)
            self.queue.task_done()
        self.pa.close()
def __bootstrap(self):
    """ Prepare this driver pool instance to batch execute task items. """
    if self.is_processing:
        # cannot run two executions simultaneously
        raise DriverPoolRuntimeException('cannot bootstrap pool, already running')
    if self._results and self._results.qsize():  # pragma: no cover
        self.logger.debug('pending results being discarded')
    if self._tasks and self._tasks.qsize():  # pragma: no cover
        self.logger.debug('pending tasks being discarded')
    if self._pool:  # pragma: no cover
        self.logger.debug('killing processing pool')
        self._pool.join(timeout=10.0)
        self._pool.kill()
        self._pool = None
    if self._use_proxy and not self.proxy:
        # defer proxy instantiation -- since spinning up a squid proxy
        # docker container is surprisingly time consuming.
        self.logger.debug('bootstrapping squid proxy')
        self.proxy = self.PROXY_CLS(factory=self.factory)
    self.logger.debug('bootstrapping pool processing')
    self._processing = True
    self._results = Queue()
    self._tasks = JoinableQueue()
    self._load_drivers()
    # create our processing pool with headroom over the number of drivers
    # requested for this processing pool.
    self._pool = Pool(size=self.size + math.ceil(self.size * 0.25))
def __init__(self, maxsize=0, workers=10):
    """
    Setup the gevent queue and the workers.

    :param int maxsize: the max length of the queue; by default the queue
        size is infinite.
    :param int workers: the number of workers, default=10.
    """
    self.queue = JoinableQueue(maxsize=maxsize)
    [spawn(self.worker) for x in xrange(workers)]
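The `worker` method spawned above is not part of the snippet; a minimal sketch of what such a consumer loop typically looks like, assuming a hypothetical per-item `self.handle` callback:

def worker(self):
    # Illustrative sketch only: `self.handle` is a hypothetical processing
    # hook, not the original implementation.
    while True:
        item = self.queue.get()
        try:
            self.handle(item)
        finally:
            self.queue.task_done()  # always mark the item, even on failure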
def _setup_command_system(self):
    # we have two refs to the commands queue,
    # but write_commands_q will switch to throwaway
    # after we receive a finish command
    self._read_commands_q = JoinableQueue(None)
    self._write_commands_q = self._read_commands_q
    for x in range(self._workers_to_start):
        gevent.spawn(self._process_commands)
def __init__(self, *args, **kwargs):
    super(WebServer, self).__init__(*args, **kwargs)
    print 'Webserver started'
    self.debug = True
    self.cmd_queue = JoinableQueue()
    self.event_queue = JoinableQueue()
    self.cmd_id = 0
    self.cmd_results = {}
    gevent.spawn(self.send_commands_to_debugger)
    gevent.spawn(self.receive_events_from_debugger)
class GeventPoolExecutor2(LoggerMixin):
    def __init__(self, max_works):
        self._q = JoinableQueue(maxsize=max_works)
        # self._q = Queue(maxsize=max_works)
        for _ in range(max_works):
            gevent.spawn(self.__worker)
        # atexit.register(self.__atexit)
        self._q.join(timeout=100)

    def __worker(self):
        while True:
            fn, args, kwargs = self._q.get()
            try:
                fn(*args, **kwargs)
            except Exception as exc:
                self.logger.exception(
                    f'An error occurred in function {fn.__name__}; the cause was {type(exc)} {exc}')
            finally:
                self._q.task_done()

    def submit(self, fn: Callable, *args, **kwargs):
        self._q.put((fn, args, kwargs))

    def __atexit(self):
        self.logger.critical('The program is about to exit.')
        self._q.join()
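A brief usage sketch for the executor above, assuming gevent's monkey patching has already been applied; `save_page` and its URLs are illustrative only:

import gevent

# Hypothetical task function used purely to demonstrate submit().
def save_page(url):
    print('fetching', url)

executor = GeventPoolExecutor2(max_works=50)
for n in range(10):
    executor.submit(save_page, 'https://example.com/%d' % n)

# This variant does not register the atexit hook, so give the workers a
# chance to drain the queue before the script exits.
gevent.sleep(1)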
def process_24_network(net, port):
    q = JoinableQueue()
    r = JoinableQueue()
    gevent.spawn(prepare_list, q, net)
    tasks = []
    for x in range(0, CONCURRENT_GROUPS):
        # print "spawning %i" % x
        tasks += [gevent.spawn(scan_network, q, r, port)]
    q.join()
    gevent.joinall(tasks)
    if not r.empty():
        with open(str(net.ip) + '_' + str(port) + ".m3u", "w+") as f:
            f.write("#EXTM3U\n")
            while not r.empty():
                try:
                    group = r.get(timeout=10)
                    f.write('#EXTINF:-1 tvg-logo="" tvg-name="" group-title="",ChannelName' + "\n")
                    f.write('udp://@' + str(group) + ':' + str(port) + "\n")
                    logging.info("Ok ====> %s" % group)
                except gevent.queue.Empty:
                    break
def _init(self):
    """Fork off the writer thread.

    Override this to do nothing if you don't have one.
    """
    self.q = JoinableQueue(100)
    self.start_job("job", self._writer)
    self.job.link(self.delete)
    if self.ready is False:
        self.ready = True
    else:
        self.stop_job("job")  # concurrency issues?
def __init__(self, app_key, app_secret, api_url):
    if not self.init_flag:  # prevent the init method from running twice
        self.api_url = api_url
        self.app_key = app_key
        self.app_secret = app_secret
        self.req_q = JoinableQueue(MAXSIZE)
        self.init_flag = True
        t1 = threading.Thread(target=http_request, args=[self.api_url, self.req_q])
        t1.start()
    else:
        return
def __init__(self, urls, dic, threads=10, report_db=False, keepalive=None, each_threads=10):
    self.dic = dic
    self.report_db = report_db
    self.table = None
    if report_db:
        self.sql_conn(report_db)
    self.keepalive = keepalive
    self.each_threads = each_threads
    self.queue = JoinableQueue()
    [self.queue.put(x.strip()) for x in urls]
    [spawn(self.worker) for _ in xrange(threads)]
    self.queue.join()
def get_movie_id():
    baidu_tool = MysqlCurd('douban_movie')
    baidu_tool.connect_mysql()
    result = baidu_tool.query_mysql_condition('movie_name', [{'version': 0}, ['name']])
    q = JoinableQueue()
    for temp in result:
        if not baidu_tool.query_mysql_condition('name_id', [{'movie_name': temp[0]}, ['movie_id']]):
            q.put(temp[0])
    baidu_tool.close_connect()
    error_q = JoinableQueue()

    def crawl(time):
        while not q.empty():
            tool = MysqlCurd('douban_movie')
            tool.connect_mysql()
            name = q.get()
            try:
                page = super_downloader('https://movie.douban.com/subject_search?',
                                        params={'search_text': name},
                                        cookies=True, proxy=True)
            except requests.exceptions.RequestException:
                print('get movie id ' + name + ' download error!')
                return False
            page = etree.HTML(page)
            gevent.sleep(random.uniform(time[0], time[1]))
            try:
                count = 0
                count1 = 0
                for _ in page.xpath('//*[@id="content"]/div/div[1]/div[2]/table[@width="100%"]'):
                    try:
                        mark = _.xpath('tr/td[2]/div')[0]
                        id = mark.xpath('a')[0].get('href')[33:-1]
                        _name = mark.xpath('a')[0].text.split('/')[0].strip()
                        # score = mark.xpath('div/span[2]')[0].text
                        # comment_num = mark.xpath('div/span[3]')[0].text[1:-4]
                        tool.replace_mysql('name_id', {'movie_id': id, 'movie_name': _name})
                        count1 += 1
                        print('get movie id ' + _name + ' completed!!!')
                    except IndexError as e:
                        print('get movie id sub error!!!' + repr(e))
                        continue
                    count += 1
                    if count == 3:
                        break
                if count1 > 0:
                    # tool.replace_mysql('movie_name', {'version': 1, 'name': name})
                    tool.close_connect()
                print('get movie id ' + name + ' completed!')
            except Exception as e:
                error_q.put(name)
                print('get movie id ' + name + ' error!')
                print(e)

    worker = SleepFunction()
    worker.run(crawl)
    with open('errorlist//movie_id.txt', 'a', encoding='utf8') as f:
        if not error_q.empty():
            print(get_time(), file=f)
            while not error_q.empty():
                print(error_q.get(), file=f)
def __init__(self, callback, **args):
    self.callback = callback
    self.result_queue = args.get('result_queue')
    self.package_queue = JoinableQueue()
    self.failed_queue = []
    self.env = args.get('env')
    self.main_greenlet = None
    self.pool = Pool(args.get('concurrency'))
    self.should_run = True
    self.subscribers = []
    self.logger = Logger(self.name, args.get('log_level'))
def start(self):
    if not self.__threads:
        self.__threads = len(IPNetwork(self.__ip)) if len(IPNetwork(self.__ip)) <= 10 else 10
    if len(IPNetwork(self.__ip)) < int(self.__threads):
        print "Please decrease number of threads to number of hosts <= %s" % len(IPNetwork(self.__ip))
        exit()
    queue = JoinableQueue()
    [queue.put(str(ip)) for ip in IPNetwork(self.__ip)]
    workers = [spawn(self.get_ip_info, queue, self.__apis) for t in range(int(self.__threads))]
    queue.join()
def test_main(self):
    queue = JoinableQueue()
    print dir(queue)
    queue.put(1)
    queue.put(3)
    queue.put(2)
    queue.put(6)
    print queue.qsize()
    print '1', queue.get(), queue.get()
def __init__(self, url, dic, threads=100, report_db=False, keepalive=None, table_name=None):
    self.url = url
    parts = urlparse(url)
    self.scheme, self.host, self.port, self.path = parts.scheme, parts.hostname, parts.port, parts.path
    if not self.port:
        self.port = 443 if self.scheme == 'https' else 80
    self.keepalive = keepalive
    try:
        instance = HehReq(self.host, int(self.port), self.scheme, self.keepalive, path=self.path)
    except Exception as e:
        logging.error('Init exception for %s: %s' % (self.url, e))
        return
    # probe a nonsense path so that "not found"-style responses can be ignored later
    self.IGNORE_CODES = [400, 403, 404]
    self.IGNORE_CODES += [x[1] for x in instance.bulk_get('/kajshdkajsdhkquwehiqubwdkjnmnzcxbfvkjhsdbfiqujdsf')]
    if not keepalive:
        self.keepalive = instance.detect_keepalive()
    if self.keepalive == 0:
        logging.error('Keep-Alive value for %s appears to be 0, check the connection' % url)
        return
    logging.warning('Calculated Keep-Alive for %s: %s' % (url, self.keepalive))
    self.report_db = report_db
    if report_db:
        self.table = table_name
        self.sql_conn(report_db)
    self.queue = JoinableQueue()
    # split the wordlist into Keep-Alive sized chunks and enqueue each chunk
    [self.queue.put(dic[i:i + self.keepalive]) for i in xrange(0, len(dic), self.keepalive)]
    [spawn(self.worker) for _ in xrange(threads)]
    self.queue.join()
class TaskList:
    def __init__(self):
        self.queue = JoinableQueue()
        self.all_tasks = {}

    def add_task(self, task):
        self.all_tasks[task.get_id()] = task
        self.queue.put(task)

    def get_queue(self):
        return self.queue

    def join(self, timeout=None):
        return self.queue.join(timeout)
def __init__(self, requestHandler=BaseRequestHandler(), parseHandler=BaseParseHandler(),
             sheduler=BaseScheduler(), pipeline=BasePipeline()):
    self.requestHandler = requestHandler
    self.parseHandler = parseHandler
    self.sheduler = sheduler
    self.pipeline = pipeline
    self.task_queue = JoinableQueue()
    self.response_queue = JoinableQueue()
    self.tasks_cnt = 0
    self.result_queue = JoinableQueue()
    self.jobs_cnt = config.num_threads
    self.start_time = time.time()
    self.stop = False
def test_greenlet(self):
    queue = JoinableQueue()
    requests_done = Event()

    g = Greenlet(self._producer, queue, FirstService(), 'Terminator')
    h = Greenlet(self._producer, queue, SecondService(), 'Terminator')
    i = Greenlet(self._producer, queue, ThirdService(), 'Terminator')

    requests = Group()
    for request in g, h, i:
        requests.add(request)

    log.debug('before spawn')
    c = spawn(
        self._consumer,
        done=requests_done,
        queue=queue,
    )
    [r.start() for r in requests]
    log.debug('after spawn')

    requests.join()
    requests_done.set()
    log.debug('requests are done')

    c.join()
    log.debug('consumer is done')
def __init__(self, creds, pool_size=POOL_SIZE):
    self.client = get_session(creds['host'], creds['key'], creds['secret'])
    self.queue = JoinableQueue(maxsize=POOL_SIZE * 2)
    for i in range(pool_size):
        gevent.spawn(self.worker)
class ApartmentManager(Greenlet):
    def __init__(self, name, urls):
        Greenlet.__init__(self)
        self.JobQueue = JoinableQueue()
        self.name = name
        self.assigning = True
        self.urls = urls

    def assignJob(self, job):
        print 'Manager {0} -> {1}'.format(self.name, job)
        self.JobQueue.put(job)
        gevent.sleep(0)

    def _run(self):
        for url in self.urls:
            self.assignJob(url)
        self.assigning = False
def save_worker(dsn: str, save_queue: JoinableQueue):
    conn = psycopg2.connect(dsn)
    while True:
        word, direction, data = save_queue.get()
        try:
            with conn:
                with conn.cursor() as cur:
                    psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, cur)
                    cur.execute(
                        "INSERT INTO youdao_bilingual (keyword, direction, data) VALUES (%s, %s, %s)",
                        (word, direction, data))
            save_queue.task_done()
        except Exception as e:
            print(e)
            save_queue.put((word, direction, data))
    conn.close()
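A sketch of how such a worker might be wired up, assuming a producer puts `(word, direction, data)` tuples on the queue; the DSN, worker count, and sample row are illustrative, not part of the original code:

import gevent
from gevent.queue import JoinableQueue

# Illustrative wiring only: spawn a few save workers sharing one queue,
# feed a sample row, then block until everything has been inserted.
save_queue = JoinableQueue()
for _ in range(4):
    gevent.spawn(save_worker, 'dbname=translations user=postgres', save_queue)

save_queue.put(('hello', 'en->zh', '{"translation": "..."}'))
save_queue.join()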
class GQueue(object):
    def __init__(self):
        self.__QUEUE = JoinableQueue()

    def job(self, func):
        @functools.wraps(func)
        def f(*args, **kwargs):
            self.__QUEUE.put([func, args, kwargs])
        return f

    def join(self):
        self.__QUEUE.join()

    def work(self):
        while True:
            func, args, kwargs = self.__QUEUE.get()
            try:
                func(*args, **kwargs)
            finally:
                self.__QUEUE.task_done()

    def run_worker(self, num=1):
        for i in range(num):
            gevent.spawn(self.work)
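A short usage sketch for GQueue above; the `fetch` function and its URLs are made up for illustration:

import gevent

gq = GQueue()

@gq.job
def fetch(url):
    # Hypothetical work item: decorated calls are enqueued instead of run.
    print('processing', url)

gq.run_worker(num=5)
for n in range(20):
    fetch('https://example.com/%d' % n)  # enqueues the call
gq.join()  # block until every queued job has been processed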
def test_api(self):
    queue = JoinableQueue()
    task_group = self.api.search('terminator', queue)

    while True:
        finished = all([greenlet.ready() for greenlet in task_group.greenlets])
        try:
            item = queue.get(timeout=1.0)
        except Empty:
            if finished:
                log.info('queue is empty and all jobs are done, quitting')
                break
            log.info('queue was empty and jobs are still running, retrying')
            continue
        try:
            log.info('%r', item)
        finally:
            queue.task_done()

    task_group.join()
    queue.join()
    log.info('joined everything')
class GeventPoolExecutor2(LoggerMixin):
    def __init__(self, max_works):
        check_gevent_monkey_patch()
        self._q = JoinableQueue(maxsize=max_works)
        # self._q = Queue(maxsize=max_works)
        for _ in range(max_works):
            # self.logger.debug('yyyyyy')
            gevent.spawn(self.__worker)
        atexit.register(self.__atexit)

    def __worker(self):
        while True:
            fn, args, kwargs = self._q.get()
            # noinspection PyBroadException
            try:
                fn(*args, **kwargs)
            except Exception as exc:
                self.logger.exception(
                    f'An error occurred in function {fn.__name__}; the cause was {type(exc)} {exc}')
            finally:
                self._q.task_done()

    def submit(self, fn: Callable, *args, **kwargs):
        # self.logger.debug(self._q.qsize())
        self._q.put((fn, args, kwargs))

    def __atexit(self):
        self.logger.critical('The program is about to exit.')
        self._q.join()
def on_search(self, query):
    log.debug('search for %r', query)
    queue = JoinableQueue()
    task_group = g.api.search(query, queue)

    while True:
        finished = all([t.ready() for t in task_group])
        try:
            item = queue.get(timeout=1.0)
        except Empty:
            if finished:
                break
            continue
        try:
            self.emit('result', item._asdict())
        finally:
            queue.task_done()

    queue.join()
    task_group.join()
    self.emit('done', query)
def _run(self):
    paths = glob.glob(self.path)
    while not paths:
        gevent.sleep(0.01)
        paths = glob.glob(self.path)

    q = JoinableQueue()
    self.logger.debug('Tailing %s' % ', '.join(paths))
    self.tails = [Tail(p, q, self.statedir) for p in paths]

    while True:
        data = q.get()
        if data:
            if data.endswith('\n'):
                data = data[0:-1]
            self.logger.debug('Received: %r' % data)
            self.output.put(Event(data=data))
        q.task_done()
def __init__(self, shard):
    """Initialize the ScoringService."""
    Service.__init__(self, shard)

    # Set up communication with ProxyService.
    self.proxy_service = self.connect_to(ServiceCoord("ProxyService", 0))

    # Set up and spawn the scorer.
    # TODO Link to greenlet: when it dies, log CRITICAL and exit.
    self._scorer_queue = JoinableQueue()
    gevent.spawn(self._scorer_loop)

    # Set up and spawn the sweeper.
    # TODO Link to greenlet: when it dies, log CRITICAL and exit.
    self._sweeper_start = None
    self._sweeper_event = Event()
    gevent.spawn(self._sweeper_loop)
class Receiver(gevent.Greenlet):
    PORT = 20000
    CHUNK = 512

    def __init__(self):
        gevent.Greenlet.__init__(self)
        self.queue = JoinableQueue()

    def _run(self):
        context = zmq.Context()
        receiver = context.socket(zmq.PULL)
        receiver.connect("tcp://localhost:%s" % self.PORT)
        print 'rcv_on'
        while True:
            frame = receiver.recv()
            sys.stdout.write('.')
            sys.stdout.flush()
            self.queue.put(frame)
            time.sleep(0.0001)
def spider(start_url, max_depth=1, no_of_workers=10, page_fn=check_page_for_profanities):
    """
    Concurrently spider the web, starting from web page, executing page_fn
    on each page.

    start_url specifies the document the spider starts from.
    max_depth specifies the maximum link depth from the start_url that
    processing will occur.
    no_of_workers specifies how many concurrent workers process the job queue.
    page_fn is a function that takes BeautifulSoup parsed html and a url and
    processes them as required.
    """
    seen_urls = set((start_url,))
    job_queue = JoinableQueue()
    job_queue.put((start_url, max_depth))
    for i in range(no_of_workers):
        gevent.spawn(job_worker, job_queue, seen_urls, page_fn)
    job_queue.join()
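The `job_worker` that `spider` spawns is not included in the snippet; a minimal sketch under the assumption that `requests` and BeautifulSoup are used for fetching and parsing (the link extraction and error handling here are illustrative, not the original implementation):

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def job_worker(job_queue, seen_urls, page_fn):
    # Illustrative worker: fetch a page, hand it to page_fn, and enqueue
    # unseen links until the depth budget is exhausted.
    while True:
        url, depth = job_queue.get()
        try:
            soup = BeautifulSoup(requests.get(url).text, 'html.parser')
            page_fn(soup, url)
            if depth > 0:
                for a in soup.find_all('a', href=True):
                    link = urljoin(url, a['href'])
                    if link not in seen_urls:
                        seen_urls.add(link)
                        job_queue.put((link, depth - 1))
        except requests.RequestException:
            pass  # skip pages that fail to download
        finally:
            job_queue.task_done()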
def handle():
    # The expected format is:
    # ciclo edad sexo nforma prov aoi factorel
    csv_path = sys.argv[1]

    queue = JoinableQueue()
    event = Event()

    greenlets = []
    for i in range(90):
        gv = gevent.spawn(worker, queue, event)
        greenlets.append(gv)

    with io.open(csv_path, 'r') as f:
        for line in f:
            queue.put(line)

    queue.join()
    event.set()
    gevent.joinall(greenlets)
def recursive_crawl(url):
    all_urls = set()
    processed_urls = set()
    task_queue = JoinableQueue()

    def add_to_all(url):
        if url not in all_urls:
            print("Record url {}".format(url))
            all_urls.add(url)
            task_queue.put_nowait(url)

    # Start workers
    workers = []
    for i in xrange(10):
        workers.append(gevent.spawn(url_worker, i, processed_urls, add_to_all, task_queue))
    print("workers", len(workers))

    task_queue.join()

    print("Processed", len(processed_urls), "All", len(all_urls))
    print("Total latency", demo_helpers.TOTAL_LATENCY)
def pop_init(self, *args, **kwargs):
    """initialize a Population of unique Individuals on this shard"""
    payload = args[0]
    body = args[1]
    start_response = args[2]

    if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
        self.ff_name = payload["ff_name"]
        logging.info("initializing population based on %s", self.ff_name)

        self.pop = Population(Individual(), self.ff_name, self.prefix)
        self.pop.set_ring(self.shard_id, self.ring)

        self.reify_queue = JoinableQueue()
        spawn(self.reify_consumer)

        start_response('200 OK', [('Content-Type', 'text/plain')])
        body.put("Bokay\r\n")
        body.put(StopIteration)
    else:
        self._bad_auth(payload, body, start_response)
def __init__(self, args):
    """
    Initialise HTTP scanner
    :param args:
    :return:
    """
    self.args = args
    self.output = HttpScannerOutput(args)
    self._init_scan_options()

    # Reading files
    self.output.write_log("Reading files and deduplicating.", logging.INFO)
    self.hosts = self._file_to_list(args.hosts)
    self.urls = self._file_to_list(args.urls)

    # self._calc_urls()
    out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count)
    if self.args.ports is not None:
        out += ' %i ports' % len(self.args.ports)
    self.output.print_and_log(out)

    if self.args.ports is not None and not self.args.syn:
        new_hosts = []
        for host in self.hosts:
            for port in self.args.ports:
                # print(host, port)
                new_hosts.append(helper.generate_url(host, port))
        self.hosts = new_hosts

    # self._calc_urls()
    self.output.print_and_log('%i full urls to scan' % self.full_urls_count)

    # Queue and workers
    self.hosts_queue = JoinableQueue()
    self.workers = []
        except ValueError:
            pass
        else:
            # named entity
            try:
                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text  # leave as is
    return re.sub("&#?\w+;", fixup, text)


if __name__ == "__main__":
    # project_cache = shelve.open("project_cache.shelve")
    q = JoinableQueue()
    project_queue = JoinableQueue()
    out_queue = Queue()
    length_queue = Queue()

    for i in range(NUM_THEME_WORKER_THREADS):
        gevent.spawn(theme_worker)
    for i in range(NUM_PROJECT_WORKER_THREADS):
        gevent.spawn(project_worker)

    # i = 0
    for item in get_themes():
        q.put(item)
        # i += 1
        # if i >= 1:
def init():
    global patch_loop_greenlet
    global core_source

    # add core source
    sig = [
        14493609762890313342166277786717882067186706504725349899906780741747713356290787356528733464152980047783620946593111196306463577744063955815402148552860145629259653950818107505393643383587083768290613402372295707034951885912924020308782786221888333312179957359121890467597304281160325135791414295786807436357,
        1836340799499544967344676626569366761238237327637553699677615341837866857178638560803752775147141401436473176143062386392930849127511639810150938435062071285028855634164277748937448362731305104091415548874264676030905340846245037152836818535938439214826659048244377315288514582697466079356264083762738266643,
        89884656743115795873895609296394864029741047392531316591432509289601210992615631812974174607675153482641606235553368183778569185786977952044726620763937252233940116059625337686768538445873713070762889839480360220508177637118657209098549890835520224254015051271431737736621385544038152276933973262030194906397,
        1224239220300762038953555488069442663256999688439
    ]
    with transaction:
        core_source = CoreSource(
            id=platform,
            url=settings.patchserver,
            sig=sig,
            contact='*****@*****.**')

    # load sources
    with transaction, db.Cursor() as c:
        aa = c.execute("SELECT * FROM patch_source")
        for a in aa.fetchall():
            try:
                id = json.loads(a['id'])
                data = json.loads(a['data'])
                # update old repo urls
                if 'url' in data and data['url'].startswith('http://patch.download.am'):
                    data['url'] = data['url'].replace('http://patch.download.am', 'http://repo.download.am')
                if 'url' in data and data['url'].endswith('.git'):
                    source = GitSource(id=id, **data)
                else:
                    source = PatchSource(id=id, **data)
                if source.enabled:
                    patch_group.spawn(source.check)
            except TypeError:
                log.critical("broken row: {}".format(a))
                traceback.print_exc()

    # delete useless repos
    for extern in os.listdir(settings.external_plugins):
        if extern not in sources or not sources[extern].enabled:
            path = os.path.join(settings.external_plugins, extern)
            if os.path.isdir(path) and not os.path.exists(os.path.join(path, '.git')):
                log.info('deleting useless external repo {}'.format(path))
                try:
                    really_clean_repo(path)
                except:
                    pass

    default_sources = dict(
        downloadam='http://community.download.am/dlam-config.yaml'
    )

    if not test_mode:
        for id, url in default_sources.iteritems():
            if id not in sources and url not in config_urls:
                yield 'adding default repo {}'.format(id)
                try:
                    source = add_source(url)
                    if source is None:
                        continue
                except:
                    traceback.print_exc()
                else:
                    if isinstance(source, BasicSource) and source.enabled:
                        patch_group.spawn(source.check)

    # check and apply updates
    from gevent.queue import JoinableQueue
    y = JoinableQueue()
    complete = list()

    def source_complete_callback(source):
        complete.append(source)
        if len(complete) == len(sources):
            y.put('updating {} / {}'.format(len(complete), len(sources)))

    gevent.spawn(patch_all, 30, False, source_complete_callback=source_complete_callback)
    gevent.sleep(0.2)

    yield 'updating {} / {}'.format(len(complete), len(sources))
    while len(patch_group):
        try:
            x = y.get(timeout=1)
        except:
            continue
        yield x
    patch_group.join()

    execute_restart()

    # start the patch loop
    patch_loop_greenlet = gevent.spawn(patch_loop)
def start_fluud():
    parser = argparse.ArgumentParser()
    parser.add_argument('host', help='mongo host')
    parser.add_argument('port', help='mongo port')
    parser.add_argument('--login', help='mongo login')
    parser.add_argument('--password', help='mongo password')
    args = parser.parse_args()

    if args.login and args.password:
        login = urllib.quote_plus(args.login)
        password = urllib.quote_plus(args.password)
        uri = 'mongodb://{}:{}@{}:{}/'.format(login, password, args.host, args.port)
    else:
        uri = 'mongodb://{}:{}/'.format(args.host, args.port)

    client = MongoClient(uri)

    template = {
        "first_sample_timestamp": dateutil.parser.parse("2015-09-02T13:08:20.314Z"),
        "last_sample_timestamp": dateutil.parser.parse("2015-09-02T13:08:20.314Z"),
        "metadata": {
            "typeURI": "http://schemas.dmtf.org/cloud/audit/1.0/event",
            "initiator": {
                "typeURI": "service/security/account/user",
                "host": {
                    "address": "192.168.0.2"
                },
                "id": "openstack:610e7d74-16af-4358-9b77-5275194fa6e4",
                "name": "8b07b49216d243d2b49561759bd104f4"
            },
            "target": {
                "typeURI": "service/security/account/user",
                "id": "openstack:fc43ddcf-d147-466c-adfe-d60bd2b773ba"
            },
            "observer": {
                "typeURI": "service/security",
                "id": "openstack:a256def4-0a36-472e-95e5-e456db4e0681"
            },
            "eventType": "activity",
            "eventTime": "2015-09-02T13:08:20.256770+0000",
            "host": "identity.node-1",
            "action": "authenticate",
            "outcome": "success",
            "id": "openstack:00244b9a-1a43-48a5-b75e-9d68dd647487",
            "event_type": "identity.authenticate"
        },
        "meter": [
            {
                "counter_name": "identity.authenticate.success",
                "counter_unit": "user",
                "counter_type": "delta"
            }
        ],
        "project_id": None,
        "source": "openstack",
        "user_id": "openstack:610e7d74-16af-4358-9b77-5275194fa6e4"
    }

    data = [copy.deepcopy(template) for _ in range(10000)]

    def progress():
        while True:
            print client.ceilometer.resource.count()
            sys.stdout.flush()
            sleep(2)

    spawn(progress)

    def worker():
        while True:
            q.get()
            try:
                client.ceilometer.resource.insert_many(copy.deepcopy(data), False)
            finally:
                q.task_done()

    q = JoinableQueue()
    for i in range(10):
        spawn(worker)

    for i in range(100):
        q.put(0)

    q.join()