def __init__(self, loop=None, target=None, name=None, args=(), kwargs=None):
    if not callable(target):
        raise TypeError("`target` needs to be callable, not %r"
                        % (type(target),))
    # Avoid the shared mutable-default pitfall: use a None sentinel.
    if kwargs is None:
        kwargs = {}
    self._state = _ProcessState()
    self._loop = loop or IOLoop.current(instance=False)

    # _keep_child_alive is the write side of a pipe, which, when it is
    # closed, causes the read side of the pipe to unblock for reading. Note
    # that it is never closed directly. The write side is closed by the
    # kernel when our process exits, or possibly by the garbage collector
    # closing the file descriptor when the last reference to
    # _keep_child_alive goes away. We can take advantage of this fact to
    # monitor from the child and exit when the parent goes away unexpectedly
    # (for example due to SIGKILL). This variable is otherwise unused except
    # for the assignment here.
    parent_alive_pipe, self._keep_child_alive = mp_context.Pipe(duplex=False)

    self._process = mp_context.Process(
        target=self._run,
        name=name,
        args=(target, args, kwargs, parent_alive_pipe,
              self._keep_child_alive),
    )
    _dangling.add(self._process)
    self._name = self._process.name
    self._watch_q = PyQueue()
    self._exit_future = Future()
    self._exit_callback = None
    self._closed = False

    self._start_threads()
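
# A minimal standalone sketch of the parent-liveness trick described in the
# comment above, using only the standard library; `_watch_parent`, `_child`,
# and the variable names are illustrative, not part of the class above. In
# real use the watcher would run in a daemon thread inside the child while
# the child does its actual work.
import multiprocessing
import os
import time


def _watch_parent(parent_alive_pipe):
    try:
        # recv() blocks until the write end is closed; the kernel closes it
        # when the parent process exits, even on SIGKILL.
        parent_alive_pipe.recv()
    except EOFError:
        pass
    os._exit(1)  # parent is gone; exit rather than linger as an orphan


def _child(parent_alive_pipe, keep_alive):
    # The child inherits a copy of the write end and must close it, or the
    # read end would never unblock.
    keep_alive.close()
    _watch_parent(parent_alive_pipe)


if __name__ == "__main__":
    parent_alive_pipe, keep_alive = multiprocessing.Pipe(duplex=False)
    p = multiprocessing.Process(target=_child,
                                args=(parent_alive_pipe, keep_alive))
    p.start()
    time.sleep(1)
    # When this process exits here, the kernel closes `keep_alive` and the
    # child unblocks and exits too.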
def __init__(self, name, providers=None, push_q=None, loop=None,
             address=None, mailbox_size=1000, inbox=None,
             empty_demand_logic="broadcast", concurrency=cpu_count(),
             routing_logic="round_robin", tick_delay=120):
    """
    Constructor

    :param name: Name of the actor
    :type name: str()
    :param providers: Providers to pull from
    :type providers: list()
    :param push_q: Queue for the provider
    :type push_q: janus.Queue()
    :param loop: Asyncio loop for the actor
    :type loop: AbstractEventLoop()
    :param address: Address for the actor
    :type address: str()
    :param mailbox_size: Size of the mailbox
    :type mailbox_size: int()
    :param inbox: Actor inbox
    :type inbox: asyncio.Queue()
    :param empty_demand_logic: broadcast or round_robin
    :type empty_demand_logic: str()
    :param concurrency: The max concurrency in the system
    :type concurrency: int()
    :param routing_logic: round_robin or broadcast
    :type routing_logic: str()
    :param tick_delay: Delay between pull ticks
    :type tick_delay: int()
    """
    # Resolve None sentinels here rather than in the signature, where the
    # defaults would be evaluated once at import time and shared.
    loop = loop if loop is not None else asyncio.get_event_loop()
    inbox = inbox if inbox is not None else SafeQ().async_q
    super().__init__(name, loop, address, mailbox_size, inbox)
    self.push_q = push_q if push_q is not None else SafeQ().async_q
    self.result_q = PyQueue()
    self.__subscribers = []
    self.__providers = providers if providers is not None else []
    self.__current_provider = 0
    self.__task_q = PyQueue()
    self.__empty_demand_logic = empty_demand_logic
    self.__concurrency = concurrency
    self.__routing_logic = routing_logic
    self.set_handlers()
    self.tick_delay = tick_delay
    self.run_on_empty(self.__concurrency)
    self.__pull_tick()
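
# Hedged sketch of the round-robin routing implied by `__current_provider`
# and `routing_logic="round_robin"` above; the helper name `next_provider`
# and the modulo step are assumptions for illustration, not taken from the
# class itself.
def next_provider(providers, current):
    """Return (provider, next_index), cycling through providers in order."""
    provider = providers[current % len(providers)]
    return provider, (current + 1) % len(providers)


# Usage: each pull advances the stored index, spreading demand evenly.
# provider, idx = next_provider(["a", "b", "c"], 0)  # -> ("a", 1)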
def __init__(self, name, providers=None, loop=None, address=None,
             mailbox_size=1000, inbox=None, empty_demand_logic="broadcast",
             concurrency=cpu_count(), tick_delay=120):
    """
    Constructor

    :param name: Name of the actor
    :type name: str()
    :param providers: Providers to pull from
    :type providers: list()
    :param loop: Asyncio loop for the actor
    :type loop: AbstractEventLoop()
    :param address: Address for the actor
    :type address: str()
    :param mailbox_size: Size of the mailbox
    :type mailbox_size: int()
    :param inbox: Actor inbox
    :type inbox: asyncio.Queue()
    :param empty_demand_logic: round_robin or broadcast
    :type empty_demand_logic: str()
    :param concurrency: Number of concurrent tasks to run
    :type concurrency: int()
    :param tick_delay: Delay between ticks
    :type tick_delay: int()
    """
    # Resolve None sentinels at call time, for the same reasons as above.
    loop = loop if loop is not None else asyncio.get_event_loop()
    super().__init__(name, loop, address, mailbox_size, inbox)
    self.register_handler(Tick, self.__pull_tick)
    self.subscribers = []
    self.__providers = providers if providers is not None else []
    self.__current_provider = 0
    self.__task_q = PyQueue()
    self.__empty_logic = empty_demand_logic
    self.__result_q = PyQueue()
    self.router = None
    self.create_router(concurrency)
    self.set_handlers()
    self.tick_delay = tick_delay
    self.__concurrency = concurrency
    self.__pull_tick()
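
# Why both constructors above now default `providers`, `loop`, and the queue
# arguments to None: Python evaluates default values once, at definition
# time, so `providers=[]` is a single list shared by every instance and
# `loop=asyncio.get_event_loop()` binds whatever loop exists at import time.
# A small sketch of the sentinel pattern, standard library only; `make_actor`
# is a hypothetical name for illustration.
import asyncio


def make_actor(providers=None, loop=None):
    # A fresh list per call, and a loop looked up at call time.
    providers = providers if providers is not None else []
    loop = loop if loop is not None else asyncio.get_event_loop()
    return providers, loop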
def qumulo_treewalk(path, ip, ses, q_crawl, num_sep, level, batchsize,
                    cliargs, logger, reindex_dict):
    batch = []
    dircount = 0
    totaldirs = 0
    totalfiles = 0
    starttime = time.time()

    # queues for paths and results shared by the walker threads
    q_paths = PyQueue()
    q_paths_results = PyQueue()
    lock = Lock()

    # set up threads for tree walk
    for i in range(cliargs['walkthreads']):
        t = Thread(target=apiwalk_worker,
                   args=(ip, ses, q_paths, q_paths_results, lock,))
        t.daemon = True
        t.start()

    # set up progress bar
    if not cliargs['quiet'] and not cliargs['debug'] and not cliargs['verbose']:
        widgets = [progressbar.AnimatedMarker(), ' Crawling (Queue: ',
                   progressbar.Counter(), progressbar.FormatLabel(''), ') ',
                   progressbar.Timer()]
        bar = progressbar.ProgressBar(widgets=widgets,
                                      max_value=progressbar.UnknownLength)
        bar.start()
    else:
        bar = None
    bartimestamp = time.time()

    for root, dirs, files in qumulo_api_walk(path, ip, ses, q_paths,
                                             q_paths_results):
        dircount += 1
        totaldirs += 1
        files_len = len(files)
        dirs_len = len(dirs)
        totalfiles += files_len
        if dirs_len == 0 and files_len == 0 and not cliargs['indexemptydirs']:
            continue
        if root['path'] != '/':
            root_path = root['path'].rstrip(os.path.sep)
        else:
            root_path = root['path']
        if not dir_excluded(root_path, config, cliargs):
            batch.append((root, dirs, files))
            batch_len = len(batch)
            if batch_len >= batchsize or (cliargs['adaptivebatch'] and
                                          totalfiles >= config['adaptivebatch_maxfiles']):
                q_crawl.enqueue(scrape_tree_meta,
                                args=(batch, cliargs, reindex_dict,),
                                result_ttl=config['redis_ttl'])
                if cliargs['debug'] or cliargs['verbose']:
                    logger.info("enqueued batchsize: %s (batchsize: %s)"
                                % (batch_len, batchsize))
                del batch[:]
                if cliargs['adaptivebatch']:
                    batchsize = adaptive_batch(q_crawl, cliargs, batchsize)
                    if cliargs['debug'] or cliargs['verbose']:
                        logger.info("batchsize set to: %s" % batchsize)

            # check if at maxdepth level and delete dirs/files lists to not
            # descend further down the tree
            if cliargs['maxdepth']:
                num_sep_this = root_path.count(os.path.sep)
                if num_sep + level <= num_sep_this:
                    del dirs[:]
                    del files[:]
        else:
            # directory excluded
            del dirs[:]
            del files[:]

        # update progress bar
        if bar:
            try:
                if time.time() - bartimestamp >= 2:
                    elapsed = round(time.time() - bartimestamp, 3)
                    dirspersec = round(dircount / elapsed, 3)
                    widgets[4] = progressbar.FormatLabel(
                        ', ' + str(dirspersec) + ' dirs/sec) ')
                    bartimestamp = time.time()
                    dircount = 0
                bar.update(len(q_crawl))
            except (ZeroDivisionError, ValueError):
                bar.update(0)

    # enqueue anything remaining in batch (skip if the batch is empty)
    if batch:
        q_crawl.enqueue(scrape_tree_meta,
                        args=(batch, cliargs, reindex_dict,),
                        result_ttl=config['redis_ttl'])

    # switch to a bounded progress bar while the queue drains
    if bar:
        bar.finish()
        bar_max_val = len(q_crawl)
        bar = progressbar.ProgressBar(max_value=bar_max_val)
        bar.start()

    # update progress bar until bots are idle and queue is empty
    while worker_bots_busy([q_crawl]):
        if bar:
            q_len = len(q_crawl)
            try:
                bar.update(bar_max_val - q_len)
            except (ZeroDivisionError, ValueError):
                bar.update(0)
        time.sleep(1)

    if bar:
        bar.finish()

    elapsed = round(time.time() - starttime, 3)
    dirspersec = round(totaldirs / elapsed, 3)
    logger.info("Finished crawling, elapsed time %s sec, dirs walked %s "
                "(%s dirs/sec)" % (elapsed, totaldirs, dirspersec))
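
# Minimal sketch of the threaded walk pattern used above, with os.scandir
# standing in for the Qumulo API calls made by `apiwalk_worker`; the names
# `walk_worker`, `q_paths`, and `q_results` are illustrative assumptions.
# Workers pull a directory, list it, publish the listing, and enqueue its
# subdirectories for other workers to pick up.
import os
from queue import Queue
from threading import Thread


def walk_worker(q_paths, q_results):
    while True:
        path = q_paths.get()
        dirs, files = [], []
        try:
            for entry in os.scandir(path):
                if entry.is_dir(follow_symlinks=False):
                    dirs.append(entry.name)
                else:
                    files.append(entry.name)
        except OSError:
            pass  # unreadable directory; report it with empty listings
        q_results.put((path, dirs, files))
        # Enqueue subdirectories before task_done() so q_paths.join() cannot
        # return while any part of the tree is still undiscovered.
        for d in dirs:
            q_paths.put(os.path.join(path, d))
        q_paths.task_done()


if __name__ == "__main__":
    q_paths, q_results = Queue(), Queue()
    for _ in range(4):
        Thread(target=walk_worker, args=(q_paths, q_results),
               daemon=True).start()
    q_paths.put(".")
    q_paths.join()  # blocks until every discovered directory has been listed
    while not q_results.empty():
        print(q_results.get())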