def run_parallel_async(graph, nprocs=None, sleep=0.2, raise_errors=False):
    """Run the task graph across several scheduler worker processes.

    Args:
        graph: task graph object (tgraph); converted to a manager-backed,
            parallel-compatible graph before scheduling.
        nprocs: number of scheduler processes; defaults to half the CPU count.
            With ``nprocs == 1`` this degenerates to the plain ``run_async`` runner.
        sleep: polling interval (seconds) of the dispatch loop.
        raise_errors: when True, raise RuntimeError if a worker appears failed.

    Returns:
        The graph's computed values recovered from the manager.
    """
    if nprocs == 1:
        # Single process: no need for a manager or worker pool.
        return run_async(graph, sleep=sleep, raise_errors=raise_errors)
    nprocs = nprocs or mp.cpu_count() // 2
    with mp.Manager() as manager:
        # Rebuild the graph on manager-backed storage so workers share state.
        graph = tgraph.create_parallel_compatible_graph(graph, manager)
        # Bounded queues: capacity equals the total number of tasks.
        ioq = mp.Queue(len(graph.funcs.keys()))
        cpuq = mp.Queue(len(graph.funcs.keys()))
        procs = [mp.Process(target=run_scheduler,
                            args=(graph, sleep, ioq, cpuq, raise_errors))
                 for _ in range(nprocs)]
        for proc in procs:
            proc.start()
        # Dispatch loop: feed ready tasks onto the io/cpu queues until done.
        while not tgraph.all_done(graph):
            for task in tgraph.get_ready_tasks(graph):
                graph = tgraph.mark_as_in_progress(graph, task)
                mlog(graph).info(
                    'pid {}: queueing task {}'.format(os.getpid(), task))
                # Route task to the queue matching its workload type.
                if task in graph.io_bound:
                    ioq.put(task)
                else:
                    cpuq.put(task)
            time.sleep(sleep)
        # NOTE(review): `not p.exitcode` is truthy for exitcode None (still
        # running) and 0 (clean exit), so this fires for healthy workers and
        # NOT for nonzero exit codes — looks inverted; confirm the intended
        # exit semantics of run_scheduler before relying on this check.
        if raise_errors and sum(not p.exitcode for p in procs):
            raise RuntimeError('An async task has failed. Please check your logs')
        return tgraph.recover_values_from_manager(graph)
def __init__(self, num_of_process: int, mapper: Callable, reducer: Callable, mapper_queue_size: int = 0, reducer_queue_size: int = 0):
    """Wire up the queues and (not-yet-started) processes of a map/reduce pipeline.

    Args:
        num_of_process: number of mapper processes and of reducer processes.
        mapper: callable applied by each mapper worker.
        reducer: callable applied by each reducer worker.
        mapper_queue_size: maxsize of the shared mapper input queue (0 = unbounded).
        reducer_queue_size: maxsize of the chunked reducer queue (0 = unbounded).
    """
    # Plain configuration first.
    self._num_of_process = num_of_process
    self._mapper = mapper
    self._reducer = reducer
    # Data channels: one shared mapper input queue, chunked queues downstream.
    self._mapper_queue = mp.Queue(maxsize=mapper_queue_size)
    self._reducer_queue = ChunkedQueue(maxsize=reducer_queue_size)
    self._result_queue = ChunkedQueue()
    # Per-worker command channels plus one for the manager.
    self._mapper_cmd_queue = [mp.Queue() for _ in range(num_of_process)]
    self._reducer_cmd_queue = [mp.Queue() for _ in range(num_of_process)]
    self._manager_cmd_queue = mp.Queue()
    # Processes are created here but started elsewhere.
    self._manager_process = mp.Process(target=self._run_manager)
    self._mapper_process = []
    self._reducer_process = []
    for worker_idx in range(num_of_process):
        self._mapper_process.append(
            mp.Process(target=self._run_mapper, args=(worker_idx, )))
        self._reducer_process.append(
            mp.Process(target=self._run_reducer, args=(worker_idx, )))
def run_parallel_async(graph, nprocs=None, sleep=0.2):
    """Run the task graph across several scheduler worker processes.

    Args:
        graph: task graph object (tgraph).
        nprocs: number of scheduler processes; defaults to half the CPU count.
            With ``nprocs == 1`` this degenerates to ``run_async``.
        sleep: polling interval (seconds) of the dispatch loop.

    Returns:
        The graph's computed values recovered from the manager.
    """
    if nprocs == 1:
        return run_async(graph)
    nprocs = nprocs or mp.cpu_count() // 2
    with mp.Manager() as manager:
        graph = tgraph.create_parallel_compatible_graph(graph, manager)
        # Bounded queues sized to the total number of tasks.
        ioq = mp.Queue(len(graph.funcs.keys()))
        cpuq = mp.Queue(len(graph.funcs.keys()))
        # NOTE(review): worker Process handles are not retained here, so they
        # can never be joined or checked for failure after the loop finishes.
        for _ in range(nprocs):
            proc = mp.Process(target=run_scheduler,
                              args=(graph, sleep, ioq, cpuq))
            proc.start()
        # Dispatch loop: feed ready tasks onto the io/cpu queues until done.
        while not tgraph.all_done(graph):
            for task in tgraph.get_ready_tasks(graph):
                graph = tgraph.mark_as_in_progress(graph, task)
                mlog(graph).info('pid {}: queueing task {}'.format(
                    os.getpid(), task))
                # Route task to the queue matching its workload type.
                if task in graph.io_bound:
                    ioq.put(task)
                else:
                    cpuq.put(task)
            time.sleep(sleep)
        return tgraph.recover_values_from_manager(graph)
def backtrack(next_choice_func, *, partial_checker=None, candidate_matcher=None, intermediate_queue=None, solutions_queue=None, mailbox=None, discard=None):
    """Queue-driven breadth-first backtracking search.

    Args:
        next_choice_func: partial -> iterable of all possible next extensions
            of that partial sequence.
        partial_checker: guess -> bool; whether a guess is still viable and
            should stay in the search queue.
        candidate_matcher: partial -> bool; whether a partial is a complete
            solution (required).  Matches are put on the solutions queue.
        intermediate_queue: queue of partials to explore (fresh Queue if None).
        solutions_queue: queue receiving complete solutions (fresh if None).
        mailbox: optional control queue: 1 = quit, 2 = pause, 3 = resume.
        discard: optional queue receiving rejected guesses.

    Runs until told to quit via the mailbox (it never returns normally).
    """
    paused = False
    q = intermediate_queue if intermediate_queue is not None else multiprocessing.Queue()
    solutions = solutions_queue if solutions_queue is not None else multiprocessing.Queue()
    assert candidate_matcher is not None, "A function to match final solutions must be provided."
    while True:
        # Drain pending control messages first.
        # BUG FIX: the original polled mailbox.empty() but popped from the
        # *work* queue (q.get()), silently consuming work items as commands;
        # it also crashed when mailbox was left as its default None.
        while mailbox is not None and not mailbox.empty():
            v = mailbox.get()
            print("Received in inbox:", v)
            if v == 1:
                quit()
            elif v == 2:
                paused = True
            elif v == 3:
                paused = False
        if not paused:
            try:
                partial = q.get()
                if candidate_matcher(partial):
                    solutions.put(partial)
                for guess in next_choice_func(partial):
                    if partial_checker(guess):
                        q.put(guess)
                    elif discard:
                        # Not viable: optionally report it for inspection.
                        discard.put(guess)
            except queue.Empty:
                pass
def run_parallel(graph, nprocs=None, sleep=0.2, raise_errors=False):
    """Run the task graph on a multiprocessing Pool.

    Args:
        graph: task graph object (tgraph).
        nprocs: pool size; defaults to cpu_count() - 1.
        sleep: polling interval (seconds) of the dispatch loop.
        raise_errors: forwarded to each run_task invocation.

    Returns:
        The graph's computed values recovered from the manager.

    Raises:
        Re-raises the first exception reported by a failed task.
    """
    nprocs = nprocs or mp.cpu_count() - 1
    with mp.Manager() as manager:
        # Rebuild the graph on manager-backed storage so workers share state.
        graph = tgraph.create_parallel_compatible_graph(graph, manager)
        with mp.Pool(nprocs) as pool:
            # Small bounded queue collecting exceptions from failed tasks.
            exception_q = mp.Queue(10)

            def error_callback(exception):
                # Record the failure and stop the whole pool immediately.
                exception_q.put_nowait(exception)
                pool.terminate()

            # Dispatch loop: hand every ready task to the pool until done.
            while not tgraph.all_done(graph):
                for task in tgraph.get_ready_tasks(graph, reverse=False):
                    graph = tgraph.mark_as_in_progress(graph, task)
                    mlog(graph).info('pid {}: assigning task {}'.format(
                        os.getpid(), task))
                    pool.apply_async(run_task,
                                     args=(graph, task, raise_errors),
                                     error_callback=error_callback)
                time.sleep(sleep)
            # Surface the first recorded task failure, if any.
            if not exception_q.empty():
                raise exception_q.get()
            return tgraph.recover_values_from_manager(graph)
def __init__(self, ds, nr_prefetch, nr_proc):
    """
    Args:
        ds (DataFlow): input DataFlow.
        nr_prefetch (int): size of the queue to hold prefetched datapoints.
        nr_proc (int): number of processes to use.
    """
    # Windows requires everything sent to a subprocess to be picklable,
    # which some dataflows are not — warn up front.
    if os.name == 'nt':
        logger.warn("MultiProcessPrefetchData does support windows. \
However, windows requires more strict picklability on processes, which may \
lead of failure on some of the code.")
    super(MultiProcessPrefetchData, self).__init__(ds)
    # Cache the dataflow size; -1 marks "unknown".
    try:
        self._size = ds.size()
    except NotImplementedError:
        self._size = -1
    self.nr_proc = nr_proc
    self.nr_prefetch = nr_prefetch
    # Each worker forks its own copy of the dataflow, so datapoints are
    # only meaningful if the source is i.i.d.
    if nr_proc > 1:
        logger.info("[MultiProcessPrefetchData] Will fork a dataflow more than one times. "
                    "This assumes the datapoints are i.i.d.")
    # Bounded queue shared by all workers; consumers read from it.
    self.queue = mp.Queue(self.nr_prefetch)
    self.procs = [MultiProcessPrefetchData._Worker(self.ds, self.queue)
                  for _ in range(self.nr_proc)]
    # Register cleanup and signal masking before starting the workers.
    ensure_proc_terminate(self.procs)
    start_proc_mask_signal(self.procs)
def __init__(self, port=None):
    """Initialize the worker's queue, control events, and optional port.

    Args:
        port: optional port number the process should use; stored as-is.
    """
    # NOTE(review): super(mp.Process, self).__init__() starts the MRO lookup
    # *after* mp.Process, so mp.Process.__init__ itself is skipped and the
    # process object may be left uninitialized.  The conventional call is
    # super(<ThisClass>, self).__init__() — confirm against the class header.
    super(mp.Process, self).__init__()
    self.queue = mp.Queue()
    # Control events toggled by the parent process.
    self.pause_state = mp.Event()
    self.halt = mp.Event()
    self.idle = True
    self.port = port
    self.kill_process = mp.Event()
def main():
    """Dispatch loop: on each connect signal, spawn fresh motion-control and
    display processes sharing the enable flag and the two image queues;
    on disconnect, clear the enable flag so the workers stop."""
    enable = mp.Value('i', 0)
    img_queue = mp.Queue(0)
    img_queue_bin = mp.Queue(0)
    worker_args = (enable, img_queue, img_queue_bin)
    while True:
        sign = receive()
        if sign == 1:
            print('connect')
            enable.value = 1
            # New worker pair per connection.
            for worker in (motionControl, displayImage):
                mp.Process(target=worker, args=worker_args).start()
        elif sign == 2:
            print('disconnect')
            enable.value = 0
def GetLoggings(logfile):
    """Configure the root logger for multi-process logging.

    Attaches a MultiProcessingLogHandler (wrapping a FileHandler on
    ``logfile``) at DEBUG level and returns the coordination objects.

    Returns:
        (terminating_event, root_logger, log_queue)
    """
    terminating = multiprocess.Event()
    log_queue = multiprocess.Queue(16)  # bounded hand-off queue for records
    root_logger = logging.getLogger('')
    root_logger.setLevel(logging.DEBUG)
    handler = MultiProcessingLogHandler(logging.FileHandler(logfile), log_queue)
    handler.setLevel(logging.DEBUG)
    root_logger.addHandler(handler)
    return (terminating, root_logger, log_queue)
def start(self):
    """Create the task/result queues and launch the consumer processes."""
    mp.freeze_support()  # no-op except in frozen Windows executables
    self.tasks = mp.JoinableQueue()
    self.results = mp.Queue()
    self.consumers = []
    for _ in range(self.getNConsumers()):
        self.consumers.append(Consumer(self.tasks, self.results))
    for consumer in self.consumers:
        consumer.start()
def start_break_timer(self, length):
    """Run a break countdown to completion.

    Same mechanics as the pomodoro timer, except the break cannot be
    paused: we block on join() until the countdown process finishes.
    """
    print('\nBreak started\n')
    remaining = multiprocess.Queue()
    timer_proc = multiprocess.Process(target=countdown,
                                      args=(length, remaining, self.sound_file))
    timer_proc.start()
    timer_proc.join()       # wait for the full break to elapse
    timer_proc.terminate()  # defensive; the process has already exited
    input('Press ENTER to start another pomodoro\r')
def multithread():
    """Run two `job` workers as threads and print the sum of their results."""
    q = mp.Queue()  # threads can share a multiprocessing queue as well
    workers = [td.Thread(target=job, args=(q, )) for _ in range(2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    total = q.get() + q.get()
    print('multithread:', total)
def multicore():
    """Run two `job` workers as processes and print the sum of their results."""
    q = mp.Queue()
    workers = [mp.Process(target=job, args=(q, )) for _ in range(2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    total = q.get() + q.get()
    print('multicore:', total)
def run_apple_script(cmd, timeout=300):
    """
    run apple script and return result. the script will run in a different
    process so if python crashes we will not fail. if the apple script doesn't
    return an answer within the timeout, it will be terminated
    :param cmd: apple script source text (fed to osascript via stdin)
    :param timeout: timeout (seconds) to end the apple script process
    :return: apple script stdout if any, otherwise None (also None on stderr)
    """
    def _run_apple_script_in_another_process(script, stdout_queue, stderr_queue):
        # Worker body: runs in a child process so a crash cannot take us down.
        # BUG FIX: the original passed shell=True together with a list
        # argument; with a list, everything after the first element becomes
        # shell positional args, so it only worked by accident.  Execute
        # osascript directly instead.
        apple_script_process = subprocess.Popen(['osascript'],
                                                shell=False,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.PIPE,
                                                text=True)
        p_stdout, p_stderr = apple_script_process.communicate(script)
        if p_stdout:
            stdout_queue.put(p_stdout)
        if p_stderr:
            stderr_queue.put(p_stderr)

    stdout_queue_obj = multiprocess.Queue()
    stderr_queue_obj = multiprocess.Queue()
    p = multiprocess.Process(target=_run_apple_script_in_another_process,
                             args=(cmd, stdout_queue_obj, stderr_queue_obj))
    p.start()
    p.join(timeout=timeout)
    if p.is_alive():
        # join() timed out: the script is stuck — kill the worker.
        logger.error(
            'The process that runs the apple script was terminated after reaching the timeout'
        )
        p.terminate()
    if not stderr_queue_obj.empty(
    ):  # if stderr, log the error and return None
        logger.error(stderr_queue_obj.get())
        return
    if not stdout_queue_obj.empty():
        stdout = stdout_queue_obj.get()
        return stdout
def test():
    """Benchmark driver comparing threading vs the legacy `processing`
    (pre-stdlib multiprocessing) primitives: queues, pipes, sequences,
    locks, and condition variables, both direct and manager-proxied."""
    manager = processing.Manager()
    # GC disabled so collection pauses don't skew the timings.
    gc.disable()
    print('\n\t######## testing Queue.Queue\n')
    test_queuespeed(threading.Thread, Queue.Queue(), threading.Condition())
    print('\n\t######## testing processing.Queue\n')
    test_queuespeed(processing.Process, processing.Queue(), processing.Condition())
    print('\n\t######## testing Queue managed by server process\n')
    test_queuespeed(processing.Process, manager.Queue(), manager.Condition())
    print('\n\t######## testing processing.Pipe\n')
    test_pipespeed()
    # NOTE(review): bare `print` below is a Python-2 leftover; under Python 3
    # it is a harmless no-op expression (references the function, no call).
    print
    print('\n\t######## testing list\n')
    test_seqspeed(range(10))
    print('\n\t######## testing list managed by server process\n')
    test_seqspeed(manager.list(range(10)))
    print('\n\t######## testing Array("i", ..., lock=False)\n')
    test_seqspeed(processing.Array('i', range(10), lock=False))
    print('\n\t######## testing Array("i", ..., lock=True)\n')
    test_seqspeed(processing.Array('i', range(10), lock=True))
    print()
    print('\n\t######## testing threading.Lock\n')
    test_lockspeed(threading.Lock())
    print('\n\t######## testing threading.RLock\n')
    test_lockspeed(threading.RLock())
    print('\n\t######## testing processing.Lock\n')
    test_lockspeed(processing.Lock())
    print('\n\t######## testing processing.RLock\n')
    test_lockspeed(processing.RLock())
    print('\n\t######## testing lock managed by server process\n')
    test_lockspeed(manager.Lock())
    print('\n\t######## testing rlock managed by server process\n')
    test_lockspeed(manager.RLock())
    print()
    print('\n\t######## testing threading.Condition\n')
    test_conditionspeed(threading.Thread, threading.Condition())
    print('\n\t######## testing processing.Condition\n')
    test_conditionspeed(processing.Process, processing.Condition())
    print('\n\t######## testing condition managed by a server process\n')
    test_conditionspeed(processing.Process, manager.Condition())
    gc.enable()
def __init__(self, measureFlowchart, numberProc=1):
    """Parallel processing/preprocessing pool for image frames.

    Args:
        measureFlowchart: flowchart object driving the measurements.
        numberProc: number of ProcessQueue worker processes to create.
    """
    self.measureFlowchart = measureFlowchart
    self.numberProc = numberProc
    # JoinableQueue(1) holds at most one frame in flight at a time.
    self.input_queue = multiprocessing.JoinableQueue(1)
    self.output_queue = multiprocessing.Queue()
    workers = []
    for _ in range(self.numberProc):
        workers.append(ProcessQueue(self.input_queue, self.output_queue))
    self.processes = workers
def __init__(self, data_queue, nr_producer, start=0):
    """Collect datapoints from ``data_queue`` and re-emit them in order.

    Args:
        data_queue (mp.Queue): queue of incoming datapoints.
        nr_producer (int): number of producers; the process stops after
            receiving this many DIE sentinels.
        start (int): rank of the first expected object.
    """
    super(OrderedResultGatherProc, self).__init__()
    self.result_queue = mp.Queue()
    self.data_queue = data_queue
    self.nr_producer = nr_producer
    # Buffers out-of-order arrivals until their rank comes up.
    self.ordered_container = OrderedContainer(start=start)
def __init__(self, logger=None, fmt=None, level=None):
    """
    Current logging instance or None - alternative format and level to be
    used within the bounded context.

    Redirect log requests to a multi-proc queue and a listener that
    redirects the request to handlers bound to the input logger instance.

    Args:
        logger: logging.Logger to wrap (root logger when None).
        fmt: alternative format string for the bounded context.
        level: alternative level for the bounded context.
    """
    # Bookkeeping for handlers swapped in/out while the context is active.
    self.__handlerInitialList = []
    self.__handlerWrappedList = []
    # NOTE(review): the two lines below are commented out, so the `logger`
    # parameter is currently unused and no queue is created here — confirm
    # whether initialization happens elsewhere (e.g. on context entry).
    # self.logger = logger if logger else logging.getLogger()
    # self.__loggingQueue = multiprocessing.Queue(-1)
    self.__ql = None  # queue listener, created lazily
    self.__altFmt = logging.Formatter(fmt) if fmt else None
    self.__altLevel = level if level else None
def test_queue():
    """Start a queue_func producer process and echo items it sends until the
    'STOP' sentinel arrives; print TIMEOUT whenever 0.3s passes empty."""
    q = processing.Queue()
    producer = processing.Process(target=queue_func, args=(q, ))
    producer.start()
    item = None
    while item != 'STOP':
        try:
            item = q.get(timeout=0.3)
        except Empty:
            print('TIMEOUT')
            continue
        print(item, end=' ')
        sys.stdout.flush()
    print()
def __init__(self, env_id, make_env, reward_predictor, num_workers, max_timesteps_per_episode, seed):
    """Spin up a pool of Actor processes fed by a shared task queue.

    Each actor gets a uniquely seeded environment (seed * 1000 + rank) so
    rollouts are decorrelated across workers.
    """
    self.num_workers = num_workers
    self.predictor = reward_predictor
    self.tasks_q = multiprocess.JoinableQueue()
    self.results_q = multiprocess.Queue()
    self.actors = []
    for rank in range(self.num_workers):
        actor_seed = seed * 1000 + rank  # unique per-actor env seed
        self.actors.append(Actor(self.tasks_q, self.results_q, env_id,
                                 make_env, actor_seed,
                                 max_timesteps_per_episode))
    for actor in self.actors:
        actor.start()
    # Legacy note: first iteration ran 20,000 / 1000 = 20 episodes.
    self.average_timesteps_in_episode = 1000
def generate(self: object, num_workers: int = 5) -> None:
    """Generate data in parallel and funnel it to a single writer process.

    Spawns ``num_workers`` generator processes (``self.gen``) that feed a
    shared queue, plus one writer process (``self.write``) that drains it
    until the "STOP" sentinel arrives.

    Args:
        num_workers: number of generator processes.  Defaults to 5, the
            value previously hard-coded.
    """
    print("Beginning data generation...")
    start_seconds = time.time()
    queue = multiprocess.Queue()
    writer = multiprocess.Process(target=self.write, args=(queue, "STOP"))
    jobs = []
    for _ in range(num_workers):
        p = multiprocess.Process(target=self.gen, args=(queue, ))
        jobs.append(p)
        p.start()
    writer.start()
    # Wait for every generator, then tell the writer no more data is coming.
    for job in jobs:
        job.join()
    queue.put("STOP")
    writer.join()
    elapsed_time = (time.time() - start_seconds) / 60
    print("Generation completed. Elapsed time: ",
          "{0:.2f}".format(elapsed_time), " minutes")
def process_proxy(f_, *args0):
    """Run ``f_(*args0)`` in a child process and return its result.

    Isolates the call so a hard crash in ``f_`` cannot take down the
    parent interpreter.
    """
    import multiprocess as mp

    def f1(f, q, *args):
        # Child body: ship the result (or the failure) back over the queue.
        try:
            r = f(*args)
            q.put(r)
            q.close()
        except Exception as e:
            import traceback
            # NOTE(review): on failure a (exception, traceback_str) tuple is
            # returned to the *caller* as the ordinary return value rather
            # than being re-raised — confirm callers expect this.
            q.put((e, traceback.format_exc()))

    q = mp.Queue()
    p = mp.Process(target=f1, args=(f_, q) + args0)
    p.start()
    # NOTE(review): blocks forever if the child dies without putting anything
    # (e.g. killed by a signal) — no timeout is applied here.
    rv = q.get()
    q.close()
    p.terminate()
    p.join()
    q.join_thread()
    return rv
def start_pomodoro_timer(self):
    """Run a pausable pomodoro countdown in a background process.

    ENTER pauses (kills the countdown process, remembers the remaining
    time), a second ENTER resumes by recursively restarting the timer
    with the remaining time.
    """
    # Save the remaining length of the pomodoro inside a queue that can be
    # accessed via the multiprocess
    current_length = multiprocess.Queue()
    # Define an asynchronous multiprocess for the timer
    pomodoro_process = multiprocess.Process(target=countdown,
                                            args=(self.remaining_length,
                                                  current_length,
                                                  self.sound_file))
    pomodoro_process.daemon = True
    pomodoro_process.start()
    while pomodoro_process.is_alive():
        # While the timer is running in the background, listen to user input
        input('Press ENTER to pause\n')
        # If the user has pressed ENTER, get the remaining length and kill
        # the current timer process.
        # NOTE(review): current_length.get() blocks until countdown has put a
        # value — assumes countdown publishes the remaining time continuously.
        self.remaining_length = current_length.get()
        pomodoro_process.terminate()
        # Check if the timer has finished (reached 0 seconds)
        if self.remaining_length < 1:
            break
        # Monitor for how long the user has paused the timer
        pause_start_time = time.time()
        # Wait for the user input
        input('Press ENTER to resume\n')
        # Print for how long the timer was paused
        pause_end_time = time.time() - pause_start_time
        pause_time_format = '{:02d}:{:02d}'.format(int(pause_end_time / 60),
                                                   int(pause_end_time % 60))
        print('Total pause time: ' + pause_time_format + '\n')
        # NOTE(review): each pause/resume adds a stack frame via this
        # recursion; very many pauses would eventually hit the recursion limit.
        """ Start the pomodoro timer again, but counting down from the remaining time we got before killing the multiprocess """
        self.start_pomodoro_timer()
def main():
    """Smoke-test the logging server: launch it, wire the root logger to it
    over a socket, emit sample records, then shut it down."""
    import logging
    import logging.handlers
    import multiprocess as mp

    # Launch the logging server and wait for it to report its address.
    log_file = '~/mplogger.log'
    status_queue = mp.Queue()
    lserver_process = mp.Process(target=loggingserver,
                                 args=(log_file, status_queue))
    lserver_process.daemon = True
    lserver_process.start()
    server_address = status_queue.get()

    # Route all root-logger records through the server.
    rootLogger = logging.getLogger('')
    rootLogger.setLevel(logging.DEBUG)
    rootLogger.addHandler(ClientSocketHandler(*server_address))

    # Emit sample records at several levels ...
    for emit, message in ((logging.info, 'Test1'),
                          (logging.error, 'Test2'),
                          (logging.critical, 'Test3'),
                          (logging.debug, 'Test4'),
                          (logging.warning, 'Test5')):
        emit(message)
    # ... and through named child loggers.
    logging.getLogger('test1').info('asdfasdfsa')
    logging.getLogger('test2').info('1234567890')

    # Ask the server to exit and wait for it.
    status_queue.put('DIE')
    lserver_process.join()
    print("Server closed, exiting...")
def run_safely(f, x): """Runs f(args) in a separate process.""" # f_global = f # globalize(f) # mp.freeze_support() mp.set_start_method("spawn") q = mp.Queue() p = Process(target=with_queue, args=(f, q, x)) p.start() p.join() if p.exception: error, traceback = p.exception print(traceback) raise error try: out = q.get(False, 2.0) # Non-blocking mode except queue.Empty: print("Empty queue!") print("Exit code: ", p.exitcode) raise MemoryError() return out
def __init__(self, thread=None, *args, **kwargs):
    """Set up the worker-thread pool, reader channel, and timer threads.

    Args:
        thread: number of request worker threads (defaults to 1).
        *args, **kwargs: forwarded to the IProcess base class.
    """
    super(IProcess, self).__init__(*args, **kwargs)
    self._thread = thread or 1
    # Pool of available worker slot indices (bounded to thread count).
    self._pool = multiprocessing.Queue(self._thread)
    self._threads = []
    self._requests = []
    self._states = {}
    # One (request, response) queue pair and one loop thread per worker slot.
    for i in range(self._thread):
        request, response = multiprocessing.Queue(), multiprocessing.Queue(
        )
        t = threading.Thread(target=self.loop,
                             args=(request, response),
                             name='request-%s' % i)
        self._threads.append(t)
        self._requests.append((request, response))
        self._pool.put(i)  # mark this slot as free
        self._states[t.name] = None
    # Dedicated reader channel with its own mutex and loop thread.
    self._reader_mutex = multiprocessing.Lock()
    self._reader = multiprocessing.Queue(), multiprocessing.Queue()
    self._threads.append(
        threading.Thread(target=self.loop, args=self._reader,
                         name='reader'))
    self._states['reader'] = None
    self._collection = multiprocessing.Queue()
    self._received = multiprocessing.Event()
    # Discover declared child properties and timers by scanning class
    # attributes for the 'child_property.' / 'child_timer.' docstring tags.
    self._properties = {}
    for key in dir(self.__class__):
        value = getattr(self.__class__, key, None)
        value = getattr(value, '__doc__', None)
        if str(value).startswith('child_property.'):
            self._properties[key] = None
        if str(value).startswith('child_timer.'):
            # Timer interval is encoded after the final '.' in the tag.
            delta = int(value.split('.')[-1])
            self._threads.append(
                threading.Thread(target=self.timentry,
                                 args=(key, delta),
                                 name='timer.%s' % key))
    self._wrap_run()
def fetchseq(ids, species, write=False, output_name='', delim='\t', id_type='brute', server=None, source="SQL", database="bioseqdb", database_path=None, host='localhost', driver='psycopg2', version='1.0', user='******', passwd='', email='', batch_size=50, output_type="fasta", verbose=1, n_threads=1, n_subthreads=1, add_length=(0, 0), indent=0):
    """Fetch sequences for the given IDs from an SQL/FASTA/2bit source.

    IDs may arrive as a generator, a list, or a path-like object whose file
    is read line-by-line.  Fetching is fanned out over ``n_threads``
    FetchSeqMP worker processes communicating through queues.

    Returns:
        (output_list, missing_items_list) normally, 'None' if the ID file is
        empty, or None when ``write`` is True (results written via SeqIO).
    """
    if isgenerator(ids):
        if verbose > 1:
            print('Received generator!', indent=indent)
    elif isinstance(ids, list):
        if verbose > 1:
            print('Received list!', indent=indent)
    else:
        if verbose > 1:
            print('Reading ID File... ', indent=indent)
        # BUG FIX: the original opened the ID file with mode 'w', which
        # truncates it before reading — every run destroyed the input and
        # produced an empty ID list.  Open read-only instead.
        with ids.open('r') as in_handle:
            id_prelist = [line.strip() for line in in_handle
                          ]  # list of each line in the file
            print('Done!', indent=indent)
        ids = [id_item for id_item in filter(None, id_prelist) if id_item]
        if not id_prelist or id_prelist is None:
            if verbose:
                print('id_prelist is empty!', indent=indent)
            return 'None'
    # Validate record shape before queueing any work.
    # (Message fixed: the check is for 12 fields, not 5.)
    for id_item in ids:
        assert len(id_item) == 12, (
            "Item {0} in id_list has {1} items, not 12!\n"
            "Format should be: "
            "chr, (start,end), id, score, strand, thickStart, thickEnd, rgb, blockcount,"
            " blockspans, blockstarts, query_span"
            "!").format(
                " ".join((" ".join(item) if not isinstance(item, str) else item
                          for item in id_item)), len(id_item))
    if verbose > 1:
        print('Readied ids!', indent=indent)
    # Work queue feeding the fetchers, and queue collecting their results.
    id_list = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()
    if 'sql' in source.lower():
        if server is None:
            try:
                if verbose > 1:
                    print('No server received, opening server...', indent=indent)
                server = BioSeqDatabase.open_database(driver=driver,
                                                     user=user,
                                                     passwd=passwd,
                                                     host=host,
                                                     database=database)
                if verbose > 1:
                    print('Done!', indent=indent)
            except Exception as err:
                if verbose > 1:
                    print('Failed to open server!', indent=indent)
                    print(str(type(err)), err, sep=' ', indent=indent)
                raise
        else:
            if verbose > 1:
                print('Received server handle:', indent=indent)
                print(server, indent=indent)
            if verbose > 2:
                print('Please note the sub_databases of server:\n\t',
                      [str(i) for i in server.keys()], indent=indent)
    elif source.lower() in ['fasta', '2bit', 'twobit']:
        print('Search type: ', source, indent=indent)
    else:
        raise SearchEngineNotImplementedError(
            'Search using source {} has not yet been implemented!'.format(
                source))
    if verbose > 1:
        print('Creating FecSeq Processes...', indent=indent)
    # One fetcher process per thread; all share the two queues.
    fs_instances = [
        FetchSeqMP(id_queue=id_list,
                   seq_out_queue=results,
                   delim=delim,
                   id_type=id_type,
                   server=server,
                   species=species,
                   source=source,
                   database=database,
                   database_path=database_path,
                   host=host,
                   driver=driver,
                   version=version,
                   user=user,
                   passwd=passwd,
                   email=email,
                   output_type=output_type,
                   batch_size=batch_size,
                   verbose=verbose,
                   n_subthreads=n_subthreads,
                   add_length=add_length,
                   indent=indent + 1) for _ in range(n_threads)
    ]
    if verbose > 1:
        print('Done! Starting processes...', indent=indent)
    for fs in fs_instances:
        fs.start()
    if verbose > 1:
        print('Done!', indent=indent)
        print('Assigning FetchSeq records to queue... ', indent=indent)
    # Remember the original ordering so results can be re-sorted at the end.
    id_order = []
    for i, id_rec in enumerate(ids):
        try:
            id_order.append("{0}:{1}-{2}".format(id_rec[0], id_rec[1][0],
                                                 id_rec[1][1]))
        except IndexError:
            id_order.append("{0}".format(id_rec[0]))
        try:
            id_list.put(FetchSeq(id_rec=id_rec))
        except AssertionError as err:
            print(i, type(err), err, sep=' ')
            break
    # One sentinel per worker tells it to shut down.
    for _ in fs_instances:
        id_list.put(None)
    if verbose > 1:
        print('Done!', indent=indent)
    output_dict = dict()
    missing_items_list = list()
    if verbose > 1:
        print('Getting sequences from processes... ', indent=indent)
    # Collect exactly one result per queued ID.
    n_jobs = len(ids)
    while n_jobs:
        seq, missing = results.get()
        output_dict[seq[0]] = seq[1]
        missing_items_list.append(missing)
        n_jobs -= 1
    if verbose > 1:
        print('Done! Finished fetching sequences!', indent=indent)
        print('Closing processes!', indent=indent)
    for fs in fs_instances:
        if fs.is_alive():
            fs.join()
    # Re-order results to match the input order, skipping missing IDs.
    output_list = [output_dict[i] for i in id_order if i in output_dict]
    if write:
        SeqIO.write(output_list, output_name, output_type)
        return
    else:
        if missing_items_list == [None]:
            missing_items_list = None
        return output_list, missing_items_list
def __init__(self, num_of_processor: int, mapper: Callable, max_size_per_mapper_queue: int = 0, collector: Callable = None, max_size_per_collector_queue: int = 0, enable_process_id: bool = False, batch_size: int = 1, progress=None, progress_total=None, use_shm=False, enable_collector_queues=True, single_mapper_queue: bool = False):
    """Build the mapper/collector/progress queues and worker processes.

    Args:
        num_of_processor: number of worker processes.
        mapper: callable (or Mapper subclass) applied to each work item.
        max_size_per_mapper_queue: per-queue bound for mapper input (0 = unbounded).
        collector: optional callable run in the main process on results.
        max_size_per_collector_queue: per-queue bound for collector output.
        enable_process_id: pass the worker index to the mapper as `_idx`.
        batch_size: number of items batched per queue message.
        progress: optional progress callback (enables progress queues/thread).
        progress_total: total count forwarded to the progress thread.
        use_shm: use shared-memory queues (requires Python 3.8+).
        enable_collector_queues: create per-worker collector queues.
        single_mapper_queue: share one input queue across all workers
            instead of one queue per worker.
    """
    self.num_of_processor = num_of_processor
    self.single_mapper_queue = single_mapper_queue
    # Attribute type declarations only; ShmQueue exists on 3.8+ only.
    if sys.version_info >= (3, 8):
        self.collector_queues: typing.Optional[typing.Union[ShmQueue, mp.Queue]]
    else:
        self.collector_queues: typing.Optional[mp.Queue]
    if use_shm:
        if sys.version_info >= (3, 8):
            # Shared-memory queue variants.
            if single_mapper_queue:
                # One shared queue sized for all workers combined.
                self.mapper_queues = [
                    ShmQueue(maxsize=max_size_per_mapper_queue * num_of_processor)
                ]
            else:
                self.mapper_queues = [
                    ShmQueue(maxsize=max_size_per_mapper_queue)
                    for _ in range(num_of_processor)
                ]
            if enable_collector_queues:
                # NOTE(review): unlike the non-shm branch below, no
                # collector_qstats list is created here — confirm whether
                # that asymmetry is intentional.
                self.collector_queues = [
                    ShmQueue(maxsize=max_size_per_collector_queue)
                    for _ in range(num_of_processor)
                ]
            else:
                self.collector_queues = None
        else:
            raise ValueError(
                "shm not available in this version of Python.")
    else:
        # Plain multiprocessing queue variants.
        if single_mapper_queue:
            self.mapper_queues = [
                mp.Queue(maxsize=max_size_per_mapper_queue * num_of_processor)
            ]
        else:
            self.mapper_queues = [
                mp.Queue(maxsize=max_size_per_mapper_queue)
                for _ in range(num_of_processor)
            ]
        if enable_collector_queues:
            self.collector_queues = [
                mp.Queue(maxsize=max_size_per_collector_queue)
                for _ in range(num_of_processor)
            ]
            # Per-queue open/closed state flags.
            self.collector_qstats = [
                self.QSTATS_ON for _ in range(num_of_processor)
            ]
        else:
            self.collector_queues = None
    # Create (but do not start) one worker process per processor, wiring
    # each to its input queue and (optionally) its collector queue.
    if self.collector_queues is not None:
        if single_mapper_queue:
            self.processes = [
                mp.Process(target=self._run,
                           args=(i, self.mapper_queues[0],
                                 self.collector_queues[i]))
                for i in range(num_of_processor)
            ]
        else:
            self.processes = [
                mp.Process(target=self._run,
                           args=(i, self.mapper_queues[i],
                                 self.collector_queues[i]))
                for i in range(num_of_processor)
            ]
    else:
        if single_mapper_queue:
            self.processes = [
                mp.Process(target=self._run,
                           args=(i, self.mapper_queues[0], None))
                for i in range(num_of_processor)
            ]
        else:
            self.processes = [
                mp.Process(target=self._run,
                           args=(i, self.mapper_queues[i], None))
                for i in range(num_of_processor)
            ]
    # Progress reporting uses one size-1 queue per worker.
    if progress is not None:
        if sys.version_info >= (3, 8):
            self.progress_queues: typing.Optional[typing.Union[ShmQueue, mp.Queue]]
        else:
            self.progress_queues: typing.Optional[mp.Queue]
        if use_shm:
            if sys.version_info >= (3, 8):
                self.progress_queues = [
                    ShmQueue(maxsize=1) for _ in range(num_of_processor)
                ]
            else:
                raise ValueError(
                    "shm not available in this version of Python.")
        else:
            self.progress_queues = [
                mp.Queue(maxsize=1) for _ in range(num_of_processor)
            ]
            self.progress_qstats = [
                self.QSTATS_ON for _ in range(num_of_processor)
            ]
    else:
        self.progress_queues = None
    self.progress = progress
    # Captured for the closure below (child class can't see `self` directly).
    ctx = self
    if not inspect.isclass(mapper) or not issubclass(mapper, Mapper):
        # Plain callable: wrap it in a Mapper subclass so workers have a
        # uniform interface; optionally inject the worker index as `_idx`.
        class DefaultMapper(Mapper):
            def process(self, *args, **kwargs):
                if ctx.enable_process_id:
                    kwargs['_idx'] = self._idx
                return mapper(*args, **kwargs)
        self.mapper = DefaultMapper
    else:
        self.mapper = mapper
    self.collector = collector
    self.mapper_queue_index = 0  # round-robin cursor over mapper queues
    self.enable_process_id = enable_process_id
    self.batch_size = batch_size
    self.batch_data = []  # staging buffer until a full batch is queued
    # collector can be handled in each process or in main process after merging (collector needs to be set)
    # if collector is set, it needs to be handled in main process;
    # otherwise, it assumes there's no collector.
    if collector:
        self.collector_thread = CollectorThread(self, collector)
    if progress:
        self.progress_thread = ProgressThread(self, progress, progress_total,
                                              num_of_processor)
def filter(self, filter_obj, squash=True, num_procs=mp.cpu_count()):
    """Filter the dataframe using a user-supplied function.

    Note:
        Operates in parallel on user-supplied lambda functions.

    Arguments:
        filter_obj (callable, list, or QueryMatcher): the filter to apply
            to the GraphFrame.
        squash (boolean, optional): if True, automatically call squash
            for the user.
        num_procs (int, optional): number of processes for the parallel
            path.  NOTE(review): this default is evaluated once at import
            time, not per call.

    Raises:
        InvalidFilter: filter_obj is none of the supported kinds.
        EmptyFilter: the filter matched no rows.
    """
    # Work on a flat (index-reset) copy; the index is restored at the end.
    dataframe_copy = self.dataframe.copy()
    index_names = self.dataframe.index.names
    dataframe_copy.reset_index(inplace=True)
    filtered_df = None
    if callable(filter_obj):
        # applying pandas filter using the callable function
        if num_procs > 1:
            # perform filter in parallel (default)
            queue = mp.Queue()
            processes = []
            returned_frames = []
            subframes = np.array_split(dataframe_copy, num_procs)
            # Manually create a number of processes equal to the number of
            # logical cpus available
            for pid in range(num_procs):
                process = mp.Process(
                    target=parallel_apply,
                    args=(filter_obj, subframes[pid], queue),
                )
                process.start()
                processes.append(process)
            # Stores filtered subframes in a list: 'returned_frames', for
            # pandas concatenation. This intermediary list is used because
            # pandas concat is faster when called only once on a list of
            # dataframes, than when called multiple times appending onto a
            # frame of increasing size.
            for pid in range(num_procs):
                returned_frames.append(queue.get())
            for proc in processes:
                proc.join()
            filtered_df = pd.concat(returned_frames)
        else:
            # perform filter sequentially if num_procs == 1
            filtered_rows = dataframe_copy.apply(filter_obj, axis=1)
            filtered_df = dataframe_copy[filtered_rows]
    elif isinstance(filter_obj, list) or isinstance(
            filter_obj, QueryMatcher):
        # use a callpath query to apply the filter
        query = filter_obj
        if isinstance(filter_obj, list):
            query = QueryMatcher(filter_obj)
        query_matches = query.apply(self)
        # Union of all matched nodes across query results.
        match_set = list(set().union(*query_matches))
        filtered_df = dataframe_copy.loc[dataframe_copy["node"].isin(
            match_set)]
    else:
        raise InvalidFilter(
            "The argument passed to filter must be a callable, a query path list, or a QueryMatcher object."
        )
    if filtered_df.shape[0] == 0:
        raise EmptyFilter(
            "The provided filter would have produced an empty GraphFrame.")
    filtered_df.set_index(index_names, inplace=True)
    # Build the result frame, carrying over metric bookkeeping.
    filtered_gf = GraphFrame(self.graph, filtered_df)
    filtered_gf.exc_metrics = self.exc_metrics
    filtered_gf.inc_metrics = self.inc_metrics
    if squash:
        return filtered_gf.squash()
    return filtered_gf
def _drain_queue(sock_queue):
    """ Ensures queue is empty before closing """
    # Grace period so in-flight records can land on the queue first.
    time.sleep(3)  # TODO: the socket needs a better way of closing
    while not sock_queue.empty():
        obj = sock_queue.get()
        _handleLogRecord(obj)


if __name__ == '__main__':
    # Some tests
    import logging, logging.handlers
    import multiprocess as mp
    # Set up loggingserver
    log_file = '~/mplogger.log'
    status_queue = mp.Queue()
    # NOTE(review): this rebinding shadows the `loggingserver` function with
    # the Process object — harmless once, but the function is unreachable
    # afterwards under this name.
    loggingserver = mp.Process(target=loggingserver,
                               args=(log_file, status_queue))
    loggingserver.daemon = True
    loggingserver.start()
    # Server reports its listening address through the status queue.
    server_address = status_queue.get()
    # Connect main process to logging server
    rootLogger = logging.getLogger('')
    rootLogger.setLevel(logging.DEBUG)
    socketHandler = ClientSocketHandler(*server_address)
    rootLogger.addHandler(socketHandler)
    # Send some sample logs
    logging.info('Test1')
    logging.error('Test2')