def g_app_init(path):
    global g_config
    global g_training_pool
    global g_nice
    global g_pool
    global g_queue
    global g_storage
    global g_timer

    g_config = loudml.config.load_config(path)
    g_storage = FileStorage(g_config.storage['path'])
    g_queue = multiprocessing.Queue()
    g_nice = g_config.training.get('nice', 0)
    g_training_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[path, g_queue],
    )
    g_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[path, g_queue],
    )
    g_timer = RepeatingTimer(1, read_messages)
    g_timer.start()

    def daemon_send_metrics():
        send_metrics(g_config.metrics, g_storage, user_agent="loudmld")

    daemon_send_metrics()
    schedule.every().hour.do(daemon_send_metrics)
def test_pool_deadlock_stop(self):
    """Process Pool Fork writing deadlocks are stopping the Pool."""
    with self.assertRaises(RuntimeError):
        pool = pebble.ProcessPool(max_workers=1)
        for _ in range(10):
            pool.schedule(function)
            time.sleep(0.1)
def run_with_timeout(plugin, timeout, progress, dt=0.005, **kwargs):
    # TODO: multi-process over the different tokens
    spinner = itertools.cycle(r"\|/-")
    pool = pebble.ProcessPool(max_workers=1)
    line = elapsed = format_time(0)
    with pool:
        t0 = time.time()
        future = pool.schedule(plugin[1].run, kwargs=kwargs, timeout=timeout)
        while not future.done():
            if progress is not None:
                line = "\r" + elapsed + " " + progress + " " + next(spinner)
                sys.stderr.write(line)
                sys.stderr.flush()
            time.sleep(dt)
            elapsed = format_time(time.time() - t0, timeout)
        walltime = time.time() - t0
        try:
            result_a, result_b = future.result()
        except Exception as err:
            result_a = Result.ok("")
            result_b = Result.ok("")
            error = repr(err)
        else:
            error = ""
    if progress is not None:
        sys.stderr.write("\r" + " " * len(line) + "\r")
        sys.stderr.flush()
    return result_a, result_b, walltime, error
def build(self):
    with pebble.ProcessPool(conf.get("workers")) as pool:
        instance = pool.map(resolv, sorted(self.threats.keys()),
                            timeout=conf.get("queryTimeout"))
        iterator = instance.result()
        for index, element in enumerate(sorted(self.threats.keys()), start=1):
            try:
                self.threats.update({element: next(iterator)})
            except:
                self.threats.update({element: []})
            if index % round(len(self.threats) / 100) == 0 or index == len(self.threats):
                log.info(
                    str("{}% done... ({}/{})").format(
                        int(100 / len(self.threats) * index), index,
                        len(self.threats)))
        try:
            next(iterator)
            log.warning(
                "Process pool is not empty (iterator object is still iterable)")
        except StopIteration:
            pass
    log.info(
        str("[!] BUILD part 1/1 done ({} threats)").format(len(self.threats)))
    return 0
def process(self):
    """
    Processes the split PCAP files, extracting a feature vector from each.

    The implementation leverages a pool of processes provided by the
    pebble module.
    """
    # Determine the range of files to be processed
    futures = []
    queue_length = len(self.file_queue)
    with pebble.ProcessPool(max_workers=config.NUM_JOBS) as pool:
        for counter, path in enumerate(self.file_queue):
            future = pool.schedule(
                self.process_pcap,
                (path, counter + 1, queue_length),
                timeout=1800,
            )
            futures.append(future)
        raw_data = []
        for future in futures:
            try:
                result = future.result()
                if result is not None:
                    raw_data.append(result)
            except TimeoutError:
                pass
        pool.close()
        pool.join()
    data = pandas.DataFrame(raw_data)
    data.to_csv(self.output, header=True, index=False, mode='w')
def run(api: GBD, resultset, func, args: dict):
    first = True
    if api.jobs == 1:
        for (hash, local) in resultset:
            result = func(hash, local, args)
            safe_run_results(api, result, check=first)
            first = False
    else:
        with pebble.ProcessPool(min(multiprocessing.cpu_count(), api.jobs)) as p:
            futures = [
                p.schedule(func, (hash, local, args))
                for (hash, local) in resultset
            ]
            for f in as_completed(futures):  # , timeout=api.tlim if api.tlim > 0 else None):
                try:
                    result = f.result()
                    safe_run_results(api, result, check=first)
                    first = False
                except pebble.ProcessExpired as e:
                    f.cancel()
                    eprint("{}: {}".format(e.__class__.__name__, e))
                except GBDException as e:
                    # might receive special handling in the future
                    eprint("{}: {}".format(e.__class__.__name__, e))
                except Exception as e:
                    eprint("{}: {}".format(e.__class__.__name__, e))
def run_parallel_tests(self):
    assert not self.futures
    assert not self.temporary_folders
    with pebble.ProcessPool(max_workers=self.parallel_tests) as pool:
        order = 1
        self.timeout_count = 0
        while self.state is not None:
            # do not create too many states
            if len(self.futures) >= self.parallel_tests:
                wait(self.futures, return_when=FIRST_COMPLETED)
            quit_loop = self.process_done_futures()
            if quit_loop:
                success = self.wait_for_first_success()
                self.terminate_all(pool)
                return success
            folder = tempfile.mkdtemp(prefix=self.TEMP_PREFIX, dir=self.root)
            test_env = TestEnvironment(
                self.state, order, self.test_script, folder,
                self.current_test_case,
                self.test_cases ^ {self.current_test_case},
                self.current_pass.transform, self.pid_queue)
            future = pool.schedule(test_env.run, timeout=self.timeout)
            self.temporary_folders[future] = folder
            self.futures.append(future)
            self.pass_statistic.add_executed(self.current_pass)
            order += 1
            state = self.current_pass.advance(self.current_test_case, self.state)
            # we are at the end of enumeration
            if state is None:
                success = self.wait_for_first_success()
                self.terminate_all(pool)
                return success
            else:
                self.state = state
def run_with_timeout(entry_point, timeout, progress, dt=0.1, **kwargs):
    # TODO : multi-process over the different tokens
    spinner = itertools.cycle(r"\|/-")
    pool = pebble.ProcessPool(max_workers=1)
    line = elapsed = format_time(0)
    with pool:
        t0 = time.time()
        func = entry_point.load()
        future = pool.schedule(func, kwargs=kwargs, timeout=timeout)
        while not future.done():
            if progress is not None:
                line = "\r" + elapsed + " " + progress + " " + next(spinner)
                sys.stderr.write(line)
                sys.stderr.flush()
            time.sleep(dt)
            elapsed = format_time(time.time() - t0, timeout)
        walltime = time.time() - t0
        try:
            a, b = future.result()
        except Exception as err:
            a = b = ""
            error = repr(err)[:50]
        else:
            error = ""
            # longest correct answer seen so far has been 32 chars
            a = str(a)[:50]
            b = str(b)[:50]
    if progress is not None:
        sys.stderr.write("\r" + " " * len(line) + "\r")
        sys.stderr.flush()
    return a, b, walltime, error
def g_app_init(path):
    global g_config
    global g_training_pool
    global g_nice
    global g_pool
    global g_queue
    global g_storage
    global g_timer

    g_config = loudml.config.load_config(path)
    g_storage = FileStorage(g_config.storage['path'])
    g_queue = multiprocessing.Queue()
    g_nice = g_config.training.get('nice', 0)
    g_training_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[g_queue],
    )
    g_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[g_queue],
    )
    g_timer = RepeatingTimer(1, read_messages)
    g_timer.start()

    def daemon_send_metrics():
        send_metrics(g_config.metrics, g_storage, user_agent="loudmld")

    daemon_send_metrics()
    schedule.every().hour.do(daemon_send_metrics)

    def daemon_clear_jobs():
        global g_jobs
        duration = g_config.server.get('jobs_max_ttl')
        now_dt = datetime.now(pytz.utc)
        expired = [
            job.id
            for job in g_jobs.values()
            if (job.is_stopped()
                and (now_dt - job.done_dt) > timedelta(seconds=duration))
        ]
        for i in expired:
            del g_jobs[i]

    schedule.every().minute.do(daemon_clear_jobs)
def _gen_compute_results(self, rule_mol, commit, max_workers, timeout, chunk_size):
    """Yield new results using RDKit to apply a rule on a chemical."""
    with pebble.ProcessPool(max_workers=max_workers) as pool:
        # Prepare chunks of tasks
        # NB: pool.map does not seem to prevent tasks from holding resources (memory)
        # until they are consumed, even if a generator is used as input; so we use
        # pool.schedule and build our own chunks to avoid saturating the RAM.
        logger.debug(
            f"Computing tasks in chunks of at most {chunk_size} couples (rule, molecule) "
            f"with {max_workers} workers and a per-task timeout of {timeout} seconds."
        )
        for chunk_idx, chunk in enumerate(_chunkify(rule_mol, chunk_size)):
            if chunk_idx > 0:
                logger.debug(f"Working on task chunk #{chunk_idx + 1}...")
            # Submit all the tasks for this chunk
            all_running_tasks = []  # list of Future objects
            for rid, rd_rule, cid, rd_mol in self._gen_couples(chunk):
                task = (rid, cid,
                        pool.schedule(
                            RuleBurner._task_fire,
                            args=(rd_rule, rd_mol, self._with_hs, self._with_stereo),
                            timeout=timeout))
                all_running_tasks.append(task)
            # Gather the results
            for i, (rid, cid, future) in enumerate(all_running_tasks):
                try:
                    rd_mol_list_list, inchikeys, inchis, smiles = future.result()
                    if rd_mol_list_list:  # silently discard tasks without a match
                        result = {
                            'rule_id': rid,
                            'substrate_id': cid,
                            'product_list': rd_mol_list_list,  # TODO: replace by list of ids?
                            'product_inchikeys': inchikeys,
                            'product_inchis': inchis,
                            'product_smiles': smiles,
                        }
                        if commit:
                            self._insert_result(rid, cid, rd_mol_list_list,
                                                inchikeys, inchis, smiles)
                        yield result
                except concurrent.futures.TimeoutError:
                    logger.warning(f"Task {rid} on {cid} (#{i}) timed-out.")
                    # NB: no need to cancel the future, it's already canceled
                    self._timeout_list.append((rid, cid))
                except RuleFireError as error:
                    logger.error(f"Task {rid} on {cid} (#{i}) failed: {error}.")
                    self._errors_list.append((rid, cid))
                except pebble.ProcessExpired as error:
                    logger.critical(
                        f"Task {rid} on {cid} (#{i}) crashed unexpectedly: {error}.")
                    self._errors_list.append((rid, cid))
            # Attempt to free the memory
            del all_running_tasks
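The chunking helper _chunkify is referenced above but not shown in this snippet. A minimal sketch of what such a generator might look like, assuming it only needs to yield fixed-size batches (an illustration, not the project's actual implementation):

import itertools

def _chunkify(iterable, chunk_size):
    # Hypothetical helper: yield successive lists of at most `chunk_size`
    # items, so that only one chunk of tasks is submitted to the pool
    # (and held in memory) at a time.
    iterator = iter(iterable)
    while True:
        chunk = list(itertools.islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk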
def run(self, cell_model, param_values, sim=None, isolate=None, timeout=None):
    """Instantiate protocol"""
    if isolate is None:
        isolate = True

    if isolate:
        def _reduce_method(meth):
            """Overwrite reduce"""
            return (getattr, (meth.__self__, meth.__func__.__name__))

        import copyreg
        import types
        copyreg.pickle(types.MethodType, _reduce_method)
        import pebble
        from concurrent.futures import TimeoutError

        if timeout is not None:
            if timeout < 0:
                raise ValueError("timeout should be > 0")

        with pebble.ProcessPool(max_tasks=1) as pool:
            tasks = pool.schedule(
                self._run_func,
                kwargs={
                    "cell_model": cell_model,
                    "param_values": param_values,
                    "sim": sim,
                },
                timeout=timeout,
            )
            try:
                responses = tasks.result()
            except TimeoutError:
                logger.debug("SweepProtocol: task took longer than "
                             "timeout, will return empty response "
                             "for this recording")
                responses = {
                    recording.name: None
                    for recording in self.recordings
                }
    else:
        responses = self._run_func(cell_model=cell_model,
                                   param_values=param_values,
                                   sim=sim)
    return responses
def run_ica(self):
    """Run ICA calculation."""
    methods = ["Infomax"]
    if have["picard"]:
        methods.insert(0, "Picard")
    if have["sklearn"]:
        methods.append("FastICA")

    dialog = RunICADialog(self, self.model.current["data"].info["nchan"], methods)

    if dialog.exec_():
        calc = CalcDialog(self, "Calculating ICA", "Calculating ICA.")

        method = dialog.method.currentText().lower()
        exclude_bad_segments = dialog.exclude_bad_segments.isChecked()

        fit_params = {}
        if dialog.extended.isEnabled():
            fit_params["extended"] = dialog.extended.isChecked()
        if dialog.ortho.isEnabled():
            fit_params["ortho"] = dialog.ortho.isChecked()

        ica = mne.preprocessing.ICA(method=method, fit_params=fit_params)
        history = f"ica = mne.preprocessing.ICA(method='{method}'"
        if fit_params:
            history += f", fit_params={fit_params})"
        else:
            history += ")"
        self.model.history.append(history)

        pool = pebble.ProcessPool(max_workers=1)
        process = pool.schedule(function=ica.fit,
                                args=(self.model.current["data"],),
                                kwargs={"reject_by_annotation": exclude_bad_segments})
        process.add_done_callback(lambda x: calc.accept())
        pool.close()

        if not calc.exec_():
            pool.stop()
            pool.join()
        else:
            self.model.current["ica"] = process.result()
            self.model.history.append(f"ica.fit(inst=raw, "
                                      f"reject_by_annotation="
                                      f"{exclude_bad_segments})")
            self.data_changed()
            pool.join()
def __init__(self):
    # TODO: forward additional arguments to the ProcessPool()
    # invocation in order to give users full control

    # Initialize the process pool that will be used under the hood
    # to schedule all the tasks and get a 'future' object as a return value.
    # Important!!!: max_tasks=1 means that after performing each task the
    # process is restarted. This is because there is no other way to make
    # deep learning frameworks fully free up the GPU memory they have used
    # other than stopping the process.
    # https://pebble.readthedocs.io/en/latest/#pebble.ProcessPool
    self.process_pool = pebble.ProcessPool(max_tasks=1)

    # This list is responsible for storing the 'future' object of every
    # scheduled task
    self.tasks_list = []
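A minimal sketch of how a wrapper class built around this single-task-per-process pool might schedule work and collect results; the GpuTaskPool name and its methods are hypothetical, not part of the original code:

import pebble


class GpuTaskPool:
    # Hypothetical wrapper mirroring the snippet above: one task per process,
    # so the OS reclaims all GPU memory when each worker exits.
    def __init__(self):
        self.process_pool = pebble.ProcessPool(max_tasks=1)
        self.tasks_list = []

    def schedule_task(self, func, *args, timeout=None, **kwargs):
        # Submit a callable and remember its future for later collection.
        future = self.process_pool.schedule(func, args=args, kwargs=kwargs,
                                            timeout=timeout)
        self.tasks_list.append(future)
        return future

    def wait_all(self):
        # Block until every scheduled task finishes; results in submission order.
        results = [future.result() for future in self.tasks_list]
        self.tasks_list.clear()
        return results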
def main():
    global MINUTE
    cp = ps_collector.config.get_config()
    if cp.has_option("Scheduler", "debug"):
        if cp.get("Scheduler", "debug").lower() == "true":
            MINUTE = 1

    ps_collector.config.setup_logging(cp)
    global log
    log = logging.getLogger("scheduler")

    pool_size = 5
    if cp.has_option("Scheduler", "pool_size"):
        pool_size = cp.getint("Scheduler", "pool_size")
    pool = pebble.ProcessPool(max_workers=pool_size, max_tasks=5)
    state = SchedulerState(cp, pool, log)

    # Query the mesh the first time
    query_ps_mesh(state)

    query_ps_mesh_job = functools.partial(query_ps_mesh, state)
    cleanup_futures_job = functools.partial(cleanup_futures, state)

    mesh_interval_s = cp.getint("Scheduler", "mesh_interval") * MINUTE
    log.info("Will update the mesh config every %d seconds.", mesh_interval_s)
    schedule.every(mesh_interval_s).to(mesh_interval_s + MINUTE).seconds.do(query_ps_mesh_job)
    schedule.every(10).seconds.do(cleanup_futures_job)

    monitor = Monitoring()

    # Start the prometheus webserver
    start_http_server(8000)
    try:
        while True:
            schedule.run_pending()
            monitor.process_messages()
            time.sleep(1)
    except:
        pool.stop()
        pool.join()
        raise
def makeRanks(self, covProfiles, kmerSigs, contigLengths, silent=False,
              use_multiple_processes=True):
    """Compute pairwise rank distances separately for coverage profiles and
    kmer signatures, and give rank distances as a fraction of the largest rank.
    """
    n = len(contigLengths)
    weights = np.empty(n * (n - 1) // 2, dtype=np.double)
    k = 0
    for i in range(n - 1):
        weights[k:(k + n - 1 - i)] = contigLengths[i] * contigLengths[(i + 1):n]
        k = k + n - 1 - i
    weight_fun = lambda i: weights[i]
    if use_multiple_processes:
        with pebble.ProcessPool(
                max_workers=2,
                context=multiprocessing.get_context('forkserver')) as executor:
            futures = [
                executor.schedule(choose_rank_method,
                                  (covProfiles, kmerSigs, weight_fun, switch))
                for switch in range(2)
            ]
            executor.close()
            results = []
            for future in futures:
                result = future.result()
                results.append(result)
            return results
    else:
        results = [
            choose_rank_method(covProfiles, kmerSigs, weight_fun, switch)
            for switch in range(2)
        ]
        return results
def run_ica(self):
    """Run ICA calculation."""
    dialog = RunICADialog(self, self.model.current["data"].info["nchan"],
                          have["picard"], have["sklearn"])

    if dialog.exec_():
        calc = CalcDialog(self, "Calculating ICA", "Calculating ICA.")

        method = dialog.method.currentText()
        exclude_bad_segments = dialog.exclude_bad_segments.isChecked()

        fit_params = {}
        if not dialog.extended.isHidden():
            fit_params["extended"] = dialog.extended.isChecked()
        if not dialog.ortho.isHidden():
            fit_params["ortho"] = dialog.ortho.isChecked()

        ica = mne.preprocessing.ICA(method=dialog.methods[method],
                                    fit_params=fit_params)
        self.model.history.append(f"ica = mne.preprocessing.ICA("
                                  f"method={dialog.methods[method]}, "
                                  f"fit_params={fit_params})")

        kwds = {"reject_by_annotation": exclude_bad_segments}

        pool = pebble.ProcessPool(max_workers=1)
        process = pool.schedule(function=ica.fit,
                                args=(self.model.current["data"],),
                                kwargs=kwds)
        process.add_done_callback(lambda x: calc.accept())
        pool.close()

        if not calc.exec_():
            pool.stop()
            pool.join()
        else:
            self.model.current["ica"] = process.result()
            self.model.history.append(f"ica.fit(inst=raw, "
                                      f"reject_by_annotation="
                                      f"{exclude_bad_segments})")
            self.data_changed()
            pool.join()
def map2(f, args, timeout=None):
    """Reproducible map with Pebble multiprocessing tool.

    Return all results that finish before timeout, None otherwise.
    """
    fs = [f for _ in args]
    seeds = [random.getrandbits(128) for _ in args]
    fargseeds = zip(fs, args, seeds)
    pool = pebble.ProcessPool(
        max_workers=int(os.environ.get("JUDICIOUS_POOL_WORKERS", 10)))
    future = pool.map(unpack_seed_apply, fargseeds)
    iterator = future.result()
    results = []
    while True:
        try:
            result = next(iterator)
            results.append(result)
        except StopIteration:
            break
        except pebble.ProcessExpired as error:
            print("%s. Exit code: %d" % (error, error.exitcode))
    return results
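A hypothetical usage sketch for map2; the square worker is illustrative only, and unpack_seed_apply is assumed to seed the RNG from the supplied 128-bit seed before applying the function:

def square(x):
    # Illustrative worker: must be a picklable, top-level function so the
    # pool's worker processes can import and call it.
    return x * x

if __name__ == "__main__":
    # pool.map preserves input order, so this prints [1, 4, 9, 16]
    print(map2(square, [1, 2, 3, 4]))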
def __init__(
    self,
    jobs: List[Job],
    check_interval=60,
    min_pool_processes=1,
    max_tasks_per_job=None,
):
    """
    :param check_interval: number of seconds to wait in between checking for
        new tasks
    :param max_tasks_per_job: Jobs are limited to having this number of tasks
        waiting in the pool at once, to reduce the possibility of a single job
        flooding the pool. Defaults to the size of the process pool.
    :param min_pool_processes: The minimum size of the process pool used to
        execute tasks. The pool defaults to the detected number of CPUs and is
        never smaller than this value.
    """
    self.jobs = {job.job_name: job for job in jobs}
    pool_size = max(min_pool_processes, mp.cpu_count())
    self.pool = pebble.ProcessPool(pool_size, max_tasks=1)
    self.max_tasks_per_job = max_tasks_per_job or pool_size
    self.check_interval = check_interval
def process_pycs(pyc_iterable: Iterable[os.PathLike],
                 alternate_opmap: Dict[str, int] = None) -> None:
    """Multi-processed decompilation orchestration of compiled Python files.

    Currently, pydecipher uses `uncompyle6`_ as its decompiler. It works well
    with `xdis`_ (same author) and allows for the decompilation of Code objects
    using alternate opmaps (with our extension of xdis).

    This function will start up CPU count * 2 pydecipher processes to decompile
    the given Python. If a debugger is detected, decompilation runs
    single-threaded to make debugging easier.

    .. _uncompyle6: https://github.com/rocky/python-uncompyle6/
    .. _xdis: https://github.com/rocky/python-xdis

    Parameters
    ----------
    pyc_iterable : Iterable[os.PathLike]
        An iterable of pathlib.Path objects, referencing compiled Python files
        to decompile.
    alternate_opmap : Dict[str, int], optional
        An opcode map of OPNAME: OPCODE (i.e. 'POP_TOP': 1). This should be a
        complete opmap for the Python version of the files being decompiled.
        Even if only two opcodes were swapped, the opcode map passed in should
        contain all 100+ Python bytecode operations.
    """
    # This checks if the PyCharm debugger is attached.
    if sys.gettrace():
        # Single-threaded for easier debugging.
        logger.debug(
            "[!] Debugger detected, not using multiprocessing for decompilation of pyc files."
        )
        return_status_codes: List[str] = []
        pyc_file: pathlib.Path
        for pyc_file in pyc_iterable:
            return_status_codes.append(
                decompile_pyc((pyc_file, alternate_opmap,
                               pydecipher.get_logging_options())))
    else:
        return_status_codes: List[str] = []
        pool: pebble.ProcessPool
        with pebble.ProcessPool(os.cpu_count() * 2) as pool:
            iterables = [(pyc, alternate_opmap, pydecipher.get_logging_options())
                         for pyc in pyc_iterable]
            future: pebble.ProcessMapFuture = pool.map(decompile_pyc, iterables,
                                                       timeout=300)
            iterator: Iterable = future.result()
            index: int = 0
            while True:
                try:
                    result: Any = next(iterator)
                    return_status_codes.append(result)
                except StopIteration:
                    break
                except TimeoutError as e:
                    failed_pyc_path: str = str(iterables[index][0])
                    logger.error(
                        f"[!] Timed out ({e.args[1]}s) trying to decompile {failed_pyc_path}."
                    )
                    return_status_codes.append("error")
                except pebble.ProcessExpired as e:
                    failed_pyc_path = str(iterables[index][0])
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} (process expired with status code {e.exitcode})."
                    )
                    return_status_codes.append("error")
                except Exception as e:
                    failed_pyc_path = str(iterables[index][0])
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} with unknown error: {e}"
                    )
                    return_status_codes.append("error")
                finally:
                    index += 1

    successes: int = return_status_codes.count("success")
    opcode_errors: int = return_status_codes.count("opcode_error")
    errors: int = return_status_codes.count("error") + opcode_errors
    if opcode_errors:
        logger.warning(
            f"[!] {opcode_errors} file(s) failed to decompile with an error "
            "that indicates their opcode mappings may have been remapped. Try "
            "using `remap` on this set of bytecode.")
    if successes and not errors:
        logger.info(f"[+] Successfully decompiled {successes} .pyc files.")
    elif successes and errors:
        logger.warning(
            f"[!] Successfully decompiled {successes} .pyc files. Failed to decompile {errors} files. "
            "See log for more information.")
    elif not successes and errors:
        logger.error(
            f"[!] Failed to decompile all {errors} .pyc files. See log for more information."
        )
    else:
        logger.warning(
            "[!] No pyc files were decompiled. See log for more information.")
def autoprocess(parallel=1, failed_processing=False, maxtasksperchild=7,
                memory_debugging=False, processing_timeout=300):
    maxcount = cfg.cuckoo.max_analysis_count
    count = 0
    db = Database()
    # pool = multiprocessing.Pool(parallel, init_worker)
    try:
        memory_limit()
        log.info("Processing analysis data")
        with pebble.ProcessPool(max_workers=parallel, max_tasks=maxtasksperchild,
                                initializer=init_worker) as pool:
            # CAUTION - big ugly loop ahead.
            while count < maxcount or not maxcount:
                # If not enough free disk space is available, then we print an
                # error message and wait another round (this check is ignored
                # when the freespace configuration variable is set to zero).
                if cfg.cuckoo.freespace:
                    # Resolve the full base path to the analysis folder, just in
                    # case somebody decides to make a symbolic link out of it.
                    dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
                    need_space, space_available = free_space_monitor(
                        dir_path, return_value=True, processing=True)
                    if need_space:
                        log.error(
                            "Not enough free disk space! (Only %d MB!). You can change the limit in cuckoo.conf -> freespace",
                            space_available,
                        )
                        time.sleep(60)
                        continue

                # If still full, don't add more (necessary despite pool).
                if len(pending_task_id_map) >= parallel:
                    time.sleep(5)
                    continue
                if failed_processing:
                    tasks = db.list_tasks(status=TASK_FAILED_PROCESSING,
                                          limit=parallel,
                                          order_by=Task.completed_on.asc())
                else:
                    tasks = db.list_tasks(status=TASK_COMPLETED, limit=parallel,
                                          order_by=Task.completed_on.asc())
                added = False
                # For loop to add only one, nice. (reason is that we shouldn't overshoot maxcount)
                for task in tasks:
                    # Not-so-efficient lock.
                    if pending_task_id_map.get(task.id):
                        continue
                    log.info("Processing analysis data for Task #%d", task.id)
                    if task.category != "url":
                        sample = db.view_sample(task.sample_id)
                        copy_path = os.path.join(CUCKOO_ROOT, "storage",
                                                 "binaries", str(task.id),
                                                 sample.sha256)
                    else:
                        copy_path = None

                    args = task.target, copy_path
                    kwargs = dict(report=True, auto=True, task=task,
                                  memory_debugging=memory_debugging)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (before) GC object counts: %d, %d",
                                 task.id, len(gc.get_objects()), len(gc.garbage))
                    # result = pool.apply_async(process, args, kwargs)
                    future = pool.schedule(process, args, kwargs,
                                           timeout=processing_timeout)
                    pending_future_map[future] = task.id
                    pending_task_id_map[task.id] = future
                    future.add_done_callback(processing_finished)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (after) GC object counts: %d, %d",
                                 task.id, len(gc.get_objects()), len(gc.garbage))
                    count += 1
                    added = True
                    copy_origin_path = os.path.join(CUCKOO_ROOT, "storage",
                                                    "binaries", sample.sha256)
                    if cfg.cuckoo.delete_bin_copy and os.path.exists(copy_origin_path):
                        os.unlink(copy_origin_path)
                    break

                if not added:
                    # don't hog cpu
                    time.sleep(5)
    except KeyboardInterrupt:
        # ToDo verify in finally
        # pool.terminate()
        raise
    except MemoryError:
        mem = get_memory() / 1024 / 1024
        print("Remain: %.2f GB" % mem)
        sys.stderr.write("\n\nERROR: Memory Exception\n")
        sys.exit(1)
    except Exception as e:
        import traceback
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
def generate(self):
    self.init()
    num_generated = 0
    num_processed = 0
    num_raw_points = -1
    if os.path.exists(self.args.raw_data_path + '.index'):
        reader = IndexedFileReader(self.args.raw_data_path)
        num_raw_points = len(reader)
        reader.close()

    start_time = time.time()
    with pebble.ProcessPool(
            max_workers=self.args.processes,
            initializer=FunctionSeqDataGenerator.Worker.init,
            initargs=(self.args,)) as p:
        chunksize = self.args.processes * self.args.chunksize
        for chunk in misc.grouper(chunksize, self.raw_data_iterator()):
            future = p.map(FunctionSeqDataGenerator.Worker.process, chunk,
                           timeout=self.args.task_timeout)
            res_iter = future.result()
            idx = -1
            while True:
                idx += 1
                if idx < len(chunk) and chunk[idx] is not None:
                    num_processed += 1
                try:
                    result = next(res_iter)
                    if chunk[idx] is None:
                        continue
                    if result is not None:
                        self.process_result(result)
                        num_generated += 1
                except StopIteration:
                    break
                except TimeoutError as error:
                    pass
                except Exception as e:
                    try:
                        logger.warn("Failed for", chunk[idx])
                        logging.exception(e)
                    except:
                        pass
                finally:
                    speed = round(num_processed / (time.time() - start_time), 1)
                    if num_raw_points != -1:
                        time_remaining = round(
                            (num_raw_points - num_processed) / speed, 1)
                    else:
                        time_remaining = '???'

                    logger.log(
                        "Generated/Processed : {}/{} ({}/s, TTC={}s)".format(
                            num_generated, num_processed, speed, time_remaining),
                        end='\r')

        p.stop()
        try:
            p.join(10)
        except:
            pass

    self.fwriter.close()
    logger.log("\n-------------------------------------------------")
    logger.info("Total Time : {:.2f}s".format(time.time() - start_time))
    logger.info(
        "Generated {} training points from {} raw data points".format(
            num_generated, num_processed))
def test_pool_deadlock(self):
    """Process Pool Fork no deadlock if writing worker dies locking channel."""
    with pebble.ProcessPool(max_workers=1) as pool:
        with self.assertRaises(pebble.ProcessExpired):
            pool.schedule(function).result()
def main():
    """
    Loud ML server
    """
    global g_config
    global g_training_pool
    global g_nice
    global g_pool
    global g_queue
    global g_storage

    parser = argparse.ArgumentParser(
        description=main.__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '-c', '--config',
        help="Path to configuration file",
        type=str,
        default="/etc/loudml/config.yml",
    )
    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    app.logger.setLevel(logging.INFO)

    try:
        g_config = loudml.config.load_config(args.config)
        g_storage = FileStorage(g_config.storage['path'])
        loudml.config.load_plugins(args.config)
    except errors.LoudMLException as exn:
        logging.error(exn)
        sys.exit(1)

    try:
        cron = CronTab(user='******')
        cron.remove_all()
        if g_config.training['incremental']['enable']:
            for tab in g_config.training['incremental']['crons']:
                job = cron.new(
                    command='/usr/bin/loudml train \* -i -f {} -t {}'.format(
                        tab['from'], tab['to']),
                    comment='incremental training')
                job.setall(tab['crontab'])
            for item in cron:
                logging.info(item)
            cron.write()
    except OSError:
        logging.error(
            "detected development environment - incremental training disabled")

    g_queue = multiprocessing.Queue()
    g_nice = g_config.training.get('nice', 0)
    g_training_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[args.config, g_queue],
    )
    g_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[args.config, g_queue],
    )
    timer = RepeatingTimer(1, read_messages)
    timer.start()

    listen_addr = g_config.server['listen']
    host, port = listen_addr.split(':')

    restart_predict_jobs()

    def daemon_send_metrics():
        send_metrics(g_config.metrics, g_storage, user_agent="loudmld")

    daemon_send_metrics()
    schedule.every().hour.do(daemon_send_metrics)

    try:
        http_server = WSGIServer((host, int(port)), app)
        logging.info("starting Loud ML server on %s", listen_addr)
        http_server.serve_forever()
    except OSError as exn:
        logging.error(str(exn))
    except KeyboardInterrupt:
        pass

    logging.info("stopping")
    timer.cancel()
    g_training_pool.stop()
    g_training_pool.join()
    g_pool.stop()
    g_pool.join()
def call_with_timeout_multiprocess(func, *args, timeout=3):
    pool = pebble.ProcessPool(max_workers=1)
    with pool:
        future = pool.schedule(func, args=args, timeout=timeout)
        return future.result()
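A usage note for the helper above: when a scheduled call exceeds its timeout, pebble cancels it and terminates the worker process, and future.result() raises concurrent.futures.TimeoutError, so callers may want to guard the call. The slow_op function below is illustrative only:

import time
from concurrent.futures import TimeoutError

def slow_op(n):
    # Illustrative worker: sleeps long enough to trip the 3-second default timeout.
    time.sleep(n)
    return n

if __name__ == "__main__":
    print(call_with_timeout_multiprocess(slow_op, 1))  # completes and returns 1
    try:
        call_with_timeout_multiprocess(slow_op, 10)
    except TimeoutError:
        print("call timed out and the worker was terminated")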
def get_cluster_labels_array(
        distances,
        metric="euclidean",
        selection_method="eom",
        top_n=3,
        min_size_start=1,
        min_size_end=10,
        solver="hbgf",
        threads=16,
        embeddings_for_precomputed=None,
        use_multiple_processes=True,
):
    """
    Uses cluster ensembling with the ClusterEnsembles package to produce a
    partitioned set of high quality clusters from multiple HDBSCAN runs.
    Takes the top N clustering results and combines them.

    solver - one of {'cspa', 'hgpa', 'mcla', 'hbgf', 'nmf', 'all'}, default='hbgf'
    """
    label_array = np.array([np.array([-1 for _ in range(distances.shape[0])])
                            for _ in range(top_n)])
    best_min_size = np.array([None for _ in range(top_n)])
    best_min_sample = np.array([None for _ in range(top_n)])
    best_validity = np.array([None for _ in range(top_n)])
    best_unbinned = np.array([None for _ in range(top_n)])
    best_n_bins = np.array([None for _ in range(top_n)])
    index = 0

    if use_multiple_processes:
        worker_limit = threads // 5
        # thread_limit = worker_limit // 5
        # with threadpoolctl.threadpool_limits(limits=max(threads // 5, 1), user_api='blas'):
        with pebble.ProcessPool(
                max_workers=threads // 5,
                context=multiprocessing.get_context('forkserver')) as executor:
            futures = [
                executor.schedule(
                    Clusterer.generate_cluster,
                    (distances, embeddings_for_precomputed, selection_method,
                     metric, min_size, min_sample, threads),
                    timeout=1800,
                )
                for (min_size, min_sample) in itertools.permutations(range(1, 10), 2)
                if min_size != 1 and min_sample <= min_size
            ]
            # executor.close()
            for future in futures:
                try:
                    (cluster_validity, min_size, min_sample, labels) = future.result()
                    if np.any(best_validity == None):
                        best_min_size[index] = min_size
                        best_min_sample[index] = min_sample
                        best_validity[index] = cluster_validity
                        label_array[index] = labels
                        best_n_bins[index] = np.unique(labels).shape[0]
                        best_unbinned[index] = (labels == -1).sum()

                        index += 1
                        if index == top_n:
                            # sort the current top by ascending validity order
                            ranks = np.argsort(best_validity)
                            best_validity = best_validity[ranks]
                            best_min_sample = best_min_sample[ranks]
                            best_min_size = best_min_size[ranks]
                            label_array = label_array[ranks]
                            best_n_bins = best_n_bins[ranks]
                            best_unbinned = best_unbinned[ranks]
                    elif np.any(best_validity < cluster_validity):
                        # insert the new result and remove the worst result
                        ind = np.searchsorted(best_validity, cluster_validity)
                        best_validity = np.insert(best_validity, ind, cluster_validity)[1:]
                        best_min_size = np.insert(best_min_size, ind, min_size)[1:]
                        best_min_sample = np.insert(best_min_sample, ind, min_sample)[1:]
                        label_array = np.insert(label_array, ind, labels, axis=0)[1:]
                        best_n_bins = np.insert(best_n_bins, ind, np.unique(labels).shape[0])[1:]
                        best_unbinned = np.insert(best_unbinned, ind, (labels == -1).sum())[1:]
                except TimeoutError:
                    continue
    else:
        results = [
            Clusterer.generate_cluster(
                distances, embeddings_for_precomputed, selection_method,
                metric, min_size, min_sample, threads)
            for (min_size, min_sample) in itertools.permutations(range(2, 10), 2)
            if min_size != 1 and min_sample <= min_size
        ]

        for result in results:
            (cluster_validity, min_size, min_sample, labels) = result
            if np.any(best_validity == None):
                best_min_size[index] = min_size
                best_min_sample[index] = min_sample
                best_validity[index] = cluster_validity
                label_array[index] = labels
                best_n_bins[index] = np.unique(labels).shape[0]
                best_unbinned[index] = (labels == -1).sum()

                index += 1
                if index == top_n:
                    # sort the current top by ascending validity order
                    ranks = np.argsort(best_validity)
                    best_validity = best_validity[ranks]
                    best_min_sample = best_min_sample[ranks]
                    best_min_size = best_min_size[ranks]
                    label_array = label_array[ranks]
                    best_n_bins = best_n_bins[ranks]
                    best_unbinned = best_unbinned[ranks]
            elif np.any(best_validity < cluster_validity):
                # insert the new result and remove the worst result
                ind = np.searchsorted(best_validity, cluster_validity)
                best_validity = np.insert(best_validity, ind, cluster_validity)[1:]
                best_min_size = np.insert(best_min_size, ind, min_size)[1:]
                best_min_sample = np.insert(best_min_sample, ind, min_sample)[1:]
                label_array = np.insert(label_array, ind, labels, axis=0)[1:]
                best_n_bins = np.insert(best_n_bins, ind, np.unique(labels).shape[0])[1:]
                best_unbinned = np.insert(best_unbinned, ind, (labels == -1).sum())[1:]

    return label_array, best_validity, best_n_bins, best_unbinned
        save_uncompressed=args.save_uncompressed,
        memoize=args.scraper_memoize):
    time.sleep(1)
    return "xyz"


if __name__ == "__main__":
    month = extract_month(args.url_file)

    # in case we are resuming from a previous run
    completed_uids, state_fp, prev_cid = get_state(month, args.output_dir)

    # URLs we haven't scraped yet (if first run, all URLs in file)
    url_entries = load_urls(args.url_file, completed_uids, args.max_urls)

    pool = pbl.ProcessPool(max_workers=args.n_procs)

    # process one "chunk" of args.chunk_size URLs at a time
    for i, chunk in enumerate(chunks(url_entries, args.chunk_size)):
        cid = prev_cid + i + 1

        print("Downloading chunk {}".format(cid))
        t1 = time.time()

        if args.timeout > 0:
            # imap as iterator allows .next() w/ timeout.
            # ordered version doesn't seem to work correctly.
            # for some reason, you CANNOT track j or chunk[j] in the loop,
            # so don't add anything else to the loop below!
            # confusingly, chunksize below is unrelated to our chunk_size
            # chunk_iter = pool.imap_unordered(timeout_checker, chunk, chunksize=1)
    return U.ppid(), foo + bar


def target_wrap(function, *a, **ka):
    return a, ka, function(*a, **ka)


gt = U.get_or_set('gt', [])
ge = U.get_or_set('ge', [])


def task_done(future):
    global gt, ge
    try:
        result = future.result()  # blocks until results are ready
        print("success:", repr(result[2]))
        gt.append(future)
    except TimeoutError as error:
        print("Function took longer than %d seconds" % error.args[1])
        ge.append(error)
    except Exception as error:
        print("Function raised %s" % error)
        print(error.traceback)  # traceback of the function


if __name__ == '__main__':
    with pebble.ProcessPool(max_workers=5, max_tasks=0) as pool:
        for i in range(0, 10):
            future = pool.schedule(function, args=[i], timeout=1)
            future.add_done_callback(task_done)
def run(self, cell_model, param_values, sim=None, isolate=None, timeout=None):
    """Instantiate protocol"""
    if isolate is None:
        isolate = True

    if isolate:  # and not cell_model.name in 'L5PC':
        def _reduce_method(meth):
            """Overwrite reduce"""
            return (getattr, (meth.__self__, meth.__func__.__name__))

        import copyreg
        import types
        copyreg.pickle(types.MethodType, _reduce_method)
        import pebble
        from concurrent.futures import TimeoutError

        if timeout is not None:
            if timeout < 0:
                raise ValueError("timeout should be > 0")

        ###
        # Foreign code
        ###
        with pebble.ProcessPool(max_workers=1, max_tasks=1) as pool:
            tasks = pool.schedule(self._run_func,
                                  kwargs={
                                      'cell_model': cell_model,
                                      'param_values': param_values,
                                      'sim': sim
                                  },
                                  timeout=timeout)
            ##
            # works if inverted try for except etc
            ##
            try:
                responses = tasks.result()
            except:
                responses = self._run_func(cell_model=cell_model,
                                           param_values=param_values,
                                           sim=sim)
    else:
        responses = self._run_func(cell_model=cell_model,
                                   param_values=param_values,
                                   sim=sim)

    new_responses = {}
    for k, v in responses.items():
        if hasattr(v, 'response'):
            time = v.response['time'].values  # [r.response[0] for r in self.recording.repsonse]
            vm = v.response['voltage'].values  # [r.response[1] for r in self.recording.repsonse]
            if not hasattr(cell_model, 'l5pc'):
                new_responses['neo_' + str(k)] = AnalogSignal(
                    vm, units=pq.mV, sampling_period=(1 / 0.01255) * pq.s)
            else:
                new_responses['neo_' + str(k)] = AnalogSignal(
                    vm, units=pq.mV, sampling_period=(time[1] - time[0]) * pq.s)
            train_len = len(sf.get_spike_train(new_responses['neo_' + str(k)]))
            if train_len > 0:
                pass
    responses.update(new_responses)
    return responses
def autoprocess(parallel=1, failed_processing=False, maxtasksperchild=7,
                memory_debugging=False, processing_timeout=300):
    maxcount = cfg.cuckoo.max_analysis_count
    count = 0
    db = Database()
    # pool = multiprocessing.Pool(parallel, init_worker)
    pool = pebble.ProcessPool(max_workers=parallel, max_tasks=maxtasksperchild,
                              initializer=init_worker)
    try:
        log.info("Processing analysis data")
        # CAUTION - big ugly loop ahead.
        while count < maxcount or not maxcount:
            # If still full, don't add more (necessary despite pool).
            if len(pending_task_id_map) >= parallel:
                time.sleep(5)
                continue

            # If we're here, getting parallel tasks should at least
            # have one we don't know.
            if failed_processing:
                tasks = db.list_tasks(status=TASK_FAILED_PROCESSING,
                                      limit=parallel,
                                      order_by=Task.completed_on.asc())
            else:
                tasks = db.list_tasks(status=TASK_COMPLETED, limit=parallel,
                                      order_by=Task.completed_on.asc())
            added = False
            # For loop to add only one, nice. (reason is that we shouldn't overshoot maxcount)
            for task in tasks:
                # Not-so-efficient lock.
                if pending_task_id_map.get(task.id):
                    continue
                log.info("Processing analysis data for Task #%d", task.id)

                if task.category == "file":
                    sample = db.view_sample(task.sample_id)
                    copy_path = os.path.join(CUCKOO_ROOT, "storage",
                                             "binaries", sample.sha256)
                else:
                    copy_path = None

                args = task.target, copy_path
                kwargs = dict(report=True, auto=True, task=task,
                              memory_debugging=memory_debugging)
                if memory_debugging:
                    gc.collect()
                    log.info("[%d] (before) GC object counts: %d, %d", task.id,
                             len(gc.get_objects()), len(gc.garbage))
                # result = pool.apply_async(process, args, kwargs)
                future = pool.schedule(process, args, kwargs,
                                       timeout=processing_timeout)
                pending_future_map[future] = task.id
                pending_task_id_map[task.id] = future
                future.add_done_callback(processing_finished)
                if memory_debugging:
                    gc.collect()
                    log.info("[%d] (after) GC object counts: %d, %d", task.id,
                             len(gc.get_objects()), len(gc.garbage))
                count += 1
                added = True
                break

            if not added:
                # don't hog cpu
                time.sleep(5)
    except KeyboardInterrupt:
        # ToDo verify in finally
        # pool.terminate()
        raise
    except:
        import traceback
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
def main():
    global MINUTE
    cp = ps_collector.config.get_config()
    if cp.has_option("Scheduler", "debug"):
        if cp.get("Scheduler", "debug").lower() == "true":
            MINUTE = 1

    ps_collector.config.setup_logging(cp)
    global log
    log = logging.getLogger("scheduler")

    # Start the push processor
    if isPush(cp):
        log.debug("Starting the push parser")
        push_parser = PSPushParser(cp, log)
        push_parser.start()
    else:
        log.debug("Not starting the push parser")

    pool_size = 5
    if cp.has_option("Scheduler", "pool_size"):
        pool_size = cp.getint("Scheduler", "pool_size")
    pool = pebble.ProcessPool(max_workers=pool_size, max_tasks=5)
    state = SchedulerState(cp, pool, log)

    # Parse the oneshot
    if isOneShot(cp):
        # Parse the start and end
        log.info("Starting Oneshot")
        state.oneshot = True
        start = dateutil.parser.parse(cp.get("Oneshot", "start"))
        end = dateutil.parser.parse(cp.get("Oneshot", "end"))
        state.query_range = (start, end)

    # Initialize the meshes
    # Get the mesh endpoint configuration, which may be a comma separated list
    mesh_config_val = state.cp.get("Mesh", "endpoint")
    if "," in mesh_config_val:
        meshes = mesh_config_val.split(",")
    else:
        meshes = [mesh_config_val]

    for mesh in meshes:
        state.meshes[mesh] = []

    # Query the mesh the first time
    query_ps_mesh(state)

    query_ps_mesh_job = functools.partial(query_ps_mesh, state)
    cleanup_futures_job = functools.partial(cleanup_futures, state)

    mesh_interval_s = cp.getint("Scheduler", "mesh_interval") * MINUTE
    log.info("Will update the mesh config every %d seconds.", mesh_interval_s)
    if not isOneShot(cp):
        schedule.every(mesh_interval_s).to(
            mesh_interval_s + MINUTE).seconds.do(query_ps_mesh_job)
        schedule.every(10).seconds.do(cleanup_futures_job)

    monitor = Monitoring()

    # Start the prometheus webserver
    start_http_server(8000)
    try:
        if not isOneShot(cp):
            while True:
                schedule.run_pending()
                monitor.process_messages()
                if isPush(cp):
                    push_parser = checkPushProcessor(push_parser, cp, log)
                time.sleep(1)
        else:
            pool.close()
            pool.join()
    except:
        pool.stop()
        pool.join()
        raise