Example #1
def g_app_init(path):
    global g_config
    global g_training_pool
    global g_nice
    global g_pool
    global g_queue
    global g_storage
    global g_timer

    g_config = loudml.config.load_config(path)
    g_storage = FileStorage(g_config.storage['path'])
    g_queue = multiprocessing.Queue()
    g_nice = g_config.training.get('nice', 0)
    g_training_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[path, g_queue],
    )
    g_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[path, g_queue],
    )
    g_timer = RepeatingTimer(1, read_messages)
    g_timer.start()

    def daemon_send_metrics():
        send_metrics(g_config.metrics, g_storage, user_agent="loudmld")

    daemon_send_metrics()
    schedule.every().hour.do(daemon_send_metrics)
Example #2
 def test_pool_deadlock_stop(self):
     """Process Pool Fork writing deadlocks are stopping the Pool."""
     with self.assertRaises(RuntimeError):
         pool = pebble.ProcessPool(max_workers=1)
         for _ in range(10):
             pool.schedule(function)
             time.sleep(0.1)
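
The assertion above hinges on pebble raising RuntimeError once the pool is no longer running. A minimal sketch of that behaviour, assuming a trivial top-level task (the noop name is hypothetical):

import pebble

def noop():
    return None

if __name__ == "__main__":
    pool = pebble.ProcessPool(max_workers=1)
    pool.stop()   # stop the pool without waiting for queued work
    pool.join()
    try:
        pool.schedule(noop)   # scheduling on a stopped pool is rejected
    except RuntimeError as err:
        print("schedule refused:", err)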
Example #3
def run_with_timeout(plugin, timeout, progress, dt=0.005, **kwargs):
    # TO_DO : multi-process over the different tokens
    spinner = itertools.cycle(r"\|/-")
    pool = pebble.ProcessPool(max_workers=1)
    line = elapsed = format_time(0)
    with pool:
        t0 = time.time()
        future = pool.schedule(plugin[1].run, kwargs=kwargs, timeout=timeout)
        while not future.done():
            if progress is not None:
                line = "\r" + elapsed + "   " + progress \
                        + "   " + next(spinner)
                sys.stderr.write(line)
                sys.stderr.flush()
            time.sleep(dt)
            elapsed = format_time(time.time() - t0, timeout)
        walltime = time.time() - t0
        try:
            result_a, result_b = future.result()
        except Exception as err:
            result_a = Result.ok("")
            result_b = Result.ok("")
            error = repr(err)
        else:
            error = ""
    if progress is not None:
        sys.stderr.write("\r" + " " * len(line) + "\r")
        sys.stderr.flush()
    return result_a, result_b, walltime, error
Example #4
 def build(self):
     with pebble.ProcessPool(conf.get("workers")) as pool:
         instance = pool.map(resolv,
                             sorted(self.threats.keys()),
                             timeout=conf.get("queryTimeout"))
         iterator = instance.result()
         for index, element in enumerate(sorted(self.threats.keys()),
                                         start=1):
             try:
                 self.threats.update({element: next(iterator)})
             except:
                 self.threats.update({element: []})
             if index % round(len(self.threats) / 100) == 0 or index == len(
                     self.threats):
                 log.info(
                     str("{}% done... ({}/{})").format(
                         int(100 / len(self.threats) * index), index,
                         len(self.threats)))
         try:
             next(iterator)
             log.warning(
                 "Process pool is not empty (iterator object is still iterable)"
             )
         except StopIteration:
             pass
     log.info(
         str("[!] BUILD part 1/1 done ({} threats)").format(
             len(self.threats)))
     return 0
Example #5
    def process(self):
        """
        Processes the split PCAP files, extracting a feature vector from each.
        The implementation leverages a pool of processes provided by the
        pebble module.
        """

        # Determine the range of files to be processed
        futures = []

        queue_length = len(self.file_queue)

        with pebble.ProcessPool(max_workers=config.NUM_JOBS) as pool:
            for counter, path in enumerate(self.file_queue):
                future = pool.schedule(
                    self.process_pcap,
                    (path, counter + 1, queue_length),
                    timeout=1800,
                )
                futures.append(future)

        raw_data = []
        for future in futures:
            try:
                result = future.result()
                if result is not None:
                    raw_data.append(result)
            except TimeoutError:
                pass

        # Note: the "with" block above already closed and joined the pool,
        # so no explicit close()/join() is needed here.

        data = pandas.DataFrame(raw_data)
        data.to_csv(self.output, header=True, index=False, mode='w')
Example #6
File: init.py Project: Udopia/gbd
def run(api: GBD, resultset, func, args: dict):
    first = True
    if api.jobs == 1:
        for (hash, local) in resultset:
            result = func(hash, local, args)
            safe_run_results(api, result, check=first)
            first = False
    else:
        with pebble.ProcessPool(min(multiprocessing.cpu_count(),
                                    api.jobs)) as p:
            futures = [
                p.schedule(func, (hash, local, args))
                for (hash, local) in resultset
            ]
            for f in as_completed(
                    futures):  #, timeout=api.tlim if api.tlim > 0 else None):
                try:
                    result = f.result()
                    safe_run_results(api, result, check=first)
                    first = False
                except pebble.ProcessExpired as e:
                    f.cancel()
                    eprint("{}: {}".format(e.__class__.__name__, e))
                except GBDException as e:  # might receive special handling in the future
                    eprint("{}: {}".format(e.__class__.__name__, e))
                except Exception as e:
                    eprint("{}: {}".format(e.__class__.__name__, e))
Example #7
    def run_parallel_tests(self):
        assert not self.futures
        assert not self.temporary_folders
        with pebble.ProcessPool(max_workers=self.parallel_tests) as pool:
            order = 1
            self.timeout_count = 0
            while self.state is not None:
                # do not create too many states
                if len(self.futures) >= self.parallel_tests:
                    wait(self.futures, return_when=FIRST_COMPLETED)

                quit_loop = self.process_done_futures()
                if quit_loop:
                    success = self.wait_for_first_success()
                    self.terminate_all(pool)
                    return success

                folder = tempfile.mkdtemp(prefix=self.TEMP_PREFIX, dir=self.root)
                test_env = TestEnvironment(self.state, order, self.test_script, folder,
                                           self.current_test_case, self.test_cases ^ {self.current_test_case},
                                           self.current_pass.transform, self.pid_queue)
                future = pool.schedule(test_env.run, timeout=self.timeout)
                self.temporary_folders[future] = folder
                self.futures.append(future)
                self.pass_statistic.add_executed(self.current_pass)
                order += 1
                state = self.current_pass.advance(self.current_test_case, self.state)
                # we are at the end of enumeration
                if state is None:
                    success = self.wait_for_first_success()
                    self.terminate_all(pool)
                    return success
                else:
                    self.state = state
Example #8
def run_with_timeout(entry_point, timeout, progress, dt=0.1, **kwargs):
    # TODO : multi-process over the different tokens
    spinner = itertools.cycle(r"\|/-")
    pool = pebble.ProcessPool(max_workers=1)
    line = elapsed = format_time(0)
    with pool:
        t0 = time.time()
        func = entry_point.load()
        future = pool.schedule(func, kwargs=kwargs, timeout=timeout)
        while not future.done():
            if progress is not None:
                line = "\r" + elapsed + "   " + progress + "   " + next(
                    spinner)
                sys.stderr.write(line)
                sys.stderr.flush()
            time.sleep(dt)
            elapsed = format_time(time.time() - t0, timeout)
        walltime = time.time() - t0
        try:
            a, b = future.result()
        except Exception as err:
            a = b = ""
            error = repr(err)[:50]
        else:
            error = ""
            # longest correct answer seen so far has been 32 chars
            a = str(a)[:50]
            b = str(b)[:50]
    if progress is not None:
        sys.stderr.write("\r" + " " * len(line) + "\r")
        sys.stderr.flush()
    return a, b, walltime, error
Example #9
def g_app_init(path):
    global g_config
    global g_training_pool
    global g_nice
    global g_pool
    global g_queue
    global g_storage
    global g_timer

    g_config = loudml.config.load_config(path)
    g_storage = FileStorage(g_config.storage['path'])
    g_queue = multiprocessing.Queue()
    g_nice = g_config.training.get('nice', 0)
    g_training_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[g_queue],
    )
    g_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[g_queue],
    )
    g_timer = RepeatingTimer(1, read_messages)
    g_timer.start()

    def daemon_send_metrics():
        send_metrics(g_config.metrics, g_storage, user_agent="loudmld")

    daemon_send_metrics()
    schedule.every().hour.do(daemon_send_metrics)

    def daemon_clear_jobs():
        global g_jobs
        duration = g_config.server.get('jobs_max_ttl')
        now_dt = datetime.now(pytz.utc)
        expired = [
            job.id for job in g_jobs.values()
            if (job.is_stopped() and (now_dt - job.done_dt) > timedelta(
                seconds=duration))
        ]
        for i in expired:
            del g_jobs[i]

    schedule.every().minute.do(daemon_clear_jobs)
Example #10
 def _gen_compute_results(self, rule_mol, commit, max_workers, timeout,
                          chunk_size):
     """Yield new results using RDKit to apply a rule on a chemical."""
     with pebble.ProcessPool(max_workers=max_workers) as pool:
         # Prepare chunks of tasks
         # NB: pool.map does not seem to prevent tasks from holding resources (memory) until
         # their results are consumed, even when a generator is used as input; so we use
         # pool.schedule and build our own chunks to avoid saturating the RAM.
         logger.debug(
             f"Computing tasks in chunks of at most {chunk_size} couples (rule, molecule) "
             f"with {max_workers} workers and a per-task timeout of {timeout} seconds."
         )
         for chunk_idx, chunk in enumerate(_chunkify(rule_mol, chunk_size)):
             if chunk_idx > 0:
                 logger.debug(f"Working on task chunk #{chunk_idx+1}...")
             # Submit all the tasks for this chunk
             all_running_tasks = []  # list of Future objects
             for rid, rd_rule, cid, rd_mol in self._gen_couples(chunk):
                 task = (rid, cid,
                         pool.schedule(RuleBurner._task_fire,
                                       args=(rd_rule, rd_mol, self._with_hs,
                                             self._with_stereo),
                                       timeout=timeout))
                 all_running_tasks.append(task)
             # Gather the results
             for i, (rid, cid, future) in enumerate(all_running_tasks):
                 try:
                     rd_mol_list_list, inchikeys, inchis, smiles = future.result(
                     )
                     if rd_mol_list_list:  # silently discard tasks without a match
                         result = {
                             'rule_id': rid,
                             'substrate_id': cid,
                             'product_list':
                             rd_mol_list_list,  # TODO: replace by list of ids?
                             'product_inchikeys': inchikeys,
                             'product_inchis': inchis,
                             'product_smiles': smiles,
                         }
                         if commit:
                             self._insert_result(rid, cid, rd_mol_list_list,
                                                 inchikeys, inchis, smiles)
                         yield result
                 except concurrent.futures.TimeoutError:
                     logger.warning(
                         f"Task {rid} on {cid} (#{i}) timed-out.")
                     # task['future'].cancel()  # NB: no need to cancel it, it's already canceled
                     self._timeout_list.append((rid, cid))
                 except RuleFireError as error:
                     logger.error(
                         f"Task {rid} on {cid} (#{i}) failed: {error}.")
                     self._errors_list.append((rid, cid))
                 except pebble.ProcessExpired as error:
                     logger.critical(
                         f"Task {rid} on {cid} (#{i}) crashed unexpectedly: {error}."
                     )
                     self._errors_list.append((rid, cid))
             # Attempt to free the memory
             del all_running_tasks
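
The _chunkify helper used above is not shown in this excerpt; a minimal sketch of such a generator, assuming it simply yields lists of at most chunk_size items from the input iterable, could look like this:

import itertools

def _chunkify(iterable, chunk_size):
    """Yield successive lists of at most chunk_size items from iterable."""
    iterator = iter(iterable)
    while True:
        chunk = list(itertools.islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk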
Example #11
    def run(self,
            cell_model,
            param_values,
            sim=None,
            isolate=None,
            timeout=None):
        """Instantiate protocol"""

        if isolate is None:
            isolate = True

        if isolate:

            def _reduce_method(meth):
                """Overwrite reduce"""
                return (getattr, (meth.__self__, meth.__func__.__name__))

            import copyreg
            import types

            copyreg.pickle(types.MethodType, _reduce_method)
            import pebble
            from concurrent.futures import TimeoutError

            if timeout is not None:
                if timeout < 0:
                    raise ValueError("timeout should be > 0")

            with pebble.ProcessPool(max_tasks=1) as pool:
                tasks = pool.schedule(
                    self._run_func,
                    kwargs={
                        "cell_model": cell_model,
                        "param_values": param_values,
                        "sim": sim,
                    },
                    timeout=timeout,
                )
                try:
                    responses = tasks.result()
                except TimeoutError:
                    logger.debug("SweepProtocol: task took longer than "
                                 "timeout, will return empty response "
                                 "for this recording")
                    responses = {
                        recording.name: None
                        for recording in self.recordings
                    }
        else:
            responses = self._run_func(cell_model=cell_model,
                                       param_values=param_values,
                                       sim=sim)
        return responses
Example #12
    def run_ica(self):
        """Run ICA calculation."""

        methods = ["Infomax"]
        if have["picard"]:
            methods.insert(0, "Picard")
        if have["sklearn"]:
            methods.append("FastICA")

        dialog = RunICADialog(self,
                              self.model.current["data"].info["nchan"],
                              methods)

        if dialog.exec_():
            calc = CalcDialog(self, "Calculating ICA", "Calculating ICA.")

            method = dialog.method.currentText().lower()
            exclude_bad_segments = dialog.exclude_bad_segments.isChecked()

            fit_params = {}
            if dialog.extended.isEnabled():
                fit_params["extended"] = dialog.extended.isChecked()
            if dialog.ortho.isEnabled():
                fit_params["ortho"] = dialog.ortho.isChecked()

            ica = mne.preprocessing.ICA(method=method,
                                        fit_params=fit_params)
            history = f"ica = mne.preprocessing.ICA(method='{method}'"
            if fit_params:
                history += f", fit_params={fit_params})"
            else:
                history += ")"
            self.model.history.append(history)

            pool = pebble.ProcessPool(max_workers=1)
            process = pool.schedule(function=ica.fit,
                                    args=(self.model.current["data"],),
                                    kwargs={"reject_by_annotation":
                                            exclude_bad_segments})
            process.add_done_callback(lambda x: calc.accept())
            pool.close()

            if not calc.exec_():
                pool.stop()
                pool.join()
            else:
                self.model.current["ica"] = process.result()
                self.model.history.append(f"ica.fit(inst=raw, "
                                          f"reject_by_annotation="
                                          f"{exclude_bad_segments})")
                self.data_changed()
                pool.join()
Example #13
    def __init__(self):

        # TODO: forward additional arguments to ProcessPool()
        # function invocation in order to give users full control

        # Initializing process pool that will be used under the hood
        # to schedule all the tasks and get 'future' object as a return value

        # Important!!!: max_tasks=1 means that the worker process is restarted
        # after each task. This is because there is no other way to make
        # deep learning frameworks fully release the GPU memory they have
        # used other than stopping the process.
        # https://pebble.readthedocs.io/en/latest/#pebble.ProcessPool
        self.process_pool = pebble.ProcessPool(max_tasks=1)

        # This list is responsible for storing 'future' object of every
        # scheduled task
        self.tasks_list = []
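
A short sketch of how a pool configured this way might be used; train_model and the configs are hypothetical placeholders for the GPU-bound work described in the comments above:

import pebble

def train_model(config):
    # placeholder for a GPU-heavy task; with max_tasks=1 the worker process
    # exits after it, so any GPU memory it allocated is returned to the system
    return {"config": config, "loss": 0.0}

if __name__ == "__main__":
    process_pool = pebble.ProcessPool(max_tasks=1)
    tasks_list = [process_pool.schedule(train_model, args=(cfg,))
                  for cfg in ({"lr": 1e-3}, {"lr": 1e-4})]
    results = [task.result() for task in tasks_list]
    process_pool.close()
    process_pool.join()
    print(results)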
Example #14
def main():
    global MINUTE
    cp = ps_collector.config.get_config()
    if cp.has_option("Scheduler", "debug"):
        if cp.get("Scheduler", "debug").lower() == "true":
            MINUTE = 1
    ps_collector.config.setup_logging(cp)
    global log
    log = logging.getLogger("scheduler")

    pool_size = 5
    if cp.has_option("Scheduler", "pool_size"):
        pool_size = cp.getint("Scheduler", "pool_size")
    pool = pebble.ProcessPool(max_workers=pool_size, max_tasks=5)

    state = SchedulerState(cp, pool, log)

    # Query the mesh the first time
    query_ps_mesh(state)

    query_ps_mesh_job = functools.partial(query_ps_mesh, state)
    cleanup_futures_job = functools.partial(cleanup_futures, state)

    mesh_interval_s = cp.getint("Scheduler", "mesh_interval") * MINUTE
    log.info("Will update the mesh config every %d seconds.", mesh_interval_s)
    schedule.every(mesh_interval_s).to(mesh_interval_s +
                                       MINUTE).seconds.do(query_ps_mesh_job)

    schedule.every(10).seconds.do(cleanup_futures_job)

    monitor = Monitoring()
    # Start the prometheus webserver
    start_http_server(8000)
    try:
        while True:
            schedule.run_pending()
            monitor.process_messages()
            time.sleep(1)

    except:
        pool.stop()
        pool.join()
        raise
Example #15
    def makeRanks(self,
                  covProfiles,
                  kmerSigs,
                  contigLengths,
                  silent=False,
                  use_multiple_processes=True):
        """Compute pairwise rank distances separately for coverage profiles and
        kmer signatures, and give rank distances as a fraction of the largest rank.
        """

        n = len(contigLengths)
        weights = np.empty(n * (n - 1) // 2, dtype=np.double)
        k = 0
        for i in range(n - 1):
            weights[k:(k + n - 1 -
                       i)] = contigLengths[i] * contigLengths[(i + 1):n]
            k = k + n - 1 - i
        weight_fun = lambda i: weights[i]

        if use_multiple_processes:
            with pebble.ProcessPool(max_workers=2,
                                    context=multiprocessing.get_context(
                                        'forkserver')) as executor:
                futures = [
                    executor.schedule(
                        choose_rank_method,
                        (covProfiles, kmerSigs, weight_fun, switch))
                    for switch in range(2)
                ]
                executor.close()
                results = []
                for future in futures:
                    result = future.result()
                    results.append(result)

                return results
        else:
            results = [
                choose_rank_method(covProfiles, kmerSigs, weight_fun, switch)
                for switch in range(2)
            ]

            return results
Example #16
    def run_ica(self):
        """Run ICA calculation."""
        dialog = RunICADialog(self, self.model.current["data"].info["nchan"],
                              have["picard"], have["sklearn"])

        if dialog.exec_():
            calc = CalcDialog(self, "Calculating ICA", "Calculating ICA.")
            method = dialog.method.currentText()
            exclude_bad_segments = dialog.exclude_bad_segments.isChecked()
            fit_params = {}

            if not dialog.extended.isHidden():
                fit_params["extended"] = dialog.extended.isChecked()

            if not dialog.ortho.isHidden():
                fit_params["ortho"] = dialog.ortho.isChecked()

            ica = mne.preprocessing.ICA(method=dialog.methods[method],
                                        fit_params=fit_params)
            self.model.history.append(f"ica = mne.preprocessing.ICA("
                                      f"method={dialog.methods[method]}, "
                                      f"fit_params={fit_params})")

            kwds = {"reject_by_annotation": exclude_bad_segments}
            pool = pebble.ProcessPool(max_workers=1)
            process = pool.schedule(function=ica.fit,
                                    args=(self.model.current["data"], ),
                                    kwargs=kwds)
            process.add_done_callback(lambda x: calc.accept())
            pool.close()

            if not calc.exec_():
                pool.stop()
                pool.join()
            else:
                self.model.current["ica"] = process.result()
                self.model.history.append(f"ica.fit(inst=raw, "
                                          f"reject_by_annotation="
                                          f"{exclude_bad_segments})")
                self.data_changed()
                pool.join()
Example #17
def map2(f, args, timeout=None):
    """Reproducible map with Pebble multiprocessing tool.

    Return all results that finish before timeout, None otherwise."""
    from concurrent.futures import TimeoutError  # raised by pebble when a task hits its timeout

    fs = [f for _ in args]
    seeds = [random.getrandbits(128) for _ in args]
    fargseeds = zip(fs, args, seeds)
    pool = pebble.ProcessPool(
        max_workers=int(os.environ.get("JUDICIOUS_POOL_WORKERS", 10)))
    # pass the timeout through so tasks are actually bounded, as the docstring promises
    future = pool.map(unpack_seed_apply, fargseeds, timeout=timeout)
    iterator = future.result()
    results = []
    while True:
        try:
            result = next(iterator)
            results.append(result)
        except StopIteration:
            break
        except TimeoutError:
            # record None for tasks that did not finish before the timeout
            results.append(None)
        except pebble.ProcessExpired as error:
            print("%s. Exit code: %d" % (error, error.exitcode))

    return results
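
A hedged usage sketch for map2; slow_square is a hypothetical top-level function, and the seeding helper unpack_seed_apply is assumed to be defined in the same module as map2:

import time

def slow_square(x):
    time.sleep(0.1)
    return x * x

# results holds the squares that completed within the 5 second per-task budget,
# with None recorded for any task that timed out
results = map2(slow_square, [1, 2, 3], timeout=5)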
Example #18
    def __init__(
        self,
        jobs: List[Job],
        check_interval=60,
        min_pool_processes=1,
        max_tasks_per_job=None,
    ):
        """
        :param check_interval: number of seconds to wait in between checking for new
        tasks
        :param max_tasks_per_job: Jobs are limited to having this number of tasks
        waiting in the pool at once, to reduce the possibility of a single job
        flooding the pool. Defaults to the size of the process pool.
        :param min_pool_processes: The minimum size of the process pool used to execute
        tasks. The actual pool size is the larger of this value and the detected CPU count.
        """

        self.jobs = {job.job_name: job for job in jobs}

        pool_size = max(min_pool_processes, mp.cpu_count())

        self.pool = pebble.ProcessPool(pool_size, max_tasks=1)
        self.max_tasks_per_job = max_tasks_per_job or pool_size
        self.check_interval = check_interval
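
One way the max_tasks_per_job cap described in the docstring could be enforced when submitting work; the submit_for_job helper, the pending bookkeeping, and the run_task callable are assumptions for illustration, not part of the original class:

import pebble

def submit_for_job(pool, job_name, run_task, pending, max_tasks_per_job, task_args):
    """Schedule argument tuples for one job, never exceeding max_tasks_per_job in flight."""
    # drop futures that have already finished
    pending[job_name] = [f for f in pending.get(job_name, []) if not f.done()]
    for args in task_args:  # each element is a tuple of positional arguments
        if len(pending[job_name]) >= max_tasks_per_job:
            break  # this job already fills its share of the pool
        pending[job_name].append(pool.schedule(run_task, args=args))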
Example #19
def process_pycs(pyc_iterable: Iterable[os.PathLike],
                 alternate_opmap: Dict[str, int] = None) -> None:
    """Multi-processed decompilation orchestration of compiled Python files.

    Currently, pydecipher uses `uncompyle6`_ as its decompiler. It works well
    with `xdis`_ (same author) and allows for the decompilation of Code objects
    using alternate opmaps (with our extension of xdis).

    This function starts up CPU count * 2 pydecipher processes to decompile
    the given Python. It also checks for an attached debugger, in which case
    decompilation runs single-threaded to make debugging easier.

    .. _uncompyle6: https://github.com/rocky/python-uncompyle6/
    .. _xdis: https://github.com/rocky/python-xdis

    Parameters
    ----------
    pyc_iterable : Iterable[os.PathLike]
        An iterable of pathlib.Path objects, referencing compiled Python files
        to decompile.
    alternate_opmap : Dict[str, int], optional
        An opcode map of OPNAME: OPCODE (i.e. 'POP_TOP': 1). This should be a
        complete opmap for the Python version of the files being decompiled.
        Even if only two opcodes were swapped, the opcode map passed in should
        contain all 100+ Python bytecode operations.
    """
    # This checks if the PyCharm debugger is attached.
    if sys.gettrace():
        # Single-threaded for easier debugging.
        logger.debug(
            "[!] Debugger detected, not using multiprocessing for decompilation of pyc files."
        )
        return_status_codes: List[str] = []
        pyc_file: pathlib.Path
        for pyc_file in pyc_iterable:
            return_status_codes.append(
                decompile_pyc((pyc_file, alternate_opmap,
                               pydecipher.get_logging_options())))
    else:
        return_status_codes: List[str] = []
        pool: pebble.ProcessPool
        with pebble.ProcessPool(os.cpu_count() * 2) as pool:
            iterables = [(pyc, alternate_opmap,
                          pydecipher.get_logging_options())
                         for pyc in pyc_iterable]
            future: pebble.ProcessMapFuture = pool.map(decompile_pyc,
                                                       iterables,
                                                       timeout=300)
            iterator: Iterable = future.result()
            index: int = 0
            while True:
                try:
                    result: Any = next(iterator)
                    return_status_codes.append(result)
                except StopIteration:
                    break
                except TimeoutError as e:
                    e: TimeoutError
                    failed_pyc_path: str = str(iterables[index][0])
                    logger.error(
                        f"[!] Timed out ({e.args[1]}s) trying to decompile {failed_pyc_path}."
                    )
                    return_status_codes.append("error")
                except pebble.ProcessExpired as e:
                    e: pebble.ProcessExpired
                    failed_pyc_path: str = str(iterables[index][0])
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} (process expired with status code {e.exitcode})."
                    )
                    return_status_codes.append("error")
                except Exception as e:
                    e: Exception
                    failed_pyc_path: str = str(iterables[index][0])
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} with unknown error: {e}"
                    )
                    return_status_codes.append("error")
                finally:
                    index += 1

    successes: int = return_status_codes.count("success")
    opcode_errors: int = return_status_codes.count("opcode_error")
    errors: int = return_status_codes.count("error") + opcode_errors
    if opcode_errors:
        logger.warning(
            f"[!] {opcode_errors} file(s) failed to decompile with an error "
            "that indicate its opcode mappings may have been remapped. Try using"
            "`remap` on this set of bytecode.")
    if successes and not errors:
        logger.info(f"[+] Successfully decompiled {successes} .pyc files.")
    elif successes and errors:
        logger.warning(
            f"[!] Successfully decompiled {successes} .pyc files. Failed to decompile {errors} files. "
            "See log for more information.")
    elif not successes and errors:
        logger.error(
            f"[!] Failed to decompile all {errors} .pyc files. See log for more information."
        )
    else:
        logger.warning(
            "[!] No pyc files were decompiled. See log for more information.")
Example #20
def autoprocess(parallel=1, failed_processing=False, maxtasksperchild=7, memory_debugging=False, processing_timeout=300):
    maxcount = cfg.cuckoo.max_analysis_count
    count = 0
    db = Database()
    # pool = multiprocessing.Pool(parallel, init_worker)
    try:
        memory_limit()
        log.info("Processing analysis data")
        with pebble.ProcessPool(max_workers=parallel, max_tasks=maxtasksperchild, initializer=init_worker) as pool:
            # CAUTION - big ugly loop ahead.
            while count < maxcount or not maxcount:

                # If not enough free disk space is available, then we print an
                # error message and wait another round (this check is ignored
                # when the freespace configuration variable is set to zero).
                if cfg.cuckoo.freespace:
                    # Resolve the full base path to the analysis folder, just in
                    # case somebody decides to make a symbolic link out of it.
                    dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
                    need_space, space_available = free_space_monitor(dir_path, return_value=True, processing=True)
                    if need_space:
                        log.error(
                            "Not enough free disk space! (Only %d MB!). You can change limits it in cuckoo.conf -> freespace",
                            space_available,
                        )
                        time.sleep(60)
                        continue

                # If still full, don't add more (necessary despite pool).
                if len(pending_task_id_map) >= parallel:
                    time.sleep(5)
                    continue
                if failed_processing:
                    tasks = db.list_tasks(status=TASK_FAILED_PROCESSING, limit=parallel, order_by=Task.completed_on.asc())
                else:
                    tasks = db.list_tasks(status=TASK_COMPLETED, limit=parallel, order_by=Task.completed_on.asc())
                added = False
                # For loop to add only one, nice. (reason is that we shouldn't overshoot maxcount)
                for task in tasks:
                    # Not-so-efficient lock.
                    if pending_task_id_map.get(task.id):
                        continue
                    log.info("Processing analysis data for Task #%d", task.id)
                    if task.category != "url":
                        sample = db.view_sample(task.sample_id)
                        copy_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", str(task.id), sample.sha256)
                    else:
                        copy_path = None
                    args = task.target, copy_path
                    kwargs = dict(report=True, auto=True, task=task, memory_debugging=memory_debugging)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (before) GC object counts: %d, %d", task.id, len(gc.get_objects()), len(gc.garbage))
                    # result = pool.apply_async(process, args, kwargs)
                    future = pool.schedule(process, args, kwargs, timeout=processing_timeout)
                    pending_future_map[future] = task.id
                    pending_task_id_map[task.id] = future
                    future.add_done_callback(processing_finished)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (after) GC object counts: %d, %d", task.id, len(gc.get_objects()), len(gc.garbage))
                    count += 1
                    added = True
                    if task.category != "url":
                        # "sample" is only defined for non-URL tasks above
                        copy_origin_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", sample.sha256)
                        if cfg.cuckoo.delete_bin_copy and os.path.exists(copy_origin_path):
                            os.unlink(copy_origin_path)
                    break
                if not added:
                    # don't hog cpu
                    time.sleep(5)
    except KeyboardInterrupt:
        # ToDo verify in finally
        # pool.terminate()
        raise
    except MemoryError:
        mem = get_memory() / 1024 / 1024
        print("Remain: %.2f GB" % mem)
        sys.stderr.write("\n\nERROR: Memory Exception\n")
        sys.exit(1)
    except Exception as e:
        import traceback

        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
Example #21
    def generate(self):
        self.init()
        num_generated = 0
        num_processed = 0
        num_raw_points = -1
        if os.path.exists(self.args.raw_data_path + '.index'):
            reader = IndexedFileReader(self.args.raw_data_path)
            num_raw_points = len(reader)
            reader.close()

        start_time = time.time()
        with pebble.ProcessPool(
                max_workers=self.args.processes,
                initializer=FunctionSeqDataGenerator.Worker.init,
                initargs=(self.args, )) as p:

            chunksize = self.args.processes * self.args.chunksize
            for chunk in misc.grouper(chunksize, self.raw_data_iterator()):
                future = p.map(FunctionSeqDataGenerator.Worker.process,
                               chunk,
                               timeout=self.args.task_timeout)
                res_iter = future.result()

                idx = -1
                while True:
                    idx += 1
                    if idx < len(chunk) and chunk[idx] is not None:
                        num_processed += 1

                    try:
                        result = next(res_iter)
                        if chunk[idx] is None:
                            continue

                        if result is not None:
                            self.process_result(result)
                            num_generated += 1

                    except StopIteration:
                        break

                    except TimeoutError as error:
                        pass

                    except Exception as e:
                        try:
                            logger.warn("Failed for", chunk[idx])
                            logging.exception(e)

                        except:
                            pass

                    finally:

                        speed = round(
                            num_processed / (time.time() - start_time), 1)
                        if num_raw_points != -1:
                            time_remaining = round(
                                (num_raw_points - num_processed) / speed, 1)
                        else:
                            time_remaining = '???'

                        logger.log(
                            "Generated/Processed : {}/{} ({}/s, TTC={}s)".
                            format(num_generated, num_processed, speed,
                                   time_remaining),
                            end='\r')

            p.stop()
            try:
                p.join(10)
            except:
                pass

        self.fwriter.close()

        logger.log("\n-------------------------------------------------")
        logger.info("Total Time : {:.2f}s".format(time.time() - start_time))
        logger.info(
            "Generated {} training points from {} raw data points".format(
                num_generated, num_processed))
Example #22
 def test_pool_deadlock(self):
     """Process Pool Fork no deadlock if writing worker dies locking channel."""
     with pebble.ProcessPool(max_workers=1) as pool:
         with self.assertRaises(pebble.ProcessExpired):
             pool.schedule(function).result()
Example #23
def main():
    """
    Loud ML server
    """

    global g_config
    global g_training_pool
    global g_nice
    global g_pool
    global g_queue
    global g_storage

    parser = argparse.ArgumentParser(
        description=main.__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '-c',
        '--config',
        help="Path to configuration file",
        type=str,
        default="/etc/loudml/config.yml",
    )

    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    app.logger.setLevel(logging.INFO)

    try:
        g_config = loudml.config.load_config(args.config)
        g_storage = FileStorage(g_config.storage['path'])
        loudml.config.load_plugins(args.config)
    except errors.LoudMLException as exn:
        logging.error(exn)
        sys.exit(1)

    try:
        cron = CronTab(user='******')
        cron.remove_all()
        if g_config.training['incremental']['enable']:
            for tab in g_config.training['incremental']['crons']:
                job = cron.new(
                    command='/usr/bin/loudml train \* -i -f {} -t {}'.format(
                        tab['from'], tab['to']),
                    comment='incremental training')
                job.setall(tab['crontab'])

        for item in cron:
            logging.info(item)

        cron.write()
    except OSError:
        logging.error(
            "detected development environment - incremental training disabled")

    g_queue = multiprocessing.Queue()
    g_nice = g_config.training.get('nice', 0)
    g_training_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[args.config, g_queue],
    )
    g_pool = pebble.ProcessPool(
        max_workers=g_config.server.get('workers', 1),
        max_tasks=g_config.server.get('maxtasksperchild', 1),
        initializer=loudml.worker.init_worker,
        initargs=[args.config, g_queue],
    )

    timer = RepeatingTimer(1, read_messages)
    timer.start()

    listen_addr = g_config.server['listen']
    host, port = listen_addr.split(':')

    restart_predict_jobs()

    def daemon_send_metrics():
        send_metrics(g_config.metrics, g_storage, user_agent="loudmld")

    daemon_send_metrics()
    schedule.every().hour.do(daemon_send_metrics)

    try:
        http_server = WSGIServer((host, int(port)), app)
        logging.info("starting Loud ML server on %s", listen_addr)
        http_server.serve_forever()
    except OSError as exn:
        logging.error(str(exn))
    except KeyboardInterrupt:
        pass

    logging.info("stopping")
    timer.cancel()
    g_training_pool.stop()
    g_training_pool.join()
    g_pool.stop()
    g_pool.join()
Example #24
def call_with_timeout_multiprocess(func, *args, timeout=3):
    pool = pebble.ProcessPool(max_workers=1)
    with pool:
        future = pool.schedule(func, args=args, timeout=timeout)
        return future.result()
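
A usage sketch: the call either returns the function's result or raises concurrent.futures.TimeoutError once the budget is exceeded; slow_add is a hypothetical example task:

import time
from concurrent.futures import TimeoutError

def slow_add(a, b):
    time.sleep(10)
    return a + b

if __name__ == "__main__":
    try:
        print(call_with_timeout_multiprocess(slow_add, 1, 2, timeout=3))
    except TimeoutError:
        print("gave up after 3 seconds")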
Example #25
    def get_cluster_labels_array(
            distances,
            metric="euclidean",
            selection_method="eom",
            top_n=3,
            min_size_start=1,
            min_size_end=10,
            solver="hbgf",
            threads=16,
            embeddings_for_precomputed=None,
            use_multiple_processes=True,

    ):
        """
        Uses cluster ensembling with the ClusterEnsembles package to produce a partitioned set of
        high quality clusters from multiple HDBSCAN runs.
        Takes the top N clustering results and combines them.
        solver - one of {'cspa', 'hgpa', 'mcla', 'hbgf', 'nmf', 'all'}, default='hbgf'
        """
        label_array = np.array([np.array([-1 for _ in range(distances.shape[0])]) for _ in range(top_n)])
        best_min_size = np.array([None for _ in range(top_n)])
        best_min_sample = np.array([None for _ in range(top_n)])
        best_validity = np.array([None for _ in range(top_n)])
        best_unbinned = np.array([None for _ in range(top_n)])
        best_n_bins = np.array([None for _ in range(top_n)])
        index = 0


        if use_multiple_processes:

            worker_limit = threads // 5
            # thread_limit = worker_limit // 5
            # with threadpoolctl.threadpool_limits(limits=max(threads // 5, 1), user_api='blas'):
            with pebble.ProcessPool(max_workers=threads // 5, context=multiprocessing.get_context('forkserver')) as executor:
                futures = [
                    executor.schedule(
                        Clusterer.generate_cluster,
                        (
                            distances,
                            embeddings_for_precomputed,
                            selection_method,
                            metric,
                            min_size,
                            min_sample,
                            threads
                        ),
                        timeout=1800,
                    ) for (min_size, min_sample) in itertools.permutations(range(1, 10), 2) if min_size != 1 and min_sample <= min_size
                ]
                # executor.close()
                for future in futures:
                    try:
                        (cluster_validity, min_size, min_sample, labels) = future.result()
                        if np.any(best_validity == None):
                            best_min_size[index] = min_size
                            best_min_sample[index] = min_sample
                            best_validity[index] = cluster_validity
                            label_array[index] = labels
                            best_n_bins[index] = np.unique(labels).shape[0]
                            best_unbinned[index] = (labels == -1).sum()
                            index += 1

                            if index == top_n:

                                # sort the current top by ascending validity order
                                ranks = np.argsort(best_validity)
                                best_validity = best_validity[ranks]
                                best_min_sample = best_min_sample[ranks]
                                best_min_size = best_min_size[ranks]
                                label_array = label_array[ranks]
                                best_n_bins = best_n_bins[ranks]
                                best_unbinned = best_unbinned[ranks]

                        elif np.any(best_validity < cluster_validity):
                            # insert the new result and remove the worst result
                            ind = np.searchsorted(best_validity, cluster_validity)
                            best_validity = np.insert(best_validity, ind, cluster_validity)[1:]
                            best_min_size = np.insert(best_min_size, ind, min_size)[1:]
                            best_min_sample = np.insert(best_min_sample, ind, min_sample)[1:]
                            label_array = np.insert(label_array, ind, labels, axis=0)[1:]
                            best_n_bins = np.insert(best_n_bins, ind, np.unique(labels).shape[0])[1:]
                            best_unbinned = np.insert(best_unbinned, ind, (labels == -1).sum())[1:]
                    except TimeoutError:
                        continue

        else:
            results = [
                    Clusterer.generate_cluster
                    (
                        distances,
                        embeddings_for_precomputed,
                        selection_method,
                        metric,
                        min_size,
                        min_sample,
                        threads
                ) for (min_size, min_sample) in itertools.permutations(range(2, 10), 2) if min_size != 1 and min_sample <= min_size
            ]

            for result in results:
                (cluster_validity, min_size, min_sample, labels) = result
                if np.any(best_validity == None):
                    best_min_size[index] = min_size
                    best_min_sample[index] = min_sample
                    best_validity[index] = cluster_validity
                    label_array[index] = labels
                    best_n_bins[index] = np.unique(labels).shape[0]
                    best_unbinned[index] = (labels == -1).sum()
                    index += 1

                    if index == top_n:
                        # sort the current top by ascending validity order
                        ranks = np.argsort(best_validity)
                        best_validity = best_validity[ranks]
                        best_min_sample = best_min_sample[ranks]
                        best_min_size = best_min_size[ranks]
                        label_array = label_array[ranks]
                        best_n_bins = best_n_bins[ranks]
                        best_unbinned = best_unbinned[ranks]

                elif np.any(best_validity < cluster_validity):
                    # insert the new result and remove the worst result
                    ind = np.searchsorted(best_validity, cluster_validity)
                    best_validity = np.insert(best_validity, ind, cluster_validity)[1:]
                    best_min_size = np.insert(best_min_size, ind, min_size)[1:]
                    best_min_sample = np.insert(best_min_sample, ind, min_sample)[1:]
                    label_array = np.insert(label_array, ind, labels, axis=0)[1:]
                    best_n_bins = np.insert(best_n_bins, ind, np.unique(labels).shape[0])[1:]
                    best_unbinned = np.insert(best_unbinned, ind, (labels == -1).sum())[1:]

        return label_array, best_validity, best_n_bins, best_unbinned
Example #26
                    save_uncompressed=args.save_uncompressed,
                    memoize=args.scraper_memoize):
    time.sleep(1)
    return "xyz"


if __name__ == "__main__":
    month = extract_month(args.url_file)

    # in case we are resuming from a previous run
    completed_uids, state_fp, prev_cid = get_state(month, args.output_dir)

    # URLs we haven't scraped yet (if first run, all URLs in file)
    url_entries = load_urls(args.url_file, completed_uids, args.max_urls)

    pool = pbl.ProcessPool(max_workers=args.n_procs)

    # process one "chunk" of args.chunk_size URLs at a time
    for i, chunk in enumerate(chunks(url_entries, args.chunk_size)):
        cid = prev_cid + i + 1

        print("Downloading chunk {}".format(cid))
        t1 = time.time()

        if args.timeout > 0:
            # imap as iterator allows .next() w/ timeout.
            # ordered version doesn't seem to work correctly.
            # for some reason, you CANNOT track j or chunk[j] in the loop,
            # so don't add anything else to the loop below!
            # confusingly, chunksize below is unrelated to our chunk_size
            #chunk_iter = pool.imap_unordered(timeout_checker, chunk, chunksize=1)
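
With pebble, the per-URL timeout pattern described in the comments is usually written as pool.map with a timeout, iterating the result and handling timeouts per item. A minimal sketch, assuming timeout_checker (referenced in the commented-out line) is a top-level worker that handles one URL entry:

from concurrent.futures import TimeoutError

import pebble

def scrape_chunk(chunk, n_procs, timeout):
    results = []
    with pebble.ProcessPool(max_workers=n_procs) as pool:
        future = pool.map(timeout_checker, chunk, timeout=timeout)
        iterator = future.result()
        while True:
            try:
                results.append(next(iterator))
            except StopIteration:
                break
            except TimeoutError:
                results.append(None)   # this URL exceeded the per-item timeout
            except pebble.ProcessExpired:
                results.append(None)   # the worker died while handling this URL
    return results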
Example #27
    return U.ppid(), foo + bar


def target_wrap(function, *a, **ka):
    return a, ka, function(*a, **ka)


gt = U.get_or_set('gt', [])
ge = U.get_or_set('ge', [])


def task_done(future):
    global gt, ge
    try:
        result = future.result()  # blocks until results are ready
        print("success:", repr(result[2]))
        gt.append(future)
    except TimeoutError as error:
        print("Function took longer than %d seconds" % error.args[1])
        ge.append(error)
    except Exception as error:
        print("Function raised %s" % error)
        print(error.traceback)  # traceback of the function


if __name__ == '__main__':
    with pebble.ProcessPool(max_workers=5, max_tasks=0) as pool:
        for i in range(0, 10):
            future = pool.schedule(function, args=[i], timeout=1)
            future.add_done_callback(task_done)
Example #28
    def run(self,
            cell_model,
            param_values,
            sim=None,
            isolate=None,
            timeout=None):
        """Instantiate protocol"""

        if isolate is None:
            isolate = True
        if isolate:  # and not cell_model.name in 'L5PC':

            def _reduce_method(meth):
                """Overwrite reduce"""
                return (getattr, (meth.__self__, meth.__func__.__name__))

            import copyreg
            import types
            copyreg.pickle(types.MethodType, _reduce_method)

            import pebble
            from concurrent.futures import TimeoutError

            if timeout is not None:
                if timeout < 0:
                    raise ValueError("timeout should be > 0")
            ###
            # Foreign code
            ###

            with pebble.ProcessPool(max_workers=1, max_tasks=1) as pool:
                tasks = pool.schedule(self._run_func,
                                      kwargs={
                                          'cell_model': cell_model,
                                          'param_values': param_values,
                                          'sim': sim
                                      },
                                      timeout=timeout)
                ##
                # works if inverted try for except etc
                ##

                try:
                    responses = tasks.result()
                except:
                    responses = self._run_func(cell_model=cell_model,
                                               param_values=param_values,
                                               sim=sim)

        else:
            responses = self._run_func(cell_model=cell_model,
                                       param_values=param_values,
                                       sim=sim)
        new_responses = {}
        for k, v in responses.items():
            if hasattr(v, 'response'):
                time = v.response[
                    'time'].values  #[r.response[0] for r in self.recording.repsonse ]
                vm = v.response[
                    'voltage'].values  #[ r.response[1] for r in self.recording.repsonse ]
                if not hasattr(cell_model, 'l5pc'):
                    new_responses['neo_' + str(k)] = AnalogSignal(
                        vm, units=pq.mV, sampling_period=(1 / 0.01255) * pq.s)

                else:
                    new_responses['neo_' + str(k)] = AnalogSignal(
                        vm,
                        units=pq.mV,
                        sampling_period=(time[1] - time[0]) * pq.s)
                train_len = len(
                    sf.get_spike_train(new_responses['neo_' + str(k)]))
                if train_len > 0:
                    pass

        responses.update(new_responses)
        return responses
Example #29
def autoprocess(parallel=1,
                failed_processing=False,
                maxtasksperchild=7,
                memory_debugging=False,
                processing_timeout=300):
    maxcount = cfg.cuckoo.max_analysis_count
    count = 0
    db = Database()
    #pool = multiprocessing.Pool(parallel, init_worker)
    pool = pebble.ProcessPool(max_workers=parallel,
                              max_tasks=maxtasksperchild,
                              initializer=init_worker)

    try:
        log.info("Processing analysis data")
        # CAUTION - big ugly loop ahead.
        while count < maxcount or not maxcount:

            # If still full, don't add more (necessary despite pool).
            if len(pending_task_id_map) >= parallel:
                time.sleep(5)
                continue

            # If we're here, getting parallel tasks should at least
            # have one we don't know.
            if failed_processing:
                tasks = db.list_tasks(status=TASK_FAILED_PROCESSING,
                                      limit=parallel,
                                      order_by=Task.completed_on.asc())
            else:
                tasks = db.list_tasks(status=TASK_COMPLETED,
                                      limit=parallel,
                                      order_by=Task.completed_on.asc())
            added = False
            # For loop to add only one, nice. (reason is that we shouldn't overshoot maxcount)
            for task in tasks:
                # Not-so-efficient lock.
                if pending_task_id_map.get(task.id):
                    continue
                log.info("Processing analysis data for Task #%d", task.id)
                if task.category == "file":
                    sample = db.view_sample(task.sample_id)
                    copy_path = os.path.join(CUCKOO_ROOT, "storage",
                                             "binaries", sample.sha256)
                else:
                    copy_path = None
                args = task.target, copy_path
                kwargs = dict(report=True,
                              auto=True,
                              task=task,
                              memory_debugging=memory_debugging)
                if memory_debugging:
                    gc.collect()
                    log.info("[%d] (before) GC object counts: %d, %d", task.id,
                             len(gc.get_objects()), len(gc.garbage))

                #result = pool.apply_async(process, args, kwargs)
                future = pool.schedule(process,
                                       args,
                                       kwargs,
                                       timeout=processing_timeout)
                pending_future_map[future] = task.id
                pending_task_id_map[task.id] = future
                future.add_done_callback(processing_finished)
                if memory_debugging:
                    gc.collect()
                    log.info("[%d] (after) GC object counts: %d, %d", task.id,
                             len(gc.get_objects()), len(gc.garbage))

                count += 1
                added = True
                break

            if not added:
                # don't hog cpu
                time.sleep(5)

    except KeyboardInterrupt:
        #ToDo verify in finally
        #pool.terminate()
        raise
    except:
        import traceback
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
Example #30
def main():
    global MINUTE
    cp = ps_collector.config.get_config()
    if cp.has_option("Scheduler", "debug"):
        if cp.get("Scheduler", "debug").lower() == "true":
            MINUTE = 1
    ps_collector.config.setup_logging(cp)
    global log
    log = logging.getLogger("scheduler")

    # Start the push processor
    if isPush(cp):
        log.debug("Starting the push parser")
        push_parser = PSPushParser(cp, log)
        push_parser.start()
    else:
        log.debug("Not starting the push parser")

    pool_size = 5
    if cp.has_option("Scheduler", "pool_size"):
        pool_size = cp.getint("Scheduler", "pool_size")
    pool = pebble.ProcessPool(max_workers=pool_size, max_tasks=5)

    state = SchedulerState(cp, pool, log)

    # Parse the oneshot
    if isOneShot(cp):
        # Parse the start and end
        log.info("Starting Oneshot")
        state.oneshot = True
        start = dateutil.parser.parse(cp.get("Oneshot", "start"))
        end = dateutil.parser.parse(cp.get("Oneshot", "end"))
        state.query_range = (start, end)

    # Initialize the meshes
    # Get the mesh endpoint configuration, which may be a comma separated list
    mesh_config_val = state.cp.get("Mesh", "endpoint")
    if "," in mesh_config_val:
        meshes = mesh_config_val.split(",")
    else:
        meshes = [mesh_config_val]

    for mesh in meshes:
        state.meshes[mesh] = []

    # Query the mesh the first time
    query_ps_mesh(state)

    query_ps_mesh_job = functools.partial(query_ps_mesh, state)
    cleanup_futures_job = functools.partial(cleanup_futures, state)

    mesh_interval_s = cp.getint("Scheduler", "mesh_interval") * MINUTE
    log.info("Will update the mesh config every %d seconds.", mesh_interval_s)

    if not isOneShot(cp):
        schedule.every(mesh_interval_s).to(
            mesh_interval_s + MINUTE).seconds.do(query_ps_mesh_job)
        schedule.every(10).seconds.do(cleanup_futures_job)

    monitor = Monitoring()
    # Start the prometheus webserver
    start_http_server(8000)
    try:
        if not isOneShot(cp):
            while True:
                schedule.run_pending()
                monitor.process_messages()
                if isPush(cp):
                    push_parser = checkPushProcessor(push_parser, cp, log)
                time.sleep(1)
        else:
            pool.close()
            pool.join()

    except:
        pool.stop()
        pool.join()
        raise