Example #1
 def test_process_pool_join_futures_timeout(self):
     """Process Pool Spawn TimeoutError is raised if join on long tasks."""
     pool = ProcessPool()
     for _ in range(2):
         pool.schedule(long_function)
     pool.close()
     self.assertRaises(TimeoutError, pool.join, 0.4)
     pool.stop()
     pool.join()
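The tests in this section rely on module-level helpers such as function, long_function, and long_initializer that the excerpts do not show. A minimal sketch of what such helpers might look like, purely as an assumption consistent with the short timeouts used above:

import time

def function(argument, keyword_argument=0):
    # Trivial task: returns quickly so futures complete almost immediately.
    return argument + keyword_argument

def long_function(value=1):
    # Long-running task: sleeps past the short join/map timeouts used in the tests.
    time.sleep(1)
    return value

def long_initializer():
    # Worker initializer that stalls start-up.
    time.sleep(60)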
Example #2
 def test_process_pool_join_workers(self):
     """Process Pool Spawn no worker is running after join."""
     pool = ProcessPool(max_workers=4)
     pool.schedule(function, args=[1])
     pool.stop()
     pool.join()
     self.assertEqual(len(pool._pool_manager.worker_manager.workers), 0)
Example #3
 def test_process_pool_close_stopped(self):
     """Process Pool Fork is stopped after close."""
     pool = ProcessPool(max_workers=1)
     pool.schedule(function, args=[1])
     pool.close()
     pool.join()
     self.assertFalse(pool.active)
Example #4
 def test_process_pool_stop_stopped(self):
     """Process Pool Spawn is stopped after stop."""
     pool = ProcessPool()
     pool.schedule(function, args=[1])
     pool.stop()
     pool.join()
     self.assertFalse(pool.active)
Example #5
    def test_process_pool_stop_large_data(self):
        """Process Pool Spawn is stopped if large data is sent on the channel."""
        data = "a" * 4098 * 1024
        pool = ProcessPool(initializer=long_initializer)
        pool.schedule(function, args=[data])
        pool.stop()
        pool.join()

        self.assertFalse(pool.active)
Example #6
    def test_process_pool_stop_large_data(self):
        """Process Pool Fork is stopped if large data is sent on the channel."""
        data = "a" * 1098 * 1024 * 50  # 50 Mb
        pool = ProcessPool(max_workers=1)
        pool.schedule(function, args=[data])
        pool.stop()
        pool.join()

        self.assertFalse(pool.active)
Example #7
 def test_process_pool_stop_futures(self):
     """Process Pool Spawn not all futures are performed on stop."""
     futures = []
     pool = ProcessPool()
     for index in range(10):
         futures.append(pool.schedule(function, args=[index]))
     pool.stop()
     pool.join()
     self.assertTrue(len([f for f in futures if not f.done()]) > 0)
Example #8
 def test_process_pool_close_futures(self):
     """Process Pool Spawn all futures are performed on close."""
     futures = []
     pool = ProcessPool()
     for index in range(10):
         futures.append(pool.schedule(function, args=[index]))
     pool.close()
     pool.join()
     for future in futures:
         self.assertTrue(future.done())
Example #9
    def test_process_pool_stop_stopped_callback(self):
        """Process Pool Spawn is stopped in callback."""
        with ProcessPool(max_workers=1, context=mp_context) as pool:

            def stop_pool_callback(_):
                pool.stop()

            future = pool.schedule(function, args=[1])
            future.add_done_callback(stop_pool_callback)
            with self.assertRaises(RuntimeError):
                for index in range(10):
                    time.sleep(0.1)
                    pool.schedule(long_function, args=[index])

        self.assertFalse(pool.active)
Example #10
def process_all(func, arr, timeout_func=None, total=None, max_workers=None, timeout=None):
    with ProcessPool(max_workers=max_workers) as pool:
        future = pool.map(func, arr, timeout=timeout)

        iterator = future.result()
        results = []
        for i in progress_bar(range(len(arr)), total=len(arr)):
            try:
                result = next(iterator)
                if result: results.append(result)
            except StopIteration:
                break  
            except TimeoutError as error:
                if timeout_func: timeout_func(arr[i], error.args[1])
    return results
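A hypothetical invocation of process_all; square and on_timeout are illustrative stand-ins, not part of the original snippet:

def square(x):
    return x * x

def on_timeout(item, seconds):
    # Called for every input whose task exceeded the timeout.
    print(f"{item!r} timed out after {seconds} seconds")

squares = process_all(square, list(range(100)), timeout_func=on_timeout, timeout=5)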
Example #11
class Processor:

    conf = None
    api = None
    log_q = None

    def __init__(self, recording_type, processing_state, process_func,
                 num_workers):
        self.recording_type = recording_type
        self.processing_state = processing_state
        self.process_func = process_func
        self.num_workers = num_workers
        self.pool = ProcessPool(num_workers,
                                initializer=logs.init_worker,
                                initargs=(self.log_q, ))
        self.in_progress = {}

    def poll(self):
        self.reap_completed()
        if len(self.in_progress) >= self.num_workers:
            return True

        recording = self.api.next_job(self.recording_type,
                                      self.processing_state)
        if recording:
            logger.debug(
                "scheduling %s (%s: %s)",
                recording["id"],
                recording["type"],
                self.processing_state,
            )
            future = self.pool.schedule(self.process_func,
                                        (recording, self.conf))
            self.in_progress[recording["id"]] = future
            return True
        return False

    def reap_completed(self):
        for recording_id, future in list(self.in_progress.items()):
            if future.done():
                del self.in_progress[recording_id]
                err = future.exception()
                if err:
                    msg = f"{self.recording_type}.{self.processing_state} processing of {recording_id} failed: {err}"
                    tb = getattr(err, "traceback", None)
                    if tb:
                        msg += f":\n{tb}"
                    logger.error(msg)
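A hypothetical driver loop for Processor, assuming the class attributes conf, api, and log_q have been configured elsewhere; process_recording and the literal recording-type/processing-state strings are illustrative stand-ins:

import time

def process_recording(recording, conf):
    # Illustrative worker: the real processing of a recording happens here.
    return recording["id"]

processor = Processor("audio", "toProcess", process_recording, num_workers=4)
while True:
    if not processor.poll():
        time.sleep(5)  # nothing to schedule right now; back off before polling again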
Example #12
def manager(date_str):

    #unpack and list daily zip
    vol_zip = f'{vol_root}/{RADAR_ID:02}/{date_str[0:4]}/vol/{RADAR_ID:02}_{date_str}.pvol.zip'
    temp_dir = True
    vol_ffn_list = file_util.unpack_zip(vol_zip)

    for arg_slice in file_util.chunks(vol_ffn_list, NCPU):
        with ProcessPool() as pool:
            future = pool.map(buffer, arg_slice, timeout=360)
            iterator = future.result()
            while True:
                try:
                    _ = next(iterator)
                except StopIteration:
                    break
                except TimeoutError as error:
                    print("function took longer than %d seconds" %
                          error.args[1])
                except ProcessExpired as error:
                    print("%s. Exit code: %d" % (error, error.exitcode))
                except TypeError as error:
                    print("TypeError in worker: %s" % error)
                except Exception:
                    traceback.print_exc()


#     import time
#     for vol_ffn in vol_ffn_list:
#         start = time.time()
#         torrentfields(vol_ffn)
#         end = time.time()
#         print('timer', end - start)

#     #run retrieval
#     i            = 0
#     n_files      = len(vol_ffn_list)
#     for flist_chunk in file_util.chunks(vol_ffn_list, NCPU): #CUSTOM RANGE USED
#         bag = db.from_sequence(flist_chunk).map(buffer)
#         _ = bag.compute()
#         i += NCPU
#         del bag
#         print('processed: ' + str(round(i/n_files*100,2)))

    # clean up
    temp_vol_dir = os.path.dirname(vol_ffn_list[0])
    if '/tmp' in temp_vol_dir:
        os.system('rm -rf ' + temp_vol_dir)
Example #13
def dump_packs(artifact_manager: ArtifactsManager, pool: ProcessPool) -> List[ProcessFuture]:
    """ Create futures which dumps conditionally content/Packs.

    Args:
        artifact_manager: Artifacts manager object.
        pool: Process pool to schedule new processes.

    Returns:
        List[ProcessFuture]: List of pebble futures to wait for.
    """
    futures = []
    for pack_name, pack in artifact_manager.content.packs.items():
        if pack_name not in IGNORED_PACKS:
            futures.append(pool.schedule(dump_pack, args=(artifact_manager, pack)))

    return futures
Example #14
    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # Start the process pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue,))

        return self._pool
Example #15
@contextmanager  # assumed: from contextlib import contextmanager
def ProcessPoolHandler() -> ProcessPool:
    """ Process pool handler which terminates all processes in case of an exception.

    Yields:
        ProcessPool: Pebble process pool.
    """
    with ProcessPool(max_workers=3) as pool:
        try:
            yield pool
        except Exception:
            logging.exception(
                "Gracefully release all resources due to Error...")
            raise
        finally:
            pool.close()
            pool.join()
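Assuming the @contextmanager decorator noted above, the handler could be used like this; some_task and the __main__ guard are illustrative, not from the original source:

def some_task(value):
    # Illustrative stand-in for real work.
    return value * 2

if __name__ == "__main__":
    with ProcessPoolHandler() as pool:
        future = pool.schedule(some_task, args=(21,))
        print(future.result())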
Example #16
def muti():
    results = []
    errorcode = []
    with ProcessPool(max_workers=8) as pool:
        totallist = []
        for i in range(Ilist):
            aList[i] = 2.0 + 0.2 * i
            for m in range(PressureS):
                Ipressure[m] = m * 2 + 1
                for t in range(tempertureS):
                    Itemperture[t] = 300 + 50 * t
                    totallist.append((aList[i], Ipressure[m], Itemperture[t]))

        future = pool.map(flamespeedcal, totallist, timeout=10000)
        iterator = future.result()
        while True:
            try:
                result = next(iterator)
                results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                errorcode.append("function took longer than %d seconds" %
                                 error.args[1])
            except ProcessExpired as error:
                errorcode.append("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                errorcode.append("function raised %s" % error)
                errorcode.append(error.traceback)

    with open("finaloutputdataO2N2.csv", 'w') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([
            "u(m/s)", "T(K)", "rho(kg/m3)", "pressure", "H2", "H", "O", "O2",
            "OH", "H2O", "HO2", "H2O2", "C", "CH", "CH2", "CH2(S)", "CH3",
            "CH4", "CO", "CO2", "HCO", "CH2O", "CH2OH", "CH3O", "CH3OH", "C2H",
            "C2H2", "C2H3", "C2H4", "C2H5", "C2H6", "HCCO", "CH2CO", "HCCOH",
            "N", "NH", "NH2", "NH3", "NNH", "NO", "NO2", "N2O", "HNO", "CN",
            "HCN", "H2CN", "HCNN", "HCNO", "HOCN", "HNCO", "NCO", "N2", "AR",
            "C3H7", "C3H8", "CH2CHO", "CH3CHO"
        ])
        writer.writerows(results)
    if totallist:
        with open("errorcodeO2N2.txt", "w") as errorfile:
            errorfile.write(str(errorcode))
Example #17
def main(year: int) -> None:
    """
    It calls the production line and manages it. Buffer function that is used
    to catch any problem with the processing line without screwing the whole
    multiprocessing stuff.

    Parameters:
    ===========
    infile: str
        Name of the input radar file.
    outpath: str
        Path for saving output data.
    """
    flist = glob.glob(os.path.join(INPATH, f"{year}/**/*.nc"))
    outlist = glob.glob(os.path.join(OUTPATH, f"v2021/ppi/{year}/**/*.nc"))

    oset = set([f[-18:-3] for f in outlist])
    iset = set([f[-18:-3] for f in flist])
    datelist = [*oset ^ iset]

    if len(datelist) == 0:
        print(f"No file to process for {YEAR}.")
        return None
    print(f"{year}: {len(datelist)} files to process.")

    inflist = []
    for d in datelist:
        inflist.append([f for f in flist if d in f][0])

    for fchunk in chunks(inflist, NCPUS):
        with ProcessPool() as pool:
            future = pool.map(buffer, fchunk, timeout=360)
            iterator = future.result()

            while True:
                try:
                    _ = next(iterator)
                except StopIteration:
                    break
                except TimeoutError as error:
                    print("function took longer than %d seconds" % error.args[1])
                except ProcessExpired as error:
                    print("%s. Exit code: %d" % (error, error.exitcode))
                except TypeError:
                    continue
                except Exception:
                    traceback.print_exc()
Example #18
    def test_process_pool_map_timeout(self):
        """Process Pool Fork map with timeout."""
        raised = []
        elements = [1, 2, 3]

        with ProcessPool(max_workers=1) as pool:
            future = pool.map(long_function, elements, timeout=0.1)
            generator = future.result()
            while True:
                try:
                    next(generator)
                except TimeoutError as error:
                    raised.append(error)
                except StopIteration:
                    break

        self.assertTrue(all((isinstance(e, TimeoutError) for e in raised)))
Example #19
    def test_process_pool_map_broken_pool(self):
        """Process Pool Forkserver Broken Pool."""
        elements = [1, 2, 3]

        with ProcessPool(max_workers=1, context=mp_context) as pool:
            future = pool.map(long_function, elements, timeout=1)
            generator = future.result()
            pool._context.state = ERROR
            while True:
                try:
                    next(generator)
                except TimeoutError as error:
                    self.assertFalse(pool.active)
                    future.cancel()
                    break
                except StopIteration:
                    break
Example #20
    def test_process_pool_map_error(self):
        """Process Pool Forkserver errors do not stop the iteration."""
        raised = None
        elements = [1, 'a', 3]

        with ProcessPool(max_workers=1, context=mp_context) as pool:
            future = pool.map(function, elements)
            generator = future.result()
            while True:
                try:
                    next(generator)
                except TypeError as error:
                    raised = error
                except StopIteration:
                    break

        self.assertTrue(isinstance(raised, TypeError))
Example #21
    def start(self):
        """start of the program"""
        # get domains from file
        self.get_domains()

        # create a pool for parallel processing in worker processes
        with ProcessPool(max_workers=5, max_tasks=10) as pool:
            for i in self.domains:
                future = pool.schedule(self.check_domain, args=[i], timeout=self.timeout)
                future.item = i
                future.add_done_callback(self.task_done)

        # add objects to the database with which a connection could not be established
        try:
            self.run_buffer()
        except Exception as e:
            print(f'run_buffer error: {e}')
Example #22
def main():
    install_logging('Update_Tests_step.log', include_process_name=True)
    existing_test_playbooks = load_test_data_from_conf_json()
    with ProcessPool(max_workers=os.cpu_count(), max_tasks=100) as pool:
        for pack_name in os.listdir(PACKS_DIR):
            future_object = pool.schedule(generate_pack_tests_configuration,
                                          args=(pack_name,
                                                existing_test_playbooks),
                                          timeout=20)
            future_object.add_done_callback(update_new_conf_json)

    add_to_conf_json(NEW_CONF_JSON_OBJECT)
    logging.success(
        f'Added {len(NEW_CONF_JSON_OBJECT)} tests to the conf.json')
    logging.success(
        f'Added the following objects to the conf.json:\n{pformat(NEW_CONF_JSON_OBJECT)}'
    )
Example #23
 def find_tlds(self):
     dom_list = [self.known_domain + '.' + tld for tld in self.tld_list]
     try:
         pool = ThreadPool(max_workers=self.max_workers,
                           max_tasks=self.max_tasks)
         results = pool.map(self.check_tld, dom_list, timeout=self.timeout)
         pool.close()
         pool.join()
         print(list(results.result()))
     except Exception as e:
         print(repr(e))
Example #24
def run_mc_region_level(
    input_allc_files, 
    input_bed_file, 
    output_prefix, 
    bed_file_name_column=False, 
    contexts=CONTEXTS,
    compress=True, 
    cap=2,
    overwrite=False,
    nprocs=1, 
    timeout=None,
    ):
    """
    run mc_region_level_worker in parallel
    """
    # assume certain structures in the inputs and outputs
    # allc_xxx.tsv.gz -> output_prefix + "_" + allc_xxx.tsv.gz
    # but the output_files should remove .gz suffix at first 
    output_files = [
        output_prefix+"_"+os.path.basename(input_allc_file).replace('.tsv.gz', '.tsv')
        for input_allc_file in input_allc_files] 

    nprocs = min(nprocs, len(input_allc_files))
    logging.info("""Begin run_mc_region_level.\n
                Number of processes:{}\n
                Number of allc_files:{}\n
                Bed file: {}\n
                """.format(nprocs, len(input_allc_files), input_bed_file))
    
    # parallelized processing
    with ProcessPool(max_workers=nprocs, max_tasks=10) as pool:
        for input_allc_file, output_file in zip(input_allc_files, output_files):
            future = pool.schedule(mc_region_level_worker, 
                                   args=(input_allc_file, output_file, input_bed_file), 
                                   kwargs={
                                        'bed_file_name_column': bed_file_name_column,
                                        'contexts': contexts, 
                                        'compress': compress,
                                        'cap': cap,
                                        'overwrite': overwrite,
                                        },
                                   timeout=timeout)
            future.add_done_callback(utils.task_done)
    # end parallel
    return
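run_mc_region_level registers utils.task_done as a done-callback, but the helper itself is not shown. A minimal sketch of what such a callback might do, assuming pebble's TimeoutError and ProcessExpired semantics; this is not the actual utils implementation:

import logging
from concurrent.futures import TimeoutError
from pebble import ProcessExpired

def task_done(future):
    try:
        future.result()
    except TimeoutError as error:
        logging.error("worker took longer than %d seconds", error.args[1])
    except ProcessExpired as error:
        logging.error("%s. Exit code: %d", error, error.exitcode)
    except Exception:
        logging.exception("worker raised an exception")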
Example #25
 def generated_chunked_parallelized_results(self,
                                            partially_bound_function,
                                            tasks,
                                            n_processes,
                                            chunksize=1):
     with ProcessPool(n_processes, max_tasks=1) as pool:
         future = pool.map(
             partially_bound_function,
             [list(task_batch) for task_batch in Batch(tasks, chunksize)],
         )
         iterator = future.result()
         while True:
             try:
                 yield next(iterator)
             except StopIteration:
                 break
             except Exception:
                 logging.exception('Child failure')
Example #26
    def play_generation(self):

        with ProcessPool(max_workers=cpu_count() - 1) as pool:  # TODO: this takes too much time; does it really run concurrently?
            generation_thread_partial = partial(generation_threed, self)
            future = pool.map(generation_thread_partial, list(self.population.keys()), timeout=60 * 5)

            iterator = future.result()
            while True:
                try:
                    result = next(iterator)
                    self.results[result[0]] = result[1], result[2]
                    print("*", end='')
                except StopIteration:
                    print("\n"+"_" * 50)
                    break
                except TimeoutError as error:
                    print(f"function took longer than {error.args[1]} seconds", flush=True)
Example #27
def parallel_load_image_tensor(image_file_paths, target_size, batch_count):
    batched_image_file_paths = construct_batch_arguments(
        image_file_paths, target_size, batch_count)

    standarized_images = [
        np.array([]).reshape([0, target_size[0], target_size[1], 3])
    ]
    is_successful = []

    with ProcessPool(max_workers=config.MULTI_CORE_COUNT) as pool:
        future = pool.map(load_image_tensor_task,
                          batched_image_file_paths,
                          timeout=config.TIMEOUT_SECS,
                          chunksize=config.CHUNK_SIZE)

        iterator = future.result()
        i = 0
        while True:
            try:
                standardized_image_batch, is_successful_batch = next(iterator)
                standarized_images.append(standardized_image_batch)
                is_successful.append(is_successful_batch)
            except StopIteration:
                break
            except TimeoutError as error:

                is_successful.append(
                    np.zeros([len(batched_image_file_paths[i][0])
                              ]).astype(bool))
                print("function took longer than %d seconds" % error.args[1])

            print("RESULTS PROCESSED = " + str(i) + " / " +
                  str(len(batched_image_file_paths) / config.CHUNK_SIZE))

            i = i + 1

    print("finished mapping")

    print("images retrieved = " + str(len(standarized_images)))

    standarized_images = np.concatenate(standarized_images)
    is_successful = np.concatenate(is_successful)

    return standarized_images, is_successful
Example #28
def calculate_all_packs_dependencies(pack_dependencies_result: dict,
                                     id_set: dict, packs: list) -> None:
    """
    Calculates the pack dependencies and adds them to 'pack_dependencies_result' in parallel.
    First - the method generates the full dependency graph.

    Them - using a process pool we extract the dependencies of each pack and adds them to the 'pack_dependencies_result'
    Args:
        pack_dependencies_result: The dict to which the results should be added
        id_set: The id_set content
        packs: The packs that should be part of the dependencies calculation
    """
    def add_pack_metadata_results(future: ProcessFuture) -> None:
        """
        This is a callback that should be called once the result of the future is ready.
        The results include: first_level_dependencies, all_level_dependencies, pack_name
        Using these results we write the dependencies
        """
        try:
            first_level_dependencies, all_level_dependencies, pack_name = future.result(
            )  # blocks until results ready
            logging.debug(
                f'Got dependencies for pack {pack_name}\n: {pformat(all_level_dependencies)}'
            )
            pack_dependencies_result[pack_name] = {
                "dependencies": first_level_dependencies,
                "displayedImages": list(first_level_dependencies.keys()),
                "allLevelDependencies": all_level_dependencies,
                "path": os.path.join(PACKS_FOLDER, pack_name),
                "fullPath":
                os.path.abspath(os.path.join(PACKS_FOLDER, pack_name))
            }
        except Exception:
            logging.exception('Failed to collect pack dependencies results')

    # Generating one graph with dependencies for all packs
    dependency_graph = get_all_packs_dependency_graph(id_set, packs)

    with ProcessPool(max_workers=cpu_count(), max_tasks=100) as pool:
        for pack in dependency_graph:
            future_object = pool.schedule(calculate_single_pack_dependencies,
                                          args=(pack, dependency_graph),
                                          timeout=10)
            future_object.add_done_callback(add_pack_metadata_results)
Example #29
def watch(queue_endpoint, loop=True):
    logging.info(f"Started watching {queue_endpoint}")
    session = requests.Session()
    # Retries for up to 2 minutes, by default
    retry = Retry(connect=30, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount(queue_endpoint, adapter)
    with ProcessPool(max_tasks=50) as pool:
        while True:
            logging.debug(f"Polling {queue_endpoint}")
            try:
                result = session.get(
                    queue_endpoint,
                    params={
                        "started": False,
                        "page_size": 100
                    },
                    auth=get_auth(),
                )
            except requests.exceptions.ConnectionError:
                logging.exception("Connection error; sleeping for 15 mins")
                time.sleep(60 * 15)
                continue
            result.raise_for_status()
            jobs = result.json()
            for job in jobs["results"]:
                assert (
                    job["operation"] == "generate_cohort"
                ), f"The only currently-supported operation is `generate_cohort`, not `{job['operation']}`"
                response = requests.patch(job["url"],
                                          json={"started": True},
                                          auth=get_auth())
                response.raise_for_status()
                future = pool.schedule(
                    run_job,
                    (job, ),
                    timeout=6 * HOUR,
                )
                future.job = job
                future.add_done_callback(report_result)
            if loop:
                time.sleep(POLL_INTERVAL)
            else:
                break
Example #30
def run_allc_count_contexts(
    input_allc_files, 
    output_prefix, 
    compress=True, 
    overwrite=False,
    nprocs=1,
    timeout=None,
    ):
    """
    run allc_count_context_worker_wrap in parallel
    """
    # assume certain structures in the inputs and outputs
    # allc_xxx.tsv.gz -> output_prefix + "_" + allc_xxx.tsv.gz
    # but the output_files should remove .gz suffix at first 
    nprocs = min(nprocs, len(input_allc_files))
    logging.info("""Begin run bin allc.\n
                Number of processes:{}\n
                Number of allc_files:{}\n
                """.format(nprocs, len(input_allc_files)))

    output_files = [
        output_prefix+"_"+os.path.basename(input_allc_file).replace('.tsv.gz', '.tsv')
        for input_allc_file in input_allc_files] 

    output_dir = os.path.dirname(output_prefix) 
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # parallelized processing
    with ProcessPool(max_workers=nprocs, max_tasks=10) as pool:
        for input_allc_file, output_file in zip(input_allc_files, output_files):
            future = pool.schedule(allc_count_context_worker_wrap, 
                                   args=(input_allc_file, output_file,), 
                                   kwargs={
                                        'compress': compress,
                                        'overwrite': overwrite,
                                        },
                                   timeout=timeout)
            future.add_done_callback(utils.task_done)
    # end parallel
    return 
Example #31
def _parallel_init(eval_func, iterable, metab_index, base_biomass, model,
                   weight_fraction):
    """
    This function runs the evaluation function in parallel with 3 arguments.
    It is used twice: first to get the metabolite that the model can produce,
    second to verify the solvability of the generated individuals (multiple metabolites)
    """
    processes = 4
    metab_index_iter = repeat(metab_index)
    base_biomass_iter = repeat(base_biomass)
    model_iter = repeat(model)
    weight_fraction_iter = repeat(weight_fraction)
    with ProcessPool(max_workers=processes, max_tasks=4) as pool:
        future = pool.map(eval_func,
                          iterable,
                          metab_index_iter,
                          base_biomass_iter,
                          model_iter,
                          weight_fraction_iter,
                          timeout=400)
        iterator = future.result()
        all_results = []
        while True:
            try:
                result = next(iterator)
                all_results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                print("function took longer than %d seconds" % error.args[1])
                result = 0, 100
                all_results.append(result)
            except ProcessExpired as error:
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                print("function raised %s" % error)
                print(error.traceback)  # Python's traceback of remote process

    return all_results
Example #32
def process(**kwargs):
    assert isdir(kwargs['database'])
    path = join(kwargs['database'],
                "*{}.hdf".format(extensions_dict[kwargs['action']]))
    files = list(map(lambda file: abspath(file), glob(path)))
    assert len(files) > 0
    args = list(map(lambda file: merge_dicts({'file': file}, kwargs), files))
    with ProcessPool(max_workers=cpu_count()) as pool:
        future = pool.map(main_entrance_point,
                          args,
                          timeout=timeout_dict[kwargs['action']])

    iterator = future.result()
    while True:
        try:
            next(iterator)
        except StopIteration:
            break
        except TimeoutError as error:
            pass
            #print("function took longer than %d seconds" % error.args[1])
        time.sleep(0.5)
Example #33
class PebbleExecutor(concurrent.futures.Executor):
    def __init__(self, max_workers, timeout=None):
        self.pool = ProcessPool(max_workers=max_workers)
        self.timeout = timeout

    def submit(self, fn, *args, **kwargs):
        return self.pool.schedule(fn, args=args, timeout=self.timeout)

    def map(self, func, *iterables, timeout=None, chunksize=1):
        raise NotImplementedError("This wrapper does not support `map`.")

    def shutdown(self, wait=True):
        if wait:
            log.info("Closing workers...")
            self.pool.close()
        else:
            log.info("Ending workers...")
            self.pool.stop()
        self.pool.join()
        log.info("Workers joined.")
Example #34
    def propagate(self, tasks: List[Task], **kwargs) -> List[Result]:
        output, instance = kwargs['output'], kwargs['instance']

        pool = ProcessPool(
            max_workers=self.processes,
            initializer=propagate_init,
            initargs=(self.propagator, instance)
        )
        results = []
        future = pool.map(propagate_solve, tasks)
        try:
            for result in future.result():
                results.append(result)
                output.debug(2, 3, 'Already solved %d tasks' % len(results))
        except Exception as e:
            output.debug(0, 1, 'Error while fetching pool results: %s' % e)
        pool.stop()
        pool.join()

        return [result.set_value(self.measure.get(result)) for result in results]
Example #35
def main():
    args.dump_root = Path(args.dump_root)
    args.dump_root.mkdir_p()

    n_scenes = len(data_loader.scenes)
    print('Found {} potential scenes'.format(n_scenes))
    print('Retrieving frames')
    if args.num_threads == 1:
        for scene in tqdm(data_loader.scenes):
            dump_example(args, scene)
    else:
        with ProcessPool(max_workers=args.num_threads) as pool:
            tasks = pool.map(dump_example, [args] * n_scenes,
                             data_loader.scenes)
            try:
                for _ in tqdm(tasks.result(), total=n_scenes):
                    pass
            except KeyboardInterrupt as e:
                tasks.cancel()
                raise e

    print('Generating train val lists')
    np.random.seed(8964)
    # to avoid DataFlow snooping, we make two cameras of the same scene fall in the same set, train or val
    subdirs = args.dump_root.dirs()
    canonic_prefixes = set([subdir.basename()[:-2] for subdir in subdirs])
    with open(args.dump_root / 'train.txt', 'w') as tf:
        with open(args.dump_root / 'val.txt', 'w') as vf:
            for pr in tqdm(canonic_prefixes):
                corresponding_dirs = args.dump_root.dirs('{}*'.format(pr))
                if np.random.random() < 0.1:
                    for s in corresponding_dirs:
                        vf.write('{}\n'.format(s.name))
                else:
                    for s in corresponding_dirs:
                        tf.write('{}\n'.format(s.name))
                        if args.with_depth and args.no_train_gt:
                            for gt_file in s.files('*.npy'):
                                gt_file.remove_p()
Example #36
def crack_zip(file_path):
    logging.info('[7z] Decrypting 7z file')
    dict_txt_files = glob.glob(
        rf"./logged_in/archive_cracker/dictionaries/*.txt"
    )  # list of dictionary files from the folder
    if len(dict_txt_files) == 0:
        logging.error('[7z] Dict not found')
        exit(1)
    future_list = []
    with ProcessPool(max_workers=2, max_tasks=1000) as pool:
        future_list.append(pool.schedule(SevenZip(file_path).brute_crack))
        for dict_path in dict_txt_files:
            future_list.append(
                pool.schedule(SevenZip(file_path).check_zip,
                              args=(dict_path, )))
            time.sleep(0.3)
        found = False
        # from concurrent.futures import ProcessPoolExecutor, wait, FIRST_COMPLETED
        # done, not_done = wait(thread_list, timeout=6, return_when=FIRST_COMPLETED) # Alternative
        while not found:
            if len(future_list) == 0:
                break
            for f in list(future_list):  # copy: future_list is mutated inside the loop
                if f.done():
                    ret = f.result()
                    if ret is None:
                        f.cancel()
                        future_list.remove(f)
                        continue
                    else:
                        found = True
                        for _f in list(future_list):  # cancel all processes left
                            _f.cancel()
                        future_list.clear()
                        pool.stop()
                        return ret
                else:
                    continue
Example #37
def parallelize(partially_bound_function, tasks, n_processes):
    num_successes = 0
    num_failures = 0
    results = []
    with ProcessPool(n_processes, max_tasks=1) as pool:
        future = pool.map(partially_bound_function, tasks)
        iterator = future.result()
        results = []
        while True:
            try:
                result = next(iterator)
            except StopIteration:
                break
            except Exception:
                logging.exception('Child failure')
                num_failures += 1
            else:
                results.append(result)
                num_successes += 1

        logging.info("Done. successes: %s, failures: %s", num_successes,
                     num_failures)
        return results
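A hypothetical usage of parallelize; scale is an illustrative stand-in and functools.partial supplies the partially bound function:

from functools import partial

def scale(factor, x):
    return factor * x

if __name__ == "__main__":
    doubled = parallelize(partial(scale, 2), range(10), n_processes=4)
    print(doubled)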
Example #38
    def run_parallel_tests(self):
        assert not self.futures
        assert not self.temporary_folders
        with ProcessPool(max_workers=self.parallel_tests) as pool:
            order = 1
            self.timeout_count = 0
            while self.state is not None:
                # do not create too many states
                if len(self.futures) >= self.parallel_tests:
                    wait(self.futures, return_when=FIRST_COMPLETED)

                quit_loop = self.process_done_futures()
                if quit_loop:
                    success = self.wait_for_first_success()
                    self.terminate_all(pool)
                    return success

                folder = tempfile.mkdtemp(prefix=self.TEMP_PREFIX,
                                          dir=self.root)
                test_env = TestEnvironment(
                    self.state, order, self.test_script, folder,
                    self.current_test_case,
                    self.test_cases ^ {self.current_test_case},
                    self.current_pass.transform, self.pid_queue)
                future = pool.schedule(test_env.run, timeout=self.timeout)
                self.temporary_folders[future] = folder
                self.futures.append(future)
                order += 1
                state = self.current_pass.advance(self.current_test_case,
                                                  self.state)
                # we are at the end of enumeration
                if state is None:
                    success = self.wait_for_first_success()
                    self.terminate_all(pool)
                    return success
                else:
                    self.state = state
Example #39
def main():
    range_list = list(range(10))
    range_list.extend(range(10, 0, -1))
    randoclass = RandoClass()
    with ProcessPool() as pool:
        future = pool.map(function, range_list, itertools.repeat(randoclass), \
                timeout=5)

        iterator = future.result()
        all_results = []
        while True:
            try:
                result = next(iterator)
                all_results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                print("function took longer than %d seconds" % error.args[1])
            except ProcessExpired as error:
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                print("function raised %s" % error)
                print(error.traceback)  # Python's traceback of remote process
    return all_results
Example #40
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all its tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an infinite
    loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() // 2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling() or
                         user_wants_pytracemalloc() or
                         user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    # Document parsers can go crazy on memory usage when parsing some very
    # specific HTML / PDF documents. Sometimes when this happens the operating
    # system does an out of memory (OOM) kill of a "randomly chosen" process.
    #
    # We limit the memory which can be used by parsing processes to this constant
    #
    # The feature was tested in test_pebble_limit_memory_usage.py
    MEMORY_LIMIT = get_memory_limit()

    def __init__(self):
        self._pool = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # Start the process pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue, self.MEMORY_LIMIT))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.stop()
            self._pool.join()
            self._pool = None

    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker. This method has two
        features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        """
        # Start the worker processes if needed
        self.start_workers()

        filename = write_http_response_to_temp_file(http_response)

        apply_args = (process_document_parser,
                      filename,
                      self.DEBUG)

        # Push the task to the workers
        try:
            future = self._pool.schedule(apply_with_return_error,
                                         args=(apply_args,),
                                         timeout=self.PARSER_TIMEOUT)
        except RuntimeError as rte:
            # Remove the temp file used to send data to the process
            remove_file_if_exists(filename)

            # We get here when the pebble pool management thread dies and
            # suddenly starts answering all calls with:
            #
            # RuntimeError('Unexpected error within the Pool')
            #
            # The scan needs to stop because we can't parse any more
            # HTTP responses, which is a very critical part of the process
            msg = str(rte)
            raise ScanMustStopException(msg)

        try:
            process_result = future.result()
        except TimeoutError:
            msg = ('[timeout] The parser took more than %s seconds'
                   ' to complete parsing of "%s", killed it!')
            args = (self.PARSER_TIMEOUT, http_response.get_url())
            raise TimeoutError(msg % args)
        except ProcessExpired:
            # We reach here when the process died because of an error, we
            # handle this just like when the parser takes a lot of time and
            # we're unable to retrieve an answer from it
            msg = ('One of the parser processes died unexpectedly, this could'
                   ' be because of a bug, the operating system triggering OOM'
                   ' kills, etc. The scanner will continue with the next'
                   ' document, but the scan results might be inconsistent.')
            raise TimeoutError(msg)
        finally:
            # Remove the temp file used to send data to the process, we already
            # have the result, so this file is not needed anymore
            remove_file_if_exists(filename)

        # We still need to perform some error handling here...
        if isinstance(process_result, Error):
            if isinstance(process_result.exc_value, MemoryError):
                msg = ('The parser exceeded the memory usage limit of %s bytes'
                       ' while trying to parse "%s". The parser was stopped in'
                       ' order to prevent OOM issues.')
                args = (self.MEMORY_LIMIT, http_response.get_url())
                om.out.debug(msg % args)
                raise MemoryError(msg % args)

            process_result.reraise()

        try:
            parser_output = load_object_from_temp_file(process_result)
        except Exception as e:
            msg = 'Failed to deserialize sub-process result. Exception: "%s"'
            args = (e,)
            raise Exception(msg % args)
Example #41
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all its tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an infinite
    loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() // 2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling() or
                         user_wants_pytracemalloc() or
                         user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    def __init__(self):
        self._pool = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # Start the process pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue,))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.stop()
            self._pool.join()
            self._pool = None

    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker. This method has two
        features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_document_parser,
                      http_response,
                      self.DEBUG)

        # Push the task to the workers
        future = self._pool.schedule(apply_with_return_error,
                                     args=(apply_args,),
                                     timeout=self.PARSER_TIMEOUT)

        try:
            parser_output = future.result()
        except TimeoutError:
            # Act just like when there is no parser
            msg = ('[timeout] The parser took more than %s seconds'
                   ' to complete parsing of "%s", killed it!')

            args = (self.PARSER_TIMEOUT, http_response.get_url())

            raise BaseFrameworkException(msg % args)
        else:
            if isinstance(parser_output, Error):
                parser_output.reraise()

        return parser_output

    def get_tags_by_filter(self, http_response, tags, yield_text=False):
        """
        Return Tag instances for the tags which match the `tags` filter,
        parsing and all lxml stuff is done in another process and the Tag
        instances are sent to the main process (the one calling this method)
        through a pipe

        Some things to note:
            * Not all responses can be parsed, so I need to call DocumentParser
              and handle exceptions

            * The parser selected by DocumentParser might not have tags, and
              it might not have get_tags_by_filter. In this case just return an
              empty list

            * Just like get_document_parser_for we have a timeout in place,
              when we hit the timeout just return an empty list, this is not
              the best thing to do, but makes the plugin code easier to write
              (plugins would ignore this anyways)

        :param tags: The filter
        :param yield_text: Should we yield the tag text?
        :return: A list of Tag instances as defined in sgml.py

        :see: SGMLParser.get_tags_by_filter
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_get_tags_by_filter,
                      http_response,
                      tags,
                      yield_text,
                      self.DEBUG)

        # Push the task to the workers
        future = self._pool.schedule(apply_with_return_error,
                                     args=(apply_args,),
                                     timeout=self.PARSER_TIMEOUT)

        try:
            filtered_tags = future.result()
        except TimeoutError:
            # We hit a timeout, return an empty list
            return []
        else:
            # There was an exception in the parser, maybe the HTML was really
            # broken, or it wasn't an HTML at all.
            if isinstance(filtered_tags, Error):
                return []

        return filtered_tags