def __init__(self, hyperopt_sampler: HyperoptSampler, output_feature: str, metric: str, split: str, num_workers: int = 2, num_cpus_per_worker: int = -1, num_gpus_per_worker: int = -1, fiber_backend: str = "local", **kwargs) -> None:
    """Initialize the Fiber-based hyperopt executor.

    Initializes the base ``HyperoptExecutor`` state, brings up the fiber
    backend named by ``fiber_backend``, and creates a pool of
    ``num_workers`` worker processes.  A value of -1 for the per-worker
    cpu/gpu counts means "no explicit limit" and is omitted from
    ``self.resource_limits``.
    """
    import fiber

    HyperoptExecutor.__init__(self, hyperopt_sampler, output_feature, metric, split)

    fiber.init(backend=fiber_backend)
    self.fiber_meta = fiber.meta

    self.num_cpus_per_worker = num_cpus_per_worker
    self.num_gpus_per_worker = num_gpus_per_worker

    # Record only the limits that were explicitly set (-1 == unlimited).
    limits = {}
    for resource, amount in (("cpu", num_cpus_per_worker), ("gpu", num_gpus_per_worker)):
        if amount != -1:
            limits[resource] = amount
    self.resource_limits = limits

    self.num_workers = num_workers
    self.pool = fiber.Pool(num_workers)
def test_pool_with_no_argument(self):
    """A Pool created with default arguments should be usable end to end."""
    # Make sure no exception is raised
    pool = fiber.Pool()
    pool.map(print, [1, 2, 3, 4])
    pool.terminate()
    pool.join()
    assert 1 == 1
def test_start_timeout(self):
    """Pool should recover from workers that time out during startup.

    Swaps the docker backend for a ``TimeoutBackend`` that stalls the
    first ``n`` job launches, then verifies ``map`` still produces the
    correct results.
    """
    fiber.backend.get_backend(name="docker")
    old_backend = fiber.backend._backends["docker"]
    fiber.backend._backends["docker"] = TimeoutBackend(n=4)
    try:
        p = fiber.Pool(4)
        res = p.map(square_worker, [1, 2, 3, 4])
        p.terminate()
    finally:
        # Restore the real backend even if the pool fails; the original
        # code only restored it on the success path, which would leave
        # TimeoutBackend installed for every subsequent test.
        fiber.backend._backends["docker"] = old_backend
    assert res == [i**2 for i in range(1, 5)]
def test_error_handling(self):
    """map() with error_handling=True should recover from worker errors."""
    # Construct the pool BEFORE entering try/finally: in the original,
    # a failing Pool() constructor made the finally block raise
    # NameError on `pool`, masking the real exception.
    pool = fiber.Pool(3, error_handling=True)
    try:
        pool.wait_until_workers_up()
        res = pool.map(random_error_worker, [i for i in range(300)], chunksize=1)
        assert res == [i for i in range(300)]
    finally:
        pool.terminate()
        pool.join()
def bench_fiber(tasks, workers, task_duration, warmup=True, pool=None):
    """Benchmark ``fiber.Pool.map`` over ``tasks`` sleep tasks.

    Args:
        tasks: number of tasks to submit.
        workers: pool size used when a pool must be created.
        task_duration: seconds each task sleeps.
        warmup: when True, run one untimed map() pass first.
        pool: optional existing fiber.Pool to reuse.

    Returns:
        Elapsed wall-clock seconds of the timed map() call.
    """
    if pool is None:
        # Bug fix: the original only created the pool inside the warmup
        # branch, so warmup=False with no pool crashed on `pool.map`.
        pool = fiber.Pool(workers)
    if warmup:
        pool.map(sleep_worker, [task_duration for x in range(tasks)], chunksize=1)
        logger.debug("warm up finished")
    res, elapsed = timeit(
        pool.map,
        sleep_worker,
        [task_duration for x in range(tasks)],
        chunksize=1,
    )
    return elapsed
def test_error_handling_unordered(self):
    """imap_unordered with error_handling=True should recover from worker errors."""
    # Construct the pool BEFORE try/finally so a failed Pool()
    # constructor cannot trigger a NameError in the cleanup path.
    pool = fiber.Pool(3, error_handling=True)
    try:
        pool.wait_until_workers_up()
        res_iter = pool.imap_unordered(random_error_worker, [i for i in range(300)], chunksize=1)
        res = list(res_iter)
        # Results arrive in arbitrary order; sort before comparing.
        res.sort()
        assert res == [i for i in range(300)]
    finally:
        pool.terminate()
        pool.join()
def test_job_creation_with_delay(self):
    """Pool results must be correct even when job creation is delayed.

    Installs a ``DelayedBackend`` in place of the docker backend and
    checks that ``map`` still returns correct values.
    """
    fiber.backend.get_backend(name="docker")
    old_backend = fiber.backend._backends["docker"]
    fiber.backend._backends["docker"] = DelayedBackend()
    try:
        p = fiber.Pool(4)
        res = p.map(square_worker, [1, 2, 3, 4])
        p.wait_until_workers_up()
        p.terminate()
    finally:
        # Always restore the real backend; the original restored it only
        # on success, poisoning later tests if map() raised.
        fiber.backend._backends["docker"] = old_backend
    assert res == [i**2 for i in range(1, 5)]
    # wait for 2 seconds to let docker finish starting
    #time.sleep(2)
    p.join()
def es(theta0, worker, workers=40, sigma=0.1, alpha=0.05, iterations=200):
    """Optimize ``theta0`` with a simple evolution-strategies loop.

    Args:
        theta0: initial parameter vector (1-D numpy array).
        worker: callable invoked as ``worker(dim, sigma, theta)`` via
            functools.partial; must return a ``(reward, epsilon)`` pair.
        workers: population size / pool size per iteration.
        sigma: perturbation standard deviation.
        alpha: learning rate.
        iterations: number of update steps.

    Returns:
        The optimized parameter vector.
    """
    dim = theta0.shape[0]
    theta = theta0
    pool = fiber.Pool(workers)
    func = functools.partial(worker, dim, sigma)
    try:
        for t in range(iterations):
            returns = pool.map(func, [theta] * workers)
            rewards = [ret[0] for ret in returns]
            epsilons = [ret[1] for ret in returns]
            # normalize rewards
            normalized_rewards = (rewards - np.mean(rewards)) / np.std(rewards)
            theta = theta + alpha * 1.0 / (workers * sigma) * sum([
                reward * epsilon
                for reward, epsilon in zip(normalized_rewards, epsilons)
            ])
            if t % 10 == 0:
                print(theta)
    finally:
        # Bug fix: the original never shut the pool down, leaking the
        # worker processes past the end of the optimization.
        pool.terminate()
        pool.join()
    return theta
def test_many_jobs(self):
    """Stress the pending table with many small batched async jobs.

    Exercises a race condition in handling data in the pending table.
    """
    n_workers = 5
    n_tasks = 5000
    task_duration = 0.001
    pool = fiber.Pool(n_workers)
    pool.wait_until_workers_up()
    for _ in range(n_tasks // n_workers):
        # Submit one batch of async calls, then drain all of their results.
        handles = [pool.apply_async(sleep_worker, (task_duration, )) for _ in range(n_workers)]
        for handle in handles:
            handle.get()
    pool.terminate()
    pool.join()
def main():
    """Benchmark sequential task dispatch across several frameworks.

    Parses the frameworks to test from the command line, sets up each
    selected framework once, then times batches of sleep tasks at task
    durations of 1s, 0.1s, 0.01s and 0.001s, printing and collecting
    the elapsed times per framework.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'frameworks', nargs='+',
        choices=['mp', 'fiber', 'pyspark', 'ray', 'ipyparallel'],
        help='frameworks to benchmark')
    parser.add_argument('-t', '--total-duration', type=int, default=1,
                        help='total running time')
    parser.add_argument('-d', '--task-duration', type=float, default=None,
                        choices=[0.001, 0.01, 0.1, 1],
                        help='task duration in ms')
    args = parser.parse_args()

    workers = 5
    max_duration = args.total_duration
    results = {}
    frameworks = args.frameworks

    # One result list per framework for both batch and sequential runs.
    for framework in frameworks:
        results[framework] = []
        results[framework + "_seq"] = []

    # Framework-specific one-time setup. Imports are intentionally local
    # so only the frameworks being benchmarked need to be installed.
    if "pyspark" in frameworks:
        from pyspark import SparkContext
        import pyspark
        # Restart the context with a capped core count to match `workers`.
        sc = SparkContext()
        conf = pyspark.SparkConf().setAll([("spark.cores.max", 5)])
        sc.stop()
        sc = pyspark.SparkContext(conf=conf)
    if "ray" in frameworks:
        import ray
        ray.init()
    if "fiber" in frameworks:
        import fiber.pool
        fiber_pool = fiber.Pool(workers)
    if "ipyparallel" in frameworks:
        print("before popen")
        #ipp_controller = subprocess.Popen(["ipcontroller", "--ip", "*"])
        print("after popen")
        import atexit
        import signal
        import os
        #atexit.register(ipp_controller.kill)
        # Kill the spawned engine processes when this process exits.
        pids = spawn_workers(workers)
        for pid in pids:
            atexit.register(os.kill, pid, signal.SIGKILL)
        time.sleep(4)

    # Sweep task durations 1, 0.1, 0.01, 0.001 seconds.
    for i in range(4):
        factor = 10**i
        duration = 1 / factor
        # If a single duration was requested, skip all the others.
        if args.task_duration is not None:
            print(args.task_duration, duration,
                  type(args.task_duration), type(duration))
            if args.task_duration != duration:
                continue
        # Keep total wall time roughly constant across durations.
        tasks = int(max_duration * workers / duration)
        print("Benchmarking {} workers with {} tasks each takes {} "
              "seconds".format(workers, tasks, duration))

        # sequential tests (simulating RL)
        if "mp" in frameworks:
            elapsed = bench_mp_seq(tasks, workers, duration, True)
            results["mp_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("mp_seq", elapsed)
        if "fiber" in frameworks:
            elapsed = bench_fiber_seq(tasks, workers, duration, True,
                                      pool=fiber_pool)
            results["fiber_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("fiber_seq", elapsed)
        if "pyspark" in frameworks:
            elapsed = bench_spark_seq(tasks, workers, duration,
                                      warmup=True, sc=sc)
            results["pyspark_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("pyspark_seq", elapsed)
        if "ray" in frameworks:
            elapsed = bench_ray_seq(tasks, workers, duration, warmup=True)
            results["ray_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("ray_seq", elapsed)
        if "ipyparallel" in frameworks:
            elapsed = bench_ipp_seq(tasks, workers, duration, warmup=True)
            results["ipyparallel_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("ipyparallel_seq", elapsed)

        # batch tests (simulating ES)
        """
        if "mp" in frameworks:
            elapsed = bench_mp(tasks, workers, duration, True)
            results["mp"].append({"task_duration": duration, "elapsed": elapsed})
            print("mp", elapsed)

        if "fiber" in frameworks:
            elapsed = bench_fiber(tasks, workers, duration, True, pool=fiber_pool)
            results["fiber"].append({"task_duration": duration, "elapsed": elapsed})
            print("fiber", elapsed)

        if "pyspark" in frameworks:
            elapsed = bench_spark(tasks, workers, duration, warmup=True, sc=sc)
            results["pyspark"].append({"task_duration": duration, "elapsed": elapsed})
            print("pyspark", elapsed)

        if "ray" in frameworks:
            elapsed = bench_ray(tasks, workers, duration, warmup=True)
            results["ray"].append({"task_duration": duration, "elapsed": elapsed})
            print("ray", elapsed)
        """

    pprint(results)
def main():
    """Count lines of every *.py file in the current directory in parallel."""
    py_files = sorted(Path('.').glob('*.py'))
    pool = fiber.Pool(4)
    line_counts = pool.map(line_count, py_files)
    for path, count in zip(py_files, line_counts):
        print("{}\t{}".format(path, count))