def map(self, func, iterable):
    if self.n_workers == 1:
        # Only one worker: a plain map works fine (useful for testing).
        # results = [func(item) for item in iterable]
        results = list(map(func, iterable))  # forced eval to time it
    else:
        # Many workers: use an ActorPool.
        if len(iterable) < self.n_workers:
            n_workers = len(iterable)
        else:
            n_workers = self.n_workers
        n_per_batch = int(len(iterable) / n_workers) + 1
        batches = [iterable[i:i + n_per_batch]
                   for i in range(0, len(iterable), n_per_batch)]
        id_for_reorder = range(len(batches))
        eval_pool = ActorPool([
            Ray_Deap_Map.remote(self.creator_setup, self.pset_creator)
            for _ in range(n_workers)
        ])
        unordered_results = list(eval_pool.map_unordered(
            lambda actor, input_tuple: actor.ray_remote_eval_batch.remote(func, input_tuple),
            zip(batches, id_for_reorder)))
        # Restore the original batch order.
        ordered_batch_results = [batch for batch_id in id_for_reorder
                                 for batch in unordered_results
                                 if batch_id == batch[0][1]]
        # Flatten the batches into a flat list of fitness values.
        results = [item[0] for sublist in ordered_batch_results for item in sublist]
    return results
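# A wrapper like this is typically registered as a drop-in replacement for
# DEAP's built-in map. A minimal wiring sketch follows; RayDeapMapper stands
# in for the (unnamed) class that owns the map() method above, and the
# creator_setup/pset_creator values are assumed rather than taken from the source.
from deap import base

toolbox = base.Toolbox()
mapper = RayDeapMapper(n_workers=8, creator_setup=my_creator_setup,
                       pset_creator=my_pset_creator)
toolbox.register("map", mapper.map)
# DEAP algorithms now fan evaluation out across the Ray ActorPool:
# fitnesses = toolbox.map(toolbox.evaluate, population)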
def test_map_gh23107(init):
    sleep_time = 40

    # Reference - https://github.com/ray-project/ray/issues/23107
    @ray.remote
    class DummyActor:
        async def identity(self, s):
            if s == 6:
                await asyncio.sleep(sleep_time)
            return s, time.time()

    def func(a, v):
        return a.identity.remote(v)

    map_values = [1, 2, 3, 4, 5]

    pool_map = ActorPool([DummyActor.remote() for _ in range(2)])
    pool_map.submit(func, 6)
    start_time = time.time()
    # Materialize the generator once; iterating it a second time would yield
    # nothing and make the assertions pass vacuously.
    results = list(pool_map.map(func, map_values))
    assert all(elem[0] in [1, 2, 3, 4, 5] for elem in results)
    # map() must not block on the slow in-flight submit() task.
    assert all(abs(elem[1] - start_time) < sleep_time for elem in results)

    pool_map_unordered = ActorPool([DummyActor.remote() for _ in range(2)])
    pool_map_unordered.submit(func, 6)
    start_time = time.time()
    results = list(pool_map_unordered.map_unordered(func, map_values))
    assert all(elem[0] in [1, 2, 3, 4, 5] for elem in results)
    assert all(abs(elem[1] - start_time) < sleep_time for elem in results)
def _main():
    opts = _parse_main()
    os.makedirs(opts.ckpt_dir, exist_ok=True)
    files = recursively_get_files(opts.cnfs, exts=["cnf", "gz", "dimacs"], forbidden=["bz2"])
    print(f"TRAINING WITH {len(files)} CNFS")

    ray.init()

    WM_USE_GPU = False
    weight_manager = ray.remote(num_gpus=(1 if WM_USE_GPU else 0))(WeightManager).remote(
        ckpt_dir=opts.ckpt_dir)
    ray.get(weight_manager.load_latest_ckpt.remote())

    if opts.model_cfg is not None:
        with open(opts.model_cfg, "r") as f:
            model_cfg = json.load(f)
    else:
        print("[rl_lbd._main] warning: using default configuration")
        model_cfg = defaultGNN1Cfg

    # TODO: to avoid OOM, either batch dynamically or preprocess the formulas
    # beforehand to ensure they are under a certain size -- this will require
    # changes throughout to avoid a fixed batch size.
    learner = ray.remote(num_gpus=(1 if torch.cuda.is_available() else 0))(Learner).options(
        max_concurrency=(opts.n_workers + 2)).remote(
            weight_manager=weight_manager, batch_size=opts.batch_size,
            ckpt_freq=opts.ckpt_freq, ckpt_dir=opts.ckpt_dir, lr=opts.lr,
            restore=True, model_cfg=model_cfg)
    print("LEARNER ONLINE")
    ray.get(learner.restore_weights.remote())

    workers = [ray.remote(EpisodeWorker).remote(learner=learner,
                                                weight_manager=weight_manager,
                                                model_cfg=model_cfg)
               for _ in range(opts.n_workers)]
    pool = ActorPool(workers)

    for w in workers:
        ray.get(w.try_update_weights.remote())

    with open(os.path.join(opts.ckpt_dir, "log.txt"), "a") as f:
        print(f"[{datetime.datetime.now()}] STARTING TRAINING RUN", file=f)
        print("ARGS:", file=f)
        for k, v in vars(opts).items():
            print(f"    {k} : {v}", file=f)
        print("\n\n", file=f)

    def shuffle_environments(ws, resample_frac=1.0):
        for w in ws:
            resample = np.random.choice([True, False], p=[resample_frac, 1 - resample_frac])
            if resample:
                ray.get(w.set_env.remote(from_file=random.choice(files)))
        print("shuffled environments")

    shuffle_environments(workers)
    for k_epoch in range(opts.n_epochs):
        if opts.asynchronous:
            train_handle = learner.train.remote(synchronous=False)
        shuffle_environments(workers, opts.resample_frac)
        for _ in pool.map_unordered(lambda a, v: a.sample_trajectory.remote(),
                                    range(opts.eps_per_worker * opts.n_workers)):
            pass
        if opts.asynchronous:
            ray.get(train_handle)
        else:
            ray.get(learner.train.remote(synchronous=True))
def extract_flame(fps):
    files = list(DATASET_DIR.glob(f"*/*/video_{fps}fps.mp4"))
    for video_file in tqdm(files, desc="Extracting flame parameters", leave=False):
        flame_h5_file = video_file.parent / f"flame_{fps}fps.h5"
        if flame_h5_file.exists():
            continue
        flame_dir = video_file.parent / f"flame_{fps}fps"
        gender = get_gender(video_file.parent.parent.name, video_file.parent.name)
        template_path = BASE_DIR / CONFIG["flame"][f"model_path_{gender}"]
        # with open(template_model_fname, "rb") as f:
        #     template = pickle.load(f, encoding="latin1")
        ringnet_file = video_file.parent / f"ringnet_{fps}fps.h5"
        openface_file = video_file.parent / f"openface_{fps}fps.csv"
        neutral_mesh_faces = Mesh(filename=str(video_file.parent / "neutral_mesh.ply")).f
        f = h5py.File(ringnet_file, "r")["flame_params"]
        pool = ActorPool([
            FrameOptimizer.remote(neutral_mesh_faces, template_path)
            for _ in range(8)
        ])
        openface_data = list(csv.reader(openface_file.open()))[1:]
        data = f["pose"], f["shape"], f["expression"], openface_data
        flame_dir.mkdir(parents=True, exist_ok=True)
        runners = []
        for i, (pose, shape, expression, openface) in enumerate(zip(*data), 1):
            flame_file = flame_dir / f"{i:06}.npy"
            if flame_file.exists():
                continue
            # Get the 68 facial landmarks.
            landmarks = [float(x) for x in openface[299:435]]
            # Reshape to 51x2 (cutting off the 17 jaw landmarks).
            target_2d_lmks = np.array(landmarks).reshape(2, -1).T[17:]
            runners.append((pose, shape, expression, target_2d_lmks, flame_file))
        for file_name, flame_params in tqdm(
                pool.map(lambda a, v: a.fit_lmk2d_v2.remote(*v), runners),
                total=len(runners),
                leave=False):
            np.save(file_name, flame_params)
        np_files = list(flame_dir.glob("*.npy"))
        assert len(np_files) == len(openface_data)
        results = defaultdict(list)
        for file in flame_dir.glob("*.npy"):
            for key, value in np.load(file, allow_pickle=True).item().items():
                results[key].append(value)
        with h5py.File(flame_h5_file, "w") as f:
            for key, value in results.items():
                f.create_dataset(key, data=np.vstack(value))
def remote_worker_caller(executor: ActorPool, sleep_time: int, number_invokes: int):
    start = time.time()
    for _ in range(number_invokes):
        executor.submit(lambda a, v: a.computation.remote(v), sleep_time)
    submit_time = time.time() - start
    # Drain the pool so that all submitted tasks complete.
    while executor.has_next():
        executor.get_next()
    return submit_time, time.time() - start
def train(self):
    """Train the agent."""
    file = open('log_reward', 'w')
    mainWorker = Worker.remote(self.env, self.hp)
    worker = ActorPool([
        Worker.remote(gym.make(self.hp.env_name), self.hp)
        for i in range(8)
    ])
    print(worker)
    for step in range(self.hp.nb_steps):
        # Generate random perturbations.
        deltas = self.policy.sample_deltas()
        positive_rewards = worker.map(
            lambda a, v: a.explore.remote(*v),
            [(self.normalizer, self.policy, 'positive', deltas[i])
             for i in range(self.hp.nb_directions)])
        positive_rewards = list(positive_rewards)
        negative_rewards = worker.map(
            lambda a, v: a.explore.remote(*v),
            [(self.normalizer, self.policy, 'negative', deltas[i])
             for i in range(self.hp.nb_directions)])
        negative_rewards = list(negative_rewards)

        # Gather the rewards and compute their standard deviation.
        all_rewards = np.array(positive_rewards + negative_rewards)
        sigma_r = all_rewards.std()

        # Sort the rewards to pick the best directions for the update.
        scores = {
            k: max(r_pos, r_neg)
            for k, (r_pos, r_neg)
            in enumerate(zip(positive_rewards, negative_rewards))
        }
        order = sorted(scores.keys(), key=lambda x: scores[x],
                       reverse=True)[:self.hp.nb_best_directions]
        rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k])
                    for k in order]

        # Update the policy with the new weights.
        self.policy.update(rollouts, sigma_r)

        # Evaluate and log the result.
        reward_evaluation = ray.get(
            mainWorker.explore.remote(self.normalizer, self.policy))
        print("Step ", step, "=> Reward: ", reward_evaluation)
        file.write(str(reward_evaluation) + '\n')
    file.close()
    return self.policy.tetha
def test_get_next(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)
    for i in range(5):
        pool.submit(lambda a, v: a.f.remote(v), i)
        assert pool.get_next() == i + 1
def test_map(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)

    for index, v in enumerate(pool.map(lambda a, v: a.double.remote(v), range(5))):
        assert v == 2 * index
def tqdm_map(actors, actor_tup_function, tups, res=None):
    assert res is not None, "provide a way to save partial results"
    initial_len = len(res)
    actor_pool = ActorPool(actors)
    for tup in tups:
        actor_pool.submit(actor_tup_function, tup)
    pbar = tqdm(total=len(tups))
    while True:
        nxt = actor_pool.get_next_unordered()
        # Deep-copy to free up any references at the source.
        res.append(copy.deepcopy(nxt))
        pbar.update(1)
        if (len(res) - initial_len) == len(tups):
            print("done with new tups")
            break
    pbar.close()
    return res
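# A hypothetical call site for tqdm_map; WorkerActor and its process() method
# are invented names for illustration, not part of the source.
results = []
actors = [WorkerActor.remote() for _ in range(4)]
tqdm_map(actors,
         lambda a, tup: a.process.remote(tup),  # actor_tup_function
         tups=list(range(100)),
         res=results)  # res accumulates partial results as tasks finish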
def test_map_unordered(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)

    total = []
    # map_unordered() makes no ordering guarantee, so only check membership.
    for v in pool.map_unordered(lambda a, v: a.double.remote(v), range(5)):
        total += [v]

    assert all(elem in [0, 2, 4, 6, 8] for elem in total)
def test_map_gh23107(init):
    # Reference - https://github.com/ray-project/ray/issues/23107
    @ray.remote
    class DummyActor:
        async def identity(self, s):
            return s

    def func(a, v):
        return a.identity.remote(v)

    map_values = [1, 2, 3, 4, 5]

    pool_map = ActorPool([DummyActor.remote() for _ in range(2)])
    pool_map.submit(func, 6)
    gen = pool_map.map(func, map_values)
    assert list(gen) == [1, 2, 3, 4, 5]

    pool_map_unordered = ActorPool([DummyActor.remote() for _ in range(2)])
    pool_map_unordered.submit(func, 6)
    gen = pool_map_unordered.map_unordered(func, map_values)
    assert all(elem in [1, 2, 3, 4, 5] for elem in list(gen))
def test_push(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    a1, a2 = MyActor.remote(), MyActor.remote()
    pool = ActorPool([a1])

    pool.submit(lambda a, v: a.double.remote(v), 1)
    assert pool.has_free() is False  # actor is busy
    with pytest.raises(ValueError):
        pool.push(a1)  # a1 is already in the pool
    pool.push(a2)
    assert pool.has_free()  # a2 is available
def get_flame_parameters_for_objs(
    voca_objs,
    dest_path,
    model_fname="/models/flame_model/ch_models/generic_model.pkl",
):
    global ray_is_init
    if not ray_is_init:
        ray.init(num_gpus=2)
        ray_is_init = True
    MeshFitterActor = ray.remote(MeshFitter).options(num_gpus=0.01, num_cpus=1)
    dest_path.mkdir(parents=True, exist_ok=True)
    files = [x for x in voca_objs if not (dest_path / x.name).exists()]
    if not files:
        return [dest_path / x.name for x in voca_objs]

    # Leave a couple of CPUs free for the driver.
    cpu_count = int(ray.available_resources()["CPU"]) - 2
    actors = [MeshFitterActor.remote(model_fname) for _ in range(cpu_count)]
    pool = ActorPool(actors)

    def run(a, file_):
        vertices = np.load(file_, allow_pickle=True)
        return a.fit.remote(vertices, dest_path / file_.name)

    # NB: this maps over all voca_objs, not just the unprocessed `files`.
    dest_paths = []
    for dest_file_path, flame_params in tqdm(
            pool.map_unordered(run, voca_objs),
            total=len(voca_objs)):
        np.save(dest_file_path, flame_params)
        dest_paths.append(dest_file_path)
    return sorted(dest_paths)
def test_multiple_returns(init):
    @ray.remote
    class Foo(object):
        @ray.method(num_returns=2)
        def bar(self):
            return 1, 2

    pool = ActorPool([Foo.remote() for _ in range(2)])
    for _ in range(4):
        pool.submit(lambda a, v: a.bar.remote(), None)

    while pool.has_next():
        assert pool.get_next(timeout=None) == [1, 2]
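# Why the expected value is the list [1, 2] rather than a tuple: with
# num_returns=2, a.bar.remote() hands the pool a sequence of two ObjectRefs,
# and resolving a list of refs with ray.get yields the list of their values.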
def test_get_next_timeout(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            # Never returns, so get_next_unordered() must time out.
            while True:
                x = x + 1
                time.sleep(1)
            return None  # unreachable

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)

    pool.submit(lambda a, v: a.f.remote(v), 0)
    with pytest.raises(TimeoutError):
        pool.get_next_unordered(timeout=0.1)
def test_get_next_unordered(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)

    total = []
    for i in range(5):
        pool.submit(lambda a, v: a.f.remote(v), i)
    while pool.has_next():
        total += [pool.get_next_unordered()]

    assert all(elem in [1, 2, 3, 4, 5] for elem in total)
            num_gpu = 0.5
        elif '-large' in base_config.model.encoder_model:
            num_gpu = 1.0
        else:
            num_gpu = 2.0
    else:
        num_gpu = 0.0

    if num_gpu not in experiment_map:
        experiment_map[num_gpu] = []
    experiment_map[num_gpu] += experiments

for num_gpu, experiments in experiment_map.items():
    max_available_actors = min(cpus // 2, gpus // num_gpu if gpus else cpus)
    pool = ActorPool([RayBatchActor.options(num_cpus=2, num_gpus=num_gpu).remote()
                      for _ in range(int(max_available_actors))])
    exp_result = pool.map_unordered(lambda actor, kwargs: actor.train.remote(kwargs),
                                    experiments)
    for name, results in groupby(sorted(exp_result, key=lambda t: t[0]),
                                 key=lambda t: t[0]):
        results = list(results)

        # Log the results
        logger.info('Experiment: %s ------------------', name)
        for name, runname, timedelta, devresult, devmaxresult, testresult in results:
            logger.info('\t%s training time: %10.3f', runname, timedelta)
            logger.info('\t%s dev. accuracy: %7.3f', runname, devresult)
            logger.info('\t%s dev. max. acc: %7.3f', runname, devmaxresult)
            logger.info('\t%s test accuracy: %7.3f', runname, testresult)

        # Write the average result
        if len(results) > 1:
def test_pop_idle(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote()]
    pool = ActorPool(actors)

    pool.submit(lambda a, v: a.double.remote(v), 1)
    assert pool.pop_idle() is None
    assert pool.has_free() is False  # actor is busy
    assert pool.get_next() == 2
    assert pool.has_free()
    pool.pop_idle()  # removes actor from pool
    assert pool.has_free() is False  # no more actors in pool
class RayRunner(RunnerBase):
    def __init__(self, *, random_seed: int = 0, volumes_dir: str = None,
                 scratch_dir: str = None, store_results=False, n_workers=None,
                 blocklist=()) -> None:
        if not ray.is_initialized():
            ray.init()
        super().__init__(random_seed=random_seed, volumes_dir=volumes_dir,
                         scratch_dir=scratch_dir)
        self.data_handler = DataHandler.remote()
        self.ray_executor = RayExecutor.remote(
            random_seed=random_seed, volumes_dir=volumes_dir,
            scratch_dir=scratch_dir, store_results=store_results,
            blocklist=blocklist)
        if n_workers is None:
            n_workers = multiprocessing.cpu_count()
        self.actor_pool = ActorPool([
            RayExecutor.remote(random_seed=random_seed, volumes_dir=volumes_dir,
                               scratch_dir=scratch_dir, store_results=store_results,
                               blocklist=blocklist)
            for _ in range(n_workers)
        ])
        # Wait for primitives to be loaded on the workers
        # time.sleep(len(d3m_index.search()) * 0.15)

    def stop_ray(self):
        ray.shutdown()

    def get_request(self, request_id: str):
        return ray.get(ray.ObjectID(binascii.unhexlify(request_id)))

    def fit_pipeline_request(self, problem_description: Problem, pipeline: Pipeline,
                             input_data: typing.Sequence[ContainerType], *,
                             timeout: float = None, expose_outputs: bool = False) -> str:
        request_id = self.data_handler.add_data.remote(input_data)
        input_data_id = ray.get(request_id)
        request_id = self.ray_executor.fit_pipeline.remote(
            self.data_handler, problem_description, pipeline, input_data_id,
            timeout=timeout, expose_outputs=expose_outputs)
        return request_id.hex()

    def produce_pipeline_request(self, fitted_pipeline_id: str,
                                 input_data: typing.Sequence[ContainerType], *,
                                 timeout: float = None, expose_outputs: bool = False) -> str:
        request_id = self.data_handler.add_data.remote(input_data)
        input_data_id = ray.get(request_id)
        request_id = self.ray_executor.produce_pipeline.remote(
            self.data_handler, fitted_pipeline_id, input_data_id,
            timeout=timeout, expose_outputs=expose_outputs)
        return request_id.hex()

    def evaluate_pipeline_request(
            self, problem_description: Problem, pipeline: Pipeline,
            input_data: typing.Sequence[ContainerType], *,
            metrics: typing.Sequence[typing.Dict],
            data_preparation_pipeline: Pipeline = None,
            scoring_pipeline: Pipeline = None,
            data_preparation_params: typing.Dict[str, str] = None,
            scoring_params: typing.Dict[str, str] = None,
            timeout: float = None) -> str:
        request_id = self.data_handler.add_data.remote(input_data)
        input_data_id = ray.get(request_id)
        request_id = self.ray_executor.evaluate_pipeline.remote(
            self.data_handler, problem_description, pipeline, input_data_id,
            metrics=metrics, data_preparation_pipeline=data_preparation_pipeline,
            scoring_pipeline=scoring_pipeline,
            data_preparation_params=data_preparation_params,
            scoring_params=scoring_params, timeout=timeout)
        return request_id.hex()

    def fitted_pipeline_id_exists(self, fitted_pipeline_id):
        request_id = self.ray_executor.fitted_pipeline_id_exists.remote(fitted_pipeline_id)
        return ray.get(request_id)

    def evaluate_pipelines(
            self, problem_description: Problem,
            pipelines: typing.Sequence[Pipeline],
            input_data: typing.Sequence[ContainerType], *,
            metrics: typing.Sequence[typing.Dict],
            data_preparation_pipeline: Pipeline = None,
            scoring_pipeline: Pipeline = None,
            data_preparation_params: typing.Dict[str, str] = None,
            scoring_params: typing.Dict[str, str] = None,
            timeout: float = None) -> typing.Sequence[PipelineResult]:
        request_id = self.data_handler.add_data.remote(input_data)
        input_data_id = ray.get(request_id)
        args = []
        for pipeline in pipelines:
            args.append({
                'data_handler': self.data_handler,
                'problem_description': problem_description,
                'pipeline': pipeline,
                'input_data_id': input_data_id,
                'metrics': metrics,
                'data_preparation_pipeline': data_preparation_pipeline,
                'scoring_pipeline': scoring_pipeline,
                'data_preparation_params': data_preparation_params,
                'scoring_params': scoring_params,
                'timeout': timeout,
            })
        return self.actor_pool.map(
            lambda actor, arg: actor.evaluate_pipeline.remote(**arg), args)
def run_files(
    pbar,
    flame_fitting_dir,
    ringnet_dir,
    dir_,
    neutral_mesh_faces,
    dd,
    lmk_face_idx,
    lmk_b_coords,
    attempt=0,
):
    from tqdm import tqdm

    existing_files = set(
        os.path.basename(os.path.dirname(x))
        for x in glob(str(flame_fitting_dir / "*/flame_params.npy"))
    )
    files = sorted(
        x for x in glob(str(dir_ / "*"))
        if os.path.basename(x)[:-4] not in existing_files
    )
    counter = 0
    actors = []
    cpu_count = int(ray.available_resources()["CPU"]) - 2
    pbar.set_description(f"{dir_.parent.name}/{dir_.name} ({cpu_count} cpus)")
    for x in range(min(len(files), cpu_count)):
        actors.append(
            FrameOptimizer.remote(
                dir_,
                neutral_mesh_faces,
                dd,
                lmk_face_idx,
                lmk_b_coords,
            )
        )
    file_len = len(files)
    pool = ActorPool(actors)
    try:
        pbar2 = tqdm(
            pool.map_unordered(
                lambda a, v: run(a, v, flame_fitting_dir, ringnet_dir), files
            ),
            total=file_len,
        )
        for x in pbar2:
            pbar2.set_description(f"{dir_.parent.name}/{dir_.name} ({cpu_count} cpus)")
            counter += 1
            if x is not None:
                flame_out_path, flame_out_params = x
                os.makedirs(os.path.dirname(flame_out_path), exist_ok=True)
                np.save(flame_out_path, flame_out_params)
    except ray.exceptions.RayActorError:
        if attempt > 10:
            raise Exception("too many attempts")
        for actor in actors:
            ray.kill(actor)
        # Reset the attempt counter whenever this pass made some progress.
        if counter > 0:
            attempt = 0
        else:
            attempt += 1
        run_files(
            pbar,
            flame_fitting_dir,
            ringnet_dir,
            dir_,
            neutral_mesh_faces,
            dd,
            lmk_face_idx,
            lmk_b_coords,
            attempt=attempt,
        )
import ray
from ray.util import ActorPool


@ray.remote
class Actor:
    def double(self, n):
        return n * 2


a1, a2 = Actor.remote(), Actor.remote()
pool = ActorPool([a1, a2])

# pool.map() returns a generator that yields results in input order.
gen = pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])
print(list(gen))  # [2, 4, 6, 8]
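# For contrast with the ordered map() above, a minimal sketch of
# map_unordered(), reusing the same pool: each result is yielded as soon as
# its task completes, so the yield order is not guaranteed.
gen = pool.map_unordered(lambda a, v: a.double.remote(v), [1, 2, 3, 4])
print(sorted(gen))  # [2, 4, 6, 8] once sorted; yield order may vary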
        Nb_sequences=s)
    inputs = sm.splitPandas(df=df, nb_splits=n)
    # For the walkers already initialized: just change the dataframe.
    ac = ray.get([
        walker.set_df.remote(i)
        for walker, i in zip(walkers[0:n], inputs)
    ])
    # For walkers not already initialized: add new ones!
    # Find the initial yield; return the min yield from each worker.
    # res = ray.get([walker.get_df.remote() for walker in walkers[0:n]])
    # print(res)
    res = ray.get([walker.init_yield.remote() for walker in walkers[0:n]])
    min_yield = [np.min(res)]
    start = time.time()
    pool = ActorPool(ac)
    with dm.suppress_stdout():
        # res = ray.get(pool.map(lambda a, v: a.walk.remote(min_yield[0], nb_mutations)))
        res = ray.get([
            walker.walk.remote(min_yield[0], nb_mutations)
            for walker in walkers[0:n]
        ])
    t.append((time.time() - start) / nb_steps)
    # [walker.reset() for walker in walkers]
    # Save the timings for this processor count (include standard deviation).
    times.loc[:, 'nproc: %i' % n] = t
    times.to_pickle(path=
        data = ray.cloudpickle.dumps(keyvalue[1])
        with open(self.basedir + '/' + keyvalue[0], "wb") as f:
            f.write(data)

    def restore(self, key: str) -> dict:
        if self.exists(key):
            with open(self.basedir + '/' + key, "rb") as f:
                data = f.read()
            return ray.cloudpickle.loads(data)
        else:
            return None


pool = ActorPool([
    FilePersistence.remote(),
    FilePersistence.remote(),
    FilePersistence.remote()
])


@ray.remote
class Account:
    def __init__(self, balance: float, minimal_balance: float,
                 account_key: str, persistence: ActorPool):
        self.persistence = persistence
        self.key = account_key
        if not self.restorestate():
            if balance < minimal_balance:
                raise Exception("Starting balance is less than minimal balance")
            self.balance = balance
def _main(cnf_dir, n_datapoints_per_file, dest, n_workers, dumpfreq=100e3,
          num_subproblems=8, random_units=5, timeout=300, rootdir=None,
          n_datapoints=25000):
    logger = SimpleLogger(
        os.path.join(dest, "logs", "main_loop", f"{str(uuid.uuid4())}.txt"))
    try:
        ray.init(address='auto', redis_password='******')
    except Exception:
        ray.init()
    tms_dict = {
        cnf_path: TaskManager(cnf_path, rootdir)
        for cnf_path in recursively_get_files(
            cnf_dir, forbidden=["bz2", "xz"], exts=["cnf", "gz"])
    }
    logger.write("STARTING DATA GENERATION LOOP WITH", len(tms_dict.keys()), "CNFs")
    time.sleep(5)
    writer = lbdpWriter.remote(n_datapoints_per_file, dest)
    workers = [
        Worker.remote(writer, dumpfreq=dumpfreq, num_subproblems=num_subproblems,
                      random_units=random_units, timeout=timeout)
        for _ in range(n_workers)
    ]
    pool = ActorPool(workers)
    for tm in tms_dict.values():
        for task in tm:
            if task is not None:
                pool.submit((lambda a, v: a.work.remote(v)), task)
                logger.write(
                    f"SUBMITTED TASK: TYPE {task.task_type}; CNF_PATH {task.cnf_path}")
    try:
        LOOP_COUNT = 0
        while any((x.waiting > 0) for x in tms_dict.values()):
            LOOP_COUNT += 1
            if LOOP_COUNT % 100 == 0:
                if ray.get(writer.get_write_count.remote()) > n_datapoints:
                    print(f"NUMBER OF WRITES EXCEEDS N_DATAPOINTS={n_datapoints}, STOPPING")
                    break
            cnf_path, tasks, original_task = pool.get_next_unordered()
            logger.write(f"GOT TASK RESULT (TYPE {original_task.task_type})")
            if original_task.task_type == 0 and len(tasks) == 0:
                logger.write("WARNING: Task", original_task.cnf_path,
                             "returned no subtasks.")
            tm = tms_dict[cnf_path]
            tm.set_tasks(tasks)
            if (tm.waiting == 0) and len(tm.tasks) == 0:
                print(f"ROOT TASK {tm.cnf_path} FINISHED")
                tms_dict.pop(cnf_path)
                print("POPPED FROM TMS_DICT")
                try:
                    shutil.rmtree(tm.tmpdir.name)
                    os.makedirs(tm.tmpdir.name)
                    time.sleep(1)
                except OSError:
                    pass
            else:
                for task in tm:
                    if task.task_type == 0:
                        logger.write("SUBMITTING SUBPROBLEM (TIMEOUT) TASK")
                    elif task.task_type == 1:
                        logger.write("SUBMITTING DERIVED FORMULA")
                    elif task.task_type == 2:
                        logger.write("SUBMITTING DERIVED SUBFORMULA")
                    pool.submit((lambda a, v: a.work.remote(v)), task)
                logger.write(f"SUBMITTED {len(tasks)} NEW TASKS")
    finally:
        del writer
import ray
from time import sleep
from uuid import uuid4

from ray.util import ActorPool


@ray.remote
class PoolActor:
    def __init__(self):
        self.id = str(uuid4())

    def computation(self, num):
        print(f'Actor with id {self.id} waiting for {num} sec')
        for x in range(num):
            sleep(1)
            print(f'Actor with id {self.id} slept for {x} sec')
        return num


# Create actors and add them to the pool
a1, a2, a3 = PoolActor.remote(), PoolActor.remote(), PoolActor.remote()
pool = ActorPool([a1, a2, a3])

print(list(pool.map(lambda a, v: a.computation.remote(v), [3, 4, 5, 4])))

pool.submit(lambda a, v: a.computation.remote(v), 3)
pool.submit(lambda a, v: a.computation.remote(v), 4)
pool.submit(lambda a, v: a.computation.remote(v), 5)
pool.submit(lambda a, v: a.computation.remote(v), 4)

print(pool.get_next())
print(pool.get_next())
print(pool.get_next())
print(pool.get_next())
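# Note on the submit/get_next half of this example: get_next() returns results
# in submission order, so the four prints come back as 3, 4, 5, 4 even though
# the 5-second task outlives its neighbors; get_next_unordered() would instead
# yield whichever task finishes first.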
#print("Creating placement group") #start = time.time() #pg = placement_group([{"CPU": 10}, {"CPU": 10}, {"CPU": 10}, {"CPU": 10}, {"CPU": 10}], strategy="STRICT_SPREAD") #ray.get(pg.ready()) #print(f"Placement group created {time.time() - start}") # invoking remote function #for n in number_invokes: # start, execution = ray.get(remote_caller_placement.remote(sleep_time, n, pg)) # print(f'Executed {n} invocations with sleep {sleep_time} in {execution}s, start time {start}s') # Delete placement group. This API is asynchronous. #remove_placement_group(pg) # Wait until placement group is killed. #sleep(1) #using executors print("Creating Actor pool") start = time.time() actors = [JobExecutor.remote() for n in range(500)] pool = ActorPool(actors) print(f"Actor pool is created {time.time() - start}") for n in number_invokes: start, execution = ray.get(remote_worker_caller.remote( pool, sleep_time, n)) print( f'Executed {n} invocations with sleep {sleep_time} in {execution}s, start time {start}s' )