    def map(self, func, iterable):
        if self.n_workers == 1:
            # Only one worker: a plain map() is sufficient (handy for testing).
            # results = [func(item) for item in iterable]
            results = list(map(func, iterable))  # force evaluation so the call can be timed
        else:
            # Multiple workers: distribute evaluation batches across an ActorPool.

            if len(iterable) < self.n_workers:
                n_workers = len(iterable)
            else:
                n_workers = self.n_workers

            n_per_batch = int(len(iterable)/n_workers) + 1
            batches = [iterable[i:i + n_per_batch] for i in range(0, len(iterable), n_per_batch)]
            id_for_reorder = range(len(batches))
            eval_pool = ActorPool([Ray_Deap_Map.remote(self.creator_setup, self.pset_creator) for _ in range(n_workers)])
            unordered_results = list(eval_pool.map_unordered(lambda actor, input_tuple: actor.ray_remote_eval_batch.remote(func, input_tuple),
                                                             zip(batches, id_for_reorder)))
            # Restore the original batch order using the id each batch was tagged with.
            ordered_batch_results = [batch for batch_id in id_for_reorder
                                     for batch in unordered_results if batch_id == batch[0][1]]

            # Flatten the batches into a single list of fitness values.
            results = [item[0] for sublist in ordered_batch_results for item in sublist]
            
        return results
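The reordering trick used above (tag every batch with an index before map_unordered, then sort the results by that tag) can be shown in isolation. A minimal self-contained sketch; the Square actor and its eval_batch method are hypothetical and not part of the original code:

import ray
from ray.util import ActorPool

ray.init(ignore_reinit_error=True)


@ray.remote
class Square:
    def eval_batch(self, batch_and_id):
        batch, batch_id = batch_and_id
        # Return the results together with the id so the caller can restore the order.
        return [x * x for x in batch], batch_id


items = list(range(10))
batches = [items[i:i + 3] for i in range(0, len(items), 3)]
pool = ActorPool([Square.remote() for _ in range(2)])

unordered = list(pool.map_unordered(
    lambda actor, tup: actor.eval_batch.remote(tup),
    list(zip(batches, range(len(batches))))))

# Sort by the batch id, then flatten the batches back into one result list.
ordered = [res for res, _ in sorted(unordered, key=lambda pair: pair[1])]
results = [x for batch in ordered for x in batch]
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]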
Example #2
def test_map_gh23107(init):
    sleep_time = 40

    # Reference - https://github.com/ray-project/ray/issues/23107
    @ray.remote
    class DummyActor:
        async def identity(self, s):
            if s == 6:
                await asyncio.sleep(sleep_time)
            return s, time.time()

    def func(a, v):
        return a.identity.remote(v)

    map_values = [1, 2, 3, 4, 5]

    pool_map = ActorPool([DummyActor.remote() for i in range(2)])
    pool_map.submit(func, 6)
    start_time = time.time()
    results = list(pool_map.map(func, map_values))
    assert all(elem[0] in [1, 2, 3, 4, 5] for elem in results)
    assert all(abs(elem[1] - start_time) < sleep_time for elem in results)

    pool_map_unordered = ActorPool([DummyActor.remote() for i in range(2)])
    pool_map_unordered.submit(func, 6)
    start_time = time.time()
    results = list(pool_map_unordered.map_unordered(func, map_values))
    assert all(elem[0] in [1, 2, 3, 4, 5] for elem in results)
    assert all(abs(elem[1] - start_time) < sleep_time for elem in results)
Example #3
def _main():
  opts = _parse_main()
  os.makedirs(opts.ckpt_dir, exist_ok=True)
  files = recursively_get_files(opts.cnfs, exts=["cnf","gz", "dimacs"], forbidden=["bz2"])
  print(f"TRAINING WITH {len(files)} CNFS")

  ray.init()

  WM_USE_GPU = False
  weight_manager = ray.remote(num_gpus=(1 if WM_USE_GPU else 0))(WeightManager).remote(ckpt_dir=opts.ckpt_dir)
  ray.get(weight_manager.load_latest_ckpt.remote())

  if opts.model_cfg is not None:
    with open(opts.model_cfg, "r") as f:
      model_cfg = json.load(f)
  else:
    print("[rl_lbd._main] warning: using default configuration")
    model_cfg = defaultGNN1Cfg

  learner = ray.remote(num_gpus=(1 if torch.cuda.is_available() else 0))(Learner).options(
      max_concurrency=(opts.n_workers + 2)).remote(
          weight_manager=weight_manager, batch_size=opts.batch_size, ckpt_freq=opts.ckpt_freq,
          ckpt_dir=opts.ckpt_dir, lr=opts.lr, restore=True, model_cfg=model_cfg)
  # TODO: to avoid OOM, either dynamically batch or preprocess the formulas beforehand to ensure
  # that they are under a certain size -- this will require some changes throughout to avoid a
  # fixed batch size.

  print("LEARNER ONLINE")
  ray.get(learner.restore_weights.remote())

  workers = [ray.remote(EpisodeWorker).remote(learner=learner, weight_manager=weight_manager, model_cfg=model_cfg) for _ in range(opts.n_workers)]

  pool = ActorPool(workers)

  for w in workers:
    ray.get(w.try_update_weights.remote())

  with open(os.path.join(opts.ckpt_dir, "log.txt"), "a") as f:
    print(f"[{datetime.datetime.now()}] STARTING TRAINING RUN", file=f)
    print("ARGS:", file=f)
    for k,v in vars(opts).items():
      print(f"    {k}  :  {v}", file=f)
    print("\n\n", file=f)

  def shuffle_environments(ws, resample_frac=1.0):
    for w in ws:
      resample = np.random.choice([True,False], p=[resample_frac, 1-resample_frac])
      if resample:
        ray.get(w.set_env.remote(from_file=random.choice(files)))
    print("shuffled environments")

  shuffle_environments(workers)
  for k_epoch in range(opts.n_epochs):
    if opts.asynchronous:
      train_handle = learner.train.remote(synchronous=False)
    waiting = 0
    completed = 0
    shuffle_environments(workers, opts.resample_frac)
    for _ in pool.map_unordered((lambda a,v: a.sample_trajectory.remote()), range(opts.eps_per_worker*opts.n_workers)):
      pass
    if opts.asynchronous:
      ray.get(train_handle)
    else:
      ray.get(learner.train.remote(synchronous=True))
Example #4
def extract_flame(fps):
    files = list(DATASET_DIR.glob(f"*/*/video_{fps}fps.mp4"))
    for i, video_file in enumerate(
            tqdm(files, desc="Extracting flame parameters", leave=False)):
        flame_h5_file = video_file.parent / f"flame_{fps}fps.h5"
        if flame_h5_file.exists():
            continue
        flame_dir = video_file.parent / f"flame_{fps}fps"
        gender = get_gender(video_file.parent.parent.name,
                            video_file.parent.name)
        template_path = BASE_DIR / CONFIG["flame"][f"model_path_{gender}"]
        # with open(template_model_fname, "rb") as f:
        #     template = pickle.load(f, encoding="latin1")

        ringnet_file = video_file.parent / f"ringnet_{fps}fps.h5"
        openface_file = video_file.parent / f"openface_{fps}fps.csv"
        neutral_mesh_faces = Mesh(filename=str(video_file.parent /
                                               "neutral_mesh.ply")).f

        f = h5py.File(ringnet_file, "r")["flame_params"]

        pool = ActorPool([
            FrameOptimizer.remote(neutral_mesh_faces, template_path)
            for _ in range(8)
        ])
        openface_data = list(csv.reader(openface_file.open()))[1:]
        data = f["pose"], f["shape"], f["expression"], openface_data
        flame_dir.mkdir(parents=True, exist_ok=True)
        runners = []
        for i, (pose, shape, expression, openface) in enumerate(zip(*data), 1):
            flame_file = flame_dir / f"{i:06}.npy"
            if flame_file.exists():
                continue

            # Get 68 facial landmarks
            landmarks = [float(x) for x in openface[299:435]]
            # reshape into (x, y) pairs and drop the first 17 jaw landmarks, leaving 51 points
            target_2d_lmks = np.array(landmarks).reshape(2, -1).T[17:]
            runners.append(
                (pose, shape, expression, target_2d_lmks, flame_file))

        for file_name, flame_params in tqdm(
                pool.map(lambda a, v: a.fit_lmk2d_v2.remote(*v), runners),
                total=len(runners),
                leave=False,
        ):
            np.save(file_name, flame_params)

        np_files = list(flame_dir.glob("*.npy"))
        assert len(np_files) == len(openface_data)

        results = defaultdict(list)
        for file in flame_dir.glob("*.npy"):
            for key, value in np.load(file, allow_pickle=True).item().items():
                results[key].append(value)
        with h5py.File(flame_h5_file, "w") as f:
            for key, value in results.items():
                f.create_dataset(key, data=np.vstack(value))
Example #5
def remote_worker_caller(executor: ActorPool, sleep_time: int,
                         number_invokes: int):
    start = time.time()
    [
        executor.submit(lambda a, v: a.computation.remote(v), sleep_time)
        for n in range(number_invokes)
    ]
    submit_time = time.time() - start
    while executor.has_next():
        executor.get_next()
    return submit_time, time.time() - start
Example #6
    def train(self):
        """Train the agent."""
        file = open('log_reward', 'w')
        mainWorker = Worker.remote(self.env, self.hp)
        worker = ActorPool([
            Worker.remote(gym.make(self.hp.env_name), self.hp)
            for i in range(8)
        ])
        print(worker)
        for step in range(self.hp.nb_steps):
            # generate random perturbations
            deltas = self.policy.sample_deltas()

            positive_rewards = worker.map(
                lambda a, v: a.explore.remote(*v),
                [(self.normalizer, self.policy, 'positive', deltas[i])
                 for i in range(self.hp.nb_directions)])
            positive_rewards = [i for i in positive_rewards]

            negative_rewards = worker.map(
                lambda a, v: a.explore.remote(*v),
                [(self.normalizer, self.policy, 'negative', deltas[i])
                 for i in range(self.hp.nb_directions)])
            negative_rewards = [i for i in negative_rewards]

            # gathering the rewards
            all_rewards = np.array(positive_rewards + negative_rewards)

            #get the standard deviation of all rewards
            sigma_r = all_rewards.std()

            # sort the rewards to generate rollouts for updating the weights
            scores = {
                k: max(r_pos, r_neg)
                for k, (r_pos, r_neg
                        ) in enumerate(zip(positive_rewards, negative_rewards))
            }
            order = sorted(scores.keys(),
                           key=lambda x: scores[x],
                           reverse=True)[:self.hp.nb_best_directions]
            rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k])
                        for k in order]

            #update the policy with new weight
            self.policy.update(rollouts, sigma_r)
            #print result
            reward_evaluation = ray.get(
                mainWorker.explore.remote(self.normalizer, self.policy))
            print("Step ", step, "=> Reward: ", reward_evaluation)
            file.write(str(reward_evaluation) + '\n')
        return self.policy.tetha
Example #7
def test_get_next(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)
    for i in range(5):
        pool.submit(lambda a, v: a.f.remote(v), i)
        assert pool.get_next() == i + 1
Example #8
    def __init__(self, *, random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None,
                 store_results=False, n_workers=None, blocklist=()) -> None:
        if not ray.is_initialized():
            ray.init()

        super().__init__(random_seed=random_seed, volumes_dir=volumes_dir, scratch_dir=scratch_dir)
        self.data_handler = DataHandler.remote()
        self.ray_executor = RayExecutor.remote(random_seed=random_seed,
                                               volumes_dir=volumes_dir, scratch_dir=scratch_dir,
                                               store_results=store_results, blocklist=blocklist)

        if n_workers is None:
            n_workers = multiprocessing.cpu_count()
        self.actor_pool = ActorPool([
            RayExecutor.remote(random_seed=random_seed, volumes_dir=volumes_dir,
                               scratch_dir=scratch_dir, store_results=store_results,
                               blocklist=blocklist) for _ in range(n_workers)]
        )
Example #9
def test_map(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)

    index = 0
    for v in pool.map(lambda a, v: a.double.remote(v), range(5)):
        assert v == 2 * index
        index += 1
Example #10
def tqdm_map(actors, actor_tup_function, tups, res=None):
    assert res is not None, "provide way to save partial results"

    initial_len = len(res)
    actor_pool = ActorPool(actors)
    for tup in tups:
        actor_pool.submit(actor_tup_function, tup)

    pbar = tqdm(total=len(tups))
    while True:
        nxt = actor_pool.get_next_unordered()
        # copy to free up any references at the source
        res.append(copy.deepcopy(nxt))
        pbar.update(1)
        if (len(res) - initial_len) == len(tups):
            print("done with new tups")
            break

    pbar.close()
    return res
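A hedged usage sketch for tqdm_map above; the Summer actor, its add_pair method, and the argument values are hypothetical, and the sketch assumes tqdm_map plus its ray, tqdm, and copy imports are already in scope:

import ray

ray.init(ignore_reinit_error=True)


@ray.remote
class Summer:
    def add_pair(self, tup):
        a, b = tup
        return a + b


actors = [Summer.remote() for _ in range(4)]
tups = [(i, i + 1) for i in range(100)]

# tqdm_map mutates and returns `res`, so pass in the list that should collect the results.
results = tqdm_map(actors,
                   lambda actor, tup: actor.add_pair.remote(tup),
                   tups,
                   res=[])
print(len(results))  # 100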
Example #11
def test_map_unordered(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)

    total = []
    for v in pool.map_unordered(lambda a, v: a.double.remote(v), range(5)):
        total += [v]

    assert all(elem in [0, 2, 4, 6, 8] for elem in total)
Example #12
def test_map_gh23107(init):
    # Reference - https://github.com/ray-project/ray/issues/23107
    @ray.remote
    class DummyActor:
        async def identity(self, s):
            return s

    def func(a, v):
        return a.identity.remote(v)

    map_values = [1, 2, 3, 4, 5]

    pool_map = ActorPool([DummyActor.remote() for i in range(2)])
    pool_map.submit(func, 6)
    gen = pool_map.map(func, map_values)
    assert list(gen) == [1, 2, 3, 4, 5]

    pool_map_unordered = ActorPool([DummyActor.remote() for i in range(2)])
    pool_map_unordered.submit(func, 6)
    gen = pool_map_unordered.map_unordered(func, map_values)
    assert all(elem in [1, 2, 3, 4, 5] for elem in list(gen))
Example #13
def test_push(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    a1, a2 = MyActor.remote(), MyActor.remote()
    pool = ActorPool([a1])

    pool.submit(lambda a, v: a.double.remote(v), 1)
    assert pool.has_free() is False  # actor is busy
    with pytest.raises(ValueError):
        pool.push(a1)
    pool.push(a2)
    assert pool.has_free()  # a2 is available
Example #14
def get_flame_parameters_for_objs(
    voca_objs,
    dest_path,
    model_fname="/models/flame_model/ch_models/generic_model.pkl",
):
    global ray_is_init
    if not ray_is_init:
        ray.init(num_gpus=2)
        ray_is_init = True

    MeshFitterActor = ray.remote(MeshFitter).options(num_gpus=0.01, num_cpus=1)

    dest_path.mkdir(parents=True, exist_ok=True)

    files = [x for x in voca_objs if not (dest_path / x.name).exists()]

    if not files:
        return [dest_path / x.name for x in voca_objs]

    cpu_count = int(ray.available_resources()["CPU"]) - 2

    actors = []
    for i in range(cpu_count):
        actors.append(MeshFitterActor.remote(model_fname))

    pool = ActorPool(actors)

    def run(a, file_):
        vertices = np.load(file_, allow_pickle=True)
        return a.fit.remote(vertices, dest_path / file_.name)

    dest_paths = []
    for dest_file_path, flame_params in tqdm(
            pool.map_unordered(lambda a, file_: run(a, file_), voca_objs),
            total=len(voca_objs),
    ):
        np.save(dest_file_path, flame_params)
        dest_paths.append(dest_file_path)

    return sorted(dest_paths)
Example #15
def test_multiple_returns(init):
    @ray.remote
    class Foo(object):
        @ray.method(num_returns=2)
        def bar(self):
            return 1, 2

    pool = ActorPool([Foo.remote() for _ in range(2)])
    for _ in range(4):
        pool.submit(lambda a, v: a.bar.remote(), None)

    while pool.has_next():
        assert pool.get_next(timeout=None) == [1, 2]
Example #16
def test_get_next_timeout(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            while True:
                x = x + 1
                time.sleep(1)
            return None

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)
    pool.submit(lambda a, v: a.f.remote(v), 0)
    with pytest.raises(TimeoutError):
        pool.get_next_unordered(timeout=0.1)
Example #17
def test_get_next_unordered(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote() for _ in range(4)]
    pool = ActorPool(actors)

    total = []

    for i in range(5):
        pool.submit(lambda a, v: a.f.remote(v), i)
    while pool.has_next():
        total += [pool.get_next_unordered()]

    assert all(elem in [1, 2, 3, 4, 5] for elem in total)
Example #18
                num_gpu = 0.5
            elif '-large' in base_config.model.encoder_model:
                num_gpu = 1.0
            else:
                num_gpu = 2.0
        else:
            num_gpu = 0.0

        if num_gpu not in experiment_map:
            experiment_map[num_gpu] = []

        experiment_map[num_gpu] += experiments

    for num_gpu, experiments in experiment_map.items():
        max_available_actors = min(cpus // 2, gpus // num_gpu if gpus else cpus)
        pool = ActorPool([RayBatchActor.options(num_cpus=2, num_gpus=num_gpu).remote()
                          for _ in range(int(max_available_actors))])

        exp_result = pool.map_unordered(lambda actor, kwargs: actor.train.remote(kwargs), experiments)
        for name, results in groupby(sorted(exp_result, key=lambda t: t[0]), key=lambda t: t[0]):
            results = list(results)

            # Log the results
            logger.info('Experiment: %s ------------------', name)
            for name, runname, timedelta, devresult, devmaxresult, testresult in results:
                logger.info('\t%s training time: %10.3f', runname, timedelta)
                logger.info('\t%s dev. accuracy: %7.3f', runname, devresult)
                logger.info('\t%s dev. max. acc: %7.3f', runname, devmaxresult)
                logger.info('\t%s test accuracy: %7.3f', runname, testresult)

            # Write the average result
            if len(results) > 1:
Example #19
def test_pop_idle(init):
    @ray.remote
    class MyActor:
        def __init__(self):
            pass

        def f(self, x):
            return x + 1

        def double(self, x):
            return 2 * x

    actors = [MyActor.remote()]
    pool = ActorPool(actors)

    pool.submit(lambda a, v: a.double.remote(v), 1)
    assert pool.pop_idle() is None
    assert pool.has_free() is False  # actor is busy
    assert pool.get_next() == 2
    assert pool.has_free()
    pool.pop_idle()  # removes actor from pool
    assert pool.has_free() is False  # no more actors in pool
Example #20
class RayRunner(RunnerBase):
    def __init__(self, *, random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None,
                 store_results=False, n_workers=None, blocklist=()) -> None:
        if not ray.is_initialized():
            ray.init()

        super().__init__(random_seed=random_seed, volumes_dir=volumes_dir, scratch_dir=scratch_dir)
        self.data_handler = DataHandler.remote()
        self.ray_executor = RayExecutor.remote(random_seed=random_seed,
                                               volumes_dir=volumes_dir, scratch_dir=scratch_dir,
                                               store_results=store_results, blocklist=blocklist)

        if n_workers is None:
            n_workers = multiprocessing.cpu_count()
        self.actor_pool = ActorPool([
            RayExecutor.remote(random_seed=random_seed, volumes_dir=volumes_dir,
                               scratch_dir=scratch_dir, store_results=store_results,
                               blocklist=blocklist) for _ in range(n_workers)]
        )

        # Wait for primitives to be loaded on the workers
        # time.sleep(len(d3m_index.search()) * 0.15)

    def stop_ray(self):
        ray.shutdown()

    def get_request(self, request_id: str):
        return ray.get(ray.ObjectID(binascii.unhexlify(request_id)))

    def fit_pipeline_request(self, problem_description: Problem, pipeline: Pipeline,
                             input_data: typing.Sequence[ContainerType], *, timeout: float = None,
                             expose_outputs: bool = False) -> str:

        request_id = self.data_handler.add_data.remote(input_data)
        input_data_id = ray.get(request_id)
        request_id = self.ray_executor.fit_pipeline.remote(self.data_handler, problem_description, pipeline, input_data_id,
                                                           timeout=timeout, expose_outputs=expose_outputs)
        return request_id.hex()

    def produce_pipeline_request(self, fitted_pipeline_id: str, input_data: typing.Sequence[ContainerType], *,
                                 timeout: float = None, expose_outputs: bool = False) -> str:
        request_id = self.data_handler.add_data.remote(input_data)
        input_data_id = ray.get(request_id)
        request_id = self.ray_executor.produce_pipeline.remote(self.data_handler, fitted_pipeline_id, input_data_id, timeout=timeout,
                                                               expose_outputs=expose_outputs)
        return request_id.hex()

    def evaluate_pipeline_request(
            self, problem_description: Problem, pipeline: Pipeline,
            input_data: typing.Sequence[ContainerType], *, metrics: typing.Sequence[typing.Dict],
            data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None,
            data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None,
            timeout: float = None
    ) -> str:
        request_id = self.data_handler.add_data.remote(input_data)
        input_data_id = ray.get(request_id)

        request_id = self.ray_executor.evaluate_pipeline.remote(
            self.data_handler, problem_description, pipeline, input_data_id, metrics=metrics,
            data_preparation_pipeline=data_preparation_pipeline, scoring_pipeline=scoring_pipeline,
            data_preparation_params=data_preparation_params, scoring_params=scoring_params, timeout=timeout
        )
        return request_id.hex()

    def fitted_pipeline_id_exists(self, fitted_pipeline_id):
        request_id = self.ray_executor.fitted_pipeline_id_exists.remote(fitted_pipeline_id)
        return ray.get(request_id)

    def evaluate_pipelines(
            self, problem_description: Problem, pipelines: typing.Sequence[Pipeline],
            input_data: typing.Sequence[ContainerType], *, metrics: typing.Sequence[typing.Dict],
            data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None,
            data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None,
            timeout: float = None
    ) -> typing.Sequence[PipelineResult]:
        request_id = self.data_handler.add_data.remote(input_data)
        input_data_id = ray.get(request_id)

        args = []
        for pipeline in pipelines:
            args.append({
                'data_handler': self.data_handler, 'problem_description': problem_description, 'pipeline': pipeline,
                'input_data_id': input_data_id, 'metrics': metrics, 'data_preparation_pipeline': data_preparation_pipeline,
                'scoring_pipeline': scoring_pipeline, 'data_preparation_params': data_preparation_params,
                'scoring_params': scoring_params, 'timeout': timeout
            })

        return self.actor_pool.map(lambda actor, arg: actor.evaluate_pipeline.remote(**arg), args)
Example #21
def run_files(
    pbar,
    flame_fitting_dir,
    ringnet_dir,
    dir_,
    neutral_mesh_faces,
    dd,
    lmk_face_idx,
    lmk_b_coords,
    attempt=0,
):
    from tqdm import tqdm

    existing_files = set(
        os.path.basename(os.path.dirname(x))
        for x in glob(str(flame_fitting_dir / "*/flame_params.npy"))
    )

    files = sorted(
        [
            x
            for x in glob(str(dir_ / "*"))
            if os.path.basename(x)[:-4] not in existing_files
        ]
    )
    counter = 0
    actors = []

    cpu_count = int(ray.available_resources()["CPU"]) - 2

    pbar.set_description(f"{dir_.parent.name}/{dir_.name} ({cpu_count} cpus)")
    for x in range(min(len(files), cpu_count)):
        actors.append(
            FrameOptimizer.remote(
                dir_, neutral_mesh_faces, dd, lmk_face_idx, lmk_b_coords,
            )
        )
    file_len = len(files)
    pool = ActorPool(actors)
    try:
        pbar2 = tqdm(
            pool.map_unordered(
                lambda a, v: run(a, v, flame_fitting_dir, ringnet_dir), files
            ),
            total=file_len,
        )

        for x in pbar2:
            pbar2.set_description(f"{dir_.parent.name}/{dir_.name} ({cpu_count} cpus)")
            counter += 1
            if x is not None:
                flame_out_path, flame_out_params = x
                os.makedirs(os.path.dirname(flame_out_path), exist_ok=True)
                np.save(flame_out_path, flame_out_params)

    except ray.exceptions.RayActorError:
        if attempt > 10:
            raise Exception("too many attempts")
        for actor in actors:
            ray.kill(actor)
        if counter > 0:
            attempt = 0
        else:
            attempt += 1
        run_files(
            pbar,
            flame_fitting_dir,
            ringnet_dir,
            dir_,
            neutral_mesh_faces,
            dd,
            lmk_face_idx,
            lmk_b_coords,
            attempt=attempt,
        )
Example #22
import ray
from ray.util import ActorPool


@ray.remote
class Actor:
    def double(self, n):
        return n * 2


a1, a2 = Actor.remote(), Actor.remote()
pool = ActorPool([a1, a2])

# pool.map(..) returns a generator; results are yielded in the order the inputs were submitted
gen = pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])
print(list(gen))
# [2, 4, 6, 8]
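For comparison, a small sketch (continuing the example above with new actor and pool names) of map_unordered, which also returns a generator but yields each result as soon as it completes, so the output order can differ from the input order:

b1, b2 = Actor.remote(), Actor.remote()
unordered_pool = ActorPool([b1, b2])

# map_unordered(..) yields results in completion order, not submission order.
gen_unordered = unordered_pool.map_unordered(lambda a, v: a.double.remote(v), [1, 2, 3, 4])
print(sorted(gen_unordered))
# [2, 4, 6, 8]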
Example #23
                                                  Nb_sequences=s)
        inputs = sm.splitPandas(df=df, nb_splits=n)
        # for the walkers already initialized: just change the dataframe

        ac = ray.get([
            walker.set_df.remote(i) for walker, i in zip(walkers[0:n], inputs)
        ])

        # for walkers not already initialized, add new ones
        # find the initial yield, return the min yield from each worker
        # res=ray.get([walker.get_df.remote() for walker in walkers[0:n]])
        # print(res)
        res = ray.get([walker.init_yield.remote() for walker in walkers[0:n]])
        min_yield = [np.min(res)]
        start = time.time()
        pool = ActorPool(ac)
        with dm.suppress_stdout():
            # res=ray.get(pool.map(lambda a,v: a.walk.remote(min_yield[0],nb_mutations)))
            res = ray.get([
                walker.walk.remote(min_yield[0], nb_mutations)
                for walker in walkers[0:n]
            ])
        t.append((time.time() - start) / nb_steps)

        # [walker.reset() for walker in walkers]

    # save timings for those processor counts
    # include the standard deviation
    times.loc[:, 'nproc: %i' % n] = t
    times.to_pickle(
        path=
Example #24
        bytes = ray.cloudpickle.dumps(keyvalue[1])
        with open(self.basedir + '/' + keyvalue[0], "wb") as f:
            f.write(bytes)

    def restore(self, key: str) -> dict:
        if self.exists(key):
            with open(self.basedir + '/' + key, "rb") as f:
                bytes = f.read()
            return ray.cloudpickle.loads(bytes)
        else:
            return None


pool = ActorPool([
    FilePersistence.remote(),
    FilePersistence.remote(),
    FilePersistence.remote()
])


@ray.remote
class Account:
    def __init__(self, balance: float, minimal_balance: float,
                 account_key: str, persistence: ActorPool):
        self.persistence = persistence
        self.key = account_key
        if not self.restorestate():
            if balance < minimal_balance:
                raise Exception(
                    "Starting balance is less than minimal balance")
            self.balance = balance
Example #25
def _main(cnf_dir,
          n_datapoints_per_file,
          dest,
          n_workers,
          dumpfreq=100e3,
          num_subproblems=8,
          random_units=5,
          timeout=300,
          rootdir=None,
          n_datapoints=25000):
    logger = SimpleLogger(
        os.path.join(dest, "logs", "main_loop", f"{str(uuid.uuid4())}.txt"))
    try:
        ray.init(address='auto', redis_password='******')
    except:
        ray.init()
    tms_dict = {
        cnf_path: TaskManager(cnf_path, rootdir)
        for cnf_path in recursively_get_files(
            cnf_dir, forbidden=["bz2", "xz"], exts=["cnf", "gz"])
    }
    logger.write("STARTING DATA GENERATION LOOP WITH", len(tms_dict.keys()),
                 "CNFs")
    time.sleep(5)
    writer = lbdpWriter.remote(n_datapoints_per_file, dest)
    workers = [
        Worker.remote(writer,
                      dumpfreq=dumpfreq,
                      num_subproblems=num_subproblems,
                      random_units=random_units,
                      timeout=timeout) for _ in range(n_workers)
    ]
    pool = ActorPool(workers)
    for tm in tms_dict.values():
        for task in tm:
            if task is not None:
                pool.submit((lambda a, v: a.work.remote(v)), task)
                logger.write(
                    f"SUBMITTED TASK: TYPE {task.task_type}; CNF_PATH {task.cnf_path}"
                )
    try:
        LOOP_COUNT = 0
        while any([(x.waiting > 0) for x in tms_dict.values()]):
            LOOP_COUNT += 1
            if LOOP_COUNT % 100 == 0:
                if ray.get(writer.get_write_count.remote()) > n_datapoints:
                    print(
                        f"NUMBER OF WRITES EXCEEDS N_DATAPOINTS={n_datapoints}, STOPPING"
                    )
                    break
            cnf_path, tasks, original_task = pool.get_next_unordered()
            logger.write(f"GOT TASK RESULT (TYPE {original_task.task_type})")
            if original_task.task_type == 0 and len(tasks) == 0:
                logger.write("WARNING: Task", original_task.cnf_path,
                             "returned no subtasks.")
            tm = tms_dict[cnf_path]
            tm.set_tasks(tasks)
            if (tm.waiting == 0) and len(tm.tasks) == 0:
                print(f"ROOT TASK {tm.cnf_path} FINISHED")
                tms_dict.pop(cnf_path)
                print("POPPED FROM TMS_DICT")
                try:
                    shutil.rmtree(tm.tmpdir.name)
                    os.makedirs(tm.tmpdir.name)
                    time.sleep(1)
                except:
                    pass
            else:
                for task in tm:
                    if task.task_type == 0:
                        logger.write(f"SUBMITTING SUBPROBLEM (TIMEOUT) TASK")
                    elif task.task_type == 1:
                        logger.write(f"SUBMITTING DERIVED FORMULA")
                    elif task.task_type == 2:
                        logger.write(f"SUBMITTING DERIVED SUBFORMULA")
                    pool.submit((lambda a, v: a.work.remote(v)), task)
                logger.write(f"SUBMITTED {len(tasks)} NEW TASKS")
    finally:
        del writer
Example #26

import ray
from time import sleep
from uuid import uuid4

from ray.util import ActorPool


@ray.remote
class PoolActor:
    def __init__(self):
        self.id = str(uuid4())

    def computation(self, num):
        print(f'Actor with id {self.id} waiting for {num} sec')
        for x in range(num):
            sleep(1)
            print(f'Actor with id {self.id} slept for {x} sec')
        return num


# Create actors and add them to the pool
a1, a2, a3 = PoolActor.remote(), PoolActor.remote(), PoolActor.remote()
pool = ActorPool([a1, a2, a3])

print(list(pool.map(lambda a, v: a.computation.remote(v), [3, 4, 5, 4])))

pool.submit(lambda a, v: a.computation.remote(v), 3)
pool.submit(lambda a, v: a.computation.remote(v), 4)
pool.submit(lambda a, v: a.computation.remote(v), 5)
pool.submit(lambda a, v: a.computation.remote(v), 4)

print(pool.get_next())
print(pool.get_next())
print(pool.get_next())
print(pool.get_next())
Example #27
#print("Creating placement group")
#start = time.time()
#pg = placement_group([{"CPU": 10}, {"CPU": 10}, {"CPU": 10}, {"CPU": 10}, {"CPU": 10}], strategy="STRICT_SPREAD")
#ray.get(pg.ready())
#print(f"Placement group created {time.time() - start}")

# invoking remote function
#for n in number_invokes:
#    start, execution = ray.get(remote_caller_placement.remote(sleep_time, n, pg))
#    print(f'Executed {n} invocations with sleep {sleep_time} in {execution}s, start time {start}s')

# Delete placement group. This API is asynchronous.
#remove_placement_group(pg)
# Wait until placement group is killed.
#sleep(1)

#using executors

print("Creating Actor pool")
start = time.time()
actors = [JobExecutor.remote() for n in range(500)]
pool = ActorPool(actors)
print(f"Actor pool is created {time.time() - start}")

for n in number_invokes:
    start, execution = ray.get(remote_worker_caller.remote(
        pool, sleep_time, n))
    print(
        f'Executed {n} invocations with sleep {sleep_time} in {execution}s, start time {start}s'
    )