Example #1
    def search(self, load_from: str = None, save_every: int = None):
        # Optionally resume from a previously saved search state.
        if load_from:
            self.load_state(load_from)

        ray.init(local_mode=debug_mode())

        # Put shared, read-only objects in the Ray object store once so the
        # workers receive handles instead of per-task copies.
        trainer = ray.put(self.trainer)
        ss = ray.put(self.config.search_space)
        scheduler = Scheduler([
            GPUTrainer.remote(ss, trainer)
            for _ in range(self.max_parallel_evaluations)
        ])
        self.log.info(
            f"Searching with {self.max_parallel_evaluations} workers.")

        def should_submit_more(cap):
            # Keep submitting while finished + in-flight evaluations are
            # below the cap and an idle worker exists.
            return (len(self.history) + scheduler.pending_tasks() < cap) \
               and scheduler.has_a_free_worker()

        def point_number():
            # 1-based index of the next point, counting in-flight tasks.
            return len(self.history) + scheduler.pending_tasks() + 1

        # Phase 1: seed the population with randomly sampled points.
        while len(self.history) < self.initial_population_size:
            if should_submit_more(cap=self.initial_population_size):
                self.log.info(f"Populating #{point_number()}...")
                scheduler.submit(self.random_sample())
            else:
                info = scheduler.await_any()
                self.population.append(info)
                self.history.append(info)
                self.maybe_save_state(save_every)

        # Phase 2: evolve the population until the round budget is spent.
        while len(self.history) < self.rounds:
            if should_submit_more(cap=self.rounds):
                self.log.info(f"Searching #{point_number()}...")
                # Tournament selection: mutate the fittest member of a
                # random sample of the population.
                sample = np.random.choice(self.population,
                                          size=self.sample_size)
                parent = max(sample, key=self.get_mo_fitness_fn())

                scheduler.submit(self.evolve(parent.point))
            else:
                info = scheduler.await_any()
                self.population.append(info)
                # Age out the oldest individuals to keep the population at
                # a fixed size.
                while len(self.population) > self.population_size:
                    self.population.pop(0)
                self.history.append(info)
                self.maybe_save_state(save_every)
                self.bounds_log()
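
Both examples drive a Scheduler that is not shown here. The sketch below is one plausible implementation over a fixed pool of Ray actors, using ray.wait to collect whichever task finishes first; the actor method name evaluate and the exact bookkeeping are assumptions for illustration, not the original class.

import ray

class Scheduler:
    """Dispatches tasks to a fixed pool of Ray actors."""

    def __init__(self, workers):
        self.workers = list(workers)
        self.free = list(self.workers)  # actors with no task in flight
        self.in_flight = {}             # ObjectRef -> actor

    def has_a_free_worker(self):
        return bool(self.free)

    def pending_tasks(self):
        return len(self.in_flight)

    def submit(self, *args, **kwargs):
        # Assumes each actor exposes a remote `evaluate` method
        # (hypothetical name).
        worker = self.free.pop()
        ref = worker.evaluate.remote(*args, **kwargs)
        self.in_flight[ref] = worker

    def await_any(self):
        # Block until any in-flight task finishes, recycle its actor, and
        # return that task's result.
        (done,), _ = ray.wait(list(self.in_flight), num_returns=1)
        self.free.append(self.in_flight.pop(done))
        return ray.get(done)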
Example #2
class RayWorkerManager(AbstractWorkerManager):
    """
    Adapts Dragonfly's "workers", which evaluate a function at a point,
    to Ray actors.
    """
    def __init__(self,
                 max_pending=None,
                 default_func_caller=None,
                 cpu_only=False):
        num_gpus_available = len(
            tf.config.experimental.list_physical_devices('GPU'))

        # Default to one pending task per GPU, or a small fixed pool on CPU.
        if max_pending is None:
            max_pending = 4 if cpu_only else max(1, num_gpus_available)
        super().__init__(max_pending)

        # Reserve a GPU per worker only when one is actually usable.
        worker_resources = {
            "num_gpus":
            0 if debug_mode() or cpu_only or num_gpus_available == 0 else 1,
        }

        self.worker_cls = ray.remote(**worker_resources)(Worker)

        if not ray.is_initialized():
            ray.init(local_mode=debug_mode(), ignore_reinit_error=True)

        self.max_pending = max_pending
        self.default_func_caller = default_func_caller
        if self.default_func_caller:
            # Cache the default caller in the object store so repeated
            # dispatches reuse a single serialized copy.
            self.caller_handle = ray.put(default_func_caller)
        self.scheduler = Scheduler(self.worker_cls.remote()
                                   for _ in range(max_pending))
        self.last_receive_time = 0

    def _child_reset(self):
        pass

    def close_all_queries(self):
        pass

    def a_worker_is_free(self, force_await=False):
        # Block for a result only when every worker is busy (or when
        # explicitly forced), then fill in the timing fields Dragonfly
        # expects on the returned qinfo.
        if not self.scheduler.has_a_free_worker() or force_await:
            qinfo = self.scheduler.await_any()
            if not hasattr(qinfo, 'true_val'):
                qinfo.true_val = qinfo.val

            if hasattr(qinfo, 'caller_eval_cost') \
                    and qinfo.caller_eval_cost is not None:
                qinfo.eval_time = qinfo.caller_eval_cost
            else:
                qinfo.eval_time = 1.0
            qinfo.receive_time = qinfo.send_time + qinfo.eval_time
            qinfo.worker_id = 0
            self.last_receive_time = qinfo.receive_time

            self.latest_results.append(qinfo)

        return self.last_receive_time

    def all_workers_are_free(self):
        # Drain every pending task so the optimizer sees an idle pool.
        num_pending_tasks = self.scheduler.pending_tasks()
        for _ in range(num_pending_tasks):
            self.a_worker_is_free(force_await=True)
        return self.last_receive_time

    def _dispatch_experiment(self, func_caller, qinfo, **kwargs):
        # Swap in the cached object-store handle for the default caller.
        if func_caller is self.default_func_caller:
            func_caller = self.caller_handle
        self.scheduler.submit(func_caller, qinfo, **kwargs)

    def dispatch_single_experiment(self, func_caller, qinfo, **kwargs):
        self._dispatch_experiment(func_caller, qinfo, **kwargs)

    def dispatch_batch_of_experiments(self, func_caller, qinfos, **kwargs):
        for qinfo in qinfos:
            self.dispatch_single_experiment(func_caller, qinfo, **kwargs)

    def get_time_distro_info(self):
        return 'caller_eval_cost'

    def get_poll_time_real(self):
        return 5.0
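
A minimal sketch of how this manager might be driven. Here my_func_caller and qinfos stand in for Dragonfly experiment-caller and query-info objects created elsewhere, and latest_results is assumed to be initialized by AbstractWorkerManager; all of these names are hypothetical as used below.

manager = RayWorkerManager(max_pending=2, cpu_only=True,
                           default_func_caller=my_func_caller)

# Fan out a batch of queries, then block until every result is collected.
manager.dispatch_batch_of_experiments(my_func_caller, qinfos)
manager.all_workers_are_free()

for qinfo in manager.latest_results:
    print(qinfo.point, qinfo.true_val, qinfo.eval_time)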