def random_search(
    operations: OperatorSet,
    output: List[Individual],
    start_candidates: List[Individual],
    max_evaluations: Optional[int] = None,
) -> List[Individual]:
    """Evaluate individuals from `operations.individual()` until the budget is spent.

    Parameters
    ----------
    operations: OperatorSet
        Operator set providing `evaluate`, `individual` and `wait_next`.
    output: List[Individual]
        List that is extended in place with the individual of every
        completed evaluation that carries a result.
    start_candidates: List[Individual]
        Individuals submitted for evaluation before any newly generated ones.
    max_evaluations: int, optional (default=None)
        The search stops once `output` holds at least this many individuals.
        If None, the search loop does not terminate by itself.

    Returns
    -------
    List[Individual]
        The `output` list, holding every individual that was recorded.
    """
    _check_base_search_hyperparameters(operations, output, start_candidates)

    with AsyncEvaluator() as evaluator:
        for candidate in start_candidates:
            evaluator.submit(operations.evaluate, candidate)

        while True:
            budget_spent = (
                max_evaluations is not None and len(output) >= max_evaluations
            )
            if budget_spent:
                break

            finished = operations.wait_next(evaluator)
            evaluation = finished.result
            if evaluation is not None:
                output.append(evaluation.individual)

            # One new job is submitted for every job that was waited on,
            # whether or not that job produced a result.
            evaluator.submit(operations.evaluate, operations.individual())

    return output
def async_ea(
    ops: OperatorSet,
    output: List[Individual],
    start_candidates: List[Individual],
    restart_callback: Optional[Callable[[], bool]] = None,
    max_n_evaluations: Optional[int] = None,
    population_size: int = 50,
) -> List[Individual]:
    """Perform asynchronous evolutionary optimization with given operators.

    Parameters
    ----------
    ops: OperatorSet
        Operator set with `evaluate`, `create`, `individual`, `eliminate`
        and `wait_next` functions.
    output: List[Individual]
        List used as the live population. It is emptied in place at every
        (re)start and then filled with successfully evaluated individuals.
    start_candidates: List[Individual]
        Candidate individuals which are evaluated first to start the search.
    restart_callback: Callable[[], bool], optional (default=None)
        Function which takes no arguments and returns True if the search
        should restart. It is called once after every completed evaluation.
    max_n_evaluations: int, optional (default=None)
        If specified, the search stops after this many evaluations have
        completed (failed evaluations count as well). Must be strictly
        positive. If None, the algorithm will be run indefinitely.
    population_size: int (default=50)
        Maximum number of individuals in the population at any time.

    Returns
    -------
    List[Individual]
        The individuals currently in the population (the `output` list).

    Raises
    ------
    ValueError
        If `max_n_evaluations` is not None and is zero or negative.
    """
    if max_n_evaluations is not None and max_n_evaluations <= 0:
        # The check rejects 0 as well, so the message must say
        # "strictly positive" rather than "non-negative".
        raise ValueError(
            "max_n_evaluations must be strictly positive or None, "
            f"is {max_n_evaluations}."
        )

    max_pop_size = population_size
    # Alias, not a copy: callers holding `output` see the population evolve.
    current_population = output
    n_evaluated_individuals = 0

    with AsyncEvaluator() as async_:
        should_restart = True
        while should_restart:
            should_restart = False
            current_population[:] = []
            log.info("Starting EA with new population.")
            for individual in start_candidates:
                async_.submit(ops.evaluate, individual)

            while (max_n_evaluations is None) or (
                n_evaluated_individuals < max_n_evaluations
            ):
                future = ops.wait_next(async_)
                if future.exception is None and future.result.error is None:
                    current_population.append(future.result.individual)
                    if len(current_population) > max_pop_size:
                        to_remove = ops.eliminate(current_population, 1)
                        current_population.remove(to_remove[0])

                if async_.job_queue_size <= 1:
                    # Technically 0 should work to keep near-100% worker load,
                    # especially if the dataset is sufficiently large to require
                    # significant time to evaluate a pipeline.
                    # Increasing the number decreases the risk of lost compute
                    # time, but also increases information lag. An offspring
                    # created too early might miss out on a better parent.
                    # NOTE(review): if every evaluation so far failed, the
                    # population is still empty here - confirm `ops.create`
                    # tolerates that.
                    new_individual = ops.create(current_population, 1)[0]
                    async_.submit(ops.evaluate, new_individual)

                should_restart = (
                    restart_callback is not None and restart_callback()
                )
                n_evaluated_individuals += 1
                if should_restart:
                    log.info(
                        "Restart criterion met. Creating new random population."
                    )
                    # The outer loop re-submits these as the new start set.
                    start_candidates = [
                        ops.individual() for _ in range(max_pop_size)
                    ]
                    break

    return current_population
def async_ea(
    ops: OperatorSet,
    output: List[Individual],
    start_candidates: List[Individual],
    restart_callback: Optional[Callable[[], bool]] = None,
    max_n_evaluations: Optional[int] = None,
    population_size: int = 50,
) -> List[Individual]:
    """Perform asynchronous evolutionary optimization with given operators.

    Parameters
    ----------
    ops: OperatorSet
        Operator set with `evaluate`, `create`, `individual`, `eliminate`
        and `wait_next` functions.
    output: List[Individual]
        List used as the live population. It is emptied in place at every
        (re)start and then filled with individuals whose evaluation raised
        no exception.
    start_candidates: List[Individual]
        Candidate individuals which are evaluated first to start the search.
    restart_callback: Callable[[], bool], optional (default=None)
        Function which takes no arguments and returns True if the search
        should restart. It is called once after every completed evaluation.
    max_n_evaluations: int, optional (default=None)
        If specified, the search stops after this many evaluations have
        completed (failed evaluations count as well). Must be strictly
        positive. If None, the algorithm will be run indefinitely.
    population_size: int (default=50)
        Maximum number of individuals in the population at any time.

    Returns
    -------
    List[Individual]
        The individuals currently in the population (the `output` list).

    Raises
    ------
    ValueError
        If `max_n_evaluations` is not None and is zero or negative.
    """
    if max_n_evaluations is not None and max_n_evaluations <= 0:
        # The check rejects 0 as well, so the message must say
        # "strictly positive" rather than "non-negative".
        raise ValueError(
            "max_n_evaluations must be strictly positive or None, "
            f"is {max_n_evaluations}."
        )

    max_pop_size = population_size
    logger = MultiprocessingLogger()
    # Every evaluation job receives the shared logger as a keyword argument.
    evaluate_log = partial(ops.evaluate, logger=logger)

    # Alias, not a copy: callers holding `output` see the population evolve.
    current_population = output
    n_evaluated_individuals = 0

    with AsyncEvaluator() as async_:
        should_restart = True
        while should_restart:
            should_restart = False
            current_population[:] = []
            log.info("Starting EA with new population.")
            for individual in start_candidates:
                async_.submit(evaluate_log, individual)

            while (max_n_evaluations is None) or (
                n_evaluated_individuals < max_n_evaluations
            ):
                future = ops.wait_next(async_)
                # Presumably forwards records gathered by worker processes
                # to `log` - confirm against MultiprocessingLogger.
                logger.flush_to_log(log)
                if future.exception is None:
                    individual = future.result.individual
                    current_population.append(individual)
                    if len(current_population) > max_pop_size:
                        to_remove = ops.eliminate(current_population, 1)
                        log_event(log, TOKENS.EA_REMOVE_IND, to_remove[0])
                        current_population.remove(to_remove[0])

                # Offspring are only created once the population holds more
                # than two individuals.
                # NOTE(review): if fewer than three evaluations succeed, no
                # new job is submitted from this loop - confirm that this
                # cannot stall `ops.wait_next`.
                if len(current_population) > 2:
                    new_individual = ops.create(current_population, 1)[0]
                    async_.submit(evaluate_log, new_individual)

                should_restart = (
                    restart_callback is not None and restart_callback()
                )
                n_evaluated_individuals += 1
                if should_restart:
                    log.info(
                        "Restart criterion met. Creating new random population."
                    )
                    log_event(log, TOKENS.EA_RESTART, n_evaluated_individuals)
                    # The outer loop re-submits these as the new start set.
                    start_candidates = [
                        ops.individual() for _ in range(max_pop_size)
                    ]
                    break

    return current_population