def tune_mnist_mxnet(num_samples=10, num_epochs=10):
    logger.info("Downloading MNIST data...")
    mnist_data = mx.test_utils.get_mnist()
    logger.info("Got MNIST data, starting Ray Tune.")

    config = {
        "layer_1_size": tune.choice([32, 64, 128]),
        "layer_2_size": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-3, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
    }

    scheduler = ASHAScheduler(max_t=num_epochs, grace_period=1, reduction_factor=2)

    analysis = tune.run(
        tune.with_parameters(
            train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs
        ),
        resources_per_trial={
            "cpu": 1,
        },
        metric="mean_accuracy",
        mode="max",
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        name="tune_mnist_mxnet",
    )
    return analysis
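# Hedged usage sketch for tune_mnist_mxnet() above: because `metric` and
# `mode` are passed to tune.run(), the returned ExperimentAnalysis exposes the
# best trial's config directly. The sample counts are illustrative.
#
# analysis = tune_mnist_mxnet(num_samples=10, num_epochs=10)
# print("Best hyperparameters found were:", analysis.best_config)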
def _set_api_key(api_key_file: Optional[str] = None, api_key: Optional[str] = None):
    """Set WandB API key from `api_key_file` or `api_key`."""
    if api_key_file:
        if api_key:
            raise ValueError("Both WandB `api_key_file` and `api_key` set.")
        with open(api_key_file, "rt") as fp:
            api_key = fp.readline().strip()

    # Try to get API key from external hook
    if not api_key and WANDB_SETUP_API_KEY_HOOK in os.environ:
        try:
            api_key = _load_class(os.environ[WANDB_SETUP_API_KEY_HOOK])()
        except Exception as e:
            logger.exception(
                f"Error executing {WANDB_SETUP_API_KEY_HOOK} to setup API key: {e}",
                exc_info=e,
            )

    if api_key:
        os.environ[WANDB_ENV_VAR] = api_key
    elif not os.environ.get(WANDB_ENV_VAR):
        try:
            # Check if user is already logged into wandb.
            wandb.ensure_configured()
            if wandb.api.api_key:
                logger.info("Already logged into W&B.")
                return
        except AttributeError:
            pass
        raise ValueError(
            "No WandB API key found. Either set the {} environment "
            "variable, pass `api_key` or `api_key_file` to the "
            "`WandbLoggerCallback` class as arguments, "
            "or run `wandb login` from the command line".format(WANDB_ENV_VAR)
        )
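# A minimal sketch of an external API-key hook, assuming the logic above: the
# WANDB_SETUP_API_KEY_HOOK environment variable is resolved via _load_class()
# and the result is called with no arguments, so any zero-argument callable
# returning the key string works. Module and file names here are hypothetical.
def fetch_wandb_api_key() -> str:
    # e.g. pull the key from a mounted secrets file instead of hardcoding it
    with open("/run/secrets/wandb_key", "rt") as fp:
        return fp.readline().strip()

# Then, before Tune starts:
# os.environ[WANDB_SETUP_API_KEY_HOOK] = "my_hooks.fetch_wandb_api_key"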
def tune_mnist_mxnet(num_samples=10, num_epochs=10):
    logger.info("Downloading MNIST data...")
    mnist_data = mx.test_utils.get_mnist()
    logger.info("Got MNIST data, starting Ray Tune.")

    config = {
        "layer_1_size": tune.choice([32, 64, 128]),
        "layer_2_size": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-3, 1e-1),
        "batch_size": tune.choice([32, 64, 128])
    }

    scheduler = ASHAScheduler(
        metric="mean_accuracy",
        mode="max",
        max_t=num_epochs,
        grace_period=1,
        reduction_factor=2)

    reporter = CLIReporter(
        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
        metric_columns=["loss", "mean_accuracy", "training_iteration"])

    tune.run(
        partial(train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs),
        resources_per_trial={
            "cpu": 1,
        },
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name="tune_mnist_mxnet")
def _set_api_key(wandb_config):
    """Set WandB API key from `wandb_config`.

    Pops the `api_key_file` and `api_key` keys from the `wandb_config`
    parameter.
    """
    api_key_file = os.path.expanduser(wandb_config.pop("api_key_file", ""))
    api_key = wandb_config.pop("api_key", None)

    if api_key_file:
        if api_key:
            raise ValueError("Both WandB `api_key_file` and `api_key` set.")
        with open(api_key_file, "rt") as fp:
            api_key = fp.readline().strip()

    if api_key:
        os.environ[WANDB_ENV_VAR] = api_key
    elif not os.environ.get(WANDB_ENV_VAR):
        try:
            # Check if user is already logged into wandb.
            wandb.ensure_configured()
            if wandb.api.api_key:
                logger.info("Already logged into W&B.")
                return
        except AttributeError:
            pass
        raise ValueError(
            "No WandB API key found. Either set the {} environment "
            "variable, pass `api_key` or `api_key_file` in the config, "
            "or run `wandb login` from the command line".format(WANDB_ENV_VAR))
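# Hedged usage sketch for the dict-based _set_api_key() above: the key-related
# entries are popped, so the remaining config can be forwarded untouched (for
# example to wandb.init). The values below are hypothetical.
#
# wandb_config = {"project": "my_project", "api_key_file": "~/.wandb_key"}
# _set_api_key(wandb_config)                 # reads the key file, sets the env var
# assert "api_key_file" not in wandb_config  # popped by _set_api_key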
def on_episode_end(
    self,
    worker: RolloutWorker,
    base_env: BaseEnv,
    policies: Dict[str, Policy],
    episode: MultiAgentEpisode,
    **kwargs,
):
    # Average the per-step ego speed history collected during the episode.
    ego_speed = episode.user_data["ego_speed"]
    mean_ego_speed = {
        agent_id: np.mean(speed_hist) for agent_id, speed_hist in ego_speed.items()
    }

    distance_travelled = dict()
    for _id, info in episode._agent_to_last_info.items():
        if info.get("_group_info"):
            for i, _info in enumerate(info["_group_info"]):
                distance_travelled[f"{_id}:AGENT-{i}"] = np.mean(_info["score"])
        else:
            distance_travelled[_id] = np.mean(info["score"])

    speed_list = list(map(lambda x: round(x, 3), mean_ego_speed.values()))
    dist_list = list(map(lambda x: round(x, 3), distance_travelled.values()))
    # Computed alongside the other metrics; not logged as a custom metric here.
    reward_list = list(map(lambda x: round(x, 3), episode.agent_rewards.values()))

    episode.custom_metrics["mean_ego_speed"] = sum(speed_list) / max(
        1, len(speed_list)
    )
    episode.custom_metrics["distance_travelled"] = sum(dist_list) / max(
        1, len(dist_list)
    )

    logger.info(f"episode {episode.episode_id} ended with {episode.length} steps")
def _set_api_key(api_key_file: Optional[str] = None, api_key: Optional[str] = None):
    """Set WandB API key from `api_key_file` or `api_key`."""
    if api_key_file:
        if api_key:
            raise ValueError("Both WandB `api_key_file` and `api_key` set.")
        with open(api_key_file, "rt") as fp:
            api_key = fp.readline().strip()

    if api_key:
        os.environ[WANDB_ENV_VAR] = api_key
    elif not os.environ.get(WANDB_ENV_VAR):
        try:
            # Check if user is already logged into wandb.
            wandb.ensure_configured()
            if wandb.api.api_key:
                logger.info("Already logged into W&B.")
                return
        except AttributeError:
            pass
        raise ValueError(
            "No WandB API key found. Either set the {} environment "
            "variable, pass `api_key` or `api_key_file` to the "
            "`WandbLoggerCallback` class as arguments, "
            "or run `wandb login` from the command line".format(WANDB_ENV_VAR)
        )
def train(config, reporter):
    import sys
    from ray import logger

    for i in range(10):
        reporter(timesteps_total=i)
        print("PRINT_STDOUT")
        print("PRINT_STDERR", file=sys.stderr)
        logger.info("LOG_STDERR")
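# Hedged sketch: this trainable appears to exist to exercise stdout/stderr
# capture. Assuming it runs under Ray Tune 1.x, the `log_to_file` argument of
# tune.run() redirects the prints and log records above into per-trial files:
#
# from ray import tune
# tune.run(train, log_to_file=("my_stdout.log", "my_stderr.log"))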
def step(self):
    self.iter += 1

    print("PRINT_STDOUT: {}".format(self.msg))
    print("PRINT_STDERR: {}".format(self.msg), file=sys.stderr)
    logger.info("LOG_STDERR: {}".format(self.msg))

    return {"num_resets": self.num_resets, "done": self.iter > 1}
def stop_all(self):
    # Lazily record the start time on the first check.
    if not self._start:
        self._start = time.time()
        return False

    now = time.time()
    if now - self._start >= self._timeout_seconds:
        logger.info(
            f"Reached timeout of {self._timeout_seconds} seconds. "
            "Stopping all trials."
        )
        return True
    return False
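# Hedged usage sketch: assuming stop_all() above belongs to a tune.Stopper
# subclass (here a hypothetical TimeoutStopper taking `timeout_seconds`),
# returning True ends the whole experiment when passed via `stop`:
#
# from ray import tune
# tune.run(trainable, stop=TimeoutStopper(timeout_seconds=60))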
def upload(
    self,
    cloud_path: Optional[str] = None,
    local_path: Optional[str] = None,
    clean_before: bool = False,
):
    """Upload checkpoint to cloud.

    This will push the checkpoint directory from local storage
    to ``cloud_path``.

    If a ``cloud_path`` argument is provided and ``self.cloud_path``
    is unset, it will be set to ``cloud_path``.

    Args:
        cloud_path: Cloud path to upload the checkpoint to.
            Defaults to ``self.cloud_path``.
        local_path: Local path to upload the checkpoint from.
            Defaults to ``self.local_path``.
        clean_before: If True, deletes any existing contents of the
            cloud bucket before storing new data.
    """
    local_path = local_path or self.local_path
    if not local_path:
        raise RuntimeError(
            "Could not upload trial checkpoint: No local "
            "path is set. Fix this by either passing a "
            "`local_path` to your call to `upload()` or by "
            "passing a `local_path` into the constructor."
        )

    cloud_path = cloud_path or self.cloud_path
    if not cloud_path:
        raise RuntimeError(
            "Could not upload trial checkpoint: No cloud "
            "path is set. Fix this by either passing a "
            "`cloud_path` to your call to `upload()` or by "
            "passing a `cloud_path` into the constructor. The latter "
            "should automatically be done if you pass the correct "
            "`tune.SyncConfig`."
        )

    if not self.cloud_path:
        self.cloud_path = cloud_path

    if clean_before:
        logger.info(f"Clearing bucket contents before upload: {cloud_path}")
        delete_at_uri(cloud_path)

    # Actually upload
    upload_to_uri(local_path, cloud_path)

    return cloud_path
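# Hedged usage sketch for upload() above, assuming the enclosing class is a
# trial-checkpoint object whose constructor takes `local_path` and
# `cloud_path` (names inferred from the error messages, not confirmed here):
#
# checkpoint = TrialCheckpoint(local_path="/tmp/ckpt", cloud_path="s3://bucket/ckpt")
# checkpoint.upload(clean_before=True)  # clears the bucket, then pushes /tmp/ckpt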
def on_episode_start(
    self,
    worker: RolloutWorker,
    base_env: BaseEnv,
    policies: Dict[str, Policy],
    episode: MultiAgentEpisode,
    **kwargs,
):
    logger.info("episode {} started".format(episode.episode_id))
    episode.user_data["ego_speed"] = defaultdict(list)
    episode.user_data["step_heading_error"] = dict()
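# Hedged wiring sketch: assuming the episode hooks defined above live on a
# DefaultCallbacks subclass (hypothetically named EpisodeMetricsCallbacks),
# RLlib 1.x registers them through the trainer config:
#
# from ray.rllib.agents.callbacks import DefaultCallbacks
#
# class EpisodeMetricsCallbacks(DefaultCallbacks):
#     ...  # on_episode_start / on_episode_end as defined in this file
#
# trainer_config = {"callbacks": EpisodeMetricsCallbacks}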
def _predict(model: xgb.Booster,
             data: RayDMatrix,
             num_actors: int = 4,
             cpus_per_actor: int = 0,
             gpus_per_actor: int = 0,
             resources_per_actor: Optional[Dict] = None,
             **kwargs):
    _assert_ray_support()

    if not ray.is_initialized():
        ray.init()

    # Create remote actors
    actors = [
        _create_actor(i, num_actors, cpus_per_actor, gpus_per_actor,
                      resources_per_actor) for i in range(num_actors)
    ]
    logger.info(f"[RayXGBoost] Created {len(actors)} remote actors.")

    # Split data across workers
    wait_load = []
    for actor in actors:
        wait_load.extend(_trigger_data_load(actor, data, []))

    try:
        ray.get(wait_load)
    except Exception as exc:
        logger.warning(f"Caught an error during prediction: {str(exc)}")
        _shutdown(actors, force=True)
        raise

    # Put model into object store
    model_ref = ray.put(model)

    logger.info("[RayXGBoost] Starting XGBoost prediction.")

    # Predict
    fut = [actor.predict.remote(model_ref, data, **kwargs) for actor in actors]

    try:
        actor_results = ray.get(fut)
    except Exception as exc:
        logger.warning(f"Caught an error during prediction: {str(exc)}")
        _shutdown(remote_workers=actors, force=True)
        raise

    _shutdown(remote_workers=actors, force=False)

    return combine_data(data.sharding, actor_results)
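# Hedged usage sketch for _predict() above, assuming xgboost_ray's RayDMatrix
# and a previously trained booster; the file names are hypothetical:
#
# import xgboost as xgb
# from xgboost_ray import RayDMatrix
#
# bst = xgb.Booster(model_file="model.xgb")
# data = RayDMatrix("data.parquet")
# predictions = _predict(bst, data, num_actors=4)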
def stop_all(self):
    now = time.time()
    if self._last_check:
        # Deduct the time elapsed since the last check from the budget.
        taken = now - self._last_check
        self._budget -= taken
    # Record the check time unconditionally so the budget starts draining
    # after the first call even if `_last_check` was initialized falsy.
    self._last_check = now

    if self._budget <= 0:
        logger.info(
            f"Reached timeout of {self._timeout_seconds} seconds. "
            "Stopping all trials."
        )
        return True
    return False
def step(self):
    self.iter += 1

    print("PRINT_STDOUT: {}".format(self.msg))
    print("PRINT_STDERR: {}".format(self.msg), file=sys.stderr)
    logger.info("LOG_STDERR: {}".format(self.msg))

    if self.sleep:
        time.sleep(self.sleep)

    return {
        "id": self.config["id"],
        "num_resets": self.num_resets,
        "done": self.iter > 1,
        "iter": self.iter,
    }
def rollout(trainer, env_name, metrics_handler, num_steps, num_episodes, log_dir):
    """Reference: https://github.com/ray-project/ray/blob/master/rllib/rollout.py"""
    policy_agent_mapping = default_policy_agent_mapping
    assert hasattr(trainer, "workers") and isinstance(trainer.workers, WorkerSet)
    env = trainer.workers.local_worker().env
    multiagent = isinstance(env, MultiAgentEnv)
    if trainer.workers.local_worker().multiagent:
        policy_agent_mapping = trainer.config["multiagent"]["policy_mapping_fn"]
    policy_map = trainer.workers.local_worker().policy_map
    state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
    use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
    action_init = {
        p: flatten_to_single_ndarray(m.action_space.sample())
        for p, m in policy_map.items()
    }

    for episode in range(num_episodes):
        mapping_cache = {}  # in case policy_agent_mapping is stochastic
        obs = env.reset()
        agent_states = DefaultMapping(
            lambda agent_id: state_init[mapping_cache[agent_id]])
        prev_actions = DefaultMapping(
            lambda agent_id: action_init[mapping_cache[agent_id]])
        prev_rewards = collections.defaultdict(lambda: 0.0)
        done = False
        reward_total = 0.0
        step = 0
        while not done and step < num_steps:
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        a_action, p_state, _ = trainer.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id,
                        )
                        agent_states[agent_id] = p_state
                    else:
                        a_action = trainer.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id,
                        )
                    a_action = flatten_to_single_ndarray(a_action)
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict
            action = action if multiagent else action[_DUMMY_AGENT_ID]

            next_obs, reward, done, info = env.step(action)
            metrics_handler.log_step(
                episode=episode,
                observations=multi_obs,
                actions=action,
                rewards=reward,
                dones=done,
                infos=info,
            )
            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward

            # filter dead agents
            if multiagent:
                next_obs = {
                    agent_id: obs
                    for agent_id, obs in next_obs.items()
                    if not done[agent_id]
                }

            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
            else:
                reward_total += reward
            step += 1
            obs = next_obs
        logger.info("\nEpisode #{}: steps: {} reward: {}".format(
            episode, step, reward_total))

    metrics_handler.write_to_csv(csv_dir=log_dir)
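# Hedged usage sketch for rollout() above: it expects a live RLlib trainer
# (typically restored from a checkpoint) plus a metrics handler exposing the
# log_step()/write_to_csv() interface used in the loop. Names below are
# illustrative; the PPO choice is an assumption, not from the source.
#
# from ray.rllib.agents.ppo import PPOTrainer
#
# trainer = PPOTrainer(config=config, env=env_name)
# trainer.restore("/path/to/checkpoint")
# rollout(trainer, env_name, metrics_handler, num_steps=1000,
#         num_episodes=10, log_dir="./logs")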
def _train(params: Dict,
           dtrain: RayDMatrix,
           *args,
           evals=(),
           num_actors: int = 4,
           cpus_per_actor: int = 0,
           gpus_per_actor: int = -1,
           resources_per_actor: Optional[Dict] = None,
           checkpoint_prefix: Optional[str] = None,
           checkpoint_path: str = "/tmp",
           checkpoint_frequency: int = 5,
           **kwargs):
    _assert_ray_support()

    if not ray.is_initialized():
        ray.init()

    if gpus_per_actor == -1:
        gpus_per_actor = 0
        if "tree_method" in params and params["tree_method"].startswith("gpu"):
            gpus_per_actor = 1

    if cpus_per_actor <= 0:
        cluster_cpus = _ray_get_cluster_cpus() or 1
        cpus_per_actor = min(
            int(_get_max_node_cpus() or 1), int(cluster_cpus // num_actors))

    if "nthread" in params:
        if params["nthread"] > cpus_per_actor:
            raise ValueError(
                "Specified number of threads greater than number of CPUs. "
                "\nFIX THIS by passing a lower value for the `nthread` "
                "parameter or a higher number for `cpus_per_actor`.")
    else:
        params["nthread"] = cpus_per_actor

    # Create remote actors
    actors = [
        _create_actor(i, num_actors, cpus_per_actor, gpus_per_actor,
                      resources_per_actor, checkpoint_prefix, checkpoint_path,
                      checkpoint_frequency) for i in range(num_actors)
    ]
    logger.info(f"[RayXGBoost] Created {len(actors)} remote actors.")

    # Split data across workers
    wait_load = []
    for actor in actors:
        wait_load.extend(_trigger_data_load(actor, dtrain, evals))
    ray.get(wait_load)

    logger.info("[RayXGBoost] Starting XGBoost training.")

    # Start tracker
    env = _start_rabit_tracker(num_actors)
    rabit_args = [("%s=%s" % item).encode() for item in env.items()]

    # Train
    fut = [
        actor.train.remote(rabit_args, params, dtrain, evals, *args, **kwargs)
        for actor in actors
    ]

    try:
        ray.get(fut)
    except RayActorError:
        for actor in actors:
            ray.kill(actor)
        raise

    # All results should be the same because of Rabit tracking. So we just
    # return the first one.
    res: Dict[str, Any] = ray.get(fut[0])
    bst = res["bst"]
    evals_result = res["evals_result"]

    all_res = ray.get(fut)
    total_n = sum(res["train_n"] or 0 for res in all_res)

    logger.info(f"[RayXGBoost] Finished XGBoost training on training data "
                f"with total N={total_n:,}.")

    if checkpoint_prefix:
        _cleanup(checkpoint_prefix, checkpoint_path, num_actors)

    return bst, evals_result
def _train(params: Dict,
           dtrain: RayDMatrix,
           *args,
           evals=(),
           num_actors: int = 4,
           cpus_per_actor: int = 0,
           gpus_per_actor: int = -1,
           resources_per_actor: Optional[Dict] = None,
           checkpoint_prefix: Optional[str] = None,
           checkpoint_path: str = "/tmp",
           checkpoint_frequency: int = 5,
           **kwargs) -> Tuple[xgb.Booster, Dict, Dict]:
    _assert_ray_support()

    if not ray.is_initialized():
        ray.init()

    if gpus_per_actor == -1:
        gpus_per_actor = 0
        if "tree_method" in params and params["tree_method"].startswith("gpu"):
            gpus_per_actor = 1

    if cpus_per_actor <= 0:
        cluster_cpus = _ray_get_cluster_cpus() or 1
        cpus_per_actor = min(
            int(_get_max_node_cpus() or 1), int(cluster_cpus // num_actors))

    if "nthread" in params:
        if params["nthread"] > cpus_per_actor:
            raise ValueError(
                "Specified number of threads greater than number of CPUs. "
                "\nFIX THIS by passing a lower value for the `nthread` "
                "parameter or a higher number for `cpus_per_actor`.")
    else:
        params["nthread"] = cpus_per_actor

    # Create queue for communication from worker to caller.
    # Always create queue.
    queue = Queue()

    # Create remote actors
    actors = [
        _create_actor(i, num_actors, cpus_per_actor, gpus_per_actor,
                      resources_per_actor, queue, checkpoint_prefix,
                      checkpoint_path, checkpoint_frequency)
        for i in range(num_actors)
    ]
    logger.info(f"[RayXGBoost] Created {len(actors)} remote actors.")

    # Split data across workers
    wait_load = []
    for actor in actors:
        wait_load.extend(_trigger_data_load(actor, dtrain, evals))

    try:
        ray.get(wait_load)
    except Exception:
        _shutdown(actors, queue, force=True)
        raise

    logger.info("[RayXGBoost] Starting XGBoost training.")

    # Start tracker
    env = _start_rabit_tracker(num_actors)
    rabit_args = [("%s=%s" % item).encode() for item in env.items()]

    # Train
    fut = [
        actor.train.remote(rabit_args, params, dtrain, evals, *args, **kwargs)
        for actor in actors
    ]

    callback_returns = [list() for _ in range(len(actors))]
    try:
        not_ready = fut
        while not_ready:
            if queue:
                while not queue.empty():
                    (actor_rank, item) = queue.get()
                    if isinstance(item, Callable):
                        item()
                    else:
                        callback_returns[actor_rank].append(item)
            ready, not_ready = ray.wait(not_ready, timeout=0)
            logger.debug("[RayXGBoost] Waiting for results...")
            ray.get(ready)
        # Once everything is ready
        ray.get(fut)
    # The inner loop should catch all exceptions
    except Exception:
        _shutdown(remote_workers=actors, queue=queue, force=True)
        raise

    # All results should be the same because of Rabit tracking. So we just
    # return the first one.
    res: Dict[str, Any] = ray.get(fut[0])
    bst = res["bst"]
    evals_result = res["evals_result"]

    additional_results = {}
    if callback_returns:
        additional_results["callback_returns"] = callback_returns

    all_res = ray.get(fut)
    total_n = sum(res["train_n"] or 0 for res in all_res)

    logger.info(f"[RayXGBoost] Finished XGBoost training on training data "
                f"with total N={total_n:,}.")

    if checkpoint_prefix:
        _cleanup(checkpoint_prefix, checkpoint_path, num_actors)

    _shutdown(remote_workers=actors, queue=queue, force=False)

    return bst, evals_result, additional_results
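# Hedged usage sketch for the queue-based _train() above, assuming
# xgboost_ray-style inputs; parameters and file names are illustrative:
#
# from xgboost_ray import RayDMatrix
#
# dtrain = RayDMatrix("train.parquet", label="target")
# params = {"objective": "binary:logistic", "eval_metric": "logloss"}
# bst, evals_result, extra = _train(params, dtrain, num_actors=4)
# bst.save_model("model.xgb")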