def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             **kwargs):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)
    model_config = merge_dicts(BASELINE_CONFIG,
                               model_config['custom_model_config'])
    # new way to get model config directly from keyword arguments
    model_config = merge_dicts(model_config, kwargs)

    state_embed_size = model_config['state_embed_size']
    self.use_rnn = model_config['use_rnn']
    rnn_type = model_config['rnn_type']
    self.use_prev_action_reward = model_config['use_prev_action_reward']
    action_net_kwargs = model_config['action_net_kwargs']
    if isinstance(action_space, gym.spaces.Discrete):
        action_net_kwargs.update({'discrete': True, 'n': action_space.n})
        self.discrete = True
    else:
        self.discrete = False

    def get_factory(network_name):
        return getattr(
            getattr(models.torch, network_name),
            model_config[f'{network_name}_net'])

    self._pov_network, pov_embed_size = get_factory('pov')(
        **model_config['pov_net_kwargs'])
    self._vector_network, vector_embed_size = get_factory('vector')(
        **model_config['vector_net_kwargs'])
    state_input_size = pov_embed_size + vector_embed_size
    if self.use_prev_action_reward:
        self._action_network, action_embed_size = get_factory('action')(
            **action_net_kwargs)
        self._reward_network, reward_embed_size = get_factory('reward')(
            **model_config['reward_net_kwargs'])
        state_input_size += action_embed_size + reward_embed_size

    rnn_config = model_config.get('rnn_config')
    if self.use_rnn:
        state_embed_size = rnn_config['hidden_size']
        if rnn_type == 'lstm':
            self._rnn = models.torch.rnn.LSTMBaseline(
                state_input_size, **rnn_config)
        elif rnn_type == 'gru':
            self._rnn = models.torch.rnn.GRUBaseline(
                state_input_size, **rnn_config)
        else:
            raise NotImplementedError
    else:
        self._state_network = nn.Sequential(
            nn.Linear(state_input_size, state_embed_size),
            nn.ELU(),
        )
    self._value_head = nn.Sequential(nn.Linear(state_embed_size, 1), )
    self._policy_head = nn.Sequential(
        nn.Linear(state_embed_size, num_outputs), )
def create_trial_if_possible(self, experiment_spec: Dict,
                             output_path: str) -> Optional[Trial]:
    logger.debug("creating trial")
    trial_id = Trial.generate_id()
    suggested_config = self.searcher.suggest(trial_id)
    if suggested_config == Searcher.FINISHED:
        self._finished = True
        logger.debug("Searcher has finished.")
        return
    if suggested_config is None:
        return
    spec = copy.deepcopy(experiment_spec)
    spec["config"] = merge_dicts(spec["config"],
                                 copy.deepcopy(suggested_config))

    # Create a new trial_id if duplicate trial is created
    flattened_config = resolve_nested_dict(spec["config"])
    self._counter += 1
    tag = "{0}_{1}".format(str(self._counter), format_vars(flattened_config))
    trial = create_trial_from_spec(
        spec,
        output_path,
        self._parser,
        evaluated_params=flatten_dict(suggested_config),
        experiment_tag=tag,
        trial_id=trial_id)
    return trial
def _generate_trials(self, experiment_spec, output_path=""):
    """Generates trials with configurations from `_suggest`.

    Creates a trial_id that is passed into `_suggest`.

    Yields:
        Trial objects constructed according to `spec`
    """
    if "run" not in experiment_spec:
        raise TuneError("Must specify `run` in {}".format(experiment_spec))
    for _ in range(experiment_spec.get("num_samples", 1)):
        trial_id = Trial.generate_id()
        while True:
            suggested_config = self._suggest(trial_id)
            if suggested_config is None:
                yield None
            else:
                break
        spec = copy.deepcopy(experiment_spec)
        spec["config"] = merge_dicts(spec["config"],
                                     copy.deepcopy(suggested_config))
        flattened_config = resolve_nested_dict(spec["config"])
        self._counter += 1
        tag = "{0}_{1}".format(
            str(self._counter), format_vars(flattened_config))
        yield create_trial_from_spec(
            spec,
            output_path,
            self._parser,
            evaluated_params=flatten_dict(suggested_config),
            experiment_tag=tag,
            trial_id=trial_id)
def step(trainer, info: dict):
    """Define a custom training loop for tune.

    This is needed because we want to manually update our scheduler.
    """
    train_stats = trainer.train(profile=True)
    validation_stats = trainer.validate(profile=True)
    # Manually update our scheduler with the given metric.
    trainer.update_scheduler(metric=validation_stats["val_loss"])
    all_stats = merge_dicts(train_stats, validation_stats)
    return all_stats
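
# Usage sketch (an assumption, not part of the snippet above): a `step(trainer, info)`
# function like this is typically hooked into Tune via Ray SGD's
# TorchTrainer.as_trainable(override_tune_step=...), so Tune drives the custom loop.
# `MyTrainingOperator` and the search space below are hypothetical placeholders.
from ray import tune
from ray.util.sgd.torch import TorchTrainer

TorchTrainable = TorchTrainer.as_trainable(
    training_operator_cls=MyTrainingOperator,  # hypothetical operator class
    num_workers=2,
    use_gpu=False,
    override_tune_step=step,  # the custom training loop defined above
)
analysis = tune.run(
    TorchTrainable,
    config={"lr": tune.grid_search([1e-3, 1e-4])},  # illustrative search space
    stop={"training_iteration": 10},
)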
def run(args, parser):
    config = {}
    # Load configuration from file
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory.")
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Ray by default takes up ~100 GB, so cap memory usage here.
    ray.init(memory=4.2e+10, object_store_memory=1e+10)

    env_name = config['env']
    if env_name == 'harvest_env':
        def env_creator(env_config):
            return HarvestEnv(env_config)
    else:
        def env_creator(env_config):
            return CleanupEnv(env_config)
    register_env(env_name, env_creator)
    ModelCatalog.register_custom_model("conv_to_fc_net", ObedienceLSTM)

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, args.monitor)
def parse_inputs(inputs: Iterable[StrOrPath]) -> dict:
    # note that importing ray.tune does not override tf.executing_eagerly
    # pylint: disable=import-outside-toplevel
    from ray.tune.utils import merge_dicts

    out: dict = {}
    for inp in inputs:
        with open(inp, 'r') as f:
            params = yaml.safe_load(f)
        out = merge_dicts(out, params)
    return out
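
# Illustrative sketch (not from the snippet above): ray.tune.utils.merge_dicts(a, b)
# returns a new dict with `b` deep-merged into `a`, so keys from later YAML files
# override earlier ones while nested sections are merged rather than replaced wholesale.
# The dict contents below are made up for demonstration.
from ray.tune.utils import merge_dicts

base = {"env_config": {"seed": 1, "num_levels": 5}, "lr": 1e-3}
override = {"env_config": {"seed": 42}, "num_workers": 4}
merged = merge_dicts(base, override)
# merged == {"env_config": {"seed": 42, "num_levels": 5},
#            "lr": 1e-3, "num_workers": 4}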
def custom_train_func(trainer, info):
    train_stats = trainer.train(profile=True)
    val_stats = trainer.validate(profile=True)
    stats = merge_dicts(train_stats, val_stats)
    actors = ray.state.actors().values()
    assert len(actors) == num_workers + 1
    node_id_set = set()
    for actor_info in actors:
        node_id = actor_info["Address"]["NodeID"]
        node_id_set.add(node_id)
    assert len(node_id_set) == 1 + num_workers // 8
    return stats
def restore_agent(checkpoint_path,
                  baseline=False,
                  num_levels=5,
                  deterministic=False,
                  video_dir=None):
    config = {}
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(checkpoint_path)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    with open(config_path, "rb") as f:
        config = pickle.load(f)

    # Cap num_workers at 2 for evaluation.
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    config = merge_dicts(config, evaluation_config)

    if video_dir:
        config["env_config"]["render_mode"] = "rgb_array"
    # config["env_config"]["num_levels"] = num_levels
    config["explore"] = not deterministic
    config["evaluation_interval"] = 0
    config["monitor"] = False
    if args.level_file is not None:
        cwd = os.path.dirname(os.path.realpath(__file__))
        config["env_config"]["level_file"] = os.path.join(cwd, args.level_file)
    print(config)

    if baseline:
        trainer = PPOTrainer
    else:
        state_danger = config["model"]["custom_options"]["state_danger"]
        if not state_danger:
            trainer = ActionDangerPPOTrainer
        else:
            trainer = StateDangerPPOTrainer
    restored_trainer = trainer(env=config["env"], config=config)
    restored_trainer.restore(checkpoint_path)
    return restored_trainer, config
def train(name, ray_config, debug=False):
    """Trains sam.

    Parameters
    ----------
    name: name of the yaml file
    ray_config: ray configuration
    debug: whether to test in editor

    Returns
    -------
    None
    """
    ray.init()
    trainer_class = get_trainable_cls(ray_config['run'])
    default_config = trainer_class._default_config.copy()
    config = merge_dicts(default_config, ray_config['config'])

    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    run = ray_config['run']
    model_name = f'{name}_{now}'
    # Set the terminal title to the model name.
    print(f'\33]0;{model_name} - {name}\a', end='', flush=True)

    if debug:
        config['num_workers'] = 0
        config['num_envs_per_worker'] = 1
        # config['train_batch_size'] = 10
        config['env_config']['log_every'] = 2000
        trainer = trainer_class(config=config)
        policy = trainer.get_policy()
        model = policy.model
        print(model)
        for i in range(10):
            res = trainer.train()
            print(pretty_print(res))
    else:
        tune.run(
            run,
            name=model_name,
            # stop=ray_config['stop'],
            local_dir='results',
            config=config,
            checkpoint_at_end=True,
            verbose=2,
            # restore=RESTORE_PATH,
            checkpoint_freq=10)
    ray.shutdown()
def main():
    args = parser.parse_args()
    print(
        "This script is outdated. "
        "Use the 'input: minerl' option to load the correct data automatically."
    )
    env_list = []
    env_config = {}
    if args.config_file is not None:
        config = yaml.safe_load(open(args.config_file))
        settings = list(config.values())[0]
        if "config" in settings:
            if "env_config" in settings["config"]:
                env_config = settings["config"]["env_config"]
            if "env" in settings["config"]:
                env_list.append(settings["config"]["env"])
        if "env" in settings:
            env_list.append(settings["env"])
    else:
        if args.env is None:
            for env_spec in obfuscated_envs:
                env_list.append(env_spec.name)
        else:
            env_list.append(args.env)
    env_config = merge_dicts(env_config, args.env_config)

    if args.save_path is None:
        save_path = get_save_path(args.data_dir, env_config)
    else:
        save_path = args.save_path
    print(f"saving jsons to {save_path}")
    for env_name in env_list:
        print(f"Writing data to json files for environment {env_name}")
        env_save_path = os.path.join(save_path, env_name)
        write_jsons(env_name, args.data_dir, env_config, env_save_path,
                    overwrite=args.overwrite)
def run(args, parser):
    config = {}
    # Load configuration from file
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory.")
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()
    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        outcome = rollout(agent, args.env, num_steps, num_episodes, saver,
                          args.no_render, args.monitor)
    outcome_file = os.path.join(
        os.path.dirname(config_path), 'test_outcome.json')
    with open(outcome_file, 'w') as f:
        json.dump(outcome, f, indent=4)
def evaluate(exp):
    eval_configs = get_eval_config(
        exp['config'].get('env_config', {}).get('eval_generator', "default"))
    eval_seed = eval_configs.get('evaluation_config', {}).get(
        'env_config', {}).get('seed')

    # add evaluation config to the current config
    exp['config'] = merge_dicts(exp['config'], eval_configs)
    if exp['config'].get('evaluation_config'):
        exp['config']['evaluation_config']['env_config'] = exp['config'].get(
            'env_config')
        eval_env_config = exp['config']['evaluation_config'].get('env_config')
        if eval_seed and eval_env_config:
            # We override the env's seed from the evaluation config
            eval_env_config['seed'] = eval_seed

        # Remove any wandb related configs
        if eval_env_config:
            if eval_env_config.get('wandb'):
                del eval_env_config['wandb']

    # Remove any wandb related configs
    if exp['config']['evaluation_config'].get('wandb'):
        del exp['config']['evaluation_config']['wandb']
def run(args, parser):
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")

    # If no pkl file found, require command line `--config`.
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no config given on command line!")
        else:
            config = args.config
    # Load the config from pickled.
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)

    # Cap num_workers at 2 for rollout.
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    # Deprecated way: Use (--out|~/ray_results) + "/monitor" as dir.
    video_dir = None
    if args.monitor:
        video_dir = os.path.join(
            os.path.dirname(args.out or "")
            or os.path.expanduser("~/ray_results/"), "monitor")
    # New way: Allow user to specify a video output path.
    elif args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, video_dir)
    agent.stop()
from ray.rllib.agents.ppo.ppo import PPOTrainer, DEFAULT_CONFIG, \
    validate_config as original_validate
from ray.tune.utils import merge_dicts
from toolbox import initialize_ray, train
from toolbox.dies.es_utils import run_evolution_strategies
from toolbox.marl import get_marl_env_config, on_train_result, \
    MultiAgentEnvWrapper

ppo_es_default_config = merge_dicts(
    DEFAULT_CONFIG,
    dict(update_steps=100000,
         callbacks={"on_train_result": on_train_result}))


def validate_config(config):
    tmp_env = MultiAgentEnvWrapper(config["env_config"])
    config["multiagent"]["policies"] = {
        "agent{}".format(i): (None, tmp_env.observation_space,
                              tmp_env.action_space, {})
        for i in range(num_agents)
    }
    config["multiagent"]["policy_mapping_fn"] = lambda x: x
    original_validate(config)


PPOESTrainer = PPOTrainer.with_updates(
    name="PPOES",
    default_config=ppo_es_default_config,
    after_train_result=run_evolution_strategies,
    validate_config=validate_config)
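
# Usage sketch (an assumption, not part of the module above): a class produced by
# PPOTrainer.with_updates behaves like any RLlib Trainer, so it can be handed to
# tune.run directly. The stop criterion and config values here are illustrative only,
# and the module's `num_agents` global must be defined before validate_config runs.
from ray import tune

tune.run(
    PPOESTrainer,
    stop={"timesteps_total": 1000000},  # illustrative stopping rule
    config={"env": "CartPole-v0", "num_workers": 2},
)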
def run(args, parser):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")
        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = not args.no_render
    config["record_env"] = args.video_dir

    ray.init(local_mode=args.local_mode)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    video_dir = None
    # Allow user to specify a video output path.
    if args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, video_dir)
    agent.stop()
from ray.tune.utils import merge_dicts
from toolbox import initialize_ray, train
from toolbox.dice import DiCETrainer, utils as dice_utils
from toolbox.dice.utils import dice_default_config
from toolbox.dies.es_utils import run_evolution_strategies
from toolbox.marl import get_marl_env_config

dies_default_config = merge_dicts(
    dice_default_config,
    {
        "update_steps": 100000,
        # callbacks={"on_train_result": on_train_result}  # already there
        dice_utils.DELAY_UPDATE: False,
        dice_utils.TWO_SIDE_CLIP_LOSS: False,
        dice_utils.ONLY_TNB: True,
        dice_utils.NORMALIZE_ADVANTAGE: True,  # May need to be set to False
    }
)

DiESTrainer = DiCETrainer.with_updates(
    name="DiES",
    default_config=dies_default_config,
    after_train_result=run_evolution_strategies
)

if __name__ == '__main__':
    env_name = "CartPole-v0"
    num_agents = 3
    config = {
QMIX_APEX_DEFAULT_CONFIG = merge_dicts(
    DEFAULT_CONFIG,
    # see also the options in dqn.py, which are also supported
    {
        "optimizer": {
            "max_weight_sync_delay": 400,
            "num_replay_buffer_shards": 4,
            "debug": False
        },
        "n_step": 3,
        "num_gpus": 1,
        "num_workers": 32,
        "buffer_size": 2000000,
        "learning_starts": 50000,
        "train_batch_size": 512,
        "rollout_fragment_length": 50,
        "target_network_update_freq": 500000,
        "timesteps_per_iteration": 1000,
        "exploration_config": {
            "type": "PerWorkerEpsilonGreedy"
        },
        "worker_side_prioritization": True,
        "min_iter_time_s": 30,
        "training_intensity": None,
        "prioritized_replay": True,
        "prioritized_replay_alpha": 0.6,
        "prioritized_replay_beta": 0.4,
        "final_prioritized_replay_beta": 0.4,
        "prioritized_replay_beta_annealing_timesteps": 20000,
        "prioritized_replay_eps": 1e-6,
    },
)
def custom_train_func(trainer, info):
    train_stats = trainer.train(profile=True)
    val_stats = trainer.validate(profile=True)
    stats = merge_dicts(train_stats, val_stats)
    return stats
def load_agent_config(args):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")
        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["num_workers"] = 0
    config["num_envs_per_worker"] = 1
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    # if not config.get("evaluation_num_workers"):
    #     config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = args.render
    config["record_env"] = args.video_dir

    if config.get("env_config") is None:
        config["env_config"] = {}

    print(args.agent_speeds)
    config["env_config"]["agent_speeds"] = args.agent_speeds

    register_env(args.env, env_creator)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)
    return agent, config
def __init__(
    self, obs_space, action_space, num_outputs, model_config, name, **kwargs
):
    TorchModelV2.__init__(
        self, obs_space, action_space, num_outputs, model_config, name
    )
    nn.Module.__init__(self)
    model_config = merge_dicts(BASELINE_CONFIG, model_config["custom_model_config"])
    # new way to get model config directly from keyword arguments
    model_config = merge_dicts(model_config, kwargs)

    state_embed_size = model_config["state_embed_size"]
    self.use_rnn = model_config["use_rnn"]
    rnn_type = model_config["rnn_type"]
    self.use_prev_action_reward = model_config["use_prev_action_reward"]
    action_net_kwargs = model_config["action_net_kwargs"]
    if isinstance(action_space, gym.spaces.Discrete):
        action_net_kwargs.update({"discrete": True, "n": action_space.n})
        self.discrete = True
    else:
        self.discrete = False

    def get_factory(network_name):
        base_module = importlib.import_module(
            f"minerl_rllib.models.torch.{network_name}"
        )
        return getattr(base_module, model_config[f"{network_name}_net"])

    self._pov_network, pov_embed_size = get_factory("pov")(
        **model_config["pov_net_kwargs"]
    )
    self._vector_network, vector_embed_size = get_factory("vector")(
        **model_config["vector_net_kwargs"]
    )
    state_input_size = pov_embed_size + vector_embed_size
    if self.use_prev_action_reward:
        self._action_network, action_embed_size = get_factory("action")(
            **action_net_kwargs
        )
        self._reward_network, reward_embed_size = get_factory("reward")(
            **model_config["reward_net_kwargs"]
        )
        state_input_size += action_embed_size + reward_embed_size

    rnn_config = model_config.get("rnn_config")
    if self.use_rnn:
        state_embed_size = rnn_config["hidden_size"]
        if rnn_type == "lstm":
            self._rnn = LSTMBaseline(state_input_size, **rnn_config)
        elif rnn_type == "gru":
            self._rnn = GRUBaseline(state_input_size, **rnn_config)
        else:
            raise NotImplementedError
    else:
        self._state_network = nn.Sequential(
            nn.Linear(state_input_size, state_embed_size),
            nn.ELU(),
        )
    self._value_head = nn.Sequential(
        nn.Linear(state_embed_size, 1),
    )
    self._policy_head = nn.Sequential(
        nn.Linear(state_embed_size, num_outputs),
    )
# Load configuration from checkpoint file.
config_dir = os.path.dirname(args.checkpoint)
print(config_dir)
config_path = os.path.join(config_dir, "params.pkl")
# Try parent directory.
if not os.path.exists(config_path):
    config_path = os.path.join(config_dir, "../params.pkl")
with open(config_path, "rb") as f:
    config = pickle.load(f)
print(config)
config["num_workers"] = args.num_workers

# Merge with `evaluation_config`.
evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
config = merge_dicts(config, evaluation_config)
config["batch_mode"] = "complete_episodes"
config["evaluation_config"] = {"explore": False}
print(config)
print(pretty_print(config))

ray.init()
evaluator = ppo.PPOTrainer(config=config)
evaluator.restore(args.checkpoint)

num_workers = args.num_workers
start_day = args.start_day
test_days_remain = args.test_days
def train(cfg: DictConfig) -> None:
    ray.init(num_gpus=cfg.num_gpus, num_cpus=cfg.num_cpus + 1)

    cfg = get_full_config(cfg)

    register_env("RLlibNLE-v0", RLLibNLEEnv)

    try:
        algo, trainer = NAME_TO_TRAINER[cfg.algo]
    except KeyError:
        raise ValueError(
            "The algorithm you specified isn't currently supported: %s",
            cfg.algo)

    config = algo.DEFAULT_CONFIG.copy()

    args_config = OmegaConf.to_container(cfg)

    # Algo-specific config. Requires hydra config keys to match rllib exactly
    algo_config = args_config.pop(cfg.algo)

    # Remove unnecessary config keys
    for algo in NAME_TO_TRAINER.keys():
        if algo != cfg.algo:
            args_config.pop(algo, None)

    # Merge config from hydra (will have some rogue keys but that's ok)
    config = merge_dicts(config, args_config)

    # Update configuration with parsed arguments in specific ways
    config = merge_dicts(
        config,
        {
            "framework": "torch",
            "num_gpus": cfg.num_gpus,
            "seed": cfg.seed,
            "env": "RLlibNLE-v0",
            "env_config": {
                "flags": cfg,
                "observation_keys": cfg.obs_keys.split(","),
                "name": cfg.env,
            },
            "train_batch_size": cfg.train_batch_size,
            "model": merge_dicts(
                MODEL_DEFAULTS,
                {
                    "custom_model": "rllib_nle_model",
                    "custom_model_config": {"flags": cfg, "algo": cfg.algo},
                    "use_lstm": cfg.use_lstm,
                    "lstm_use_prev_reward": True,
                    "lstm_use_prev_action": True,
                    "lstm_cell_size": cfg.hidden_dim,
                },
            ),
            "num_workers": cfg.num_cpus,
            "num_envs_per_worker": int(cfg.num_actors / cfg.num_cpus),
            "evaluation_interval": 100,
            "evaluation_num_episodes": 50,
            "evaluation_config": {"explore": False},
            "rollout_fragment_length": cfg.unroll_length,
        },
    )

    # Merge algo-specific config at top level
    config = merge_dicts(config, algo_config)

    # Ensure we can use the config we've specified above
    trainer_class = trainer.with_updates(default_config=config)

    callbacks = []
    if cfg.wandb:
        callbacks.append(
            WandbLoggerCallback(
                project=cfg.project,
                api_key_file="~/.wandb_api_key",
                entity=cfg.entity,
                group=cfg.group,
                tags=cfg.tags.split(","),
            )
        )
        os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = "1"  # Only log to wandb

        # Hacky monkey-patching to allow for OmegaConf config
        def _is_allowed_type(obj):
            """Return True if type is allowed for logging to wandb"""
            if isinstance(obj, DictConfig):
                return True
            if isinstance(obj, np.ndarray) and obj.size == 1:
                return isinstance(obj.item(), Number)
            if isinstance(obj, Iterable) and len(obj) > 0:
                return isinstance(obj[0], _VALID_ITERABLE_TYPES)
            return isinstance(obj, _VALID_TYPES)

        ray.tune.integration.wandb._is_allowed_type = _is_allowed_type

    tune.run(
        trainer_class,
        stop={"timesteps_total": cfg.total_steps},
        config=config,
        name=cfg.name,
        callbacks=callbacks,
    )
def run(args, parser):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")
        # Use default config for given agent.
        _, config = get_algorithm_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_duration"):
        config["evaluation_duration"] = 1
    # Hard-override this as it raises a warning by Trainer otherwise.
    # Makes no sense anyways, to have it set to None as we don't call
    # `Trainer.train()` here.
    config["evaluation_interval"] = 1

    # Rendering and video recording settings.
    if args.no_render:
        deprecation_warning(old="--no-render", new="--render", error=False)
        args.render = False
    config["render_env"] = args.render

    ray.init(local_mode=args.local_mode)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Do the actual rollout.
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info,
    ) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                not args.render)
    agent.stop()
def run(args, parser):
    if args.config_file:
        with open(args.config_file) as f:
            config_experiments = yaml.safe_load(f)
        experiments = config_experiments
    else:
        if args.algo is not None:
            args.experiment = args.algo
        if args.experiment:
            config_file = os.path.join('config', f'{args.experiment}.yaml')
            with open(config_file) as f:
                config_dict = yaml.safe_load(f)
        else:
            config_dict = {args.name: {}}

        if args.debug:
            args.env = 'MineRLRandomDebug-v0'

        experiments = {}
        for experiment_name, experiment_settings in config_dict.items():
            config = dict(args.config, env=args.env)

            # TODO: implement
            if args.mode == 'offline':
                config.update(
                    dict(
                        explore=False,
                        input=args.data_path,
                        input_evaluation=['simulation'],
                    ))
            elif args.mode == 'mixed':
                config.update(
                    dict(
                        input={
                            args.data_path: args.mixing_ratio,
                            'sample': (1 - args.mixing_ratio)
                        },
                        input_evaluation=['simulation'],
                    ))

            if 'time_total_s' not in args.stop:
                # The MineRL competition training time limit is 4 days.
                # Subtract an hour for evaluation.
                args.stop['time_total_s'] = int(
                    2 * 24 * 60 * 60 - 3600)  # limit two day training
            if 'info/num_steps_sampled' not in args.stop:
                # The MineRL competition environment sample limit is
                # 8 million steps.
                args.stop['info/num_steps_sampled'] = 8000000
            if args.checkpoint_freq is None:
                args.checkpoint_freq = 1000
            if args.checkpoint_at_end is None:
                args.checkpoint_at_end = True
            if args.checkpoint_score_attr is None:
                args.checkpoint_score_attr = 'episode_reward_mean'

            # Note: keep this in sync with tune/config_parser.py
            settings_from_args = {  # i.e. log to ~/ray_results/default
                "run": args.run,
                "checkpoint_freq": args.checkpoint_freq,
                "checkpoint_at_end": args.checkpoint_at_end,
                "keep_checkpoints_num": args.keep_checkpoints_num,
                "checkpoint_score_attr": args.checkpoint_score_attr,
                "local_dir": args.local_dir,
                "resources_per_trial": (
                    args.resources_per_trial
                    and resources_to_json(args.resources_per_trial)),
                "stop": args.stop,
                "config": config,
                "restore": args.restore,
                "num_samples": args.num_samples,
                "upload_dir": args.upload_dir,
            }
            # overwrite the settings from arguments with those in the
            # experiment config file
            settings = merge_dicts(settings_from_args, experiment_settings)
            experiments.update({experiment_name: settings})

    if any('MineRL' in setting['config']['env']
           for setting in experiments.values()):
        import envs
        envs.register(discrete=args.discrete,
                      num_actions=args.num_actions,
                      data_dir=args.data_dir)

    print('\nArguments:')
    pprint.pprint(args)
    print('\nExperiment config:')
    pprint.pprint(experiments)
    print()

    verbose = 1
    for exp in experiments.values():
        # Bazel makes it hard to find files specified in `args` (and `data`).
        # Look for them here.
        # NOTE: Some of our yaml files don't have a `config` section.
        if exp.get("config", {}).get("input") and \
                not os.path.exists(exp["config"]["input"]):
            # This script runs in the ray/rllib dir.
            rllib_dir = Path(__file__).parent
            input_file = rllib_dir.absolute().joinpath(exp["config"]["input"])
            exp["config"]["input"] = str(input_file)

        if not exp.get("run"):
            parser.error("the following arguments are required: --run")
        if not exp.get("env") and not exp.get("config", {}).get("env"):
            parser.error("the following arguments are required: --env")

        if 'framework' not in exp['config']:
            if args.eager:
                exp["config"]["framework"] = "tfe"
            elif args.torch:
                exp["config"]["framework"] = "torch"
            else:
                exp["config"]["framework"] = "tf"
        if args.v:
            exp["config"]["log_level"] = "INFO"
            verbose = 2
        if args.vv:
            exp["config"]["log_level"] = "DEBUG"
            verbose = 3
        if args.trace:
            if exp["config"]["framework"] != "tfe":
                raise ValueError("Must enable --eager to enable tracing.")
            exp["config"]["eager_tracing"] = True

    if args.ray_num_nodes:
        cluster = Cluster()
        for _ in range(args.ray_num_nodes):
            cluster.add_node(num_cpus=args.ray_num_cpus or 1,
                             num_gpus=args.ray_num_gpus or 0,
                             object_store_memory=args.ray_object_store_memory,
                             memory=args.ray_memory,
                             redis_max_memory=args.ray_redis_max_memory)
        ray.init(address=cluster.address)
    else:
        ray.init(include_dashboard=not args.no_ray_ui,
                 address=args.ray_address,
                 object_store_memory=args.ray_object_store_memory,
                 memory=args.ray_memory,
                 redis_max_memory=args.ray_redis_max_memory,
                 num_cpus=args.ray_num_cpus,
                 num_gpus=args.ray_num_gpus,
                 local_mode=args.local_mode)

    run_experiments(experiments,
                    scheduler=_make_scheduler(args),
                    queue_trials=args.queue_trials,
                    resume=args.resume,
                    verbose=verbose,
                    concurrent=True)
    ray.shutdown()
def run(args, parser):
    config = {}
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")

    # If no pkl file found, require command line `--config`.
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no config given on command line!")
    # Load the config from pickled.
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)

    if args.use_cpu:
        # When you don't want to run with any gpus.
        config["num_gpus_per_worker"] = 0
        config["num_gpus"] = 0
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    config["num_workers"] = 1

    # # Set num_workers to be at least 2.
    # if "num_workers" in config:
    #     config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))

    # ADDED
    if args.deterministic_policy:
        evaluation_config["explore"] = False
        config["explore"] = False
    if "env_config" in evaluation_config:
        evaluation_config["env_config"]["num_levels"] = 1
        evaluation_config["env_config"]["use_sequential_levels"] = True
        evaluation_config["env_config"][
            "start_level"] = 0 if args.level_seed is None else args.level_seed
    config["env_config"]["num_levels"] = 1
    config["env_config"]["use_sequential_levels"] = True
    config["env_config"][
        "start_level"] = 0 if args.level_seed is None else args.level_seed
    # END ADDED

    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings.
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    video_dir = None
    if args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    vis_info = rollout(agent, args.env, num_steps, num_episodes, video_dir,
                       config, level_seed=args.level_seed)
    visualize_info(vis_info, video_dir)
def check_for_saved_config(args):
    """Check for saved configuration

    :args: Argparse arguments
    :returns: Saved config file with merged updates
    """
    # Load configuration from checkpoint file.
    config_path = ""
    args.save_info = True
    config = None
    # If there is a checkpoint, find parameters
    if args.checkpoint_path:
        config_dir = os.path.dirname(args.checkpoint_path)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint_path:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")
        # Use default config for given agent.
        _, config = get_trainer_class(args.model_name, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways). Adds any custom arguments here
    config = merge_dicts(config, args.config)
    if not args.env_name:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env_name = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    return config