Example #1
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kwargs):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        model_config = merge_dicts(BASELINE_CONFIG,
                                   model_config['custom_model_config'])
        # new way to get model config directly from keyword arguments
        model_config = merge_dicts(model_config, kwargs)

        state_embed_size = model_config['state_embed_size']
        self.use_rnn = model_config['use_rnn']
        rnn_type = model_config['rnn_type']
        self.use_prev_action_reward = model_config['use_prev_action_reward']

        action_net_kwargs = model_config['action_net_kwargs']
        if isinstance(action_space, gym.spaces.Discrete):
            action_net_kwargs.update({'discrete': True, 'n': action_space.n})
            self.discrete = True
        else:
            self.discrete = False

        def get_factory(network_name):
            return getattr(getattr(models.torch, network_name),
                           model_config[f'{network_name}_net'])

        self._pov_network, pov_embed_size = get_factory('pov')(
            **model_config['pov_net_kwargs'])
        self._vector_network, vector_embed_size = get_factory('vector')(
            **model_config['vector_net_kwargs'])
        state_input_size = pov_embed_size + vector_embed_size
        if self.use_prev_action_reward:
            self._action_network, action_embed_size = get_factory('action')(
                **action_net_kwargs)
            self._reward_network, reward_embed_size = get_factory('reward')(
                **model_config['reward_net_kwargs'])
            state_input_size += action_embed_size + reward_embed_size

        rnn_config = model_config.get('rnn_config')
        if self.use_rnn:
            state_embed_size = rnn_config['hidden_size']
            if rnn_type == 'lstm':
                self._rnn = models.torch.rnn.LSTMBaseline(
                    state_input_size, **rnn_config)
            elif rnn_type == 'gru':
                self._rnn = models.torch.rnn.GRUBaseline(
                    state_input_size, **rnn_config)
            else:
                raise NotImplementedError
        else:
            self._state_network = nn.Sequential(
                nn.Linear(state_input_size, state_embed_size),
                nn.ELU(),
            )
        self._value_head = nn.Sequential(nn.Linear(state_embed_size, 1), )
        self._policy_head = nn.Sequential(
            nn.Linear(state_embed_size, num_outputs), )
Example #2
    def create_trial_if_possible(self, experiment_spec: Dict,
                                 output_path: str) -> Optional[Trial]:
        logger.debug("creating trial")
        trial_id = Trial.generate_id()
        suggested_config = self.searcher.suggest(trial_id)
        if suggested_config == Searcher.FINISHED:
            self._finished = True
            logger.debug("Searcher has finished.")
            return

        if suggested_config is None:
            return
        spec = copy.deepcopy(experiment_spec)
        spec["config"] = merge_dicts(spec["config"],
                                     copy.deepcopy(suggested_config))

        # Create a new trial_id if duplicate trial is created
        flattened_config = resolve_nested_dict(spec["config"])
        self._counter += 1
        tag = "{0}_{1}".format(str(self._counter),
                               format_vars(flattened_config))
        trial = create_trial_from_spec(
            spec,
            output_path,
            self._parser,
            evaluated_params=flatten_dict(suggested_config),
            experiment_tag=tag,
            trial_id=trial_id)
        return trial
Example #3
    def _generate_trials(self, experiment_spec, output_path=""):
        """Generates trials with configurations from `_suggest`.

        Creates a trial_id that is passed into `_suggest`.

        Yields:
            Trial objects constructed according to `spec`
        """
        if "run" not in experiment_spec:
            raise TuneError("Must specify `run` in {}".format(experiment_spec))
        for _ in range(experiment_spec.get("num_samples", 1)):
            trial_id = Trial.generate_id()
            while True:
                suggested_config = self._suggest(trial_id)
                if suggested_config is None:
                    yield None
                else:
                    break
            spec = copy.deepcopy(experiment_spec)
            spec["config"] = merge_dicts(spec["config"],
                                         copy.deepcopy(suggested_config))
            flattened_config = resolve_nested_dict(spec["config"])
            self._counter += 1
            tag = "{0}_{1}".format(str(self._counter),
                                   format_vars(flattened_config))
            yield create_trial_from_spec(
                spec,
                output_path,
                self._parser,
                evaluated_params=flatten_dict(suggested_config),
                experiment_tag=tag,
                trial_id=trial_id)
Example #4
def step(trainer, info: dict):
    """Define a custom training loop for tune.

    This is needed because we want to manually update our scheduler.
    """
    train_stats = trainer.train(profile=True)
    validation_stats = trainer.validate(profile=True)
    # Manually update our scheduler with the given metric.
    trainer.update_scheduler(metric=validation_stats["val_loss"])
    all_stats = merge_dicts(train_stats, validation_stats)
    return all_stats
Example #5
def run(args, parser):
    config = {}
    # Load configuration from file
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory.")
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Limit Ray's memory usage (by default it can reserve ~100 GB).
    ray.init(memory=4.2e+10, object_store_memory=1e+10)
    env_name = config['env']

    if env_name == 'harvest_env':

        def env_creator(env_config):
            return HarvestEnv(env_config)
    else:

        def env_creator(env_config):
            return CleanupEnv(env_config)

    register_env(env_name, env_creator)
    ModelCatalog.register_custom_model("conv_to_fc_net", ObedienceLSTM)

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)
    with RolloutSaver(args.out,
                      args.use_shelve,
                      write_update_file=args.track_progress,
                      target_steps=num_steps,
                      target_episodes=num_episodes,
                      save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, args.monitor)
Example #6
def parse_inputs(inputs: Iterable[StrOrPath]) -> dict:
    # note that importing ray.tune does not override tf.executing_eagerly
    # pylint: disable=import-outside-toplevel
    from ray.tune.utils import merge_dicts

    out: dict = {}
    for inp in inputs:
        with open(inp, 'r') as f:
            params = yaml.safe_load(f)

        out = merge_dicts(out, params)

    return out
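A quick, self-contained way to exercise parse_inputs from the excerpt above (assuming it, and the yaml import it relies on, are available). The file names and YAML contents are assumptions for illustration; later files take precedence on conflicting keys because merge_dicts favors its second argument:

import os
import tempfile

# Two made-up YAML fragments; the second one overrides part of the first.
base_yaml = "lr: 0.001\nmodel:\n  hidden: 128\n"
override_yaml = "model:\n  hidden: 256\n"

with tempfile.TemporaryDirectory() as tmpdir:
    paths = []
    for name, text in [("base.yaml", base_yaml), ("override.yaml", override_yaml)]:
        path = os.path.join(tmpdir, name)
        with open(path, "w") as f:
            f.write(text)
        paths.append(path)

    params = parse_inputs(paths)
    # "lr" survives from the first file, "model.hidden" comes from the second.
    assert params == {"lr": 0.001, "model": {"hidden": 256}}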
Example #7
    def custom_train_func(trainer, info):
        train_stats = trainer.train(profile=True)
        val_stats = trainer.validate(profile=True)
        stats = merge_dicts(train_stats, val_stats)

        actors = ray.state.actors().values()
        assert len(actors) == num_workers + 1

        node_id_set = set()
        for actor_info in actors:
            node_id = actor_info["Address"]["NodeID"]
            node_id_set.add(node_id)

        assert len(node_id_set) == 1 + num_workers // 8
        return stats
Example #8
def restore_agent(checkpoint_path, baseline=False, num_levels=5, deterministic=False, video_dir=None):
    config = {}
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(checkpoint_path)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    with open(config_path, "rb") as f:
        config = pickle.load(f)

    # Set num_workers to be at least 2.
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    config = merge_dicts(config, evaluation_config)

    if video_dir:
        config["env_config"]["render_mode"] = "rgb_array"

    # config["env_config"]["num_levels"] = num_levels
    config["explore"] = not deterministic

    config["evaluation_interval"] = 0
    config["monitor"] = False

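    # NOTE: `args` below is assumed to be a module-level argparse namespace in
    # the original script; it is not a parameter of this function.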
    if args.level_file is not None:
        cwd = os.path.dirname(os.path.realpath(__file__))
        config["env_config"]["level_file"] = os.path.join(cwd, args.level_file)

    print(config)


    if baseline:
        trainer = PPOTrainer
    else:
        state_danger = config["model"]["custom_options"]["state_danger"]
        if not state_danger:
            trainer = ActionDangerPPOTrainer
        else:
            trainer = StateDangerPPOTrainer


    restored_trainer = trainer(env=config["env"], config=config)
    restored_trainer.restore(checkpoint_path)
    return restored_trainer, config
Example #9
def train(name, ray_config, debug=False):
    """
    Trains sam
    Parameters
    ----------
    name: name of yaml file
    ray_config: ray configuration
    debug: whether to test in editor

    Returns
    -------

    """
    ray.init()
    trainer_class = get_trainable_cls(ray_config['run'])
    default_config = trainer_class._default_config.copy()
    config = merge_dicts(default_config, ray_config['config'])
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    run = ray_config['run']

    model_name = f'{name}_{now}'
    print(f'\33]0;{model_name} - {name}\a', end='', flush=True)
    if debug:
        config['num_workers'] = 0
        config['num_envs_per_worker'] = 1
        # config['train_batch_size'] = 10
        config['env_config']['log_every'] = 2000
        trainer = trainer_class(config=config)
        policy = trainer.get_policy()
        model = policy.model
        print(model)
        for i in range(10):
            res = trainer.train()
            print(pretty_print(res))
    else:
        tune.run(
            run,
            name=model_name,
            # stop=ray_config['stop'],
            local_dir='results',
            config=config,
            checkpoint_at_end=True,
            verbose=2,
            # restore=RESTORE_PATH,
            checkpoint_freq=10)

    ray.shutdown()
Example #10
def main():
    args = parser.parse_args()

    print(
        "This script is outdated. "
        "Use the 'input: minerl' option to load the correct data automatically."
    )

    env_list = []

    env_config = {}
    if args.config_file is not None:
        config = yaml.safe_load(open(args.config_file))
        settings = list(config.values())[0]
        if "config" in settings:
            if "env_config" in settings["config"]:
                env_config = settings["config"]["env_config"]
            if "env" in settings["config"]:
                env_list.append(settings["config"]["env"])
        if "env" in settings:
            env_list.append(settings["env"])
    else:
        if args.env is None:
            for env_spec in obfuscated_envs:
                env_list.append(env_spec.name)
        else:
            env_list.append(args.env)
    env_config = merge_dicts(env_config, args.env_config)

    if args.save_path is None:
        save_path = get_save_path(args.data_dir, env_config)
    else:
        save_path = args.save_path
    print(f"saving jsons to {save_path}")

    for env_name in env_list:
        print(f"Writing data to json files for environment {env_name}")
        env_save_path = os.path.join(save_path, env_name)
        write_jsons(env_name,
                    args.data_dir,
                    env_config,
                    env_save_path,
                    overwrite=args.overwrite)
Example #11
def run(args, parser):
    config = {}
    # Load configuration from file
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory.")
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        outcome = rollout(agent, args.env, num_steps, num_episodes, saver,
                          args.no_render, args.monitor)
        outcome_file = os.path.join(os.path.dirname(config_path), 'test_outcome.json')
        with open(outcome_file, 'w') as f:
            json.dump(outcome, f, indent=4)
Example #12
    def evaluate(exp):
        eval_configs = get_eval_config(exp['config'].get('env_config',
                                                         {}).get('eval_generator', "default"))
        eval_seed = eval_configs.get('evaluation_config', {}).get('env_config', {}).get('seed')

        # add evaluation config to the current config
        exp['config'] = merge_dicts(exp['config'], eval_configs)
        if exp['config'].get('evaluation_config'):
            exp['config']['evaluation_config']['env_config'] = exp['config'].get('env_config')
            eval_env_config = exp['config']['evaluation_config'].get('env_config')
            if eval_seed and eval_env_config:
                # We override the envs seed from the evaluation config
                eval_env_config['seed'] = eval_seed

            # Remove any wandb related configs
            if eval_env_config:
                if eval_env_config.get('wandb'):
                    del eval_env_config['wandb']

        # Remove any wandb related configs
        if exp['config']['evaluation_config'].get('wandb'):
            del exp['config']['evaluation_config']['wandb']
Example #13
def run(args, parser):
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")

    # If no pkl file found, require command line `--config`.
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no config given on command line!")
        else:
            config = args.config

    # Load the config from pickled.
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)

    # Set num_workers to be at least 2.
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    # Deprecated way: Use (--out|~/ray_results) + "/monitor" as dir.
    video_dir = None
    if args.monitor:
        video_dir = os.path.join(
            os.path.dirname(args.out or "")
            or os.path.expanduser("~/ray_results/"), "monitor")
    # New way: Allow user to specify a video output path.
    elif args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with RolloutSaver(args.out,
                      args.use_shelve,
                      write_update_file=args.track_progress,
                      target_steps=num_steps,
                      target_episodes=num_episodes,
                      save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, video_dir)
    agent.stop()
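The merge order used by these rollout scripts is worth spelling out: the pickled training config is the base, the stored evaluation_config is layered on top, and command-line --config settings override both. A small sketch with made-up values:

from ray.tune.utils import merge_dicts

# Made-up stand-ins for the three config sources used above.
checkpoint_config = {"num_workers": 2, "explore": True, "env_config": {"seed": 1}}
evaluation_config = {"explore": False}
cli_config = {"env_config": {"seed": 42}}

config = merge_dicts(checkpoint_config, evaluation_config)  # eval overrides training
config = merge_dicts(config, cli_config)                    # CLI overrides both
assert config == {"num_workers": 2, "explore": False, "env_config": {"seed": 42}}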
Example #14
from ray.rllib.agents.ppo.ppo import PPOTrainer, DEFAULT_CONFIG, \
    validate_config as original_validate
from ray.tune.utils import merge_dicts

from toolbox import initialize_ray, train
from toolbox.dies.es_utils import run_evolution_strategies
from toolbox.marl import get_marl_env_config, on_train_result, \
    MultiAgentEnvWrapper

ppo_es_default_config = merge_dicts(
    DEFAULT_CONFIG,
    dict(update_steps=100000, callbacks={"on_train_result": on_train_result}))


def validate_config(config):
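    # NOTE: `num_agents` is assumed to be defined elsewhere (e.g. at module
    # level) in the original source; it is not shown in this excerpt.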
    tmp_env = MultiAgentEnvWrapper(config["env_config"])
    config["multiagent"]["policies"] = {
        "agent{}".format(i):
        (None, tmp_env.observation_space, tmp_env.action_space, {})
        for i in range(num_agents)
    }
    config["multiagent"]["policy_mapping_fn"] = lambda x: x

    original_validate(config)


PPOESTrainer = PPOTrainer.with_updates(
    name="PPOES",
    default_config=ppo_es_default_config,
    after_train_result=run_evolution_strategies,
    validate_config=validate_config)
Example #15
def run(args, parser):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")

        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = not args.no_render
    config["record_env"] = args.video_dir

    ray.init(local_mode=args.local_mode)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    video_dir = None
    # Allow user to specify a video output path.
    if args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with RolloutSaver(args.out,
                      args.use_shelve,
                      write_update_file=args.track_progress,
                      target_steps=num_steps,
                      target_episodes=num_episodes,
                      save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, video_dir)
    agent.stop()
Example #16
from ray.tune.utils import merge_dicts

from toolbox import initialize_ray, train
from toolbox.dice import DiCETrainer, utils as dice_utils
from toolbox.dice.utils import dice_default_config
from toolbox.dies.es_utils import run_evolution_strategies
from toolbox.marl import get_marl_env_config

dies_default_config = merge_dicts(
    dice_default_config,
    {
        "update_steps": 100000,
        # callbacks={"on_train_result": on_train_result}  # already there
        dice_utils.DELAY_UPDATE: False,
        dice_utils.TWO_SIDE_CLIP_LOSS: False,
        dice_utils.ONLY_TNB: True,
        dice_utils.NORMALIZE_ADVANTAGE: True,  # May need to be set to False
    }
)


DiESTrainer = DiCETrainer.with_updates(
    name="DiES",
    default_config=dies_default_config,
    after_train_result=run_evolution_strategies
)

if __name__ == '__main__':
    env_name = "CartPole-v0"
    num_agents = 3
    config = {
Example #17
QMIX_APEX_DEFAULT_CONFIG = merge_dicts(
    DEFAULT_CONFIG,  # see also the options in dqn.py, which are also supported
    {
        "optimizer": {
            "max_weight_sync_delay": 400,
            "num_replay_buffer_shards": 4,
            "debug": False
        },
        "n_step": 3,
        "num_gpus": 1,
        "num_workers": 32,
        "buffer_size": 2000000,
        "learning_starts": 50000,
        "train_batch_size": 512,
        "rollout_fragment_length": 50,
        "target_network_update_freq": 500000,
        "timesteps_per_iteration": 1000,
        "exploration_config": {
            "type": "PerWorkerEpsilonGreedy"
        },
        "worker_side_prioritization": True,
        "min_iter_time_s": 30,
        "training_intensity": None,
        "prioritized_replay": True,
        "prioritized_replay_alpha": 0.6,
        "prioritized_replay_beta": 0.4,
        "final_prioritized_replay_beta": 0.4,
        "prioritized_replay_beta_annealing_timesteps": 20000,
        "prioritized_replay_eps": 1e-6,
    },
)
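Because merge_dicts recurses into nested dicts, the "optimizer" block above only adds or overrides the listed sub-keys; any other optimizer defaults coming from DEFAULT_CONFIG are preserved. A small sketch with made-up values (not the actual DQN defaults):

from ray.tune.utils import merge_dicts

# Made-up defaults standing in for the DQN/QMIX DEFAULT_CONFIG.
defaults = {"optimizer": {"max_weight_sync_delay": 400, "debug": True}, "n_step": 1}
apex_overrides = {"optimizer": {"num_replay_buffer_shards": 4, "debug": False},
                  "n_step": 3}

merged = merge_dicts(defaults, apex_overrides)
# Existing sub-keys are overridden, new sub-keys are added, and untouched
# defaults are preserved.
assert merged["optimizer"] == {
    "max_weight_sync_delay": 400,
    "debug": False,
    "num_replay_buffer_shards": 4,
}
assert merged["n_step"] == 3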
Example #18
def custom_train_func(trainer, info):
    train_stats = trainer.train(profile=True)
    val_stats = trainer.validate(profile=True)
    stats = merge_dicts(train_stats, val_stats)
    return stats
Example #19
def load_agent_config(args):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")

        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["num_workers"] = 0
    config["num_envs_per_worker"] = 1
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    # if not config.get("evaluation_num_workers"):
    #     config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = args.render
    config["record_env"] = args.video_dir

    if config.get("env_config") is None:
        config["env_config"] = {}

    print(args.agent_speeds)
    config["env_config"]["agent_speeds"] = args.agent_speeds

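    # NOTE: `env_creator` is assumed to be defined at module level in the
    # original script; it is not shown in this excerpt.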
    register_env(args.env, env_creator)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    return agent, config
Example #20
    def __init__(
        self, obs_space, action_space, num_outputs, model_config, name, **kwargs
    ):
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name
        )
        nn.Module.__init__(self)
        model_config = merge_dicts(BASELINE_CONFIG, model_config["custom_model_config"])
        # new way to get model config directly from keyword arguments
        model_config = merge_dicts(model_config, kwargs)

        state_embed_size = model_config["state_embed_size"]
        self.use_rnn = model_config["use_rnn"]
        rnn_type = model_config["rnn_type"]
        self.use_prev_action_reward = model_config["use_prev_action_reward"]

        action_net_kwargs = model_config["action_net_kwargs"]
        if isinstance(action_space, gym.spaces.Discrete):
            action_net_kwargs.update({"discrete": True, "n": action_space.n})
            self.discrete = True
        else:
            self.discrete = False

        def get_factory(network_name):
            base_module = importlib.import_module(
                f"minerl_rllib.models.torch.{network_name}"
            )
            return getattr(base_module, model_config[f"{network_name}_net"])

        self._pov_network, pov_embed_size = get_factory("pov")(
            **model_config["pov_net_kwargs"]
        )
        self._vector_network, vector_embed_size = get_factory("vector")(
            **model_config["vector_net_kwargs"]
        )
        state_input_size = pov_embed_size + vector_embed_size
        if self.use_prev_action_reward:
            self._action_network, action_embed_size = get_factory("action")(
                **action_net_kwargs
            )
            self._reward_network, reward_embed_size = get_factory("reward")(
                **model_config["reward_net_kwargs"]
            )
            state_input_size += action_embed_size + reward_embed_size

        rnn_config = model_config.get("rnn_config")
        if self.use_rnn:
            state_embed_size = rnn_config["hidden_size"]
            if rnn_type == "lstm":
                self._rnn = LSTMBaseline(state_input_size, **rnn_config)
            elif rnn_type == "gru":
                self._rnn = GRUBaseline(state_input_size, **rnn_config)
            else:
                raise NotImplementedError
        else:
            self._state_network = nn.Sequential(
                nn.Linear(state_input_size, state_embed_size),
                nn.ELU(),
            )
        self._value_head = nn.Sequential(
            nn.Linear(state_embed_size, 1),
        )
        self._policy_head = nn.Sequential(
            nn.Linear(state_embed_size, num_outputs),
        )
Example #21
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    print(config_dir)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")

    with open(config_path, "rb") as f:
        config = pickle.load(f)
        print(config)

    config["num_workers"] = args.num_workers
    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    config = merge_dicts(config, evaluation_config)
    config["batch_mode"] = "complete_episodes"
    config["evaluation_config"] = {"explore": False}

    print(config)
    print(pretty_print(config))

    ray.init()

    evaluator = ppo.PPOTrainer(config=config)
    evaluator.restore(args.checkpoint)

    num_workers = args.num_workers
    start_day = args.start_day
    test_days_remain = args.test_days
Example #22
def train(cfg: DictConfig) -> None:
    ray.init(num_gpus=cfg.num_gpus, num_cpus=cfg.num_cpus + 1)
    cfg = get_full_config(cfg)
    register_env("RLlibNLE-v0", RLLibNLEEnv)

    try:
        algo, trainer = NAME_TO_TRAINER[cfg.algo]
    except KeyError:
        raise ValueError(
            "The algorithm you specified isn't currently supported: %s", cfg.algo
        )

    config = algo.DEFAULT_CONFIG.copy()

    args_config = OmegaConf.to_container(cfg)

    # Algo-specific config. Requires hydra config keys to match rllib exactly
    algo_config = args_config.pop(cfg.algo)

    # Remove the config sections belonging to the other algorithms
    for other_algo in NAME_TO_TRAINER.keys():
        if other_algo != cfg.algo:
            args_config.pop(other_algo, None)

    # Merge config from hydra (will have some rogue keys but that's ok)
    config = merge_dicts(config, args_config)

    # Update configuration with parsed arguments in specific ways
    config = merge_dicts(
        config,
        {
            "framework": "torch",
            "num_gpus": cfg.num_gpus,
            "seed": cfg.seed,
            "env": "RLlibNLE-v0",
            "env_config": {
                "flags": cfg,
                "observation_keys": cfg.obs_keys.split(","),
                "name": cfg.env,
            },
            "train_batch_size": cfg.train_batch_size,
            "model": merge_dicts(
                MODEL_DEFAULTS,
                {
                    "custom_model": "rllib_nle_model",
                    "custom_model_config": {"flags": cfg, "algo": cfg.algo},
                    "use_lstm": cfg.use_lstm,
                    "lstm_use_prev_reward": True,
                    "lstm_use_prev_action": True,
                    "lstm_cell_size": cfg.hidden_dim,
                },
            ),
            "num_workers": cfg.num_cpus,
            "num_envs_per_worker": int(cfg.num_actors / cfg.num_cpus),
            "evaluation_interval": 100,
            "evaluation_num_episodes": 50,
            "evaluation_config": {"explore": False},
            "rollout_fragment_length": cfg.unroll_length,
        },
    )

    # Merge algo-specific config at top level
    config = merge_dicts(config, algo_config)

    # Ensure we can use the config we've specified above
    trainer_class = trainer.with_updates(default_config=config)

    callbacks = []
    if cfg.wandb:
        callbacks.append(
            WandbLoggerCallback(
                project=cfg.project,
                api_key_file="~/.wandb_api_key",
                entity=cfg.entity,
                group=cfg.group,
                tags=cfg.tags.split(","),
            )
        )
        os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = "1"  # Only log to wandb

    # Hacky monkey-patching to allow for OmegaConf config
    def _is_allowed_type(obj):
        """Return True if type is allowed for logging to wandb"""
        if isinstance(obj, DictConfig):
            return True
        if isinstance(obj, np.ndarray) and obj.size == 1:
            return isinstance(obj.item(), Number)
        if isinstance(obj, Iterable) and len(obj) > 0:
            return isinstance(obj[0], _VALID_ITERABLE_TYPES)
        return isinstance(obj, _VALID_TYPES)

    ray.tune.integration.wandb._is_allowed_type = _is_allowed_type

    tune.run(
        trainer_class,
        stop={"timesteps_total": cfg.total_steps},
        config=config,
        name=cfg.name,
        callbacks=callbacks,
    )
Example #23
def run(args, parser):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")

        # Use default config for given agent.
        _, config = get_algorithm_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_duration"):
        config["evaluation_duration"] = 1
    # Hard-override this as it raises a warning by Trainer otherwise.
    # Makes no sense anyways, to have it set to None as we don't call
    # `Trainer.train()` here.
    config["evaluation_interval"] = 1

    # Rendering and video recording settings.
    if args.no_render:
        deprecation_warning(old="--no-render", new="--render", error=False)
        args.render = False
    config["render_env"] = args.render

    ray.init(local_mode=args.local_mode)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Do the actual rollout.
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info,
    ) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                not args.render)
    agent.stop()
Example #24
def run(args, parser):
    if args.config_file:
        with open(args.config_file) as f:
            config_experiments = yaml.safe_load(f)
        experiments = config_experiments
    else:
        if args.algo is not None:
            args.experiment = args.algo
        if args.experiment:
            config_file = os.path.join('config', f'{args.experiment}.yaml')
            with open(config_file) as f:
                config_dict = yaml.safe_load(f)
        else:
            config_dict = {args.name: {}}

        if args.debug:
            args.env = 'MineRLRandomDebug-v0'

        experiments = {}
        for experiment_name, experiment_settings in config_dict.items():
            config = dict(args.config, env=args.env)

            # TODO: implement
            if args.mode == 'offline':
                config.update(
                    dict(
                        explore=False,
                        input=args.data_path,
                        input_evaluation=['simulation'],
                    ))
            elif args.mode == 'mixed':
                config.update(
                    dict(
                        input={
                            args.data_path: args.mixing_ratio,
                            'sample': (1 - args.mixing_ratio)
                        },
                        input_evaluation=['simulation'],
                    ))

            if 'time_total_s' not in args.stop:
                # The MineRL competition allows 4 days of training; here we
                # stop after 2 days, minus an hour reserved for evaluation.
                args.stop['time_total_s'] = int(2 * 24 * 60 * 60 - 3600)
            if 'info/num_steps_sampled' not in args.stop:
                # The MineRL competition environment sample limit is 8 million steps.
                args.stop['info/num_steps_sampled'] = 8000000
            if args.checkpoint_freq is None:
                args.checkpoint_freq = 1000
            if args.checkpoint_at_end is None:
                args.checkpoint_at_end = True
            if args.checkpoint_score_attr is None:
                args.checkpoint_score_attr = 'episode_reward_mean'

            # Note: keep this in sync with tune/config_parser.py
            settings_from_args = {  # i.e. log to ~/ray_results/default
                "run": args.run,
                "checkpoint_freq": args.checkpoint_freq,
                "checkpoint_at_end": args.checkpoint_at_end,
                "keep_checkpoints_num": args.keep_checkpoints_num,
                "checkpoint_score_attr": args.checkpoint_score_attr,
                "local_dir": args.local_dir,
                "resources_per_trial": (
                        args.resources_per_trial and
                        resources_to_json(args.resources_per_trial)),
                "stop": args.stop,
                "config": config,
                "restore": args.restore,
                "num_samples": args.num_samples,
                "upload_dir": args.upload_dir,
            }
            # overwrite the settings from arguments with those in the experiment config file
            settings = merge_dicts(settings_from_args, experiment_settings)
            experiments.update({experiment_name: settings})

        if any('MineRL' in setting['config']['env']
               for setting in experiments.values()):
            import envs
            envs.register(discrete=args.discrete,
                          num_actions=args.num_actions,
                          data_dir=args.data_dir)

    print('\nArguments:')
    pprint.pprint(args)
    print('\nExperiment config:')
    pprint.pprint(experiments)
    print()

    verbose = 1
    for exp in experiments.values():
        # Bazel makes it hard to find files specified in `args` (and `data`).
        # Look for them here.
        # NOTE: Some of our yaml files don't have a `config` section.
        if exp.get("config", {}).get("input") and \
                not os.path.exists(exp["config"]["input"]):
            # This script runs in the ray/rllib dir.
            rllib_dir = Path(__file__).parent
            input_file = rllib_dir.absolute().joinpath(exp["config"]["input"])
            exp["config"]["input"] = str(input_file)

        if not exp.get("run"):
            parser.error("the following arguments are required: --run")
        if not exp.get("env") and not exp.get("config", {}).get("env"):
            parser.error("the following arguments are required: --env")
        if 'framework' not in exp['config']:
            if args.eager:
                exp["config"]["framework"] = "tfe"
            elif args.torch:
                exp["config"]["framework"] = "torch"
            else:
                exp["config"]["framework"] = "tf"
        if args.v:
            exp["config"]["log_level"] = "INFO"
            verbose = 2
        if args.vv:
            exp["config"]["log_level"] = "DEBUG"
            verbose = 3
        if args.trace:
            if exp["config"]["framework"] != "tfe":
                raise ValueError("Must enable --eager to enable tracing.")
            exp["config"]["eager_tracing"] = True

    if args.ray_num_nodes:
        cluster = Cluster()
        for _ in range(args.ray_num_nodes):
            cluster.add_node(num_cpus=args.ray_num_cpus or 1,
                             num_gpus=args.ray_num_gpus or 0,
                             object_store_memory=args.ray_object_store_memory,
                             memory=args.ray_memory,
                             redis_max_memory=args.ray_redis_max_memory)
        ray.init(address=cluster.address)
    else:
        ray.init(include_dashboard=not args.no_ray_ui,
                 address=args.ray_address,
                 object_store_memory=args.ray_object_store_memory,
                 memory=args.ray_memory,
                 redis_max_memory=args.ray_redis_max_memory,
                 num_cpus=args.ray_num_cpus,
                 num_gpus=args.ray_num_gpus,
                 local_mode=args.local_mode)

    run_experiments(experiments,
                    scheduler=_make_scheduler(args),
                    queue_trials=args.queue_trials,
                    resume=args.resume,
                    verbose=verbose,
                    concurrent=True)

    ray.shutdown()
def run(args, parser):
    config = {}
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")

    # If no pkl file found, require command line `--config`.
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no config given on command line!")

    # Load the config from pickled.
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)

    if args.use_cpu:
        # When you don't want to run with any gpus.
        config["num_gpus_per_worker"] = 0
        config["num_gpus"] = 0
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    config["num_workers"] = 1
    # # Set num_workers to be at least 2.
    # if "num_workers" in config:
    #     config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    # ADDED
    if args.deterministic_policy:
        evaluation_config["explore"] = False
        config["explore"] = False
    if "env_config" in evaluation_config:
        evaluation_config["env_config"]["num_levels"] = 1
        evaluation_config["env_config"]["use_sequential_levels"] = True
        evaluation_config["env_config"][
            "start_level"] = 0 if args.level_seed is None else args.level_seed
    config["env_config"]["num_levels"] = 1
    config["env_config"]["use_sequential_levels"] = True
    config["env_config"][
        "start_level"] = 0 if args.level_seed is None else args.level_seed
    # END ADDED
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings.
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    video_dir = None
    if args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    vis_info = rollout(agent,
                       args.env,
                       num_steps,
                       num_episodes,
                       video_dir,
                       config,
                       level_seed=args.level_seed)
    visualize_info(vis_info, video_dir)
def check_for_saved_config(args):
    """Check for saved configuration

    :args: Argparse arguments
    :returns: Saved config file with merged updates

    """

    # Load configuration from checkpoint file.
    config_path = ""
    args.save_info = True
    config = None

    # If there is a checkpoint, find parameters
    if args.checkpoint_path:
        config_dir = os.path.dirname(args.checkpoint_path)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint_path:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")

        # Use default config for given agent.
        _, config = get_trainer_class(args.model_name, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).

    # Adds any custom arguments here
    config = merge_dicts(config, args.config)

    if not args.env_name:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env_name = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1

    return config