Example 1
def test_result_grid_future_checkpoint(ray_start_2_cpus, to_object):
    trainable_cls = get_trainable_cls("__fake")
    trial = Trial("__fake", stub=True)
    trial.config = {"some_config": 1}
    trial.last_result = {"some_result": 2, "config": trial.config}

    trainable = ray.remote(trainable_cls).remote()
    ray.get(trainable.set_info.remote({"info": 4}))

    if to_object:
        checkpoint_data = trainable.save_to_object.remote()
    else:
        checkpoint_data = trainable.save.remote()

    trial.on_checkpoint(
        _TrackedCheckpoint(checkpoint_data,
                           storage_mode=CheckpointStorage.MEMORY))
    trial.pickled_error_file = None
    trial.error_file = None
    result_grid = ResultGrid(None)

    # Internal result grid conversion
    result = result_grid._trial_to_result(trial)
    assert isinstance(result.checkpoint, Checkpoint)
    assert isinstance(result.metrics, dict)
    assert isinstance(result.config, dict)
    assert result.metrics_dataframe is None
    assert result.config == {"some_config": 1}
    assert result.metrics["config"] == result.config

    # Load checkpoint data (see ray.rllib.algorithms.mock.MockTrainer definition)
    with result.checkpoint.as_directory() as checkpoint_dir:
        with open(os.path.join(checkpoint_dir, "mock_agent.pkl"), "rb") as f:
            info = pickle.load(f)
            assert info["info"] == 4
Example 2
def get_agent_cls(agent_name):
    """Retrieve agent class from global registry.

    The user must have called `raylab.register_all_agents()` beforehand to
    have access to Raylab's agents.
    """
    return get_trainable_cls(agent_name)
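
A minimal usage sketch (not part of the example above), assuming Raylab is installed; the agent name "SoftAC" is only an illustrative placeholder for whatever agents Raylab actually registers:

import raylab

raylab.register_all_agents()          # populate Ray's global trainable registry
agent_cls = get_agent_cls("SoftAC")   # hypothetical agent name, resolved via the registry
print(agent_cls.__name__)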
Example 3
def _check_default_resources_override(run_identifier):
    if not isinstance(run_identifier, str):
        # If obscure dtype, assume it is overridden.
        return True
    trainable_cls = get_trainable_cls(run_identifier)
    return hasattr(trainable_cls, "default_resource_request") and (
        trainable_cls.default_resource_request.__code__ !=
        Trainable.default_resource_request.__code__)
def run_rollout(args, parser):

    config = args.config
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    use_arg_monitor = False
    try:
        args.video_dir
    except AttributeError:
        print("There is no such attribute: args.video_dir")
        use_arg_monitor = True

    video_dir = None
    if not use_arg_monitor:
        if args.monitor:
            video_dir = os.path.join("./logs", "video")
        elif args.video_dir:
            video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with rollout.RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        if use_arg_monitor:
            rollout.rollout(
                agent,
                args.env,
                num_steps,
                num_episodes,
                saver,
                args.no_render,
                args.monitor)
        else:
            rollout.rollout(
                agent, args.env,
                num_steps,
                num_episodes,
                saver,
                args.no_render, video_dir)
Example 5
def _raise_on_durable(trainable_name, sync_to_driver, upload_dir):
    trainable_cls = get_trainable_cls(trainable_name)
    from ray.tune.durable_trainable import DurableTrainable
    if issubclass(trainable_cls, DurableTrainable):
        if sync_to_driver is not False:
            raise ValueError(
                "EXPERIMENTAL: DurableTrainable will automatically sync "
                "results to the provided upload_dir. "
                "Set `sync_to_driver=False` to avoid data inconsistencies.")
        if not upload_dir:
            raise ValueError(
                "EXPERIMENTAL: DurableTrainable will automatically sync "
                "results to the provided upload_dir. "
                "`upload_dir` must be provided.")
Example 6
def run_rollout(trainable_type, mission_file, seed):
    # Writes the mission file for minerl
    mission_file_path = write_mission_file_for_seed(mission_file, seed)

    # Instantiate the agent.  Note: the IMPALA trainer implementation in
    # Ray uses an AsyncSamplesOptimizer.  Under the hood, this starts a
    # LearnerThread which will wait for training samples.  This will fail
    # after a timeout, but has no influence on the rollout. See
    # https://github.com/ray-project/ray/blob/708dff6d8f7dd6f7919e06c1845f1fea0cca5b89/rllib/optimizers/aso_learner.py#L66
    config = {
        "env_config": {
            "mission": mission_file_path,
            "is_rollout": True,
            "seed": seed
        },
        "num_workers": 0
    }
    cls = get_trainable_cls(trainable_type)
    agent = cls(env="Minecraft", config=config)

    # The optimizer is not needed during a rollout
    agent.optimizer.stop()

    # Load state from checkpoint
    agent.restore(f'{checkpoint_path}/{checkpoint_file}')

    # Get a reference to the environment
    env = agent.workers.local_worker().env

    # Let the agent choose actions until the game is over
    obs = env.reset()
    done = False
    total_reward = 0

    while not done:
        action = agent.compute_action(obs)
        obs, reward, done, info = env.step(action)

        total_reward += reward

    print(f'Total reward using seed {seed}: {total_reward}')

    # This avoids a sigterm trace in the logs, see minerl.env.malmo.Instance
    env.instance.watcher_process.kill()

    env.close()
    agent.stop()

    return env.get_trajectory()
Example 7
    def as_trainable(self) -> Type[Trainable]:
        param_dict = self._param_dict
        base_config = self._config
        trainer_cls = self.__class__

        if isinstance(self._algorithm, str):
            rllib_trainer = get_trainable_cls(self._algorithm)
        else:
            rllib_trainer = self._algorithm

        class AIRRLTrainer(rllib_trainer):
            def __init__(
                self,
                config: Optional[PartialTrainerConfigDict] = None,
                env: Optional[Union[str, EnvType]] = None,
                logger_creator: Optional[Callable[[], Logger]] = None,
                remote_checkpoint_dir: Optional[str] = None,
                sync_function_tpl: Optional[str] = None,
            ):
                resolved_config = merge_dicts(base_config, config)
                param_dict["config"] = resolved_config

                trainer = trainer_cls(**param_dict)
                rllib_config = trainer._get_rllib_config(process_datasets=True)

                super(AIRRLTrainer, self).__init__(
                    rllib_config,
                    env,
                    logger_creator,
                    remote_checkpoint_dir,
                    sync_function_tpl,
                )

            @classmethod
            def default_resource_request(
                cls, config: PartialTrainerConfigDict
            ) -> Union[Resources, PlacementGroupFactory]:
                resolved_config = merge_dicts(base_config, config)
                param_dict["config"] = resolved_config

                trainer = trainer_cls(**param_dict)
                rllib_config = trainer._get_rllib_config(
                    process_datasets=False)

                return rllib_trainer.default_resource_request(rllib_config)

        AIRRLTrainer.__name__ = f"AIR{rllib_trainer.__name__}"
        return AIRRLTrainer
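
A hedged usage sketch (assumptions: an already constructed trainer instance named `rl_trainer`; the exact Tune entry point may differ between Ray versions). It only illustrates that the class returned by `as_trainable()` is an ordinary Tune Trainable and can be passed to `tune.run` like any other trainable:

from ray import tune

trainable_cls = rl_trainer.as_trainable()  # `rl_trainer` is a hypothetical trainer instance
analysis = tune.run(
    trainable_cls,                         # behaves like any registered Trainable class
    stop={"training_iteration": 1},
)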
Example 8
def train(name, ray_config, debug=False):
    """
    Trains sam
    Parameters
    ----------
    name: name of yaml file
    ray_config: ray configuration
    debug: whether to test in editor

    Returns
    -------

    """
    ray.init()
    trainer_class = get_trainable_cls(ray_config['run'])
    default_config = trainer_class._default_config.copy()
    config = merge_dicts(default_config, ray_config['config'])
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    run = ray_config['run']

    model_name = f'{name}_{now}'
    print(f'\33]0;{model_name} - {name}\a', end='', flush=True)
    if debug:
        config['num_workers'] = 0
        config['num_envs_per_worker'] = 1
        # config['train_batch_size'] = 10
        config['env_config']['log_every'] = 2000
        trainer = trainer_class(config=config)
        policy = trainer.get_policy()
        model = policy.model
        print(model)
        for i in range(10):
            res = trainer.train()
            print(pretty_print(res))
    else:
        tune.run(
            run,
            name=model_name,
            # stop=ray_config['stop'],
            local_dir='results',
            config=config,
            checkpoint_at_end=True,
            verbose=2,
            # restore=RESTORE_PATH,
            checkpoint_freq=10)

    ray.shutdown()
Example 9
def _check_default_resources_override(
        run_identifier: Union[Experiment, str, Type, Callable]) -> bool:
    if isinstance(run_identifier, Experiment):
        run_identifier = run_identifier.run_identifier

    if isinstance(run_identifier, type):
        if not issubclass(run_identifier, Trainable):
            # If obscure dtype, assume it is overridden.
            return True
        trainable_cls = run_identifier
    elif callable(run_identifier):
        trainable_cls = run_identifier
    elif isinstance(run_identifier, str):
        trainable_cls = get_trainable_cls(run_identifier)
    else:
        # Default to True
        return True

    return hasattr(trainable_cls, "default_resource_request") and (
        trainable_cls.default_resource_request.__code__ !=
        Trainable.default_resource_request.__code__)
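
A minimal sketch (not from the source; the PlacementGroupFactory import path may vary across Ray versions) of a Trainable subclass whose overridden `default_resource_request` makes the check above return True, while the unmodified base class does not:

from ray.tune import Trainable
from ray.tune.utils.placement_groups import PlacementGroupFactory


class MyTrainable(Trainable):
    @classmethod
    def default_resource_request(cls, config):
        # Request a single CPU bundle for the trial; chosen purely for illustration.
        return PlacementGroupFactory([{"CPU": 1}])


assert _check_default_resources_override(MyTrainable)    # override detected -> True
assert not _check_default_resources_override(Trainable)  # base implementation -> False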
Example 10
 def get_trainable_cls(self):
     return get_trainable_cls(self.trainable_name)
    all_steps = []
    all_dist = []
    all_power_total = []
    all_cot = []
    all_vel = []

    for experiment in range(0, len(exp_params)):
        with open(exp_params[experiment], "rb") as f:
            config = pickle.load(f)
        if "num_workers" in config:
            config["num_workers"] = min(2, config["num_workers"])
        config["create_env_on_driver"] = True
        config['env_config']['hf_smoothness'] = hf_smoothness_eval
        if "no_eager_on_workers" in config:
            del config["no_eager_on_workers"]
        cls = get_trainable_cls('PPO')
        agent = cls(env=config['env'], config=config)
        # Load state from checkpoint.
        agent.restore(exp_checkpoint[experiment])
        if hasattr(agent, "workers") and isinstance(agent.workers, WorkerSet):
            env = agent.workers.local_worker().env
        res_rollout = rollout_episodes(env,
                                       agent,
                                       num_episodes=100,
                                       num_steps=1000,
                                       render=False)

        # Write detailed data to panda file
        for sim_it in range(0, len(res_rollout[0])):
            new_pd_entry = pd.Series({
                "approach":
Example 12
def run(args, parser):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")

        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = not args.no_render
    config["record_env"] = args.video_dir

    ray.init(local_mode=args.local_mode)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    video_dir = None
    # Allow user to specify a video output path.
    if args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with RolloutSaver(args.out,
                      args.use_shelve,
                      write_update_file=args.track_progress,
                      target_steps=num_steps,
                      target_episodes=num_episodes,
                      save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, video_dir)
    agent.stop()
Example 13
    def is_durable_trainable(self):
        # Local import to avoid cyclical dependencies
        from ray.tune.durable_trainable import DurableTrainable
        trainable_cls = get_trainable_cls(self._run_identifier)
        return issubclass(trainable_cls, DurableTrainable)
Example 14
def durable(trainable: Union[str, Type[Trainable], Callable]):
    """Convert trainable into a durable trainable.

    Durable trainables are used to upload trial results and checkpoints
    to cloud storage, like e.g. AWS S3.

    This function can be used to convert your trainable, i.e. your trainable
    classes, functions, or string identifiers, to a durable trainable.

    To make durable trainables work, you should pass a valid
    :class:`SyncConfig <ray.tune.SyncConfig>` object to `tune.run()`.

    Example:

    .. code-block:: python

        from ray import tune

        analysis = tune.run(
            tune.durable("PPO"),
            config={"env": "CartPole-v0"},
            checkpoint_freq=1,
            sync_config=tune.SyncConfig(
                sync_to_driver=False,
                upload_dir="s3://your-s3-bucket/durable-ppo/",
            ))

    You can also convert your trainable functions:

    .. code-block:: python

        tune.run(
            tune.durable(your_training_fn),
            # ...
        )

    And your Trainable classes:

    .. code-block:: python

        tune.run(
            tune.durable(YourTrainableClass),
            # ...
        )


    Args:
        trainable (str|Type[Trainable]|Callable): Trainable. Can be a
            string identifier, a trainable class, or a trainable function.

    Returns:
        A durable trainable class wrapped around your trainable.

    """
    if isinstance(trainable, str):
        trainable_cls = get_trainable_cls(trainable)
    else:
        trainable_cls = trainable

    if not inspect.isclass(trainable_cls):
        # Function API
        return wrap_function(trainable_cls, durable=True)

    if not issubclass(trainable_cls, Trainable):
        raise ValueError(
            "You can only use `durable()` with valid trainables. The class "
            "you passed does not inherit from `Trainable`. Please make sure "
            f"it does. Got: {type(trainable_cls)}")

    # else: Class API
    class _WrappedDurableTrainable(DurableTrainable, trainable_cls):
        _name = trainable_cls.__name__ if hasattr(trainable_cls, "__name__") \
            else "durable_trainable"

    return _WrappedDurableTrainable
Example 15
    def get_trainable_cls(self):
        if self.stub:
            return None
        return get_trainable_cls(self.trainable_name)
Example 16
def run(args, parser):
    config = {}
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")

    # If no pkl file found, require command line `--config`.
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no config given on command line!")

    # Load the config from pickled.
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)

    if args.use_cpu:
        # When you don't want to run with any gpus.
        config["num_gpus_per_worker"] = 0
        config["num_gpus"] = 0
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    config["num_workers"] = 1
    # # Set num_workers to be at least 2.
    # if "num_workers" in config:
    #     config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    # ADDED
    if args.deterministic_policy:
        evaluation_config["explore"] = False
        config["explore"] = False
    if "env_config" in evaluation_config:
        evaluation_config["env_config"]["num_levels"] = 1
        evaluation_config["env_config"]["use_sequential_levels"] = True
        evaluation_config["env_config"][
            "start_level"] = 0 if args.level_seed is None else args.level_seed
    config["env_config"]["num_levels"] = 1
    config["env_config"]["use_sequential_levels"] = True
    config["env_config"][
        "start_level"] = 0 if args.level_seed is None else args.level_seed
    # END ADDED
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings.
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    video_dir = None
    if args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    vis_info = rollout(agent,
                       args.env,
                       num_steps,
                       num_episodes,
                       video_dir,
                       config,
                       level_seed=args.level_seed)
    visualize_info(vis_info, video_dir)
Example 17
def get_agent_cls(agent_name):
    """Retrieve agent class from global registry."""
    return get_trainable_cls(agent_name)
Example 18
def self_play_workflow(config):
    """
    Expects in config:
        checkpoint
            checkpoint to load from (None if new)
        trainer
            trainer to use
        model
            model to use in learning
        percentage_equal: float
            Significance threshold: the maximum probability (binomial-test
            p-value) that equally strong opponents would produce the observed
            game results; below it, one player is considered stronger.
        lr_schedule: List of lr
            Learning rates to use, from first to last; advance to the next one
            each time the model gets worse.
        training_rounds
            Rounds of training
        evaluation_rounds
            Rounds of evaluation

    1. Generate a large batch of self-play games.
    2. Train.
    3. Test the updated bot against the previous version.
    4. If the bot is measurably stronger, switch to this new version.
    5. If the bot is about the same strength, generate more games and train again.
    6. If the bot gets significantly weaker, adjust the optimizer settings and retrain.
    """
    ##########################################
    # Set config of trainer and evaluators
    ##########################################
    check_dir = 'logs'
    log_file = 'logs/logs.txt'
    if os.path.exists(log_file):
        os.remove(log_file)

    if config.get("evaluation_num_episodes", None) is None:
        config["evaluation_num_episodes"] = 1
    trainer_fn = get_trainable_cls(config["trainer"])
    lr_idx = 0

    def select_policy_train(agent_id):
        if agent_id == "player1":
            return np.random.choice(
                ["learning_white", "previous_white", "random"],
                1,
                p=[.6, .3, .1])[0]
        else:
            return np.random.choice(
                ["learning_black", "previous_black", "random"],
                1,
                p=[.6, .3, .1])[0]

    def select_policy_eval(learning_player, agent_id):
        if learning_player == "player1":
            if agent_id == "player1":
                return "learning_white"
            else:
                return "previous_black"
        else:
            if agent_id == "player2":
                return "learning_black"
            else:
                return "previous_white"

    trainer_config = copy.deepcopy(config)
    # remove self-play parameters
    trainer_config.pop("trainer")
    trainer_config.pop("percentage_equal")
    trainer_config.pop("model")
    trainer_config.pop("training_rounds")
    trainer_config.pop("evaluation_rounds")
    trainer_config.pop("checkpoint", None)
    trainer_config.pop("lr_schedule", None)
    trainer_config.pop("evaluation_interval", None)

    trainer_config["lr"] = config["lr_schedule"][lr_idx]
    trainer_config["multiagent"] = {
        "policies_to_train": ["learning_white", "learning_black"],
        "policies": {
            "random": (PolicyRandom, config["env"].observation_space,
                       config["env"].action_space, {}),
            "learning_white": (None, config["env"].observation_space,
                               config["env"].action_space, {
                                   "model": config["model"]
                               }),
            "learning_black": (None, config["env"].observation_space,
                               config["env"].action_space, {
                                   "model": config["model"]
                               }),
            "previous_white": (None, config["env"].observation_space,
                               config["env"].action_space, {
                                   "model": config["model"]
                               }),
            "previous_black": (None, config["env"].observation_space,
                               config["env"].action_space, {
                                   "model": config["model"]
                               }),
        },
        "policy_mapping_fn": select_policy_train,
    }
    trainer_config["train_batch_size"] = 2 * config["train_batch_size"]

    eval_config_player1 = copy.deepcopy(trainer_config)
    eval_config_player1["multiagent"]["policy_mapping_fn"] = partial(
        select_policy_eval, "player1")
    eval_config_player1["multiagent"]["policies_to_train"] = []

    eval_config_player2 = copy.deepcopy(trainer_config)
    eval_config_player2["multiagent"]["policy_mapping_fn"] = partial(
        select_policy_eval, "player2")
    eval_config_player2["multiagent"]["policies_to_train"] = []

    ##########################################
    # Run train / evaluation rounds
    ##########################################

    def update_for_next_loop(total_rounds, rounds, reset=False):
        done = False
        if reset:
            next_num_rounds = rounds.min
        else:
            if (total_rounds >= rounds.max):
                done = True
            next_num_rounds = rounds.step

        return done, next_num_rounds

    ray.init()

    trainer = trainer_fn(env=trainer_config["env"], config=trainer_config)
    evaluator_player1 = trainer_fn(env=eval_config_player1["env"],
                                   config=eval_config_player1)
    evaluator_player2 = trainer_fn(env=eval_config_player2["env"],
                                   config=eval_config_player2)

    total_rounds_training = 0
    done, training_rounds = update_for_next_loop(total_rounds_training,
                                                 config["training_rounds"],
                                                 True)
    prev_it_state = config.get("checkpoint", None)
    prev_state = prev_it_state
    while not done:
        ##########################################
        # Train
        ##########################################
        try:
            if prev_it_state is not None:
                trainer.restore(prev_it_state)
            for _ in range(training_rounds):
                trainer.train()
            state = trainer.save(check_dir)
            # trainer.stop()

            total_rounds_training += training_rounds
        except Exception:
            trainer.stop()
            with open(log_file, 'a') as f:
                f.write("Model failed, updating optimizer\n")
            lr_idx += 1
            if lr_idx < len(config["lr_schedule"]):
                trainer_config["lr"] = config["lr_schedule"][lr_idx]
                trainer = trainer_fn(env=trainer_config["env"],
                                     config=trainer_config)
                total_rounds_training = 0
                done, training_rounds = update_for_next_loop(
                    total_rounds_training, config["training_rounds"], True)
                prev_it_state = prev_state
            else:
                done = True
            continue  # try again.

        ##########################################
        # Evaluate
        ##########################################
        try:
            total_eval_rounds = 0
            comparison_wrt_equal = 1
            eval_results1 = []
            eval_results2 = []
            # maximal evaluation rounds determined by training, does not make sense to evaluate more than training rounds.
            eval_info = InfoNumberRounds(
                config["evaluation_rounds"].min,
                min(config["evaluation_rounds"].max, total_rounds_training),
                config["evaluation_rounds"].step)
            done_eval, eval_rounds = update_for_next_loop(
                total_eval_rounds, eval_info, True)
            while not done_eval:
                num_episodes = eval_rounds * config["evaluation_num_episodes"]

                evaluator_player1.restore(state)
                eval_results1.extend(
                    own_evaluation(evaluator_player1, eval_rounds))
                num_pos = sum(x == 1 for x in eval_results1)
                num_neg = sum(x == -1 for x in eval_results1)
                comparison_wrt_equal1 = binom_test(num_pos, num_pos + num_neg,
                                                   0.5)
                with open(log_file, 'a') as f:
                    f.write(
                        f'results1: trained agent wins: {num_pos} previous agent wins: {num_neg} remises: {sum(x == 0 for x in eval_results1)} \n'
                    )
                    f.write(
                        f'chance result for equal opponents: {comparison_wrt_equal1} \n'
                    )

                evaluator_player2.restore(state)
                eval_results2.extend(
                    own_evaluation(evaluator_player2, eval_rounds))
                num_pos = sum(x == 1 for x in eval_results2)
                num_neg = sum(x == -1 for x in eval_results2)
                comparison_wrt_equal2 = binom_test(num_neg, num_pos + num_neg,
                                                   0.5)
                with open(log_file, 'a') as f:
                    f.write(
                        f'results2: trained agent wins: {num_neg} previous agent wins: {num_pos} remises: {sum(x == 0 for x in eval_results2)} \n'
                    )
                    f.write(
                        f'chance result for equal opponents: {comparison_wrt_equal2} \n'
                    )

                total_eval_rounds += eval_rounds

                done_eval, eval_rounds = update_for_next_loop(
                    total_eval_rounds, eval_info)
                if config["percentage_equal"] > comparison_wrt_equal1 or config[
                        "percentage_equal"] > comparison_wrt_equal2:
                    # one of players improved
                    done_eval = True
        except Exception:
            with open(log_file, 'a') as f:
                f.write("Model failed, need to update optimizer\n")
            # trigger update optimizer
            comparison_wrt_equal1 = 0
            comparison_wrt_equal2 = 0
            eval_results1 = [-1]
            eval_results2 = [1]

        ##########################################
        # Update policy
        ##########################################

        if config["percentage_equal"] > comparison_wrt_equal1 or config[
                "percentage_equal"] > comparison_wrt_equal2:
            # results differ enough
            if sum(x == 1 for x in eval_results1) > sum(
                    x == -1 for x in eval_results1) and sum(
                        x == -1
                        for x in eval_results2) > sum(x == 1
                                                      for x in eval_results2):
                with open(log_file, 'a') as f:
                    f.write("Model improved\n")
                total_rounds_training = 0
                done, training_rounds = update_for_next_loop(
                    total_rounds_training, config["training_rounds"], True)
                # reupdate previous
                key_previous_val_learning_white = {}
                for (k, v), (k2, v2) in zip(
                        trainer.get_policy(
                            "previous_white").get_weights().items(),
                        trainer.get_policy(
                            "learning_white").get_weights().items()):
                    key_previous_val_learning_white[k] = v2
                key_previous_val_learning_black = {}
                for (k, v), (k2, v2) in zip(
                        trainer.get_policy(
                            "previous_black").get_weights().items(),
                        trainer.get_policy(
                            "learning_black").get_weights().items()):
                    key_previous_val_learning_black[k] = v2
                # set weights
                trainer.set_weights({
                    "previous_white":
                    key_previous_val_learning_white,
                    "previous_black":
                    key_previous_val_learning_black,
                    # no change
                    "learning_white":
                    trainer.get_policy("learning_white").get_weights(),
                    "learning_black":
                    trainer.get_policy("learning_black").get_weights(),
                })
                if prev_state is not None:
                    trainer.delete_checkpoint(prev_state)
                trainer.delete_checkpoint(state)

                prev_it_state = trainer.save(check_dir)
                prev_state = prev_it_state
            elif sum(x == 1 for x in eval_results1) < sum(
                    x == -1 for x in eval_results1) and sum(
                        x == -1
                        for x in eval_results2) < sum(x == 1
                                                      for x in eval_results2):
                with open(log_file, 'a') as f:
                    f.write("Model got worse, updating optimizer\n")
                trainer.stop()
                lr_idx += 1
                if lr_idx < len(config["lr_schedule"]):
                    trainer_config["lr"] = config["lr_schedule"][lr_idx]
                    trainer = trainer_fn(env=trainer_config["env"],
                                         config=trainer_config)
                    total_rounds_training = 0
                    done, training_rounds = update_for_next_loop(
                        total_rounds_training, config["training_rounds"], True)
                    prev_it_state = prev_state
                else:
                    done = True
            else:
                with open(log_file, 'a') as f:
                    f.write(
                        "One player improved one got worse, trying more learning iterations.\n"
                    )
                done, training_rounds = update_for_next_loop(
                    total_rounds_training, config["training_rounds"])
                prev_it_state = state
        else:
            with open(log_file, 'a') as f:
                f.write(
                    "Unable to evaluate, trying more learning iterations.\n")
            done, training_rounds = update_for_next_loop(
                total_rounds_training, config["training_rounds"])
            prev_it_state = state

    trainer.restore(prev_it_state)
    trainer.save()
    print("Checkpoint and trainer saved at: ", trainer.logdir)
    with open(log_file, 'a') as f:
        f.write(f'Checkpoint and trainer saved at: {trainer.logdir} \n')
Example 19
File: tune.py Project: emailhy/ray
def _check_default_resources_override(run_identifier):
    trainable_cls = get_trainable_cls(run_identifier)
    return hasattr(trainable_cls, "default_resource_request") and (
        trainable_cls.default_resource_request.__code__ !=
        Trainable.default_resource_request.__code__)
Example 20
def run_rollouts(args, config):
    """Run rollouts (if not bundling
    existing rollouts)

    :args: Argparse.Args: User defined arguments
    :config: Dict: Execution Configuration

    """

    # Make sure configuration has the correct outpath
    config['callbacks'] = lambda: RLlibIxdrlCallbacks(args=args, config=config)

    # Spin up Ray only if it is not already running
    if args.init_ray:
        ray.init(local_mode=args.local_mode)

    # Set up environment
    env = gym.make(args.env_name)

    # Wrap environment
    env = wrap_env(env, args.wrappers)

    # Register environment with Ray
    register_env(args.env_name, lambda config: env)

    # Create the model Trainer from config.
    cls = get_trainable_cls(args.model_name)

    # Instantiate agent
    agent = cls(env=args.env_name,
                config=config,
                logger_creator=cameleon_logger_creator(args.writer_dir))

    # Restore agent if needed
    if args.checkpoint_path:

        # This is not ideal, but only way to guarantee
        # correct information about model. Add slight overhead
        # Need to restore the model for rollouts but, then
        # must restart to feed information to logger
        logging.info(
            "Restoring agent twice to feed information correctly to logger")
        agent.restore(args.checkpoint_path)

        # Make sure configuration has the correct outpath
        args.epochs_trained = agent._iteration if agent._iteration is not None else 0

        # Make sure configuration has the correct outpath
        config['callbacks'] = lambda: RLlibIxdrlCallbacks(args=args,
                                                          config=config)
        # Need to run setup again with new callbacks
        agent.setup(config)
        agent.restore(args.checkpoint_path)

    # Do the actual rollout.
    run_rollout(agent,
                env,
                args.env_name,
                args.num_timesteps,
                args.num_episodes,
                args.no_render,
                args.video_dir,
                args=args)

    # Stop the agent
    agent.stop()

    # Get the gross files out of there
    cleanup(config['monitor'],
            args.writer_dir,
            ext=args.ext,
            sync_bundles=args.sync_bundles)
Example 21
    def as_trainable(self) -> Type[Trainable]:
        param_dict = self._param_dict
        base_config = self._config or {}
        trainer_cls = self.__class__
        preprocessor = self.preprocessor

        if isinstance(self._algorithm, str):
            rllib_trainer = get_trainable_cls(self._algorithm)
        else:
            rllib_trainer = self._algorithm

        class AIRRLTrainer(rllib_trainer):
            def __init__(
                self,
                config: Optional[PartialAlgorithmConfigDict] = None,
                env: Optional[Union[str, EnvType]] = None,
                logger_creator: Optional[Callable[[], Logger]] = None,
                remote_checkpoint_dir: Optional[str] = None,
                custom_syncer: Optional[Syncer] = None,
            ):
                resolved_config = merge_dicts(base_config, config or {})
                param_dict["config"] = resolved_config

                trainer = trainer_cls(**param_dict)
                rllib_config = trainer._get_rllib_config(process_datasets=True)

                super(AIRRLTrainer, self).__init__(
                    config=rllib_config,
                    env=env,
                    logger_creator=logger_creator,
                    remote_checkpoint_dir=remote_checkpoint_dir,
                    custom_syncer=custom_syncer,
                )

            def save_checkpoint(self, checkpoint_dir: str):
                checkpoint_path = super(AIRRLTrainer,
                                        self).save_checkpoint(checkpoint_dir)

                trainer_class_path = os.path.join(checkpoint_dir,
                                                  RL_TRAINER_CLASS_FILE)
                with open(trainer_class_path, "wb") as fp:
                    cpickle.dump(self.__class__, fp)

                config_path = os.path.join(checkpoint_dir, RL_CONFIG_FILE)
                with open(config_path, "wb") as fp:
                    cpickle.dump(self.config, fp)

                if preprocessor:
                    save_preprocessor_to_dir(preprocessor, checkpoint_dir)

                return checkpoint_path

            @classmethod
            def default_resource_request(
                cls, config: PartialAlgorithmConfigDict
            ) -> Union[Resources, PlacementGroupFactory]:
                resolved_config = merge_dicts(base_config, config)
                param_dict["config"] = resolved_config

                trainer = trainer_cls(**param_dict)
                rllib_config = trainer._get_rllib_config(
                    process_datasets=False)

                return rllib_trainer.default_resource_request(rllib_config)

        AIRRLTrainer.__name__ = f"AIR{rllib_trainer.__name__}"
        return AIRRLTrainer
Example 22
def visualizer_rllib(args, seed=None):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    config['callbacks'] = MyCallbacks
    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)
    #flow_params['env'].additional_params["use_seeds"]=args.use_seeds
    #    print(args.use_seeds)
    seed_tmp = None
    if seed:
        with open(seed, 'rb') as f:
            seed_tmp = pickle.load(f)
        config['seed'] = int(seed_tmp['rllib_seed'])
    elif args.use_seeds:
        with open(args.use_seeds, 'rb') as f:
            seed_tmp = pickle.load(f)
        config['seed'] = int(seed_tmp['rllib_seed'])
    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)
    if seed_tmp:
        #setattr(sim_params, 'seed', seed_tmp['sumo_seed'])
        sim_params.seed = int(int(seed_tmp['sumo_seed']) / 10**6)
        print(sim_params.seed)
    #import IPython
    #IPython.embed()
    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    config = merge_dicts(config, evaluation_config)

    if args.run:
        agent_cls = get_trainable_cls(args.run)
    elif config_run:
        agent_cls = get_trainable_cls(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True
    #if seed is not None:
    #    print(seed)
    #    flow_params["env"].additional_params["use_seeds"] = seed
    #    input()
    #else:
    #    flow_params["env"].additional_params["use_seeds"] = args.use_seeds
    if args.horizon:
        config['horizon'] = args.horizon
        flow_params['env'].horizon = args.horizon

    # Create and register a gym+rllib env
    register_time = time.time()
    create_env, env_name = make_create_env(params=flow_params,
                                           version=0,
                                           seeds_file=seed)
    register_env(env_name, create_env)
    register_time = time.time() - register_time
    print("Register Time:", register_time)
    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = True  #False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon
    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    create_time = time.time()
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)
    create_time = time.time() - create_time
    print("Create time:", create_time)
    if multiagent:
        rets = {}
        # map the agent id to its policy
        print(config['multiagent']['policy_mapping_fn'])
        policy_map_fn = config['multiagent']['policy_mapping_fn']  #.func

        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    restart_time = time.time()
    env.restart_simulation(sim_params=sim_params, render=sim_params.render)
    restart_time = time.time() - restart_time
    print("Restart Time:", restart_time)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []

    if PRINT_TO_SCREEN:
        pp = pprint.PrettyPrinter(indent=2)
        print("config ")
        pp.pprint(config)
        print("flow_params ")
        pp.pprint(flow_params)

    if REALTIME_PLOTS:
        # prepare plots
        # You probably won't need this if you're embedding things in a tkinter plot...
        plt.ion()
        fig = plt.figure()
        axA = fig.add_subplot(331)
        axA.set_title("Actions")
        axR = fig.add_subplot(332)
        axR.set_title("Rewards")
        axS = fig.add_subplot(333)
        axS.set_title("States")
        axS0 = fig.add_subplot(334)
        axS0.set_title("S0")
        axS1 = fig.add_subplot(335)
        axS1.set_title("S1")
        axS2 = fig.add_subplot(336)
        axS2.set_title("S2")
        axA_hist = fig.add_subplot(337)
        axA_hist.set_title("Actions")
        axR_hist = fig.add_subplot(338)
        axR_hist.set_title("Rewards")
        axS_hist = fig.add_subplot(339)
        axS_hist.set_title("States")
        axS.set_ylim((-2, 3))
        axA.set_ylim((-5, 5))
        axR.set_ylim((-1, 1))
        initialized_plot = False

    # record for visualization purposes
    actions = []
    rewards = []
    states = []
    times = []
    WARMUP = args.warmup
    run_time = time.time()
    for i in range(args.num_rollouts):
        vel = []
        time_to_exit = 0
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            time_to_exit += 1
            vehicles = env.unwrapped.k.vehicle
            if np.mean(vehicles.get_speed(vehicles.get_ids())) > 0:
                vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            #vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                            state[agent_id], state=state_init[agent_id],
                            policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)

            if SUMMARY_PLOTS:
                # record for visualization purposes
                actions.append(action)
                rewards.append(reward)
                states.append(state)

            if PRINT_TO_SCREEN:
                print("action")
                pp.pprint(action)
                print("reward")
                pp.pprint(reward)
                print("state")
                pp.pprint(state)
                print("after step ")

            if REALTIME_PLOTS:
                # Update plots.
                if not initialized_plot:  # initialize
                    lineA, = axA.plot(
                        [0] * len(action), 'g^'
                    )  # Returns a tuple of line objects, thus the comma
                    lineR, = axR.plot(
                        0, 'bs'
                    )  # Returns a tuple of line objects, thus the comma
                    lineS, = axS.plot(
                        [0] * len(state), 'r+'
                    )  # Returns a tuple of line objects, thus the comma
                    initialized_plot = True
                lineA.set_ydata(action)
                lineR.set_ydata(reward)
                lineS.set_ydata(state)
                fig.canvas.draw()
                fig.canvas.flush_events()

            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward

            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
            if args.use_delay > 0:
                if vehicles.get_num_arrived() >= args.use_delay:
                    break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(5000)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(5000)
        final_inflows.append(inflow)
        times.append(time_to_exit)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))
    run_time = time.time() - run_time
    print('==== Summary of results ====')
    print("Run Time: ", run_time)
    print("Return:")
    env.close()
    return_reward = 0
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
            return_reward = np.mean(rew)
    else:
        print(rets)
        print('Average, std: {:.2f}, {:.5f}'.format(np.mean(rets),
                                                    np.std(rets)))
        return_reward = np.mean(rets)

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(mean_speed),
                                                np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(std_speed),
                                                np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_inflows),
                                                np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(throughput_efficiency),
                                                np.std(throughput_efficiency)))
    print("Time Delay")
    print(times)
    print("Time for certain number of vehicles to exit {:.2f},{:.5f}".format(
        (np.mean(times)), np.std(times)))

    if args.output:
        np.savetxt(args.output, [
            return_reward, mean_speed, std_speed, final_inflows,
            final_outflows, times
        ])
    if SUMMARY_PLOTS:
        generateHtmlplots(actions, rewards, states)

    # terminate the environment
    env.unwrapped.terminate()
    env.terminate()
    # Deleting the env in order to remove sumo process
    del env
    del evaluation_config

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
    return return_reward, mean_speed, final_inflows, final_outflows
Example 23
def load_agent_config(args):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")

        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["num_workers"] = 0
    config["num_envs_per_worker"] = 1
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyway).
    config = merge_dicts(config, args.config)
    if not args.env:
        args.env = config.get("env")

    # Make sure we have evaluation workers (currently disabled).
    # if not config.get("evaluation_num_workers"):
    #     config["evaluation_num_workers"] = config.get("num_workers", 0)
    # Run at least one evaluation episode.
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = args.render
    config["record_env"] = args.video_dir

    if config.get("env_config") is None:
        config["env_config"] = {}

    print("agent_speeds:", args.agent_speeds)
    config["env_config"]["agent_speeds"] = args.agent_speeds

    register_env(args.env, env_creator)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    return agent, config
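A minimal driver sketch for `load_agent_config` (the flag names and defaults below are assumptions made for illustration, not taken from the original script); it only shows which attributes the function expects on `args`:

import argparse
import json

import ray

# Hypothetical CLI; the flags mirror the attributes read by load_agent_config.
parser = argparse.ArgumentParser()
parser.add_argument("--run", required=True, help="Registered trainable, e.g. PPO")
parser.add_argument("--checkpoint", default=None)
parser.add_argument("--env", default=None)
parser.add_argument("--config", type=json.loads, default={},
                    help="JSON dict merged on top of the checkpoint config")
parser.add_argument("--render", action="store_true")
parser.add_argument("--video-dir", dest="video_dir", default=None)
parser.add_argument("--agent-speeds", dest="agent_speeds", nargs="*",
                    type=float, default=[])
args = parser.parse_args()

ray.init()
agent, config = load_agent_config(args)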
Example n. 24
0
def run(args, parser):
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")

    # If no pkl file found, require command line `--config`.
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no config given on command line!")
        else:
            config = args.config

    # Load the config from the pickled params file.
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)

    # Use at most two rollout workers.
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyway).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    # Deprecated way: Use (--out|~/ray_results) + "/monitor" as dir.
    video_dir = None
    if args.monitor:
        video_dir = os.path.join(
            os.path.dirname(args.out or "")
            or os.path.expanduser("~/ray_results/"), "monitor")
    # New way: Allow user to specify a video output path.
    elif args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with RolloutSaver(args.out,
                      args.use_shelve,
                      write_update_file=args.track_progress,
                      target_steps=num_steps,
                      target_episodes=num_episodes,
                      save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, video_dir)
    agent.stop()
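The two `merge_dicts` calls above layer the evaluation config and then the command-line `--config` over the checkpointed config, with later values winning. A stand-alone sketch of that kind of recursive merge (a hand-rolled helper for illustration, not Ray's own `merge_dicts`):

def deep_merge(base, override):
    """Return a new dict with `override` recursively merged over `base`."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged

checkpoint_cfg = {"num_workers": 4, "env_config": {"horizon": 500}}
cli_cfg = {"env_config": {"horizon": 1000}, "render_env": True}
print(deep_merge(checkpoint_cfg, cli_cfg))
# {'num_workers': 4, 'env_config': {'horizon': 1000}, 'render_env': True}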
Example n. 25
0
def run(args, parser):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from the pickled params file.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")

        # Use default config for given agent.
        _, config = get_algorithm_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyway).
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_duration"):
        config["evaluation_duration"] = 1
    # Hard-override this, otherwise the Trainer emits a warning.
    # Leaving it at None makes no sense here anyway, since we never call
    # `Trainer.train()`.
    config["evaluation_interval"] = 1

    # Rendering and video recording settings.
    if args.no_render:
        deprecation_warning(old="--no-render", new="--render", error=False)
        args.render = False
    config["render_env"] = args.render

    ray.init(local_mode=args.local_mode)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Do the actual rollout.
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info,
    ) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                not args.render)
    agent.stop()
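All of the examples above resolve the `--run` string through `get_trainable_cls`. A minimal sketch of how a custom Trainable ends up in that registry (the class and the registry name below are made up for illustration):

import ray
from ray import tune
from ray.tune.registry import get_trainable_cls

class MyTrainable(tune.Trainable):
    def setup(self, config):
        self.value = config.get("start", 0)

    def step(self):
        self.value += 1
        return {"value": self.value}

ray.init()
tune.register_trainable("my_trainable", MyTrainable)  # name is arbitrary
cls = get_trainable_cls("my_trainable")

# Instantiate and run a single training step outside of Tune.
trainer = cls(config={"start": 3})
print(trainer.train()["value"])  # -> 4
trainer.stop()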