Example #1
    def _validate_json_result(self, config):
        # Check result logs
        results = []
        result_file = os.path.join(self.test_dir, EXPR_RESULT_FILE)
        with open(result_file, "rt") as fp:
            for row in fp:
                results.append(json.loads(row))

        self.assertEqual(len(results), 3)
        self.assertSequenceEqual(
            [int(row["episode_reward_mean"]) for row in results], [4, 5, 6])

        # Check json saved config file
        config_file = os.path.join(self.test_dir, EXPR_PARAM_FILE)
        with open(config_file, "rt") as fp:
            loaded_config = json.load(fp)

        self.assertEqual(loaded_config, config)

        # Check pickled config file
        config_file = os.path.join(self.test_dir, EXPR_PARAM_PICKLE_FILE)
        with open(config_file, "rb") as fp:
            loaded_config = cloudpickle.load(fp)

        self.assertEqual(loaded_config, config)
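For context, a minimal sketch of the artifacts this test reads; `write_expected_artifacts` is a hypothetical helper, and the concrete file names (`result.json`, `params.json`, `params.pkl`) are assumed values of the Tune constants. The result log is JSON lines, one result dict per line, which is why the test parses it row by row.

import json
import os
import cloudpickle

def write_expected_artifacts(test_dir, config, rewards=(4, 5, 6)):
    # One JSON object per line ("JSON lines"), matching the row-wise read above.
    with open(os.path.join(test_dir, "result.json"), "wt") as fp:
        for r in rewards:
            fp.write(json.dumps({"episode_reward_mean": r}) + "\n")
    # The same config is persisted twice: once as JSON, once pickled.
    with open(os.path.join(test_dir, "params.json"), "wt") as fp:
        json.dump(config, fp)
    with open(os.path.join(test_dir, "params.pkl"), "wb") as fp:
        cloudpickle.dump(config, fp)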
Example #2
def _populate_exception(trial: Trial) -> Optional[Union[TuneError, RayTaskError]]:
    if trial.pickled_error_file and os.path.exists(trial.pickled_error_file):
        with open(trial.pickled_error_file, "rb") as f:
            return cloudpickle.load(f)
    elif trial.error_file and os.path.exists(trial.error_file):
        with open(trial.error_file, "r") as f:
            return TuneError(f.read())
    return None
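The pickled-error branch depends on cloudpickle round-tripping exception objects intact, which reading a plain-text error file cannot do. A minimal standalone sketch (the file name is illustrative):

import cloudpickle

err = ValueError("trial failed in step()")
with open("error.pkl", "wb") as f:
    cloudpickle.dump(err, f)
with open("error.pkl", "rb") as f:
    restored = cloudpickle.load(f)
assert isinstance(restored, ValueError)
assert str(restored) == "trial failed in step()"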
Example #3
def _get_rllib_config(path):
    """Return the data from the specified rllib configuration file."""
    jsonfile = path / 'params.json'  # JSON copy of the experiment config
    with open(jsonfile) as file:
        jsondata = json.load(file)

    pklfile = path / 'params.pkl'  # pickled copy of the same config
    with open(pklfile, 'rb') as file:
        pkldata = cloudpickle.load(file)

    return jsondata, pkldata
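Because the function joins path components with the `/` operator, it expects a `pathlib.Path` rather than a plain string. A hypothetical call (the results directory is illustrative):

from pathlib import Path

jsondata, pkldata = _get_rllib_config(Path("~/ray_results/PPO_CartPole-v0").expanduser())
print(jsondata.get("env"), pkldata.get("env"))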
Example #4
def get_rllib_pkl(path):
    """Return the data from the specified rllib configuration file."""
    config_path = os.path.join(path, "params.pkl")
    if not os.path.exists(config_path):
        config_path = os.path.join(path, "../params.pkl")
    if not os.path.exists(config_path):
        raise ValueError(
            "Could not find params.pkl in either the checkpoint dir or "
            "its parent directory.")
    with open(config_path, 'rb') as f:
        config = cloudpickle.load(f)
    return config
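A hypothetical invocation, assuming the usual Ray results layout where `params.pkl` sits next to (or one level above) the checkpoint directory:

checkpoint_dir = os.path.expanduser(
    "~/ray_results/my_experiment/checkpoint_000100")  # illustrative path
config = get_rllib_pkl(checkpoint_dir)
print(config.get("env"))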
Example #5
def get_rllib_pkl(path):
    """Return the data from the specified rllib configuration file."""
    pklfile = path + '/params.pkl'  # params.pkl is the pickled config file
    with open(pklfile, 'rb') as file:
        pkldata = cloudpickle.load(file)
    return pkldata
Example #6
def load_agent_config(args):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from the pickle file.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")

        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["num_workers"] = 0
    config["num_envs_per_worker"] = 1
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (unless already identical).
    config = merge_dicts(config, args.config)
    if not args.env:
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    # if not config.get("evaluation_num_workers"):
    #     config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = args.render
    config["record_env"] = args.video_dir

    if config.get("env_config") is None:
        config["env_config"] = {}

    print(args.agent_speeds)
    config["env_config"]["agent_speeds"] = args.agent_speeds

    register_env(args.env, env_creator)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    return agent, config
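One hypothetical way to drive `load_agent_config` from the command line; the attribute names mirror what the function reads off `args`, and all defaults here are assumptions:

import argparse
import json

parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint", type=str, default=None)
parser.add_argument("--run", type=str, default="DQN")
parser.add_argument("--env", type=str, default=None)
parser.add_argument("--config", type=json.loads, default={})
parser.add_argument("--render", action="store_true")
parser.add_argument("--video-dir", type=str, default=None)
parser.add_argument("--agent-speeds", type=json.loads, default="[1.0, 1.0]")
args = parser.parse_args()

agent, config = load_agent_config(args)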
Example #7
params_path = os.path.join(os.path.dirname(os.path.dirname(path)), 'params.pkl')
a = np.vectorize(lambda x, e: x[e])  # index each element of x by the matching entry of e

def process_info(infos):
    return np.array([[info[key] for key in info_keys] for info in infos])

register_env('warehouse_env', warehouse_env_creator)
# env_config = {'map': MAP}
# env = VectorEnv.wrap(existing_envs=[warehouse_env_creator(env_config) for _ in range(NUM_ENVS)],
#                      num_envs=NUM_ENVS)
# config = {"env": "warehouse_env",
#           "framework": "torch",
#           "num_gpus": 0.1,
#           "num_gpus_per_worker": 0.1,
#           'num_envs_per_worker': 6,
#           "evaluation_interval": 5, }
with open(params_path, "rb") as f:
    config = cloudpickle.load(f)
config["explore"] = False
config['num_envs_per_worker'] = 1
print("Trained on map: \n", config["env_config"]["maps"])
config["env_config"]["maps"] = MAP_WITH_EXCEPTION
trainer = DQNTrainer(config=config)
trainer.restore(path.format(checkpoint, checkpoint))
policy = trainer.get_policy()
trainer._evaluate()
samples = (trainer.evaluation_workers.local_worker().sample()
           for _ in range(NUM_EPISODES))
rows = map(lambda x: np.concatenate([
    x["unroll_id"][:, None],
    np.arange(0, x.count)[:, None],
    x["obs"],
    x["actions"][:, None],
], axis=1), samples)  # closing of the call is assumed; the source may concatenate further columns