def test_string_game_gpu(self):
    with open(MDNRNN_STRING_GAME_JSON, "r") as f:
        mdnrnn_params = json_to_object(f.read(), OpenAiGymParameters)
    mdnrnn_params = mdnrnn_params._replace(use_gpu=True)
    with open(DQN_STRING_GAME_JSON, "r") as f:
        rl_params = json_to_object(f.read(), OpenAiGymParameters)
    rl_params = rl_params._replace(use_gpu=True)
    avg_reward_history = self._test_state_embed(mdnrnn_params, rl_params)
    self.verify_result(avg_reward_history, 10)
Example #2
def main(args):
    parser = argparse.ArgumentParser(
        description="Train an RL net to play in an OpenAI Gym environment. "
        "States are embedded by an MDN-RNN model."
    )
    parser.add_argument(
        "-p",
        "--mdnrnn_parameters",
        help="Path to JSON parameters file for MDN-RNN training.",
    )
    parser.add_argument(
        "-q", "--rl_parameters", help="Path to JSON parameters file for RL training."
    )
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        type=int,
        default=-1,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    args = parser.parse_args(args)
    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception(
            "Logging level {} is not a valid level.".format(args.log_level)
        )
    logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))

    with open(args.mdnrnn_parameters, "r") as f:
        mdnrnn_params = json_to_object(f.read(), OpenAiGymParameters)
    with open(args.rl_parameters, "r") as f:
        rl_params = json_to_object(f.read(), OpenAiGymParameters)

    # Enable GPU training when a GPU ID was given; otherwise stay on CPU.
    if args.gpu_id != -1:
        mdnrnn_params = mdnrnn_params._replace(use_gpu=True)
        rl_params = rl_params._replace(use_gpu=True)

    env, mdnrnn_trainer, embed_rl_dataset = create_mdnrnn_trainer_and_embed_dataset(
        mdnrnn_params, rl_params.use_gpu
    )

    # OpenAiGymParameters is an object (not a dict), so use attribute access.
    max_embed_seq_len = mdnrnn_params.run_details.seq_len
    _, _, rl_trainer, rl_predictor, state_embed_env = run_gym(
        rl_params,
        args.score_bar,
        embed_rl_dataset,
        env.env,
        mdnrnn_trainer.mdnrnn,
        max_embed_seq_len,
    )
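
For reference, a minimal entry point for this script could look like the sketch below; the script name and JSON file names in the comment are hypothetical placeholders.

import sys

if __name__ == "__main__":
    # Forward the CLI arguments (minus the program name) to main().
    # Hypothetical invocation:
    #   python state_embed_gym.py -p mdnrnn_params.json -q rl_params.json -g 0
    main(sys.argv[1:])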
Example #3
def test_mdnrnn_cartpole(self):
    with open(MDNRNN_CARTPOLE_JSON, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)
    _, _, feature_importance_map, feature_sensitivity_map, _ = self._test_mdnrnn(
        params, feature_importance=True, feature_sensitivity=True
    )
    self.verify_result(feature_importance_map, ["state1", "state3", "action1"])
    self.verify_result(feature_sensitivity_map, ["state1", "state3"])
Example #4
def test_dqn_cartpole_online(self):
    """Test that the JSON config works for online DQN on Cartpole."""
    with open(DQN_CARTPOLE_JSON, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)
    reward_history, _, _, _, _ = run_gym(
        params, offline_train=False, score_bar=CARTPOLE_SCORE_BAR, seed=SEED
    )
    assert reward_history[-1] > CARTPOLE_SCORE_BAR
Example #5
def main(args):
    parser = argparse.ArgumentParser(
        description="Train a Mixture-Density-Network RNN net to learn an OpenAI"
        " Gym environment, i.e., predict next state, reward, and"
        " terminal signal using current state and action")
    parser.add_argument("-p",
                        "--parameters",
                        help="Path to JSON parameters file.")
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        type=int,
        default=-1,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        choices=["debug", "info", "warning", "error", "critical"],
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--feature_importance",
        action="store_true",
        help="If set, feature importance will be calculated after the training",
    )
    parser.add_argument(
        "-s",
        "--feature_sensitivity",
        action="store_true",
        help="If set, state feature sensitivity by varying actions will be"
        " calculated after the training",
    )
    parser.add_argument(
        "-e",
        "--save_embedding_to_path",
        help="If a file path is provided, save a RLDataset with states embedded"
        " by the trained world model",
    )
    args = parser.parse_args(args)

    logger.setLevel(getattr(logging, args.log_level.upper()))

    with open(args.parameters, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)
    if args.gpu_id != -1:
        params = params._replace(use_gpu=True)

    mdnrnn_gym(
        params,
        args.feature_importance,
        args.feature_sensitivity,
        args.save_embedding_to_path,
    )
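
As a usage sketch, the trainer above could be driven programmatically with an argument list; the file paths here are placeholders, not files shipped with the project.

# Train the MDN-RNN world model, then compute feature importance (-f) and
# feature sensitivity (-s), and save a state-embedded RLDataset (-e).
main([
    "-p", "mdnrnn_params.json",      # placeholder parameters file
    "-g", "0",                       # train on GPU 0
    "-f",
    "-s",
    "-e", "embedded_dataset.pkl",    # placeholder output path
])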
Example #6
def test_json_serialize_nested(self):
    @dataclasses.dataclass
    class Test1:
        x: int

    @dataclasses.dataclass
    class Test2:
        x: typing.List[Test1]
        y: typing.Dict[str, Test1]

    t = Test2(
        x=[Test1(x=3), Test1(x=4)],
        y={"1": Test1(x=5), "2": Test1(x=6)},
    )
    self.assertEqual(t, json_to_object(object_to_json(t), Test2))
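
The round trip this test exercises can be sketched standalone. Assuming object_to_json and json_to_object are the same helpers used above (serialize a dataclass to a JSON string and back), a minimal example with a hypothetical Point dataclass:

import dataclasses

@dataclasses.dataclass
class Point:
    x: int
    y: int

p = Point(x=1, y=2)
s = object_to_json(p)                 # dataclass -> JSON string
restored = json_to_object(s, Point)   # JSON string -> dataclass
assert restored == p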
Example #7
def test_json_serialize_basic(self):
    damp = rlp.DiscreteActionModelParameters(
        actions=["foo", "bar"],
        rl=rlp.RLParameters(),
        training=None,
        rainbow=rlp.RainbowDQNParameters(
            double_q_learning=False, categorical=True
        ),
        state_feature_params=None,
        target_action_distribution=[1.0, 2.0],
        evaluation=rlp.EvaluationParameters(),
    )
    self.assertEqual(
        damp,
        json_to_object(object_to_json(damp), rlp.DiscreteActionModelParameters),
    )
Example #8
def main(args):
    parser = argparse.ArgumentParser(
        description="Train an RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into replay buffer.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--seed",
        help="Seed for the test (numpy, torch, and gym).",
        type=int,
        default=None,
    )
    parser.add_argument(
        "--use_gpu",
        help="Use GPU, if available; set the device with CUDA_VISIBLE_DEVICES",
        action="store_true",
    )

    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception(
            "Logging level {} is not a valid level.".format(args.log_level)
        )
    logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))

    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "If path_to_pickled_transitions is provided, offline training must be enabled"

    with open(args.parameters, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)

    if args.use_gpu:
        assert torch.cuda.is_available(), "CUDA requested but not available"
        params = params._replace(use_gpu=True)

    dataset = RLDataset(args.file_path) if args.file_path else None

    reward_history, iteration_history, trainer, predictor, env = run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.seed,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )

    if dataset:
        logger.info("Saving dataset to {}".format(args.file_path))
        dataset.save()
        final_score_exploit, _ = env.run_ep_n_times(
            params.run_details.avg_over_num_episodes, predictor, test=True
        )
        final_score_explore, _ = env.run_ep_n_times(
            params.run_details.avg_over_num_episodes, predictor, test=False
        )
        logger.info(
            "Final policy scores {} with epsilon={} and {} with epsilon=0 over {} episodes.".format(
                final_score_explore,
                env.epsilon,
                final_score_exploit,
                params.run_details.avg_over_num_episodes,
            )
        )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)
    return reward_history
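
A typical offline-training invocation of this script, sketched with placeholder file paths; every flag below is defined in the parser above.

# Collect samples with a random policy, train DQN offline on GPU, save the
# collected samples as an RLDataset, and write evaluation results to CSV.
reward_history = main([
    "-p", "dqn_params.json",     # placeholder parameters file
    "-f", "samples.pkl",         # placeholder dataset path
    "-r", "results.csv",         # placeholder results path
    "--offline_train",
    "--seed", "42",
    "--use_gpu",
])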