def test_string_game_gpu(self):
    """Run the state-embedding test on the string game with GPU training enabled."""
    with open(MDNRNN_STRING_GAME_JSON, "r") as f:
        mdnrnn_params = json_to_object(f.read(), OpenAiGymParameters)
    mdnrnn_params = mdnrnn_params._replace(use_gpu=True)
    with open(DQN_STRING_GAME_JSON, "r") as f:
        rl_params = json_to_object(f.read(), OpenAiGymParameters)
    rl_params = rl_params._replace(use_gpu=True)
    avg_reward_history = self._test_state_embed(mdnrnn_params, rl_params)
    self.verify_result(avg_reward_history, 10)
def main(args):
    parser = argparse.ArgumentParser(
        description="Train an RL net to play in an OpenAI Gym environment. "
        "States are embedded by an MDN-RNN model."
    )
    parser.add_argument(
        "-p",
        "--mdnrnn_parameters",
        help="Path to JSON parameters file for MDN-RNN training.",
    )
    parser.add_argument(
        "-q", "--rl_parameters", help="Path to JSON parameters file for RL training."
    )
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        type=int,
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=-1,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception(
            "Logging level {} is not a valid level.".format(args.log_level)
        )
    else:
        logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))

    with open(args.mdnrnn_parameters, "r") as f:
        mdnrnn_params = json_to_object(f.read(), OpenAiGymParameters)
    with open(args.rl_parameters, "r") as f:
        rl_params = json_to_object(f.read(), OpenAiGymParameters)
    # Apply the --gpu_id flag to both parameter sets.
    if args.gpu_id != -1:
        mdnrnn_params = mdnrnn_params._replace(use_gpu=True)
        rl_params = rl_params._replace(use_gpu=True)

    env, mdnrnn_trainer, embed_rl_dataset = create_mdnrnn_trainer_and_embed_dataset(
        mdnrnn_params, rl_params.use_gpu
    )
    # OpenAiGymParameters supports attribute access (it is _replace-able), so
    # read run_details.seq_len as an attribute rather than by subscripting.
    max_embed_seq_len = mdnrnn_params.run_details.seq_len
    _, _, rl_trainer, rl_predictor, state_embed_env = run_gym(
        rl_params,
        args.score_bar,
        embed_rl_dataset,
        env.env,
        mdnrnn_trainer.mdnrnn,
        max_embed_seq_len,
    )
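# Script entry point (a sketch; assumes `sys` is imported at the top of the
# module): forward CLI args to main(), e.g.
#   python state_embed_gym.py -p mdnrnn_params.json -q dqn_params.json -g 0
# The file and config names in the example command are hypothetical.
if __name__ == "__main__":
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    main(sys.argv[1:])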
def test_mdnrnn_cartpole(self):
    """Train an MDN-RNN world model on CartPole and check that feature importance
    and feature sensitivity surface the expected state/action features."""
    with open(MDNRNN_CARTPOLE_JSON, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)
    _, _, feature_importance_map, feature_sensitivity_map, _ = self._test_mdnrnn(
        params, feature_importance=True, feature_sensitivity=True
    )
    self.verify_result(feature_importance_map, ["state1", "state3", "action1"])
    self.verify_result(feature_sensitivity_map, ["state1", "state3"])
def test_dqn_cartpole_online(self):
    """Test if the json config works for online DQN in Cartpole"""
    with open(DQN_CARTPOLE_JSON, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)
    reward_history, _, _, _, _ = run_gym(
        params, offline_train=False, score_bar=CARTPOLE_SCORE_BAR, seed=SEED
    )
    assert reward_history[-1] > CARTPOLE_SCORE_BAR
def main(args):
    parser = argparse.ArgumentParser(
        description="Train a Mixture-Density-Network RNN net to learn an OpenAI"
        " Gym environment, i.e., predict next state, reward, and"
        " terminal signal using current state and action"
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-g",
        "--gpu_id",
        type=int,
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=-1,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        choices=["debug", "info", "warning", "error", "critical"],
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--feature_importance",
        action="store_true",
        help="If set, feature importance will be calculated after the training",
    )
    parser.add_argument(
        "-s",
        "--feature_sensitivity",
        action="store_true",
        help="If set, state feature sensitivity by varying actions will be"
        " calculated after the training",
    )
    parser.add_argument(
        "-e",
        "--save_embedding_to_path",
        help="If a file path is provided, save an RLDataset with states embedded"
        " by the trained world model",
    )
    args = parser.parse_args(args)
    logger.setLevel(getattr(logging, args.log_level.upper()))

    with open(args.parameters, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)
    if args.gpu_id != -1:
        params = params._replace(use_gpu=True)

    mdnrnn_gym(
        params,
        args.feature_importance,
        args.feature_sensitivity,
        args.save_embedding_to_path,
    )
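# Script entry point (a sketch; assumes `sys` is imported at the top of the
# module), e.g.
#   python mdnrnn_gym.py -p mdnrnn_cartpole.json -f -s
# The file and config names in the example command are hypothetical.
if __name__ == "__main__":
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    main(sys.argv[1:])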
def test_json_serialize_nested(self):
    @dataclasses.dataclass
    class Test1:
        x: int

    @dataclasses.dataclass
    class Test2:
        x: typing.List[Test1]
        y: typing.Dict[str, Test1]

    t = Test2(x=[Test1(x=3), Test1(x=4)], y={"1": Test1(x=5), "2": Test1(x=6)})
    self.assertEqual(t, json_to_object(object_to_json(t), Test2))
def test_json_serialize_basic(self):
    damp = rlp.DiscreteActionModelParameters(
        actions=["foo", "bar"],
        rl=rlp.RLParameters(),
        training=None,
        rainbow=rlp.RainbowDQNParameters(double_q_learning=False, categorical=True),
        state_feature_params=None,
        target_action_distribution=[1.0, 2.0],
        evaluation=rlp.EvaluationParameters(),
    )
    self.assertEqual(
        damp,
        json_to_object(object_to_json(damp), rlp.DiscreteActionModelParameters),
    )
def main(args):
    parser = argparse.ArgumentParser(
        description="Train an RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy, then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into replay buffer.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--seed",
        help="Seed for the test (numpy, torch, and gym).",
        type=int,
        default=None,
    )
    parser.add_argument(
        "--use_gpu",
        help="Use GPU, if available; set the device with CUDA_VISIBLE_DEVICES",
        action="store_true",
    )
    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception(
            "Logging level {} is not a valid level.".format(args.log_level)
        )
    else:
        logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))

    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided, so you must run offline training"

    with open(args.parameters, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)
    if args.use_gpu:
        assert torch.cuda.is_available(), "CUDA requested but not available"
        params = params._replace(use_gpu=True)

    dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, iteration_history, trainer, predictor, env = run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.seed,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )
    if dataset:
        dataset.save()
        logger.info("Saved dataset to {}".format(args.file_path))

    final_score_exploit, _ = env.run_ep_n_times(
        params.run_details.avg_over_num_episodes, predictor, test=True
    )
    final_score_explore, _ = env.run_ep_n_times(
        params.run_details.avg_over_num_episodes, predictor, test=False
    )
    logger.info(
        "Final policy scores {} with epsilon={} and {} with epsilon=0 over {} episodes.".format(
            final_score_explore,
            env.epsilon,
            final_score_exploit,
            params.run_details.avg_over_num_episodes,
        )
    )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)
    return reward_history
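# Script entry point (a sketch; assumes `sys` is imported at the top of the
# module). Because main() returns reward_history, it can also be called
# programmatically, e.g.
#   history = main(["-p", "dqn_cartpole.json", "--seed", "0"])
# The config name in the example is hypothetical.
if __name__ == "__main__":
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    main(sys.argv[1:])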