def main(args):
    """Run a Horizon gym experiment, record its runtime, and optionally time inference.

    Parses CLI args via ``create_parser``, loads the JSON experiment config,
    trains via ``horizon_runner.run_gym``, appends wall-clock timings to CSV
    files under ``<evaluation_file_path>/runtime/``, and returns the average
    evaluation reward.
    """
    parser = create_parser()
    args = parser.parse_args(args)

    # load experiment configuration
    with open(args.parameters.strip(), "r") as f:
        params = json.load(f)
    checkpoint_freq = params["run_details"]["checkpoint_after_ts"]

    # train agent; only collect a dataset when checkpointing is enabled
    dataset = (
        RLDataset(args.file_path)
        if checkpoint_freq != 0 and args.file_path
        else None
    )

    # log experiment info to Tensorboard under a per-experiment directory
    evaluation_file = args.evaluation_file_path
    config_file = args.parameters.strip()
    experiment_name = config_file[config_file.rfind('/') + 1:config_file.rfind('.json')]
    os.environ["TENSORBOARD_DIR"] = os.path.join(evaluation_file, experiment_name)

    start_time = time.time()
    average_reward_train, num_episodes_train, average_reward_eval, num_episodes_eval, timesteps_history, trainer, predictor, env = horizon_runner.run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.gpu_id,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )
    if dataset:
        dataset.save()
    end_time = time.time()

    # save runtime; context manager closes the handle even if write() raises
    runtime_file = os.path.join(evaluation_file, 'runtime', 'runtime.csv')
    with open(runtime_file, 'a+') as f:
        f.write(experiment_name + ', ' + str(end_time - start_time) + '\n')

    # inference testing: "num_inference_steps" is optional in the config.
    # Use dict.get for just that key instead of a try/except KeyError around
    # the whole section, which silently swallowed unrelated KeyErrors raised
    # inside env.run_n_steps or the file handling below.
    num_inference_steps = params["run_details"].get("num_inference_steps")
    if num_inference_steps:
        print("--- STARTING HORIZON CARTPOLE INFERENCE EXPERIMENT ---")
        start_time = time.time()
        _ = env.run_n_steps(num_inference_steps, predictor, test=True)
        end_time = time.time()
        print("--- HORIZON CARTPOLE INFERENCE EXPERIMENT COMPLETED ---")
        inference_file = os.path.join(evaluation_file, 'runtime', 'inference.csv')
        with open(inference_file, 'a+') as f:
            f.write(experiment_name + ', ' + str(end_time - start_time) + '\n')
    return average_reward_eval
def test_complete_experiment(env_name, config):
    """Smoke test: run a small Park QOpt experiment and fail on any exception."""
    try:
        SummaryWriterContext._reset_globals()
        with open(config) as config_fp:
            experiment_params = json.load(config_fp)
        # Reading the checkpoint frequency up front also validates the config
        # schema; a missing key fails the smoke test via the except below.
        checkpoint_freq = experiment_params["run_details"]["checkpoint_after_ts"]

        # Collect training samples into an RLDataset
        dataset = RLDataset(FILE_PATH)

        # Point Tensorboard at an experiment-specific subdirectory
        experiment_name = config[config.rfind('/') + 1:config.rfind('.json')]
        os.environ["TENSORBOARD_DIR"] = os.path.join(EVALUATION_PATH, experiment_name)

        (average_reward_train, num_episodes_train, average_reward_eval,
         num_episodes_eval, timesteps_history, trainer, predictor,
         env) = horizon_runner.run_gym(experiment_params, False, None, -1, dataset)

        if dataset:
            dataset.save()
        SummaryWriterContext._reset_globals()
    except Exception:
        pytest.fail('Running a small ' + str(env_name) + ' experiment in Horizon failed!')
def mdnrnn_gym(
    params: OpenAiGymParameters,
    feature_importance: bool = False,
    feature_sensitivity: bool = False,
    save_embedding_to_path: Optional[str] = None,
    seed: Optional[int] = None,
):
    """Train an MDN-RNN world model on an OpenAI Gym environment.

    Optionally computes feature importance / sensitivity maps on a held-out
    batch and saves an embedded RL dataset to *save_embedding_to_path*.

    Returns:
        (env, trainer, feature_importance_map, feature_sensitivity_map, dataset)
    """
    assert params.mdnrnn is not None
    use_gpu = params.use_gpu
    logger.info("Running gym with params")
    logger.info(params)

    gym_env_name = params.env
    env = OpenAIGymEnvironment(
        gym_env_name, epsilon=1.0, softmax_policy=False, gamma=0.99, random_seed=seed
    )

    # Build the held-out evaluation batch once, before any training happens.
    assert params.run_details.max_steps is not None
    eval_buffer = get_replay_buffer(
        params.run_details.num_test_episodes,
        params.run_details.seq_len,
        params.run_details.max_steps,
        env,
    )
    eval_batch = eval_buffer.sample_memories(
        eval_buffer.memory_size, use_gpu=use_gpu, batch_first=True
    )

    trainer = create_trainer(params, env, use_gpu)
    _, _, trainer = train_sgd(
        env,
        trainer,
        use_gpu,
        "{} test run".format(gym_env_name),
        params.mdnrnn.minibatch_size,
        params.run_details,
        test_batch=eval_batch,
    )

    feature_importance_map = None
    feature_sensitivity_map = None
    dataset = None
    if feature_importance:
        feature_importance_map = calculate_feature_importance(
            env, trainer, use_gpu, params.run_details, test_batch=eval_batch
        )
    if feature_sensitivity:
        feature_sensitivity_map = calculate_feature_sensitivity_by_actions(
            env, trainer, use_gpu, params.run_details, test_batch=eval_batch
        )
    if save_embedding_to_path:
        dataset = RLDataset(save_embedding_to_path)
        create_embed_rl_dataset(env, trainer, dataset, use_gpu, params.run_details)
        dataset.save()
    return env, trainer, feature_importance_map, feature_sensitivity_map, dataset
def mdnrnn_gym(
    params: OpenAiGymParameters,
    feature_importance: bool = False,
    feature_sensitivity: bool = False,
    save_embedding_to_path: Optional[str] = None,
):
    """Train an MDN-RNN world model on an OpenAI Gym environment.

    Optionally computes feature importance / sensitivity maps and saves an
    embedded RL dataset to *save_embedding_to_path*.

    Returns:
        (env, trainer, feature_importance_map, feature_sensitivity_map, dataset)
    """
    assert params.mdnrnn is not None
    gpu_enabled = params.use_gpu
    logger.info("Running gym with params")
    logger.info(params)

    gym_env_name = params.env
    env = OpenAIGymEnvironment(gym_env_name, epsilon=1.0, softmax_policy=True, gamma=0.99)

    trainer = create_trainer(params, env, gpu_enabled)
    _, _, trainer = train_sgd(
        env,
        trainer,
        gpu_enabled,
        "{} test run".format(gym_env_name),
        params.mdnrnn.minibatch_size,
        params.run_details,
    )

    # Optional post-training analyses; each stays None unless requested.
    feature_importance_map = None
    feature_sensitivity_map = None
    dataset = None
    if feature_importance:
        feature_importance_map = calculate_feature_importance(
            env, trainer, gpu_enabled, params.run_details
        )
    if feature_sensitivity:
        feature_sensitivity_map = calculate_feature_sensitivity_by_actions(
            env, trainer, gpu_enabled, params.run_details
        )
    if save_embedding_to_path:
        dataset = RLDataset(save_embedding_to_path)
        create_embed_rl_dataset(env, trainer, dataset, gpu_enabled, params.run_details)
        dataset.save()
    return env, trainer, feature_importance_map, feature_sensitivity_map, dataset
def main(args):
    """CLI entry point: train an RL agent in an OpenAI Gym environment.

    Parses flags, optionally seeds all RNGs, runs the gym training loop,
    logs final exploit/explore scores, and returns the reward history.
    """
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into replay buffer.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--seed",
        help="Seed for the test (numpy, torch, and gym).",
        type=int,
        default=None,
    )
    parser.add_argument(
        "--use_gpu",
        help="Use GPU, if available; set the device with CUDA_VISIBLE_DEVICES",
        action="store_true",
    )
    args = parser.parse_args(args)

    # Validate log level, then configure the root logger.
    valid_levels = ("debug", "info", "warning", "error", "critical")
    if args.log_level not in valid_levels:
        raise Exception("Logging level {} not valid level.".format(args.log_level))
    logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))

    # Seed every RNG source so the run is reproducible.
    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided so you must run offline training"

    with open(args.parameters, "r") as param_fp:
        gym_params = json_to_object(param_fp.read(), OpenAiGymParameters)
    if args.use_gpu:
        assert torch.cuda.is_available(), "CUDA requested but not available"
        gym_params = gym_params._replace(use_gpu=True)

    sample_dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, iteration_history, trainer, predictor, env = run_gym(
        gym_params,
        args.offline_train,
        args.score_bar,
        args.seed,
        sample_dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )
    if sample_dataset:
        sample_dataset.save()
        logger.info("Saving dataset to {}".format(args.file_path))

    # Score the final policy with and without exploration noise.
    episodes = gym_params.run_details.avg_over_num_episodes
    final_score_exploit, _ = env.run_ep_n_times(episodes, predictor, test=True)
    final_score_explore, _ = env.run_ep_n_times(episodes, predictor, test=False)
    logger.info(
        "Final policy scores {} with epsilon={} and {} with epsilon=0 over {} eps.".format(
            final_score_explore, env.epsilon, final_score_exploit, episodes
        )
    )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)
    return reward_history
def main(args):
    """CLI entry point: train an RL agent in an OpenAI Gym environment.

    Parses flags, loads the JSON config, runs the gym training loop, and
    returns the reward history (optionally also written to CSV).
    """
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_episode",
        type=int,
        help="If file_path is set, start saving episodes from this episode num.",
        default=0,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    # Validate the requested log level before applying it.
    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} not valid level.".format(args.log_level))
    logger.setLevel(getattr(logging, args.log_level.upper()))

    with open(args.parameters, "r") as config_fp:
        experiment_params = json.load(config_fp)

    collected = RLDataset(args.file_path) if args.file_path else None
    reward_history, timestep_history, trainer, predictor = run_gym(
        experiment_params,
        args.score_bar,
        args.gpu_id,
        collected,
        args.start_saving_from_episode,
    )
    if collected:
        collected.save()

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, timestep_history)
    return reward_history
def main(args):
    """CLI entry point: train an RL agent in an OpenAI Gym environment.

    Parses flags, loads the JSON config, runs the gym training loop
    (optionally offline from pickled transitions), logs final exploit and
    explore scores, and returns the reward history.
    """
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into replay buffer.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    # Validate the requested log level before applying it.
    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} not valid level.".format(args.log_level))
    logger.setLevel(getattr(logging, args.log_level.upper()))

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided so you must run offline training"

    with open(args.parameters, "r") as config_fp:
        experiment_params = json.load(config_fp)

    collected = RLDataset(args.file_path) if args.file_path else None
    reward_history, iteration_history, trainer, predictor, env = run_gym(
        experiment_params,
        args.offline_train,
        args.score_bar,
        args.gpu_id,
        collected,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )
    if collected:
        collected.save()
        logger.info("Saving dataset to {}".format(args.file_path))

    # Score the final policy with and without exploration noise.
    episodes = experiment_params["run_details"]["avg_over_num_episodes"]
    final_score_exploit, _ = env.run_ep_n_times(episodes, predictor, test=True)
    final_score_explore, _ = env.run_ep_n_times(episodes, predictor, test=False)
    logger.info(
        "Final policy scores {} with epsilon={} and {} with epsilon=0 over {} eps.".format(
            final_score_explore, env.epsilon, final_score_exploit, episodes
        )
    )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)
    return reward_history
def main(args):
    """CLI entry point: train an RL agent in an OpenAI Gym environment.

    Builds the argument parser, loads the JSON parameters file, runs the gym
    training loop, and returns the reward history (optionally saved to CSV).
    """
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_episode",
        type=int,
        help="If file_path is set, start saving episodes from this episode num.",
        default=0,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    # Reject unknown log levels up front, then configure the module logger.
    allowed_levels = ("debug", "info", "warning", "error", "critical")
    if args.log_level not in allowed_levels:
        raise Exception("Logging level {} not valid level.".format(args.log_level))
    logger.setLevel(getattr(logging, args.log_level.upper()))

    with open(args.parameters, "r") as param_fp:
        run_params = json.load(param_fp)

    sample_dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, timestep_history, trainer, predictor = run_gym(
        run_params,
        args.score_bar,
        args.gpu_id,
        sample_dataset,
        args.start_saving_from_episode,
    )
    if sample_dataset:
        sample_dataset.save()

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, timestep_history)
    return reward_history