def test_complete_experiment(env_name, config):
    """
    Smoke test that runs a small Park QOpt experiment and fails if any
    exception is raised during its execution.
    """
    try:
        SummaryWriterContext._reset_globals()
        with open(config) as f:
            params = json.load(f)
        checkpoint_freq = params["run_details"]["checkpoint_after_ts"]  # read but unused in this smoke test

        # train agent
        dataset = RLDataset(FILE_PATH)

        # log experiment info to Tensorboard
        evaluation_file = EVALUATION_PATH
        config_file = config
        experiment_name = config_file[
            config_file.rfind('/') + 1 : config_file.rfind('.json')
        ]
        os.environ["TENSORBOARD_DIR"] = os.path.join(evaluation_file, experiment_name)

        (
            average_reward_train,
            num_episodes_train,
            average_reward_eval,
            num_episodes_eval,
            timesteps_history,
            trainer,
            predictor,
            env,
        ) = horizon_runner.run_gym(params, False, None, -1, dataset)

        if dataset:
            dataset.save()
        SummaryWriterContext._reset_globals()
    except Exception:
        pytest.fail(
            'Running a small ' + str(env_name) + ' experiment in Horizon failed!'
        )
def main(args):
    parser = create_parser()
    args = parser.parse_args(args)

    # load experiment configuration
    with open(args.parameters.strip(), "r") as f:
        params = json.load(f)
    checkpoint_freq = params["run_details"]["checkpoint_after_ts"]

    # train agent
    dataset = (
        RLDataset(args.file_path)
        if checkpoint_freq != 0 and args.file_path
        else None
    )

    # log experiment info to Tensorboard
    evaluation_file = args.evaluation_file_path
    config_file = args.parameters.strip()
    experiment_name = config_file[
        config_file.rfind('/') + 1 : config_file.rfind('.json')
    ]
    os.environ["TENSORBOARD_DIR"] = os.path.join(evaluation_file, experiment_name)

    start_time = time.time()
    (
        average_reward_train,
        num_episodes_train,
        average_reward_eval,
        num_episodes_eval,
        timesteps_history,
        trainer,
        predictor,
        env,
    ) = horizon_runner.run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.gpu_id,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )
    if dataset:
        dataset.save()
    end_time = time.time()

    # save runtime
    runtime_file = os.path.join(evaluation_file, 'runtime', 'runtime.csv')
    with open(runtime_file, 'a+') as f:
        f.write(experiment_name + ', ' + str(end_time - start_time) + '\n')

    # inference testing
    try:
        num_inference_steps = params["run_details"]["num_inference_steps"]
        if num_inference_steps:
            print("--- STARTING HORIZON CARTPOLE INFERENCE EXPERIMENT ---")
            start_time = time.time()
            _ = env.run_n_steps(num_inference_steps, predictor, test=True)
            end_time = time.time()
            print("--- HORIZON CARTPOLE INFERENCE EXPERIMENT COMPLETED ---")
            inference_file = os.path.join(evaluation_file, 'runtime',
                                          'inference.csv')
            with open(inference_file, 'a+') as f:
                f.write(experiment_name + ', ' + str(end_time - start_time) + '\n')
    except KeyError:
        pass

    return average_reward_eval
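# Usage sketch for the entry point above (paths are hypothetical; assumes
# create_parser() defines flags for each args.* attribute referenced in the
# body):
#
#   average_reward_eval = main([
#       "--parameters", "configs/cartpole_dqn.json",
#       "--file_path", "/tmp/cartpole_samples.pkl",
#       "--evaluation_file_path", "/tmp/eval",
#       "--gpu_id", "-1",
#   ])
#
# Note that runtime.csv and inference.csv are opened in append mode, which
# does not create directories, so <evaluation_file_path>/runtime/ must exist
# before the run.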
def train_gym_batch_rl(
    model_type,
    trainer,
    predictor,
    batch_rl_file_path,
    gym_env,
    num_train_batches,
    test_every_ts,
    test_after_ts,
    avg_over_num_episodes,
    score_bar,
    test_run_name,
):
    """Train off of a fixed set of stored transitions generated off-policy."""
    total_timesteps = 0
    avg_reward_history, timestep_history = [], []

    batch_dataset = RLDataset(batch_rl_file_path)
    batch_dataset.load()
    gym_env.replay_memory = batch_dataset.replay_memory

    test_every_ts_n = 1
    for _ in range(num_train_batches):
        samples = gym_env.sample_memories(trainer.minibatch_size, model_type)
        trainer.train(samples)
        total_timesteps += trainer.minibatch_size

        # Evaluation loop
        if (
            total_timesteps > (test_every_ts * test_every_ts_n)
            and total_timesteps > test_after_ts
        ):
            avg_rewards, avg_discounted_rewards = gym_env.run_ep_n_times(
                avg_over_num_episodes, predictor, test=True
            )
            avg_reward_history.append(avg_rewards)
            timestep_history.append(total_timesteps)
            logger.info(
                "Achieved an average reward score of {} over {} evaluations."
                " Total timesteps: {}.".format(
                    avg_rewards, avg_over_num_episodes, total_timesteps
                )
            )
            test_every_ts_n += 1
            if score_bar is not None and avg_rewards > score_bar:
                logger.info(
                    "Avg. reward history for {}: {}".format(
                        test_run_name, avg_reward_history
                    )
                )
                # include timestep_history so both return paths have the same shape
                return avg_reward_history, timestep_history, trainer, predictor

    # Always eval after the last training batch
    avg_rewards, avg_discounted_rewards = gym_env.run_ep_n_times(
        avg_over_num_episodes, predictor, test=True
    )
    avg_reward_history.append(avg_rewards)
    timestep_history.append(total_timesteps)
    logger.info(
        "Achieved an average reward score of {} over {} evaluations."
        " Total timesteps: {}.".format(
            avg_rewards, avg_over_num_episodes, total_timesteps
        )
    )
    logger.info(
        "Avg. reward history for {}: {}".format(test_run_name, avg_reward_history)
    )
    return avg_reward_history, timestep_history, trainer, predictor
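# Minimal sketch of driving the batch-RL loop above (argument values are
# illustrative; trainer and predictor come from whatever construction path the
# surrounding runner uses):
#
#   history, timesteps, trainer, predictor = train_gym_batch_rl(
#       model_type=model_type,
#       trainer=trainer,
#       predictor=predictor,
#       batch_rl_file_path="/tmp/transitions.pkl",  # RLDataset saved by an earlier run
#       gym_env=gym_env,
#       num_train_batches=10000,
#       test_every_ts=2000,       # evaluate roughly every 2000 trained timesteps
#       test_after_ts=1,
#       avg_over_num_episodes=100,
#       score_bar=195.0,          # return early once eval reward exceeds this
#       test_run_name="CartPole-v0 batch RL",
#   )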
def mdnrnn_gym(
    params: OpenAiGymParameters,
    feature_importance: bool = False,
    feature_sensitivity: bool = False,
    save_embedding_to_path: Optional[str] = None,
    seed: Optional[int] = None,
):
    assert params.mdnrnn is not None
    use_gpu = params.use_gpu
    logger.info("Running gym with params")
    logger.info(params)

    env_type = params.env
    env = OpenAIGymEnvironment(
        env_type, epsilon=1.0, softmax_policy=False, gamma=0.99, random_seed=seed
    )

    # create test data once
    assert params.run_details.max_steps is not None
    test_replay_buffer = get_replay_buffer(
        params.run_details.num_test_episodes,
        params.run_details.seq_len,
        params.run_details.max_steps,
        env,
    )
    test_batch = test_replay_buffer.sample_memories(
        test_replay_buffer.memory_size, use_gpu=use_gpu, batch_first=True
    )

    trainer = create_trainer(params, env, use_gpu)
    _, _, trainer = train_sgd(
        env,
        trainer,
        use_gpu,
        "{} test run".format(env_type),
        params.mdnrnn.minibatch_size,
        params.run_details,
        test_batch=test_batch,
    )

    feature_importance_map, feature_sensitivity_map, dataset = None, None, None
    if feature_importance:
        feature_importance_map = calculate_feature_importance(
            env, trainer, use_gpu, params.run_details, test_batch=test_batch
        )
    if feature_sensitivity:
        feature_sensitivity_map = calculate_feature_sensitivity_by_actions(
            env, trainer, use_gpu, params.run_details, test_batch=test_batch
        )
    if save_embedding_to_path:
        dataset = RLDataset(save_embedding_to_path)
        create_embed_rl_dataset(env, trainer, dataset, use_gpu, params.run_details)
        dataset.save()

    return env, trainer, feature_importance_map, feature_sensitivity_map, dataset
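# Usage sketch (assumes params is a fully populated OpenAiGymParameters with
# mdnrnn and run_details set, e.g. parsed from JSON with json_to_object as in
# the main() entry point further below):
#
#   env, trainer, fi_map, fs_map, dataset = mdnrnn_gym(
#       params,
#       feature_importance=True,
#       feature_sensitivity=True,
#       save_embedding_to_path="/tmp/rl.pkl",
#       seed=0,
#   )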
def create_mdnrnn_trainer_and_embed_dataset(
    mdnrnn_params: OpenAiGymParameters, use_gpu
):
    env, mdnrnn_trainer, _, _, _ = mdnrnn_gym(mdnrnn_params)
    embed_rl_dataset = RLDataset("/tmp/rl.pkl")
    create_embed_rl_dataset(
        env, mdnrnn_trainer, embed_rl_dataset, use_gpu, mdnrnn_params.run_details
    )
    return env, mdnrnn_trainer, embed_rl_dataset
def create_mdnrnn_trainer_and_embed_dataset(mdnrnn_params, use_gpu):
    env, mdnrnn_trainer, _, _, _ = mdnrnn_gym(mdnrnn_params, use_gpu)
    embed_rl_dataset = RLDataset("/tmp/rl.pkl")
    create_embed_rl_dataset(
        env, mdnrnn_trainer, embed_rl_dataset, use_gpu, **mdnrnn_params["run_details"]
    )
    return env, mdnrnn_trainer, embed_rl_dataset
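# Example use of create_mdnrnn_trainer_and_embed_dataset (both variants above
# write the embedding dataset to the hard-coded "/tmp/rl.pkl"; the call shown
# is for the OpenAiGymParameters variant):
#
#   env, mdnrnn_trainer, embed_dataset = create_mdnrnn_trainer_and_embed_dataset(
#       mdnrnn_params, use_gpu=False
#   )
#   embed_dataset.save()  # persist the embedded transitions to /tmp/rl.pkl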
def mdnrnn_gym(
    params: OpenAiGymParameters,
    feature_importance: bool = False,
    feature_sensitivity: bool = False,
    save_embedding_to_path: Optional[str] = None,
):
    assert params.mdnrnn is not None
    use_gpu = params.use_gpu
    logger.info("Running gym with params")
    logger.info(params)

    env_type = params.env
    env = OpenAIGymEnvironment(env_type, epsilon=1.0, softmax_policy=True, gamma=0.99)

    trainer = create_trainer(params, env, use_gpu)
    _, _, trainer = train_sgd(
        env,
        trainer,
        use_gpu,
        "{} test run".format(env_type),
        params.mdnrnn.minibatch_size,
        params.run_details,
    )

    feature_importance_map, feature_sensitivity_map, dataset = None, None, None
    if feature_importance:
        feature_importance_map = calculate_feature_importance(
            env, trainer, use_gpu, params.run_details
        )
    if feature_sensitivity:
        feature_sensitivity_map = calculate_feature_sensitivity_by_actions(
            env, trainer, use_gpu, params.run_details
        )
    if save_embedding_to_path:
        dataset = RLDataset(save_embedding_to_path)
        create_embed_rl_dataset(env, trainer, dataset, use_gpu, params.run_details)
        dataset.save()

    return env, trainer, feature_importance_map, feature_sensitivity_map, dataset
def main(args):
    parser = argparse.ArgumentParser(
        description="Train an RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged test scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy, then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into the replay buffer.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--seed",
        help="Seed for the test (numpy, torch, and gym).",
        type=int,
        default=None,
    )
    parser.add_argument(
        "--use_gpu",
        help="Use GPU, if available; set the device with CUDA_VISIBLE_DEVICES.",
        action="store_true",
    )
    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} is not a valid level.".format(args.log_level))
    else:
        logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))

    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided, so you must run offline training"

    with open(args.parameters, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)

    if args.use_gpu:
        assert torch.cuda.is_available(), "CUDA requested but not available"
        params = params._replace(use_gpu=True)

    dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, iteration_history, trainer, predictor, env = run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.seed,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )

    if dataset:
        dataset.save()
        logger.info("Saving dataset to {}".format(args.file_path))
        final_score_exploit, _ = env.run_ep_n_times(
            params.run_details.avg_over_num_episodes, predictor, test=True
        )
        final_score_explore, _ = env.run_ep_n_times(
            params.run_details.avg_over_num_episodes, predictor, test=False
        )
        logger.info(
            "Final policy scores {} with epsilon={} and {} with epsilon=0 "
            "over {} eps.".format(
                final_score_explore,
                env.epsilon,
                final_score_exploit,
                params.run_details.avg_over_num_episodes,
            )
        )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)

    return reward_history
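# Example command line for the entry point above (config path is illustrative;
# the module name depends on how the repo is laid out):
#
#   python run_gym.py -p configs/discrete_dqn_cartpole_v0.json \
#       -f /tmp/cartpole_samples.pkl -r /tmp/results.csv --seed 0
#
# Passing --use_gpu overrides the config's use_gpu setting via
# params._replace(use_gpu=True), after asserting that CUDA is available.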
def main(args):
    parser = argparse.ArgumentParser(
        description="Train an RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged test scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_episode",
        type=int,
        help="If file_path is set, start saving episodes from this episode num.",
        default=0,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} is not a valid level.".format(args.log_level))
    else:
        logger.setLevel(getattr(logging, args.log_level.upper()))

    with open(args.parameters, "r") as f:
        params = json.load(f)

    dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, timestep_history, trainer, predictor = run_gym(
        params, args.score_bar, args.gpu_id, dataset, args.start_saving_from_episode
    )

    if dataset:
        dataset.save()
    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, timestep_history)

    return reward_history
def main(args):
    parser = argparse.ArgumentParser(
        description="Train an RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged test scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy, then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into the replay buffer.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} is not a valid level.".format(args.log_level))
    else:
        logger.setLevel(getattr(logging, args.log_level.upper()))

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided, so you must run offline training"

    with open(args.parameters, "r") as f:
        params = json.load(f)

    dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, iteration_history, trainer, predictor, env = run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.gpu_id,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )

    if dataset:
        dataset.save()
        logger.info("Saving dataset to {}".format(args.file_path))
        final_score_exploit, _ = env.run_ep_n_times(
            params["run_details"]["avg_over_num_episodes"], predictor, test=True
        )
        final_score_explore, _ = env.run_ep_n_times(
            params["run_details"]["avg_over_num_episodes"], predictor, test=False
        )
        logger.info(
            "Final policy scores {} with epsilon={} and {} with epsilon=0 "
            "over {} eps.".format(
                final_score_explore,
                env.epsilon,
                final_score_exploit,
                params["run_details"]["avg_over_num_episodes"],
            )
        )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)

    return reward_history
def create_embed_rl_dataset(
    gym_env: OpenAIGymEnvironment,
    trainer: MDNRNNTrainer,
    dataset: RLDataset,
    use_gpu: bool,
    run_details: OpenAiRunDetails,
):
    assert run_details.max_steps is not None
    old_mdnrnn_mode = trainer.mdnrnn.mdnrnn.training
    trainer.mdnrnn.mdnrnn.eval()
    num_transitions = run_details.num_state_embed_episodes * run_details.max_steps
    device = torch.device("cuda") if use_gpu else torch.device("cpu")  # type: ignore

    (
        state_batch,
        action_batch,
        reward_batch,
        next_state_batch,
        next_action_batch,
        not_terminal_batch,
        step_batch,
        next_step_batch,
    ) = map(
        list,
        zip(
            *multi_step_sample_generator(
                gym_env=gym_env,
                num_transitions=num_transitions,
                max_steps=run_details.max_steps,
                # +1 because MDNRNN embeds the first seq_len steps and then
                # the embedded state will be concatenated with the last step
                multi_steps=run_details.seq_len + 1,
                include_shorter_samples_at_start=True,
                include_shorter_samples_at_end=False,
            )
        ),
    )

    def concat_batch(batch):
        return torch.cat(
            [
                torch.tensor(
                    np.expand_dims(x, axis=1), dtype=torch.float, device=device
                )
                for x in batch
            ],
            dim=1,
        )

    # shape: seq_len x batch_size x feature_dim
    mdnrnn_state = concat_batch(state_batch)
    next_mdnrnn_state = concat_batch(next_state_batch)
    mdnrnn_action = concat_batch(action_batch)
    next_mdnrnn_action = concat_batch(next_action_batch)

    mdnrnn_input = rlt.PreprocessedStateAction.from_tensors(
        state=mdnrnn_state, action=mdnrnn_action
    )
    next_mdnrnn_input = rlt.PreprocessedStateAction.from_tensors(
        state=next_mdnrnn_state, action=next_mdnrnn_action
    )

    # batch-compute state embedding
    mdnrnn_output = trainer.mdnrnn(mdnrnn_input)
    next_mdnrnn_output = trainer.mdnrnn(next_mdnrnn_input)

    for i in range(len(state_batch)):
        # Embed the state as the hidden layer's output
        # until the previous step + current state
        hidden_idx = 0 if step_batch[i] == 1 else step_batch[i] - 2  # type: ignore
        next_hidden_idx = next_step_batch[i] - 2  # type: ignore
        hidden_embed = (
            mdnrnn_output.all_steps_lstm_hidden[hidden_idx, i, :]
            .squeeze()
            .detach()
            .cpu()
        )
        state_embed = torch.cat(
            (hidden_embed, torch.tensor(state_batch[i][hidden_idx + 1]))  # type: ignore
        )
        next_hidden_embed = (
            next_mdnrnn_output.all_steps_lstm_hidden[next_hidden_idx, i, :]
            .squeeze()
            .detach()
            .cpu()
        )
        next_state_embed = torch.cat(
            (
                next_hidden_embed,
                torch.tensor(next_state_batch[i][next_hidden_idx + 1]),  # type: ignore
            )
        )

        logger.debug(
            "create_embed_rl_dataset:\nstate batch\n{}\naction batch\n{}\nlast "
            "action: {}, reward: {}\nstate embed {}\nnext state embed {}\n".format(
                state_batch[i][: hidden_idx + 1],  # type: ignore
                action_batch[i][: hidden_idx + 1],  # type: ignore
                action_batch[i][hidden_idx + 1],  # type: ignore
                reward_batch[i][hidden_idx + 1],  # type: ignore
                state_embed,
                next_state_embed,
            )
        )

        terminal = 1 - not_terminal_batch[i][hidden_idx + 1]  # type: ignore
        possible_actions, possible_actions_mask = get_possible_actions(
            gym_env, ModelType.PYTORCH_PARAMETRIC_DQN.value, False
        )
        possible_next_actions, possible_next_actions_mask = get_possible_actions(
            gym_env, ModelType.PYTORCH_PARAMETRIC_DQN.value, terminal
        )
        dataset.insert(
            state=state_embed,
            action=torch.tensor(action_batch[i][hidden_idx + 1]),  # type: ignore
            reward=reward_batch[i][hidden_idx + 1],  # type: ignore
            next_state=next_state_embed,
            next_action=torch.tensor(
                next_action_batch[i][next_hidden_idx + 1]  # type: ignore
            ),
            terminal=torch.tensor(terminal),
            possible_next_actions=possible_next_actions,
            possible_next_actions_mask=possible_next_actions_mask,
            time_diff=torch.tensor(1),
            possible_actions=possible_actions,
            possible_actions_mask=possible_actions_mask,
            policy_id=0,
        )

    logger.info(
        "Inserted {} transitions into a state embed dataset".format(len(state_batch))
    )
    trainer.mdnrnn.mdnrnn.train(old_mdnrnn_mode)
    return dataset
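# Worked example of the indexing in create_embed_rl_dataset, derived from the
# code above rather than from upstream docs: with run_details.seq_len == 3,
# the generator yields windows of multi_steps == 4. For a sample where
# step_batch[i] == 4, hidden_idx == 2, so state_embed concatenates the LSTM
# hidden state after processing steps 0..2 with the raw state at index 3 (the
# current state). For step_batch[i] == 1 there is no earlier step, and the
# code falls back to hidden_idx == 0.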