def train_pnn(self, env_name="Merging-v0"):
    """
    Directly trains on env_name
    """
    bs2model = {'RL': BR_s, 'LR': BL_s}
    # for bs in bs2model.keys():
    bs = self.bs
    for seed in [101, 102]:
        model_info = bs2model[bs]
        model_dir = os.path.join(model_info[0], model_info[1], model_info[2])
        output_dir = os.path.join("output/fetch_PNN", 'resave', model_info[2])
        # Re-save the pre-trained parameters in a PNN-compatible layout, then load them.
        utils.resave_params_for_PNN(model_dir, output_dir)
        self.model = HER2PNN.load(output_dir)

        self.seed = seed
        self.experiment_name = f"PNN_{bs}_{self.seed}"
        print("EXPT NAME: ", self.experiment_name)
        self.experiment_dir = os.path.join(self.experiment_dir1,
                                           self.experiment_name)
        self.create_eval_dir()

        env = gym.make(env_name)
        eval_env = gym.make(env_name)
        # Select the target homotopy class for this transfer setting.
        if bs == 'RL':
            env.homotopy_class = 'left'
            eval_env.homotopy_class = 'left'
        elif bs == 'LR':
            env.homotopy_class = 'right'
            eval_env.homotopy_class = 'right'

        env = HERGoalEnvWrapper(env)
        self.model.set_env(env)
        eval_env = HERGoalEnvWrapper(eval_env)
        self.model = train(self.model, eval_env, self.timesteps,
                           self.experiment_dir, self.is_save,
                           self.eval_save_period, self.rets_path, 0)
def train_l2sp(self, env_name="Merging-v0"):
    """
    Directly trains on env_name
    """
    bs2model = {'RL': BR_s, 'LR': BL_s}
    # for bs in bs2model.keys():
    bs = self.bs
    for seed in [101, 102]:
        model_info = bs2model[bs]
        model_dir = os.path.join(model_info[0], model_info[1], model_info[2])
        data, params = utils.load_from_file(model_dir)
        # L2-SP regularizes training toward the original pre-trained parameters.
        self.model = HER2L2SP.load(model_dir, original_params=params)

        self.seed = seed
        self.experiment_name = f"L2SP_{bs}_{self.seed}"
        print("EXPT NAME: ", self.experiment_name)
        self.experiment_dir = os.path.join(self.experiment_dir1,
                                           self.experiment_name)
        self.create_eval_dir()

        env = gym.make(env_name)
        eval_env = gym.make(env_name)
        # Select the target homotopy class for this transfer setting.
        if bs == 'RL':
            env.homotopy_class = 'left'
            eval_env.homotopy_class = 'left'
        elif bs == 'LR':
            env.homotopy_class = 'right'
            eval_env.homotopy_class = 'right'

        env = HERGoalEnvWrapper(env)
        self.model.set_env(env)
        eval_env = HERGoalEnvWrapper(eval_env)
        self.model = train(self.model, eval_env, self.timesteps,
                           self.experiment_dir, self.is_save,
                           self.eval_save_period, self.rets_path, 0)
def train_curriculum_fetch(self, env_name="Merging-v0"):
    """
    Trains reward curriculum
    """
    self.curriculum = [env_name]
    bs2model_ours = {'RL': BR_BL0_BL1_BL5, 'LR': BL_BR0}
    bs2model = {'RL': BR_s, 'LR': BL_s}

    for l, lesson in enumerate(self.curriculum):
        for bs in bs2model.keys():
            self.bs = bs
            for seed in [101, 102]:
                if self.expt_type == "ours":
                    model_info = bs2model_ours[self.bs]
                else:
                    model_info = bs2model[self.bs]
                model_dir = os.path.join(model_info[0], model_info[1],
                                         model_info[2])
                if self.model_type == "PPO":
                    self.model = PPO2.load(model_dir)  # loads pre-trained model
                elif self.model_type == "HER":
                    self.model = HER.load(model_dir)  # loads pre-trained model

                print(f"\ntraining on {lesson}, bs {self.bs}, seed{seed}")
                self.seed = seed
                self.experiment_name = f"{self.bs}_{self.expt_type}_{seed}"
                print("EXPT NAME: ", self.experiment_dir1, self.experiment_name)
                self.experiment_dir = os.path.join(self.experiment_dir1,
                                                   self.experiment_name)
                self.create_eval_dir()

                env = gym.make(lesson)
                eval_env = gym.make(lesson)
                if self.bs == 'RL':
                    env._set_homotopy_class('left')
                    eval_env._set_homotopy_class('left')
                elif self.bs == 'LR':
                    env._set_homotopy_class('right')
                    eval_env._set_homotopy_class('right')

                if self.model_type == "HER":
                    env = HERGoalEnvWrapper(env)
                    eval_env = HERGoalEnvWrapper(eval_env)
                    print("hc: ", env.env.homotopy_class)
                else:
                    env = DummyVecEnv([lambda: env])

                self.model.set_env(env)
                self.model.seed = self.seed
                self.model = train(self.model, eval_env, self.timesteps,
                                   self.experiment_dir, self.is_save,
                                   self.eval_save_period, self.rets_path, l)
def train_curriculum(self, env_name="Merging-v0"):
    """
    Trains reward curriculum
    """
    self.curriculum = [env_name]
    bs2model_ours = {1: B1R_B0L, 3: B3R_B0L, 5: B5R_B0L2, 7: B7R_B0L_B4L1}
    bs2model = {1: B1R, 3: B3R, 5: B5R, 7: B7R}

    for l, lesson in enumerate(self.curriculum):
        for seed in [201, 202, 203, 204, 205]:
            if self.expt_type == "ours":
                model_info = bs2model_ours[int(self.bs)]
            elif self.expt_type == "finetune":
                model_info = bs2model[int(self.bs)]
            model_dir = os.path.join(model_info[0], model_info[1], model_info[2])
            if self.model_type == "PPO":
                self.model = PPO2.load(model_dir)  # loads pre-trained model
            elif self.model_type == "HER":
                self.model = HER.load(model_dir)  # loads pre-trained model

            print(f"\ntraining on {lesson}, bs {self.bs}, seed{seed}")
            self.seed = seed
            self.experiment_name = f"{self.bs}_{self.expt_type}_{seed}"
            print("EXPT NAME: ", self.experiment_dir1, self.experiment_name)
            self.experiment_dir = os.path.join(self.experiment_dir1,
                                               self.experiment_name)
            self.create_eval_dir()

            env = gym.make(lesson)
            eval_env = gym.make(lesson)
            env._set_barrier_size(self.bs)
            env._set_homotopy_class('left')
            eval_env._set_barrier_size(self.bs)
            eval_env._set_homotopy_class('left')

            if self.model_type == "HER":
                env = HERGoalEnvWrapper(env)
                eval_env = HERGoalEnvWrapper(eval_env)
                print("bs: ", env.env.barrier_size)
                print("hc: ", env.env.homotopy_class)
            else:
                env = DummyVecEnv([lambda: env])

            self.model.set_env(env)
            self.model.set_random_seed(self.seed)
            ### ENTROPY ###
            # self.model.ent_coef = 0.05
            self.model = train(self.model, eval_env, self.timesteps,
                               self.experiment_dir, self.is_save,
                               self.eval_save_period, self.rets_path, l)
def _create_replay_wrapper(self, env):
    """
    Wrap the environment in a HERGoalEnvWrapper
    if needed and create the replay buffer wrapper.
    """
    if not isinstance(env, HERGoalEnvWrapper):
        env = HERGoalEnvWrapper(env)

    self.env = env
    self.n_sampled_goal = 4
    self.goal_selection_strategy = 'future'

    # NOTE: we cannot do that check directly with VecEnv
    # maybe we can try calling `compute_reward()` ?
    # assert isinstance(self.env, gym.GoalEnv), "HER only supports gym.GoalEnv"

    self.replay_wrapper = functools.partial(
        HindsightExperienceReplayWrapper,
        n_sampled_goal=self.n_sampled_goal,
        goal_selection_strategy=self.goal_selection_strategy,
        wrapped_env=self.env)
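# Usage sketch (an assumption about the surrounding HER class, not code taken
# from this file): the partial created above is typically handed to the wrapped
# off-policy algorithm when learning starts, so it can wrap its replay buffer
# with hindsight goal relabelling ('future' strategy, 4 sampled goals per
# transition here), e.g.:
#
#     self.model.learn(total_timesteps,
#                      callback=callback,
#                      replay_wrapper=self.replay_wrapper)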
def evaluation(env_id, exp_id, model_path, num_episodes, output_path):
    env = HERGoalEnvWrapper(gym.make(env_id))
    model = HER.load(model_path, env=env)
    seed = np.random.randint(0, 10)
    set_global_seeds(seed)

    # Pre-allocate result arrays, one row per episode.
    goal_errors = np.empty((num_episodes), dtype=float)
    B_errors = np.empty((num_episodes), dtype=float)
    alpha_errors = np.empty((num_episodes), dtype=float)
    q_B_achieved = np.empty((num_episodes, 3), dtype=float)
    q_alpha_achieved = np.empty((num_episodes, 3), dtype=float)
    q_B_desired = np.empty((num_episodes, 3), dtype=float)
    q_alpha_desired = np.empty((num_episodes, 3), dtype=float)
    desired_goals = np.empty((num_episodes, 3), dtype=float)
    achieved_goals = np.empty((num_episodes, 3), dtype=float)
    starting_positions = np.empty((num_episodes, 3), dtype=float)
    q_B_starting = np.empty((num_episodes, 3), dtype=float)
    q_alpha_starting = np.empty((num_episodes, 3), dtype=float)

    for episode in range(num_episodes):
        print('episode: ', episode)
        # Run random episodes and save sequence of actions and states to plot in matlab
        episode_reward = 0
        ep_len = 0
        obs = env.reset()
        while True:
            action, _ = model.predict(obs, deterministic=True)
            action = np.clip(action, env.action_space.low, env.action_space.high)
            obs, reward, done, infos = env.step(action)
            episode_reward += reward
            ep_len += 1
            if done or infos.get('is_success', False):
                goal_errors[episode] = infos.get('errors_pos')
                q_B_desired[episode, :] = infos.get('q_desired')[:3]
                q_alpha_desired[episode, :] = infos.get('q_desired')[3:]
                q_B_achieved[episode, :] = infos.get('q_achieved')[:3]
                q_alpha_achieved[episode, :] = infos.get('q_achieved')[3:]
                desired_goals[episode, :] = infos.get('desired_goal')
                achieved_goals[episode, :] = infos.get('achieved_goal')
                starting_positions[episode, :] = infos.get('starting_position')
                q_B_starting[episode, :] = infos.get('q_starting')[:3]
                q_alpha_starting[episode, :] = infos.get('q_starting')[3:]
                break

    print('mean_errors: ', np.mean(goal_errors))
    eval_df = pd.DataFrame(
        data=np.column_stack(
            (desired_goals, achieved_goals, starting_positions,
             q_B_desired, q_B_achieved, q_B_starting,
             q_alpha_desired, q_alpha_achieved, q_alpha_starting)),
        columns=[
            'desired_goal_x', 'desired_goal_y', 'desired_goal_z',
            'achieved_goal_x', 'achieved_goal_y', 'achieved_goal_z',
            'starting_position_x', 'starting_position_y', 'starting_position_z',
            'B_desired_1', 'B_desired_2', 'B_desired_3',
            'B_achieved_1', 'B_achieved_2', 'B_achieved_3',
            'B_starting_1', 'B_starting_2', 'B_starting_3',
            'alpha_desired_1', 'alpha_desired_2', 'alpha_desired_3',
            'alpha_achieved_1', 'alpha_achieved_2', 'alpha_achieved_3',
            'alpha_starting_1', 'alpha_starting_2', 'alpha_starting_3',
        ])
    eval_df.to_csv(output_path)
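# Example invocation of evaluation() (the experiment id, paths and episode
# count below are hypothetical placeholders, shown only to illustrate the
# expected arguments):
if __name__ == '__main__':
    evaluation(env_id="CTR-Reach-v0",
               exp_id="cras_exp_1",
               model_path="/path/to/learned_policy/500000_saved_model.pkl",
               num_episodes=100,
               output_path="/path/to/evaluation_results.csv")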
def train_single(self, env_name="Merging-v0"):
    """
    Directly trains on env_name
    """
    for seed in [201, 202, 203, 204, 205]:
        print(f"\ntraining with bsize {self.bs}, seed{seed}")
        self.seed = seed
        self.experiment_name = f"B{self.bs}R{seed}"
        print("EXPT NAME: ", self.experiment_dir1, self.experiment_name)
        self.experiment_dir = os.path.join(self.experiment_dir1,
                                           self.experiment_name)
        self.create_eval_dir()
        self.model = None

        env = gym.make(env_name)
        eval_env = gym.make(env_name)
        env._set_barrier_size(self.bs)
        env._set_homotopy_class('right')
        eval_env._set_barrier_size(self.bs)
        eval_env._set_homotopy_class('right')

        if self.model_type == "PPO":
            if self.is_save:
                ### DEEPER NETWORK
                # policy_kwargs = dict(net_arch=[dict(pi=[64, 64, 64, 64],
                #                                     vf=[64, 64, 64, 64])]
                #                      )
                # self.PPO = PPO2('MlpPolicy', env, verbose=1, seed=self.seed,
                #                 learning_rate=1e-3, policy_kwargs=policy_kwargs)
                ### DROPOUT
                # self.PPO = PPO2(MlpGeneralPolicy1, env, verbose=1,
                #                 seed=self.seed, learning_rate=1e-3)
                ### REGULAR
                self.PPO = PPO2('MlpPolicy', env, verbose=1, seed=self.seed,
                                learning_rate=1e-3)
            else:
                self.PPO = PPO2('MlpPolicy', env, verbose=1, seed=self.seed,
                                learning_rate=1e-3)
            self.model = train(self.PPO, eval_env, self.timesteps,
                               self.experiment_dir, self.is_save,
                               self.eval_save_period, self.rets_path, 0)
        elif self.model_type == "DQN":
            if self.is_save:
                self.DQN = DQN('MlpPolicy', env, verbose=1, seed=self.seed,
                               prioritized_replay=True, learning_rate=1e-3,
                               tensorboard_log="./Gridworldv1_tensorboard/" +
                               self.experiment_name,
                               full_tensorboard_log=True)
            else:
                self.DQN = DQN('MlpPolicy', env, verbose=1, seed=self.seed,
                               prioritized_replay=True, learning_rate=1e-3)
            self.model = train(self.DQN, eval_env, self.timesteps,
                               self.experiment_dir, self.is_save,
                               self.eval_save_period, self.rets_path, 0)
        elif self.model_type == "HER":
            env = HERGoalEnvWrapper(env)
            eval_env = HERGoalEnvWrapper(eval_env)
            print("bs: ", env.env.barrier_size)
            print("hc: ", env.env.homotopy_class)
            self.HER = HER('MlpPolicy', env, DDPG, n_sampled_goal=4,
                           goal_selection_strategy="future", seed=self.seed,
                           verbose=1)
            self.model = train(self.HER, eval_env, self.timesteps,
                               self.experiment_dir, self.is_save,
                               self.eval_save_period, self.rets_path, 0)
class CTMPathFollower(object):
    def __init__(self, env_id, exp_id, model_path, trajectory_type,
                 episode_timesteps, noise_parameters):
        self.env_id = env_id
        self.exp_id = exp_id
        self.trajectory_type = trajectory_type

        # Load model and environment
        self.env = HERGoalEnvWrapper(
            gym.make(env_id, **{'noise_parameters': noise_parameters}))
        self.model = HER.load(model_path, env=self.env)
        self.episode_timesteps = episode_timesteps

        # Setup subscriber for trajectory generator
        # self.line_trajectory_timer = rospy.Timer(rospy.Duration(0.1), self.line_trajectory_callback)
        # self.circle_trajectory_timer = rospy.Timer(rospy.Duration(0.01), self.circle_trajectory_callback)

        # Line trajectory settings
        if self.trajectory_type == "line":
            self.start_p = np.array([20, 0, 100]) / 1000
            self.finish_p = np.array([20, 40, 100]) / 1000
            self.del_p = self.finish_p - self.start_p
            self.current_goal = self.start_p

        # Circle trajectory settings
        if self.trajectory_type == "circle":
            self.offset = np.array([20, 20, 100]) / 1000
            self.radius = 20.0 / 1000
            self.thetas = np.arange(0, 2 * np.pi, np.deg2rad(5))
            self.thetas_counter = 0
            self.start_p = self.offset
            self.current_goal = self.start_p

        # Start timer
        self.prev_time = rospy.get_time()

        # Complete trajectory check
        self.shape_df = pd.DataFrame(columns=[
            'episode', 'timestep', 'r1x', 'r1y', 'r1z', 'r2x', 'r2y', 'r2z',
            'r3x', 'r3y', 'r3z'
        ])
        # self.goals_df = pd.DataFrame(columns=['ag_x', 'ag_y', 'ag_z', 'dg_x', 'dg_y', 'dg_z'])
        self.traj_complete = False
        self.achieved_goals = np.array([])
        self.desired_goals = np.array([])
        self.episode_count = 0

    def line_trajectory_update(self):
        curr_time = rospy.get_time()
        delta_t = curr_time - self.prev_time
        self.prev_time = curr_time
        self.current_goal = self.current_goal + self.del_p * delta_t * 0.20
        if np.linalg.norm(self.current_goal - self.finish_p) < 0.001:
            self.traj_complete = True
        print("Distance to end: ",
              np.linalg.norm(self.current_goal - self.finish_p))

    def circle_trajectory_update(self):
        print('thetas_counter: ', self.thetas_counter, 'of ',
              self.thetas.size - 1)
        curr_time = rospy.get_time()
        delta_t = curr_time - self.prev_time
        self.prev_time = curr_time
        self.thetas_counter += 1
        if self.thetas_counter == self.thetas.size - 1:
            self.traj_complete = True
        else:
            self.current_goal = self.offset + self.radius * np.array([
                np.cos(self.thetas[self.thetas_counter]),
                np.sin(self.thetas[self.thetas_counter]), 0
            ])

    def play_episode(self, render_mode='inference'):
        self.episode_count += 1
        episode_reward = 0.0
        ep_len = 0

        # Set the observation
        obs = self.env.reset(goal=self.current_goal)
        for t in range(self.episode_timesteps):
            action, _ = self.model.predict(obs, deterministic=True)
            # Ensure action space is of type Box
            if isinstance(self.env.action_space, gym.spaces.Box):
                action = np.clip(action, self.env.action_space.low,
                                 self.env.action_space.high)
            obs, reward, done, infos = self.env.step(action)
            episode_reward += reward
            ep_len += 1
            if done or infos.get('is_success', False):
                break

        self.env.render(mode=render_mode)

        r_df = self.env.env.r_df
        r_df['episode'] = np.full(r_df.shape[0], self.episode_count)
        self.shape_df = pd.concat([self.shape_df, r_df], join='inner')

        # Save data for the episode
        if self.achieved_goals.size == 0:
            self.achieved_goals = self.env.convert_obs_to_dict(obs)['achieved_goal']
            self.desired_goals = self.env.convert_obs_to_dict(obs)['desired_goal']
        else:
            self.achieved_goals = np.vstack([
                self.achieved_goals,
                self.env.convert_obs_to_dict(obs)['achieved_goal']
            ])
            self.desired_goals = np.vstack([
                self.desired_goals,
                self.env.convert_obs_to_dict(obs)['desired_goal']
            ])

    # Save shape information of the episode
    def save_data(self, name=None):
        ag_goals_df = pd.DataFrame(data=self.achieved_goals,
                                   columns=['ag_x', 'ag_y', 'ag_z'])
        dg_goals_df = pd.DataFrame(data=self.desired_goals,
                                   columns=['dg_x', 'dg_y', 'dg_z'])
        goals_df = pd.concat([ag_goals_df, dg_goals_df], axis=1, join='inner')
        results_dir = ('/home/keshav/ctm2-stable-baselines/saved_results/'
                       'icra_experiments/data/revisions/' +
                       self.trajectory_type + '_path/')
        if name is None:
            goals_df.to_csv(results_dir + self.trajectory_type +
                            '_path_following_' + self.exp_id + '_goals.csv')
            self.shape_df.to_csv(results_dir + self.trajectory_type +
                                 '_path_following_' + self.exp_id + '_shape.csv')
        else:
            goals_df.to_csv(results_dir + self.trajectory_type +
                            '_path_following_' + self.exp_id + '_' + name +
                            '_goals.csv')
            self.shape_df.to_csv(results_dir + self.trajectory_type +
                                 '_path_following_' + self.exp_id + '_' + name +
                                 '_shape.csv')
        # Check for max workspace limits
        if ag[0] > max_x:
            max_x = ag[0]
        if ag[1] > max_y:
            max_y = ag[1]
        if ag[2] > max_z:
            max_z = ag[2]
        # Check for min workspace limits
        if ag[0] < min_x:
            min_x = ag[0]
        if ag[1] < min_y:
            min_y = ag[1]
        if ag[2] < min_z:
            min_z = ag[2]

        if i % 100 == 0:
            print(i)
            print("ag: ", ag)
            print("max x: ", max_x, " max y: ", max_y, " max z: ", max_z)
            print("min x: ", min_x, " min y: ", min_y, " min z: ", min_z)
            print("=======")

    return np.array([max_x, max_y, max_z]), np.array([min_x, min_y, min_z])


if __name__ == '__main__':
    env_id = "CTR-Reach-v0"
    env = HERGoalEnvWrapper(gym.make(env_id))
    max_limits, min_limits = workspace_limits(env)
    print(max_limits)
    print(min_limits)
def train_single_fetch(self, env_name="Merging-v0"):
    """
    Directly trains on env_name
    """
    for bs in ['LR', 'RL']:
        self.bs = bs
        for seed in [101, 102]:
            print(f"\ntraining with bsize {self.bs}, seed{seed}")
            self.seed = seed
            self.experiment_name = f"{self.bs}_{self.expt_type}_{seed}"
            print("EXPT NAME: ", self.experiment_dir1, self.experiment_name)
            self.experiment_dir = os.path.join(self.experiment_dir1,
                                               self.experiment_name)
            self.create_eval_dir()
            self.model = None

            env = gym.make(env_name)
            eval_env = gym.make(env_name)
            if self.bs == 'RL':
                env._set_homotopy_class('left')
                eval_env._set_homotopy_class('left')
            elif self.bs == 'LR':
                env._set_homotopy_class('right')
                eval_env._set_homotopy_class('right')

            if self.model_type == "PPO":
                if self.is_save:
                    self.PPO = PPO2('MlpPolicy', env, verbose=1,
                                    seed=self.seed, learning_rate=1e-3)
                else:
                    self.PPO = PPO2('MlpPolicy', env, verbose=1,
                                    seed=self.seed, learning_rate=1e-3)
                self.model = train(self.PPO, eval_env, self.timesteps,
                                   self.experiment_dir, self.is_save,
                                   self.eval_save_period, self.rets_path, 0)
            elif self.model_type == "DQN":
                if self.is_save:
                    self.DQN = DQN('MlpPolicy', env, verbose=1, seed=self.seed,
                                   prioritized_replay=True, learning_rate=1e-3,
                                   tensorboard_log="./Gridworldv1_tensorboard/" +
                                   self.experiment_name,
                                   full_tensorboard_log=True)
                else:
                    self.DQN = DQN('MlpPolicy', env, verbose=1, seed=self.seed,
                                   prioritized_replay=True, learning_rate=1e-3)
                self.model = train(self.DQN, eval_env, self.timesteps,
                                   self.experiment_dir, self.is_save,
                                   self.eval_save_period, self.rets_path, 0)
            elif self.model_type == "HER":
                env = HERGoalEnvWrapper(env)
                eval_env = HERGoalEnvWrapper(eval_env)
                print("hc: ", env.env.homotopy_class)
                self.HER = HER('MlpPolicy', env, DDPG, n_sampled_goal=4,
                               goal_selection_strategy="future",
                               seed=self.seed, verbose=1)
                self.model = train(self.HER, eval_env, self.timesteps,
                                   self.experiment_dir, self.is_save,
                                   self.eval_save_period, self.rets_path, 0)
import gym
import numpy as np
import pandas as pd

from stable_baselines import DDPG, HER
from stable_baselines.common import set_global_seeds
from stable_baselines.her.utils import HERGoalEnvWrapper

if __name__ == '__main__':
    # Env and model names and paths
    env_id = "CTR-Reach-v0"
    # env_id = "CTR-Reach-Noisy-v0"
    exp_id = "cras_exp_6"
    model_path = ("/home/keshav/ctm2-stable-baselines/saved_results/icra_experiments/"
                  + exp_id + "/learned_policy/500000_saved_model.pkl")

    # Create envs and model
    env = HERGoalEnvWrapper(gym.make(env_id))
    model = HER.load(model_path, env=env)
    seed = np.random.randint(0, 10)
    set_global_seeds(seed)

    q_joints = np.array([])
    achieved_goals = np.array([])
    desired_goals = np.array([])
    time_taken = np.array([])

    # Run an initial number of random actions to randomize starting position
    obs = env.reset()
    for t in range(100):
        env.step(env.action_space.sample())  # take a random action
def save_traj(model, state_history):
    state_history = list(state_history)
    # Writes the full state history as a single CSV row.
    with open("output/fetch/single_trajs/{}.csv".format(model[1]), "w") as f:
        writer = csv.writer(f)
        writer.writerow(state_history)


if __name__ == "__main__":
    if FLAGS.env == "nav1":
        from output.updated_gridworld_continuous.policies import *
        model_info = spB7L
        # eval_env = load_env("Continuous-v0", "PPO")
        eval_env = load_env("ContinuousSparse-v0", "HER")  # TODO: REMOVE
        eval_env = HERGoalEnvWrapper(load_env("ContinuousSparse-v0"))
        eval_env.env._set_barrier_size(7)
        eval_env.env._set_homotopy_class('left')
        model = load_model(model_info, "HER", baseline=None)
    elif FLAGS.env == 'fetch':
        from output.fetch2.policies import *
        model_info = BR_BL
        eval_env = HERGoalEnvWrapper(load_env("Fetch-v0"))
        model = load_model(model_info, "HER", baseline=None)

    save = False
    sum_reward = 0.0
    num_episode = 10
    for ne in range(num_episode):
        mean_ret, std_ret, total_ret, state_history = evaluate(model, eval_env,
                                                               render=True)
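# Minimal read-back sketch for the CSV written by save_traj above (assumes the
# single-row layout it produces; load_traj and its path argument are
# illustrative, not part of the original script):
def load_traj(path):
    import csv
    with open(path) as f:
        # save_traj stores the whole state history in one row
        return next(csv.reader(f))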
import gym
import numpy as np
import pandas as pd

from stable_baselines import DDPG, HER
from stable_baselines.common import set_global_seeds
from stable_baselines.her.utils import HERGoalEnvWrapper

# Aim of this script is to run through a number of episodes, record the achieved
# goal and error, and plot them in the workspace to visualize whether errors are
# concentrated in biased areas or evenly spread.

if __name__ == '__main__':
    # Env and model names and paths
    env_id = "CTR-Reach-v0"
    model_path = ("/home/keshav/ctm2-stable-baselines/saved_results/icra_experiments/"
                  "cras_exp_1/learned_policy/500000_saved_model.pkl")

    # Create envs and model
    env = HERGoalEnvWrapper(gym.make(env_id))
    model = HER.load(model_path, env=env)
    seed = np.random.randint(0, 10)
    set_global_seeds(seed)

    errors = np.array([])
    achieved_goals = np.empty((1, 3))
    episode_rewards = np.array([])

    num_episodes = 100
    for episode in range(num_episodes):
        print('episode: ', episode)
        # Run random episodes and save sequence of actions and states to plot in matlab
        episode_reward = 0