Example No. 1
 def train_pnn(self, env_name="Merging-v0"):
     """
     Directly trains on env_name
     """
     bs2model = {'RL': BR_s, 'LR': BL_s}
     #for bs in bs2model.keys():
     bs = self.bs
     for seed in [101, 102]:
         model_info = bs2model[bs]
         model_dir = os.path.join(model_info[0], model_info[1],
                                  model_info[2])
         output_dir = os.path.join("output/fetch_PNN", 'resave',
                                   model_info[2])
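         # Re-save the pre-trained parameters in a PNN-compatible format, then load them (presumably as the frozen source column of the progressive network).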
         utils.resave_params_for_PNN(model_dir, output_dir)
         self.model = HER2PNN.load(output_dir)
         self.seed = seed
         self.experiment_name = f"PNN_{bs}_{self.seed}"
         print("EXPT NAME: ", self.experiment_name)
         self.experiment_dir = os.path.join(self.experiment_dir1,
                                            self.experiment_name)
         self.create_eval_dir()
         env = gym.make(env_name)
         eval_env = gym.make(env_name)
         if bs == 'RL':
             env.homotopy_class = 'left'
             eval_env.homotopy_class = 'left'
         elif bs == 'LR':
             env.homotopy_class = 'right'
             eval_env.homotopy_class = 'right'
         env = HERGoalEnvWrapper(env)
         self.model.set_env(env)
         eval_env = HERGoalEnvWrapper(eval_env)
         self.model = train(self.model, eval_env, self.timesteps,
                            self.experiment_dir, self.is_save,
                            self.eval_save_period, self.rets_path, 0)
Example No. 2
 def train_l2sp(self, env_name="Merging-v0"):
     """
     Directly trains on env_name
     """
     bs2model = {'RL': BR_s, 'LR': BL_s}
     #for bs in bs2model.keys():
     bs = self.bs
     for seed in [101, 102]:
         model_info = bs2model[bs]
         model_dir = os.path.join(model_info[0], model_info[1], model_info[2])
         data, params = utils.load_from_file(model_dir)
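         # Keep the source parameters so the L2-SP-style penalty can regularize fine-tuning toward them (assumed behaviour of HER2L2SP).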
         self.model = HER2L2SP.load(model_dir, original_params=params)
         self.seed = seed
         self.experiment_name = f"L2SP_{bs}_{self.seed}"
         print("EXPT NAME: ", self.experiment_name)
         self.experiment_dir = os.path.join(self.experiment_dir1, self.experiment_name)
         self.create_eval_dir()
         env = gym.make(env_name)
         eval_env = gym.make(env_name)
         if bs == 'RL':
             env.homotopy_class = 'left'
             eval_env.homotopy_class = 'left'
         elif bs == 'LR':
             env.homotopy_class = 'right'
             eval_env.homotopy_class = 'right'
         env = HERGoalEnvWrapper(env)
         self.model.set_env(env)
         eval_env = HERGoalEnvWrapper(eval_env)
         self.model = train(self.model, eval_env, self.timesteps, self.experiment_dir,
                            self.is_save, self.eval_save_period, self.rets_path, 0)
Example No. 3
    def train_curriculum_fetch(self, env_name="Merging-v0"):
        """
        Trains reward curriculum
        """
        self.curriculum = [env_name]
        bs2model_ours = {'RL': BR_BL0_BL1_BL5, 'LR': BL_BR0}
        bs2model = {'RL': BR_s, 'LR': BL_s}
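        # Two checkpoint tables keyed by barrier side: one used when expt_type is "ours", the other for all remaining experiment types.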
        for l, lesson in enumerate(self.curriculum):
            for bs in bs2model.keys():
                self.bs = bs
                for seed in [101, 102]:
                    if self.expt_type == "ours":
                        model_info = bs2model_ours[self.bs]
                    else:
                        model_info = bs2model[self.bs]
                    model_dir = os.path.join(model_info[0], model_info[1],
                                             model_info[2])
                    if self.model_type == "PPO":
                        self.model = PPO2.load(
                            model_dir)  # loads pre-trained model
                    elif self.model_type == "HER":
                        self.model = HER.load(
                            model_dir)  # loads pre-trained model
                    print(f"\ntraining on {lesson}, bs {self.bs}, seed{seed}")
                    self.seed = seed
                    self.experiment_name = f"{self.bs}_{self.expt_type}_{seed}"
                    print("EXPT NAME: ", self.experiment_dir1,
                          self.experiment_name)
                    self.experiment_dir = os.path.join(self.experiment_dir1,
                                                       self.experiment_name)
                    self.create_eval_dir()
                    env = gym.make(lesson)
                    eval_env = gym.make(lesson)

                    if self.bs == 'RL':
                        env._set_homotopy_class('left')
                        eval_env._set_homotopy_class('left')
                    elif self.bs == 'LR':
                        env._set_homotopy_class('right')
                        eval_env._set_homotopy_class('right')

                    if self.model_type == "HER":
                        env = HERGoalEnvWrapper(env)
                        eval_env = HERGoalEnvWrapper(eval_env)
                        print("hc: ", env.env.homotopy_class)
                    else:
                        env = DummyVecEnv([lambda: env])
                    self.model.set_env(env)
                    self.model.seed = self.seed
                    self.model = train(self.model, eval_env, self.timesteps,
                                       self.experiment_dir, self.is_save,
                                       self.eval_save_period, self.rets_path,
                                       l)
Example No. 4
    def train_curriculum(self, env_name="Merging-v0"):
        """
        Trains reward curriculum
        """
        self.curriculum = [env_name]
        bs2model_ours = {1: B1R_B0L, 3: B3R_B0L, 5: B5R_B0L2, 7: B7R_B0L_B4L1}
        bs2model = {1: B1R, 3: B3R, 5: B5R, 7: B7R}
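        # Pre-trained checkpoints keyed by barrier size, one table per experiment type ("ours" vs. "finetune").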
        for l, lesson in enumerate(self.curriculum):
            for seed in [201, 202, 203, 204, 205]:
                if self.expt_type == "ours":
                    model_info = bs2model_ours[int(self.bs)]
                elif self.expt_type == "finetune":
                    model_info = bs2model[int(self.bs)]
                model_dir = os.path.join(model_info[0], model_info[1],
                                         model_info[2])
                if self.model_type == "PPO":
                    self.model = PPO2.load(
                        model_dir)  # loads pre-trained model
                elif self.model_type == "HER":
                    self.model = HER.load(model_dir)  # loads pre-trained model
                print(f"\ntraining on {lesson}, bs {self.bs}, seed{seed}")
                self.seed = seed
                self.experiment_name = f"{self.bs}_{self.expt_type}_{seed}"
                print("EXPT NAME: ", self.experiment_dir1,
                      self.experiment_name)
                self.experiment_dir = os.path.join(self.experiment_dir1,
                                                   self.experiment_name)
                self.create_eval_dir()
                env = gym.make(lesson)
                eval_env = gym.make(lesson)

                env._set_barrier_size(self.bs)
                env._set_homotopy_class('left')
                eval_env._set_barrier_size(self.bs)
                eval_env._set_homotopy_class('left')

                if self.model_type == "HER":
                    env = HERGoalEnvWrapper(env)
                    eval_env = HERGoalEnvWrapper(eval_env)
                    print("bs: ", env.env.barrier_size)
                    print("hc: ", env.env.homotopy_class)
                else:
                    env = DummyVecEnv([lambda: env])
                self.model.set_env(env)
                self.model.set_random_seed(self.seed)
                ### ENTROPY###
                #self.model.ent_coef = 0.05
                self.model = train(self.model, eval_env, self.timesteps,
                                   self.experiment_dir, self.is_save,
                                   self.eval_save_period, self.rets_path, l)
Example No. 5
    def __init__(self, env_id, exp_id, model_path, trajectory_type,
                 episode_timesteps, noise_parameters):
        self.env_id = env_id
        self.exp_id = exp_id
        self.trajectory_type = trajectory_type
        # Load model and environment
        self.env = HERGoalEnvWrapper(
            gym.make(env_id, **{'noise_parameters': noise_parameters}))
        self.model = HER.load(model_path, env=self.env)
        self.episode_timesteps = episode_timesteps

        # Setup subscriber for trajectory generator
        # self.line_trajectory_timer = rospy.Timer(rospy.Duration(0.1), self.line_trajectory_callback)
        # self.circle_trajectory_timer = rospy.Timer(rospy.Duration(0.01), self.circle_trajectory_callback)

        # Line trajectory settings
        if self.trajectory_type == "line":
            self.start_p = np.array([20, 0, 100]) / 1000
            self.finish_p = np.array([20, 40, 100]) / 1000
            self.del_p = self.finish_p - self.start_p
            self.current_goal = self.start_p

        # Circle trajectory settings
        if self.trajectory_type == "circle":
            self.offset = np.array([20, 20, 100]) / 1000
            self.radius = 20.0 / 1000
            self.thetas = np.arange(0, 2 * np.pi, np.deg2rad(5))
            self.thetas_counter = 0
            self.start_p = self.offset
            self.current_goal = self.start_p

        # Start timer
        self.prev_time = rospy.get_time()

        # Complete trajectory check
        self.shape_df = pd.DataFrame(columns=[
            'episode', 'timestep', 'r1x', 'r1y', 'r1z', 'r2x', 'r2y', 'r2z',
            'r3x', 'r3y', 'r3z'
        ])
        # self.goals_df = pd.DataFrame(columns=['ag_x', 'ag_y', 'ag_z', 'dg_x', 'dg_y', 'dg_z'])
        self.traj_complete = False

        self.achieved_goals = np.array([])
        self.desired_goals = np.array([])
        self.episode_count = 0
Example No. 6
    def _create_replay_wrapper(self, env):
        """
        Wrap the environment in a HERGoalEnvWrapper
        if needed and create the replay buffer wrapper.
        """
        if not isinstance(env, HERGoalEnvWrapper):
            env = HERGoalEnvWrapper(env)

        self.env = env
        self.n_sampled_goal = 4
        self.goal_selection_strategy = 'future'
        # NOTE: we cannot do that check directly with VecEnv
        # maybe we can try calling `compute_reward()` ?
        # assert isinstance(self.env, gym.GoalEnv), "HER only supports gym.GoalEnv"

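        # Partially apply the hindsight replay wrapper so the algorithm can construct it later with these settings baked in.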
        self.replay_wrapper = functools.partial(
            HindsightExperienceReplayWrapper,
            n_sampled_goal=self.n_sampled_goal,
            goal_selection_strategy=self.goal_selection_strategy,
            wrapped_env=self.env)
Example No. 7
def evaluation(env_id, exp_id, model_path, num_episodes, output_path):
    env = HERGoalEnvWrapper(gym.make(env_id))
    model = HER.load(model_path, env=env)

    seed = np.random.randint(0, 10)
    set_global_seeds(seed)

    goal_errors = np.empty((num_episodes), dtype=float)
    B_errors = np.empty((num_episodes), dtype=float)
    alpha_errors = np.empty((num_episodes), dtype=float)
    q_B_achieved = np.empty((num_episodes, 3), dtype=float)
    q_alpha_achieved = np.empty((num_episodes, 3), dtype=float)
    q_B_desired = np.empty((num_episodes, 3), dtype=float)
    q_alpha_desired = np.empty((num_episodes, 3), dtype=float)
    desired_goals = np.empty((num_episodes, 3), dtype=float)
    achieved_goals = np.empty((num_episodes, 3), dtype=float)
    starting_positions = np.empty((num_episodes, 3), dtype=float)
    q_B_starting = np.empty((num_episodes, 3), dtype=float)
    q_alpha_starting = np.empty((num_episodes, 3), dtype=float)

    # Result arrays above are already pre-allocated; run the evaluation episodes and fill them in.
    for episode in range(num_episodes):
        print('episode: ', episode)
        # Run random episodes and save sequence of actions and states to plot in matlab
        episode_reward = 0
        ep_len = 0
        obs = env.reset()
        while True:
            action, _ = model.predict(obs, deterministic=True)
            action = np.clip(action, env.action_space.low,
                             env.action_space.high)
            obs, reward, done, infos = env.step(action)

            episode_reward += reward
            ep_len += 1

            if done or infos.get('is_success', False):
                goal_errors[episode] = infos.get('errors_pos')
                q_B_desired[episode, :] = infos.get('q_desired')[:3]
                q_alpha_desired[episode, :] = infos.get('q_desired')[3:]
                q_B_achieved[episode, :] = infos.get('q_achieved')[:3]
                q_alpha_achieved[episode, :] = infos.get('q_achieved')[3:]
                desired_goals[episode, :] = infos.get('desired_goal')
                achieved_goals[episode, :] = infos.get('achieved_goal')
                starting_positions[episode, :] = infos.get('starting_position')
                q_B_starting[episode, :] = infos.get('q_starting')[:3]
                q_alpha_starting[episode, :] = infos.get('q_starting')[3:]
                break

    print('mean_errors: ', np.mean(goal_errors))
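    # Stack all per-episode arrays column-wise into a single results table (one row per episode).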
    eval_df = pd.DataFrame(data=np.column_stack(
        (desired_goals, achieved_goals, starting_positions, q_B_desired,
         q_B_achieved, q_B_starting, q_alpha_desired, q_alpha_achieved,
         q_alpha_starting)),
                           columns=[
                               'desired_goal_x',
                               'desired_goal_y',
                               'desired_goal_z',
                               'achieved_goal_x',
                               'achieved_goal_y',
                               'achieved_goal_z',
                               'starting_position_x',
                               'starting_position_y',
                               'starting_position_z',
                               'B_desired_1',
                               'B_desired_2',
                               'B_desired_3',
                               'B_achieved_1',
                               'B_achieved_2',
                               'B_achieved_3',
                               'B_starting_1',
                               'B_starting_2',
                               'B_starting_3',
                               'alpha_desired_1',
                               'alpha_desired_2',
                               'alpha_desired_3',
                               'alpha_achieved_1',
                               'alpha_achieved_2',
                               'alpha_achieved_3',
                               'alpha_starting_1',
                               'alpha_starting_2',
                               'alpha_starting_3',
                           ])
    eval_df.to_csv(output_path)
Example No. 8
    def train_single(self, env_name="Merging-v0"):
        """
        Directly trains on env_name
        """
        for seed in [201, 202, 203, 204, 205]:
            print(f"\ntraining with bsize {self.bs}, seed{seed}")
            self.seed = seed
            self.experiment_name = f"B{self.bs}R{seed}"
            print("EXPT NAME: ", self.experiment_dir1, self.experiment_name)
            self.experiment_dir = os.path.join(self.experiment_dir1,
                                               self.experiment_name)
            self.create_eval_dir()
            self.model = None
            env = gym.make(env_name)
            eval_env = gym.make(env_name)
            env._set_barrier_size(self.bs)
            env._set_homotopy_class('right')
            eval_env._set_barrier_size(self.bs)
            eval_env._set_homotopy_class('right')
            if self.model_type == "PPO":
                if self.is_save:
                    ### DEEPER NETWORK
                    #policy_kwargs = dict(net_arch=[dict(pi=[64, 64, 64, 64],
                    #                                    vf=[64, 64, 64, 64])]
                    #                                    )
                    #self.PPO = PPO2('MlpPolicy', env, verbose=1, seed=self.seed, learning_rate=1e-3,
                    #                policy_kwargs=policy_kwargs)
                    ### DROPOUT
                    #self.PPO = PPO2(MlpGeneralPolicy1, env, verbose=1, seed=self.seed, learning_rate=1e-3)
                    ### REGULAR
                    self.PPO = PPO2('MlpPolicy',
                                    env,
                                    verbose=1,
                                    seed=self.seed,
                                    learning_rate=1e-3)
                else:
                    self.PPO = PPO2('MlpPolicy',
                                    env,
                                    verbose=1,
                                    seed=self.seed,
                                    learning_rate=1e-3)

                self.model = train(self.PPO, eval_env, self.timesteps,
                                   self.experiment_dir, self.is_save,
                                   self.eval_save_period, self.rets_path, 0)
            elif self.model_type == "DQN":
                if self.is_save:
                    self.DQN = DQN(
                        'MlpPolicy',
                        env,
                        verbose=1,
                        seed=self.seed,
                        prioritized_replay=True,
                        learning_rate=1e-3,
                        tensorboard_log="./Gridworldv1_tensorboard/" +
                        self.experiment_name,
                        full_tensorboard_log=True)
                else:
                    self.DQN = DQN('MlpPolicy',
                                   env,
                                   verbose=1,
                                   seed=self.seed,
                                   prioritized_replay=True,
                                   learning_rate=1e-3)
                self.model = train(self.DQN, eval_env, self.timesteps,
                                   self.experiment_dir, self.is_save,
                                   self.eval_save_period, self.rets_path, 0)
            elif self.model_type == "HER":
                env = HERGoalEnvWrapper(env)
                eval_env = HERGoalEnvWrapper(eval_env)
                print("bs: ", env.env.barrier_size)
                print("hc: ", env.env.homotopy_class)
                self.HER = HER('MlpPolicy',
                               env,
                               DDPG,
                               n_sampled_goal=4,
                               goal_selection_strategy="future",
                               seed=self.seed,
                               verbose=1)
                self.model = train(self.HER, eval_env, self.timesteps,
                                   self.experiment_dir, self.is_save,
                                   self.eval_save_period, self.rets_path, 0)
Example No. 9
class CTMPathFollower(object):
    def __init__(self, env_id, exp_id, model_path, trajectory_type,
                 episode_timesteps, noise_parameters):
        self.env_id = env_id
        self.exp_id = exp_id
        self.trajectory_type = trajectory_type
        # Load model and environment
        self.env = HERGoalEnvWrapper(
            gym.make(env_id, **{'noise_parameters': noise_parameters}))
        self.model = HER.load(model_path, env=self.env)
        self.episode_timesteps = episode_timesteps

        # Setup subscriber for trajectory generator
        # self.line_trajectory_timer = rospy.Timer(rospy.Duration(0.1), self.line_trajectory_callback)
        # self.circle_trajectory_timer = rospy.Timer(rospy.Duration(0.01), self.circle_trajectory_callback)

        # Line trajectory settings
        if self.trajectory_type == "line":
            self.start_p = np.array([20, 0, 100]) / 1000
            self.finish_p = np.array([20, 40, 100]) / 1000
            self.del_p = self.finish_p - self.start_p
            self.current_goal = self.start_p

        # Circle trajectory settings
        if self.trajectory_type == "circle":
            self.offset = np.array([20, 20, 100]) / 1000
            self.radius = 20.0 / 1000
            self.thetas = np.arange(0, 2 * np.pi, np.deg2rad(5))
            self.thetas_counter = 0
            self.start_p = self.offset
            self.current_goal = self.start_p

        # Start timer
        self.prev_time = rospy.get_time()

        # Complete trajectory check
        self.shape_df = pd.DataFrame(columns=[
            'episode', 'timestep', 'r1x', 'r1y', 'r1z', 'r2x', 'r2y', 'r2z',
            'r3x', 'r3y', 'r3z'
        ])
        # self.goals_df = pd.DataFrame(columns=['ag_x', 'ag_y', 'ag_z', 'dg_x', 'dg_y', 'dg_z'])
        self.traj_complete = False

        self.achieved_goals = np.array([])
        self.desired_goals = np.array([])
        self.episode_count = 0

    def line_trajectory_update(self):
        curr_time = rospy.get_time()
        delta_t = curr_time - self.prev_time
        self.prev_time = curr_time
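        # Advance the goal along the line at 20% of the full start-to-finish vector per second of elapsed time.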
        self.current_goal = self.current_goal + self.del_p * delta_t * 0.20

        if np.linalg.norm(self.current_goal - self.finish_p) < 0.001:
            self.traj_complete = True

        print("Distance to end: ",
              np.linalg.norm(self.current_goal - self.finish_p))

    def circle_trajectory_update(self):
        print('thetas_counter: ', self.thetas_counter, 'of ',
              self.thetas.size - 1)
        curr_time = rospy.get_time()
        delta_t = curr_time - self.prev_time
        self.prev_time = curr_time

        self.thetas_counter += 1
        if self.thetas_counter == self.thetas.size - 1:
            self.traj_complete = True
        else:
            self.current_goal = self.offset + self.radius * np.array([
                np.cos(self.thetas[self.thetas_counter]),
                np.sin(self.thetas[self.thetas_counter]), 0
            ])

    def play_episode(self, render_mode='inference'):
        self.episode_count += 1
        episode_reward = 0.0
        ep_len = 0
        # Set the observation
        obs = self.env.reset(goal=self.current_goal)

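        # Roll the policy out toward the current waypoint, stopping early on termination or reported success.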
        for t in range(self.episode_timesteps):
            action, _ = self.model.predict(obs, deterministic=True)

            # Ensure action space is of type Box
            if isinstance(self.env.action_space, gym.spaces.Box):
                action = np.clip(action, self.env.action_space.low,
                                 self.env.action_space.high)
            obs, reward, done, infos = self.env.step(action)

            episode_reward += reward
            ep_len += 1

            if done or infos.get('is_success', False):
                break

        self.env.render(mode=render_mode)
        r_df = self.env.env.r_df
        r_df['episode'] = np.full(r_df.shape[0], self.episode_count)
        self.shape_df = pd.concat([self.shape_df, r_df], join='inner')

        # Save data for the episode
        if self.achieved_goals.size == 0:
            self.achieved_goals = self.env.convert_obs_to_dict(
                obs)['achieved_goal']
            self.desired_goals = self.env.convert_obs_to_dict(
                obs)['desired_goal']
        else:
            self.achieved_goals = np.vstack([
                self.achieved_goals,
                self.env.convert_obs_to_dict(obs)['achieved_goal']
            ])
            self.desired_goals = np.vstack([
                self.desired_goals,
                self.env.convert_obs_to_dict(obs)['desired_goal']
            ])

    # Save goal and shape information collected across episodes
    def save_data(self, name=None):
        ag_goals_df = pd.DataFrame(data=self.achieved_goals,
                                   columns=['ag_x', 'ag_y', 'ag_z'])
        dg_goals_df = pd.DataFrame(data=self.desired_goals,
                                   columns=['dg_x', 'dg_y', 'dg_z'])
        goals_df = pd.concat([ag_goals_df, dg_goals_df], axis=1, join='inner')
        if name is None:
            goals_df.to_csv(
                '/home/keshav/ctm2-stable-baselines/saved_results/icra_experiments/data/revisions/'
                + self.trajectory_type + '_path/' + self.trajectory_type +
                '_path_following_' + self.exp_id + '_goals.csv')
            self.shape_df.to_csv(
                '/home/keshav/ctm2-stable-baselines/saved_results/icra_experiments/data/revisions/'
                + self.trajectory_type + '_path/' + self.trajectory_type +
                '_path_following_' + self.exp_id + '_shape.csv')
        else:
            goals_df.to_csv(
                '/home/keshav/ctm2-stable-baselines/saved_results/icra_experiments/data/revisions/'
                + self.trajectory_type + '_path/' + self.trajectory_type +
                '_path_following_' + self.exp_id + '_' + name + '_goals.csv')
            self.shape_df.to_csv(
                '/home/keshav/ctm2-stable-baselines/saved_results/icra_experiments/data/revisions/'
                + self.trajectory_type + '_path/' + self.trajectory_type +
                '_path_following_' + self.exp_id + '_' + name + '_shape.csv')
Example No. 10
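        # Check for max workspace limits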
        if ag[0] > max_x:
            max_x = ag[0]
        if ag[1] > max_y:
            max_y = ag[1]
        if ag[2] > max_z:
            max_z = ag[2]
        # Check for min workspace limits
        if ag[0] < min_x:
            min_x = ag[0]
        if ag[1] < min_y:
            min_y = ag[1]
        if ag[2] < min_z:
            min_z = ag[2]

        if i % 100 == 0:
            print(i)
            print("ag: ", ag)
            print("max x: ", max_x, " max y: ", max_y, " max z: ", max_z)
            print("min x: ", min_x, " min y: ", min_y, " min z: ", min_z)
            print("=======")

    return np.array([max_x, max_y, max_z]), np.array([min_x, min_y, min_z])


if __name__ == '__main__':
    env_id = "CTR-Reach-v0"
    env = HERGoalEnvWrapper(gym.make(env_id))
    max_limits, min_limits = workspace_limits(env)
    print(max_limits)
    print(min_limits)
Example No. 11
 def train_single_fetch(self, env_name="Merging-v0"):
     """
     Directly trains on env_name
     """
     for bs in ['LR', 'RL']:
         self.bs = bs
         for seed in [101, 102]:
             print(f"\ntraining with bsize {self.bs}, seed{seed}")
             self.seed = seed
             self.experiment_name = f"{self.bs}_{self.expt_type}_{seed}"
             print("EXPT NAME: ", self.experiment_dir1,
                   self.experiment_name)
             self.experiment_dir = os.path.join(self.experiment_dir1,
                                                self.experiment_name)
             self.create_eval_dir()
             self.model = None
             env = gym.make(env_name)
             eval_env = gym.make(env_name)
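             # The barrier-side label fixes the homotopy class this policy is trained in ('RL' -> left, 'LR' -> right).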
             if self.bs == 'RL':
                 env._set_homotopy_class('left')
                 eval_env._set_homotopy_class('left')
             elif self.bs == 'LR':
                 env._set_homotopy_class('right')
                 eval_env._set_homotopy_class('right')
             if self.model_type == "PPO":
                 if self.is_save:
                     self.PPO = PPO2(
                         'MlpPolicy',
                         env,
                         verbose=1,
                         seed=self.seed,
                         learning_rate=1e-3,
                     )
                 else:
                     self.PPO = PPO2('MlpPolicy',
                                     env,
                                     verbose=1,
                                     seed=self.seed,
                                     learning_rate=1e-3)
                 self.model = train(self.PPO, eval_env, self.timesteps,
                                    self.experiment_dir, self.is_save,
                                    self.eval_save_period, self.rets_path,
                                    0)
             elif self.model_type == "DQN":
                 if self.is_save:
                     self.DQN = DQN(
                         'MlpPolicy',
                         env,
                         verbose=1,
                         seed=self.seed,
                         prioritized_replay=True,
                         learning_rate=1e-3,
                         tensorboard_log="./Gridworldv1_tensorboard/" +
                         self.experiment_name,
                         full_tensorboard_log=True)
                 else:
                     self.DQN = DQN('MlpPolicy',
                                    env,
                                    verbose=1,
                                    seed=self.seed,
                                    prioritized_replay=True,
                                    learning_rate=1e-3)
                 self.model = train(self.DQN, eval_env, self.timesteps,
                                    self.experiment_dir, self.is_save,
                                    self.eval_save_period, self.rets_path,
                                    0)
             elif self.model_type == "HER":
                 env = HERGoalEnvWrapper(env)
                 eval_env = HERGoalEnvWrapper(eval_env)
                 print("hc: ", env.env.homotopy_class)
                 self.HER = HER('MlpPolicy',
                                env,
                                DDPG,
                                n_sampled_goal=4,
                                goal_selection_strategy="future",
                                seed=self.seed,
                                verbose=1)
                 self.model = train(self.HER, eval_env, self.timesteps,
                                    self.experiment_dir, self.is_save,
                                    self.eval_save_period, self.rets_path,
                                    0)
Example No. 12
import gym
import numpy as np
import pandas as pd
from stable_baselines import DDPG, HER
from stable_baselines.common import set_global_seeds
from stable_baselines.her.utils import HERGoalEnvWrapper

if __name__ == '__main__':
    # Env and model names and paths
    env_id = "CTR-Reach-v0"
    # env_id = "CTR-Reach-Noisy-v0"
    exp_id = "cras_exp_6"

    model_path = "/home/keshav/ctm2-stable-baselines/saved_results/icra_experiments/" + exp_id + "/learned_policy/500000_saved_model.pkl"

    # Create envs and model
    env = HERGoalEnvWrapper(gym.make(env_id))
    model = HER.load(model_path, env=env)

    seed = np.random.randint(0, 10)
    set_global_seeds(seed)

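    # Accumulators for per-episode results (filled later in the script).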
    q_joints = np.array([])
    achieved_goals = np.array([])
    desired_goals = np.array([])
    time_taken = np.array([])

    # Run an initial number of random actions to randomize starting position
    obs = env.reset()
    for t in range(100):
        env.step(env.action_space.sample())  # take a random action
Example No. 13
import csv  # needed for csv.writer below


def save_traj(model, state_history):
    state_history = list(state_history)
    with open("output/fetch/single_trajs/{}.csv".format(model[1]), "w") as f:
        writer = csv.writer(f)
        writer.writerow(state_history)


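# FLAGS, load_env, load_model, evaluate, and HERGoalEnvWrapper are assumed to be imported elsewhere in this
# script; the policy tables (spB7L, BR_BL) come from the star imports below.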
if __name__ == "__main__":
    if FLAGS.env == "nav1":
        from output.updated_gridworld_continuous.policies import *
        model_info = spB7L
        #eval_env = load_env("Continuous-v0", "PPO")
        eval_env = load_env("ContinuousSparse-v0", "HER")
        #TODO: REMOVE
        eval_env = HERGoalEnvWrapper(load_env("ContinuousSparse-v0"))
        eval_env.env._set_barrier_size(7)
        eval_env.env._set_homotopy_class('left')
        model = load_model(model_info, "HER", baseline=None)
    elif FLAGS.env == 'fetch':
        from output.fetch2.policies import *
        model_info = BR_BL
        eval_env = HERGoalEnvWrapper(load_env("Fetch-v0"))
        model = load_model(model_info, "HER", baseline=None)
    save = False
    sum_reward = 0.0
    num_episode = 10
    for ne in range(num_episode):
        mean_ret, std_ret, total_ret, state_history = evaluate(model,
                                                               eval_env,
                                                               render=True)
Example No. 14
import gym
import numpy as np
import pandas as pd
from stable_baselines import DDPG, HER
from stable_baselines.common import set_global_seeds
from stable_baselines.her.utils import HERGoalEnvWrapper

# This script runs through a number of episodes, records the achieved goal and the position error, and plots them in
# the workspace to visualize whether errors are evenly spread or biased toward particular regions.
if __name__ == '__main__':
    # Env and model names and paths
    env_id = "CTR-Reach-v0"
    model_path = "/home/keshav/ctm2-stable-baselines/saved_results/icra_experiments/cras_exp_1/learned_policy/500000_saved_model.pkl"

    # Create envs and model
    env = HERGoalEnvWrapper(gym.make(env_id))
    model = HER.load(model_path, env=env)

    seed = np.random.randint(0, 10)
    set_global_seeds(seed)

    errors = np.array([])
    achieved_goals = np.empty((1, 3))
    episode_rewards = np.array([])

    num_episodes = 100

    for episode in range(num_episodes):
        print('episode: ', episode)
        # Run random episodes and save sequence of actions and states to plot in matlab
        episode_reward = 0