Code Example #1
    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        # if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
        if algo in ['ddpg', 'td3'] or trial.model_class in [
                DDPG, TD3
        ]:  # bug to report: changed by Pierre
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = eval_freq
        if isinstance(model.get_env(), VecEnv):
            eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        if algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(eval_env, VecEnv):
                print("UNVECTORIZE ENV")
                eval_env = _UnvecWrapper(eval_env)
            # eval_env = HERGoalEnvWrapper(eval_env)  # commented by Pierre

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost
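
The objective above returns a cost (the negated mean evaluation reward), so it is meant to be minimized by an Optuna study, with TrialEvalCallback handling the report/prune handshake. A minimal sketch of driving such an objective, assuming Optuna's default TPE sampler plus a median pruner (both illustrative choices, not taken from the snippet):

    import optuna

    # direction defaults to "minimize", which matches returning a cost
    study = optuna.create_study(pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=10)
    print(study.best_trial.params)
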
Code Example #2
    def create_env(n_envs, eval_env=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env else save_path

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id, **env_kwargs)
            env.seed(args.seed)
            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([make_env(env_id, 0, args.seed, wrapper_class=env_wrapper, log_dir=log_dir, env_kwargs=env_kwargs)])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir,
                                            wrapper_class=env_wrapper, env_kwargs=env_kwargs) for i in range(n_envs)])
            if normalize:
                if args.verbose > 0:
                    if len(normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **normalize_kwargs)
        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
            del hyperparams['frame_stack']
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env
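
The HER branch at the end of create_env first unvectorizes a single-env VecEnv, then flattens the dict observation with HERGoalEnvWrapper. A minimal standalone sketch of that pattern, assuming stable-baselines 2.x import paths and using 'FetchReach-v1' as an illustrative goal-based env id:

    import gym
    from stable_baselines.common.vec_env import DummyVecEnv, VecEnv
    from stable_baselines.common.base_class import _UnvecWrapper
    from stable_baselines.her import HERGoalEnvWrapper

    env = DummyVecEnv([lambda: gym.make('FetchReach-v1')])
    if isinstance(env, VecEnv):
        env = _UnvecWrapper(env)   # back to a single, non-vectorized env
    env = HERGoalEnvWrapper(env)   # flatten the dict observation space
    print(env.observation_space)
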
Code Example #3
File: MultiTaskA2C.py  Project: andris955/diplomaterv
    def __init__(self, policy_name: str, env_dict):
        self.policy_name = policy_name
        self.policy = get_policy_from_string(self.policy_name)
        self.env_dict = env_dict
        self.tasks = list(self.env_dict.keys()) if self.env_dict is not None else None
        self.verbose = config.verbose
        self.observation_space_dict = {}
        self.action_space_dict = {}
        self.n_envs_per_task = None
        self.num_timesteps = 0

        if env_dict is not None:
            if not isinstance(env_dict, dict):
                print(
                    "env_dict must be a dictionary with keys as the name of the game and values are SubprocVecEnv objects"
                )
            for key in env_dict.keys():
                self.observation_space_dict[key] = env_dict[
                    key].observation_space
                self.action_space_dict[key] = env_dict[key].action_space
            for key in self.env_dict.keys():
                if isinstance(self.env_dict[key], VecEnv):
                    if env_dict[key].num_envs == 1:
                        self.env_dict[key] = _UnvecWrapper(env_dict[key])
                        self._vectorize_action = True
                    if self.n_envs_per_task is None:
                        self.n_envs_per_task = self.env_dict[key].num_envs
                    else:
                        if self.n_envs_per_task != self.env_dict[key].num_envs:
                            raise ValueError(
                                "All tasks must have the same number of environments "
                            )
                    break
                else:
                    raise ValueError(
                        "Error: the model requires a vectorized environment, please use a VecEnv wrapper."
                    )
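
The constructor above expects env_dict to map task names to vectorized environments that all share the same number of sub-environments. A minimal sketch of building such a dictionary; the env ids, n_envs value, and the make_task helper are illustrative assumptions:

    import gym
    from stable_baselines.common.vec_env import SubprocVecEnv

    def make_task(env_id, n_envs=4):
        # one vectorized env per task, all with the same number of sub-envs
        return SubprocVecEnv([lambda: gym.make(env_id) for _ in range(n_envs)])

    env_dict = {
        'BreakoutNoFrameskip-v4': make_task('BreakoutNoFrameskip-v4'),
        'PongNoFrameskip-v4': make_task('PongNoFrameskip-v4'),
    }
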
Code Example #4
    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.
            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps -
                    self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_episodes, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_episodes < n_test_episodes:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = self_.test_env.reset()

            mean_reward = np.mean(rewards)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # report best or report current ?
            # report num_timesteps or elasped time ?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if need
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        model = model_fn(**kwargs)
        model.test_env = env_fn(n_envs=1)
        model.trial = trial
        if algo == 'her':
            model.model.trial = trial
            # Wrap the env if need to flatten the dict obs
            if isinstance(model.test_env, VecEnv):
                model.test_env = _UnvecWrapper(model.test_env)
            model.model.test_env = HERGoalEnvWrapper(model.test_env)

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        del model.env, model.test_env
        del model

        if is_pruned:
            raise optuna.structs.TrialPruned()

        return cost
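
This older variant of the objective uses the (locals, globals) callback signature of stable-baselines 2.x and raises optuna.structs.TrialPruned, which newer Optuna releases expose as optuna.exceptions.TrialPruned (as in Code Example #1). The manual test loop inside the callback mirrors what the evaluate_policy helper provides in stable-baselines >= 2.10; a minimal sketch of that helper, with an illustrative env id and training budget:

    import gym
    from stable_baselines import PPO2
    from stable_baselines.common.evaluation import evaluate_policy

    env = gym.make('CartPole-v1')
    model = PPO2('MlpPolicy', env, verbose=0).learn(total_timesteps=1000)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5,
                                              deterministic=True)
    print(mean_reward, std_reward)
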
Code Example #5
File: 2_train.py  Project: PierreExeter/RL_reacher
    def create_env(n_envs, eval_env=False, no_log=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :param no_log: (bool) Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else save_path

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id, **env_kwargs)
            env.seed(args.seed)

            # added by Pierre (for some reason, monitoring the training wasn't enabled for DDPG)
            log_file = os.path.join(log_dir,
                                    str(rank)) if log_dir is not None else None
            env = Monitor(env, log_file)

            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([
                    make_env(env_id,
                             0,
                             args.seed,
                             wrapper_class=env_wrapper,
                             log_dir=log_dir,
                             env_kwargs=env_kwargs)
                ])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([
                    make_env(env_id,
                             i,
                             args.seed,
                             log_dir=log_dir,
                             wrapper_class=env_wrapper,
                             env_kwargs=env_kwargs) for i in range(n_envs)
                ])
            if normalize:
                # Copy to avoid changing default values by reference
                local_normalize_kwargs = normalize_kwargs.copy()
                # Do not normalize reward for env used for evaluation
                if eval_env:
                    if len(local_normalize_kwargs) > 0:
                        local_normalize_kwargs['norm_reward'] = False
                    else:
                        local_normalize_kwargs = {'norm_reward': False}

                if args.verbose > 0:
                    if len(local_normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(
                            local_normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **local_normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env
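
The Monitor wrapping added by Pierre gives the DDPG/DQN branch per-episode logging of rewards and episode lengths (the comment notes that monitoring was not enabled there before). A minimal sketch of that wrapper on its own; the log directory and env id are illustrative assumptions:

    import os
    import gym
    from stable_baselines.bench import Monitor

    log_dir = "/tmp/monitor_logs"
    os.makedirs(log_dir, exist_ok=True)
    env = gym.make('Pendulum-v0')
    # Monitor writes a <path>.monitor.csv file with episode rewards and lengths
    env = Monitor(env, os.path.join(log_dir, "0"))
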
Code Example #6
    def create_env(n_envs, eval_env=False, no_log=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :param no_log: (bool) Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else save_path
        if eval_env:
            # Deep-copy so tightening set_tol below does not leak back into the
            # shared, global env_kwargs used for the training env
            import copy
            eval_env_kwargs = copy.deepcopy(env_kwargs)

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        else:
            if n_envs == 1:
                if eval_env:
                    eval_env_kwargs["goal_tolerance_parameters"]["set_tol"] = 0.001
                    env = DummyVecEnv(
                        [make_env(env_id, 0, args.seed, wrapper_class=env_wrapper, log_dir=log_dir,
                                  info_keywords=("is_success", "error"), env_kwargs=eval_env_kwargs)])
                else:
                    env = DummyVecEnv(
                        [make_env(env_id, 0, args.seed, wrapper_class=env_wrapper, log_dir=log_dir,
                                  info_keywords=(), env_kwargs=env_kwargs)])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir,
                                            wrapper_class=env_wrapper, env_kwargs=env_kwargs) for i in range(n_envs)])
            if normalize:
                # Copy to avoid changing default values by reference
                local_normalize_kwargs = normalize_kwargs.copy()
                # Do not normalize reward for env used for evaluation
                if eval_env:
                    if len(local_normalize_kwargs) > 0:
                        local_normalize_kwargs['norm_reward'] = False
                    else:
                        local_normalize_kwargs = {'norm_reward': False}

                if args.verbose > 0:
                    if len(local_normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(local_normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **local_normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env
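
When normalize is set, the evaluation env above gets norm_reward=False so reported returns stay in the original reward scale, while the training env keeps reward normalization. A minimal sketch of that asymmetry, syncing the observation statistics the way Code Example #4 does with obs_rms; the env id is an illustrative assumption:

    import gym
    from copy import deepcopy
    from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

    train_env = VecNormalize(DummyVecEnv([lambda: gym.make('Pendulum-v0')]))
    eval_env = VecNormalize(DummyVecEnv([lambda: gym.make('Pendulum-v0')]),
                            norm_reward=False)
    # evaluate with the training env's running observation statistics
    eval_env.obs_rms = deepcopy(train_env.obs_rms)
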
Code Example #7
    def create_env(n_envs, eval_env=False, no_log=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :param no_log: (bool) Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else save_path

        # Set initializer and action type for environment, standard implementation currently does not support
        # custom types, so pass them here (kwargs is global, so do set again during repeated calls)
        if "initializer" in env_kwargs.keys() and isinstance(
                env_kwargs["initializer"], int):
            if env_kwargs["initializer"] == 0:
                env_kwargs["initializer"] = RandomInitializer(
                    env_kwargs.pop("difficulty"))
            elif env_kwargs["initializer"] == 1:
                env_kwargs["initializer"] = CompletelyRandomInitializer()
            else:
                raise RuntimeError('Unsupported initializer "{}"'.format(
                    env_kwargs["initializer"]))

        if "action_type" in env_kwargs.keys() and isinstance(
                env_kwargs["action_type"], int):
            if env_kwargs["action_type"] == "POSITION":
                env_kwargs["action_type"] = ActionType.POSITION
            elif env_kwargs["action_type"] == "TORQUE":
                env_kwargs["action_type"] = ActionType.TORQUE
            elif env_kwargs["action_type"] == "TORQUE_AND_POSITION":
                env_kwargs["action_type"] = ActionType.TORQUE_AND_POSITION
            else:
                raise RuntimeError('Unsupported Action Type "{}"'.format(
                    env_kwargs["action_type"]))
        else:
            env_kwargs["action_type"] = ActionType.POSITION

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id, **env_kwargs)
            env.seed(args.seed)
            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([
                    make_env(env_id,
                             0,
                             args.seed,
                             wrapper_class=env_wrapper,
                             log_dir=log_dir,
                             env_kwargs=env_kwargs)
                ])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([
                    make_env(env_id,
                             i,
                             args.seed,
                             log_dir=log_dir,
                             wrapper_class=env_wrapper,
                             env_kwargs=env_kwargs) for i in range(n_envs)
                ])
            if normalize:
                # Copy to avoid changing default values by reference
                local_normalize_kwargs = normalize_kwargs.copy()
                # Do not normalize reward for env used for evaluation
                if eval_env:
                    if len(local_normalize_kwargs) > 0:
                        local_normalize_kwargs['norm_reward'] = False
                    else:
                        local_normalize_kwargs = {'norm_reward': False}

                if args.verbose > 0:
                    if len(local_normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(
                            local_normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **local_normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env
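
The initializer and action_type blocks above convert plain config values into the custom objects the robotics env expects, since (as the comment notes) the standard hyperparameter handling does not support such types directly. A generic sketch of the same value-to-enum mapping using only the standard library; the ActionType enum below is a stand-in, not the one imported by the snippet:

    from enum import Enum

    class ActionType(Enum):
        POSITION = 0
        TORQUE = 1
        TORQUE_AND_POSITION = 2

    def parse_action_type(value):
        try:
            return ActionType[value]   # e.g. "TORQUE" -> ActionType.TORQUE
        except KeyError:
            raise RuntimeError('Unsupported Action Type "{}"'.format(value))
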