Exemple #1
0
 def run(self, env: Env, n_steps=1000000):
     """Wrap and start ``env``, then drive it through ``self._run``.

     Args:
         env: Environment to run; it is passed through ``self.wrap_env``
             before being started.
         n_steps: Step budget forwarded unchanged to ``self._run``.

     A ``KeyboardInterrupt`` raised while running is swallowed and the
     wrapped environment is stopped. On normal completion this method
     does not call ``stop()`` itself (presumably ``_run`` handles its own
     shutdown -- confirm against the implementation).
     """
     wrapped = self.wrap_env(env)
     wrapped.start()
     try:
         self._run(wrapped, n_steps)
     except KeyboardInterrupt:
         # Ctrl-C: shut the environment down instead of propagating.
         wrapped.stop()
Exemple #2
0
 def run(self, env: Env, expt, event_buffer=None, n_steps=1000000):
     """Wrap and start ``env``, then drive it through ``self._run``.

     Args:
         env: Environment to run; it is passed through ``self.wrap_env``
             before being started.
         expt: Forwarded unchanged to ``self._run``.
         event_buffer: Forwarded unchanged to ``self._run``.
         n_steps: Step budget forwarded unchanged to ``self._run``.

     A ``KeyboardInterrupt`` raised while running is swallowed: the
     wrapped environment is stopped and ``self.on_finish()`` is called.
     Neither happens here on normal completion.
     """
     wrapped = self.wrap_env(env)
     wrapped.start()
     try:
         self._run(wrapped, expt, event_buffer, n_steps)
     except KeyboardInterrupt:
         # Ctrl-C: stop the environment first, then run the finish hook.
         wrapped.stop()
         self.on_finish()
Exemple #3
0
 def wrap_env(self, env: Env) -> Env:
     """Bundle ``env`` and deep copies of it into a ``MultiProcEnv``.

     The result holds ``env`` itself followed by ``self.n_envs - 1`` deep
     copies. ``env.render`` is set to ``False`` while the copies are made
     and restored afterwards, so the copies are taken with rendering off
     while ``env`` keeps the caller's original setting.
     """
     original_render = env.render
     env.render = False  # copies below are taken with rendering disabled
     members = [env]
     for _ in range(self.n_envs - 1):
         members.append(copy.deepcopy(env))
     env.render = original_render
     return MultiProcEnv(members)
Exemple #4
0
    def run(self, env: Env, n_steps=1000000):
        """Run the agent on ``env``, optionally split across its sub-environments.

        Two paths, selected from ``self.args`` and ``SUB_ENV_DICT``:

        * Single-environment path -- taken when ``self.args.test`` is set,
          when ``env.id`` has no entry in ``SUB_ENV_DICT``, or when
          ``self.args.HRL`` is falsy. ``env`` is wrapped and started, then
          ``self._run`` is called; if ``self.args.HRL == 'separate'``,
          ``self._run_subenvs`` is called instead with the sub-environment
          list looked up from the wrapped environment's ``id``.
        * Sub-environment path -- otherwise. One ``SC2Env`` is created per
          entry of ``SUB_ENV_DICT[env.id]`` and run in sequence, each with
          its own step budget and reward threshold:

          - ``'human'``, ``'sequential'``, ``'separate'``: even step split,
            thresholds from ``HRL_thredhold(env.id)``.
          - ``'random'``: step budgets are the gaps between randomly drawn
            cut points in ``[0, n_steps]`` (fixed seed 1234); no thresholds.
          - ``'systematic'``: even step split; no thresholds.

        Args:
            env: Environment to run on; also the source of ``render`` and
                ``max_ep_len`` for every sub-environment that is created.
            n_steps: Total step budget.

        A ``KeyboardInterrupt`` stops the environment that is currently
        running (and ends the sub-environment loop). ``self.on_finish()``
        is called exactly once at the end of either path.
        """
        hrl_mode = self.args.HRL

        if self.args.test or (env.id not in SUB_ENV_DICT) or (not hrl_mode):
            # Either testing, training without HRL at all, or the selected
            # env has no sub-environments.
            env = self.wrap_env(env)
            env.start()

            if hrl_mode != 'separate':
                try:
                    self.on_start()
                    self._run(env, n_steps)
                except KeyboardInterrupt:
                    env.stop()
            else:
                # Testing with HRL and separately trained sub-policies.
                # The lookup deliberately happens after wrapping, as before.
                subenvs = SUB_ENV_DICT[env.id]
                print(
                    LOGGING_MSG_HEADER +
                    ": Ensure that you are testing models trained using the <HRL_separate> approach."
                )
                print(
                    LOGGING_MSG_HEADER +
                    ": Testing the {} with combined subpolicies trained seperately from subenvs-{}"
                    .format(env.id, subenvs))
                try:
                    self.on_start()
                    self._run_subenvs(env, n_steps, subenvs=subenvs)
                except KeyboardInterrupt:
                    env.stop()

            self.on_finish()
            return

        assert hrl_mode in [
            'human', 'systematic', 'random', 'sequential', 'separate'
        ]
        subenvs = SUB_ENV_DICT[env.id]
        print(LOGGING_MSG_HEADER + ": Subenvs are: ", subenvs)

        # Defaults: an even split of the step budget and no reward
        # thresholds. 'systematic' uses exactly these defaults.
        subenv_steps = [n_steps // len(subenvs) for _ in subenvs]
        thresholds = [None for _ in subenvs]

        if hrl_mode in ['human', 'sequential', 'separate']:
            thresholds = HRL_thredhold(env.id)
            print(LOGGING_MSG_HEADER + ": Reward thresholds are: ",
                  thresholds)
        elif hrl_mode == 'random':
            import numpy as np
            np.random.seed(1234)
            # Draw len(subenvs) - 1 distinct cut points inside the budget;
            # the gaps between consecutive cuts (with 0 and n_steps as the
            # outer bounds) become the per-subenv step counts.
            cut_points = sorted(
                np.random.choice(n_steps, len(subenvs) - 1, replace=False))
            boundaries = [0] + cut_points + [n_steps]
            subenv_steps = np.ediff1d(boundaries)

        # Capture these from the caller's env once: each sub-environment is
        # configured from the original env, not from the previous
        # iteration's wrapped env.
        render = env.render
        max_ep_len = env.max_ep_len

        for i, (subenv, subenv_step, threshold) in enumerate(
                zip(subenvs, subenv_steps, thresholds)):
            sub_env = SC2Env(subenv, render, max_ep_len=max_ep_len)
            print(
                LOGGING_MSG_HEADER +
                ": Creating and Running subenv : {} with maximum {} steps, and reward threshold is {}."
                .format(sub_env.id, subenv_step, threshold))
            wrapped = self.wrap_env(sub_env)
            wrapped.start()
            try:
                self.on_start()
                if i != 0:
                    # Every sub-environment after the first starts from a
                    # reset agent.
                    self.reset()
                self._run(wrapped, subenv_step, threshold, subenv_id=i)
            except KeyboardInterrupt:
                wrapped.stop()
                break
        self.on_finish()
        """