def run(self, env: Env, n_steps=1000000):
    """Wrap and start ``env``, then drive it through ``self._run``.

    Args:
        env: Environment to run on; it is passed through ``self.wrap_env``
            before being started.
        n_steps: Step budget forwarded unchanged to ``self._run``.

    A ``KeyboardInterrupt`` raised while ``self._run`` is executing is caught
    and answered by stopping the wrapped environment; it is not re-raised.
    """
    wrapped_env = self.wrap_env(env)
    wrapped_env.start()
    try:
        self._run(wrapped_env, n_steps)
    except KeyboardInterrupt:
        # Manual abort: shut the environment down instead of propagating.
        wrapped_env.stop()
def run(self, env: Env, expt, event_buffer=None, n_steps=1000000):
    """Wrap and start ``env``, then drive it through ``self._run``.

    Args:
        env: Environment to run on; it is passed through ``self.wrap_env``
            before being started.
        expt: Forwarded unchanged to ``self._run``.
        event_buffer: Forwarded unchanged to ``self._run``.
        n_steps: Step budget forwarded unchanged to ``self._run``.

    A ``KeyboardInterrupt`` raised while ``self._run`` is executing is caught:
    the wrapped environment is stopped and ``self.on_finish`` is invoked. The
    interrupt is not re-raised.
    """
    wrapped_env = self.wrap_env(env)
    wrapped_env.start()
    try:
        self._run(wrapped_env, expt, event_buffer, n_steps)
    except KeyboardInterrupt:
        wrapped_env.stop()
        # NOTE(review): the original formatting was collapsed onto one line;
        # on_finish is kept inside the interrupt handler, directly after the
        # stop call it follows in the source -- confirm this placement.
        self.on_finish()
def run(self, env: Env, n_steps=1000000): if self.args.test or (env.id not in SUB_ENV_DICT) or (not self.args.HRL): # either testing or training without HRL at all # or the env selected does not have the subenvs if not self.args.HRL or self.args.HRL != 'separate': env = self.wrap_env(env) env.start() try: self.on_start() self._run(env, n_steps) except KeyboardInterrupt: env.stop() self.on_finish() # testing with HRL and separate subenvs else: env = self.wrap_env(env) env.start() subenvs = SUB_ENV_DICT[env.id] print( LOGGING_MSG_HEADER + ": Ensure that you are testing models trained using the <HRL_separate> approach." ) print( LOGGING_MSG_HEADER + ": Testing the {} with combined subpolicies trained seperately from subenvs-{}" .format(env.id, subenvs)) try: self.on_start() self._run_subenvs(env, n_steps, subenvs=subenvs) except KeyboardInterrupt: env.stop() self.on_finish() else: assert self.args.HRL in [ 'human', 'systematic', 'random', 'sequential', 'separate' ] subenvs = SUB_ENV_DICT[env.id] print(LOGGING_MSG_HEADER + ": Subenvs are: ", subenvs) subenv_steps = [n_steps // len(subenvs) for subenv in subenvs] thresholds = [None for subenv in subenvs] if self.args.HRL in ['human', 'sequential', 'separate']: thresholds = HRL_thredhold(env.id) print(LOGGING_MSG_HEADER + ": Reward thresholds are: ", thresholds) elif self.args.HRL == 'random': import numpy as np np.random.seed(1234) indices = sorted( np.random.choice(n_steps, len(subenvs) - 1, replace=False)) indices = [0] + sorted( np.random.choice(n_steps, I - 1, replace=False)) + [n_steps] subenv_steps = np.ediff1d(indices) elif self.args.HRL == 'systematic': pass # subenv_steps already defined and initializied for i, (subenv, subenv_step, threshold) in enumerate( zip(subenvs, subenv_steps, thresholds)): env = SC2Env(subenv, env.render, max_ep_len=env.max_ep_len) print( LOGGING_MSG_HEADER + ": Creating and Running subenv : {} with maximum {} steps, and reward threshold is {}." 
.format(env.id, subenv_step, threshold)) env = self.wrap_env(env) env.start() try: self.on_start() if i != 0: self.reset() self._run(env, subenv_step, threshold, subenv_id=i) except KeyboardInterrupt: env.stop() break self.on_finish() """