def render(self, mode='human'):
    """
    Gym environment rendering — not supported by this class.

    :param mode: (str) the rendering type (ignored; no renderer exists)
    """
    # No renderer is available for this environment; emit a warning
    # rather than raising so callers can proceed.
    warning = 'Render not defined for %s' % self
    logger.warn(warning)
def render(self, *args, **kwargs):
    """
    Gym environment rendering — not supported by this class.

    :param mode: (str) the rendering type (accepted via ``*args``/``**kwargs``
        but ignored; no renderer exists)
    """
    # All rendering arguments are accepted and discarded; warn instead of failing.
    message = 'Render not defined for %s' % self
    logger.warn(message)
def load_state(fname, sess=None):
    """
    Restore TensorFlow variables from a checkpoint file.

    Deprecated: use ``load_variables`` instead.

    :param fname: (str) path to the checkpoint to restore from
    :param sess: (tf.Session) session to restore into; defaults to the
        current/global session from ``get_session()``
    """
    from baselines import logger
    logger.warn(
        'load_state method is deprecated, please use load_variables instead')
    sess = sess or get_session()
    saver = tf.train.Saver()
    # Bug fix: previously this restored into tf.get_default_session(),
    # silently ignoring the resolved `sess` argument computed above.
    saver.restore(sess, fname)
def __init__(self, use_approximated_vel=True):
    """
    Model-based Ant environment wrapper.

    :param use_approximated_vel: (bool) whether to use the approximated
        velocity (stored on the instance; semantics defined by subclasses/users)
    """
    # Announce the modified environment up front, then record configuration.
    logger.warn('Modified Gym Envs!')
    self.use_approximated_vel = use_approximated_vel
    self.rescale_action = False
    # Initialize both parents explicitly; order matters for attribute setup.
    ant.AntEnv.__init__(self)
    BaseModelBasedEnv.__init__(self)
def reset(self):
    """
    Reset every worker environment and return the combined observations.

    If a step is still in flight, drain it first (with a warning) so the
    pipes are in a clean request/reply state before sending 'reset'.
    """
    if self.waiting_step:
        logger.warn('Called reset() while waiting for the step to complete')
        self.step_wait()
    # Broadcast the reset command to all workers, then collect one reply each.
    for remote in self.parent_pipes:
        remote.send(('reset', None))
    replies = [remote.recv() for remote in self.parent_pipes]
    return self._decode_obses(replies)
def __init__(self, use_approximated_vel=True):
    """
    Model-based Hopper environment wrapper.

    :param use_approximated_vel: (bool) whether to use the approximated
        velocity; when False the modified-env warning is emitted
    """
    self.rescale_action = False
    self.use_approximated_vel = use_approximated_vel
    # Only the exact-velocity variant deviates from the stock Gym env.
    if not use_approximated_vel:
        logger.warn('Modified Gym Env!')
    # Initialize both parents explicitly; order matters for attribute setup.
    hopper.HopperEnv.__init__(self)
    BaseModelBasedEnv.__init__(self)
def save_state(fname, sess=None):
    """
    Save TensorFlow variables to a checkpoint file.

    Deprecated: use ``save_variables`` instead.

    :param fname: (str) path of the checkpoint to write; parent directories
        are created as needed
    :param sess: (tf.Session) session to save from; defaults to the
        current/global session from ``get_session()``
    """
    from baselines import logger
    logger.warn(
        'save_state method is deprecated, please use save_variables instead')
    sess = sess or get_session()
    dirname = os.path.dirname(fname)
    # Fix: `any(dirname)` iterated the characters of the path string; plain
    # truthiness expresses the intended "non-empty directory" check.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    saver = tf.train.Saver()
    # Bug fix: previously this saved tf.get_default_session(), silently
    # ignoring the resolved `sess` argument computed above.
    saver.save(sess, fname)
def render(self, mode: str, *args, **kwargs):
    """
    Gym environment rendering for a vectorized environment.

    Collects one frame per sub-environment, tiles them into a single image,
    and either displays it ('human') or returns it ('rgb_array').

    :param mode: the rendering type
    """
    try:
        frames = self.get_images(*args, **kwargs)
    except NotImplementedError:
        # Sub-environments cannot produce images; warn and bail out.
        logger.warn('Render not defined for {}'.format(self))
        return

    # Tile the per-worker frames into one big image.
    tiled = tile_images(frames)

    if mode == 'rgb_array':
        return tiled
    if mode == 'human':
        import cv2  # pytype:disable=import-error
        # OpenCV expects BGR channel order, hence the channel reversal.
        cv2.imshow('vecenv', tiled[:, :, ::-1])
        cv2.waitKey(1)
        return
    raise NotImplementedError
def render(self):
    """Rendering is not implemented for this environment; log a warning."""
    # Warn rather than raise so callers that render opportunistically keep working.
    warning = 'Render not defined for %s' % self
    logger.warn(warning)
def launch(env, logdir, n_epochs, num_cpu, seed, replay_strategy,
           policy_save_interval, clip_return, override_params=None,
           save_policies=True):
    """
    launch training with mpi

    :param env: (str) environment ID
    :param logdir: (str) the log directory
    :param n_epochs: (int) the number of training epochs
    :param num_cpu: (int) the number of CPUs to run on
    :param seed: (int) the initial random seed
    :param replay_strategy: (str) the type of replay strategy ('future' or 'none')
    :param policy_save_interval: (int) the interval with which policy pickles are saved.
        If set to 0, only the best and latest policy will be pickled.
    :param clip_return: (float): clip returns to be in [-clip_return, clip_return]
    :param override_params: (dict) override any parameter for training
    :param save_policies: (bool) whether or not to save the policies
    """
    # Avoid a mutable default argument for the override dict.
    if override_params is None:
        override_params = {}

    # Fork for multi-CPU MPI implementation.
    if num_cpu > 1:
        try:
            whoami = mpi_fork(num_cpu, ['--bind-to', 'core'])
        except CalledProcessError:
            # fancy version of mpi call failed, try simple version
            whoami = mpi_fork(num_cpu)
        # The parent process only forks workers; it has nothing else to do.
        if whoami == 'parent':
            sys.exit(0)
        tf_util.single_threaded_session().__enter__()
    rank = MPI.COMM_WORLD.Get_rank()

    # Configure logging: rank 0 honors the requested logdir, other ranks
    # fall back to the default logger configuration.
    if rank == 0:
        if logdir or logger.get_dir() is None:
            logger.configure(folder=logdir)
    else:
        logger.configure()
    logdir = logger.get_dir()
    assert logdir is not None
    os.makedirs(logdir, exist_ok=True)

    # Seed everything.
    # Each MPI rank gets a distinct, widely-spaced seed derived from the base seed.
    rank_seed = seed + 1000000 * rank
    set_global_seeds(rank_seed)

    # Prepare params.
    # NOTE(review): this binds (and below mutates) the shared module-level
    # config.DEFAULT_PARAMS dict rather than a copy — confirm this is intended.
    params = config.DEFAULT_PARAMS
    params['env_name'] = env
    params['replay_strategy'] = replay_strategy
    if env in config.DEFAULT_ENV_PARAMS:
        params.update(
            config.DEFAULT_ENV_PARAMS[env])  # merge env-specific parameters in
    params.update(
        **override_params)  # makes it possible to override any parameter
    # Persist the fully-merged parameter set alongside the logs for reproducibility.
    with open(os.path.join(logger.get_dir(), 'params.json'), 'w') as file_handler:
        json.dump(params, file_handler)
    params = config.prepare_params(params)
    config.log_params(params, logger_input=logger)

    # Single-worker HER runs are valid but known to underperform the reported results.
    if num_cpu == 1:
        logger.warn()
        logger.warn('*** Warning ***')
        logger.warn(
            'You are running HER with just a single MPI worker. This will work, but the ' +
            'experiments that we report in Plappert et al. (2018, https://arxiv.org/abs/1802.09464) ' +
            'were obtained with --num_cpu 19. This makes a significant difference and if you ' +
            'are looking to reproduce those results, be aware of this. Please also refer to ' +
            'https://github.com/openai/stable_baselines/issues/314 for further details.'
        )
        logger.warn('****************')
        logger.warn()

    dims = config.configure_dims(params)
    policy = config.configure_ddpg(dims=dims, params=params,
                                   clip_return=clip_return)

    # Exploration rollouts: stochastic policy, no target network, no Q logging.
    rollout_params = {
        'exploit': False,
        'use_target_net': False,
        # 'use_demo_states': True,
        'compute_q': False,
        'time_horizon': params['time_horizon'],
    }

    # Evaluation rollouts: deterministic policy, optional polyak target net, Q logging on.
    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        # 'use_demo_states': False,
        'compute_q': True,
        'time_horizon': params['time_horizon'],
    }

    # Copy the shared hyperparameters into both rollout configurations.
    for name in [
            'time_horizon', 'rollout_batch_size', 'gamma', 'noise_eps',
            'random_eps'
    ]:
        rollout_params[name] = params[name]
        eval_params[name] = params[name]

    rollout_worker = RolloutWorker(params['make_env'], policy, dims, logger,
                                   **rollout_params)
    rollout_worker.seed(rank_seed)

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger,
                              **eval_params)
    evaluator.seed(rank_seed)

    train(policy=policy, rollout_worker=rollout_worker, evaluator=evaluator,
          n_epochs=n_epochs, n_test_rollouts=params['n_test_rollouts'],
          n_cycles=params['n_cycles'], n_batches=params['n_batches'],
          policy_save_interval=policy_save_interval,
          save_policies=save_policies)
def set_state(self, state):
    """
    Setting the environment state is not supported; logs a warning.

    :param state: the requested state (ignored)
    """
    # Deliberate no-op: warn so callers know the state was not applied.
    message = '`set_state` is not implemented'
    logger.warn(message)