Example #1
    def render(self, mode='human'):
        """
        Gym environment rendering

        :param mode: (str) the rendering type
        """
        logger.warn('Render not defined for %s' % self)
Example #2
    def render(self, *args, **kwargs):
        """
        Gym environment rendering

        :param args: positional arguments forwarded to the renderer
        :param kwargs: keyword arguments forwarded to the renderer
        """
        logger.warn('Render not defined for %s' % self)
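
Both render stubs above only log a warning; a concrete environment is expected to override them. A minimal sketch of such an override (the `_last_frame` buffer is a hypothetical attribute for illustration, not part of the source):

import gym
from gym import logger


class RenderableEnv(gym.Env):
    def render(self, mode='human'):
        if mode == 'rgb_array':
            # Return the most recent frame; `self._last_frame` is a
            # hypothetical buffer this environment would maintain.
            return self._last_frame
        # Keep the warning behaviour for unsupported modes.
        logger.warn('Render not defined for %s' % self)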
Example #3
import tensorflow as tf
from baselines.common.tf_util import get_session


def load_state(fname, sess=None):
    from baselines import logger
    logger.warn(
        'load_state method is deprecated, please use load_variables instead')
    sess = sess or get_session()
    saver = tf.train.Saver()
    saver.restore(sess, fname)
Example #4
    def __init__(self, use_approximated_vel=True):
        logger.warn('Modified Gym Envs!')
        self.rescale_action = False
        self.use_approximated_vel = use_approximated_vel

        ant.AntEnv.__init__(self)
        BaseModelBasedEnv.__init__(self)
Example #5
    def reset(self):
        if self.waiting_step:
            logger.warn(
                'Called reset() while waiting for the step to complete')
            self.step_wait()
        for pipe in self.parent_pipes:
            pipe.send(('reset', None))
        return self._decode_obses([pipe.recv() for pipe in self.parent_pipes])
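
The reset above assumes each worker process answers a ('reset', None) message with an encoded observation. A minimal sketch of a matching worker loop (the function name and exact message protocol are assumptions, not from the source):

def worker(pipe, env_fn):
    env = env_fn()
    while True:
        cmd, data = pipe.recv()
        if cmd == 'reset':
            pipe.send(env.reset())
        elif cmd == 'step':
            pipe.send(env.step(data))
        elif cmd == 'close':
            pipe.close()
            break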
Example #6
    def __init__(self, use_approximated_vel=True):
        self.use_approximated_vel = use_approximated_vel
        self.rescale_action = False

        if not self.use_approximated_vel:
            logger.warn('Modified Gym Env!')
        hopper.HopperEnv.__init__(self)
        BaseModelBasedEnv.__init__(self)
Example #7
import os

import tensorflow as tf
from baselines.common.tf_util import get_session


def save_state(fname, sess=None):
    from baselines import logger
    logger.warn(
        'save_state method is deprecated, please use save_variables instead')
    sess = sess or get_session()
    dirname = os.path.dirname(fname)
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    saver = tf.train.Saver()
    saver.save(sess, fname)
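
A minimal roundtrip sketch tying Examples #3 and #7 together (the checkpoint path and the graph-building helper are illustrative, not from the source):

sess = get_session()
build_model()  # hypothetical function that defines the graph's variables
save_state('/tmp/models/run1', sess=sess)
# ... later, after rebuilding the same graph ...
load_state('/tmp/models/run1', sess=sess)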
Example #8
    def render(self, mode: str, *args, **kwargs):
        """
        Gym environment rendering

        :param mode: the rendering type
        """
        try:
            imgs = self.get_images(*args, **kwargs)
        except NotImplementedError:
            logger.warn('Render not defined for {}'.format(self))
            return

        # Create a big image by tiling images from subprocesses
        bigimg = tile_images(imgs)
        if mode == 'human':
            import cv2  # pytype:disable=import-error
            cv2.imshow('vecenv', bigimg[:, :, ::-1])
            cv2.waitKey(1)
        elif mode == 'rgb_array':
            return bigimg
        else:
            raise NotImplementedError
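
A minimal usage sketch of the tiled rendering above, assuming stable-baselines' DummyVecEnv around a standard Gym environment:

import gym
from stable_baselines.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([lambda: gym.make('CartPole-v1') for _ in range(4)])
vec_env.reset()
frame = vec_env.render(mode='rgb_array')  # one image tiling all four sub-envs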
Example #9
    def render(self):
        logger.warn('Render not defined for %s' % self)
Example #10
def launch(env,
           logdir,
           n_epochs,
           num_cpu,
           seed,
           replay_strategy,
           policy_save_interval,
           clip_return,
           override_params=None,
           save_policies=True):
    """
    Launch training with MPI

    :param env: (str) environment ID
    :param logdir: (str) the log directory
    :param n_epochs: (int) the number of training epochs
    :param num_cpu: (int) the number of CPUs to run on
    :param seed: (int) the initial random seed
    :param replay_strategy: (str) the type of replay strategy ('future' or 'none')
    :param policy_save_interval: (int) the interval with which policy pickles are saved.
        If set to 0, only the best and latest policy will be pickled.
    :param clip_return: (float) clip returns to be in [-clip_return, clip_return]
    :param override_params: (dict) override any parameter for training
    :param save_policies: (bool) whether or not to save the policies
    """

    if override_params is None:
        override_params = {}
    # Fork for multi-CPU MPI implementation.
    if num_cpu > 1:
        try:
            whoami = mpi_fork(num_cpu, ['--bind-to', 'core'])
        except CalledProcessError:
            # fancy version of mpi call failed, try simple version
            whoami = mpi_fork(num_cpu)

        if whoami == 'parent':
            sys.exit(0)
        tf_util.single_threaded_session().__enter__()
    rank = MPI.COMM_WORLD.Get_rank()

    # Configure logging
    if rank == 0:
        if logdir or logger.get_dir() is None:
            logger.configure(folder=logdir)
    else:
        logger.configure()
    logdir = logger.get_dir()
    assert logdir is not None
    os.makedirs(logdir, exist_ok=True)

    # Seed everything.
    rank_seed = seed + 1000000 * rank
    set_global_seeds(rank_seed)

    # Prepare params.
    params = config.DEFAULT_PARAMS
    params['env_name'] = env
    params['replay_strategy'] = replay_strategy
    if env in config.DEFAULT_ENV_PARAMS:
        params.update(
            config.DEFAULT_ENV_PARAMS[env])  # merge env-specific parameters in
    params.update(
        **override_params)  # makes it possible to override any parameter
    with open(os.path.join(logger.get_dir(), 'params.json'),
              'w') as file_handler:
        json.dump(params, file_handler)
    params = config.prepare_params(params)
    config.log_params(params, logger_input=logger)

    if num_cpu == 1:
        logger.warn()
        logger.warn('*** Warning ***')
        logger.warn(
            'You are running HER with just a single MPI worker. This will work, but the '
            'experiments that we report in Plappert et al. (2018, https://arxiv.org/abs/1802.09464) '
            'were obtained with --num_cpu 19. This makes a significant difference and if you '
            'are looking to reproduce those results, be aware of this. Please also refer to '
            'https://github.com/openai/stable_baselines/issues/314 for further details.'
        )
        logger.warn('****************')
        logger.warn()

    dims = config.configure_dims(params)
    policy = config.configure_ddpg(dims=dims,
                                   params=params,
                                   clip_return=clip_return)

    rollout_params = {
        'exploit': False,
        'use_target_net': False,
        # 'use_demo_states': True,
        'compute_q': False,
        'time_horizon': params['time_horizon'],
    }

    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        # 'use_demo_states': False,
        'compute_q': True,
        'time_horizon': params['time_horizon'],
    }

    for name in [
            'time_horizon', 'rollout_batch_size', 'gamma', 'noise_eps',
            'random_eps'
    ]:
        rollout_params[name] = params[name]
        eval_params[name] = params[name]

    rollout_worker = RolloutWorker(params['make_env'], policy, dims, logger,
                                   **rollout_params)
    rollout_worker.seed(rank_seed)

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger,
                              **eval_params)
    evaluator.seed(rank_seed)

    train(policy=policy,
          rollout_worker=rollout_worker,
          evaluator=evaluator,
          n_epochs=n_epochs,
          n_test_rollouts=params['n_test_rollouts'],
          n_cycles=params['n_cycles'],
          n_batches=params['n_batches'],
          policy_save_interval=policy_save_interval,
          save_policies=save_policies)
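
A hypothetical invocation sketch; the environment ID, log path, and hyperparameter values below are illustrative rather than anything mandated by launch itself:

launch(env='FetchReach-v1',
       logdir='/tmp/her_fetch_reach',
       n_epochs=50,
       num_cpu=1,
       seed=0,
       replay_strategy='future',
       policy_save_interval=5,
       clip_return=50)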
Example #11
    def set_state(self, state):
        logger.warn('`set_state` is not implemented')