Example #1
0
    def __init__(self, make_env, policy, dims, logger, T, rollout_batch_size=1,
                 history_len=100, render=False, curriculum_sampling='none', **kwargs):
        """Rollout worker that generates experience by interacting with one or
        more environments, with one curriculum goal manager (CGM) per rollout.

        Args:
            make_env (function): a factory function that creates a new instance of the environment
                when called
            policy (object): the policy that is used to act
            dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u)
            logger (object): the logger that is used by the rollout worker
            T (int): number of timesteps per episode
            rollout_batch_size (int): the number of parallel rollouts that should be used
            history_len (int): length of history for statistics smoothing
            render (boolean): whether or not to render the rollouts
            curriculum_sampling (str): the curriculum sampling method. Either none or stochastic3_*properties*
        """
        self.T = T

        # Derive the maximal goal vector length from the environment name.
        # NOTE(review): gripper_goal is set to 'gripper_none' exactly when the
        # env name does NOT contain '-gripper_none-' -- this looks inverted but
        # is preserved as-is; confirm against the CGM contract.
        goal_len = 0
        gripper_goal = None
        if '-gripper_none-' not in kwargs['env_name']:
            gripper_goal = 'gripper_none'
            goal_len += 3  # three coordinates for the gripper goal

        # The env name encodes the object count as '-o<N>-'.
        obj_token = re.search('-o[0-9]+-', kwargs['env_name']).group(0)
        n_objects = int(obj_token[2:-1])
        goal_len += 3 * n_objects  # three coordinates per object

        # One curriculum goal manager per parallel rollout.
        self.cgms = [
            CGM(goal_size=goal_len,
                curriculum_sampling=curriculum_sampling,
                gripper_goal=gripper_goal,
                exploit=kwargs['policy_action_params']['exploit'])
            for _ in range(rollout_batch_size)
        ]

        Rollout.__init__(self, make_env, policy, dims, logger, T,
                         rollout_batch_size=rollout_batch_size,
                         history_len=history_len, render=render, **kwargs)
    def __init__(self, make_env, policy, dims, logger, T, rollout_batch_size=1,
                 exploit=False, history_len=100, render=False, **kwargs):
        """Rollout worker that generates experience by interacting with one or
        more environments and tracks subgoal/representation statistics.

        Args:
            make_env (function): a factory function that creates a new instance of the environment
                when called
            policy (object): the policy that is used to act
            dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u)
            logger (object): the logger that is used by the rollout worker
            T (int): number of timesteps per episode
            rollout_batch_size (int): the number of parallel rollouts that should be used
            exploit (boolean): whether to act greedily without exploration
            history_len (int): length of history for statistics smoothing
            render (boolean): whether or not to render the rollouts
        """
        Rollout.__init__(self, make_env, policy, dims, logger, T,
                         rollout_batch_size=rollout_batch_size,
                         history_len=history_len, render=render, **kwargs)

        # Cache frequently used properties of the first (reference) environment.
        first_env = self.envs[0].env
        self.env_name = first_env.spec._env_name
        self.n_objects = first_env.n_objects
        self.gripper_has_target = first_env.gripper_goal != 'gripper_none'
        self.tower_height = first_env.goal_tower_height

        # Rolling statistics over the most recent episodes.
        self.rep_correct_history = deque(maxlen=history_len)
        self.subgoal_succ_history = deque(maxlen=history_len)
        # Cache for previously computed plans, keyed per problem instance.
        self.plan_cache = {}
Example #3
0
    def __init__(self, make_env, policy, dims, logger, T, rollout_batch_size=1,
            exploit=False, history_len=100, render=False, **kwargs):
        """Rollout worker that delegates construction to Rollout and mirrors
        visualization/graph settings onto the policy's environment.

        Args:
            make_env (function): a factory function that creates a new instance of the environment
                when called
            policy (object): the policy that is used to act; must expose ``env``
            dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u)
            logger (object): the logger that is used by the rollout worker
            T (int): number of timesteps per episode
            rollout_batch_size (int): the number of parallel rollouts that should be used
            exploit (boolean): whether to act greedily without exploration
            history_len (int): length of history for statistics smoothing
            render (boolean): whether or not to render the rollouts
        """
        Rollout.__init__(self, make_env, policy, dims, logger, T,
                rollout_batch_size=rollout_batch_size,
                history_len=history_len, render=render, **kwargs)

        # The policy owns the environment; propagate rendering/graph options.
        self.env = self.policy.env
        self.env.visualize = render
        self.env.graph = kwargs['graph']
        # Per-level time scales, e.g. '10,5' -> array([10, 5]).
        self.time_scales = np.array(
            [int(scale) for scale in kwargs['time_scales'].split(',')])
        self.eval_data = {}
Example #4
0
    def __init__(self, make_env, policy, dims, logger, T, rollout_batch_size=1,
                 exploit=False, history_len=100, render=False, **kwargs):
        """Thin rollout worker: construction is delegated entirely to Rollout.

        Args:
            make_env (function): a factory function that creates a new instance of the environment
                when called
            policy (object): the policy that is used to act
            dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u)
            logger (object): the logger that is used by the rollout worker
            T (int): number of timesteps per episode
            rollout_batch_size (int): the number of parallel rollouts that should be used
            exploit (boolean): whether or not to exploit, i.e. to act optimally according to the
                current policy without any exploration
            history_len (int): length of history for statistics smoothing
            render (boolean): whether or not to render the rollouts
            **kwargs: forwarded to ``Rollout.__init__``; presumably includes
                use_target_net, compute_Q, noise_eps and random_eps -- confirm
                against the Rollout base class.
        """
        Rollout.__init__(self, make_env, policy, dims, logger, T,
                         rollout_batch_size=rollout_batch_size,
                         history_len=history_len, render=render, **kwargs)
    def __init__(self,
                 make_env,
                 policy,
                 dims,
                 logger,
                 rollout_batch_size=1,
                 exploit=False,
                 history_len=200,
                 render=False,
                 **kwargs):
        """Hierarchical rollout worker: builds one worker per policy level by
        recursing into ``policy.child_policy`` until the leaf level is reached.

        Args:
            make_env (function): a factory function that creates a new instance of the environment
                when called
            policy (object): the policy for this hierarchy level; must expose
                ``child_policy``, ``h_level``, ``input_dims`` and ``T``
            dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u);
                overwritten below by ``policy.input_dims``
            logger (object): the logger that is used by the rollout worker
            rollout_batch_size (int): the number of parallel rollouts that should be used
            exploit (boolean): whether or not to exploit, i.e. to act optimally according to the
                current policy without any exploration
            history_len (int): length of history for statistics smoothing.
                NOTE(review): defaults to 200 here while sibling workers use
                100 -- confirm this difference is intentional.
            render (boolean): whether or not to render the rollouts
            graph (boolean): whether or not to create the graph (read from kwargs)
        """
        self.current_logs = []
        self.exploit = exploit
        # A policy without a child policy is the lowest (leaf) hierarchy level.
        self.is_leaf = policy.child_policy is None
        self.h_level = policy.h_level
        # The policy dictates its own input dimensions; the argument is replaced.
        dims = policy.input_dims
        # Rolling statistics over the most recent episodes / training batches.
        self.rep_correct_history = deque(maxlen=history_len)
        self.q_loss_history = deque(maxlen=history_len)
        self.pi_loss_history = deque(maxlen=history_len)
        self.preproc_loss_history = deque(maxlen=history_len)
        self.q_history = deque(maxlen=history_len)
        self.subgoals_achieved_history = deque(maxlen=history_len)
        self.subgoals_given_history = deque(maxlen=history_len)
        self.success = 0
        # The per-level horizon comes from the policy, not from a T argument.
        self.this_T = policy.T
        self.current_t = 0
        self.current_episode = {}
        self.subgoals_achieved = 0
        self.final_goal_achieved = False
        self.subgoals_given = []
        self.render_mode = 'human'
        self.graph = kwargs['graph']

        self.total_steps = 0
        if not self.is_leaf:
            # Recurse: the child level gets its own rollout worker, and this
            # level treats the child rollout as its "environment".
            self.child_rollout = RolloutWorker(
                make_env,
                policy.child_policy,
                dims,
                logger,
                rollout_batch_size=rollout_batch_size,
                render=render,
                exploit=exploit,
                **kwargs)
            make_env = self.make_env_from_child
            self.test_subgoal_perc = kwargs['test_subgoal_perc']
        self.tmp_env_ctr = 0
        Rollout.__init__(self,
                         make_env,
                         policy,
                         dims,
                         logger,
                         rollout_batch_size=rollout_batch_size,
                         history_len=history_len,
                         render=render,
                         T=self.this_T,
                         **kwargs)
        self.env_name = self.first_env.env.spec._env_name

        # Set noise coefficient for environments
        self.obs_noise_coefficient = kwargs['obs_noise_coeff']
        self.first_env.env.obs_noise_coefficient = self.obs_noise_coefficient

        self.n_train_batches = 0
 def __init__(self, make_env, policy, dims, logger, T, **kwargs):
     """Minimal rollout worker: all construction is delegated to Rollout.

     Args:
         make_env (function): a factory function that creates a new instance
             of the environment when called
         policy (object): the policy that is used to act
         dims (dict of ints): the dimensions for observations (o), goals (g),
             and actions (u)
         logger (object): the logger that is used by the rollout worker
         T (int): number of timesteps per episode
     """
     Rollout.__init__(self,
                      make_env,
                      policy,
                      dims,
                      logger,
                      T,
                      **kwargs)