def __init__(self, make_env, policy, dims, logger, T, rollout_batch_size=1,
             history_len=100, render=False, curriculum_sampling='none', **kwargs):
    """Rollout worker generates experience by interacting with one or many environments.

    Args:
        make_env (function): a factory function that creates a new instance of the environment when called
        policy (object): the policy that is used to act
        dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u)
        logger (object): the logger that is used by the rollout worker
        T (int): the number of timesteps per rollout episode
        rollout_batch_size (int): the number of parallel rollouts that should be used
        history_len (int): length of history for statistics smoothing
        render (boolean): whether or not to render the rollouts
        curriculum_sampling (str): the curriculum sampling method. Either none or stochastic3_*properties*
    """
    self.T = T
    # Derive the goal size from the environment name: 3 goal dimensions for
    # the gripper (unless the name contains '-gripper_none-') plus 3 per object.
    max_goal_len = 0
    gripper_goal = None
    if '-gripper_none-' not in kwargs['env_name']:
        gripper_goal = 'gripper_none'
        max_goal_len += 3
    n_objects = re.search('-o[0-9]+-', kwargs['env_name']).group(0)
    n_objects = int(n_objects[2:-1])
    max_goal_len += n_objects * 3
    # One curriculum goal-masking module (CGM) per parallel rollout.
    self.cgms = [CGM(goal_size=max_goal_len, curriculum_sampling=curriculum_sampling,
                     gripper_goal=gripper_goal,
                     exploit=kwargs['policy_action_params']['exploit'])
                 for _ in range(rollout_batch_size)]
    Rollout.__init__(self, make_env, policy, dims, logger, T,
                     rollout_batch_size=rollout_batch_size,
                     history_len=history_len, render=render, **kwargs)
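
# --- Hedged sketch (not part of the original module): how the goal-size
# computation above behaves for hypothetical env names following the
# '-oN-' / '-gripper_none-' naming convention that the regex expects.
import re

def _demo_goal_size(env_name):
    max_goal_len = 0
    if '-gripper_none-' not in env_name:
        max_goal_len += 3  # 3 goal dims for the gripper
    n_objects = int(re.search('-o[0-9]+-', env_name).group(0)[2:-1])
    max_goal_len += n_objects * 3  # 3 goal dims (x, y, z) per object
    return max_goal_len

assert _demo_goal_size('BuildTowerEnv-gripper_above-o2-v1') == 9  # hypothetical name
assert _demo_goal_size('BuildTowerEnv-gripper_none-o1-v1') == 3   # hypothetical name
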
def __init__(self, make_env, policy, dims, logger, T, rollout_batch_size=1,
             exploit=False, history_len=100, render=False, **kwargs):
    Rollout.__init__(self, make_env, policy, dims, logger, T,
                     rollout_batch_size=rollout_batch_size,
                     history_len=history_len, render=render, **kwargs)
    # Cache frequently used environment properties from the first env instance.
    self.env_name = self.envs[0].env.spec._env_name
    self.n_objects = self.envs[0].env.n_objects
    self.gripper_has_target = self.envs[0].env.gripper_goal != 'gripper_none'
    self.tower_height = self.envs[0].env.goal_tower_height
    # self.subg = self.g.copy()
    # Bounded histories for statistics smoothing, plus a cache for computed plans.
    self.rep_correct_history = deque(maxlen=history_len)
    self.subgoal_succ_history = deque(maxlen=history_len)
    self.plan_cache = {}
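
# --- Hedged sketch (not part of the original module): the bounded-history
# pattern behind `rep_correct_history` and `subgoal_succ_history` above.
# A deque with maxlen keeps only the last `history_len` per-episode
# statistics, so a mean over it gives a smoothed running rate.
from collections import deque
import numpy as np

_history = deque(maxlen=100)
for _ep_success in (0, 1, 1, 0, 1):  # hypothetical episode outcomes
    _history.append(_ep_success)
print(np.mean(_history))  # 0.6: mean over at most the last 100 episodes
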
def __init__(self, make_env, policy, dims, logger, T, rollout_batch_size=1,
             exploit=False, history_len=100, render=False, **kwargs):
    Rollout.__init__(self, make_env, policy, dims, logger, T,
                     rollout_batch_size=rollout_batch_size,
                     history_len=history_len, render=render, **kwargs)
    self.env = self.policy.env
    self.env.visualize = render
    self.env.graph = kwargs['graph']
    # One time scale per hierarchy level, parsed from a comma-separated string.
    self.time_scales = np.array([int(t) for t in kwargs['time_scales'].split(',')])
    self.eval_data = {}
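
# --- Hedged sketch (not part of the original module): the `time_scales`
# parsing above turns a comma-separated string, one entry per hierarchy
# level, into an integer array. The value '10,25' is hypothetical.
import numpy as np

_time_scales = np.array([int(t) for t in '10,25'.split(',')])
print(_time_scales)  # [10 25]: one step budget per level of the hierarchy
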
def __init__(self, make_env, policy, dims, logger, T, rollout_batch_size=1, exploit=False, history_len=100, render=False, **kwargs): """Rollout worker generates experience by interacting with one or many environments. Args: make_env (function): a factory function that creates a new instance of the environment when called policy (object): the policy that is used to act dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u) logger (object): the logger that is used by the rollout worker rollout_batch_size (int): the number of parallel rollouts that should be used exploit (boolean): whether or not to exploit, i.e. to act optimally according to the current policy without any exploration use_target_net (boolean): whether or not to use the target net for rollouts compute_Q (boolean): whether or not to compute the Q values alongside the actions noise_eps (float): scale of the additive Gaussian noise random_eps (float): probability of selecting a completely random action history_len (int): length of history for statistics smoothing render (boolean): whether or not to render the rollouts """ Rollout.__init__(self, make_env, policy, dims, logger, T, rollout_batch_size=rollout_batch_size, history_len=history_len, render=render, **kwargs)
def __init__(self, make_env, policy, dims, logger, rollout_batch_size=1,
             exploit=False, history_len=200, render=False, **kwargs):
    """Rollout worker generates experience by interacting with one or many environments.

    Args:
        make_env (function): a factory function that creates a new instance of the environment when called
        policy (object): the policy that is used to act
        dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u)
        logger (object): the logger that is used by the rollout worker
        rollout_batch_size (int): the number of parallel rollouts that should be used
        exploit (boolean): whether or not to exploit, i.e. to act optimally according to the current policy without any exploration
        use_target_net (boolean): whether or not to use the target net for rollouts
        compute_Q (boolean): whether or not to compute the Q values alongside the actions
        noise_eps (float): scale of the additive Gaussian noise
        random_eps (float): probability of selecting a completely random action
        history_len (int): length of history for statistics smoothing
        render (boolean): whether or not to render the rollouts
        graph (boolean): whether or not to create the graph
    """
    self.current_logs = []
    self.exploit = exploit
    self.is_leaf = policy.child_policy is None
    self.h_level = policy.h_level
    dims = policy.input_dims
    # Bounded per-level histories for statistics smoothing.
    self.rep_correct_history = deque(maxlen=history_len)
    self.q_loss_history = deque(maxlen=history_len)
    self.pi_loss_history = deque(maxlen=history_len)
    self.preproc_loss_history = deque(maxlen=history_len)
    self.q_history = deque(maxlen=history_len)
    self.subgoals_achieved_history = deque(maxlen=history_len)
    self.subgoals_given_history = deque(maxlen=history_len)
    self.success = 0
    self.this_T = policy.T
    self.current_t = 0
    self.current_episode = {}
    self.subgoals_achieved = 0
    self.final_goal_achieved = False
    self.subgoals_given = []
    self.render_mode = 'human'
    self.graph = kwargs['graph']
    self.total_steps = 0
    # Recursively build a rollout worker for the next-lower level of the
    # policy hierarchy; the child worker then serves as this level's environment.
    if not self.is_leaf:
        self.child_rollout = RolloutWorker(make_env, policy.child_policy, dims,
                                           logger,
                                           rollout_batch_size=rollout_batch_size,
                                           render=render, exploit=exploit,
                                           **kwargs)
        make_env = self.make_env_from_child
    self.test_subgoal_perc = kwargs['test_subgoal_perc']
    self.tmp_env_ctr = 0
    Rollout.__init__(self, make_env, policy, dims, logger,
                     rollout_batch_size=rollout_batch_size,
                     history_len=history_len, render=render,
                     T=self.this_T, **kwargs)
    self.env_name = self.first_env.env.spec._env_name
    # Set noise coefficient for environments
    self.obs_noise_coefficient = kwargs['obs_noise_coeff']
    self.first_env.env.obs_noise_coefficient = self.obs_noise_coefficient
    self.n_train_batches = 0
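
# --- Hedged sketch (not part of the original module): how the recursive
# construction above unwinds for a two-level hierarchy. `_DummyPolicy` is a
# stand-in that models only the attributes read by __init__; the dims and
# time horizons are hypothetical.
class _DummyPolicy:
    def __init__(self, T, h_level, child_policy=None):
        self.T = T                    # steps this level acts per episode
        self.h_level = h_level        # level index in the hierarchy (hypothetical)
        self.child_policy = child_policy
        self.input_dims = {'o': 10, 'g': 3, 'u': 3}

_low = _DummyPolicy(T=25, h_level=1)                    # leaf: child_policy is None
_high = _DummyPolicy(T=10, h_level=0, child_policy=_low)
# RolloutWorker(make_env, _high, ...) sets is_leaf=False, builds a child
# RolloutWorker around `_low`, and swaps make_env for make_env_from_child,
# so the high level steps through the low-level worker as its environment.
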
def __init__(self, make_env, policy, dims, logger, T, **kwargs): """Rollout worker generates experience by interacting with one or many environments. """ Rollout.__init__(self, make_env, policy, dims, logger, T, **kwargs)