コード例 #1
0
    def __init__(self,
                 wrapped_task: Task,
                 mode: FinalRewMode,
                 factor: float = 1e3):
        """
        Constructor

        :param wrapped_task: task to wrap
        :param mode: mode for calculating the final reward
        :param factor: value to scale the final reward, does not matter if `mode.time_dependent is True`
        """
        # Call TaskWrapper's constructor
        super().__init__(wrapped_task)

        if not isinstance(mode, FinalRewMode):
            raise pyrado.TypeErr(given=mode, expected_type=FinalRewMode)
        if mode.user_input and (mode.always_positive or mode.always_negative or
                                mode.state_dependent or mode.time_dependent):
            print_cbt(
                'If the user_input == True, then all other specifications in FinalRewMode are ignored.',
                'w')

        self.mode = mode
        self.factor = factor
        self._yielded_final_rew = False
コード例 #2
0
def eval_damping():
    """ Plot joint trajectories for different joint damping parameters """
    # Load experiment and remove possible randomization wrappers
    ex_dir = ask_for_experiment()
    env, policy, _ = load_experiment(ex_dir)
    env = inner_env(env)
    env.domain_param = WAMBallInCupSim.get_nominal_domain_param()

    data = []
    t = []
    dampings = [0., 1e-2, 1e-1, 1e0]
    print_cbt(f'Run policy for damping coefficients: {dampings}')
    for d in dampings:
        env.reset(domain_param=dict(joint_damping=d))
        ro = rollout(env,
                     policy,
                     render_mode=RenderMode(video=False),
                     eval=True)
        t.append(ro.env_infos['t'])
        data.append(ro.env_infos['qpos'])

    fig, ax = plt.subplots(3, sharex='all')
    ls = ['k-', 'b--', 'g-.', 'r:']  # line style setting for better visibility
    for i, idx in enumerate([1, 3, 5]):
        for j in range(len(dampings)):
            ax[i].plot(t[j],
                       data[j][:, idx],
                       ls[j],
                       label=f'damping: {dampings[j]}')
            if i == 0:
                ax[i].legend()
        ax[i].set_ylabel(f'joint {idx} pos [rad]')
    ax[2].set_xlabel('time [s]')
    plt.suptitle('Evaluation of joint damping coefficient')
    plt.show()
コード例 #3
0
    def check_ball_collisions(self, verbose: bool = False) -> bool:
        """
        Check if an undesired collision with the ball occurs.

        :param verbose: print messages on collision
        :return: `True` if the ball collides with something else than the central parts of the cup
        """
        for i in range(self.sim.data.ncon):
            # Get current contact object
            contact = self.sim.data.contact[i]

            # Extract body-id and body-name of both contact geoms
            body1 = self.model.geom_bodyid[contact.geom1]
            body1_name = self.model.body_names[body1]
            body2 = self.model.geom_bodyid[contact.geom2]
            body2_name = self.model.body_names[body2]

            # Evaluate if the ball collides with part of the WAM (collision bodies)
            # or the connection of WAM and cup (geom_ids)
            c1 = body1_name == 'ball' and (body2_name in self._collision_bodies
                                           or contact.geom2
                                           in self._collision_geom_ids)
            c2 = body2_name == 'ball' and (body1_name in self._collision_bodies
                                           or contact.geom1
                                           in self._collision_geom_ids)
            if c1 or c2:
                if verbose:
                    print_cbt(
                        f'Undesired collision of {body1_name} and {body2_name} detected!',
                        'y')
                return True

        return False
コード例 #4
0
 def contains(self, cand: np.ndarray, verbose: bool = False) -> bool:
     valid = any([s.contains(cand) for s in self._spaces])
     if not valid and verbose:
         print_cbt(f'Violated all of the {len(self._spaces)} subspaces!', 'r')
         for s in self._spaces:
             s.contains(cand, verbose)
     return valid
コード例 #5
0
def _keys_in_columns(df: DataFrame, *cands, verbose: bool) -> bool:
    if any([c not in df.columns for c in cands]):
        if verbose:
            print_cbt(f"Did not find {list(cands)} in the data frame. Skipped the associated plot.")
        return False
    else:
        return True
コード例 #6
0
    def check_ball_in_cup(self, *args, verbose: bool = False):
        """
        Check if the ball is in the cup.

        :param verbose: print messages when ball is in the cup
        :return: `True` if the ball is in the cup
        """
        for i in range(self.sim.data.ncon):
            # Get current contact object
            contact = self.sim.data.contact[i]

            # Extract body-id and body-name of both contact geoms
            body1 = self.model.geom_bodyid[contact.geom1]
            body1_name = self.model.body_names[body1]
            body2 = self.model.geom_bodyid[contact.geom2]
            body2_name = self.model.body_names[body2]

            # Evaluate if the ball collides with part of the WAM (collision bodies)
            # or the connection of WAM and cup (geom_ids)
            cup_inner_id = self.model._geom_name2id['cup_inner']
            c1 = body1_name == 'ball' and contact.geom2 == cup_inner_id
            c2 = body2_name == 'ball' and contact.geom1 == cup_inner_id
            if c1 or c2:
                if verbose:
                    print_cbt(
                        f'The ball is in the cup at time step {self.curr_step}.',
                        'y')
                return True

        return False
コード例 #7
0
    def adapt(self, domain_distr_param: str,
              domain_distr_param_value: to.Tensor):
        """
        Update this domain parameter.

        :param domain_distr_param: distribution parameter to update, e.g. mean or std
        :param domain_distr_param_value: new value of the distribution parameter
        """

        # Set the attributes
        super().adapt(domain_distr_param, domain_distr_param_value)

        # Re-create the distributions, otherwise the changes will have no effect
        if domain_distr_param in ["target_mean", "target_cov_chol_flat"]:
            try:
                self._target_distr = MultivariateNormal(self.target_mean,
                                                        self.target_cov,
                                                        validate_args=True)
            except ValueError as err:
                print_cbt(
                    f"Inputs that lead to the ValueError from PyTorch Distributions:\n"
                    f"target_distribution; domain_distr_param = {domain_distr_param}\nloc = {self.target_mean}\ncov = {self.target_cov}"
                )
                raise err
        if domain_distr_param in ["context_mean", "context_cov_chol_flat"]:
            try:
                self._context_distr = MultivariateNormal(self.context_mean,
                                                         self.context_cov,
                                                         validate_args=True)
            except ValueError as err:
                print_cbt(
                    f"Inputs that lead to the ValueError from PyTorch Distributions:\n"
                    f"init_distribution; domain_distr_param = {domain_distr_param}\nloc = {self.context_mean}\ncov = {self.context_cov}"
                )
                raise err
コード例 #8
0
    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Create robcom GoTo process
        gt = self._client.create(robcom.Goto, 'RIGHT_ARM', '')

        # Move to initial state within 5 seconds
        gt.add_step(5., self.init_pose_des)
        print_cbt('Moving the Barret WAM to the initial position.',
                  'c',
                  bright=True)

        # Start process and wait for completion
        gt.start()
        gt.wait_for_completion()
        print_cbt('Reached the initial position.', 'c')

        # Reset the task which also resets the reward function if necessary
        self._task.reset(env_spec=self.spec)

        input('Hit enter to continue.')

        # Reset time steps
        self._curr_step = 0
        self.state = np.array([self._curr_step / self.max_steps])

        return self.observe(self.state)
コード例 #9
0
    def calibrate(self):
        if self._calibrated:
            return
        print_cbt("Calibrating the cart-pole ...", "c")

        # Go to the left
        with completion_context("Going to the left", color="c", bright=True):
            obs, _, _, _ = self.step(np.zeros(self.act_space.shape))
            ctrl = QCartPoleGoToLimCtrl(obs, positive=True)
            while not ctrl.done:
                act = ctrl(obs)
                obs, _, _, _ = self.step(act)

            if ctrl.success:
                self._norm_x_lim[1] = obs[0]
            else:
                raise RuntimeError("Going to the left limit failed!")

        # Go to the right
        with completion_context("Going to the right", color="c", bright=True):
            obs, _, _, _ = self.step(np.zeros(self.act_space.shape))
            ctrl = QCartPoleGoToLimCtrl(obs, positive=False)
            while not ctrl.done:
                act = ctrl(obs)
                obs, _, _, _ = self.step(act)

            if ctrl.success:
                self._norm_x_lim[0] = obs[0]
            else:
                raise RuntimeError("Going to the right limit failed!")

        # Activate the absolute cart position:
        self._calibrated = True
コード例 #10
0
def wrap_like_other_env(env_targ: Env, env_src: [SimEnv, EnvWrapper]) -> Env:
    """
    Wrap a given real environment like it's simulated counterpart (except the domain randomization of course).

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :return: target environment
    """
    if env_src.dt > env_targ.dt:
        ds_factor = int(env_src.dt / env_targ.dt)
        env_targ = DownsamplingWrapper(env_targ, ds_factor)
        print_cbt(
            f'Wrapped the env with an DownsamplingWrapper of factor {ds_factor}.',
            'c')

    if typed_env(env_src, ActNormWrapper) is not None:
        env_targ = ActNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ActNormWrapper.', 'c')

    if typed_env(env_src, ObsNormWrapper) is not None:
        env_targ = ObsNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsNormWrapper.', 'c')
    elif typed_env(env_src, ObsRunningNormWrapper) is not None:
        env_targ = ObsRunningNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsRunningNormWrapper.', 'c')

    if typed_env(env_src, ObsPartialWrapper) is not None:
        env_targ = ObsPartialWrapper(env_targ,
                                     mask=typed_env(
                                         env_src, ObsPartialWrapper).keep_mask,
                                     keep_selected=True)
        print_cbt('Wrapped the env with an ObsPartialWrapper.', 'c')

    return env_targ
コード例 #11
0
def rollout_dummy_rbf_policy_4dof():
    # Environment
    env = WAMBallInCupSim(
        num_dof=4,
        max_steps=3000,
        # Note, when tuning the task args: the `R` matrices are now 4x4 for the 4 dof WAM
        task_args=dict(R=np.zeros((4, 4)),
                       R_dev=np.diag([0.2, 0.2, 1e-2, 1e-2])),
    )

    # Stabilize ball and print out the stable state
    env.reset()
    act = np.zeros(env.spec.act_space.flat_dim)
    for i in range(1500):
        env.step(act)
        env.render(mode=RenderMode(video=True))

    # Printing out actual positions for 4-dof (..just needed to setup the hard-coded values in the class)
    print('Ball pos:', env.sim.data.get_body_xpos('ball'))
    print('Cup goal:', env.sim.data.get_site_xpos('cup_goal'))
    print('Joint pos (incl. first rope angle):', env.sim.data.qpos[:5])

    # Apply DualRBFLinearPolicy and plot the joint states over the desired ones
    rbf_hparam = dict(num_feat_per_dim=7,
                      bounds=(np.array([0.]), np.array([1.])))
    policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=2)
    done, param = False, None
    while not done:
        ro = rollout(env,
                     policy,
                     render_mode=RenderMode(video=True),
                     eval=True,
                     reset_kwargs=dict(domain_param=param))
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, param = after_rollout_query(env, policy, ro)
コード例 #12
0
    def __init__(self,
                 num_feat_per_dim: int,
                 bounds: [
                     Sequence[np.ndarray], Sequence[to.Tensor], Sequence[float]
                 ],
                 scale: float = None,
                 state_wise_norm: bool = True):
        """
        Constructor

        :param num_feat_per_dim: number of radial basis functions, identical for every dimension of the input
        :param bounds: lower and upper bound for the Gaussians' centers, the input dimension is inferred from them
        :param scale: scaling factor for the squared distance, if `None` the factor is determined such that two
                      neighboring RBFs have a value of 0.2 at the other center
        :param state_wise_norm: `True` to apply the normalization across input state dimensions separately (every
                                 dimension sums to one), or `False` to jointly normalize them
        """
        if not num_feat_per_dim > 1:
            raise pyrado.ValueErr(given=num_feat_per_dim, g_constraint='1')
        if not len(bounds) == 2:
            raise pyrado.ShapeErr(given=bounds, expected_match=np.empty(2))

        # Get the bounds, e.g. from the observation space and then clip them in case the
        bounds_to = [None, None]
        for i, b in enumerate(bounds):
            if isinstance(b, np.ndarray):
                bounds_to[i] = to.from_numpy(b)
            elif isinstance(b, to.Tensor):
                bounds_to[i] = b.clone()
            elif isinstance(b, (int, float)):
                bounds_to[i] = to.tensor(b, dtype=to.get_default_dtype()).view(
                    1, )
            else:
                raise pyrado.TypeErr(
                    given=b, expected_type=[np.ndarray, to.Tensor, int, float])
        if any([any(np.isinf(b)) for b in bounds_to]):
            bound_lo, bound_up = [
                to.clamp(b, min=-1e6, max=1e6) for b in bounds_to
            ]
            print_cbt('Clipped the bounds of the RBF centers to [-1e6, 1e6].',
                      'y')
        else:
            bound_lo, bound_up = bounds_to

        # Create a matrix with center locations for the Gaussians
        num_dim = len(bound_lo)
        self.num_feat = num_feat_per_dim * num_dim
        self.centers = to.empty(num_feat_per_dim, num_dim)
        for i in range(num_dim):
            # Features along columns
            self.centers[:, i] = to.linspace(bound_lo[i], bound_up[i],
                                             num_feat_per_dim)

        if scale is None:
            delta_center = self.centers[1, :] - self.centers[0, :]
            self.scale = -to.log(to.tensor(0.2)) / to.pow(delta_center, 2)
        else:
            self.scale = scale

        self._state_wise_norm = state_wise_norm
コード例 #13
0
    def __init__(self,
                 save_dir: str,
                 env: Env,
                 policy: Policy,
                 max_iter: int,
                 num_rollouts: int,
                 pop_size: [int, None] = None,
                 num_workers: int = 4,
                 logger: StepLogger = None):
        """
        Constructor

        :param save_dir: directory to save the snapshots i.e. the results in
        :param env: the environment which the policy operates
        :param policy: policy to be updated
        :param max_iter: maximum number of iterations (i.e. policy updates) that this algorithm runs
        :param num_rollouts: number of rollouts per policy parameter set
        :param pop_size: number of solutions in the population, pass `None` to use a default that scales logarithmically
                         with the number of policy parameters
        :param num_workers: number of environments for parallel sampling
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        """
        if not isinstance(env, Env):
            raise pyrado.TypeErr(given=env, expected_type=Env)
        if not (isinstance(pop_size, int) or pop_size is None):
            raise pyrado.TypeErr(given=pop_size, expected_type=int)
        if isinstance(pop_size, int) and pop_size <= 0:
            raise pyrado.ValueErr(given=pop_size, g_constraint='0')

        # Call Algorithm's constructor
        super().__init__(save_dir, max_iter, policy, logger)

        # Store the inputs
        self._env = env
        self.num_rollouts = num_rollouts

        # Auto-select population size if needed
        if pop_size is None:
            pop_size = 4 + int(3 * np.log(policy.num_param))
            print_cbt(f'Initialized population size to {pop_size}.', 'y')
        self.pop_size = pop_size

        # Create sampler
        self.sampler = ParameterExplorationSampler(
            env,
            policy,
            num_workers=num_workers,
            num_rollouts_per_param=num_rollouts,
        )

        # Stopping criterion
        self.ret_avg_stack = 1e3 * np.random.randn(20)  # stack size = 20
        self.thold_ret_std = 1e-1  # algorithm terminates if below for multiple iterations

        # Saving the best policy (this is not the mean for policy parameter exploration)
        self.best_policy_param = policy.param_values.clone()

        # Set this in subclasses
        self._expl_strat = None
コード例 #14
0
 def close(self):
     """ Sends a zero-step and closes the communication. """
     if self._qsoc.is_open():
         self.step(np.zeros(self.act_space.shape))
         self._qsoc.close()
         print_cbt('Closed the connection to the Quanser device.',
                   'c',
                   bright=True)
コード例 #15
0
ファイル: spota.py プロジェクト: fdamken/SimuRLacra
    def _compute_candidate(self, nc: int):
        """
        Train and save one candidate solution to a pt-file

        :param nc: number of domains used for training the candidate solution
        """
        # Do a warm start if desired
        self._subrtn_cand.init_modules(
            self.warmstart_cand,
            prefix=f"iter_{self._curr_iter - 1}",
            suffix="cand",
            policy_param_init=self.cand_policy_param_init,
            valuefcn_param_init=self.cand_critic_param_init,
        )

        # Sample sets of physics params xi_{1}, ..., xi_{nc}
        self.env_dr.fill_buffer(nc)
        env_params_cand = self.env_dr.randomizer.get_params()
        joblib.dump(
            env_params_cand,
            osp.join(self.save_dir,
                     f"iter_{self._curr_iter}_env_params_cand.pkl"))
        print("Randomized parameters of for the candidate solution:")
        print_domain_params(env_params_cand)

        # Reset the subroutine algorithm which includes resetting the exploration
        self._cnt_samples += self._subrtn_cand.sample_count
        self._subrtn_cand.reset()
        print("Reset candidate exploration noise.")

        pol_param_before = self._subrtn_cand.policy.param_values.clone()
        if isinstance(self._subrtn_cand, ActorCritic):
            # Set dropout and batch normalization layers to training mode
            self._subrtn_cand.critic.vfcn.train()
            critic_param_before = self._subrtn_cand.critic.vfcn.param_values.clone(
            )

        # Solve the (approx) stochastic program SP_nc for the sampled physics parameter sets
        print_cbt(f"\nIteration {self._curr_iter} | Candidate solution\n",
                  "c",
                  bright=True)
        self._subrtn_cand.train(snapshot_mode="best",
                                meta_info=dict(
                                    prefix=f"iter_{self._curr_iter}",
                                    suffix="cand"))

        if (self._subrtn_cand.policy.param_values == pol_param_before).all():
            warn(
                "The candidate's policy parameters did not change during training!",
                UserWarning)
        if isinstance(self._subrtn_refs, ActorCritic):
            if (self._subrtn_cand.critic.vfcn.param_values ==
                    critic_param_before).all():
                warn(
                    "The candidate's critic parameters did not change during training!",
                    UserWarning)

        print_cbt("Learned an approx solution for SP_nc.\n", "y")
コード例 #16
0
def load(name: str,
         load_dir: PathLike,
         prefix: str = "",
         suffix: str = "",
         obj=None,
         verbose: bool = False):
    """
    Load an object object using a prefix or suffix, depending on the meta information.

    :param name: name of the object for loading including the file extension, e.g. 'policy.pt' for PyTorch modules
                 like a Pyrado `Policy` instance
    :param load_dir: directory to load from
    :param prefix: prefix for altering the name, e.g. 'iter_0_...'
    :param suffix: suffix for altering the name, e.g. '..._ref'
    :param obj: PyTorch module to load into, this can be `None` except for the case if you want to load and save the
                module's `state_dict`
    :param verbose: if `True`, print the path of what has been loaded

    .. seealso::
        https://pytorch.org/tutorials/beginner/saving_loading_models.html#saving-loading-model-for-inference
    """
    if not isinstance(name, str):
        raise TypeErr(given=name, expected_type=str)
    if not osp.isdir(load_dir):
        raise PathErr(given=load_dir)
    if not isinstance(prefix, str):
        raise TypeErr(given=prefix, expected_type=str)
    elif prefix != "":
        # A valid non-default prefix was given
        prefix = prefix + "_"
    if not isinstance(suffix, str):
        raise TypeErr(given=suffix, expected_type=str)
    elif suffix != "":
        # A valid non-default prefix was given
        suffix = "_" + suffix

    # Infer file type
    file_ext = name[name.rfind(".") + 1:]
    if not (file_ext in ["pt", "npy", "pkl"]):
        raise ValueErr(
            msg="Only pt, npy, and pkl files are currently supported!")

    # Load the data
    name_wo_file_ext = name[:name.find(".")]
    name_load = f"{prefix}{name_wo_file_ext}{suffix}.{file_ext}"
    obj_ = _load_fcn(path=osp.join(load_dir, name_load), extension=file_ext)
    assert obj_ is not None

    if isinstance(obj_, dict) and file_ext == "pt":
        # PyTorch saves state_dict as an OrderedDict
        obj.load_state_dict(obj_)
    else:
        obj = obj_

    if verbose:
        print_cbt(f"Loaded {osp.join(load_dir, name_load)}", "g")

    return obj
コード例 #17
0
def plot_features(ro: StepSequence, policy: Policy):
    """
    Plot all features given the policy and the observation trajectories.

    :param policy: linear policy used during the rollout
    :param ro: input rollout
    """
    if not isinstance(policy, LinearPolicy):
        print_cbt(
            'Plotting of the feature values is only supports linear policies!',
            'y')
        return

    if hasattr(ro, 'observations'):
        # Use recorded time stamps if possible
        t = ro.env_infos.get('t', np.arange(0, ro.length)) if hasattr(
            ro, 'env_infos') else np.arange(0, ro.length)

        # Recover the features from the observations
        feat_vals = policy.eval_feats(to.from_numpy(ro.observations))
        dim_feat = range(feat_vals.shape[1])
        if len(dim_feat) <= 6:
            divisor = 2
        elif len(dim_feat) <= 12:
            divisor = 4
        else:
            divisor = 8
        num_cols = int(np.ceil(len(dim_feat) / divisor))
        num_rows = int(np.ceil(len(dim_feat) / num_cols))

        fig, axs = plt.subplots(num_rows,
                                num_cols,
                                figsize=(num_cols * 5, num_rows * 3),
                                constrained_layout=True)
        fig.suptitle('Feature values over Time')
        plt.subplots_adjust(hspace=.5)
        colors = plt.get_cmap('tab20')(np.linspace(0, 1, len(dim_feat)))

        if len(dim_feat) == 1:
            axs.plot(t,
                     feat_vals[:-1, dim_feat[0]],
                     label=_get_obs_label(ro, dim_feat[0]))
            axs.legend()
        else:
            for i in range(num_rows):
                for j in range(num_cols):
                    if j + i * num_cols < len(dim_feat):
                        # Omit the last observation for simplicity
                        axs[i, j].plot(t,
                                       feat_vals[:-1, j + i * num_cols],
                                       label=rf'$\phi_{j + i*num_cols}$',
                                       c=colors[j + i * num_cols])
                        axs[i, j].legend()
                    else:
                        # We might create more subplots than there are observations
                        pass
        plt.show()
コード例 #18
0
    def _is_curr_task_done(self,
                           state: np.ndarray,
                           act: np.ndarray,
                           remaining_steps: int,
                           verbose: bool = False) -> float:
        """
        Check if the current task is done. If so, move to the next one and return the final reward of this task.

        :param state: current state
        :param act: current action
        :param remaining_steps: number of time steps left in the episode
        :param verbose: print messages on success or failure
        :return: final return of the current subtask
        """
        if (not self.succeeded_tasks[self._idx_curr]
                and not self.failed_tasks[self._idx_curr]
                and self._tasks[self._idx_curr].is_done(state)):
            # Task has not been marked done yet, but is now done

            if self._tasks[self._idx_curr].has_succeeded(state):
                # Check off successfully completed task
                self.succeeded_tasks[self._idx_curr] = True
                if verbose:
                    print_cbt(
                        f"task {self._idx_curr} has succeeded (is done) at state {state}",
                        "g")

            elif self._tasks[self._idx_curr].has_failed(state):
                # Check off unsuccessfully completed task
                self.failed_tasks[self._idx_curr] = True
                if verbose:
                    print_cbt(
                        f"Task {self._idx_curr} has failed (is done) at state {state}",
                        "r")

            else:
                raise pyrado.ValueErr(
                    msg=
                    f"Task {self._idx_curr} neither succeeded or failed but is done!"
                )

            # Memorize current reward
            if self.hold_rew_when_done:
                self.held_rews[self._idx_curr] = self._tasks[
                    self._idx_curr].step_rew(state, act, remaining_steps=0)

            # Give a reward for completing the task defined by the task
            task_final_rew = self._tasks[self._idx_curr].final_rew(
                state, remaining_steps)

            # Advance to the next task
            self.idx_curr = (self._idx_curr + 1) % len(self)

        else:
            task_final_rew = 0.0

        return task_final_rew