Example 1
from pyrado.domain_randomization.domain_parameter import DomainParam
from pyrado.domain_randomization.domain_randomizer import DomainRandomizer


def default_dummy_randomizer():
    """Create a randomizer whose domain parameters only carry nominal means and no distributions."""
    return DomainRandomizer(
        DomainParam(name='mass', mean=1.2),
        DomainParam(name='special', mean=0),
        DomainParam(name='length', mean=4),
        DomainParam(name='time_delay', mean=13)
    )
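A minimal usage sketch, assuming the imports above and that `DomainRandomizer` stores its parameters in the `domain_params` list shown in the next example:

randomizer = default_dummy_randomizer()
# Inspect which parameters the randomizer holds
print([dp.name for dp in randomizer.domain_params])  # ['mass', 'special', 'length', 'time_delay']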
Example 2
    def add_domain_params(self,
                          *domain_params: DomainParam,
                          dp_mapping: Optional[Mapping[int, str]] = None):
        """
        Add an arbitrary number of domain parameters with their distributions to the randomizer.

        :param domain_params: list or tuple of `DomainParam` instances
        :param dp_mapping: mapping from subsequent integers (starting at 0) to domain parameter names (e.g. mass).
                           This only sets the names and is intended to be used to guarantee the right number and
                           order of domain parameters in the randomizer.
        """
        for dp in domain_params:
            if not isinstance(dp, DomainParam):
                raise pyrado.TypeErr(given=dp, expected_type=DomainParam)
            self.domain_params.append(dp)

        if dp_mapping is not None:
            # Sort according to the indices held by the keys
            sorted_mapping = dict(sorted(dp_mapping.items()))
            for _, value in sorted_mapping.items():
                if not isinstance(value, str):
                    raise pyrado.TypeErr(given=value, expected_type=str)
                self.domain_params.append(DomainParam(name=value))
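A usage sketch for `add_domain_params` (the randomizer and parameter names are illustrative, and it assumes a `DomainRandomizer` can be created without arguments): parameters can be appended either as fully specified `DomainParam` instances or, via `dp_mapping`, as name-only placeholders whose sorted integer keys fix their order.

randomizer = DomainRandomizer()
# Append fully specified domain parameters
randomizer.add_domain_params(
    NormalDomainParam(name='mass', mean=1.2, std=0.1),
    UniformDomainParam(name='length', mean=4, halfspan=0.5),
)
# Append name-only placeholders; the sorted integer keys determine their order
randomizer.add_domain_params(dp_mapping={1: 'special', 0: 'time_delay'})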
Example 3

import torch as to

from pyrado.domain_randomization.domain_parameter import (
    BernoulliDomainParam,
    DomainParam,
    MultivariateNormalDomainParam,
    NormalDomainParam,
    UniformDomainParam,
)
from pyrado.domain_randomization.domain_randomizer import DomainRandomizer

DomainParam(name="a", mean=1)

BernoulliDomainParam(name="b", val_0=2, val_1=5, prob_1=0.8)

DomainRandomizer(
    NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100))

DomainRandomizer(
    NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100),
    UniformDomainParam(name="special",
                       mean=0,
                       halfspan=42,
                       clip_lo=-7.4,
                       roundint=True),
    NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1),
    UniformDomainParam(name="time_delay",
                       mean=13,
                       halfspan=6,
                       clip_up=17,
                       roundint=True),
    MultivariateNormalDomainParam(name="multidim",
                                  mean=10 * to.ones((2,)),
                                  cov=2 * to.eye(2),
                                  clip_up=11))
Example 4
import torch as to

from pyrado.domain_randomization.domain_parameter import (
    DomainParam, BernoulliDomainParam, UniformDomainParam, NormalDomainParam, MultivariateNormalDomainParam
)
from pyrado.domain_randomization.domain_randomizer import DomainRandomizer


DomainParam(name='a', mean=1)

BernoulliDomainParam(name='b', val_0=2, val_1=5, prob_1=0.8)

DomainRandomizer(
    NormalDomainParam(name='mass', mean=1.2, std=0.1, clip_lo=10, clip_up=100)
)

DomainRandomizer(
    NormalDomainParam(name='mass', mean=1.2, std=0.1, clip_lo=10, clip_up=100),
    UniformDomainParam(name='special', mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
    NormalDomainParam(name='length', mean=4, std=0.6, clip_up=50.1),
    UniformDomainParam(name='time_delay', mean=13, halfspan=6, clip_up=17, roundint=True),
    MultivariateNormalDomainParam(name='multidim', mean=10 * to.ones((2,)), cov=2*to.eye(2), clip_up=11)
)
Example 5
    def __init__(
        self,
        save_dir: pyrado.PathLike,
        env: Env,
        subrtn: Algorithm,
        max_iter: int,
        svpg_particle_hparam: dict,
        num_svpg_particles: int,
        num_discriminator_epoch: int,
        batch_size: int,
        svpg_learning_rate: float = 3e-4,
        svpg_temperature: float = 10,
        svpg_evaluation_steps: int = 10,
        svpg_horizon: int = 50,
        svpg_kl_factor: float = 0.03,
        svpg_warmup: int = 0,
        svpg_serial: bool = False,
        num_workers: int = 4,
        num_trajs_per_config: int = 8,
        max_step_length: float = 0.05,
        randomized_params: Optional[Sequence[str]] = None,
        logger: Optional[StepLogger] = None,
    ):
        """
        Constructor

        :param save_dir: directory in which to save the snapshots, i.e. the results
        :param env: the environment to train in
        :param subrtn: algorithm which performs the policy / value-function optimization
        :param max_iter: maximum number of iterations
        :param svpg_particle_hparam: SVPG particle hyperparameters
        :param num_svpg_particles: number of SVPG particles
        :param num_discriminator_epoch: epochs in discriminator training
        :param batch_size: batch size for training
        :param svpg_learning_rate: SVPG particle optimizers' learning rate
        :param svpg_temperature: SVPG temperature coefficient (how strongly the particles influence each other)
        :param svpg_evaluation_steps: how many configurations to sample between training
        :param svpg_horizon: how many steps until the particles are reset
        :param svpg_kl_factor: KL reward coefficient
        :param svpg_warmup: number of iterations without SVPG training in the beginning
        :param svpg_serial: serial mode (see SVPG)
        :param num_workers: number of environments for parallel sampling
        :param num_trajs_per_config: number of trajectories to sample from each config
        :param max_step_length: maximum change of physics parameters per step
        :param randomized_params: which domain parameters to randomize; if `None` or empty, all nominal domain
                                  parameters of the environment are used
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        """
        if not isinstance(env, Env):
            raise pyrado.TypeErr(given=env, expected_type=Env)
        if not isinstance(subrtn, Algorithm):
            raise pyrado.TypeErr(given=subrtn, expected_type=Algorithm)
        if not isinstance(subrtn.policy, Policy):
            raise pyrado.TypeErr(given=subrtn.policy, expected_type=Policy)

        # Call Algorithm's constructor
        super().__init__(save_dir, max_iter, subrtn.policy, logger)
        self.log_loss = True

        # Store the inputs
        self.env = env
        self._subrtn = subrtn
        self._subrtn.save_name = "subrtn"
        self.num_particles = num_svpg_particles
        self.num_discriminator_epoch = num_discriminator_epoch
        self.batch_size = batch_size
        self.num_trajs_per_config = num_trajs_per_config
        self.warm_up_time = svpg_warmup
        self.svpg_evaluation_steps = svpg_evaluation_steps
        self.svpg_temperature = svpg_temperature
        self.svpg_lr = svpg_learning_rate
        self.svpg_max_step_length = max_step_length
        self.svpg_horizon = svpg_horizon
        self.svpg_kl_factor = svpg_kl_factor

        self.pool = SamplerPool(num_workers)
        self.curr_time_step = 0

        # Determine which domain parameters to randomize; fall back to all nominal ones if none were given
        if randomized_params is None or len(randomized_params) == 0:
            randomized_params = inner_env(self.env).get_nominal_domain_param().keys()
        self.params = [DomainParam(param, 1) for param in randomized_params]
        self.num_params = len(self.params)

        # Initialize reward generator
        self.reward_generator = RewardGenerator(env.spec,
                                                self.batch_size,
                                                reward_multiplier=1,
                                                lr=1e-3,
                                                logger=self.logger)

        # Initialize logbook
        self.sim_instances_full_horizon = np.random.random_sample(
            (self.num_particles, self.svpg_horizon, self.svpg_evaluation_steps,
             self.num_params))

        # Initialize SVPG
        self.svpg_wrapper = SVPGAdapter(
            env,
            self.params,
            subrtn.expl_strat,
            self.reward_generator,
            horizon=self.svpg_horizon,
            num_rollouts_per_config=self.num_trajs_per_config,
            num_workers=num_workers,
        )
        self.svpg = SVPG(
            save_dir,
            self.svpg_wrapper,
            svpg_particle_hparam,
            max_iter,
            self.num_particles,
            self.svpg_temperature,
            self.svpg_lr,
            self.svpg_horizon,
            serial=svpg_serial,
            num_workers=num_workers,
            logger=logger,
        )
        self.svpg.save_name = "subrtn_svpg"
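To make the constructor arguments concrete, here is a hedged instantiation sketch. The class name `ADR`, the placeholder objects, and all hyperparameter values are assumptions made for illustration; only the keyword names come from the signature above, and `train()` is assumed to be the usual entry point inherited from `Algorithm`.

# Illustrative placeholders; every value here is an assumption, not a recipe
env = ...                      # a pyrado Env instance (possibly wrapped)
subrtn = ...                   # an Algorithm that optimizes the policy / value function
svpg_particle_hparam = dict()  # hyperparameters forwarded to each SVPG particle

algo = ADR(                    # hypothetical class name; only __init__ is shown above
    save_dir='/tmp/adr_demo',
    env=env,
    subrtn=subrtn,
    max_iter=100,
    svpg_particle_hparam=svpg_particle_hparam,
    num_svpg_particles=8,
    num_discriminator_epoch=10,
    batch_size=64,
    num_workers=4,
    randomized_params=['mass', 'length'],
)
algo.train()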