Example #1
0
def generate_oscillation_data(dt, t_end, excitation):
    """
    Use `OneMassOscillatorSim` to generate a 1-dim damped oscillation signal.

    :param dt: time step size [s]
    :param t_end: time duration [s]
    :param excitation: type of excitation, either (initial) 'position' or 'force' (function of time)
    :return: 1-dim oscillation trajectory (position coordinate of every observation)
    :raises pyrado.ValueErr: if `excitation` is neither 'force' nor 'position'
    """
    # np.ceil returns a float; the number of simulation steps must be an integer
    # (consistent with the `int(5 / dt)` style used elsewhere in this project)
    env = OneMassOscillatorSim(dt, int(np.ceil(t_end / dt)))
    env.domain_param = dict(m=1., k=10., d=2.0)

    if excitation == 'force':
        # Excite the system via a short force impulse, starting from rest
        policy = TimePolicy(
            env.spec,
            functools.partial(_dirac_impulse, env_spec=env.spec, amp=0.5), dt)
        reset_kwargs = dict(init_state=np.array([0, 0]))
    elif excitation == 'position':
        # Release the mass from a displaced initial position with no actuation
        policy = IdlePolicy(env.spec)
        reset_kwargs = dict(init_state=np.array([0.5, 0]))
    else:
        raise pyrado.ValueErr(given=excitation,
                              eq_constraint="'force' or 'position'")

    # Generate the data
    ro = rollout(env, policy, reset_kwargs=reset_kwargs, record_dts=False)
    return ro.observations[:, 0]  # keep only the position coordinate
Example #2
0
def create_default_randomizer_omo() -> DomainRandomizer:
    """
    Create the default randomizer for the `OneMassOscillatorSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim

    # Center each normal distribution on the nominal value and use a third of it as spread;
    # clip from below so the physical parameters stay strictly positive
    nominal = OneMassOscillatorSim.get_nominal_domain_param()
    distributions = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/3, clip_lo=1e-3)
        for key in ('m', 'k', 'd')
    ]
    return DomainRandomizer(*distributions)
Example #3
0
from pyrado.utils.data_types import EnvSpec


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(OneMassOscillatorSim.name, f'{SAC.name}_{TwoHeadedFNNPolicy.name}')

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environment: 50 Hz control with 200 steps, i.e. 4 s episodes
    env_hparams = dict(dt=1/50., max_steps=200)
    # NOTE(review): task_args is nested twice (task_args=dict(task_args=...)) — confirm
    # against OneMassOscillatorSim's constructor that the inner dict is intended
    env = OneMassOscillatorSim(**env_hparams, task_args=dict(task_args=dict(state_des=np.array([0.5, 0]))))
    env = ActNormWrapper(env)  # normalize the action space

    # Policy (two-headed network, as SAC needs two outputs per action)
    policy_hparam = dict(
        shared_hidden_sizes=[32, 32],
        shared_hidden_nonlin=to.relu,
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic: the Q-functions operate on the concatenation of observation and action spaces
    qfcn_hparam = dict(
        hidden_sizes=[32, 32],
        hidden_nonlin=to.relu
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
Example #4
0
        )

    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(
        OneMassOscillatorSim.name,
        f"{BayesSim.name}_{IdlePolicy.name}",
        num_segs_str + len_seg_str + seed_str,
    )

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments: 100 Hz with 400 steps, i.e. 4 s rollouts
    env_hparams = dict(dt=1 / 100.0, max_steps=400)
    # NOTE(review): task_args is nested twice (task_args=dict(task_args=...)) — confirm
    # against OneMassOscillatorSim's constructor that the inner dict is intended
    env_sim = OneMassOscillatorSim(**env_hparams,
                                   task_args=dict(task_args=dict(
                                       state_des=np.array([0.5, 0]))))

    # Create a fake ground truth target domain by copying the simulation environment
    num_real_rollouts = 2
    env_real = deepcopy(env_sim)
    # randomizer = DomainRandomizer(
    #     NormalDomainParam(name="mass", mean=0.8, std=0.8 / 50),
    #     NormalDomainParam(name="stiffness", mean=33.0, std=33 / 50),
    #     NormalDomainParam(name="damping", mean=0.3, std=0.3 / 50),
    # )
    # env_real = DomainRandWrapperBuffer(env_real, randomizer)
    # env_real.fill_buffer(num_real_rollouts)
    # Deterministic target domain: fixed mass, stiffness, and damping values
    env_real.domain_param = dict(m=0.8, k=36, d=0.3)

    # Behavioral policy
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Test model learning using PyTorch and the One Mass Oscillator setup.
"""
from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorDomainParamEstimator, OneMassOscillatorSim
from pyrado.policies.feed_forward.dummy import DummyPolicy
from pyrado.sampling.parallel_rollout_sampler import ParallelRolloutSampler
from pyrado.utils.input_output import print_cbt

if __name__ == "__main__":
    # Set up environment
    dp_gt = dict(m=2.0, k=20.0, d=0.8)  # ground truth
    dp_init = dict(m=1.0, k=22.0, d=0.4)  # initial guess
    dt = 1 / 50.0
    env = OneMassOscillatorSim(dt=dt, max_steps=400)
    env.reset(domain_param=dp_gt)

    # Set up policy
    # policy = IdlePolicy(env.spec)
    policy = DummyPolicy(env.spec)

    # Sample
    sampler = ParallelRolloutSampler(env,
                                     policy,
                                     num_workers=4,
                                     min_rollouts=50,
                                     seed=1)
    ros = sampler.sample()

    # Create a model for learning the domain parameters
        env = QCartPoleSwingUpSim(dt=dt,
                                  max_steps=int(5 / dt),
                                  wild_init=False)
        state = np.array([0, 87 / 180 * np.pi, 0, 0])

    elif args.env_name == QQubeSwingUpSim.name:
        # All branches simulate for 5 s at the chosen time step
        env = QQubeSwingUpSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([5 / 180 * np.pi, 87 / 180 * np.pi, 0, 0])  # angles converted from degrees to rad

    elif args.env_name == QBallBalancerSim.name:
        env = QBallBalancerSim(dt=dt, max_steps=int(5 / dt))
        state = np.array(
            [2 / 180 * np.pi, 2 / 180 * np.pi, 0.1, -0.08, 0, 0, 0, 0])  # first two entries are angles in rad

    elif args.env_name == OneMassOscillatorSim.name:
        env = OneMassOscillatorSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([-0.7, 0])  # presumably position and velocity — confirm against the env's state definition

    elif args.env_name == PendulumSim.name:
        env = PendulumSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([87 / 180 * np.pi, 0])  # angle converted from degrees to rad

    elif args.env_name == BallOnBeamSim.name:
        env = BallOnBeamSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([-0.25, 0, 0, +20 / 180 * np.pi])  # last entry is an angle in rad
    else:
        raise pyrado.ValueErr(
            given=args.env_name,
            eq_constraint=
            f"{QCartPoleSwingUpSim.name}, {QQubeSwingUpSim.name}, {QBallBalancerSim.name}, "
Example #7
0
 def default_omo():
     """Construct a `OneMassOscillatorSim` with the default test setup: 50 Hz, 300 steps (6 s), target position 0.5."""
     task_args = dict(state_des=np.array([0.5, 0]))
     return OneMassOscillatorSim(dt=0.02, max_steps=300, task_args=task_args)
Example #8
0
import numpy as np
import torch as to
import torch.optim as optim
import torch.nn as nn

from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim
from matplotlib import pyplot as plt
from pyrado.policies.dummy import IdlePolicy
from pyrado.sampling.rollout import rollout
from pyrado import set_seed

if __name__ == '__main__':
    # Generate the data: release the mass from a displaced position and record the passive decay
    set_seed(1001)
    env = OneMassOscillatorSim(dt=0.01, max_steps=500)
    ro = rollout(env,
                 IdlePolicy(env.spec),
                 reset_kwargs={'init_state': np.array([0.5, 0.])})
    ro.torch(data_type=to.get_default_dtype())
    # Inputs are the noisy positions, targets are the positions one step ahead (one-step prediction task)
    inp = ro.observations[:-1, 0] + 0.01 * to.randn(
        ro.observations[:-1, 0].shape)  # added observation noise
    targ = ro.observations[1:, 0]

    # Problem dimensions: scalar in, scalar out
    inp_size = 1
    targ_size = 1
    num_trn_samples = inp.shape[0]

    # Hyper-parameters
    loss_fcn = nn.MSELoss()
    num_epoch = 1000
Example #9
0
    # Run one rollout with the domain parameters taken from mu and return the final observation
    ro = rollout(
        env,
        policy,
        eval=True,
        reset_kwargs=dict(
            # domain_param=dict(k=mu[0], d=mu[1]), init_state=np.array([-0.7, 0.])  # no variance over the init state
            domain_param=dict(k=mu[0],
                              d=mu[1])  # no variance over the parameters
        ))
    return to.from_numpy(ro.observations[-1]).to(dtype=to.float32)


if __name__ == '__main__':
    pyrado.set_seed(0)

    env = OneMassOscillatorSim(dt=0.005, max_steps=200)
    policy = IdlePolicy(env.spec)  # passive rollouts, no actuation

    # Uniform prior over the two parameters the simulator consumes as (k, d)
    prior = utils.BoxUniform(low=to.tensor([25., 0.05]),
                             high=to.tensor([35., 0.15]))

    # Let's learn a likelihood from the simulator
    num_sim = 500
    method = 'SNRE'  # SNPE or SNLE or SNRE
    posterior = infer(
        simulator,
        prior,
        method=method,  # SNRE newer than SNLE newer than SNPE
        num_workers=-1,
        num_simulations=num_sim)