def generate_oscillation_data(dt, t_end, excitation):
    """
    Roll out a `OneMassOscillatorSim` once and return the first observation dimension over time.

    The domain parameters are fixed to `m=1`, `k=10`, `d=2`. Depending on `excitation`, the system is either
    driven by a time-based policy built from `_dirac_impulse` (starting from the zero state), or left alone by an
    idle policy while starting from the initial state `[0.5, 0]`.

    :param dt: time step size [s]
    :param t_end: time duration [s], converted into a number of steps via `ceil(t_end / dt)`
    :param excitation: type of excitation, either `'force'` or `'position'`
    :return: first column of the rollout's observations
    :raises pyrado.ValueErr: if `excitation` is neither `'force'` nor `'position'`
    """
    num_steps = np.ceil(t_end / dt)
    env = OneMassOscillatorSim(dt, num_steps)
    env.domain_param = {'m': 1., 'k': 10., 'd': 2.0}

    # Select the policy and the initial state that belong to the requested excitation
    if excitation == 'force':
        impulse_fcn = functools.partial(_dirac_impulse, env_spec=env.spec, amp=0.5)
        policy = TimePolicy(env.spec, impulse_fcn, dt)
        init_state = np.array([0, 0])
    elif excitation == 'position':
        policy = IdlePolicy(env.spec)
        init_state = np.array([0.5, 0])
    else:
        raise pyrado.ValueErr(given=excitation, eq_constraint="'force' or 'position'")

    # Generate the data
    trajectory = rollout(env, policy, reset_kwargs={'init_state': init_state}, record_dts=False)
    return trajectory.observations[:, 0]
def create_idle_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits):
    """
    Build a `BoxShelvingPosMPsSim` environment and pair it with an `IdlePolicy`.

    All arguments are forwarded unchanged to the environment's constructor. The movement primitives for both
    sides are passed as `None` (use defaults), and the collision configuration points to `collisionModel.xml`.

    :param physicsEngine: forwarded to the environment as `physicsEngine`
    :param graphFileName: forwarded to the environment as `graphFileName`
    :param dt: forwarded to the environment as `dt`
    :param max_steps: forwarded to the environment as `max_steps`
    :param ref_frame: forwarded to the environment as `ref_frame`
    :param checkJointLimits: forwarded to the environment as `checkJointLimits`
    :return: tuple of the environment and the idle policy created from the environment's spec
    """
    # Collect the constructor arguments first, keeping the original keyword order
    sim_kwargs = {
        'physicsEngine': physicsEngine,
        'graphFileName': graphFileName,
        'dt': dt,
        'max_steps': max_steps,
        'mps_left': None,  # use defaults
        'mps_right': None,  # use defaults
        'ref_frame': ref_frame,
        'collisionConfig': {'file': 'collisionModel.xml'},
        'checkJointLimits': checkJointLimits,
    }
    env = BoxShelvingPosMPsSim(**sim_kwargs)

    # The idle policy does not move at all
    return env, IdlePolicy(env.spec)
def idle_policy(env):
    """
    Create an `IdlePolicy` for the given environment.

    :param env: object exposing a `spec` attribute that is handed to the policy
    :return: the newly constructed `IdlePolicy`
    """
    spec = env.spec
    return IdlePolicy(spec)
"""
Test MuJoCo-based Hopper environment with an idle policy.
A `DummyPolicy` can be swapped in instead (see the note at the policy setup below).
"""
from pyrado.environments.mujoco.openai_hopper import HopperSim
from pyrado.domain_randomization.utils import print_domain_params
from pyrado.policies.dummy import DummyPolicy, IdlePolicy
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.data_types import RenderMode
from pyrado.utils.input_output import print_cbt


if __name__ == '__main__':
    # Set up environment
    env = HopperSim()

    # Set up policy (alternative: policy = DummyPolicy(env.spec))
    policy = IdlePolicy(env.spec)

    # Simulate until the post-rollout query reports that we are done
    done = False
    param = None
    state = None
    while not done:
        # Reset first, so the observation before and after the rollout's own reset can be compared
        env.reset()
        print_cbt(f'init obs (before): {env.observe(env.state)}', 'c')

        render_mode = RenderMode(text=False, video=True)
        reset_kwargs = dict(domain_param=param, init_state=env.state.copy())
        ro = rollout(env, policy, render_mode=render_mode, eval=True, reset_kwargs=reset_kwargs)

        print_domain_params(env.domain_param)
        print_cbt(f'init obs (after): {ro.observations[0]}', 'c')
        done, state, param = after_rollout_query(env, policy, ro)
import torch as to
import torch.optim as optim
import torch.nn as nn
from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim
from matplotlib import pyplot as plt
from pyrado.policies.dummy import IdlePolicy
from pyrado.sampling.rollout import rollout
from pyrado import set_seed


if __name__ == '__main__':
    # Generate the data: one rollout of the idle oscillator, started from the state [0.5, 0.]
    # NOTE(review): `np` is expected to be imported above this excerpt -- confirm
    set_seed(1001)
    env = OneMassOscillatorSim(dt=0.01, max_steps=500)
    init_state = np.array([0.5, 0.])
    ro = rollout(env, IdlePolicy(env.spec), reset_kwargs={'init_state': init_state})
    ro.torch(data_type=to.get_default_dtype())

    # Inputs are the first observation dimension of all but the last step, plus added observation noise;
    # targets are the same dimension shifted by one step
    clean_inp = ro.observations[:-1, 0]
    obs_noise = 0.01 * to.randn(clean_inp.shape)
    inp = clean_inp + obs_noise
    targ = ro.observations[1:, 0]

    # Problem dimensions
    inp_size, targ_size = 1, 1
    num_trn_samples = inp.shape[0]

    # Hyper-parameters
    loss_fcn = nn.MSELoss()
    num_epoch = 1000
    num_layers = 1
def idlepol_bobspec(default_bob):
    """
    Create an `IdlePolicy` using the spec of the given object.

    :param default_bob: object exposing a `spec` attribute that is passed on as the policy's `spec`
    :return: the newly constructed `IdlePolicy`
    """
    bob_spec = default_bob.spec
    return IdlePolicy(spec=bob_spec)
from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim
from pyrado.policies.dummy import IdlePolicy
from pyrado.policies.rnn import RNNPolicy, GRUPolicy, LSTMPolicy
from pyrado.sampling.rollout import rollout
from pyrado import set_seed
from pyrado.sampling.step_sequence import StepSequence


if __name__ == '__main__':
    # -----
    # Setup
    # -----

    # Generate the data: one rollout of the idle oscillator, started from the state [0.5, 0.]
    # NOTE(review): `np`, `to`, and `nn` are expected to be imported above this excerpt -- confirm
    set_seed(1001)
    env = OneMassOscillatorSim(dt=0.01, max_steps=500)
    init_state = np.array([0.5, 0.])
    ro = rollout(env, IdlePolicy(env.spec), reset_kwargs={'init_state': init_state})
    ro.torch(data_type=to.get_default_dtype())

    # Inputs are all observations but the last one, disturbed by observation noise;
    # targets are the first observation dimension shifted by one step
    clean_inp = ro.observations[:-1]
    obs_noise = 0.01*to.randn(clean_inp.shape)
    inp = clean_inp + obs_noise
    targ = ro.observations[1:, 0]

    # Wrap noisy inputs and targets into a step sequence (the targets are stored as the actions)
    inp_ro = StepSequence(rewards=ro.rewards, observations=inp, actions=targ)

    # Problem dimensions (input size is extracted from env.spec)
    targ_size = 1
    num_trn_samples = inp.shape[0]

    # Hyper-parameters
    loss_fcn = nn.MSELoss()
    num_epoch = 500
    num_layers = 1
    hidden_size = 20  # alternative noted in the original: targ_size