def generate_oscillation_data(dt, t_end, excitation):
    """
    Use OMOEnv to generate a 1-dim damped oscillation signal.

    :param dt: time step size [s]
    :param t_end: time duration [s]
    :param excitation: type of excitation, either (initial) 'position' or 'force' (function of time)
    :return: 1-dim oscillation trajectory
    :raises pyrado.ValueErr: if `excitation` is neither 'force' nor 'position'
    """
    # np.ceil returns a float; the number of steps must be an integer, so cast explicitly
    env = OneMassOscillatorSim(dt, int(np.ceil(t_end / dt)))
    env.domain_param = dict(m=1.0, k=10.0, d=2.0)

    if excitation == "force":
        # Excite the system with a short force impulse, starting from rest
        policy = TimePolicy(env.spec, functools.partial(_dirac_impulse, env_spec=env.spec, amp=0.5), dt)
        reset_kwargs = dict(init_state=np.array([0, 0]))
    elif excitation == "position":
        # Release the mass from a displaced initial position with no applied force
        policy = IdlePolicy(env.spec)
        reset_kwargs = dict(init_state=np.array([0.5, 0]))
    else:
        raise pyrado.ValueErr(given=excitation, eq_constraint="'force' or 'position'")

    # Generate the data
    ro = rollout(env, policy, reset_kwargs=reset_kwargs, record_dts=False)
    return ro.observations[:, 0]
def create_ik_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits):
    """
    Set up the BallInTube IK simulation and a time-based policy.

    :param physicsEngine: name of the physics engine to use
    :param graphFileName: name of the graph file
    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param ref_frame: reference frame for the task
    :param checkJointLimits: whether to check the joint limits
    :return: tuple of environment and policy
    """
    # Time-dependent action function. Renamed from `policy` so it no longer
    # shadows the TimePolicy instance created below.
    def policy_fcn(t: float):
        if t < 2:
            return [0.001, 0.001, 0.001, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.002, 0.002, 0.002]
        else:
            return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    # Set up environment
    env = BallInTubeIKSim(
        usePhysicsNode=True,
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        ref_frame=ref_frame,
        fixedInitState=True,
        checkJointLimits=checkJointLimits,
        taskCombinationMethod="sum",
        collisionAvoidanceIK=True,
        observeVelocity=False,
        observeCollisionCost=True,
        observePredictedCollisionCost=True,
        observeManipulabilityIndex=True,
        observeCurrentManipulability=True,
        observeTaskSpaceDiscrepancy=True,
    )

    # Set up policy
    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy
def create_joint_control_setup(dt, max_steps, max_dist_force, physics_engine):
    """
    Set up the Planar3Link joint control simulation and simulate one rollout
    with a sinusoidal joint-position policy.

    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param max_dist_force: maximum disturbance force
    :param physics_engine: name of the physics engine to use
    :return: the recorded rollout
    """
    # Set up environment
    env = Planar3LinkJointCtrlSim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        taskCombinationMethod="sum",
        checkJointLimits=True,
    )
    print_domain_params(env.domain_param)

    # Set up policy: keep the first two joints at the initial configuration and
    # oscillate the last link with a 0.2 Hz sine of 45 deg amplitude
    def policy_fcn(t: float):
        base_angle = 10 / 180 * math.pi  # same as init config
        oscillation = 45.0 / 180.0 * math.pi * math.sin(2.0 * math.pi * 0.2 * t)
        return [base_angle, base_angle, base_angle + oscillation]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
def create_ds_aktivation_setup(dt, max_steps, max_dist_force, physics_engine, graph_file_name):
    """
    Set up the PlanarInsert task-activation simulation and simulate one rollout
    with constant dynamical-system activations.

    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param max_dist_force: maximum disturbance force
    :param physics_engine: name of the physics engine to use
    :param graph_file_name: name of the graph file
    :return: the recorded rollout
    """
    # Set up environment
    env = PlanarInsertTASim(
        physicsEngine=physics_engine,
        graphFileName=graph_file_name,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        taskCombinationMethod="sum",  # 'sum', 'mean', 'product', or 'softmax'
        checkJointLimits=False,
        collisionAvoidanceIK=True,
        observeForceTorque=True,
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemGoalDistance=False,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=True,
    )
    env.reset(domain_param=dict(effector_friction=1.0))

    # Set up policy: the activations are constant over the whole rollout
    constant_activation = [0.7, 1, 0, 0.1, 0.5, 0.5]

    def policy_fcn(t: float):
        return list(constant_activation)

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate and plot potentials
    print(env.obs_space.labels)
    return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=False)
def create_ika_setup(dt, max_steps, max_dist_force, physics_engine):
    """
    Set up the Planar3Link IK-activation simulation and simulate one rollout
    with a time-based activation policy.

    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param max_dist_force: maximum disturbance force
    :param physics_engine: name of the physics engine to use
    :return: the recorded rollout
    """
    # Set up environment
    env = Planar3LinkIKActivationSim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        taskCombinationMethod="product",
        positionTasks=True,
        checkJointLimits=False,
        collisionAvoidanceIK=True,
        observeTaskSpaceDiscrepancy=True,
    )
    print_domain_params(env.domain_param)

    # Set up policy: first activation oscillates around 0.3 with a 0.2 Hz sine,
    # second stays off, third stays fully on
    def policy_fcn(t: float):
        oscillating = 0.3 + 0.2 * math.sin(2.0 * math.pi * 0.2 * t)
        return [oscillating, 0, 1]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
def create_manual_activation_setup(dt, max_steps, max_dist_force, physics_engine):
    """
    Set up the Planar3Link task-activation simulation where the activations are
    entered interactively by the user at every step.

    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param max_dist_force: maximum disturbance force
    :param physics_engine: name of the physics engine to use
    :return: the recorded rollout
    """
    # Set up environment
    env = Planar3LinkTASim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        taskCombinationMethod="sum",
        positionTasks=True,
        observeTaskSpaceDiscrepancy=True,
    )
    print_domain_params(env.domain_param)

    # Set up policy: read three space-separated potentials from stdin and squash
    # them to (0, 1) via the logistic function
    def policy_fcn(t: float):
        raw = input("Enter potentials for next step: ")
        pot = np.fromstring(raw, dtype=np.double, count=3, sep=" ")
        return 1 / (1 + np.exp(-pot))

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
def create_ds_setup(dt, max_steps, max_dist_force, physics_engine):
    """
    Set up the Planar3Link task-activation simulation and simulate one rollout
    with a piecewise-constant activation schedule.

    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param max_dist_force: maximum disturbance force
    :param physics_engine: name of the physics engine to use
    :return: the recorded rollout
    """
    # Set up environment
    env = Planar3LinkTASim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        taskCombinationMethod="sum",
        positionTasks=True,
        checkJointLimits=False,
        collisionAvoidanceIK=True,
        observeCollisionCost=True,
        observeTaskSpaceDiscrepancy=True,
        observeDynamicalSystemDiscrepancy=False,
    )
    print(env.obs_space.labels)

    # Set up policy: piecewise-constant schedule of (time upper bound [s], activations)
    schedule = (
        (3, [0, 1, 0]),
        (7, [1, 0, 0]),
        (10, [0.5, 0.5, 0]),
    )

    def policy_fcn(t: float):
        for t_upper, activation in schedule:
            if t < t_upper:
                return list(activation)
        return [0, 0, 1]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=False)
def create_vel_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits):
    """
    Set up the BallInTube velocity-level DS simulation and a constant-activation time policy.

    :param physicsEngine: name of the physics engine to use
    :param graphFileName: name of the graph file
    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param ref_frame: reference frame for the task
    :param checkJointLimits: whether to check the joint limits
    :return: tuple of environment and policy
    """
    # Constant activations for all dynamical systems. Renamed from `policy` so it
    # no longer shadows the TimePolicy instance created below.
    def policy_fcn(t: float):
        return [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

    # Set up environment
    env = BallInTubeVelDSSim(
        usePhysicsNode=True,
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        tasks_left=None,  # use defaults
        tasks_right=None,  # use defaults
        ref_frame=ref_frame,
        fixedInitState=True,
        checkJointLimits=checkJointLimits,
        taskCombinationMethod="sum",
        collisionAvoidanceIK=False,
        observeVelocity=True,
        observeCollisionCost=True,
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=True,
        observeForceTorque=True,
        observeDynamicalSystemGoalDistance=False,
    )

    # Set up policy
    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy
def mean_variant(action_model_type, mps, dt, max_steps, physics_engine, render_mode):
    """
    Set up an MPBlendingSim using the 'mean' task combination method and simulate one rollout.

    :param action_model_type: type of action model, forwarded to `MPBlendingSim`
    :param mps: movement primitive specification, forwarded to `MPBlendingSim`
    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param physics_engine: name of the physics engine to use
    :param render_mode: `RenderMode` passed to `rollout`
    :return: the recorded rollout
    """
    # Set up environment
    env = MPBlendingSim(
        action_model_type=action_model_type,
        mps=mps,
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        collisionAvoidanceIK=False,
        taskCombinationMethod="mean",
    )

    # Set up policy
    # NOTE(review): `policy_fcn` is not defined inside this function — presumably it is
    # a module-level function defined elsewhere in the file; verify it is in scope.
    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env, policy, render_mode=render_mode, stop_on_done=False)
def create_vel_ika_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits):
    """
    Set up the BoxLifting velocity-level IK-activation simulation and a time-based policy.

    :param physicsEngine: name of the physics engine to use
    :param graphFileName: name of the graph file
    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param ref_frame: reference frame for the task
    :param checkJointLimits: whether to check the joint limits
    :return: tuple of environment and policy
    """
    # Set up environment
    env = BoxLiftingVelIKActivationSim(
        usePhysicsNode=True,
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        ref_frame=ref_frame,
        collisionConfig={"file": "collisionModel.xml"},
        fixedInitState=True,
        checkJointLimits=checkJointLimits,
        taskCombinationMethod="sum",
        collisionAvoidanceIK=True,
        observeVelocity=True,
        observeCollisionCost=True,
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=True,
        observeForceTorque=True,
        observeDynamicalSystemGoalDistance=False,
    )

    # Time-dependent action function. Renamed from `policy` so it no longer shadows
    # the TimePolicy instance created below. Branch logic is unchanged (every branch
    # returns, so the bare `if`s behave like `elif`s).
    def policy_fcn(t: float):
        if t < 2:
            return [0.0, -0.2]
        if t < 5.0:
            return [0.3, -0.05]
        if t < 8:
            return [0.15, 0.3]
        elif t < 10:
            return [0.1, 0.45]
        else:
            return [0.0, 0.0]

    # Set up policy
    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy
def create_setup(physics_engine, dt, max_steps, max_dist_force):
    """
    Set up the BallOnPlate5D simulation with normalized actions and a time-based
    plate-acceleration policy.

    :param physics_engine: name of the physics engine to use
    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param max_dist_force: maximum disturbance force
    :return: tuple of environment and policy
    """
    # Set up environment
    env = BallOnPlate5DSim(physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force)
    env = ActNormWrapper(env)
    print_domain_params(env.domain_param)

    # Set up policy: actions are the plate accelerations [x, y, z, alpha, beta]
    def policy_fcn(t: float):
        y_ddot_plate = 10.0 * math.cos(2.0 * math.pi * 5 * t)
        z_ddot_plate = 0.5 * math.cos(2.0 * math.pi / 5.0 * t)
        return [0.0, y_ddot_plate, z_ddot_plate, 0.0, 0.0]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy
def create_pos_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits):
    """
    Set up the BoxShelving position-level DS simulation and a constant-activation time policy.

    :param physicsEngine: name of the physics engine to use
    :param graphFileName: name of the graph file
    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param ref_frame: reference frame for the task
    :param checkJointLimits: whether to check the joint limits
    :return: tuple of environment and policy
    """
    # Set up environment
    env = BoxShelvingPosDSSim(
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        tasks_left=None,  # use defaults
        tasks_right=None,  # use defaults
        ref_frame=ref_frame,
        fixedInitState=True,
        collisionConfig={"file": "collisionModel.xml"},
        checkJointLimits=checkJointLimits,
        taskCombinationMethod="sum",
        collisionAvoidanceIK=True,
        observeVelocities=False,
        observeCollisionCost=True,
        observePredictedCollisionCost=True,
        observeManipulabilityIndex=True,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=True,
        observeTaskSpaceDiscrepancy=True,
        observeForceTorque=True,
        observeDynamicalSystemGoalDistance=True,
    )

    # Print the box position for every combination of the two frame arguments
    frame_combinations = (
        ("", ""),
        ("GoalUpperShelve", ""),
        ("", "GoalUpperShelve"),
        ("GoalUpperShelve", "GoalUpperShelve"),
    )
    for frame_a, frame_b in frame_combinations:
        print(env.get_body_position("Box", frame_a, frame_b))

    # Set up policy: constant activations (PG position, PG orientation, hand joints)
    def policy_fcn(t: float):
        return [0, 0, 1, 0, 0, 0.01]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy
def create_mg_joint_pos_policy(env: SimEnv, t_strike_end: float = 0.5) -> TimePolicy:
    """
    Create a policy that executes the strike for mini golf by setting joint position commands.
    Used in the experiments of [1].

    .. seealso::
        [1] F. Muratore, T. Gruner, F. Wiese, B. Belousov, M. Gienger, J. Peters, "TITLE", VENUE, YEAR

    :param env: mini golf simulation environment
    :param t_strike_end: time when to finish the movement [s]
    :return: policy which executes the strike solely dependent on the time
    """
    # Initial and final joint configurations of the strike motion
    q_init = to.tensor([18.996253, -87.227101, 74.149568, -75.577025, 56.207369, -175.162794, -41.543793])
    q_end = to.tensor([8.628977, -93.443498, 72.302435, -82.31844, 52.146531, -183.896354, -51.560886])

    joint_pos_fcn = functools.partial(_fcn_mg_joint_pos, q_init=q_init, q_end=q_end, t_strike_end=t_strike_end)
    return TimePolicy(env.spec, joint_pos_fcn, env.dt)
# 6: "joint_7_damping", # } # dp_mapping = { # 0: "joint_1_dryfriction", # 1: "joint_2_dryfriction", # 2: "joint_3_dryfriction", # 3: "joint_4_dryfriction", # 4: "joint_5_dryfriction", # 5: "joint_6_dryfriction", # 6: "joint_7_dryfriction", # } # dp_mapping = create_damping_dryfriction_domain_param_map_wamjsc() # Behavioral policy policy_hparam = dict() policy = TimePolicy(env_real.spec, wam_jsp_7dof_sin, env_real.dt) # Prior dp_nom = env_sim.get_nominal_domain_param() prior_hparam = dict( low=to.tensor([dp_nom[name] * 0.5 for name in dp_mapping.values()]), high=to.tensor([dp_nom[name] * 1.5 for name in dp_mapping.values()]), ) prior = sbiutils.BoxUniform(**prior_hparam) # Time series embedding embedding_hparam = dict( downsampling_factor=1, # len_rollouts=env_sim.max_steps, # recurrent_network_type=nn.RNN, # only_last_output=True,
def time_policy(env: Env):
    """
    Create a time-based policy that returns a uniformly random action at every step.

    :param env: environment providing the spec (action dimension) and the time step size
    :return: `TimePolicy` emitting random actions
    """

    def random_action_fcn(t: float):
        # One uniform [0, 1) sample per action dimension
        return list(np.random.rand(env.spec.act_space.flat_dim))

    return TimePolicy(env.spec, dt=env.dt, fcn_of_time=random_action_fcn)
def create_vel_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, bidirectional_mps, checkJointLimits):
    """
    Set up the BoxShelving velocity-level DS simulation and a time policy that steps
    through a piecewise-constant activation schedule, one activation vector per second.

    :param physicsEngine: name of the physics engine to use
    :param graphFileName: name of the graph file
    :param dt: time step size [s]
    :param max_steps: maximum number of simulation steps
    :param ref_frame: reference frame for the task
    :param bidirectional_mps: whether the movement primitives are bidirectional
    :param checkJointLimits: whether to check the joint limits
    :return: tuple of environment and policy
    """
    # One activation vector per 1 s interval, i.e. entry k is active for (k-1, k] s;
    # after the last interval the idle activation is emitted.
    if bidirectional_mps:
        schedule = [
            [0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, -1.0, 0.0],
            [0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, -1.0, 0.0, 0.0],
            [0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, -1.0, 0.0, 0.0, 0.0],
            [0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, -1.0, 0.0, 0.0, 0.0, 0.0],
            [1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
            [-1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
        ]
        idle = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    else:
        schedule = [
            [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        ]
        idle = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    def policy_fcn(t: float):
        # Matches the original `t <= k` chain for k = 1..10
        for t_upper, activation in enumerate(schedule, start=1):
            if t <= t_upper:
                return list(activation)
        return list(idle)

    # Set up environment
    env = BoxShelvingVelDSSim(
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        tasks_left=None,  # use defaults
        tasks_right=None,  # use defaults
        ref_frame=ref_frame,
        bidirectional_mps=bidirectional_mps,
        fixedInitState=True,
        collisionConfig={"file": "collisionModel.xml"},
        checkJointLimits=checkJointLimits,
        taskCombinationMethod="sum",
        observeVelocities=True,
        observeCollisionCost=True,
        observePredictedCollisionCost=True,
        observeManipulabilityIndex=True,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=True,
        observeTaskSpaceDiscrepancy=True,
        observeForceTorque=True,
        observeDynamicalSystemGoalDistance=True,
    )

    # Set up policy
    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy
# Get the experiment's directory to load from ex_dir = ask_for_experiment(hparam_list=args.show_hparams) if args.dir is None else args.dir ex_tag = ex_dir.split("--", 1)[1] # Load the policy and the environment (for constructing the real-world counterpart) env_sim, policy, _ = load_experiment(ex_dir) if args.verbose: print(f"Policy params:\n{policy.param_values.detach().cpu().numpy()}") # Create real-world counterpart (without domain randomization) env_real = QCartPoleStabReal(args.dt, args.max_steps) print_cbt("Set up the QCartPoleStabReal environment.", "c") # Set up the disturber disturber_pos = TimePolicy(env_real.spec, volt_disturbance_pos, env_real.dt) disturber_neg = TimePolicy(env_real.spec, volt_disturbance_neg, env_real.dt) steps_disturb = 10 print_cbt( f"Set up the disturbers for the QCartPoleStabReal environment." f"\nVolt disturbance: {6} volts for {steps_disturb} steps", "c", ) # Center cart and reset velocity filters and wait until the user or the conroller has put pole upright env_real.reset() print_cbt("Ready", "g") ros = [] for r in range(args.num_runs): if args.mode == "wo":
act = act_predef[int(t / dt)] return act.repeat(env_real.act_space.flat_dim) elif args.mode.lower() == "sin": def fcn_of_time(t: float): act = max_amp * np.sin(2 * np.pi * t * 2.0) # 2 Hz return act.repeat(env_real.act_space.flat_dim) elif args.mode.lower() == "wam_sin": fcn_of_time = wam_jsp_7dof_sin else: raise pyrado.ValueErr(given=args.mode, eq_constraint="chrip or sin") policy = TimePolicy(env_real.spec, fcn_of_time, dt) # Simulate before executing if check_in_sim: print_cbt(f"Running action {args.mode} policy in simulation...", "c") done = False while not done: ro = rollout(env_sim, policy, eval=True, render_mode=RenderMode(video=True)) done, _, _ = after_rollout_query(env_real, policy, ro) # Run on device print_cbt(f"Running action {args.mode} policy on the robot...", "c") done = False
from pyrado.policies.feed_forward.time import TimePolicy from pyrado.sampling.rollout import rollout from pyrado.utils.data_types import RenderMode if __name__ == "__main__": # Set up environment dt = 1 / 5000.0 max_steps = 5000 env = QQubeRcsSim(physicsEngine="Bullet", dt=dt, max_steps=max_steps, max_dist_force=None) # Bullet or Vortex print_domain_params(env.domain_param) # Set up policy policy = TimePolicy(env.spec, lambda t: [1.0], dt) # constant acceleration with 1. rad/s**2 # Simulate ro = rollout( env, policy, render_mode=RenderMode(video=True), reset_kwargs=dict(init_state=np.array([0, 3 / 180 * np.pi, 0, 0.0])), ) # Plot print( f"After {max_steps*dt} s of accelerating with 1. rad/s**2, we should be at {max_steps*dt} rad/s" ) print( f"Difference: {max_steps*dt - ro.observations[-1][2]} rad/s (mind the swinging pendulum)"