def ds_activation_variant(dt, max_steps, max_dist_force, physics_engine, graph_file_name): # Set up environment env = PlanarInsertTASim( physicsEngine=physics_engine, graphFileName=graph_file_name, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, taskCombinationMethod='sum', # 'sum', 'mean', 'product', or 'softmax' checkJointLimits=False, collisionAvoidanceIK=True, observeForceTorque=True, observePredictedCollisionCost=False, observeManipulabilityIndex=False, observeCurrentManipulability=True, observeDynamicalSystemGoalDistance=False, observeDynamicalSystemDiscrepancy=False, observeTaskSpaceDiscrepancy=True, ) env.reset(domain_param=dict(effector_friction=1.)) # Set up policy def policy_fcn(t: float): return [0.7, 1, 0, 0.1, 0.5, 0.5] policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate and plot potentials print(env.obs_space.labels) return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=False)
def create_ik_activation_setup(dt, max_steps, max_dist_force, physics_engine): # Set up environment env = Planar3LinkIKActivationSim( physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, taskCombinationMethod='product', positionTasks=True, checkJointLimits=False, collisionAvoidanceIK=True, observeTaskSpaceDiscrepancy=True, ) print_domain_params(env.domain_param) # Set up policy def policy_fcn(t: float): return [0.3 + 0.2 * math.sin(2. * math.pi * 0.2 * t), 0, 1] policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
def create_manual_activation_setup(dt, max_steps, max_dist_force, physics_engine): # Set up environment env = Planar3LinkTASim(physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, positionTasks=True, observeTaskSpaceDiscrepancy=True) print_domain_params(env.domain_param) # Set up policy def policy_fcn(t: float): pot = np.fromstring(input("Enter potentials for next step: "), dtype=np.double, count=3, sep=' ') return 1 / (1 + np.exp(-pot)) policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
def create_joint_control_setup(dt, max_steps, max_dist_force, physics_engine): # Set up environment env = Planar3LinkJointCtrlSim( physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, checkJointLimits=True, ) print_domain_params(env.domain_param) # Set up policy def policy_fcn(t: float): return [ 10 / 180 * math.pi, 10 / 180 * math.pi, # same as init config 10 / 180 * math.pi + 45. / 180. * math.pi * math.sin(2. * math.pi * 0.2 * t) ] # oscillation in last link policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
def create_ik_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits): def policy(t: float): if t < 2: return [ 0.001, 0.001, 0.001, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.002, 0.002, 0.002 ] else: return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # Set up environment env = BallInTubeIKSim( usePhysicsNode=True, physicsEngine=physicsEngine, graphFileName=graphFileName, dt=dt, max_steps=max_steps, ref_frame=ref_frame, fixedInitState=True, checkJointLimits=checkJointLimits, collisionAvoidanceIK=True, observeVelocity=False, observeCollisionCost=True, observePredictedCollisionCost=True, observeManipulabilityIndex=True, observeCurrentManipulability=True, observeTaskSpaceDiscrepancy=True, ) # Set up policy policy = TimePolicy(env.spec, policy, dt) return env, policy
def create_ds_activation_setup(dt, max_steps, max_dist_force, physics_engine): # Set up environment env = Planar3LinkTASim( physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, positionTasks=True, checkJointLimits=False, collisionAvoidanceIK=True, observeCollisionCost=True, observeTaskSpaceDiscrepancy=True, observeDynamicalSystemDiscrepancy=False, ) print(env.obs_space.labels) # Set up policy def policy_fcn(t: float): if t < 3: return [0, 1, 0] elif t < 7: return [1, 0, 0] elif t < 10: return [.5, 0.5, 0] else: return [0, 0, 1] policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=False)
def create_velocity_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits): def policy(t: float): return [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] # Set up environment env = BallInTubeVelDSSim( usePhysicsNode=True, physicsEngine=physicsEngine, graphFileName=graphFileName, dt=dt, max_steps=max_steps, mps_left=None, # use defaults mps_right=None, # use defaults ref_frame=ref_frame, fixedInitState=True, taskCombinationMethod='sum', checkJointLimits=checkJointLimits, collisionAvoidanceIK=False, observeVelocity=True, observeCollisionCost=True, observePredictedCollisionCost=False, observeManipulabilityIndex=False, observeCurrentManipulability=True, observeDynamicalSystemDiscrepancy=False, observeTaskSpaceDiscrepancy=True, observeForceTorque=True, observeDynamicalSystemGoalDistance=False, ) # Set up policy policy = TimePolicy(env.spec, policy, dt) return env, policy
def generate_oscillation_data(dt, t_end, excitation): """ Use OMOEnv to generate a 1-dim damped oscillation signal. :param dt: time step size [s] :param t_end: Time duration [s] :param excitation: type of excitation, either (initial) 'position' or 'force' (function of time) :return: 1-dim oscillation trajectory """ env = OneMassOscillatorSim(dt, np.ceil(t_end / dt)) env.domain_param = dict(m=1., k=10., d=2.0) if excitation == 'force': policy = TimePolicy( env.spec, functools.partial(_dirac_impulse, env_spec=env.spec, amp=0.5), dt) reset_kwargs = dict(init_state=np.array([0, 0])) elif excitation == 'position': policy = IdlePolicy(env.spec) reset_kwargs = dict(init_state=np.array([0.5, 0])) else: raise pyrado.ValueErr(given=excitation, eq_constraint="'force' or 'position'") # Generate the data ro = rollout(env, policy, reset_kwargs=reset_kwargs, record_dts=False) return ro.observations[:, 0]
def create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits): # Set up environment env = BoxFlippingPosDSSim( usePhysicsNode=True, physicsEngine=physicsEngine, graphFileName=graphFileName, dt=dt, max_steps=max_steps, mps_left=None, # use defaults mps_right=None, # use defaults ref_frame=ref_frame, collisionConfig={'file': 'collisionModel.xml'}, checkJointLimits=checkJointLimits, collisionAvoidanceIK=False, observeVelocities=True, observeCollisionCost=True, observePredictedCollisionCost=True, observeManipulabilityIndex=True, observeCurrentManipulability=True, observeDynamicalSystemDiscrepancy=True, observeTaskSpaceDiscrepancy=False, observeDynamicalSystemGoalDistance=True, ) # Set up policy # def policy(t: float): # if t < 3.1: # return [0, 0, 0, 0, # 0, 0, 0, 0, 1] # elif t <= 4.5: # return [0, 0, 0, 0, # 0, 0, 0, 0, 1] # else: # return [0, 0, 0, 0, # 0, 0, 0, 0, 1] def policy(t: float): if t <= 5: return [0.2, 0, 0, 0, 0, 1] else: return [-0.1, 0, 0, 0, 0.1, 1.] policy = TimePolicy(env.spec, policy, dt) return env, policy
def softmax_variant(action_model_type, mps, dt, max_steps, physics_engine, render_mode): # Set up environment env = MPBlendingSim( action_model_type=action_model_type, mps=mps, physicsEngine=physics_engine, dt=dt, max_steps=max_steps, collisionAvoidanceIK=False, taskCombinationMethod='softmax' ) # Set up policy policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate return rollout(env, policy, render_mode=render_mode, stop_on_done=False)
def create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits): def policy_fcn(t: float): return [ 0, 0, 1, # PG position 0, 0, # PG orientation 0.01 ] # hand joints # Set up environment env = BoxShelvingPosDSSim( physicsEngine=physicsEngine, graphFileName=graphFileName, dt=dt, max_steps=max_steps, mps_left=None, # use defaults mps_right=None, # use defaults ref_frame=ref_frame, fixedInitState=True, collisionConfig={'file': 'collisionModel.xml'}, checkJointLimits=checkJointLimits, collisionAvoidanceIK=True, observeVelocities=False, observeCollisionCost=True, observePredictedCollisionCost=True, observeManipulabilityIndex=True, observeCurrentManipulability=True, observeDynamicalSystemDiscrepancy=True, observeTaskSpaceDiscrepancy=True, observeForceTorque=True, observeDynamicalSystemGoalDistance=True, ) print(env.get_body_position('Box', '', '')) print(env.get_body_position('Box', 'GoalUpperShelve', '')) print(env.get_body_position('Box', '', 'GoalUpperShelve')) print(env.get_body_position('Box', 'GoalUpperShelve', 'GoalUpperShelve')) # Set up policy policy = TimePolicy(env.spec, policy_fcn, dt) return env, policy
def create_setup(physics_engine, dt, max_steps, max_dist_force): # Set up environment env = BallOnPlate5DSim(physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force) env = ActNormWrapper(env) print_domain_params(env.domain_param) # Set up policy def policy_fcn(t: float): return [ 0., # x_ddot_plate 10. * math.cos(2. * math.pi * 5 * t), # y_ddot_plate 0.5 * math.cos(2. * math.pi / 5. * t), # z_ddot_plate 0., # alpha_ddot_plate 0., # beta_ddot_plate ] policy = TimePolicy(env.spec, policy_fcn, dt) return env, policy
def create_ik_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits): # Set up environment env = BoxFlippingIKActivationSim( usePhysicsNode=True, physicsEngine=physicsEngine, graphFileName=graphFileName, dt=dt, max_steps=max_steps, ref_frame=ref_frame, collisionConfig={'file': 'collisionModel.xml'}, checkJointLimits=checkJointLimits, ) # Set up policy def policy(t: float): # return [0.1, 0.05, 0.1, # left Y, Z, dist # -0.1, 0.05, 0.1] # right X, Y, dist return [0.0, 0.05, # left Y, Z -0.1, 0.05] # right X, Y policy = TimePolicy(env.spec, policy, dt) return env, policy
def create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits): def policy(t: float): if t < 3.1: return [0, 0.6, 0, 0] elif t <= 4.5: return [0, 0.6, 0, 1] else: return [0, 0, 0, 0] # Set up environment env = BoxLiftingSimplePosDSSim( usePhysicsNode=False, physicsEngine=physicsEngine, graphFileName=graphFileName, dt=dt, max_steps=max_steps, mps_left=None, # use defaults mps_right=None, # use defaults ref_frame=ref_frame, taskCombinationMethod='sum', checkJointLimits=checkJointLimits, collisionAvoidanceIK=False, observeVelocities=False, observeCollisionCost=True, observePredictedCollisionCost=True, observeManipulabilityIndex=True, observeCurrentManipulability=True, observeDynamicalSystemDiscrepancy=True, observeTaskSpaceDiscrepancy=True, observeDynamicalSystemGoalDistance=True, ) # Set up policy policy = TimePolicy(env.spec, policy, dt) return env, policy
def ik_activation_variant(dt, max_steps, max_dist_force, physics_engine, graph_file_name): # Set up environment env = PlanarInsertIKActivationSim( physicsEngine=physics_engine, graphFileName=graph_file_name, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, checkJointLimits=False, collisionAvoidanceIK=True, observeForceTorque=True, observePredictedCollisionCost=False, observeManipulabilityIndex=False, observeCurrentManipulability=True, observeDynamicalSystemGoalDistance=False, observeDynamicalSystemDiscrepancy=False, observeTaskSpaceDiscrepancy=True, ) env.reset(domain_param=dict(effector_friction=1.)) # Set up policy def policy_fcn(t: float): return [ 0.1 * dt, -0.01 * dt, 3 / 180. * math.pi * math.sin(2. * math.pi * 2. * t) ] # [m/s, m/s, rad/s] policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate and plot potentials print(env.obs_space.labels) return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=False)
def time_policy(env): def timefcn(t: float): return list(np.random.rand(env.spec.act_space.flat_dim)) return TimePolicy(env.spec, dt=env.dt, fcn_of_time=timefcn)
from pyrado.sampling.rollout import rollout from pyrado.utils.data_types import RenderMode if __name__ == '__main__': # Set up environment dt = 1 / 5000. max_steps = 5000 env = QQubeRcsSim( physicsEngine='Bullet', # Bullet or Vortex dt=dt, max_steps=max_steps, max_dist_force=None) print_domain_params(env.domain_param) # Set up policy policy = TimePolicy(env.spec, lambda t: [1.], dt) # constant acceleration with 1. rad/s**2 # Simulate ro = rollout( env, policy, render_mode=RenderMode(video=True), reset_kwargs=dict(init_state=np.array([0, 3 / 180 * np.pi, 0, 0.]))) # Plot print( f'After {max_steps*dt} s of accelerating with 1. rad/s**2, we should be at {max_steps*dt} rad/s' ) print( f'Difference: {max_steps*dt - ro.observations[-1][2]} rad/s (mind the swinging pendulum)' )
], collisionConfig={ 'file': "collisionModel.xml", # 'pairs': [ # { # 'body1': 'PowerGrasp_L', # 'body2': 'PowerGrasp_R', # }, # ], # 'threshold': 0.1, }, checkJointLimits=True, ) print(env.obs_space.labels) # Set up policy policy = TimePolicy(spec=env.spec, fcn_of_time=policy_fcn, dt=dt) # Create scripted version of the policy tm = policy.script() print(tm.graph) print(tm.code) # Export config and policy for C++ env.save_config_xml(osp.join(rcsenv.RCSPYSIM_CONFIG_PATH, 'TargetTracking', "exTT_export.xml")) tm.save(osp.join(rcsenv.RCSPYSIM_CONFIG_PATH, 'TargetTracking', 'pTT_simpletime.pth')) # Simulate and plot ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True) draw_rewards(ro)
# Get the experiment's directory to load from ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir ex_tag = ex_dir.split('--', 1)[1] # Load the policy and the environment (for constructing the real-world counterpart) env_sim, policy, _ = load_experiment(ex_dir) if args.verbose: print(f'Policy params:\n{policy.param_values.detach().cpu().numpy()}') # Create real-world counterpart (without domain randomization) env_real = QCartPoleStabReal(args.dt, args.max_steps) print_cbt('Set up the QCartPoleStabReal environment.', 'c') # Set up the disturber disturber_pos = TimePolicy(env_real.spec, volt_disturbance_pos, env_real.dt) disturber_neg = TimePolicy(env_real.spec, volt_disturbance_neg, env_real.dt) steps_disturb = 10 print_cbt( f'Set up the disturbers for the QCartPoleStabReal environment.' f'\nVolt disturbance: {6} volts for {steps_disturb} steps', 'c') # Center cart and reset velocity filters and wait until the user or the conroller has put pole upright env_real.reset() print_cbt('Ready', 'g') ros = [] for r in range(args.num_runs): if args.mode == 'wo': ro = experiment_wo_distruber(env_real, env_sim)
Test the functionality of Pyrado using the Quanser Ball balancer setup. """ from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim from pyrado.domain_randomization.utils import print_domain_params from pyrado.policies.special.time import TimePolicy from pyrado.sampling.rollout import rollout from pyrado.utils.data_types import RenderMode def policy_fcn(t: float): return [ 0.0, # V_x 0.0, # V_y ] # Set up environment dt = 1 / 500. env = QBallBalancerSim(dt=dt, max_steps=10000) env.reset(domain_param=dict(offset_th_x=50. / 180 * 3.141592)) print_domain_params(env.domain_param) # Set up policy policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate ro = rollout(env, policy, render_mode=RenderMode(text=True, video=True), stop_on_done=True)
def create_velocity_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, bidirectional_mps, checkJointLimits): if bidirectional_mps: def policy_fcn(t: float): if t <= 1: return [0., 0., 0., 0., 1., 0.] elif t <= 2.: return [0., 0., 0., 0., -1., 0.] elif t <= 3: return [0., 0., 0., 1., 0., 0.] elif t <= 4.: return [0., 0., 0., -1., 0., 0.] elif t <= 5: return [0., 0., 1., 0., 0., 0.] elif t <= 6.: return [0., 0., -1., 0., 0., 0.] elif t <= 7: return [0., 1., 0., 0., 0., 0.] elif t <= 8: return [0., -1., 0., 0., 0., 0.] elif t <= 9: return [1., 0., 0., 0., 0., 1.] elif t <= 10: return [-1., 0., 0., 0., 0., 1.] else: return [0., 0., 0., 0., 0., 0.] else: def policy_fcn(t: float): if t <= 1: return [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0] elif t <= 2.: return [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0] elif t <= 3: return [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0] elif t <= 4.: return [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0] elif t <= 5: return [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0] elif t <= 6.: return [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] elif t <= 7: return [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0] elif t <= 8: return [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1] elif t <= 9: return [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] elif t <= 10: return [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1] else: return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # Set up environment env = BoxShelvingVelDSSim( physicsEngine=physicsEngine, graphFileName=graphFileName, dt=dt, max_steps=max_steps, mps_left=None, # use defaults mps_right=None, # use defaults ref_frame=ref_frame, bidirectional_mps=bidirectional_mps, fixedInitState=True, collisionConfig={'file': 'collisionModel.xml'}, checkJointLimits=checkJointLimits, observeVelocities=True, observeCollisionCost=True, observePredictedCollisionCost=True, observeManipulabilityIndex=True, observeCurrentManipulability=True, observeDynamicalSystemDiscrepancy=True, observeTaskSpaceDiscrepancy=True, observeForceTorque=True, observeDynamicalSystemGoalDistance=True, ) # Set up policy policy = TimePolicy(env.spec, policy_fcn, dt) return env, policy