Code Example #1
File: sb_pi.py Project: arlene-kuehn/SimuRLacra
def ds_activation_variant(dt, max_steps, max_dist_force, physics_engine,
                          graph_file_name):
    # Set up environment
    env = PlanarInsertTASim(
        physicsEngine=physics_engine,
        graphFileName=graph_file_name,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        taskCombinationMethod='sum',  # 'sum', 'mean',  'product', or 'softmax'
        checkJointLimits=False,
        collisionAvoidanceIK=True,
        observeForceTorque=True,
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemGoalDistance=False,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=True,
    )
    env.reset(domain_param=dict(effector_friction=1.))

    # Set up policy
    def policy_fcn(t: float):
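        # Constant activations for all tasks over the whole rollout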
        return [0.7, 1, 0, 0.1, 0.5, 0.5]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate and plot potentials
    print(env.obs_space.labels)
    return rollout(env,
                   policy,
                   render_mode=RenderMode(video=True),
                   stop_on_done=False)
Code Example #2
def create_ik_activation_setup(dt, max_steps, max_dist_force, physics_engine):
    # Set up environment
    env = Planar3LinkIKActivationSim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        taskCombinationMethod='product',
        positionTasks=True,
        checkJointLimits=False,
        collisionAvoidanceIK=True,
        observeTaskSpaceDiscrepancy=True,
    )
    print_domain_params(env.domain_param)

    # Set up policy
    def policy_fcn(t: float):
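        # Sinusoidal activation for the first task, constant activations for the rest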
        return [0.3 + 0.2 * math.sin(2. * math.pi * 0.2 * t), 0, 1]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env,
                   policy,
                   render_mode=RenderMode(video=True),
                   stop_on_done=True)
Code Example #3
def create_manual_activation_setup(dt, max_steps, max_dist_force,
                                   physics_engine):
    # Set up environment
    env = Planar3LinkTASim(physicsEngine=physics_engine,
                           dt=dt,
                           max_steps=max_steps,
                           max_dist_force=max_dist_force,
                           positionTasks=True,
                           observeTaskSpaceDiscrepancy=True)
    print_domain_params(env.domain_param)

    # Set up policy
    def policy_fcn(t: float):
        pot = np.fromstring(input("Enter potentials for next step: "),
                            dtype=np.double,
                            count=3,
                            sep=' ')
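        # Squash the entered potentials into (0, 1) activations with the logistic sigmoid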
        return 1 / (1 + np.exp(-pot))

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env,
                   policy,
                   render_mode=RenderMode(video=True),
                   stop_on_done=True)
Code Example #4
def create_joint_control_setup(dt, max_steps, max_dist_force, physics_engine):
    # Set up environment
    env = Planar3LinkJointCtrlSim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        checkJointLimits=True,
    )
    print_domain_params(env.domain_param)

    # Set up policy
    def policy_fcn(t: float):
        return [
            10 / 180 * math.pi,
            10 / 180 * math.pi,  # same as init config
            10 / 180 * math.pi +
            45. / 180. * math.pi * math.sin(2. * math.pi * 0.2 * t)
        ]  # oscillation in last link

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env,
                   policy,
                   render_mode=RenderMode(video=True),
                   stop_on_done=True)
Code Example #5
def create_ik_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame,
                    checkJointLimits):
    def policy(t: float):
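        # Small nonzero commands for the first 2 s, then all zeros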
        if t < 2:
            return [
                0.001, 0.001, 0.001, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001,
                0.002, 0.002, 0.002
            ]
        else:
            return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    # Set up environment
    env = BallInTubeIKSim(
        usePhysicsNode=True,
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        ref_frame=ref_frame,
        fixedInitState=True,
        checkJointLimits=checkJointLimits,
        collisionAvoidanceIK=True,
        observeVelocity=False,
        observeCollisionCost=True,
        observePredictedCollisionCost=True,
        observeManipulabilityIndex=True,
        observeCurrentManipulability=True,
        observeTaskSpaceDiscrepancy=True,
    )

    # Set up policy
    policy = TimePolicy(env.spec, policy, dt)

    return env, policy
Code Example #6
def create_ds_activation_setup(dt, max_steps, max_dist_force, physics_engine):
    # Set up environment
    env = Planar3LinkTASim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        positionTasks=True,
        checkJointLimits=False,
        collisionAvoidanceIK=True,
        observeCollisionCost=True,
        observeTaskSpaceDiscrepancy=True,
        observeDynamicalSystemDiscrepancy=False,
    )
    print(env.obs_space.labels)

    # Set up policy
    def policy_fcn(t: float):
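        # Piecewise-constant activation schedule, switching at t = 3 s, 7 s, and 10 s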
        if t < 3:
            return [0, 1, 0]
        elif t < 7:
            return [1, 0, 0]
        elif t < 10:
            return [.5, 0.5, 0]
        else:
            return [0, 0, 1]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env,
                   policy,
                   render_mode=RenderMode(video=True),
                   stop_on_done=False)
Code Example #7
def create_velocity_mps_setup(physicsEngine, graphFileName, dt, max_steps,
                              ref_frame, checkJointLimits):
    def policy(t: float):
        return [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

    # Set up environment
    env = BallInTubeVelDSSim(
        usePhysicsNode=True,
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        mps_left=None,  # use defaults
        mps_right=None,  # use defaults
        ref_frame=ref_frame,
        fixedInitState=True,
        taskCombinationMethod='sum',
        checkJointLimits=checkJointLimits,
        collisionAvoidanceIK=False,
        observeVelocity=True,
        observeCollisionCost=True,
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=True,
        observeForceTorque=True,
        observeDynamicalSystemGoalDistance=False,
    )

    # Set up policy
    policy = TimePolicy(env.spec, policy, dt)

    return env, policy
Code Example #8
def generate_oscillation_data(dt, t_end, excitation):
    """
    Use OMOEnv to generate a 1-dim damped oscillation signal.

    :param dt: time step size [s]
    :param t_end: Time duration [s]
    :param excitation: type of excitation, either (initial) 'position' or 'force' (function of time)
    :return: 1-dim oscillation trajectory
    """
    env = OneMassOscillatorSim(dt, np.ceil(t_end / dt))
    env.domain_param = dict(m=1., k=10., d=2.0)
    if excitation == 'force':
        policy = TimePolicy(
            env.spec,
            functools.partial(_dirac_impulse, env_spec=env.spec, amp=0.5), dt)
        reset_kwargs = dict(init_state=np.array([0, 0]))
    elif excitation == 'position':
        policy = IdlePolicy(env.spec)
        reset_kwargs = dict(init_state=np.array([0.5, 0]))
    else:
        raise pyrado.ValueErr(given=excitation,
                              eq_constraint="'force' or 'position'")

    # Generate the data
    ro = rollout(env, policy, reset_kwargs=reset_kwargs, record_dts=False)
    return ro.observations[:, 0]
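
A minimal usage sketch for this helper; the parameter values below are illustrative assumptions, not taken from the original script:

signal = generate_oscillation_data(dt=0.01, t_end=5., excitation='position')  # 1-dim damped oscillation trajectory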
Code Example #9
File: sb_bf.py Project: arlene-kuehn/SimuRLacra
def create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits):
    # Set up environment
    env = BoxFlippingPosDSSim(
        usePhysicsNode=True,
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        mps_left=None,  # use defaults
        mps_right=None,  # use defaults
        ref_frame=ref_frame,
        collisionConfig={'file': 'collisionModel.xml'},
        checkJointLimits=checkJointLimits,
        collisionAvoidanceIK=False,
        observeVelocities=True,
        observeCollisionCost=True,
        observePredictedCollisionCost=True,
        observeManipulabilityIndex=True,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=True,
        observeTaskSpaceDiscrepancy=False,
        observeDynamicalSystemGoalDistance=True,
    )

    # Set up policy
    # def policy(t: float):
    #     if t < 3.1:
    #         return [0, 0, 0, 0,
    #                 0, 0, 0, 0, 1]
    #     elif t <= 4.5:
    #         return [0, 0, 0, 0,
    #                 0, 0, 0, 0, 1]
    #     else:
    #         return [0, 0, 0, 0,
    #                 0, 0, 0, 0, 1]
    def policy(t: float):
        if t <= 5:
            return [0.2, 0, 0, 0,
                    0, 1]
        else:
            return [-0.1, 0, 0, 0,
                    0.1, 1.]

    policy = TimePolicy(env.spec, policy, dt)

    return env, policy
Code Example #10
def softmax_variant(action_model_type, mps, dt, max_steps, physics_engine, render_mode):
    # Set up environment
    env = MPBlendingSim(
        action_model_type=action_model_type,
        mps=mps,
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        collisionAvoidanceIK=False,
        taskCombinationMethod='softmax'
    )

    # Set up policy
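    # Note: policy_fcn is assumed to be defined at module level in the source script (not shown in this excerpt)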
    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate
    return rollout(env, policy, render_mode=render_mode, stop_on_done=False)
Code Example #11
def create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps,
                              ref_frame, checkJointLimits):
    def policy_fcn(t: float):
        return [
            0,
            0,
            1,  # PG position
            0,
            0,  # PG orientation
            0.01
        ]  # hand joints

    # Set up environment
    env = BoxShelvingPosDSSim(
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        mps_left=None,  # use defaults
        mps_right=None,  # use defaults
        ref_frame=ref_frame,
        fixedInitState=True,
        collisionConfig={'file': 'collisionModel.xml'},
        checkJointLimits=checkJointLimits,
        collisionAvoidanceIK=True,
        observeVelocities=False,
        observeCollisionCost=True,
        observePredictedCollisionCost=True,
        observeManipulabilityIndex=True,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=True,
        observeTaskSpaceDiscrepancy=True,
        observeForceTorque=True,
        observeDynamicalSystemGoalDistance=True,
    )
    print(env.get_body_position('Box', '', ''))
    print(env.get_body_position('Box', 'GoalUpperShelve', ''))
    print(env.get_body_position('Box', '', 'GoalUpperShelve'))
    print(env.get_body_position('Box', 'GoalUpperShelve', 'GoalUpperShelve'))

    # Set up policy
    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy
Code Example #12
def create_setup(physics_engine, dt, max_steps, max_dist_force):
    # Set up environment
    env = BallOnPlate5DSim(physicsEngine=physics_engine,
                           dt=dt,
                           max_steps=max_steps,
                           max_dist_force=max_dist_force)
    env = ActNormWrapper(env)
    print_domain_params(env.domain_param)

    # Set up policy
    def policy_fcn(t: float):
        return [
            0.,  # x_ddot_plate
            10. * math.cos(2. * math.pi * 5 * t),  # y_ddot_plate
            0.5 * math.cos(2. * math.pi / 5. * t),  # z_ddot_plate
            0.,  # alpha_ddot_plate
            0.,  # beta_ddot_plate
        ]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy
Code Example #13
File: sb_bf.py Project: arlene-kuehn/SimuRLacra
def create_ik_setup(physicsEngine, graphFileName, dt, max_steps, ref_frame, checkJointLimits):
    # Set up environment
    env = BoxFlippingIKActivationSim(
        usePhysicsNode=True,
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        ref_frame=ref_frame,
        collisionConfig={'file': 'collisionModel.xml'},
        checkJointLimits=checkJointLimits,
    )

    # Set up policy
    def policy(t: float):
        # return [0.1, 0.05, 0.1,  # left Y, Z, dist
        #         -0.1, 0.05, 0.1]  # right X, Y, dist
        return [0.0, 0.05,  # left Y, Z
                -0.1, 0.05]  # right X, Y

    policy = TimePolicy(env.spec, policy, dt)

    return env, policy
Code Example #14
File: sb_bls.py Project: arlene-kuehn/SimuRLacra
def create_position_mps_setup(physicsEngine, graphFileName, dt, max_steps,
                              ref_frame, checkJointLimits):
    def policy(t: float):
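        # Piecewise-constant task activations, switching at t = 3.1 s and t = 4.5 s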
        if t < 3.1:
            return [0, 0.6, 0, 0]
        elif t <= 4.5:
            return [0, 0.6, 0, 1]
        else:
            return [0, 0, 0, 0]

    # Set up environment
    env = BoxLiftingSimplePosDSSim(
        usePhysicsNode=False,
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        mps_left=None,  # use defaults
        mps_right=None,  # use defaults
        ref_frame=ref_frame,
        taskCombinationMethod='sum',
        checkJointLimits=checkJointLimits,
        collisionAvoidanceIK=False,
        observeVelocities=False,
        observeCollisionCost=True,
        observePredictedCollisionCost=True,
        observeManipulabilityIndex=True,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=True,
        observeTaskSpaceDiscrepancy=True,
        observeDynamicalSystemGoalDistance=True,
    )

    # Set up policy
    policy = TimePolicy(env.spec, policy, dt)

    return env, policy
Code Example #15
File: sb_pi.py Project: arlene-kuehn/SimuRLacra
def ik_activation_variant(dt, max_steps, max_dist_force, physics_engine,
                          graph_file_name):
    # Set up environment
    env = PlanarInsertIKActivationSim(
        physicsEngine=physics_engine,
        graphFileName=graph_file_name,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        checkJointLimits=False,
        collisionAvoidanceIK=True,
        observeForceTorque=True,
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemGoalDistance=False,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=True,
    )
    env.reset(domain_param=dict(effector_friction=1.))

    # Set up policy
    def policy_fcn(t: float):
        return [
            0.1 * dt, -0.01 * dt,
            3 / 180. * math.pi * math.sin(2. * math.pi * 2. * t)
        ]  # [m/s, m/s, rad/s]

    policy = TimePolicy(env.spec, policy_fcn, dt)

    # Simulate and plot potentials
    print(env.obs_space.labels)
    return rollout(env,
                   policy,
                   render_mode=RenderMode(video=True),
                   stop_on_done=False)
Code Example #16
File: conftest.py Project: arlene-kuehn/SimuRLacra
    def time_policy(env):
        def timefcn(t: float):
            return list(np.random.rand(env.spec.act_space.flat_dim))

        return TimePolicy(env.spec, dt=env.dt, fcn_of_time=timefcn)
Code Example #17
import numpy as np

from pyrado.domain_randomization.utils import print_domain_params
from pyrado.environments.rcspysim.quanser_qube import QQubeRcsSim  # assumed import path
from pyrado.policies.special.time import TimePolicy
from pyrado.sampling.rollout import rollout
from pyrado.utils.data_types import RenderMode

if __name__ == '__main__':
    # Set up environment
    dt = 1 / 5000.
    max_steps = 5000
    env = QQubeRcsSim(
        physicsEngine='Bullet',  # Bullet or Vortex
        dt=dt,
        max_steps=max_steps,
        max_dist_force=None)
    print_domain_params(env.domain_param)

    # Set up policy
    policy = TimePolicy(env.spec, lambda t: [1.],
                        dt)  # constant acceleration with 1. rad/s**2

    # Simulate
    ro = rollout(
        env,
        policy,
        render_mode=RenderMode(video=True),
        reset_kwargs=dict(init_state=np.array([0, 3 / 180 * np.pi, 0, 0.])))

    # Plot
    print(
        f'After {max_steps*dt} s of accelerating with 1. rad/s**2, we should be at {max_steps*dt} rad/s'
    )
    print(
        f'Difference: {max_steps*dt - ro.observations[-1][2]} rad/s (mind the swinging pendulum)'
    )
Code Example #18
    ],
    collisionConfig={
        'file': "collisionModel.xml",
        # 'pairs': [
        #     {
        #         'body1': 'PowerGrasp_L',
        #         'body2': 'PowerGrasp_R',
        #     },
        # ],
        # 'threshold': 0.1,
    },
    checkJointLimits=True,
)
print(env.obs_space.labels)

# Set up policy
policy = TimePolicy(spec=env.spec, fcn_of_time=policy_fcn, dt=dt)

# Create scripted version of the policy
tm = policy.script()
print(tm.graph)
print(tm.code)

# Export config and policy for C++
env.save_config_xml(osp.join(rcsenv.RCSPYSIM_CONFIG_PATH, 'TargetTracking', "exTT_export.xml"))
tm.save(osp.join(rcsenv.RCSPYSIM_CONFIG_PATH, 'TargetTracking', 'pTT_simpletime.pth'))

# Simulate and plot
ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
draw_rewards(ro)
Code Example #19
    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir
    ex_tag = ex_dir.split('--', 1)[1]

    # Load the policy and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    if args.verbose:
        print(f'Policy params:\n{policy.param_values.detach().cpu().numpy()}')

    # Create real-world counterpart (without domain randomization)
    env_real = QCartPoleStabReal(args.dt, args.max_steps)
    print_cbt('Set up the QCartPoleStabReal environment.', 'c')

    # Set up the disturber
    disturber_pos = TimePolicy(env_real.spec, volt_disturbance_pos,
                               env_real.dt)
    disturber_neg = TimePolicy(env_real.spec, volt_disturbance_neg,
                               env_real.dt)
    steps_disturb = 10
    print_cbt(
        f'Set up the disturbers for the QCartPoleStabReal environment.'
        f'\nVolt disturbance: {6} volts for {steps_disturb} steps', 'c')

    # Center the cart, reset the velocity filters, and wait until the user or the controller has put the pole upright
    env_real.reset()
    print_cbt('Ready', 'g')

    ros = []
    for r in range(args.num_runs):
        if args.mode == 'wo':
            ro = experiment_wo_distruber(env_real, env_sim)
Code Example #20
File: sb_qbb.py Project: arlene-kuehn/SimuRLacra
"""
Test the functionality of Pyrado using the Quanser Ball balancer setup.
"""
from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim
from pyrado.domain_randomization.utils import print_domain_params
from pyrado.policies.special.time import TimePolicy
from pyrado.sampling.rollout import rollout
from pyrado.utils.data_types import RenderMode


def policy_fcn(t: float):
    return [
        0.0,  # V_x
        0.0,  # V_y
    ]


# Set up environment
dt = 1 / 500.
env = QBallBalancerSim(dt=dt, max_steps=10000)
env.reset(domain_param=dict(offset_th_x=50. / 180 * 3.141592))
print_domain_params(env.domain_param)

# Set up policy
policy = TimePolicy(env.spec, policy_fcn, dt)

# Simulate
ro = rollout(env,
             policy,
             render_mode=RenderMode(text=True, video=True),
             stop_on_done=True)
Code Example #21
def create_velocity_mps_setup(physicsEngine, graphFileName, dt, max_steps,
                              ref_frame, bidirectional_mps, checkJointLimits):
    if bidirectional_mps:

        def policy_fcn(t: float):
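            # Step through the bidirectional motion primitives, flipping the sign of the activation every second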
            if t <= 1:
                return [0., 0., 0., 0., 1., 0.]
            elif t <= 2.:
                return [0., 0., 0., 0., -1., 0.]
            elif t <= 3:
                return [0., 0., 0., 1., 0., 0.]
            elif t <= 4.:
                return [0., 0., 0., -1., 0., 0.]
            elif t <= 5:
                return [0., 0., 1., 0., 0., 0.]
            elif t <= 6.:
                return [0., 0., -1., 0., 0., 0.]
            elif t <= 7:
                return [0., 1., 0., 0., 0., 0.]
            elif t <= 8:
                return [0., -1., 0., 0., 0., 0.]
            elif t <= 9:
                return [1., 0., 0., 0., 0., 1.]
            elif t <= 10:
                return [-1., 0., 0., 0., 0., 1.]
            else:
                return [0., 0., 0., 0., 0., 0.]

    else:

        def policy_fcn(t: float):
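            # Step through the unidirectional motion primitive activations second by second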
            if t <= 1:
                return [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
            elif t <= 2.:
                return [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
            elif t <= 3:
                return [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
            elif t <= 4.:
                return [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
            elif t <= 5:
                return [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
            elif t <= 6.:
                return [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
            elif t <= 7:
                return [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
            elif t <= 8:
                return [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
            elif t <= 9:
                return [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
            elif t <= 10:
                return [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1]
            else:
                return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    # Set up environment
    env = BoxShelvingVelDSSim(
        physicsEngine=physicsEngine,
        graphFileName=graphFileName,
        dt=dt,
        max_steps=max_steps,
        mps_left=None,  # use defaults
        mps_right=None,  # use defaults
        ref_frame=ref_frame,
        bidirectional_mps=bidirectional_mps,
        fixedInitState=True,
        collisionConfig={'file': 'collisionModel.xml'},
        checkJointLimits=checkJointLimits,
        observeVelocities=True,
        observeCollisionCost=True,
        observePredictedCollisionCost=True,
        observeManipulabilityIndex=True,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=True,
        observeTaskSpaceDiscrepancy=True,
        observeForceTorque=True,
        observeDynamicalSystemGoalDistance=True,
    )

    # Set up policy
    policy = TimePolicy(env.spec, policy_fcn, dt)

    return env, policy