from railrl.demos.collect_demo import collect_demos, SpaceMouseExpert
from multiworld.core.image_env import ImageEnv
from multiworld.envs.mujoco.cameras import sawyer_pusher_camera_upright_v2, sawyer_init_camera_zoomed_in

from multiworld.envs.mujoco.sawyer_xyz.sawyer_push_multiobj import SawyerTwoObjectNIPSEnv
# SawyerMultiobjectEnv is used by the second environment below; module path
# assumed from the multiworld layout.
from multiworld.envs.mujoco.sawyer_xyz.sawyer_push_multiobj_subset import SawyerMultiobjectEnv
from multiworld.envs.pygame.point2d import Point2DWallEnv

import numpy as np

if __name__ == '__main__':
    expert = SpaceMouseExpert(
        xyz_dims=2,
        xyz_remap=[1, 0, 2],
        xyz_scale=[-1, -1, -1],
    )
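    # xyz_remap/xyz_scale presumably swap the x/y axes and invert directions so
    # SpaceMouse input lines up with the Sawyer's workspace frame.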

    env = SawyerTwoObjectNIPSEnv()
    env = ImageEnv(
        env,
        recompute_reward=False,
        # transpose=True,
        init_camera=sawyer_pusher_camera_upright_v2,
    )

    collect_demos(env, expert, "pusher_demos_100.npy", 100)

    x_low = -0.2
    x_high = 0.2
    y_low = 0.5
    y_high = 0.7
    t = 0.03
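    # t acts as a margin keeping puck goals and object spawns strictly inside
    # the hand workspace defined by the bounds above.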
    env = SawyerMultiobjectEnv(
        num_objects=1,
        reset_to_initial_position=False,
        puck_goal_low=(x_low + t + t, y_low + t),
        puck_goal_high=(x_high - t - t, y_high - t),
        hand_goal_low=(x_low, y_low),
        hand_goal_high=(x_high, y_high),
        mocap_low=(x_low, y_low, 0.0),
        mocap_high=(x_high, y_high, 0.5),
        object_low=(x_low + t + t, y_low + t, 0.0),
        object_high=(x_high - t - t, y_high - t, 0.5),
        preload_obj_dict=[
            dict(color2=(0.1, 0.1, 0.9)),
        ],
    )
    env = ImageEnv(
        env,
        recompute_reward=False,
        # transpose=True,
        init_camera=sawyer_init_camera_zoomed_in,
    )

    collect_demos(env, expert, "pusher_reset_free_demos_100b.npy", 100)
Example #3
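Replays a trained TD3 pusher policy from a saved checkpoint through an ImageEnv wrapper, collecting 1000 image-based demonstrations filtered by a puck-distance threshold.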
from multiworld.core.image_env import ImageEnv
from multiworld.envs.mujoco.cameras import sawyer_init_camera_zoomed_in
import numpy as np
from railrl.demos.collect_demo import collect_demos
from railrl.misc.asset_loader import load_local_or_remote_file

if __name__ == '__main__':
    data = load_local_or_remote_file('/home/murtaza/research/railrl/data/doodads3/11-16-pusher-state-td3-sweep-params-policy-update-period/11-16-pusher_state_td3_sweep_params_policy_update_period_2019_11_17_00_28_45_id000--s62098/params.pkl')
    env = data['evaluation/env']
    policy = data['trainer/trained_policy']
    image_env = ImageEnv(
        env,
        48,
        init_camera=sawyer_init_camera_zoomed_in,
        transpose=True,
        normalize=True,
    )
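    # collect_demos kwargs as used below: N rollouts of `horizon` steps each;
    # `threshold` and `key` presumably keep only rollouts whose final
    # env_info[key] beats the threshold; the noise flags optionally perturb
    # the policy's actions during collection.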
    collect_demos(image_env, policy, "data/local/demos/pusher_demos_action_noise_1000.npy", N=1000, horizon=50, threshold=.1, add_action_noise=False, key='puck_distance', render=True, noise_sigma=0.0)
    # data = load_local_or_remote_file("demos/pusher_demos_1000.npy")
    # for i in range(100):
    #     goal = data[i]['observations'][49]['desired_goal']
    #     o = env.reset()
    #     path_length = 0
    #     while path_length < 50:
    #         env.set_goal({'state_desired_goal':goal})
    #         o = o['state_observation']
    #         new_obs = np.hstack((o, goal))
    #         a, agent_info = policy.get_action(new_obs)
    #         o, r, d, env_info = env.step(a)
    #         path_length += 1
    #     print(i, env_info['puck_distance'])
Example #4
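Collects SpaceMouse demonstrations in the 2D pointmass environment; Point2DWallEnv renders its own RGB images, so no MuJoCo camera is needed.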
from railrl.demos.collect_demo import collect_demos, SpaceMouseExpert
from multiworld.core.image_env import ImageEnv
from multiworld.envs.mujoco.cameras import sawyer_pusher_camera_upright_v2

from multiworld.envs.pygame.point2d import Point2DWallEnv

import numpy as np

if __name__ == '__main__':
    expert = SpaceMouseExpert(xyz_dims=2)

    env = Point2DWallEnv(
        render_onscreen=False,
        images_are_rgb=True,
    )
    env = ImageEnv(
        env,
        non_presampled_goal_img_is_garbage=True,
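        # no goals are presampled here, so this flag tells ImageEnv that
        # generated goal images may be garbage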
        recompute_reward=False,
        # transpose=True,
        # init_camera=sawyer_pusher_camera_upright_v2,
    )

    collect_demos(env, expert, "pointmass_demos_100.npy", 100)
Example #5
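A truncated variant of the checkpoint-replay pattern: it moves the loaded policy to the CPU and collects 100 demonstrations over a 200-step horizon.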
    # Truncated snippet: `data` (a loaded checkpoint dict) and `env` are assumed
    # to have been set up above, e.g. via load_local_or_remote_file.
    policy = data['evaluation/policy']
    policy.to("cpu")
    # image_env = ImageEnv(
    #     env,
    #     48,
    #     init_camera=sawyer_init_camera_zoomed_in,
    #     transpose=True,
    #     normalize=True,
    # )
    # env_name = pendulum
    outfile = "/home/ashvin/data/s3doodad/demos/icml2020/pusher/demos100.npy"
    horizon = 200
    collect_demos(
        env, policy, outfile, N=100, horizon=horizon
    )  # , threshold=.1, add_action_noise=False, key='puck_distance', render=True, noise_sigma=0.0)
    # data = load_local_or_remote_file("demos/pusher_demos_1000.npy")
    # for i in range(100):
    #     goal = data[i]['observations'][49]['desired_goal']
    #     o = env.reset()
    #     path_length = 0
    #     while path_length < 50:
    #         env.set_goal({'state_desired_goal':goal})
    #         o = o['state_observation']
    #         new_obs = np.hstack((o, goal))
    #         a, agent_info = policy.get_action(new_obs)
    #         o, r, d, env_info = env.step(a)
    #         path_length += 1
    #     print(i, env_info['puck_distance'])
Example #6
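Replays a remotely stored pusher checkpoint for 1000 noise-free demonstrations, then (in a second block) collects door-opening demonstrations with action noise, filtered by final angle difference.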
import os.path as osp

import numpy as np

import multiworld.envs.mujoco as mwmj  # for the packaged door goals file
from multiworld.core.image_env import ImageEnv
from multiworld.envs.mujoco.cameras import (
    sawyer_door_env_camera_v0,
    sawyer_init_camera_zoomed_in,
)
from railrl.demos.collect_demo import collect_demos
from railrl.misc.asset_loader import load_local_or_remote_file

if __name__ == '__main__':
    data = load_local_or_remote_file('ashvin/icml2020/murtaza/pusher/state/run3/id3/itr_980.pkl')
    env = data['evaluation/env']
    policy = data['trainer/trained_policy']
    policy = policy.to("cpu")
    image_env = ImageEnv(
        env,
        48,
        init_camera=sawyer_init_camera_zoomed_in,
        transpose=True,
        normalize=True,
    )
    collect_demos(image_env, policy, "/home/ashvin/data/s3doodad/demos/icml2020/pusher/demos_action_noise_1000.npy", N=1000, horizon=50, threshold=.1, add_action_noise=False, key='puck_distance', render=True, noise_sigma=0.0)
    # data = load_local_or_remote_file("demos/pusher_demos_1000.npy")
    # for i in range(100):
    #     goal = data[i]['observations'][49]['desired_goal']
    #     o = env.reset()
    #     path_length = 0
    #     while path_length < 50:
    #         env.set_goal({'state_desired_goal':goal})
    #         o = o['state_observation']
    #         new_obs = np.hstack((o, goal))
    #         a, agent_info = policy.get_action(new_obs)
    #         o, r, d, env_info = env.step(a)
    #         path_length += 1
    #     print(i, env_info['puck_distance'])
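
# Second standalone block: collect door-opening demos from a separate checkpoint,
# presumably keeping rollouts whose final angle_difference falls under the threshold.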
if __name__ == '__main__':
    data = load_local_or_remote_file(
        '11-16-door-reset-free-state-td3-sweep-params-policy-update-period/11-16-door_reset_free_state_td3_sweep_params_policy_update_period_2019_11_17_00_26_50_id000--s89728/params.pkl'
    )
    env = data['evaluation/env']
    policy = data['trainer/trained_policy']
    presampled_goals_path = osp.join(
        osp.dirname(mwmj.__file__),
        "goals",
        "door_goals.npy",
    )
    presampled_goals = load_local_or_remote_file(presampled_goals_path).item()
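    # .item() unwraps the 0-d object array that numpy wraps around the pickled goal dict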
    image_env = ImageEnv(
        env,
        48,
        init_camera=sawyer_door_env_camera_v0,
        transpose=True,
        normalize=True,
        presampled_goals=presampled_goals,
    )
    collect_demos(image_env,
                  policy,
                  "data/local/demos/door_demos_action_noise_1000.npy",
                  N=1000,
                  horizon=100,
                  threshold=.1,
                  add_action_noise=True,
                  key='angle_difference',
                  render=False)
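
For a quick sanity check, the saved demo files can be reloaded directly; a minimal sketch, assuming (as the commented-out replay loops above suggest) that each entry is a path dict with an 'observations' list:

import numpy as np

# the demos are pickled object arrays, so allow_pickle is required
paths = np.load("data/local/demos/door_demos_action_noise_1000.npy", allow_pickle=True)
print(len(paths), "paths collected")
print(len(paths[0]['observations']), "observations in the first path")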