def __init__(self, horizon=500, *args, **kwargs):
    options = {}
    controller_name = 'OSC_POSE'
    options["controller_configs"] = suite.load_controller_config(
        default_controller=controller_name)

    self.env = GymWrapper(
        suite.make(
            "NutAssemblyRound",  # Nut Assembly task with the round peg
            robots="IIWA",  # use IIWA robot
            **options,  # controller options
            use_object_obs=True,
            use_camera_obs=False,  # do not use pixel observations
            has_offscreen_renderer=False,  # not needed since not using pixel obs
            has_renderer=False,  # no on-screen rendering
            reward_shaping=True,  # use dense rewards
            reward_scale=1.0,
            control_freq=20,  # control frequency high enough for smooth simulation
            horizon=horizon,  # number of timesteps before the episode ends
        ))
    self.max_episode_steps = horizon
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
    self.reward_type = 'sparse'
    self.distance_threshold = 0.065
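
The wrapper above stores reward_type = 'sparse' and distance_threshold = 0.065 without showing where they are consumed. A minimal sketch, assuming goal-style achieved/desired positions and a hypothetical _goal_distance helper, of the kind of methods such a wrapper might add:

# Sketch (assumption): extra methods on the wrapper above that consume the
# sparse reward settings; achieved_goal/desired_goal are hypothetical positions.
import numpy as np

def _goal_distance(self, achieved_goal, desired_goal):
    # Euclidean distance between achieved and desired goal positions
    return np.linalg.norm(achieved_goal - desired_goal, axis=-1)

def compute_reward(self, achieved_goal, desired_goal, info=None):
    d = self._goal_distance(achieved_goal, desired_goal)
    if self.reward_type == 'sparse':
        # -1 until the goal is within distance_threshold, then 0
        return -(d > self.distance_threshold).astype(np.float32)
    return -d  # dense fallback: negative distance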
def watch_trajectory():
    # Set up the robot environment
    env = suite.make(
        env_name=args.env_name,
        robots=args.robot,
        has_renderer=args.render,
        has_offscreen_renderer=False,
        use_camera_obs=False,
        use_object_obs=True,
        horizon=args.horizon,
        reward_shaping=False,
    )
    obs = env.reset()
    state_dim = obs['robot0_robot-state'].shape[0] + obs['object-state'].shape[0]
    state = np.append(obs['robot0_robot-state'], obs['object-state'])
    agent = set_agent(state_dim, env, args)

    # Visualize a single run
    done = False
    while not done:
        if args.algo == 'REINFORCE':
            action, log_prob = agent.select_action(state)
        else:
            action = agent.select_action(state)
        obs, reward, done, info = env.step(action)
        state = np.append(obs['robot0_robot-state'], obs['object-state'])
        env.render()
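
This example (and evaluate_grip_goal further below) reads a module-level args object. A minimal argparse sketch covering the fields those functions use; the flag names and defaults are assumptions, not taken from the original script:

# Sketch (assumption): command-line arguments consumed by the snippets above and below.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--env_name', type=str, default='Lift')
parser.add_argument('--robot', type=str, default='Panda')
parser.add_argument('--render', action='store_true')
parser.add_argument('--horizon', type=int, default=500)
parser.add_argument('--algo', type=str, default='REINFORCE')
parser.add_argument('--num_trials', type=int, default=20)
args = parser.parse_args()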
Example No. 3
def sample_goal_frames_env(args):
    env = suite.make(
        "SawyerLift",
        has_renderer=False,  # no on-screen renderer
        has_offscreen_renderer=True,  # off-screen renderer is required for camera observations
        ignore_done=True,  # (optional) never terminate the episode
        use_camera_obs=True,  # use camera observations
        camera_height=64,  # set camera height
        camera_width=64,  # set camera width
        camera_name='sideview',  # use the "sideview" camera
        use_object_obs=True,  # also keep object features
        control_freq=60)

    goal_imgs = []
    grip_pos_l = []
    for i in range(50):
        x = env.reset()
        for k in range(100):
            x, _, _, _ = env.step(np.random.randn(env.dof))

        im = img_as_ubyte(rotate(x['image'], 180))
        goal_imgs.append(im)

        grip_pos = np.array(env.sim.data.site_xpos[env.sim.model.site_name2id(
            "grip_site")]).astype(np.float32)
        grip_pos_l.append(grip_pos)

    goal_imgs = np.stack(goal_imgs)
    grip_pos_l = np.stack(grip_pos_l)
    dir_name = "data/goal/sawyer_reach_joint"
    os.makedirs(dir_name, exist_ok=True)

    data = {'image': goal_imgs, 'grip_pos': grip_pos_l}
    np.savez(os.path.join(dir_name, args.save_path + ".npz"), **data)
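
A short sketch of reading the saved goal archive back for a sanity check; "goals.npz" stands in for whatever args.save_path was:

# Sketch: reload the goal archive written above and inspect its contents.
import numpy as np

data = np.load("data/goal/sawyer_reach_joint/goals.npz")
goal_imgs = data['image']     # (50, 64, 64, 3) uint8 goal frames
grip_pos = data['grip_pos']   # (50, 3) gripper site positions
print(goal_imgs.shape, grip_pos.shape)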
Example No. 4
def main():

    width = 512
    height = 512
    action_scale = 10.0
    screen = pygame.display.set_mode((width, height))

    # create environment instance
    env = suite.make(
        "PandaILIAD",
        has_renderer=False,
        ignore_done=True,
        camera_height=height,
        camera_width=width,
        gripper_visualization=False,
        use_camera_obs=True,
        use_object_obs=False,
    )

    # reset the environment
    env.reset()

    # enable controlling the end effector directly instead of using joint velocities
    env = IKWrapper(env)

    # create the human input device
    joystick = Joystick()
    joystick.start_control()

    # create recorder
    number = sys.argv[1]
    savename = "demonstrations/test" + number + ".pkl"
    data = []


    while True:

        controller_state = joystick.get_controller_state()
        dpos, dquat, grasp, reset = (
            controller_state["dpos"],
            controller_state["dquat"],
            controller_state["grasp"],
            controller_state["reset"],
        )
        if reset:
            pickle.dump(data, open(savename, "wb"))
            break

        action = np.concatenate([dpos, dquat, grasp]) * action_scale
        obs, reward, done, info = env.step(action)
        image = np.flip(obs['image'].transpose((1, 0, 2)), 1)
        joint_pos = obs['joint_pos']

        res_image = cv2.resize(image, dsize=(28, 28))
        data.append((res_image, joint_pos))

        pygame.pixelcopy.array_to_surface(screen, image)
        pygame.display.update()

    env.close()
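
The recorder above dumps a list of (resized image, joint position) pairs. A quick sketch for reading one of those pickles back, assuming the same "demonstrations/test<number>.pkl" naming:

# Sketch: load a recorded demonstration written by the loop above.
import pickle

with open("demonstrations/test1.pkl", "rb") as f:
    demo = pickle.load(f)
res_image, joint_pos = demo[0]  # (28, 28, 3) image and the joint positions at that step
print(len(demo), res_image.shape, joint_pos.shape)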
Example No. 5
def change_parameters_of_soft_body_demo(episodes):

    for _ in range(episodes):

        env = suite.make('Ultrasound',
                         robots='UR5e',
                         controller_configs=None,
                         gripper_types='UltrasoundProbeGripper',
                         has_renderer=True,
                         has_offscreen_renderer=False,
                         use_camera_obs=False,
                         use_object_obs=False,
                         control_freq=50,
                         render_camera=None,
                         horizon=800)

        print_world_xml_and_soft_torso_params(env.model)

        sim, viewer = create_mjsim_and_viewer(env)

        for _ in range(env.horizon):
            sim.step()
            viewer.render()

        glfw.destroy_window(viewer.window)
Example No. 6
 def __init__(self):
     self.env = suite.make(env_name="Lift",
                           robots="Sawyer",
                           horizon=600,
                           has_renderer=False,
                           has_offscreen_renderer=True,
                           use_camera_obs=True,
                           reward_shaping=True,
                           camera_names="agentview",
                           camera_heights=84,
                           camera_widths=84)
     init_obs = self.env.observation_spec()
     obs_shape = init_obs['robot0_agentview_image'].shape
     obs_low = -np.full(obs_shape, np.inf)
     obs_high = np.full(obs_shape, np.inf)
     lower_bound, upper_bound = self.env.action_spec
     # print(lower_bound.shape, self.env.action_dim)
     self.action_space = spaces.Box(low=lower_bound,
                                    high=upper_bound,
                                    shape=(self.env.action_dim, ),
                                    dtype=np.float32)
     self.observation_space = spaces.Box(low=obs_low,
                                         high=obs_high,
                                         shape=obs_shape,
                                         dtype=np.float32)
Example No. 7
def controller_demo(experiment_name, save_data=False):

    env = suite.make('Ultrasound',
                     robots='UR5e',
                     controller_configs=None,
                     gripper_types=None,
                     has_renderer=True,
                     has_offscreen_renderer=False,
                     use_camera_obs=False,
                     use_object_obs=False,
                     control_freq=50,
                     render_camera='sideview',
                     horizon=1600)

    # Reset the env
    env.reset()

    sim_time = env.horizon

    robot = env.robots[0]
    joint_pos_obs = np.empty(shape=(sim_time, robot.dof))
    ref_values = np.array([np.pi / 2, 3 * np.pi / 2, -np.pi / 4])

    goal_joint_pos = [ref_values[0], 0, 0, 0, 0, 0]
    kp = 2
    kd = 1.2

    for t in range(sim_time):
        if env.done:
            break
        env.render()

        action = relative2absolute_joint_pos_commands(goal_joint_pos, robot,
                                                      kp, kd)

        if t > 1200:
            action = relative2absolute_joint_pos_commands(
                [0, ref_values[2], 0, 0, 0, 0], robot, kp, kd)
        elif t > 800:
            action = relative2absolute_joint_pos_commands([0, 0, 0, 0, 0, 0],
                                                          robot, kp, kd)
        elif t > 350:
            action = relative2absolute_joint_pos_commands(
                [ref_values[1], 0, 0, 0, 0, 0], robot, kp, kd)

        observation, reward, done, info = env.step(action)
        joint_pos_obs[t] = observation['robot0_joint_pos']

    # close window
    env.close()

    if save_data:
        np.savetxt('data/' + experiment_name +
                   '_joint_pos_controller_test.csv',
                   joint_pos_obs,
                   delimiter=",")
        np.savetxt('data/' + experiment_name +
                   '_ref_values_controller_test.csv',
                   ref_values,
                   delimiter=",")
Example No. 8
def test_all_controllers():
    for controller_name in controllers.keys():
        # Define variables for each controller test
        action_dim = controllers[controller_name][0]
        num_test_steps = controllers[controller_name][1]
        test_value = controllers[controller_name][2]
        neutral = np.zeros(action_dim)

        # Define controller path to load
        controller_config = load_controller_config(default_controller=controller_name)

        # Now, create a test env for testing the controller on
        env = suite.make(
            "Lift",
            robots="Sawyer",
            has_renderer=args.render,  # use on-screen renderer for visual validation only if requested
            has_offscreen_renderer=False,
            use_camera_obs=False,
            horizon=(steps_per_action + steps_per_rest) * num_test_steps,
            controller_configs=controller_config,
        )
        print("Testing controller: {}...".format(controller_name))

        env.reset()
        # If rendering, set controller to front view to get best angle for viewing robot movements
        if args.render:
            env.viewer.set_camera(camera_id=0)

        # get action range
        action_min, action_max = env.action_spec
        assert action_min.shape == action_max.shape
        assert action_min.shape[0] == action_dim, "Expected {}, got {}".format(action_dim, action_min.shape[0])

        # Keep track of done variable to know when to break loop
        count = 0
        # Loop through controller space
        while count < num_test_steps:
            action = neutral.copy()
            for i in range(steps_per_action):
                if controller_name in {"IK_POSE", "OSC_POSE"} and count > 2:
                    # Set this value to be the angle and set appropriate axis
                    vec = np.zeros(3)
                    vec[count - 3] = test_value
                    action[3:6] = vec
                else:
                    action[count] = test_value
                env.step(action)
                if args.render:
                    env.render()
            for i in range(steps_per_rest):
                env.step(neutral)
                if args.render:
                    env.render()
            count += 1

        # Shut down this env before starting the next test
        env.close()

    # Tests passed!
    print("All controller tests completed.")
Example No. 9
def make_robosuite(env_name, env_config):
    import robosuite
    env_config.render = False

    env = robosuite.make(
        env_name,
        has_renderer=env_config.render,
        ignore_done=True,
        use_camera_obs=env_config.pixel_input,
        has_offscreen_renderer=env_config.pixel_input,
        camera_height=256,
        camera_width=256,
        render_collision_mesh=False,
        render_visual_mesh=True,
        camera_name='agentview',
        use_object_obs=(not env_config.pixel_input),
        camera_depth=env_config.use_depth,
        reward_shaping=True,
        # demo_config=env_config.demonstration,
    )
    env = RobosuiteWrapper(env, env_config)
    env = FilterWrapper(env, env_config)
    env = ObservationConcatenationWrapper(env)
    if env_config.pixel_input:
        env = TransposeWrapper(env)
        if env_config.use_grayscale:
            env = GrayscaleWrapper(env)
        if env_config.frame_stacks:
            env = FrameStackWrapper(env, env_config)
    env_config.action_spec = env.action_spec()
    env_config.obs_spec = env.observation_spec()
    return env, env_config
Example No. 10
def make_base_robosuite_env(env_name, kwargs, use_dm_backend=True):
    import gym
    import numpy as np

    gym.logger.setLevel(40)
    import robosuite as suite

    from rlkit.envs.wrappers.primitives_wrappers import (
        NormalizeBoxEnvFixed,
        RobosuitePrimitives,
        RobosuiteWrapper,
    )

    if suite.environments.robot_env.RobotEnv.__bases__[0] != RobosuitePrimitives:
        suite.environments.robot_env.RobotEnv.__bases__ = (RobosuitePrimitives,)
    RobosuitePrimitives._use_dm_backend = use_dm_backend
    if kwargs["has_offscreen_renderer"]:
        keys = ["image-state"]
    else:
        keys = None
    reset_action_space_kwargs = kwargs.get("reset_action_space_kwargs", {})
    env_kwargs_new = kwargs.copy()
    if "reset_action_space_kwargs" in kwargs:
        del env_kwargs_new["reset_action_space_kwargs"]
    np.random.seed(42)
    env = suite.make(env_name, **env_kwargs_new)
    env = RobosuiteWrapper(
        env, keys=keys, reset_action_space_kwargs=reset_action_space_kwargs
    )
    if reset_action_space_kwargs.get("control_mode") == "robosuite":
        env = NormalizeBoxEnvFixed(env)
    return env
Example No. 11
def gather_calibration_measurements():

    env = suite.make(
        'Ultrasound',
        robots='Panda',
        controller_configs=load_controller_config(
            default_controller='OSC_POSE'),
        gripper_types='UltrasoundProbeGripper',
        has_renderer=True,
        has_offscreen_renderer=False,
        use_camera_obs=False,
        use_object_obs=False,
        control_freq=100,
        render_camera=None,
        horizon=250,
        initialization_noise=None,
    )

    # Reset the env
    env.reset()

    robot = env.robots[0]

    ee_z_force = np.empty(shape=(env.horizon, 1))
    ee_z_pos = np.empty(shape=(env.horizon, 1))
    ee_z_vel = np.empty(shape=(env.horizon, 1))

    for t in range(env.horizon):
        print(t)
        if env.done:
            break
        env.render()

        action = [0.2, 0.05, -0.2, 0, 0, 0]

        if t > 50:
            action = [0, 0, 0, 0, 0, 0]
        if t > 75:
            action = [0, 0, -0.4, 0, 0, 0]
        if t > 200:
            action = [0, 0, 0.8, 0, 0, 0]
        if t > 225:
            action = [0, 0, 0, 0, 0, 0]

        observation, reward, done, info = env.step(action)

        if robot.gripper_type == 'UltrasoundProbeGripper':
            ee_z_force[t] = transform_ee_frame_axes(robot.ee_force)[-1]
        else:
            ee_z_force[t] = robot.ee_force[-1]
        ee_z_pos[t] = observation['robot0_eef_pos'][-1]
        ee_z_vel[t] = observation['gripper_velp'][-1]

    np.savetxt('data/calibration_z_force.csv', ee_z_force, delimiter=",")
    np.savetxt('data/calibration_z_pos.csv', ee_z_pos, delimiter=",")
    np.savetxt('data/calibration_z_vel.csv', ee_z_vel, delimiter=",")

    # close window
    env.close()
Example No. 12
        def _make_robosuite_env():
            from gym.wrappers import FlattenDictWrapper
            from baselines.bench import Monitor

            env = suite.make(env_id)
            env = FlattenDictWrapper(env, ['robot-state', 'object-state'])
            env = Monitor(env, logger.get_dir(), allow_early_resets=True)
            return env
Example No. 13
def make_robosuite_env(args):
    demo_dict = {
        ## Robosuite
        21: "pegs-RoundNut",  #
        22: "pegs-SquareNut",  #
        23: "pegs-full",
        24: "bins-Bread",  #
        25: "bins-Can",  # 
        26: "bins-Cereal",
        27: "bins-Milk",  # 
        28: "bins-full",
    }
    demo_name = demo_dict[args.env_id]

    hd5_demo_path = "../imitation_data/RoboTurkPilot/%s" % (demo_name)

    env_name = args.env_name + "_reach"
    # total_traj = len(os.listdir(demo_path))
    demo_list = []
    filename = "../imitation_data/TRAJ_h5/%s/%s_chosen.txt" % (env_name,
                                                               env_name)
    demo_idx = -1

    sort_list = np.arange(0, 10)  # use the first 10 demos
    with open(filename, 'r') as ff:
        for i, line in enumerate(ff):
            line = line.replace(":", " ").replace("(", " ").replace(
                ")", " ").replace(",", " ").split()
            if demo_idx == -1:
                demo_idx = line.index("demo") + 1
            if np.any(sort_list == i):
                demo_list += [int(line[demo_idx])]

    env = RobosuiteReacherWrapper(
        robosuite.make(
            args.env_name,
            has_offscreen_renderer=False,  # not needed since we do not use pixel obs
            has_renderer=args.render,
            ignore_done=False,
            use_camera_obs=False,
            gripper_visualization=False,
            reward_shaping=False,
            control_freq=100,
            horizon=args.t_max  # default is 500
        ),
        demo_path=hd5_demo_path,
        demo_list=demo_list,
        need_xml=True,
        # Initial-state sampler. The default from the repo is uniform + random,
        # but initializing states in the middle of a trajectory is only
        # possible in simulation.
        sampling_schemes=["uniform_first", "random"],
        scheme_ratios=[0.9, 0.1],
        robo_task="reach",
    )
    return env
Example No. 14
def test_playback():
    # set seeds
    random.seed(0)
    np.random.seed(0)

    env = robosuite.make(
        "Lift",
        robots=["Panda"],
        controller_configs=load_controller_config(
            default_controller="OSC_POSE"),
        has_renderer=False,
        has_offscreen_renderer=False,
        ignore_done=True,
        use_camera_obs=False,
        reward_shaping=True,
        control_freq=20,
    )
    env.reset()

    # task instance
    task_xml = env.sim.model.get_xml()
    task_init_state = np.array(env.sim.get_state().flatten())

    # trick for ensuring that we can play MuJoCo demonstrations back
    # deterministically by using the recorded actions open loop
    env.reset_from_xml_string(task_xml)
    env.sim.reset()
    env.sim.set_state_from_flattened(task_init_state)
    env.sim.forward()

    # random actions to play
    n_actions = 100
    actions = 0.1 * np.random.uniform(
        low=-1.0, high=1.0, size=(n_actions, env.action_spec[0].shape[0]))

    # play actions
    print("playing random actions...")
    states = [task_init_state]
    for i in range(n_actions):
        env.step(actions[i])
        states.append(np.array(env.sim.get_state().flatten()))

    # try playback
    print("attempting playback...")
    env.reset()
    env.reset_from_xml_string(task_xml)
    env.sim.reset()
    env.sim.set_state_from_flattened(task_init_state)
    env.sim.forward()

    for i in range(n_actions):
        env.step(actions[i])
        state_playback = env.sim.get_state().flatten()
        assert np.all(np.equal(states[i + 1], state_playback))

    env.close()
    print("test passed!")
Example No. 15
def run_simulation():
    env_id = "Ultrasound"

    env_options = {}
    env_options["robots"] = "Panda"
    env_options["gripper_types"] = "UltrasoundProbeGripper"
    env_options["controller_configs"] = {
        "type": "OSC_POSE",
        "input_max": 1,
        "input_min": -1,
        "output_max": [0.05, 0.05, 0.05, 0.5, 0.5, 0.5],
        "output_min": [-0.05, -0.05, -0.05, -0.5, -0.5, -0.5],
        "kp": 300,
        "damping_ratio": 1,
        "impedance_mode": "fixed",
        "kp_limits": [0, 500],
        "kp_input_max": 1,
        "kp_input_min": 0,
        "damping_ratio_limits": [0, 2],
        "position_limits": None,
        "orientation_limits": None,
        "uncouple_pos_ori": True,
        "control_delta": True,
        "interpolation": None,
        "ramp_ratio": 0.2
    }
    env_options["control_freq"] = 500
    env_options["has_renderer"] = True
    env_options["has_offscreen_renderer"] = False
    env_options["render_camera"] = None
    env_options["use_camera_obs"] = False
    env_options["use_object_obs"] = False
    env_options["horizon"] = 1000
    env_options["early_termination"] = False
    env_options["save_data"] = False
    env_options["torso_solref_randomization"] = False
    env_options["initial_probe_pos_randomization"] = False
    env_options["deterministic_trajectory"] = False
    env_options["use_box_torso"] = True

    env = suite.make(env_id, **env_options)

    # reset the environment to prepare for a rollout
    obs = env.reset()

    done = False
    ret = 0.
    
    for t in range(env.horizon):
        action = [0.0, 0, 0, 0, 0, 0]
        obs, reward, done, _ = env.step(action) # play action
        ret += reward
        env.render()
        if done:
            env.close()
            break
    print("rollout completed with return {}".format(ret))
Example No. 16
def evaluate_policy(env_config, model_path, n_eval, printout=False):
    if printout:
        print("Loading policy...")

    # Load trained model and corresponding policy
    data = torch.load(model_path)
    policy = data['evaluation/policy']

    if printout:
        print("Policy loaded")

    # Load controller
    controller = env_config.pop("controller")
    if controller in set(ALL_CONTROLLERS):
        # This is a default controller
        controller_config = load_controller_config(
            default_controller=controller)
    else:
        # This is a string to the custom controller
        controller_config = load_controller_config(custom_fpath=controller)

    # Create robosuite env
    env = suite.make(**env_config,
                     has_renderer=False,
                     has_offscreen_renderer=False,
                     use_object_obs=True,
                     use_camera_obs=False,
                     reward_shaping=True,
                     controller_configs=controller_config)
    env = GymWrapper(env)

    # Use CUDA if available
    if torch.cuda.is_available():
        set_gpu_mode(True)
        if isinstance(policy, MakeDeterministic):
            policy.stochastic_policy.cuda()
        else:
            policy.cuda()

    if printout:
        print("Evaluating policy over {} simulations...".format(n_eval))

    # Create variable to hold rewards to be averaged
    returns = []

    # Loop through simulation n_eval times and take average returns each time
    for i in range(n_eval):
        path = rollout(
            env,
            policy,
            max_path_length=env_config["horizon"],
            render=False,
        )

        # Determine total summed rewards from episode and append to 'returns'
        returns.append(sum(path["rewards"]))

    # Average the episode returns, normalized by the environment's reward scale
    return sum(returns) / (env_config["reward_scale"] * n_eval)
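
evaluate_policy pops a "controller" key and reads "horizon" and "reward_scale" from env_config; the remaining entries are forwarded to suite.make. A hedged sketch of such a config dict (the values are illustrative, not from the original):

# Sketch (assumption): an env_config dict compatible with evaluate_policy.
env_config = {
    "env_name": "Lift",        # forwarded to suite.make
    "robots": "Panda",
    "horizon": 500,            # used as max_path_length
    "reward_scale": 1.0,       # used to normalize returns
    "controller": "OSC_POSE",  # popped; a default controller name or a path to a custom config
}
# avg_return = evaluate_policy(env_config, "params.pkl", n_eval=10, printout=True)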
Example No. 17
 def create_from_id(self, env_id):
     _, task = env_id.split(".")
     # Env interface will do the actual job of creating the environment.
     env = robosuite.make(task,
                          has_offscreen_renderer=set_eval,
                          has_renderer=False,
                          use_object_obs=True,
                          reward_shaping=self.args.rs_reward_shaping,
                          use_camera_obs=set_eval)
     return env
Example No. 18
    def __init__(self, has_display=False):

        # Create environment.
        print("Do I have a display?", has_display)
        # self.base_env = robosuite.make('BaxterLift', has_renderer=has_display)
        self.base_env = robosuite.make("BaxterViz", has_renderer=has_display)

        # Create kinematics object.
        self.baxter_IK_object = IKWrapper(self.base_env)
        self.environment = self.baxter_IK_object.env
Example No. 19
def make_video(n_episode, slower=3):
    low = np.array([0.57, 0.35])
    high = np.array([0.63, 0.405])

    render_drop_freq = 10

    env = suite.make(
        'BinPackPlace',
        has_renderer=False,
        has_offscreen_renderer=True,
        ignore_done=False,
        use_camera_obs=False,
        control_freq=1,
        camera_height=240,
        camera_width=240,
        render_drop_freq=render_drop_freq,
        # obj_names=obj_names,
        action_bound=(low, high),
    )

    subprocess.call(['rm', '-rf', 'frames'])
    subprocess.call(['mkdir', '-p', 'frames'])
    subprocess.call(['mkdir', '-p', 'demo'])
    time_step_counter = 0

    ## make video
    for i_episode in range(n_episode):
        env.reset()

        for _ in range(100):

            actions = env.action_space.sample()

            obs, rew, done, info = env.step(actions)

            for i in range(len(info['birdview'])):
                image_data_bird = info['birdview'][i]
                image_data_agent = info['targetview'][i]
                image_data = np.concatenate(
                    (image_data_bird, image_data_agent), 1)

                img = Image.fromarray(image_data, 'RGB')
                for __ in range(slower):
                    img.save('frames/frame-%.10d.png' % time_step_counter)
                    time_step_counter += 1

            if done:
                break

    subprocess.call([
        'ffmpeg', '-framerate', '50', '-y', '-i', 'frames/frame-%010d.png',
        '-r', '24', '-pix_fmt', 'yuv420p', '-s', '480x240', DEMO_PATH
    ])

    subprocess.call(['rm', '-rf', 'frames'])
Example No. 20
def main():

    width = 512
    height = 512
    screen = pygame.display.set_mode((width, height))

    # create environment instance
    env = suite.make(
        "PandaILIAD",
        has_renderer=False,
        ignore_done=True,
        camera_height=height,
        camera_width=width,
        gripper_visualization=False,
        use_camera_obs=True,
        use_object_obs=False,
    )

    # reset the environment
    env.reset()

    # create the human input device
    joystick = Joystick()
    model = Model()
    action_scale = 0.1

    # initialize state
    obs, reward, done, info = env.step([0.0] * 8)
    image = np.flip(obs['image'].transpose((1, 0, 2)), 1)
    joint_pos = obs['joint_pos']

    while True:

        # get human input
        z, reset = joystick.input()
        if reset:
            return True

        # get observation
        res_image = cv2.resize(image, dsize=(28, 28))

        # decode to high-DoF action
        action_arm = model.decoder(res_image, joint_pos, z)
        action = np.asarray(action_arm + [0.0]) * action_scale

        # take action
        obs, reward, done, info = env.step(action)
        image = np.flip(obs['image'].transpose((1, 0, 2)), 1)
        joint_pos = obs['joint_pos']

        pygame.pixelcopy.array_to_surface(screen, image)
        pygame.display.update()

    env.close()
Example No. 21
def make(has_renderer=False,
         init_robot_pose=(-1.37550484e-02, 5.21560077e-03, 8.78072546e-02),
         renderer=None,
         horizon=100):
    controller_conf = load_controller_config(
        default_controller="LOCOMOTION_JOINT_TORQUE")
    os.makedirs(log_dir, exist_ok=True)

    rbs_env = suite.make(env_name="Walk",
                         robots="Laikago",
                         controller_configs=controller_conf,
                         has_renderer=has_renderer,
                         render_camera=renderer,
                         has_offscreen_renderer=False,
                         use_camera_obs=False,
                         control_freq=10,
                         init_robot_pose=init_robot_pose,
                         init_robot_ori=(0, 0, 0),
                         reward_shaping=True,
                         horizon=horizon)

    env = GymWrapper(rbs_env,
                     keys=[
                         "robot0_chassis_pos", "robot0_chassis_vel",
                         "robot0_joint_pos", "robot0_joint_vel"
                     ])

    if not has_renderer:
        env = Monitor(env, log_dir)

    print("box:")
    print(env.action_space)
    print("shape -1")
    print(env.action_space.shape[-1])
    print("shape")
    print(env.action_space.shape)
    print("high")
    print(env.action_space.high)
    print("low")
    print(env.action_space.low)

    print("box:")
    print(env.observation_space)
    print("shape -1")
    print(env.observation_space.shape[-1])
    print("shape")
    print(env.observation_space.shape)
    print("high")
    print(env.observation_space.high)
    print("low")
    print(env.observation_space.low)

    return env
Example No. 22
    def _thunk():
        if env_id.startswith("Sawyer"):
            env = robosuite.make(
                env_id,
                has_renderer=False,
                has_offscreen_renderer=False,
                ignore_done=False,
                horizon=1000,  #200,
                use_camera_obs=False,
                gripper_visualization=False,
                reward_shaping=True,
                control_freq=100  #10, # 100
            )
            env = GymGymWrapper(env)
        elif env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example No. 23
def main():

    # create environment instance
    env = suite.make(
        "PandaLift",
        has_renderer=True,
        ignore_done=True,
        use_camera_obs=False,
        gripper_visualization=True,
        control_freq=100
    )

    # reset the environment
    env.reset()
    env.viewer.set_camera(camera_id=0)

    # enable controlling the end effector directly instead of using joint velocities
    env = IKWrapper(env)

    # create the human input device
    joystick = Joystick()

    # create recorder
    demo_number = sys.argv[1]
    savename = "demonstrations/demo" + demo_number + ".pkl"
    sample_time = 0.1
    prev_time = time.time()
    data = []

    while True:

        # get human input
        dpos, dquat, grasp, reset = joystick.input()
        if reset:
            pickle.dump(data, open(savename, "wb"))
            break

        # take action and record joint position
        action = np.concatenate([dpos, dquat, grasp])
        obs, reward, done, info = env.step(action)
        joint_pos = obs['joint_pos']

        # save joint position to dataset
        curr_time = time.time()
        if curr_time - prev_time > sample_time:
            prev_time = curr_time
            data.append(list(joint_pos))
            pickle.dump(data, open(savename, "wb"))

        env.render()

    env.close()
Example No. 24
def make_env(env_name):
    import robosuite
    env = robosuite.make(
        env_name,
        has_renderer=False,
        has_offscreen_renderer=False,
        ignore_done=True,
        use_object_obs=True,
        use_camera_obs=False,
        gripper_visualization=False,
        reward_shaping=True,
        control_freq=100,
    )
    return env
def evaluate_grip_goal():
    env = suite.make(
        env_name=args.env_name,
        robots=args.robot,
        has_renderer=False,
        has_offscreen_renderer=False,
        use_camera_obs=False,
        use_object_obs=True,
        horizon=args.horizon,
        reward_shaping=False,
    )
    success_count = 0
    cumulative_success = []
    for test in range(args.num_trials):
        obs = env.reset()
        state_dim = obs['robot0_robot-state'].shape[0] + obs['object-state'].shape[0]
        state = np.append(obs['robot0_robot-state'], obs['object-state'])
        agent = set_agent(state_dim, env, args)

        # Run a single episode
        done = False
        while not done:
            if args.algo == 'REINFORCE':
                action, log_prob = agent.select_action(state)
            else:
                action = agent.select_action(state)
            obs, reward, done, info = env.step(action)
            if check_grip(env):
                success_count += 1
                print('The robot succeeded in gripping the block')
                break

            state = np.append(obs['robot0_robot-state'], obs['object-state'])

        cumulative_success.append(success_count)
        if done:
            print('The robot failed to grip the block')

    print('Creating success fig...')
    fig = go.Figure()
    fig.add_trace(go.Scatter(name='actual_success_rate',
                             x=[i for i in range(args.num_trials)],
                             y=cumulative_success))
    fig.add_trace(go.Scatter(name='max_success_rate',
                             x=[i for i in range(args.num_trials)],
                             y=[i + 1 for i in range(args.num_trials)]))
    fig.update_layout(template='plotly_white',
                      title='Grasp Success Rate',
                      xaxis=dict(title='evaluation step'),
                      yaxis=dict(title='num. successes'))
    fig.write_image('grasp_success_rate.png')
    print(success_count / args.num_trials)
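
check_grip is not defined in this snippet. A hedged sketch of how such a predicate could look for a Lift-style task, comparing the gripper site to the cube body; the site/body names and the threshold are assumptions about the model:

# Sketch (assumption): grip check based on gripper-to-cube distance.
import numpy as np

def check_grip(env, threshold=0.02):
    grip_pos = env.sim.data.site_xpos[
        env.sim.model.site_name2id("gripper0_grip_site")]
    cube_pos = env.sim.data.body_xpos[
        env.sim.model.body_name2id("cube_main")]
    # treat the block as gripped when the grip site is essentially on the cube
    return np.linalg.norm(grip_pos - cube_pos) < threshold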
Example No. 26
def collect_projection_data_sawyer(args):
    num_frames = args.num_frames

    # env = gym.make("FetchPushCustom-v1", n_substeps=20)
    import robosuite as suite
    env = suite.make(
        "SawyerLift",
        has_renderer=False,  # no on-screen renderer
        has_offscreen_renderer=True,  # off-screen renderer is required for camera observations
        use_camera_obs=True,  # use camera observations
        camera_name='sideview',  # use the "sideview" camera
        use_object_obs=True,  # also keep object features
        gripper_visualization=True,
        ignore_done=True,
        control_freq=30)

    source_pts = []
    target_pts = []

    n = 0
    while n < num_frames:
        x = env.reset()
        for k in range(3):
            for i in range(10):
                x, _, _, _  = env.step(np.random.randn(env.dof))

            im = skimage.img_as_ubyte(skimage.transform.rotate(x['image'], 180))
            points = cpselect(im)

            if len(points) > 0:
                source_pts.append(get_grip_pos(env))
                target_pts.append(points[0])
                print(len(source_pts))

        n += 1

    data = {}
    data['world_coords'] = np.stack(source_pts).astype(np.float32)
    data['pixel_coords'] = np.stack(target_pts).astype(np.float32)

    for n in range(len(source_pts)):
        s, t = data['world_coords'][n], data['pixel_coords'][n]
        print(n, s, t)

    os.makedirs(args.dir_name, exist_ok=True)
    print(os.path.join(args.dir_name, args.save_path + ".npz"))
    np.savez(os.path.join(args.dir_name, args.save_path + ".npz"), **data)
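
get_grip_pos is not defined here; given how the gripper position is read in sample_goal_frames_env above, a plausible sketch is:

# Sketch: gripper world position from the MuJoCo "grip_site", mirroring the
# lookup used in sample_goal_frames_env above.
import numpy as np

def get_grip_pos(env):
    site_id = env.sim.model.site_name2id("grip_site")
    return np.array(env.sim.data.site_xpos[site_id]).astype(np.float32)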
def main(args):
    """Loads a trained TD3 policy and runs evaluation rollouts.

    Args:
        args: parsed command-line arguments
    """
    env = suite.make(
        args.env_name,
        has_renderer=True,  # on-screen renderer for visualization
        has_offscreen_renderer=True,
        use_object_obs=True,  # use object-centric features
        use_camera_obs=False,  # no camera observations
        reward_shaping=True,
    )

    state = env.reset()
    state = createstate(state)
    state_dim = state.shape[0]
    action_dim = env.dof
    max_action = float(6)
    min_action = float(-6)
    print(state_dim)

    policy = TD31v1(state_dim, action_dim, max_action, args)
    directory = 'pytorch_models'
    filename = "TD3_" + args.env_name + '_42-' + args.agent
    print("Load %s/%s_actor.pth" % (directory, filename))
    policy.load(filename, directory)
    avg_reward = 0.
    seeds = [x for x in range(2)]
    for s in seeds:
        state = env.reset()
        done = False
        while not done:
            state = createstate(state)
            action = policy.select_action(state)
            #action = action.clip(-1, 1)
            #action = np.random.randn(env.dof)
            #print(action)
            state, reward, done, _ = env.step(action)
            avg_reward += reward
            env.render()
    avg_reward /= len(seeds)
    print("---------------------------------------")
    print("Average Reward over the Evaluation Step: %f" % (avg_reward))
    print("---------------------------------------")
Example No. 28
    def copy(self):
        """Override default copy method to allow robosuite env serialization.

        Robosuite environments are not serializable, and thus we cannot use the
        default `copy.deepcopy(self)` from `SoftlearningEnv`. Instead, we first
        create a copy of the self *without* robosuite environment (`self._env`)
        and then instantiate a new robosuite environment and attach it to the
        copied self.
        """
        env = self._env
        self._env = None
        result = copy.deepcopy(self)
        result._env = suite.make(f"{self._domain}{self._task}",
                                 **self._env_kwargs)
        self._env = env

        return result
 def __init__(self,
              wrapped_env_id,
              observation_keys=('robot-state', 'object-state'),
              **wrapped_env_kwargs):
     Serializable.quick_init(self, locals())
     self._wrapped_env = suite.make(wrapped_env_id, **wrapped_env_kwargs)
     self.action_space = Box(self._wrapped_env.action_spec[0],
                             self._wrapped_env.action_spec[1],
                             dtype=np.float32)
     observation_dim = self._wrapped_env.observation_spec()['robot-state'].shape[0] \
                       + self._wrapped_env.observation_spec()['object-state'].shape[0]
     self.observation_space = Box(
         -np.inf * np.ones(observation_dim),
         np.inf * np.ones(observation_dim),
         dtype=np.float32,
     )
     self._observation_keys = observation_keys
Example No. 30
def make_env():
    controller = load_controller_config(default_controller="OSC_POSE")
    env = GymWrapper(
        suite.make(
            "Door",
            robots="Panda",  # use Panda robot
            use_camera_obs=False,  # do not use pixel observations
            has_offscreen_renderer=False,  # not needed since not using pixel obs
            has_renderer=False,  # no on-screen rendering
            reward_shaping=True,  # use dense rewards
            reward_scale=1.0,  # scale max 1 per timestep
            control_freq=20,  # control frequency high enough for smooth simulation
            horizon=500,
            ignore_done=True,
            hard_reset=False,
            controller_configs=controller))
    return env
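
A short usage sketch for the wrapped Door environment above, rolling out random actions through the gym-style interface exposed by GymWrapper:

# Sketch: run one short random-action rollout in the env defined above.
import numpy as np

env = make_env()
obs = env.reset()
total_reward = 0.0
for _ in range(100):
    action = np.random.uniform(env.action_space.low, env.action_space.high)
    obs, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        break
print("return over rollout:", total_reward)
env.close()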