Esempio n. 1
0
def mpc_episode_keypoint_observation(
        config,  # the global config
        mpc_idx,  # int: index of this trial for logging purposes
        model_dy,  # the dynamics model
        mpc_dir,  # str: directory to store results
        planner,
        obs_goal,
        action_function,
        observation_function,
        video=True,
        image=True,
        use_gpu=True):
    '''
    setup
    '''

    # optionally add noise to observation as specified in config
    add_noise_to_observation = False
    if "add_noise_to_observation" in config["mpc"]:
        # raise ValueError("testing")
        add_noise_to_observation = config["mpc"]["add_noise_to_observation"]

    # basic info
    n_his = config['train']['n_history']
    num_steps = config['mpc']['num_timesteps'] + n_his

    # create PusherSlider env
    env = PusherSlider(config)
    env.setup_environment()
    env.reset()
    env.step_space_to_initialize_render()

    # set up episode container
    action_zero = np.zeros(2)

    # states used to roll out the MPC model
    states_roll_mpc = np.zeros((num_steps, config['dataset']['state_dim']))

    # gt states that actually occurred
    states_roll_gt = np.zeros(states_roll_mpc.shape)
    actions_roll = np.zeros((num_steps, config['dataset']['action_dim']))

    gt_obs_list = []
    img_gt_PIL_list = []

    actions_roll = np.zeros((num_steps, config['dataset']['action_dim']))

    for i in range(n_his, num_steps):
        actions_roll[i] = sample_pusher_velocity()

    action_lower_lim = np.array(config['mpc']['action_lower_lim'])
    action_upper_lim = np.array(config['mpc']['action_upper_lim'])
    '''
    model predictive control
    '''
    for step in range(num_steps):

        print('roll_step %d/%d' % (step, num_steps))

        states_roll_mpc[step] = observation_function(
            env.get_observation(), data_augmentation=add_noise_to_observation)
        img_gt_PIL_list.append(numpy_to_PIL(env.render(mode='rgb_array')))

        # stand still for n_his timesteps
        if step < n_his:
            action = action_zero
        else:
            actions_roll[step:] = planner.trajectory_optimization(
                states_roll_mpc[step - n_his + 1:step + 1],
                obs_goal,
                model_dy,
                actions_roll[step - n_his + 1:],
                n_sample=config['mpc']['n_sample'],
                n_look_ahead=min(num_steps - step,
                                 config['mpc']['n_look_ahead']),
                n_update_iter=config['mpc']['n_update_iter_init']
                if step == n_his else config['mpc']['n_update_iter'],
                action_lower_lim=action_lower_lim,
                action_upper_lim=action_upper_lim,
                use_gpu=use_gpu)

            action = actions_roll[step]

        obs, reward, done, info = env.step(action)
        gt_obs_list.append(obs)
    '''
    render the prediction
    '''

    # reset PusherSlider env
    env.reset()

    # setup rendering data

    width = config['env']['display_size'][0]
    height = config['env']['display_size'][1]
    n_split = 3
    split = 4

    if image:
        eval_img_dir = os.path.join(mpc_dir, '%d_img' % mpc_idx)
        os.system('mkdir -p ' + eval_img_dir)
        print('Save images to %s' % eval_img_dir)

    if video:
        eval_vid_path = os.path.join(mpc_dir, '%d_vid.avi' % mpc_idx)
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        print('Save video as %s' % eval_vid_path)
        fps = int(env.fps)
        print("fps:", fps)
        out = cv2.VideoWriter(eval_vid_path, fourcc, fps,
                              (width * n_split + split *
                               (n_split - 1), height))

    # gt represents goal
    for i, start_idx in enumerate(range(num_steps)):

        # get ground truth positions at goal
        goal_gt_state_dict = pusher_pose_slider_keypoints_from_tensor(obs_goal)

        # now render them
        img_goal_gt_PIL = pusher_slider_keypoints_image(
            config, goal_gt_state_dict['pusher']['position'],
            goal_gt_state_dict['keypoint_positions'])

        img_goal_gt = np.array(img_goal_gt_PIL)

        #
        img_gt_PIL = img_gt_PIL_list[i]

        # # actual GT positions that happened
        state_roll = states_roll_mpc[i, :]
        state_roll_dict = pusher_pose_slider_keypoints_from_tensor(state_roll)
        img_gt_PIL = pusher_slider_keypoints_image(
            config,
            state_roll_dict['pusher']['position'],
            state_roll_dict['keypoint_positions'],
            img=img_gt_PIL)

        img_gt = np.array(img_gt_PIL)

        # blend the images
        img_merge_PIL = Image.blend(img_goal_gt_PIL, img_gt_PIL, 0.5)

        # get numpy image
        img_merge = np.array(img_merge_PIL)

        img_output = np.zeros(
            (img_goal_gt.shape[0],
             img_goal_gt.shape[1] * n_split + split * (n_split - 1),
             3)).astype(np.uint8)

        img_output[:, :img_goal_gt.
                   shape[1]] = img_goal_gt  # goal keypoints image
        img_output[:, img_goal_gt.shape[1] + split:img_goal_gt.shape[1] * 2 +
                   split] = img_gt
        img_output[:, (img_goal_gt.shape[1] + split) * 2:] = img_merge

        if image:
            # convert to PIL then save
            img_output_PIL = numpy_to_PIL(img_output)
            img_output_PIL.save(os.path.join(eval_img_dir, 'fig_%d.png' % i))

        if video:
            # convert from RGB to BGR since video writer wants BGR
            img_output_bgr = cv2.cvtColor(img_output, cv2.COLOR_RGB2BGR)
            out.write(img_output_bgr)

    if video:
        out.release()
Esempio n. 2
0
def test_gym_API():
    """
    Collects two episodes and saves them to disk
    Currently no images are being saved
    :return:
    :rtype:
    """

    config_file = os.path.join(get_project_root(),
                               'experiments/01/config.yaml')
    config = load_yaml(config_file)

    env = PusherSlider(config=config)
    env.setup_environment()

    episode_container = None  # for namespacing purposes
    multi_episode_container = MultiEpisodeContainer()

    print("fps", env._fps)
    num_episodes = 1
    # num_timesteps_per_episode = 40
    for episode_idx in range(num_episodes):
        env.reset()
        obs_prev = env.get_observation()

        episode_container = EpisodeContainer()
        episode_container.set_config(env.config)
        for i in range(env.config['dataset']['num_timesteps']):
            env.render(mode='human')
            if i == 50:

                # save this image out in local directory
                img = env.render(mode='rgb_array')

                if DEBUG_PRINTS:
                    print("saving figure to file")
                    print("type(img)", type(img))
                    print(img.shape)
                    print(img.dtype)

                pil_img = numpy_to_PIL(img)
                pil_img.save('test_PIL.png')

            action = np.array([100, 0])
            obs, reward, done, info = env.step(action)

            episode_container.add_obs_action(obs_prev, action)
            obs_prev = obs

            if DEBUG_PRINTS:
                print("\n\nsim_time = ", env._sim_time)
                print("obs sim_time = ", obs['sim_time'])

        multi_episode_container.add_episode(episode_container)

    if True:
        save_file = os.path.join(os.getcwd(), "single_episode_log.p")
        episode_container.save_to_file(save_file)

        multi_episode_save_file = os.path.join(os.getcwd(),
                                               "multi_episode_log.p")
        multi_episode_container.save_to_file(multi_episode_save_file)
Esempio n. 3
0
def eval_episode(
        config,  # the global config
        dataset,  # MultiEpisodeDataset
        episode_name,  # str,
        roll_idx,  # int: index of this rollout for logging purposes
        model_dy,  # dynamics model
        eval_dir,  # str: directory to store results
        video=True,  # save a video of rollout
        image=True,  # save images of rollout
        use_gpu=True,  # bool: whether or not to use the gpu
        start_idx=None,  # int: starting index for rollout
        n_prediction=5,  # prediction horizon,
        render_human=False,  # whether to render in human visible mode
):

    episode = dataset._episodes[episode_name]
    n_his = config["train"]["n_history"]
    n_roll = config["train"]["n_rollout"]

    if start_idx is None:
        low, high = dataset.get_valid_idx_range_for_episode(episode)
        start_idx = int((low + high) / 2.0)

    data = dataset.__getitem__(0,
                               episode_name=episode_name,
                               episode_idx=start_idx,
                               rollout_length=0)

    # need to do the unsqueeze to make it look like they
    # are in batch form for doing forward pass through the
    # network
    states_unnorm = data['observations'].unsqueeze(0)
    actions_unnorm = data['actions'].unsqueeze(0)

    if use_gpu:
        device = 'cuda'
        states_unnorm = states_unnorm.cuda()
        actions_unnorm = actions_unnorm.cuda()
    else:
        device = 'cpu'

    # normalize states for the entire rollout
    states = model_dy.state_normalizer.normalize(states_unnorm)

    # state_cur: B x n_his x state_dim
    state_cur = states[:, :n_his]

    # states.shape[2] should be state_dim
    states_predicted = torch.zeros((n_prediction, states.shape[2]),
                                   device=device)
    states_predicted_idx = [None] * n_prediction

    print("states_predicted.shape", states_predicted.shape)

    # this code is exactly copied from the training setup
    # ropagate dynamics forward through time using the learned model
    with torch.no_grad():
        for j in range(n_prediction):

            # action_cur: B x n_his x action_dim
            # get actions from the dataloader
            action_cur = None
            data = dataset.__getitem__(0,
                                       episode_name=episode_name,
                                       episode_idx=(start_idx + j),
                                       rollout_length=0)
            actions_unnorm = data['actions'].unsqueeze(0).cuda()
            actions = model_dy.action_normalizer.normalize(actions_unnorm)
            action_cur = actions[:, :n_his]

            # state_pred: B x state_dim
            # state_cur: B x n_his x state_dim
            state_pred = model_dy(state_cur, action_cur)
            states_predicted[j] = state_pred.squeeze()
            states_predicted_idx[j] = start_idx + j + 1

            # update state_cur
            state_cur = torch.cat(
                [state_cur[:, 1:], state_pred.unsqueeze(1)], 1)

    states_predicted_unnorm = model_dy.state_normalizer.denormalize(
        states_predicted)

    # create PusherSlider env
    env = PusherSlider(config)
    env.setup_environment()
    env.reset()

    # setup rendering data
    '''
    render the prediction
    '''
    width = config['env']['display_size'][0]
    height = config['env']['display_size'][1]
    n_split = 3
    split = 4

    if image:
        eval_img_dir = os.path.join(eval_dir, '%d_img' % roll_idx)
        os.system('mkdir -p ' + eval_img_dir)
        print('Save images to %s' % eval_img_dir)

    if video:
        eval_vid_path = os.path.join(eval_dir, '%d_vid.avi' % roll_idx)
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        print('Save video as %s' % eval_vid_path)
        fps = int(env.fps)
        print("fps:", fps)
        out = cv2.VideoWriter(eval_vid_path, fourcc, fps,
                              (width * n_split + split *
                               (n_split - 1), height))

    # can easily read the ground truth states directly from the episode . . .
    for i, start_idx in enumerate(states_predicted_idx):
        # get predicted positions
        pred_state_np = states_predicted_unnorm[i, :].cpu().numpy()
        predicted_state_dict = pusher_slider_pose_from_tensor(pred_state_np)

        # get ground truth positions
        gt_state_dict = episode.get_observation(start_idx)

        # now render them
        env.set_object_positions_for_rendering(gt_state_dict)
        img_gt = env.render(mode='rgb_array')
        img_gt_PIL = numpy_to_PIL(img_gt)
        if render_human:
            print("rendering human")
            env.render(mode='human')

        # predicted image
        env.set_object_positions_for_rendering(predicted_state_dict)
        img_pred = env.render(mode='rgb_array')
        img_pred_PIL = numpy_to_PIL(img_pred)

        if render_human:
            env.render(mode='human')

        # blend the images
        img_merge_PIL = Image.blend(img_gt_PIL, img_pred_PIL, 0.5)

        # # show the predictions
        # if SHOW_BLENDED_IMAGE:
        #     print("showing PIL image")
        #     img_merge_PIL.show()
        #     time.sleep(1.0)

        # get numpy image
        img_merge = np.array(img_merge_PIL)

        img_output = np.zeros((img_gt.shape[0], img_gt.shape[1] * n_split +
                               split * (n_split - 1), 3)).astype(np.uint8)

        img_output[:, :img_gt.shape[1]] = img_gt
        img_output[:, img_gt.shape[1] + split:img_gt.shape[1] * 2 +
                   split] = img_pred
        img_output[:, (img_gt.shape[1] + split) * 2:] = img_merge

        if image:
            # convert to PIL then save
            img_output_PIL = numpy_to_PIL(img_output)
            img_output_PIL.save(os.path.join(eval_img_dir, 'fig_%d.png' % i))

        if video:
            # convert from RGB to BGR since video writer wants BGR
            img_output_bgr = cv2.cvtColor(img_output, cv2.COLOR_RGB2BGR)
            out.write(img_output_bgr)

    if video:
        out.release()