Code example #1
    def _get_latent_locs(self, model, hidden_state, file_name=None):
        rand_actions = ptu.from_numpy(
            np.stack([
                self.env.sample_action() for _ in range(self.num_loc_samples)
            ]))  # (B,A)
        state_action_attention, interaction_attention, all_delta_vals, all_lambdas_deltas = \
            model.get_all_activation_values(hidden_state, rand_actions)

        interaction_attention = interaction_attention.sum(-1)  # (B,K,K-1) -> (B,K)
        normalized_weights = interaction_attention / interaction_attention.sum(0)  # normalize over samples, (B,K)
        mean_point = (normalized_weights.unsqueeze(2) *
                      rand_actions[:, :2].unsqueeze(1))  # (B,K,1) * (B,1,2) -> (B,K,2)
        mean_point = mean_point.sum(0)  # (B,K,2) -> (K,2)

        if file_name is not None:
            plot_action_vals(self.env,
                             ptu.get_numpy(interaction_attention),
                             ptu.get_numpy(rand_actions),
                             "{}/{}_pick_locs".format(self.logging_dir,
                                                      file_name),
                             is_normalized=True)

        return mean_point
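
The core of _get_latent_locs is an attention-weighted average: the interaction attention over the B sampled actions is normalized across the sample dimension and used to average the (x, y) components of those actions, giving one candidate location per latent slot. A minimal sketch of that reduction on toy tensors (shapes follow the snippet; the attention values are random stand-ins for model output):

import torch

B, K, A = 16, 5, 4  # sampled actions, latent slots, action dim
rand_actions = torch.rand(B, A)
interaction_attention = torch.rand(B, K)  # stand-in for model attention values

weights = interaction_attention / interaction_attention.sum(0)  # (B,K), each column sums to 1
mean_point = (weights.unsqueeze(2) * rand_actions[:, :2].unsqueeze(1)).sum(0)  # (K,2)
print(mean_point.shape)  # torch.Size([5, 2])
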
Code example #2
    def plot_action_errors(self, env, actions, pred_recons, file_name):
        errors = env.get_action_error(ptu.get_numpy(actions))  # (B) np

        full_plot = pred_recons.view(
            [5, -1] + list(pred_recons.shape[1:]))  # (5,B//5,3,D,D)
        caption = np.reshape(errors, (5, -1))  # (5,B//5) np
        plot_multi_image(ptu.get_numpy(full_plot),
                         '{}/{}.png'.format(self.logging_dir, file_name),
                         caption=caption)
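
plot_action_errors arranges the batch into a 5-row grid with a single view call, which assumes the batch size is divisible by 5. A minimal sketch of that reshape (toy shapes, random stand-in data):

import torch

B, C, D = 20, 3, 64
pred_recons = torch.rand(B, C, D, D)
full_plot = pred_recons.view([5, -1] + list(pred_recons.shape[1:]))  # (5,B//5,3,D,D)
print(full_plot.shape)  # torch.Size([5, 4, 3, 64, 64])
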
Code example #3
    def sum_aggregate(self, goal_latents, goal_latents_recon, goal_image,
                      pred_latents, pred_latents_recon, pred_images):

        n_goal_latents = goal_latents_recon.shape[0] #Note, this should equal K if we did not filter anything out
        # Compare against each goal latent
        costs = []  #(n_goal_latents, n_actions)
        latent_idxs = [] # (n_goal_latents, n_actions), [a,b] is an index corresponding to a latent
        for i in range(n_goal_latents): #Going through all n_goal_latents goal latents
            single_costs = self.get_single_costs(goal_latents[i], goal_latents_recon[i], pred_latents, pred_latents_recon)
            min_costs, latent_idx = single_costs.min(-1)  # take min among K, size is (n_actions)

            costs.append(min_costs)
            latent_idxs.append(latent_idx)

        costs = torch.stack(costs) # (n_goal_latents, n_actions)
        latent_idxs = torch.stack(latent_idxs)  # (n_goal_latents, n_actions)

        #Sort by sum cost
        #Image contains the following: Pred_images, goal_latent_reconstructions, and
        # corresponding pred_latent_reconstructions
        #For every latent in goal latents, find corresponding predicted one (this is in latent_idxs)
        #  Should have something that is (K, num_actions) -> x[a,b] is index for pred_latents_recon
        sorted_costs, best_action_idxs = costs.sum(0).sort()

        if self.plot_actions:
            sorted_pred_images = pred_images[best_action_idxs]

            corresponding_pred_latent_recons = []
            for i in range(n_goal_latents):
                tmp = pred_latents_recon[best_action_idxs, latent_idxs[i, best_action_idxs]]  # (n_actions, 3, 64, 64)
                corresponding_pred_latent_recons.append(tmp)
            corresponding_pred_latent_recons = torch.stack(corresponding_pred_latent_recons)  # (n_goal_latents, n_actions, 3, 64, 64)
            corresponding_costs = costs[:, best_action_idxs]

            full_plot = torch.cat([sorted_pred_images.unsqueeze(0),  # (1, n_actions, 3, 64, 64)
                                   corresponding_pred_latent_recons,  # (n_goal_latents, n_actions, 3, 64, 64)
                                   ], 0)
            plot_size = self.plot_actions
            full_plot = full_plot[:, :plot_size]

            #Add goal latents
            tmp = torch.cat([goal_image, goal_latents_recon], dim=0).unsqueeze(1)  # (n_goal_latents+1, 1, 3, 64, 64)
            full_plot = torch.cat([tmp, full_plot], dim=1)

            #Add captions
            caption = np.zeros(full_plot.shape[:2])
            caption[0, 1:] = ptu.get_numpy(sorted_costs[:plot_size])
            caption[1:1 + n_goal_latents, 1:] = ptu.get_numpy(corresponding_costs[:, :plot_size])

            plot_multi_image(ptu.get_numpy(full_plot),
                             '{}/{}.png'.format(self.logging_directory, self.image_suffix), caption=caption)

        return ptu.get_numpy(sorted_costs), ptu.get_numpy(best_action_idxs), np.zeros(len(sorted_costs))
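
The ranking logic matches each goal latent to its cheapest predicted latent (min over the K slots), then sums those per-goal minima to score each action. A minimal sketch of the same two reductions on a random cost tensor (all values here are toy stand-ins for get_single_costs output):

import torch

n_goal_latents, n_actions, K = 3, 8, 4
all_costs = torch.rand(n_goal_latents, n_actions, K)  # cost of matching goal latent i to pred latent k under action a

min_costs, latent_idxs = all_costs.min(-1)  # (n_goal_latents, n_actions): best pred latent per goal latent
sorted_costs, best_action_idxs = min_costs.sum(0).sort()  # rank actions by total matching cost
print(best_action_idxs[0].item())  # index of the lowest-cost action
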
Code example #4
File: mpc_stack.py Project: pvskand/OP3
    def run_plan(self,
                 goal_image,
                 env,
                 action_selection_class,
                 num_actions_to_take,
                 true_data,
                 filter_goal_image=None):
        #Goal inference
        self.env = env
        goal_info = self.goal_inference(goal_image, filter_goal_image)

        #State acquisition
        # cur_state_and_other_info = self.state_acquisition(initial_obs, initial_actions) #Not required for stage 1

        #Planning
        actions, pred_recons, obs, try_obs = [], [], [], []  # lists: (T,A), (T,3,D,D), (T,D,D,3) np, (T,D,D,3) np
        for t in range(num_actions_to_take):
            next_action, goal_latent_index, pred_recon = action_selection_class.select_action(
                goal_info, env, self.model,
                "{}".format(t))  # returns (A), (Sc), (3,D,D)

            actions.append(next_action)  # (A)
            pred_recons.append(pred_recon)  # (3,D,D)
            try_obs.append(env.try_action(next_action))  # (D,D,3), numpy array
            obs.append(env.step(next_action))  # (D,D,3), numpy array

            # self._remove_goal_latent(goal_info, goal_latent_index)
            # cur_state_and_other_info = self.update_state(next_obs, next_actions, cur_state_and_other_info["state"]) #Not required for stage 1

        ########Create final mpc image########
        obs = process_env_obs(np.array(obs))  #(T,3,D,D)
        try_obs = process_env_obs(np.array(try_obs))  #(T,3,D,D)
        pred_recons = torch.stack(pred_recons)
        save_image(torch.cat([obs, pred_recons, try_obs], dim=0),
                   "{}/mpc.png".format(self.logging_dir),
                   nrow=obs.shape[0])

        ########Compute result stats########
        final_obs = process_env_obs(env.get_observation())  # (1,3,D,D)
        torch_goal_image = process_env_obs(goal_image)  # (1,3,D,D)
        mse = ptu.get_numpy(
            torch.pow(final_obs - torch_goal_image,
                      2).mean())  # Compare final obs to goal obs (Sc), numpy

        (correct, max_pos, max_rgb), state = env.compute_accuracy(true_data)
        stats = {
            'mse': mse,
            'correct': int(correct),
            'max_pos': max_pos,
            'max_rgb': max_rgb,
            'actions': actions
        }
        return stats
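
The final mpc.png relies on torchvision's save_image grid layout: with nrow equal to the episode length, the three concatenated (T,3,D,D) stacks render as three aligned rows (executed observations, predicted reconstructions, try_action previews). A minimal sketch with random tensors in place of real frames:

import torch
from torchvision.utils import save_image

T, D = 4, 64
obs = torch.rand(T, 3, D, D)          # executed observations
pred_recons = torch.rand(T, 3, D, D)  # model reconstructions
try_obs = torch.rand(T, 3, D, D)      # previews from try_action

# nrow=T puts T images per grid row, so each stack becomes one aligned row
save_image(torch.cat([obs, pred_recons, try_obs], dim=0), "mpc.png", nrow=T)
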
Code example #5
File: visualize_datasets_v2.py Project: pvskand/OP3
def get_mse_from_dataset(variant):
    from op3.core import logger
    copy_to_save_file(logger.get_snapshot_dir())
    train_path = get_module_path() + '/ec2_data/{}.h5'.format(
        variant['dataset'])
    num_samples = 100
    train_dataset, _ = load_dataset(train_path,
                                    train=False,
                                    batchsize=1,
                                    size=num_samples,
                                    static=False)

    models_and_type = []
    for a_model in variant['models']:
        m = load_model(a_model["saved_model_args"], train_dataset.action_dim,
                       a_model["K"])
        m_type = a_model['model_type']
        models_and_type.append((m, m_type))

    batch_indices = np.arange(0, num_samples, 4)  #bs=4
    all_mse = []
    for i in range(len(batch_indices) - 1):
        start_idx, end_idx = batch_indices[i], batch_indices[i + 1]
        frames, actions = train_dataset[start_idx:end_idx]  #(bs, T, 3, D, D)
        mse = get_mse(models_and_type, frames, actions,
                      variant['T'])  #(M, bs, T), torch tensors
        all_mse.append(mse.permute(1, 0, 2))  #(bs, M, T)
    all_mse = torch.stack(all_mse, dim=0)  #(I/bs, bs, M, T)
    all_mse = ptu.get_numpy(
        all_mse.view(-1, len(models_and_type),
                     variant['T']))  #(I, M, T), numpy array now
    np.save(logger.get_snapshot_dir() + '/computed_mse.npy', all_mse)

    mean_vals = np.mean(all_mse, axis=0)  #(M, T)
    std_vals = np.std(all_mse, axis=0)  #(M, T)
    for i in range(len(models_and_type)):
        if models_and_type[i][1] in ('next_step', 'rprp_pred'):
            # these models have no prediction at t=0, so plot from t=1
            plt.errorbar(range(1, variant['T']),
                         mean_vals[i][1:],
                         std_vals[i][1:],
                         label='{}'.format(models_and_type[i][1]),
                         capsize=5)
        else:
            plt.errorbar(range(0, variant['T']),
                         mean_vals[i],
                         std_vals[i],
                         label='{}'.format(models_and_type[i][1]),
                         capsize=5)

    # plt.legend(bbox_to_anchor=(0.4, 0.8), loc="upper right")
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    # plt.yscale('log')
    plt.savefig(logger.get_snapshot_dir() + '/relative_mse.png',
                bbox_inches="tight")
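
Each curve is the mean ± std of MSE across evaluation runs, with next-step-style models offset to start at t=1 since they produce no prediction for the first frame. A minimal sketch of the same plotting pattern on random data (the array contents and the 'static' label are hypothetical stand-ins):

import numpy as np
import matplotlib.pyplot as plt

T = 10
all_mse = np.random.rand(100, 2, T)  # (runs, models, timesteps), stand-in data
mean_vals, std_vals = all_mse.mean(axis=0), all_mse.std(axis=0)  # (M,T) each

plt.errorbar(range(1, T), mean_vals[0][1:], std_vals[0][1:], label='next_step', capsize=5)
plt.errorbar(range(T), mean_vals[1], std_vals[1], label='static', capsize=5)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.savefig('relative_mse.png', bbox_inches='tight')
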
Code example #6
    def _random_shooting(self, actions, model, image_suffix):
        # Like internal_inference except initial_hidden_state might only contain one state while obs/actions contain (B,*)
        # Inputs: obs (B,T1,3,D,D) or None, actions (B,T2,A) or None, initial_hidden_state or None, schedule (T3)
        #   Note: Assume that initial_hidden_state has entries of size (B=1,*)

        goal_info = self.goal_info
        schedule = np.array([1] * actions.shape[1])
        actions = ptu.from_numpy(actions)
        if self.action_type is None:
            predicted_info = model.batch_internal_inference(
                obs=None,
                actions=actions,
                initial_hidden_state=self.initial_hidden_state,
                schedule=schedule,
                figure_path=None)
            all_env_actions = actions
        else:
            predicted_info, all_env_actions = self._latent_batch_internal_inference(
                model, actions, self.initial_hidden_state)

        # Inputs to get_action_rankings(): goal_latents (n_goal_latents=K,R),
        # goal_latents_recon (n_goal_latents=K,3,64,64), goal_image (1,3,64,64), pred_latents (n_actions,K,R),
        # pred_latents_recon (n_actions,K,3,64,64),  pred_images (n_actions,3,64,64)
        sorted_costs, best_actions_indices, goal_latent_indices = self.score_actions_class.get_action_rankings(
            goal_info["state"]["post"]["samples"][0],
            goal_info["sub_images"][0],
            goal_info["goal_image"],
            predicted_info["state"]["post"]["samples"],
            predicted_info["sub_images"],
            predicted_info["final_recon"],
            image_suffix=image_suffix)

        num_plot_actions = 20
        self.plot_action_errors(
            self.env,
            all_env_actions[best_actions_indices][:num_plot_actions, 0],
            predicted_info["final_recon"][best_actions_indices][:num_plot_actions],
            image_suffix + "_action_errors")

        best_single_env_action = all_env_actions[best_actions_indices[0]]

        return (best_actions_indices, goal_latent_indices,
                predicted_info["final_recon"],
                ptu.get_numpy(best_single_env_action))
Code example #7
    def run_plan(self,
                 goal_image,
                 env,
                 initial_obs,
                 initial_actions,
                 action_selection_class,
                 num_actions_to_take,
                 planning_horizon,
                 true_data,
                 filter_goal_image=None):
        #Goal inference
        self.env = env
        goal_info = self.goal_inference(goal_image, filter_goal_image)

        #State acquisition
        cur_state_and_other_info = self.state_acquisition(
            initial_obs, initial_actions)
        initial_recon = cur_state_and_other_info["final_recon"]

        #Planning
        actions_taken, actions_planned, pred_recons, obs, try_obs = [], [], [], [], []  #(T), (?,3,D,D), (T,D,D,3) np, (T,D,D,3) np, (T,D,D,3)
        pred_recons = [initial_recon[0]]
        best_accuracy = 0
        first_finished_plan_steps = np.nan

        for t in range(num_actions_to_take):
            next_actions, goal_latent_index, pred_recon = action_selection_class.select_action(
                goal_info, cur_state_and_other_info["state"], env, self.model,
                "{}".format(t))  # (Tp,A), (Sc), (3,D,D)

            next_obs = [env.get_observation()]  # needed for update_state
            for i in range(planning_horizon):
                next_obs.append(env.step(next_actions[i]))  # (D,D,3), np
                try_obs.append(env.try_step(next_actions))  # (D,D,3)
                pred_recons.append(pred_recon)  # (3,D,D)

            actions_taken.extend(next_actions[:planning_horizon])  # (Tt,A)
            actions_planned.append(next_actions)  # (Tp,A)
            obs.extend(
                next_obs[1:])  # Don't want to include starting image again

            # self._remove_goal_latent(goal_info, goal_latent_index)
            next_obs = np.array(next_obs)  # (Tp+1,D,D,3) np
            cur_state_and_other_info = self.update_state(
                next_obs,
                next_actions[:planning_horizon],
                cur_state_and_other_info["state"],
                file_name="{}/state_update_{}.png".format(self.logging_dir, t))

            accuracy = self.env.compute_accuracy(
                true_data, threshold=self.accuracy_threshold)
            best_accuracy = max(accuracy, best_accuracy)
            if np.isnan(first_finished_plan_steps) and accuracy == 1:
                first_finished_plan_steps = t + 1
                break

        ########Create final mpc image########
        goal_image_tensor = process_env_obs(goal_image)  # (1,3,D,D)
        starting_image_tensor = process_env_obs(initial_obs[-1])  # (1,3,D,D)

        obs = np.concatenate((initial_obs[-1:], obs))  # (T+1,D,D,3) np
        obs = process_env_obs(np.array(obs))  # (T+1,3,D,D)
        obs = torch.cat([obs, goal_image_tensor])  # (T+2,3,D,D)
        try_obs = process_env_obs(np.array(try_obs))  # (T,3,D,D)
        try_obs = torch.cat(
            [starting_image_tensor, try_obs, goal_image_tensor])  # (T+2,3,D,D)
        pred_recons = torch.stack(pred_recons)  # (T+1,3,D,D)
        pred_recons = torch.cat([pred_recons,
                                 goal_info["final_recon"]])  # (T+2,3,D,D)
        save_image(torch.cat([obs, pred_recons, try_obs], dim=0),
                   "{}/mpc.png".format(self.logging_dir),
                   nrow=obs.shape[0])

        ########Compute result stats########
        final_obs = process_env_obs(env.get_observation())  # (1,3,D,D)
        torch_goal_image = process_env_obs(goal_image)  # (1,3,D,D)
        mse = ptu.get_numpy(
            torch.pow(final_obs - torch_goal_image,
                      2).mean())  # Compare final obs to goal obs (Sc), numpy

        stats = {
            'mse': mse,
            'correct': best_accuracy,
            'actions': actions_taken,
            "first_finished_plan_steps": first_finished_plan_steps
        }
        return stats
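
Both run_plan variants follow the same model-predictive-control loop: infer the goal once, then repeatedly plan, execute the first planning_horizon actions of the plan, and refilter the latent state with the new observations. A minimal skeleton of that loop (select_action and update_state are hypothetical stand-ins for the class methods used above):

import numpy as np

def mpc_loop(env, select_action, update_state, state, num_actions_to_take, planning_horizon):
    """Plan, execute planning_horizon steps, update the state estimate, repeat."""
    actions_taken = []
    for t in range(num_actions_to_take):
        plan = select_action(state)          # (Tp,A) open-loop plan
        next_obs = [env.get_observation()]   # the update also needs the pre-step frame
        for action in plan[:planning_horizon]:
            next_obs.append(env.step(action))
        actions_taken.extend(plan[:planning_horizon])
        state = update_state(np.array(next_obs), plan[:planning_horizon], state)
    return actions_taken
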