Beispiel #1
0
    def _get_latent_locs(self, model, hidden_state, file_name=None):
        """Estimate one 2-D location per latent from attention over random actions.

        Draws ``self.num_loc_samples`` actions from ``self.env``, asks ``model``
        for its activation values on them, and returns the attention-weighted
        mean of the first two action components for every latent.

        Args:
            model: Object exposing ``get_all_activation_values(hidden_state,
                actions)`` that returns a 4-tuple; only the second entry (the
                interaction attention) is used here.
            hidden_state: Passed through to ``model`` unchanged.
            file_name: When not ``None``, the summed attention is also plotted
                via ``plot_action_vals`` under ``self.logging_dir``.

        Returns:
            The weighted mean points; (K,2) according to the shape notes kept
            from the original author below.
        """
        sampled = [self.env.sample_action()
                   for _ in range(self.num_loc_samples)]
        rand_actions = ptu.from_numpy(np.stack(sampled))  # (B,A)

        # Only the interaction attention is needed; the other three outputs
        # are discarded.
        _, pairwise_attention, _, _ = model.get_all_activation_values(
            hidden_state, rand_actions)

        # Collapse the last axis: (B,K,K-1) -> (B,K)
        interaction_attention = pairwise_attention.sum(-1)

        # Normalise over the sampled actions so each latent's weights sum to
        # one along axis 0.  (B,K)
        per_latent_total = interaction_attention.sum(0)
        weights = interaction_attention / per_latent_total

        # Broadcast (B,K,1) against (B,1,2), then reduce over the samples:
        # (B,K,2) -> (K,2)
        xy = rand_actions[:, :2]
        weighted_xy = weights[:, :, None] * xy[:, None, :]
        mean_point = weighted_xy.sum(0)

        if file_name is not None:
            plot_path = "{}/{}_pick_locs".format(self.logging_dir, file_name)
            plot_action_vals(self.env,
                             ptu.get_numpy(interaction_attention),
                             ptu.get_numpy(rand_actions),
                             plot_path,
                             is_normalized=True)

        return mean_point
Beispiel #2
0
    def _random_shooting(self, actions, model, image_suffix):
        """Roll out candidate actions, rank them against the goal, return the best.

        Similar to internal inference, except that the initial hidden state may
        hold a single state while the actions are batched.  Per the original
        author's notes: ``actions`` is (B,T2,A) and ``self.initial_hidden_state``
        is assumed to have entries of size (B=1,*).

        Args:
            actions: Numpy array of candidate actions; axis 1 is used as the
                number of schedule steps.
            model: Provides ``batch_internal_inference`` (used directly when
                ``self.action_type`` is ``None``) and is otherwise handed to
                ``self._latent_batch_internal_inference``.
            image_suffix: Forwarded to the scorer and used as the prefix of the
                action-error plot name.

        Returns:
            Tuple ``(best_actions_indices, goal_latent_indices, final_recon,
            best_env_action)`` where ``best_env_action`` is the top-ranked
            environment action converted with ``ptu.get_numpy``.
        """
        goal = self.goal_info
        # One schedule entry per step along axis 1 of the candidate actions.
        schedule = np.array([1]*actions.shape[1])
        action_batch = ptu.from_numpy(actions)

        if self.action_type is not None:
            # The helper returns both the predictions and the environment
            # actions that go with them.
            rollout, env_actions = self._latent_batch_internal_inference(
                model, action_batch, self.initial_hidden_state)
        else:
            # The candidates are used as the environment actions directly.
            rollout = model.batch_internal_inference(
                obs=None,
                actions=action_batch,
                initial_hidden_state=self.initial_hidden_state,
                schedule=schedule,
                figure_path=None)
            env_actions = action_batch

        # Shapes expected by get_action_rankings(), per the original notes:
        #   goal_latents (K,R), goal_latents_recon (K,3,64,64),
        #   goal_image (1,3,64,64), pred_latents (n_actions,K,R),
        #   pred_latents_recon (n_actions,K,3,64,64),
        #   pred_images (n_actions,3,64,64)
        goal_latents = goal["state"]["post"]["samples"][0]
        goal_latents_recon = goal["sub_images"][0]
        goal_image = goal["goal_image"]
        pred_latents = rollout["state"]["post"]["samples"]
        pred_latents_recon = rollout["sub_images"]
        pred_images = rollout["final_recon"]

        ranking = self.score_actions_class.get_action_rankings(
            goal_latents, goal_latents_recon, goal_image,
            pred_latents, pred_latents_recon, pred_images,
            image_suffix=image_suffix)
        sorted_costs, best_actions_indices, goal_latent_indices = ranking

        # Plot only the top-ranked candidates (first step of each).
        num_plot_actions = 20
        ranked_actions = env_actions[best_actions_indices]
        ranked_recons = rollout["final_recon"][best_actions_indices]
        self.plot_action_errors(self.env,
                                ranked_actions[:num_plot_actions, 0],
                                ranked_recons[:num_plot_actions],
                                image_suffix+"_action_errors")

        top_index = best_actions_indices[0]
        best_single_env_action = env_actions[top_index]

        return (best_actions_indices,
                goal_latent_indices,
                rollout["final_recon"],
                ptu.get_numpy(best_single_env_action))
Beispiel #3
0
def process_env_actions(env_actions):
    """Prepare environment actions for the model.

    A 1-D array (a single action, (A) per the original notes) gets a leading
    axis so it becomes (T=1,A); arrays of any other rank pass through
    unchanged.  The result is handed to ``ptu.from_numpy``.
    """
    is_single_action = env_actions.ndim == 1
    batched = (np.expand_dims(env_actions, axis=0)
               if is_single_action else env_actions)
    return ptu.from_numpy(batched)
Beispiel #4
0
def process_env_obs(env_obs):
    """Prepare environment image observations for the model.

    A 3-D array (a single frame, (D,D,3) per the original notes) gets a leading
    axis so it becomes (T=1,D,D,3).  Axis 3 is then moved to position 1
    (channels-last to channels-first), the array is converted with
    ``ptu.from_numpy`` and divided by 255.
    """
    is_single_frame = env_obs.ndim == 3
    frames = np.expand_dims(env_obs, axis=0) if is_single_frame else env_obs
    channels_first = np.moveaxis(frames, 3, 1)
    return ptu.from_numpy(channels_first) / 255
Beispiel #5
0
    def __init__(
            self,
            input_width,
            input_height,
            input_channels,
            output_size,
            kernel_sizes,
            n_channels,
            strides,
            paddings,
            hidden_sizes=None,
            added_fc_input_size=0,
            batch_norm_conv=False,
            batch_norm_fc=False,
            init_w=1e-4,
            hidden_init=nn.init.xavier_uniform_,
            hidden_activation=nn.ReLU(),
            output_activation=identity,
    ):
        """Set up the convolution stack and a normalised coordinate grid.

        Args:
            input_width, input_height, input_channels: Input image geometry;
                stored and used to size the coordinate grid and first conv.
            output_size: Stored on the instance.
            kernel_sizes, n_channels, strides, paddings: Per-layer conv
                settings; all four sequences must have the same length.
            hidden_sizes: Stored on the instance (``None`` becomes ``[]``).
            added_fc_input_size, batch_norm_conv, batch_norm_fc: Stored on the
                instance; not otherwise used in this constructor.
            init_w: Accepted but not used in this constructor.
            hidden_init: Initialiser applied in place to every conv weight.
            hidden_activation, output_activation: Stored on the instance.
        """
        hidden_sizes = [] if hidden_sizes is None else hidden_sizes
        assert len(kernel_sizes) == len(n_channels) == len(strides) == len(paddings)
        super().__init__()

        # Plain configuration attributes.
        self.hidden_sizes = hidden_sizes
        self.input_width = input_width
        self.input_height = input_height
        self.input_channels = input_channels
        self.output_size = output_size
        self.output_activation = output_activation
        self.hidden_activation = hidden_activation
        self.batch_norm_conv = batch_norm_conv
        self.batch_norm_fc = batch_norm_fc
        self.added_fc_input_size = added_fc_input_size
        self.conv_input_length = (
            self.input_width * self.input_height * self.input_channels)

        # Sub-module containers, registered in this fixed order.  Only
        # conv_layers is populated here; the other three stay empty.
        self.conv_layers = nn.ModuleList()
        self.conv_norm_layers = nn.ModuleList()
        self.fc_layers = nn.ModuleList()
        self.fc_norm_layers = nn.ModuleList()

        # Chain the convolutions: each layer consumes the previous layer's
        # output channels.
        in_channels = input_channels
        layer_specs = zip(n_channels, kernel_sizes, strides, paddings)
        for out_channels, kernel_size, stride, padding in layer_specs:
            conv = nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size,
                             stride=stride,
                             padding=padding)
            hidden_init(conv.weight)
            conv.bias.data.zero_()
            self.conv_layers.append(conv)
            in_channels = out_channels

        # Coordinate grid in [-1, 1]: x varies along the width axis, y along
        # the height axis; each plane has shape (input_height, input_width).
        xcoords, ycoords = np.meshgrid(
            np.linspace(-1, 1, self.input_width),
            np.linspace(-1, 1, self.input_height),
            indexing="xy")
        grid = np.stack([xcoords, ycoords], 0)[np.newaxis]  # (1, 2, H, W)
        self.coords = from_numpy(grid)
Beispiel #6
0
    def __init__(
        self,
        input_width,
        input_height,
        input_channels,
        output_size,
        kernel_sizes,
        n_channels,
        strides,
        paddings,
        hidden_sizes,
        lstm_size,
        lstm_input_size,
        added_fc_input_size=0,
        batch_norm_conv=False,
        batch_norm_fc=False,
        init_w=1e-4,
        hidden_init=nn.init.xavier_uniform_,
        hidden_activation=nn.ReLU(),
        lambda_output_activation=identity,
        k=None,
    ):
        """Build the conv stack, pooling, LSTM, FC layers, two heads and a grid.

        Args:
            input_width, input_height, input_channels: Input image geometry;
                ``input_width`` is also the average-pooling kernel size.
            output_size: Output width of both ``last_fc`` and ``last_fc2``.
            kernel_sizes, n_channels, strides, paddings: Per-layer conv
                settings; all four sequences must have the same length.
            hidden_sizes: Widths of the fully connected layers (``None``
                becomes ``[]``).
            lstm_size: Hidden size of the LSTM and input width of both heads.
            lstm_input_size: Input feature size of the LSTM.
            added_fc_input_size: Extra features added to the first FC layer's
                input size (for injecting input directly into the FC layers).
            batch_norm_conv, batch_norm_fc: Stored on the instance; no
                normalisation layers are created in this constructor.
            init_w: Bound of the uniform initialisation of the FC layers.
            hidden_init: Initialiser applied in place to every conv weight.
            hidden_activation, lambda_output_activation: Stored on the instance.
            k: Stored as ``self.K``.
        """
        hidden_sizes = [] if hidden_sizes is None else hidden_sizes
        assert len(kernel_sizes) == len(n_channels) == len(strides) == len(paddings)
        super().__init__()

        # Plain configuration attributes.
        self.hidden_sizes = hidden_sizes
        self.input_width = input_width
        self.input_height = input_height
        self.input_channels = input_channels
        self.lstm_size = lstm_size
        self.output_size = output_size
        self.lambda_output_activation = lambda_output_activation
        self.hidden_activation = hidden_activation
        self.batch_norm_conv = batch_norm_conv
        self.batch_norm_fc = batch_norm_fc
        self.added_fc_input_size = added_fc_input_size
        self.conv_input_length = (
            self.input_width * self.input_height * self.input_channels)
        self.K = k

        # Sub-modules are registered (and, where random, initialised) in this
        # fixed order.  The two *_norm_layers lists stay empty.
        self.conv_layers = nn.ModuleList()
        self.conv_norm_layers = nn.ModuleList()
        self.fc_layers = nn.ModuleList()
        self.fc_norm_layers = nn.ModuleList()
        self.avg_pooling = torch.nn.AvgPool2d(kernel_size=input_width)

        self.lstm = nn.LSTM(lstm_input_size,
                            lstm_size,
                            num_layers=1,
                            batch_first=True)

        # Chain the convolutions: each layer consumes the previous layer's
        # output channels.
        in_channels = input_channels
        layer_specs = zip(n_channels, kernel_sizes, strides, paddings)
        for out_channels, kernel_size, stride, padding in layer_specs:
            conv = nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size,
                             stride=stride,
                             padding=padding)
            hidden_init(conv.weight)
            conv.bias.data.fill_(0)
            self.conv_layers.append(conv)
            in_channels = out_channels

        # Discover the flattened conv output size empirically by pushing a
        # dummy batch of one through the conv stack and the pooling layer.
        # The freshly built layers live on the CPU, as does this probe tensor.
        probe = torch.zeros(
            1, self.input_channels, self.input_width, self.input_height)
        for conv in self.conv_layers:
            probe = conv(probe)
        probe = self.avg_pooling(probe)

        # Flattened probe size plus any features injected directly into the
        # fully connected layers.
        fc_input_size = int(np.prod(probe.shape)) + added_fc_input_size

        # Fully connected stack with small uniform initialisation.
        for hidden_size in hidden_sizes:
            fc = nn.Linear(fc_input_size, hidden_size)
            fc.weight.data.uniform_(-init_w, init_w)
            fc.bias.data.uniform_(-init_w, init_w)
            self.fc_layers.append(fc)
            fc_input_size = hidden_size

        # Two output heads fed from the LSTM; both keep nn.Linear's default
        # initialisation.
        self.last_fc = nn.Linear(lstm_size, output_size)
        self.last_fc2 = nn.Linear(lstm_size, output_size)

        # Coordinate grid in [-1, 1]: x varies along the width axis, y along
        # the height axis; each plane has shape (input_height, input_width).
        xcoords, ycoords = np.meshgrid(
            np.linspace(-1, 1, self.input_width),
            np.linspace(-1, 1, self.input_height),
            indexing="xy")
        grid = np.stack([xcoords, ycoords], 0)[np.newaxis]  # (1, 2, H, W)
        self.coords = from_numpy(grid)