Example #1
    def set_dset_list(self, data_dir, down_sampling=True):
        """ Fill scene_information with the static environment features that will be used as part of the input of Static
                 Scene Feature Extractor module in SafeGAN"""
        _dir = os.path.dirname(os.path.realpath(__file__))
        _dir = _dir.split("/")[:-2]
        _dir = "/".join(_dir)
        directory = _dir + '/datasets/safegan_dataset/'

        self.list_data_files = sorted([
            get_dset_name(os.path.join(data_dir, _path).split("/")[-1])
            for _path in os.listdir(data_dir)
        ])
        for name in self.list_data_files:
            path_group = os.path.join(directory, get_dset_group_name(name))
            """ The inputs are the boundary points between the traversable and non-traversable areas. It is 
                possible to take all points or just a sample"""
            path = os.path.join(path_group, name)
            scene_map = np.load(path + "/world_points_boundary.npy")
            # Optionally subsample the boundary points down to self.down_samples points.
            if self.down_samples != -1 and down_sampling and scene_map.shape[0] > self.down_samples:
                step = scene_map.shape[0] // self.down_samples
                scene_map = scene_map[::step][:self.down_samples]
            self.scene_information[name] = torch.from_numpy(scene_map).type(torch.float).to(device)
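Below is a minimal, self-contained sketch of the stride-based subsampling performed above; the array shape and the target point count are made-up values for illustration, not taken from the dataset.

import numpy as np

down_samples = 200                    # assumed target number of boundary points
points = np.random.rand(1543, 2)      # dummy (N, 2) world-coordinate boundary points

if down_samples != -1 and points.shape[0] > down_samples:
    step = points.shape[0] // down_samples     # integer stride between kept points
    points = points[::step][:down_samples]     # keep exactly down_samples points

print(points.shape)                   # (200, 2)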
Example #2
    def set_dset_list(self, data_dir, down_sampling=True, down_samples=200):
        directory = get_root_dir() + '/datasets/safegan_dataset/'

        self.list_data_files = sorted([get_dset_name(os.path.join(data_dir, _path).split("/")[-1]) for _path in os.listdir(data_dir)])
        for name in self.list_data_files:
            path_group = os.path.join(directory, get_dset_group_name(name))

            """ The inputs are the boundary points between the traversable and non-traversable areas. It is 
                possible to take all points or just a sample"""
            path = os.path.join(path_group, name)
            scene_map = np.load(path + "/world_points_boundary.npy")
            # Optionally subsample the boundary points down to down_samples points.
            if down_samples != -1 and down_sampling and scene_map.shape[0] > down_samples:
                step = scene_map.shape[0] // down_samples
                scene_map = scene_map[::step][:down_samples]
            self.scene_information[name] = torch.from_numpy(scene_map).type(torch.float).to(device)
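A short sketch of the tensor conversion done in the last line, assuming the saved .npy file holds float64 points; the device selection and the array contents are placeholders, not part of the snippet above.

import numpy as np
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

scene_map = np.random.rand(200, 2)    # stands in for world_points_boundary.npy
scene_tensor = torch.from_numpy(scene_map).type(torch.float).to(device)

print(scene_tensor.dtype, scene_tensor.shape)   # torch.float32 torch.Size([200, 2])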
Example #3
    def set_dset_list(self, data_dir):
        """ Fill scene_information with the static environment features that will be used as part of the input of Static
                 Scene Feature Extractor module in SafeGAN"""
        directory = get_root_dir() + '/datasets/safegan_dataset/'

        self.list_data_files = sorted([
            get_dset_name(os.path.join(data_dir, _path).split("/")[-1])
            for _path in os.listdir(data_dir)
        ])
        for name in self.list_data_files:
            path_group = os.path.join(directory, get_dset_group_name(name))

            if self.pool_static_type == "physical_attention_no_encoder":
                """ In this case the features are the one extracted by one of Segmentation Networks I trained on the new dataset 
                I created. The features are taken before the last upsample layers."""
                path = os.path.join(path_group + "/segmented_features", name)
                features = np.load(path + "_segmentation_features.npy")
                features = torch.from_numpy(features).type(
                    torch.float).to(device)

            elif self.pool_static_type == "physical_attention_with_encoder":
                """ In this case the input is the raw image or the segmented one (by one of the Segmentation Networks I trained 
                on the new dataset I created). This image is then encoded by a Deep Network like ResNet"""
                path = os.path.join(path_group + "/segmented_scenes", name)
                image = plt.imread(path + ".jpg")
                image = torch.from_numpy(image).type(torch.float).to(device)
                # Images fed to the model must be a Float tensor of dimension N, 3, 256, 256, where N is the batch size.
                # PyTorch follows the NCHW convention, which means the channels dimension (C) must precede the size dimensions
                image = image.permute(2, 0, 1)
                # Normalize the image
                image = self.transform(image)
                features = self.attention_encoder(image.unsqueeze(0))

            else:
                print("ERROR: unrecognized pool_static_type '{}'".format(
                    self.pool_static_type))
                exit(1)
            self.scene_information[name] = features
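The "with encoder" branch can be isolated as below; the normalization statistics (ImageNet means and stds) and the dummy 256x256 image are assumptions, since self.transform and self.attention_encoder are not shown above.

import torch
from torchvision import transforms

# Assumed ImageNet statistics; the actual self.transform may differ.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

image = torch.rand(256, 256, 3)       # dummy H x W x C image in place of plt.imread(...)
image = image.permute(2, 0, 1)        # HWC -> CHW, the NCHW convention PyTorch expects
image = normalize(image)              # per-channel normalization
batch = image.unsqueeze(0)            # add batch dimension -> (1, 3, 256, 256)
print(batch.shape)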
Example #4
    def forward(self,
                h_states,
                seq_start_end,
                end_pos,
                rel_pos,
                seq_scene_ids=None):
        """
        Inputs:
        - h_states: Tesnsor of shape (num_layers, batch, h_dim)
        - seq_start_end: A list of tuples which delimit sequences within batch.
        - end_pos: Absolute end position of obs_traj (batch, 2)
        Output:
        - pool_h: Tensor of shape (batch, h_dim)
        """
        pool_h = []
        total_grid_size = self.grid_size * self.grid_size
        for i, (start, end) in enumerate(seq_start_end):
            start = start.item()
            end = end.item()
            num_ped = end - start

            curr_hidden = h_states.view(-1, self.h_dim)[start:end]
            curr_hidden_repeat = curr_hidden.repeat(num_ped, 1)
            curr_end_pos = end_pos[start:end]
            curr_pool_h_size = (num_ped * total_grid_size) + 1
            curr_pool_h = curr_hidden.new_zeros(
                (curr_pool_h_size, self.h_dim)).to(device)
            # curr_end_pos = curr_end_pos.data
            top_left, bottom_right = self.get_bounds(curr_end_pos)

            # Repeat position -> P1, P2, P1, P2
            curr_end_pos_rep = curr_end_pos.repeat(num_ped, 1)
            # Repeat bounds -> B1, B1, B2, B2
            top_left = self.repeat(top_left, num_ped)
            bottom_right = self.repeat(bottom_right, num_ped)

            grid_pos = self.get_grid_locations(
                top_left, curr_end_pos_rep).type_as(seq_start_end)
            # Make all positions to exclude as non-zero
            # Find which peds to exclude
            x_bound = ((curr_end_pos_rep[:, 0] >= bottom_right[:, 0]) +
                       (curr_end_pos_rep[:, 0] <= top_left[:, 0]))
            y_bound = ((curr_end_pos_rep[:, 1] >= top_left[:, 1]) +
                       (curr_end_pos_rep[:, 1] <= bottom_right[:, 1]))

            within_bound = x_bound + y_bound
            within_bound[0::num_ped + 1] = 1  # Don't include the ped itself
            within_bound = within_bound.view(-1)

            # This is a tricky way to get scatter_add to work without a for loop.
            # Offset everything by 1 and use the initial 0 position to dump all
            # unnecessary adds.
            grid_pos += 1
            offset = torch.arange(0, total_grid_size * num_ped,
                                  total_grid_size).type_as(seq_start_end)

            offset = self.repeat(offset.view(-1, 1), num_ped).view(-1)
            grid_pos += offset
            grid_pos[within_bound != 0] = 0
            grid_pos = grid_pos.view(-1, 1).expand_as(curr_hidden_repeat).to(
                device)  # grid_pos = [num_ped**2, h_dim]

            curr_pool_h = curr_pool_h.scatter_add(
                0, grid_pos, curr_hidden_repeat
            )  # curr_pool_h = [num_peds * total_grid_size + 1, h_dim],  grid_pos = [num_peds**2], curr_hidden_repeat = [num_ped**2, h_dim]
            curr_pool_h = curr_pool_h[1:]

            if visualize_attention:
                # Used for visualization only
                embed_info = torch.cat([curr_end_pos, rel_pos[start:end]],
                                       dim=1)
                encoder_out = curr_pool_h.view(num_ped, total_grid_size,
                                               self.h_dim)
                curr_pool_h_after_attention, attention_weights = self.attention_decoder(
                    encoder_out=encoder_out,
                    curr_hidden=curr_hidden,
                    embed_info=embed_info)
                data_dir = get_test_data_path('sdd')
                list_data_files = sorted([
                    get_dset_name(
                        os.path.join(data_dir, _path).split("/")[-1])
                    for _path in os.listdir(data_dir)
                ])
                seq_scenes = [list_data_files[num] for num in seq_scene_ids]
                visualize_attention_weights(seq_scenes[i], self.grid_size,
                                            attention_weights,
                                            end_pos[start:end], ax1, ax2)

            pool_h.append(
                curr_pool_h.view(num_ped, total_grid_size,
                                 self.h_dim))  # grid_size * grid_size * h_dim

        pool_h = torch.cat(pool_h, dim=0)
        encoder_out = pool_h.view(-1, total_grid_size, self.h_dim)
        embed_info = torch.cat([end_pos, rel_pos], dim=1)
        pool_h, attention_weights = self.attention_decoder(
            encoder_out=encoder_out,
            curr_hidden=h_states.squeeze(0),
            embed_info=embed_info)
        pool_h = self.mlp_pool(pool_h)
        return pool_h
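The scatter_add trick above can be hard to follow, so here is a toy reproduction with two pedestrians and a 2x2 grid; all sizes and cell indices are invented for the illustration. Each observer writes its neighbours' hidden states into its own block of grid cells, index 0 acts as a dump slot for excluded pairs, and the dump slot is dropped afterwards.

import torch

num_ped, total_grid_size, h_dim = 2, 4, 3            # toy sizes (2x2 grid)
hidden = torch.tensor([[1., 1., 1.],
                       [2., 2., 2.]])                # one hidden state per pedestrian
hidden_repeat = hidden.repeat(num_ped, 1)            # rows: h0, h1, h0, h1

# One (already offset, +1 shifted) grid cell per (observer, neighbour) pair;
# index 0 is the dump slot for the pedestrian itself or out-of-grid neighbours.
grid_pos = torch.tensor([0, 2,                       # observer 0: self dumped, ped 1 -> cell 2
                         6, 0])                      # observer 1: ped 0 -> cell 6, self dumped
grid_pos = grid_pos.view(-1, 1).expand_as(hidden_repeat)

pool = torch.zeros(num_ped * total_grid_size + 1, h_dim)
pool = pool.scatter_add(0, grid_pos, hidden_repeat)[1:]   # drop the dump slot
print(pool.view(num_ped, total_grid_size, h_dim))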
Example #5
def collect_generated_samples(args,
                              generator1,
                              generator2,
                              data_dir,
                              data_set,
                              model_name,
                              selected_scene=None,
                              selected_batch=-1):
    num_samples = 10  # args.best_k
    _, loader = data_loader(args, data_dir, shuffle=False)

    with torch.no_grad():
        for b, batch in enumerate(loader):
            print('batch = {}'.format(b))
            batch = [tensor.cuda() for tensor in batch]
            if b != selected_batch and selected_batch != -1:
                continue

            (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel,
             non_linear_ped, loss_mask, traj_frames, seq_start_end,
             seq_scene_ids) = batch

            list_data_files = sorted([
                get_dset_name(os.path.join(data_dir, _path).split("/")[-1])
                for _path in os.listdir(data_dir)
            ])
            seq_scenes = [list_data_files[num] for num in seq_scene_ids]

            photo_list, homography_list, annotated_points_list, scene_name_list, scene_information = [], [], [], [], {}
            for i, (start, end) in enumerate(seq_start_end):
                dataset_name = seq_scenes[i]
                path = get_path(dataset_name)
                reader = imageio.get_reader(get_sdd_dir(dataset_name, 'video'),
                                            'ffmpeg')
                annotated_points, h = get_homography_and_map(
                    dataset_name, "/world_points_boundary.npy")
                homography_list.append(h)
                annotated_points_list.append(annotated_points)
                scene_name_list.append(dataset_name)
                scene_information[dataset_name] = annotated_points

                start = start.item()
                (obs_len, batch_size, _) = obs_traj.size()
                frame = traj_frames[obs_len][start][0].item()
                photo = reader.get_data(int(frame))
                photo_list.append(photo)

            scene_name = np.unique(scene_name_list)
            if selected_scene is not None and not (scene_name == selected_scene).all():
                print(selected_scene, 'is not in current batch', scene_name)
                continue

            save_pickle(obs_traj, 'obs_traj', selected_scene, b, data_set,
                        model_name)
            save_pickle(pred_traj_gt, 'pred_traj_gt', selected_scene, b,
                        data_set, model_name)
            save_pickle(seq_start_end, 'seq_start_end', selected_scene, b,
                        data_set, model_name)

            save_pickle(homography_list, 'homography_list', selected_scene, b,
                        data_set, model_name)
            save_pickle(annotated_points_list, 'annotated_points_list',
                        selected_scene, b, data_set, model_name)
            save_pickle(photo_list, 'photo_list', selected_scene, b, data_set,
                        model_name)
            save_pickle(scene_name_list, 'scene_name_list', selected_scene, b,
                        data_set, model_name)
            save_pickle(scene_information, 'scene_information', selected_scene,
                        b, data_set, model_name)

            pred_traj_fake1_list, pred_traj_fake2_list = [], []

            for sample in range(num_samples):
                pred_traj_fake1, _ = get_trajectories(generator1, obs_traj,
                                                      obs_traj_rel,
                                                      seq_start_end,
                                                      pred_traj_gt,
                                                      seq_scene_ids, data_dir)
                pred_traj_fake2, _ = get_trajectories(generator2, obs_traj,
                                                      obs_traj_rel,
                                                      seq_start_end,
                                                      pred_traj_gt,
                                                      seq_scene_ids, data_dir)

                pred_traj_fake1_list.append(pred_traj_fake1)
                pred_traj_fake2_list.append(pred_traj_fake2)

            save_pickle(pred_traj_fake1_list, 'pred_traj_fake1_list',
                        selected_scene, b, data_set, model_name)
            save_pickle(pred_traj_fake2_list, 'pred_traj_fake2_list',
                        selected_scene, b, data_set, model_name)
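To make the batch filtering explicit, here is a tiny standalone version of the check that skips batches whose scenes do not all match selected_scene; the scene names are invented examples.

import numpy as np

selected_scene = 'bookstore_0'                       # hypothetical scene of interest
scene_name_list = ['bookstore_0', 'deathCircle_2']   # scenes found in the current batch

scene_name = np.unique(scene_name_list)
if selected_scene is not None and not (scene_name == selected_scene).all():
    print(selected_scene, 'is not in current batch', scene_name)   # batch is skipped
else:
    print('batch kept for scene', scene_name)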