Example #1
    def forward(self, x):
        x = self.pad2d(x)
        in_channels, h, w = x.shape[1], x.shape[2], x.shape[3]
        pooled_features, idx = self.max_pool(x[:, :-5, ...])
        num_of_win = pooled_features.shape[2] * pooled_features.shape[3]
        # ********** Shape aggregation **********
        max_unpooled = self.max_unpool(pooled_features,
                                       idx,
                                       output_size=(h, w))
        # The division is a heuristic taken from the Attention paper (scaled dot-product attention divides by sqrt(d)
        # to avoid the extremes of the softmax). It should be experimented with. I think it has to do with random walks.
        shape_weights = torch.norm(
            max_unpooled, p=1, dim=1, keepdim=True) / torch.tensor(
                in_channels - 5, requires_grad=False).type(
                    torch.FloatTensor).sqrt()
        # shape_weights = torch.norm(x, p=2, dim=1, keepdim=True) / torch.tensor(in_channels - 5, requires_grad=False).type(torch.FloatTensor).sqrt()

        # shape_weights = max_unpooled.sum(dim=1, keepdim=True) / torch.tensor(in_channels - 5, requires_grad=False).type(
        #     torch.FloatTensor)  # .sqrt()
        # window_shape_query = functional.softmax(unfold(shape_weights, (self.k, self.k), padding=0, stride=self.stride),
        #                                         dim=1).unsqueeze(1)
        shape_windows = unfold(shape_weights, (self.k, self.k),
                               padding=0,
                               stride=self.stride)
        window_shape_query = (
            shape_windows /
            torch.norm(shape_windows, p=1, dim=1, keepdim=True)).unsqueeze(1)
        # window_shape_query = unfold(shape_weights, (self.k, self.k), padding=0, stride=self.stride).unsqueeze(1)
        # Computing window means
        window_means = torch.sum(
            unfold(x[:, -5:-3, ...],
                   (self.k, self.k), stride=self.stride).view(
                       -1, 2, self.k * self.k, num_of_win) *
            window_shape_query,
            dim=2)
        # Part_1 is contribution of variances of ellipses. Part_2 is the contribution of how far the mean of each ellipse is
        # from the mean of the window.
        window_var_part_1 = torch.sum(
            unfold(x[:, -3:-1, ...],
                   (self.k, self.k), stride=self.stride).view(
                       -1, 2, self.k * self.k, num_of_win) *
            window_shape_query,
            dim=2)
        window_var_part_2 = ((
            (unfold(x[:, -5:-3, ...],
                    (self.k, self.k), stride=self.stride).view(
                        -1, 2, self.k * self.k, num_of_win) -
             window_means.unsqueeze(2))**2) * window_shape_query).sum(dim=2)
        window_var = window_var_part_1 + window_var_part_2
        # Part_1 is contribution of covariances of ellipses. Part_2 is the contribution of how far the mean of each ellipse is
        # from the mean of the window.
        window_covar_part_1 = torch.sum(
            unfold(x[:, -1:, ...], (self.k, self.k), stride=self.stride).view(
                -1, 1, self.k * self.k, num_of_win) * window_shape_query,
            dim=2)
        window_covar_part_2 = (
            ((unfold(x[:, -5:-4, ...],
                     (self.k, self.k), stride=self.stride).view(
                         -1, 1, self.k * self.k, num_of_win) -
              window_means[:, 0:1, :].unsqueeze(2)) *
             (unfold(x[:, -4:-3, ...],
                     (self.k, self.k), stride=self.stride).view(
                         -1, 1, self.k * self.k, num_of_win) -
              window_means[:, 1:2, :].unsqueeze(2))) *
            window_shape_query).sum(dim=2)
        window_covar = window_covar_part_1 + window_covar_part_2
        window_aggregated_shapes = torch.cat(
            [window_means, window_var, window_covar], dim=1)
        window_aggregated_shapes = fold(
            window_aggregated_shapes,
            (pooled_features.shape[2], pooled_features.shape[3]), (1, 1))
        # plot_shapes(window_aggregated_shapes, idx=0)
        output = torch.cat([pooled_features, window_aggregated_shapes], dim=1)
        return output
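A minimal, self-contained sketch of the unfold -> per-window weighting -> fold((1, 1)) pattern used in this forward; the shapes, the softmax weighting, and all names below are illustrative assumptions, not the module's actual attributes:

import torch
from torch.nn.functional import unfold, fold

x = torch.randn(2, 8, 12, 12)                             # (batch, channels, H, W)
k, stride = 3, 3
cols = unfold(x, (k, k), stride=stride)                   # (2, 8*k*k, L): one column per window
L = cols.shape[-1]                                        # ((12 - k) // stride + 1) ** 2 = 16 windows
w = torch.softmax(cols.view(2, 8, k * k, L), dim=2)       # weights over the k*k positions of each window
pooled = (cols.view(2, 8, k * k, L) * w).sum(dim=2)       # weighted aggregation per window -> (2, 8, L)
out = fold(pooled, (12 // stride, 12 // stride), (1, 1))  # lay the L window results back on a 4x4 grid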
Example #2
    def forward(self, x):
        """Applies network layers and ops on input image(s) x.
        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].
        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]
            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        size = x.size()[2:]
        batch_size = x.shape[0]
        sources = list()
        loc = list()
        conf = list()

        x = self.conv_top(x)

        s = self.L2Norm3_3(x)
        sources.append(s)

        patches = self.unfold(x)
        patches = torch.cat(torch.unbind(patches, dim=2), dim=0)
        patches = torch.reshape(patches, (-1, 4, 8, 8))

        output_x = int((x.shape[2] - 8) / 4 + 1)
        output_y = int((x.shape[3] - 8) / 4 + 1)

        rnnX = self.rnn_model(patches, int(batch_size) * output_x * output_y)

        x = torch.stack(torch.split(rnnX,
                                    split_size_or_sections=int(batch_size),
                                    dim=0),
                        dim=2)

        x = F.fold(x, kernel_size=(1, 1), output_size=(output_x, output_y))

        x = F.pad(x, (0, 1, 0, 1), mode='replicate')

        for k in range(4):
            x = self.mob[k](x)

        s = self.L2Norm4_3(x)
        sources.append(s)

        for k in range(4, 8):
            x = self.mob[k](x)

        s = self.L2Norm5_3(x)
        sources.append(s)

        for k in range(8, 10):
            x = self.mob[k](x)
        sources.append(x)

        for k in range(10, 11):
            x = self.mob[k](x)
        sources.append(x)

        for k in range(11, 12):
            x = self.mob[k](x)
        sources.append(x)

        # apply multibox head to source layers

        loc_x = self.loc[0](sources[0])
        conf_x = self.conf[0](sources[0])

        loc_x = self.loc[1](loc_x)
        conf_x = self.conf[1](conf_x)

        max_conf, _ = torch.max(conf_x[:, 0:3, :, :], dim=1, keepdim=True)
        conf_x = torch.cat((max_conf, conf_x[:, 3:, :, :]), dim=1)

        loc.append(loc_x.permute(0, 2, 3, 1).contiguous())
        conf.append(conf_x.permute(0, 2, 3, 1).contiguous())

        for i in range(1, len(sources)):
            x = sources[i]
            conf.append(self.conf[i + 1](x).permute(0, 2, 3, 1).contiguous())
            loc.append(self.loc[i + 1](x).permute(0, 2, 3, 1).contiguous())

        features_maps = [[o.size(1), o.size(2)] for o in loc]

        self.priorbox = PriorBox(size, features_maps, cfg)
        self.priors = self.priorbox.forward()

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == 'test':
            output = detect_function(
                cfg,
                loc.view(loc.size(0), -1, 4),  # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),  # conf preds
                self.priors.type(type(x.data))  # default boxes
            )

        else:
            output = (loc.view(loc.size(0), -1,
                               4), conf.view(conf.size(0), -1,
                                             self.num_classes), self.priors)
        return output, loc, conf
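The patch round-trip around self.rnn_model above follows a generic unfold/unbind/fold recipe. A hedged standalone sketch with mean pooling as a stand-in for the RNN (all sizes are illustrative assumptions):

import torch
import torch.nn.functional as F

batch, C, H, W = 2, 4, 16, 16
x = torch.randn(batch, C, H, W)
patches = F.unfold(x, kernel_size=8, stride=4)              # (batch, C*8*8, L), L = 3*3 window positions
patches = torch.cat(torch.unbind(patches, dim=2), dim=0)    # (L*batch, C*8*8): one row per patch
patches = patches.reshape(-1, C, 8, 8)                      # (L*batch, C, 8, 8) patch images

feat = patches.mean(dim=(2, 3))                             # stand-in for the RNN: (L*batch, C)

out_x = (H - 8) // 4 + 1
out_y = (W - 8) // 4 + 1
cols = torch.stack(torch.split(feat, batch, dim=0), dim=2)  # regroup into (batch, C, L)
fmap = F.fold(cols, output_size=(out_x, out_y), kernel_size=(1, 1))  # (batch, C, out_x, out_y)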
Example #3
    def __getitem__(self, index):
        # get item in tensor shape
        scan_file = self.scan_files[index]
        if self.gt:
            label_file = self.label_files[index]

        # open a semantic laserscan
        if self.gt:
            scan = SemLaserScan(self.color_map,
                                project=True,
                                H=self.sensor_img_H,
                                W=self.sensor_img_W,
                                fov_up=self.sensor_fov_up,
                                fov_down=self.sensor_fov_down)
        else:
            scan = LaserScan(project=True,
                             H=self.sensor_img_H,
                             W=self.sensor_img_W,
                             fov_up=self.sensor_fov_up,
                             fov_down=self.sensor_fov_down)

        # open and obtain scan
        scan.open_scan(scan_file)
        if self.gt:
            scan.open_label(label_file)
            # map unused classes to used classes (also for projection)
            scan.sem_label = self.map(scan.sem_label, self.learning_map)
            scan.proj_sem_label = self.map(scan.proj_sem_label,
                                           self.learning_map)

        # make a tensor of the uncompressed data (with the max num points)
        unproj_n_points = scan.points.shape[0]
        unproj_xyz = torch.full((self.max_points, 3), -1.0, dtype=torch.float)
        unproj_xyz[:unproj_n_points] = torch.from_numpy(scan.points)
        unproj_range = torch.full([self.max_points], -1.0, dtype=torch.float)
        unproj_range[:unproj_n_points] = torch.from_numpy(scan.unproj_range)
        unproj_remissions = torch.full([self.max_points],
                                       -1.0,
                                       dtype=torch.float)
        unproj_remissions[:unproj_n_points] = torch.from_numpy(scan.remissions)
        if self.gt:
            unproj_labels = torch.full([self.max_points],
                                       -1.0,
                                       dtype=torch.int32)
            unproj_labels[:unproj_n_points] = torch.from_numpy(scan.sem_label)
        else:
            unproj_labels = []

        # get points and labels
        proj_range = torch.from_numpy(scan.proj_range).clone()
        proj_xyz = torch.from_numpy(scan.proj_xyz).clone()
        proj_remission = torch.from_numpy(scan.proj_remission).clone()
        proj_mask = torch.from_numpy(scan.proj_mask)
        if self.gt:
            proj_labels = torch.from_numpy(scan.proj_sem_label).clone()
            proj_labels = proj_labels * proj_mask
        else:
            proj_labels = []
        proj_x = torch.full([self.max_points], -1, dtype=torch.long)
        proj_x[:unproj_n_points] = torch.from_numpy(scan.proj_x)
        proj_y = torch.full([self.max_points], -1, dtype=torch.long)
        proj_y[:unproj_n_points] = torch.from_numpy(scan.proj_y)
        proj = torch.cat([
            proj_range.unsqueeze(0).clone(),
            proj_xyz.clone().permute(2, 0, 1),
            proj_remission.unsqueeze(0).clone()
        ])

        proj_blocked = proj.unsqueeze(1)  # (C, H, W) -> (C, 1, H, W): each channel becomes its own single-channel image

        proj = (proj - self.sensor_img_means[:, None, None]
                ) / self.sensor_img_stds[:, None, None]

        proj = proj * proj_mask.float()

        # get name and sequence
        path_norm = os.path.normpath(scan_file)
        path_split = path_norm.split(os.sep)
        path_seq = path_split[-3]
        path_name = path_split[-1].replace(".bin", ".label")
        # print("path_norm: ", path_norm)
        # print("path_seq", path_seq)
        # print("path_name", path_name)

        # import time
        # import cv2
        # cv2.imwrite('/home/snowflake/Desktop/big8192-128.png', proj_blocked[0,0, :, :].numpy()*15)
        # print('proj_blocked.shape')
        # print(proj_blocked.shape)
        # time.sleep(1000)

        n, c, h, w = proj_blocked.size()
        proj2 = proj.clone()
        proj = proj.unsqueeze(0)
        mask_image = proj_mask.unsqueeze(0).unsqueeze(0).float()
        downsamplings = 4
        representations = {}
        representations['image'] = []
        representations['points'] = []
        windows_size = 3  # window size (3x3 neighbourhood)

        for i in range(downsamplings):

            proj_chan_group_points = f.unfold(proj_blocked,
                                              kernel_size=3,
                                              stride=1,
                                              padding=1)
            projmask_chan_group_points = f.unfold(mask_image,
                                                  kernel_size=3,
                                                  stride=1,
                                                  padding=1)

            # Get the mean point (excluding non-valid points)
            proj_chan_group_points_sum = torch.sum(proj_chan_group_points,
                                                   dim=1)
            projmask_chan_group_points_sum = torch.sum(
                projmask_chan_group_points, dim=1)
            proj_chan_group_points_mean = proj_chan_group_points_sum / projmask_chan_group_points_sum

            # tile it so it can be subtracted from the other points
            tiled_proj_chan_group_points_mean = proj_chan_group_points_mean.unsqueeze(
                1).repeat(1, windows_size * windows_size, 1)

            # remove nans due to empty blocks
            is_nan = tiled_proj_chan_group_points_mean != tiled_proj_chan_group_points_mean
            tiled_proj_chan_group_points_mean[is_nan] = 0.

            # mask of non-valid points (1 where the original point was invalid)
            tiled_projmask_chan_group_points = (
                1 - projmask_chan_group_points.repeat(n, 1, 1)).byte()

            # subtract the mean point from the points
            proj_chan_group_points_relative = proj_chan_group_points - tiled_proj_chan_group_points_mean

            # zero out points that were non-valid at the beginning
            proj_chan_group_points_relative[
                tiled_projmask_chan_group_points] = 0.

            # compute distance (radius) to mean point
            # xyz_relative = proj_chan_group_points_relative[1:4,...]
            # relative_distance = torch.norm(xyz_relative, dim=0).unsqueeze(0)

            # NOW proj_chan_group_points_relative HAS Xr, Yr, Zr, Rr, Dr relative to the mean point
            proj_norm_chan_group_points = f.unfold(proj.permute(1, 0, 2, 3),
                                                   kernel_size=3,
                                                   stride=1,
                                                   padding=1)
            # NOW proj_norm_chan_group_points HAS X, Y, Z, R, D. Concatenate the two.
            proj_chan_group_points_combined = torch.cat(
                [proj_norm_chan_group_points, proj_chan_group_points_relative],
                dim=0)
            # convert back to image for image-convolution-branch
            proj_out = f.fold(proj_chan_group_points_combined,
                              proj_blocked.shape[-2:],
                              kernel_size=3,
                              stride=1,
                              padding=1)
            proj_out = proj_out.squeeze(1)

            proj = nn.functional.interpolate(proj,
                                             size=(int(proj.shape[2] / 2),
                                                   int(proj.shape[3] / 2)),
                                             mode='nearest')
            proj_blocked = nn.functional.interpolate(
                proj_blocked.permute(1, 0, 2, 3),
                size=(int(proj_blocked.shape[2] / 2),
                      int(proj_blocked.shape[3] / 2)),
                mode='nearest').permute(1, 0, 2, 3)
            mask_image = nn.functional.interpolate(
                mask_image,
                size=(int(mask_image.shape[2] / 2),
                      int(mask_image.shape[3] / 2)),
                mode='nearest')

            representations['points'].append(proj_chan_group_points_combined)
            representations['image'].append(proj_out)
            # print('append' +str(i))
            #
            # print(proj_chan_group_points_combined.shape)
            # print(proj_out.shape)

        return proj2, proj_mask, proj_labels, unproj_labels, path_seq, path_name, proj_x, proj_y, proj_range, unproj_range, proj_xyz, unproj_xyz, proj_remission, unproj_remissions, unproj_n_points, representations
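The multi-scale loop above repeats one core step: a masked 3x3 neighbourhood mean computed with unfold, followed by a fold back to image form. A minimal sketch of that step (the shapes and random mask are assumptions for illustration):

import torch
import torch.nn.functional as f

proj = torch.randn(5, 1, 8, 16)                  # each of 5 channels treated as its own 1-channel image
mask = (torch.rand(1, 1, 8, 16) > 0.2).float()   # 1 where the pixel holds a valid point

vals = f.unfold(proj, kernel_size=3, stride=1, padding=1)   # (5, 9, H*W): 3x3 neighbourhoods per pixel
valid = f.unfold(mask, kernel_size=3, stride=1, padding=1)  # (1, 9, H*W)
mean = vals.sum(dim=1) / valid.sum(dim=1)                   # masked neighbourhood mean, (5, H*W)
mean[mean != mean] = 0.                                      # NaNs arise where no neighbour was valid
rel = (vals - mean.unsqueeze(1)) * valid                     # offsets from the mean; invalid points zeroed
img = f.fold(rel, proj.shape[-2:], kernel_size=3, stride=1, padding=1)  # sums overlaps back to (5, 1, 8, 16)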
Example #4
def overlap_add(X, stride):
    n_fft = X.shape[1]
    output_len = n_fft + stride * (X.shape[2] - 1)

    return fold(X, (1, output_len), kernel_size=(1, n_fft),
                stride=stride).flatten(1)
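A quick usage sketch (not from the original source): frame a signal with Tensor.unfold, then call overlap_add to put the frames back; fold sums the samples where frames overlap, which is exactly the overlap-add operation.

import torch

x = torch.arange(16.0).unsqueeze(0)          # (batch=1, time=16)
frames = x.unfold(1, 8, 4)                   # (1, n_frames=3, n_fft=8), hop of 4 samples
y = overlap_add(frames.transpose(1, 2), 4)   # (1, 16); overlapping samples are summed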
Example #5
def rmac_hist(inp,
              L_min=7, #7 for fixed width, 1 for all
              L=7,
              nb_bins=8,
              eps=1e-7):
    '''
    https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/master/cirtorch/layers/functional.py#L26
    '''
#     x = inp.clone().detach()
    x = torch.empty_like(inp).copy_(inp)
    with torch.no_grad():
        ovr = 0.4 # desired overlap of neighboring regions
        steps = torch.LongTensor([2, 3, 4, 5, 6, 7]) # possible regions for the long dimension

        W = x.size(3)
        H = x.size(2)

        w = min(W, H)
        w2 = math.floor(w/2.0 - 1)

        b = (max(H, W)-w)/(steps-1)
        (tmp, idx) = torch.min(torch.abs(((w**2 - w*b)/w**2)-ovr), 0) # steps(idx) regions for long dimension

        # region overplus per dimension
        Wd = 0
        Hd = 0
        if H < W:
            Wd = idx.item() + 1
        elif H > W:
            Hd = idx.item() + 1

        v = [feat_map_shape_hist(x)]
    #     v = v / (torch.norm(v, p=2, dim=1, keepdim=True) + eps).expand_as(v)

        for l in range(L_min, L+1):
            wl = math.floor(2*w/(l+1))
            wl2 = math.floor(wl/2 - 1)

            if l+Wd == 1:
                b = 0
            else:
                b = (W-wl)/(l+Wd-1)
            cenW_tmp = wl2 + torch.Tensor(range(l-1+Wd+1)).long()*b
            cenW_tmp = cenW_tmp.float()
            cenW = torch.floor(cenW_tmp) - wl2 # center coordinates
            if l+Hd == 1:
                b = 0
            else:
                b = (H-wl)/(l+Hd-1)
            cenH_tmp = wl2 + torch.Tensor(range(l-1+Hd+1)).long()*b
            cenH_tmp = cenH_tmp.float()
            cenH = torch.floor(cenH_tmp) - wl2 # center coordinates
            # print(cenH, cenW,wl, wl2)
            vt_array = []
            for i_ in cenH.tolist():
                for j_ in cenW.tolist():
                    if wl == 0:
                        continue
                    R = x[:,:,(int(i_)+torch.LongTensor(range(wl)).to(device)).tolist(),:]
                    R = R[:,:,:,(int(j_)+torch.LongTensor(range(wl)).to(device)).tolist()]

                    vt = feat_map_shape_hist(R)
#                     vt = torch.histc(R, bins=nb_bins, min=R.min().item(), max=R.max().item())

                    vt = vt / (torch.norm(vt, p=2, dim=-1, keepdim=True) + eps).expand_as(vt)
    #                 v += vt
                    vt_array += [vt]

            vt_array = torch.stack(vt_array, -1)

            arr_along_batch = []
            for batch_id in range(x.shape[0]):
                arr_along_batch.append(F.fold(vt_array[batch_id,...], (len(cenH.tolist()),len(cenW.tolist())), (1,1)))
            arr_along_batch = torch.stack(arr_along_batch, 0).cuda()

    #         v += vt_array
#     print(arr_along_batch.shape)
    return arr_along_batch
Example #6
    def test_fold(self):
        inp = torch.randn(3, 20, 20, device='cuda', dtype=self.dtype)
        inp_folded = F.fold(inp, (4, 5), (1, 1))
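        # Shape check (explanatory addition, assuming the tensors above):
        # with kernel_size=(1, 1), each of the 20 columns in the (3, 20, 20) input
        # becomes one spatial position, so output_size=(4, 5) must hold 4*5 = 20 cells.
        assert inp_folded.shape == (3, 20, 4, 5)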