Code Example #1
    def _helper_boxes_shape(self, func):
        # test boxes as Tensor[N, 5]
        with self.assertRaises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype)
            func(a, boxes, output_size=(2, 2))

        # test boxes as List[Tensor[N, 4]]
        with self.assertRaises(AssertionError):
            a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
            boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype)
            ops.roi_pool(a, [boxes], output_size=(2, 2))
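
For contrast with the failing shapes above, here is a minimal sketch of the two box formats that torchvision.ops.roi_pool accepts: a single Tensor[K, 5] whose first column is the batch index, or a List[Tensor[N, 4]] with one entry per image. The feature map and box values are made up for illustration.

    import torch
    from torchvision import ops

    a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)

    # Tensor form: each row is (batch_index, x1, y1, x2, y2)
    boxes_t = torch.tensor([[0, 0, 0, 3, 3]], dtype=a.dtype)
    out_t = ops.roi_pool(a, boxes_t, output_size=(2, 2))   # shape [1, 1, 2, 2]

    # List form: one Tensor[N, 4] of (x1, y1, x2, y2) per image in the batch
    boxes_l = [torch.tensor([[0, 0, 3, 3]], dtype=a.dtype)]
    out_l = ops.roi_pool(a, boxes_l, output_size=(2, 2))   # shape [1, 1, 2, 2]

output_size may also be a single int, and spatial_scale (default 1.0) rescales box coordinates from input-image space to feature-map space, which is why several examples below pass 1.0 / 16 for a backbone with stride 16.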
Code Example #2
 def forward(self, batch, proposals):
     """ Feed forward the proposal regions into the RCNN head to predict     
         object ROIs and corresponding classes.
     """
     
     # Perform ROI Max Pooling to create feature sets of the same size for 
     # obtained proposals. 
     #_______________________________________________________________________
     # Append batch indices to proposal coordinates and permute them to 
     # [k x1 y1 x2 y2] format as required by roi_pool:
     B, N, _  = proposals.size()
     batchids = torch.from_numpy(np.repeat(np.arange(B), N))
     batchids = batchids.view(-1,1).to(proposals.device).to(proposals.dtype)
     rois     = torch.cat((batchids, proposals.view(-1, 4)),dim=1)
     
     # NOTE: proposals come in (y1, x1, y2, x2) order; reorder the columns to (x1, y1, x2, y2) as roi_pool expects:
     xyROIs   = rois[:, [0,2,1,4,3]]
     
     # Perform pooling:
     scale    = 1 / float(self.spatial_scale)
     pool     = roi_pool(batch, xyROIs, self.pool_size, spatial_scale=scale)
     
     # Feed pooled features to RCNN head, obtain ROI targets and scores: 
     #_______________________________________________________________________
     pool_features = self.Classifier(pool.view(pool.size(0), -1))
     roi_targets   = self.RCNNBBox(pool_features)
     roi_scores    = self.RCNNClass(pool_features)
     
     # Reshape the predictions back to per-batch [B, N, ...] form:
     #_______________________________________________________________________
     roi_targets   = roi_targets.view([B, N, roi_targets.size(1)])
     roi_scores    = roi_scores.view([B, N, roi_scores.size(1)])
     
     return roi_targets, roi_scores
Code Example #3
    def forward(self, x):
        feature = x[0]
        print("feature = {}".format(feature.shape))
        rois = x[1].view(-1, 4)

        print("rois = {}".format(rois.shape))
        samples = ops.roi_pool(input=feature,
                               boxes=[rois],  # list form: one Tensor[N, 4] per image (assumes a single-image batch here)
                               output_size=(self.pooling_regions,
                                            self.pooling_regions))

        return samples
Code Example #4
File: network.py Project: meteora9479/wsddn.pytorch
    def forward(self, batch_imgs, batch_boxes, batch_scores):
        # assume batch size is 1
        batch_boxes = [batch_boxes[0]]

        out = self.features(batch_imgs)  # [1, 256, 21, 29]

        out = roi_pool(out, batch_boxes, self.roi_output_size, 1.0 / 16)
        out = out.view(len(batch_boxes[0]), -1)

        out = out * batch_scores[0]  # apply box scores
        out = self.fcs(out)  # [4000, 4096]

        classification_scores = F.softmax(self.fc_c(out), dim=1)
        detection_scores = F.softmax(self.fc_d(out), dim=0)
        combined_scores = classification_scores * detection_scores
        return combined_scores
Code Example #5
 def cal_perc_feat(self, x, target_bbox=None):
     initial_size = x.size()
     image_h = initial_size[2]  # NCHW: index 2 is the image height
     output = {}
     mask_output = {}
     roi_cnt = 0
     for name, module in self.vgg_layers._modules.items():
         x = module(x)
         if name in self.layers_mapping:
             if target_bbox is not None:
                 spatial_scale = x.shape[2] / image_h
                 mask_output[self.layers_mapping[name]] = roi_pool(
                     x, target_bbox, self.roi_size[roi_cnt], spatial_scale)
                 roi_cnt += 1
             output[self.layers_mapping[name]] = x
     return output, mask_output
Code Example #6
    def train(self,epoch):
        self.network.train()
        num_trains = len(self.train_loader.dataset)
        for idx, (data, target) in enumerate(self.train_loader):
            data = torch.stack(data, 0)  # no multi-scale training, so all images are resized to the same size and can be stacked into a batch for speed
            if self.use_cuda:
                data = data.to(self.device)
                target = [{k: v.to(self.device) for k, v in targ.items()} for targ in target]

            output,feature = self.network(data)
            loss_dict = self.loss_func(output, target)
            # """
            proposals = self.loss_func.proposal(output)
            # ROI pooling
            outs = []
            proposal_list = []
            fh, fw = feature.shape[-2:]
            stride_h, stride_w = self.resize[0] / fh, self.resize[1] / fw
            for i,proposal in enumerate(proposals):
                boxes = proposal["boxes"] / torch.as_tensor([stride_w, stride_h, stride_w, stride_h],dtype=torch.float32, device=self.device).unsqueeze(0)
                roi_out = roi_pool(feature[i].unsqueeze(0), [boxes], [7, 7])  # pool with the feature-scaled boxes; list form expects one Tensor[N, 4] per image
                outs.append(roi_out)
                proposal_list.append(boxes)

            output = self.network.doRCNN(outs)
            loss_dict_rcnn = self.loss_func_rcnn(output,proposal_list,target)
            loss_dict.update(loss_dict_rcnn)
            # """
            losses = sum(loss for loss in loss_dict.values())

            self.optimizer.zero_grad()
            losses.backward()
            self.optimizer.step()

            # Log to TensorBoard
            self.writer.add_scalar('total_loss', losses.item(), epoch * num_trains // self.batch_size + idx)
            for key, loss in loss_dict.items():
                self.writer.add_scalar(key, loss.item(), epoch * num_trains // self.batch_size + idx)

            if idx % self.print_freq == 0:
                ss = "epoch:{}-({}/{})".format(epoch, idx * self.batch_size, num_trains)
                ss += "\ttotal:{:.3f}".format(losses.item())
                for key, loss in loss_dict.items():
                    ss += "\t{}:{:.3f}".format(key, loss.item())

                print(ss)
Code Example #7
    def forward(self, x, pred_boxes, pred_batch_idx):
        num_regions = len(pred_batch_idx)

        pred_indices_and_boxes = np.concatenate(
            (pred_batch_idx.reshape(-1, 1), pred_boxes), axis=1)
        pred_indices_and_boxes = torch.from_numpy(pred_indices_and_boxes).to(x)

        # TODO should I use roi_align? Roi sampling should be configurable
        # regions = ops.roi_align(x, pred_indices_and_boxes, self.roi_align_size, self.spatial_scale)
        regions = ops.roi_pool(x, pred_indices_and_boxes, self.roi_align_size,
                               self.spatial_scale)
        y = self.avgpool(self.layer4(regions))
        y = torch.flatten(y, start_dim=1)

        pred_roi_cls = self.fc_cls(y)
        pred_roi_loc = self.fc_loc(y).view(num_regions, -1, 4)

        return pred_roi_cls, pred_roi_loc
Code Example #8
    def forward(self, x, region_proposal):
        # assume batch size is 1
        # print(region_proposal.shape)
        region_proposal = [region_proposal[0]]
        # print(x.shape)
        out = self.features(x)  # [1, 512, 30, 30]
        # print(out.shape)
        # print(len(region_proposal[0]))
        out = roi_pool(out, region_proposal, self.roi_output_size,
                       1.0 / 16)  # spp
        # print(out.shape)
        out = out.view(len(region_proposal[0]), -1)
        # print(out.shape)

        # out = out * batch_scores[0]  # apply box scores
        out = self.fc6_and_fc7(out)  # [4000, 4096]
        # print(out.shape)

        classification_scores = F.softmax(self.fc8c(out), dim=1)
        detection_scores = F.softmax(self.fc8d(out), dim=0)
        combined_scores = classification_scores * detection_scores
        # print(combined_scores.shape)
        return combined_scores
Code Example #9
def relocalize(xA_,
               yA_,
               xB_,
               yB_,
               score_,
               feature_A_2x,
               feature_B_2x,
               N_matches=None,
               upsample_positions=True,
               crop_size=2):
    assert crop_size == 3 or crop_size == 2

    if N_matches is None:
        N_matches = xA_.shape[1]
    else:
        idx = torch.argsort(-score_.view(-1))
        N_matches = min(N_matches, idx.shape[0])
        idx = idx[:N_matches]
        score_ = score_[:, idx]
        xA_ = xA_[:, idx]
        yA_ = yA_[:, idx]
        xB_ = xB_[:, idx]
        yB_ = yB_[:, idx]

    if upsample_positions:
        xA_ = xA_ * 2
        yA_ = yA_ * 2
        xB_ = xB_ * 2
        yB_ = yB_ * 2

    coords_A = torch.cat(
        (torch.zeros(1, N_matches).to(xA_.device), xA_ - (crop_size % 2), yA_ -
         (crop_size % 2), xA_ + 1, yA_ + 1),
        dim=0).t()

    coords_B = torch.cat(
        (torch.zeros(1, N_matches).to(xB_.device), xB_ - (crop_size % 2), yB_ -
         (crop_size % 2), xB_ + 1, yB_ + 1),
        dim=0).t()

    ch = feature_A_2x.shape[1]
    feature_A_local = O.roi_pool(feature_A_2x,
                                 coords_A,
                                 output_size=(crop_size, crop_size)).view(
                                     N_matches, ch, -1, 1)
    feature_B_local = O.roi_pool(feature_B_2x,
                                 coords_B,
                                 output_size=(crop_size, crop_size)).view(
                                     N_matches, ch, 1, -1)

    deltaY, deltaX = torch.meshgrid(
        torch.linspace(-(crop_size % 2), 1, crop_size),
        torch.linspace(-(crop_size % 2), 1, crop_size))

    deltaX = deltaX.contiguous().view(-1).to(xA_.device)
    deltaY = deltaY.contiguous().view(-1).to(xA_.device)

    corr_local = (feature_A_local * feature_B_local).sum(dim=1)

    delta_A_idx = torch.argmax(corr_local.max(dim=2, keepdim=True)[0], dim=1)
    delta_B_idx = torch.argmax(corr_local.max(dim=1, keepdim=True)[0], dim=2)

    xA_ = xA_ + deltaX[delta_A_idx].t()
    yA_ = yA_ + deltaY[delta_A_idx].t()
    xB_ = xB_ + deltaX[delta_B_idx].t()
    yB_ = yB_ + deltaY[delta_B_idx].t()

    return xA_, yA_, xB_, yB_, score_
Code Example #10
 def forward(self, x, rois):
     return roi_pool(x, rois, (self.outh, self.outw), self.spatial_scale)
Code Example #11
 def forward(self, features, rois):
     return roi_pool(features, rois, self.out_size, self.spatial_scale)
Code Example #12
                    j,
                    y1,
                    x1,
                    y2,
                    x2,
                    oH,
                    oW,
                )  # (i, j) index the cell position within the 4x3 output grid,
                # so for each sample and each bounding box this runs 4x3 = 12 times:
                # one max over each sector yields the 4x3 output matrix.
                # NOTE: with .floor/.ceil the sectors can overlap; replacing them with .round avoids this.

                slice = input[
                    n, :, y_start:y_end, x_start:
                    x_end]  # input is [2, 16, 5, 7]; take one sample, all channels, and
                # the spatial window of the current sector -> e.g. [16, 2, 2]
                slice, _ = torch.max(
                    torch.max(slice, dim=1)[0], dim=1
                )  # spatial max per channel, i.e. np.amax(slice, axis=(-1, -2)) in numpy,
                # done here as two successive torch.max calls over dim=1 -> [16]
                # inner torch.max: [16, 2, 2] -> [16, 2]
                # outer torch.max: [16, 2] -> [16]
                # torch.max returns (values, indices); the indices are discarded
                out[n, l, :, i,
                    j] = slice  # write the per-channel maxima into the (i, j) cell

out_pytorch = roi_pool(input, boxes, (oH, oW), spatial_scale=1.0)
out_pytorch = out_pytorch.reshape((N, L, C, oH, oW))
# Computes element-wise equality
# print(torch.eq(out, out_pytorch, out=None))  # not equal

# slice = input.numpy()[:, :, 3:6, 0:4] convert to numpy
# print (slice.data) get tensor data
Code Example #13
File: models.py Project: alebeck/tracking_wo_bnw
    def prepare_decoder(self,
                        diffs,
                        boxes_resized,
                        image_features,
                        image_sizes,
                        lengths,
                        batched=False):
        B, L, F = diffs.shape

        bounds = (torch.cumsum(lengths, dim=0) - 1).tolist()
        keep = torch.tensor(
            sorted(
                list(
                    set(range(len(image_sizes) - 1)).difference(
                        set(bounds[:-1])))))

        if self.use_pre_conv:
            assert not isinstance(image_features, list)
            image_features = self.pre_conv(image_features)

        # roi pooling on enlarged areas around boxes
        widths, heights = get_width(boxes_resized[keep]), get_height(
            boxes_resized[keep])
        dx = ((self.correlation_args['patch_size'] - 1) * widths *
              self.correlation_args['dilation_patch']) / (2 *
                                                          self.roi_output_size)
        dy = ((self.correlation_args['patch_size'] - 1) * heights *
              self.correlation_args['dilation_patch']) / (2 *
                                                          self.roi_output_size)
        if not self.use_roi_align:
            dx, dy = dx.ceil(), dy.ceil()
        if self.fixed_env:
            dx, dy = dx + widths, dy + heights

        dpos = torch.stack([-dx, -dy, dx, dy], dim=1)
        proposals = list((boxes_resized[keep] + dpos).unsqueeze(1))
        if self.use_roi_align:
            if self.fixed_env:
                if batched:
                    box_to_images = torch.cat([
                        torch.arange(lengths.max() - l,
                                     lengths.max() - 1) for l in lengths
                    ])
                    enlarged_boxes = boxes_resized[keep] + dpos
                    proposals = [
                        enlarged_boxes[box_to_images == l]
                        for l in range(lengths.max() - 1)
                    ]
                    image_sizes = image_sizes[0].repeat(
                        len(image_features) - 1, 1)

                    perm = torch.zeros_like(box_to_images)
                    current_i = 0
                    for i in range(box_to_images.max().item() + 1):
                        mask = box_to_images == i
                        perm[mask] = torch.arange(
                            current_i, current_i + mask.sum().item())
                        current_i = perm.max() + 1

                    prev_features = self.roi_pool_env_ext(
                        OrderedDict([(0, image_features[:-1])]), proposals,
                        image_sizes.tolist())[perm]
                    next_features = self.roi_pool_env_ext(
                        OrderedDict([(0, image_features[1:])]), proposals,
                        image_sizes.tolist())[perm]
                else:
                    prev_features = self.roi_pool_env_ext(
                        OrderedDict([(0, image_features[keep])]), proposals,
                        image_sizes[keep].tolist())
                    next_features = self.roi_pool_env_ext(
                        OrderedDict([(0, image_features[keep + 1])]),
                        proposals, image_sizes[keep + 1].tolist())
            else:
                assert not batched
                prev_features = self.roi_pool_ext(
                    OrderedDict([(0, image_features[keep])]), proposals,
                    image_sizes[keep].tolist())
                next_features = self.roi_pool_ext(
                    OrderedDict([(0, image_features[keep + 1])]), proposals,
                    image_sizes[keep + 1].tolist())
        else:
            output_size = (self.roi_output_size_ext, self.roi_output_size_ext)
            prev_features = roi_pool(image_features[keep],
                                     proposals,
                                     output_size,
                                     spatial_scale=0.125)
            next_features = roi_pool(image_features[keep + 1],
                                     proposals,
                                     output_size,
                                     spatial_scale=0.125)

        # correlate
        correlation = correlate(prev_features, next_features,
                                self.correlation_args)

        if self.fixed_env:
            # for boxes with height > threshold, set appropriate locations to zero
            del_idc = heights > 120
            margin = int(
                (self.roi_output_size_env_ext - self.roi_output_size) / 2)

            mask = torch.ones_like(correlation).cuda()
            mask[del_idc, :, :margin] = 0
            mask[del_idc, :, -margin:] = 0
            mask[del_idc, :, :, :margin] = 0
            mask[del_idc, :, :, -margin:] = 0
            correlation = correlation * mask

            # now extract box features
            margin = int(
                (self.roi_output_size_env_ext - self.roi_output_size_env) / 2)
            correlation = correlation[:, :, margin:-margin, margin:-margin]

        elif not self.use_env_features:
            assert not self.fixed_env
            # isolate correlation features which belong to the bounding box
            margin = int((self.roi_output_size_ext - self.roi_output_size) / 2)
            correlation = correlation[:, :, margin:-margin, margin:-margin]

        if self.correlation_only:
            if self.refine_correlation:
                out_conv3 = self.conv3_1(correlation)
                box_features = self.conv4_1(self.conv4(out_conv3))
            else:
                box_features = self.conv_reduce(correlation)
        else:
            # roi pool image features and append them to corr features
            box_proposals = list(boxes_resized[keep].unsqueeze(1))
            roi_out = self.roi_pool(OrderedDict([(0, image_features[keep])]),
                                    box_proposals, image_sizes[keep].tolist())

            out_conv_redir = self.conv_redir(roi_out)
            in_conv3_1 = torch.cat([out_conv_redir, correlation], dim=1)
            out_conv3 = self.conv3_1(in_conv3_1)
            box_features = self.conv4_1(self.conv4(out_conv3))

        if self.avg_box_features:
            assert not self.max_box_features
            box_features = box_features.view(*box_features.shape[:2],
                                             -1).mean(2).unsqueeze(2)
        elif self.max_box_features:
            box_features = box_features.view(*box_features.shape[:2],
                                             -1).max(dim=2, keepdim=True)[0]

        corr_lengths = lengths - 1
        target_idc = (torch.cumsum(corr_lengths, dim=0) - 1).tolist()
        in_idc = list(set(range(len(keep))).difference(set(target_idc)))

        encoder_in = torch.zeros(B, L, self.input_size).cuda()
        encoder_in[:, :, :F] = diffs

        mask = torch.zeros(encoder_in.shape[:2], dtype=torch.bool)
        for i, l in enumerate(corr_lengths):
            if l - 1 > 0:
                mask[i, -(l - 1):] = True

        if len(in_idc) > 0 and not self.correlation_last_only:
            t_tmp = encoder_in[mask]
            t_tmp[:, F:] = box_features[in_idc].view(len(in_idc), -1)
            encoder_in[mask] = t_tmp

        # feed features into encoder, retrieve hidden states
        encoder_out = self.encoder(encoder_in)  # encoder_out[0]: 32, 60, 48
        decoder_h = encoder_out[1][0]
        decoder_c = torch.zeros(self.n_layers, B, self.hidden_size).cuda()

        # construct decoder input
        decoder_in = torch.zeros(B, 1, self.input_size).cuda()
        decoder_in[:, 0, F - 2] = 1.  # start token
        decoder_in[:, 0,
                   F:] = box_features[target_idc].view(len(target_idc), -1)

        return encoder_out, decoder_in, decoder_h, decoder_c
Code Example #14
    def forward(self, x, targetObject_img, coords):

        x2_feat, x3_feat, x4_feat = self.encoder(x)
        ## (torch.Size([20, 128, 38, 38]), torch.Size([20, 256, 19, 19]), torch.Size([20, 512, 10, 10]))

        targetObject_img = nn.functional.interpolate(targetObject_img,
                                                     size=[100, 100])
        con_x2_feat, con_x3_feat, con_x4_feat = self.encoder(targetObject_img)
        ## (torch.Size([20, 128, 13, 13]), torch.Size([20, 256, 7, 7]), torch.Size([20, 512, 4, 4]))

        # Mutual Adaptation Module
        DC_2 = AdaptiveConv2d(x2_feat.size(0) * x2_feat.size(1),  x2_feat.size(0) * x2_feat.size(1), 5, padding=1, \
                                groups=x2_feat.size(0) * x2_feat.size(1), bias=False)
        DC_3 = AdaptiveConv2d(x3_feat.size(0) * x3_feat.size(1),  x3_feat.size(0) * x3_feat.size(1), 5, padding=1, \
                                groups=x3_feat.size(0) * x3_feat.size(1), bias=False)
        DC_4 = AdaptiveConv2d(x4_feat.size(0) * x4_feat.size(1),  x4_feat.size(0) * x4_feat.size(1), 5, padding=1, \
                                groups=x4_feat.size(0) * x4_feat.size(1), bias=False)

        dc_feats_2 = DC_2(x2_feat,
                          con_x2_feat)  ## torch.Size([20, 128, 28, 28])
        # dc_feats_2 = self.relu(dc_feats_2)

        dc_feats_3 = DC_3(x3_feat,
                          con_x3_feat)  ## torch.Size([20, 256, 15, 15])
        # dc_feats_3 = self.relu(dc_feats_3)

        dc_feats_4 = DC_4(x4_feat, con_x4_feat)  ## torch.Size([20, 512, 9, 9])
        # dc_feats_4 = self.relu(dc_feats_4)

        gated_2 = torch.sigmoid(dc_feats_2)
        gated_3 = torch.sigmoid(dc_feats_3)
        gated_4 = torch.sigmoid(dc_feats_4)

        gated_output_2 = gated_2 * dc_feats_2  ## torch.Size([20, 128, 28, 28])
        gated_output_3 = gated_3 * dc_feats_3  ## torch.Size([20, 256, 15, 15])
        gated_output_4 = gated_4 * dc_feats_4  ## torch.Size([20, 512, 9, 9])

        # encoded_feat = gated_output_2 + gated_output_3 + gated_output_4

        # pdb.set_trace()
        gated_output_3 = nn.functional.interpolate(
            gated_output_3, size=[18, 18])  ## torch.Size([20, 256, 18, 18])
        gated_output_2 = nn.functional.interpolate(gated_output_2,
                                                   size=[36, 36])

        ####################################################
        ######            decoding + concat path
        ####################################################
        gated_output_4 = self.CT_1(gated_output_4)
        gated_output_4 = self.CT_2(gated_output_4)
        gated_output_4 = self.CT_3(gated_output_4)
        gated_output_4 = self.relu(gated_output_4)
        ## gated_output_4.shape: torch.Size([20, 512, 9, 9])

        gated_output_4_new = torch.zeros(gated_output_4.shape[0],
                                         gated_output_4.shape[1] + 2,
                                         gated_output_4.shape[2],
                                         gated_output_4.shape[3])

        # pdb.set_trace()

        for point_idx in range(gated_output_4.shape[0]):
            feat_map = gated_output_4[point_idx]
            point = coords[point_idx]

            coords_feat = self.addcoords(feat_map, point)
            coords_feat = torch.squeeze(coords_feat, dim=0)
            fused_feats = torch.cat((coords_feat, feat_map), dim=0)
            gated_output_4_new[point_idx] = fused_feats

        gated_output_4_new = gated_output_4_new.cuda()

        bi = torch.arange(coords.shape[0])
        bi = torch.unsqueeze(bi, dim=1)  ## (batchSize, 1)
        rois = torch.cat((coords * 9 // 300, (coords) * 9 // 300),
                         dim=1)  ## (x1, y1, x2, y2)
        bi = bi.type(torch.FloatTensor)
        rois = torch.cat((bi, rois), dim=1).cuda()
        output_size = (1, 1)

        www = torchops.roi_pool(
            gated_output_4, rois, output_size,
            spatial_scale=1.0)  ## torch.Size([20, 512, 1, 1])
        www = torch.squeeze(www, dim=2)  ## (20, 512, 1)
        www = torch.squeeze(www, dim=2)  ## (20, 512)

        adaIN_input = self.fc_controler(www)  ## (20, 512)

        # pdb.set_trace()

        dc_feats_4 = self.CT_4(gated_output_4_new)
        dc_feats_4 = self.CT_5(dc_feats_4)
        dc_feats_4 = self.CT_6(dc_feats_4)
        dc_feats_4 = self.relu(dc_feats_4)
        up_d4 = self.Upsamp_2(
            dc_feats_4)  ##  dc_feats_4: torch.Size([20, 512, 9, 9])
        ##  up_d4       torch.Size([20, 512, 18, 18])

        AdaIN_output = self.AdaIN(
            up_d4, adaIN_input)  ## torch.Size([20, 512, 18, 18])
        AdaIN_output = torch.cat((AdaIN_output, up_d4), dim=1)

        # pdb.set_trace()
        dc_feats_3 = self.CT_7(AdaIN_output)
        dc_feats_3 = self.CT_8(torch.cat((dc_feats_3, gated_output_3), dim=1))
        dc_feats_3 = self.CT_9(dc_feats_3)
        dc_feats_3 = self.relu(dc_feats_3)
        up_d3 = self.Upsamp_3(
            dc_feats_3)  ##  up_d3: torch.Size([20, 768, 36, 36])

        dc_feats_2 = self.CT_10(
            up_d3)  ##  dc_feats_2: torch.Size([20, 512, 36, 36])
        dc_feats_2 = self.CT_11(torch.cat((dc_feats_2, gated_output_2), dim=1))
        dc_feats_2 = self.relu(
            dc_feats_2)  ##  dc_feats_2: torch.Size([20, 640, 36, 36])

        up_d2 = self.Upsamp_4(dc_feats_2)  ## torch.Size([20, 640, 72, 72])

        dc_feats_1 = self.CT_12(up_d2)
        dc_feats_1 = self.CT_13(dc_feats_1)
        dc_feats_1 = self.relu(dc_feats_1)

        dc_feats_1 = self.Upsamp_5(dc_feats_1)
        dc_feats_1 = self.relu(dc_feats_1)  ## torch.Size([20, 64, 216, 216])

        # pdb.set_trace()

        # output = self.Conv_1x1(dc_feats_1)
        output = self.mymodules[0](dc_feats_1)
        output = self.mymodules[1](output)

        # output = nn.functional.interpolate(output, size=[300, 300])

        return output
Code Example #15
    def forward(self, im_data, gt_boxes, im_info):
        batch_size = im_data.size(0)
        im_info = im_info.data


        if gt_boxes is not None:
            gt_boxes = gt_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.pooling_mode == 'align':
            # pooled_feat = self.RCNN_roi_align(feature_map, rois.view(-1, 5))
            pooled_feat = roi_align(base_feat, rois.view(-1, 5), (cfg.pool_size, cfg.pool_size), 1.0/16)
        elif cfg.pooling_mode == 'pool':
            #pooled_feat = self.RCNN_roi_pool(feature_map, rois.view(-1, 5))
            pooled_feat = roi_pool(base_feat, rois.view(-1, 5), (cfg.pool_size, cfg.pool_size), 1.0/16)

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
Code Example #16
def relocalize_soft(xA_,
                    yA_,
                    xB_,
                    yB_,
                    score_,
                    feature_A_2x,
                    feature_B_2x,
                    N_matches=None,
                    sigma=10,
                    upsample_positions=True):
    if N_matches is None:
        N_matches = xA_.shape[1]
    else:
        idx = torch.argsort(-score_.view(-1))
        N_matches = min(N_matches, idx.shape[0])
        idx = idx[:N_matches]
        score_ = score_[:, idx]
        xA_ = xA_[:, idx]
        yA_ = yA_[:, idx]
        xB_ = xB_[:, idx]
        yB_ = yB_[:, idx]

    if upsample_positions:
        xA_ = xA_ * 2
        yA_ = yA_ * 2
        xB_ = xB_ * 2
        yB_ = yB_ * 2

    coords_A = torch.cat((torch.zeros(1, N_matches).to(
        xA_.device), xA_ - 1, yA_ - 1, xA_ + 1, yA_ + 1),
                         dim=0).t()

    coords_B = torch.cat((torch.zeros(1, N_matches).to(
        xB_.device), xB_ - 1, yB_ - 1, xB_ + 1, yB_ + 1),
                         dim=0).t()

    ch = feature_A_2x.shape[1]
    feature_A_local = O.roi_pool(feature_A_2x, coords_A, output_size=(3, 3))
    feature_B_local = O.roi_pool(feature_B_2x, coords_B, output_size=(3, 3))

    deltaY, deltaX = torch.meshgrid(torch.linspace(-1, 1, 3),
                                    torch.linspace(-1, 1, 3))

    deltaX = deltaX.contiguous().to(xA_.device).unsqueeze(0)
    deltaY = deltaY.contiguous().to(xA_.device).unsqueeze(0)

    corrA_B = (feature_A_local[:, :, 1:2, 1:2] * feature_B_local).sum(
        dim=1).mul(sigma).view(N_matches,
                               -1).softmax(dim=1).view(N_matches, 3, 3)
    corrB_A = (feature_B_local[:, :, 1:2, 1:2] * feature_A_local).sum(
        dim=1).mul(sigma).view(N_matches,
                               -1).softmax(dim=1).view(N_matches, 3, 3)

    deltaX_B = (corrA_B * deltaX).view(N_matches, -1).sum(dim=1).unsqueeze(0)
    deltaY_B = (corrA_B * deltaY).view(N_matches, -1).sum(dim=1).unsqueeze(0)

    deltaX_A = (corrB_A * deltaX).view(N_matches, -1).sum(dim=1).unsqueeze(0)
    deltaY_A = (corrB_A * deltaY).view(N_matches, -1).sum(dim=1).unsqueeze(0)

    xA_ = xA_ + deltaX_A
    yA_ = yA_ + deltaY_A
    xB_ = xB_ + deltaX_B
    yB_ = yB_ + deltaY_B

    return xA_, yA_, xB_, yB_, score_
Code Example #17
def _roi_pool(pred_heatmap, rois, patch_size=8):
    from torchvision.ops import roi_pool
    patches = roi_pool(pred_heatmap, rois.float(), (patch_size, patch_size), spatial_scale=1.0)
    return patches
Code Example #18
 def forward(self, input, rois):
     return roi_pool(input, rois, self.output_size, self.spatial_scale)
Code Example #19
File: encoder.py Project: zzmcdc/PICK-pytorch
    def forward(self, images: torch.Tensor, boxes_coordinate: torch.Tensor,
                transcripts: torch.Tensor, src_key_padding_mask: torch.Tensor):
        '''

        :param images: whole_images, shape is (B, N, H, W, C), where B is batch size, N is the number of segments of
                the documents, H is height of image, W is width of image, C is channel of images (default is 3).
        :param boxes_coordinate: boxes coordinate, shape is (B, N, 8),
                where 8 is coordinates (x1, y1, x2, y2, x3, y3, x4, y4).
        :param transcripts: text segments, shape is (B, N, T, D), where T is the max length of transcripts,
                                D is dimension of model.
        :param src_key_padding_mask: text padding mask, shape is (B*N, T), True for padding positions.
            If provided, the specified padding elements in the key will be ignored by the attention.
            This is a binary mask: where the value is True, the corresponding attention weights inside the
            Transformer are filled with -inf.
        need_weights: output attn_output_weights.
        :return: set of nodes X, shape is (B*N, T, D)
        '''

        B, N, T, D = transcripts.shape

        # get image embedding using cnn
        # (B, 3, H, W)
        _, _, origin_H, origin_W = images.shape

        # image embedding: (B, C, H/16, W/16)
        images = self.cnn(images)
        _, C, H, W = images.shape

        # generate rois for roi pooling, rois shape is (B, N, 5), 5 means (batch_index, x0, y0, x1, y1)
        rois_batch = torch.zeros(B, N, 5, device=images.device)
        # Loop on the every image.
        for i in range(B):  # (B, N, 8)
            # (N, 8)
            doc_boxes = boxes_coordinate[i]
            # (N, 4)
            pos = torch.stack([
                doc_boxes[:, 0], doc_boxes[:, 1], doc_boxes[:, 4], doc_boxes[:,
                                                                             5]
            ],
                              dim=1)
            rois_batch[i, :, 1:5] = pos
            rois_batch[i, :, 0] = i

        spatial_scale = float(H / origin_H)
        # use roi pooling get image segments
        # (B*N, C, roi_pooling_size, roi_pooling_size)
        if self.roi_pooling_mode == 'roi_align':
            image_segments = roi_align(images, rois_batch.view(-1, 5),
                                       self.roi_pooling_size, spatial_scale)
        else:
            image_segments = roi_pool(images, rois_batch.view(-1, 5),
                                      self.roi_pooling_size, spatial_scale)

        # (B*N, D, 1, 1)
        image_segments = F.relu(self.bn(self.conv(image_segments)))
        # # (B*N, D,)
        image_segments = image_segments.squeeze()

        # (B*N, 1, D)
        image_segments = image_segments.unsqueeze(dim=1)

        # add positional embedding
        transcripts_segments = self.pe_droput(
            transcripts +
            self.position_embedding[:, :, :transcripts.size(2), :])
        # (B*N, T ,D)
        transcripts_segments = transcripts_segments.reshape(B * N, T, D)

        # (B*N, T, D)
        image_segments = image_segments.expand_as(transcripts_segments)

        # here we first add image embedding and text embedding together,
        # then as the input of transformer to get a non-local fusion features, different from paper process.
        out = image_segments + transcripts_segments

        # (T, B*N, D)
        out = out.transpose(0, 1).contiguous()

        # (T, B*N, D)
        out = self.transformer_encoder(
            out, src_key_padding_mask=src_key_padding_mask)

        # (B*N, T, D)
        out = out.transpose(0, 1).contiguous()
        out = self.norm(out)
        out = F.dropout(out, p=self.dropout, training=self.training)

        return out
Code Example #20
 def script_func(input, rois):
     return ops.roi_pool(input, rois, 5, 1.0)[0]
Code Example #21
 def script_fn(input, rois, pool_size):
     # type: (Tensor, Tensor, int) -> Tensor
     return ops.roi_pool(input, rois, pool_size, 1.0)[0]
Code Example #22
 def forward(self, features, rois):
     outputs = roi_pool(features, rois,
                        (self.pooled_height, self.pooled_width),
                        self.spatial_scale)
     return outputs