Example #1
    def proposal_layer(rpn_cls_prob_reshape,
                       rpn_bbox_pred,
                       im_info,
                       cfg_key,
                       _feat_stride,
                       anchor_scales,
                       gt_boxes=None):
        rpn_cls_prob_reshape = rpn_cls_prob_reshape.data.cpu().numpy()
        rpn_bbox_pred = rpn_bbox_pred.data.cpu().numpy()

        rois, scores, anchor_inds, labels = proposal_layer_py(
            rpn_cls_prob_reshape,
            rpn_bbox_pred,
            im_info,
            cfg_key,
            _feat_stride=_feat_stride,
            anchor_scales=anchor_scales,
            gt_boxes=gt_boxes)

        z = np.zeros((rois.shape[0], 2))
        z[:, 1] = scores[:, 0]

        rois = network.np_to_variable(rois, is_cuda=True)
        scores = network.np_to_variable(z, is_cuda=True)
        anchor_inds = network.np_to_variable(anchor_inds,
                                             is_cuda=True,
                                             dtype=torch.LongTensor)
        labels = network.np_to_variable(labels,
                                        is_cuda=True,
                                        dtype=torch.LongTensor)

        return rois, scores, anchor_inds, labels
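Every snippet on this page funnels NumPy arrays through network.np_to_variable before feeding them to the network. The helper itself is not shown here; below is a minimal sketch of what it plausibly looks like, inferred only from how it is called (is_cuda, is_training and dtype keywords, old Variable-era PyTorch) and not taken verbatim from any repository:

    import torch
    from torch.autograd import Variable

    def np_to_variable(x, is_cuda=True, is_training=False, dtype=torch.FloatTensor):
        # Wrap a NumPy array as an autograd Variable of the requested dtype;
        # inference-time wrappers are marked volatile (pre-0.4 PyTorch API).
        v = Variable(torch.from_numpy(x).type(dtype), volatile=not is_training)
        if is_cuda:
            v = v.cuda()
        return v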
Example #2
    def forward(self,
                im_data,
                rois,
                im_info,
                gt_vec=None,
                gt_boxes=None,
                gt_ishard=None,
                dontcare_areas=None):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)

        #TODO: Use im_data and rois as input
        # compute cls_prob which are N_roi X 20 scores
        # Checkout faster_rcnn.py for inspiration

        x = self.features(im_data)
        x = self.roi_pool(x, network.np_to_variable(rois, is_cuda=True))
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        cls_score = self.score_cls(x)
        det_score = self.score_det(x)
        # cls_score = F.softmax(cls_score, dim=-1)
        # det_score = F.softmax(det_score, dim=-2)
        cls_score = F.softmax(cls_score, dim=1)
        det_score = F.softmax(det_score, dim=0)
        cls_prob = cls_score * det_score

        if self.training:
            label_vec = network.np_to_variable(gt_vec, is_cuda=True)
            # label_vec = label_vec.view(self.n_classes,-1)
            self.cross_entropy = self.build_loss(cls_prob, label_vec)
        return cls_prob
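The forward above multiplies a per-class softmax (over classes, dim=1) with a per-ROI softmax (over ROIs, dim=0) in WSDDN fashion and hands the result to a build_loss that this page does not show. A common choice for such a weakly supervised loss, sketched here as an assumption rather than this repository's actual code, is to sum the per-ROI probabilities into image-level class scores and apply binary cross-entropy against the 0/1 image label vector gt_vec:

    def build_loss(self, cls_prob, label_vec, eps=1e-6):
        # cls_prob: (n_rois, n_classes) ROI-by-class probabilities.
        # label_vec: 0/1 image-level labels, one entry per class.
        image_scores = cls_prob.sum(0).view(-1).clamp(eps, 1 - eps)
        label_vec = label_vec.view(-1)
        loss = -(label_vec * image_scores.log() +
                 (1 - label_vec) * (1 - image_scores).log()).sum()
        return loss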
Example #3
    def build_roi_loss(self, rpn_cls_score_reshape, rpn_cls_prob_reshape,
                       scores, anchor_inds, labels):

        batch_size = rpn_cls_score_reshape.size()[0]
        rpn_cls_score = rpn_cls_score_reshape.permute(
            0, 2, 3, 1)  #.contiguous().view(-1, 2)
        bg_scores = torch.index_select(
            rpn_cls_score, 3,
            network.np_to_variable(np.arange(0, 9),
                                   is_cuda=True,
                                   dtype=torch.LongTensor))
        fg_scores = torch.index_select(
            rpn_cls_score, 3,
            network.np_to_variable(np.arange(9, 18),
                                   is_cuda=True,
                                   dtype=torch.LongTensor))
        bg_scores = bg_scores.contiguous().view(-1, 1)
        fg_scores = fg_scores.contiguous().view(-1, 1)

        rpn_cls_score = torch.cat([bg_scores, fg_scores], 1)

        rpn_cls_score = torch.index_select(rpn_cls_score, 0,
                                           anchor_inds.view(-1))
        labels = labels.view(-1)

        roi_cross_entropy = F.cross_entropy(rpn_cls_score,
                                            labels,
                                            size_average=False)

        return roi_cross_entropy
Example #4
    def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchor_scales, gt_boxes=None):

        # convert to numpy
        rpn_cls_prob_reshape = rpn_cls_prob_reshape.data.cpu().numpy()
        rpn_bbox_pred = rpn_bbox_pred.data.cpu().numpy()

        rois, scores, anchor_inds, labels = proposal_layer_py(rpn_cls_prob_reshape,
                                                              rpn_bbox_pred,
                                                              im_info, cfg_key,
                                                              _feat_stride=_feat_stride,
                                                              anchor_scales=anchor_scales,
                                                              gt_boxes=gt_boxes)


        rois = network.np_to_variable(rois, is_cuda=True)
        anchor_inds = network.np_to_variable(anchor_inds, is_cuda=True,
                                                 dtype=torch.LongTensor)
        labels = network.np_to_variable(labels, is_cuda=True,
                                             dtype=torch.LongTensor)

        #just get fg scores, make bg scores 0 
        #b_scores = np.zeros((info[0].shape[0], 2))
        #b_scores[:,1] = info[1][:,0]
        scores = network.np_to_variable(scores, is_cuda=True)

        return rois, scores, anchor_inds, labels
Example #5
    def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas,
                            im_info, _feat_stride, anchor_scales):
        """
        rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of previous conv layer
        gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
        gt_ishard: (G, 1), 1 or 0 indicates difficult or not
        dontcare_areas: (D, 4), some areas may contain small objects but have no labelling. D may be 0
        im_info: a list of [image_height, image_width, scale_ratios]
        _feat_stride: the downsampling ratio of feature map to the original input image
        anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
        ----------
        Returns
        ----------
        rpn_labels : (1, 1, HxA, W), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
        rpn_bbox_targets: (1, 4xA, H, W), distances of the anchors to the gt_boxes (may contain some transform)
                        that are the regression objectives
        rpn_bbox_inside_weights: (1, 4xA, H, W) weights of each box, mainly set by hyperparameters in cfg
        rpn_bbox_outside_weights: (1, 4xA, H, W) used to balance fg/bg,
                        because the numbers of bgs and fgs may differ significantly
        """
        rpn_cls_score = rpn_cls_score.data.cpu().numpy()
        rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \
            anchor_target_layer_py(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride, anchor_scales)

        rpn_labels = network.np_to_variable(rpn_labels,
                                            is_cuda=True,
                                            dtype=torch.LongTensor)
        rpn_bbox_targets = network.np_to_variable(rpn_bbox_targets,
                                                  is_cuda=True)
        rpn_bbox_inside_weights = network.np_to_variable(
            rpn_bbox_inside_weights, is_cuda=True)
        rpn_bbox_outside_weights = network.np_to_variable(
            rpn_bbox_outside_weights, is_cuda=True)

        return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Example #6
    def proposal_target_layer(rpn_rois, gt_boxes, gt_ishard, dontcare_areas, num_classes):
        """
        ----------
        rpn_rois:  (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        gt_boxes: (G, 5) [x1 ,y1 ,x2, y2, class] int
        # gt_ishard: (G, 1) {0 | 1} 1 indicates hard
        dontcare_areas: (D, 4) [ x1, y1, x2, y2]
        num_classes
        ----------
        Returns
        ----------
        rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        labels: (1 x H x W x A, 1) {0,1,...,_num_classes-1}
        bbox_targets: (1 x H x W x A, K x4) [dx1, dy1, dx2, dy2]
        bbox_inside_weights: (1 x H x W x A, Kx4) 0/1 masks for computing the loss
        bbox_outside_weights: (1 x H x W x A, Kx4) 0/1 masks for computing the loss
        """
        rpn_rois = rpn_rois.data.cpu().numpy()
        rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = \
            proposal_target_layer_py(rpn_rois, gt_boxes, gt_ishard, dontcare_areas, num_classes)
        # print labels.shape, bbox_targets.shape, bbox_inside_weights.shape
        rois = network.np_to_variable(rois, is_cuda=True)
        labels = network.np_to_variable(labels, is_cuda=True, dtype=torch.LongTensor)
        bbox_targets = network.np_to_variable(bbox_targets, is_cuda=True)
        bbox_inside_weights = network.np_to_variable(bbox_inside_weights, is_cuda=True)
        bbox_outside_weights = network.np_to_variable(bbox_outside_weights, is_cuda=True)

        return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
Example #7
    def forward(self, im_data, rois, im_info, gt_vec=None,
                gt_boxes=None, gt_ishard=None, dontcare_areas=None):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)
	
        #TODO: Use im_data and rois as input
        # compute cls_prob which are N_roi X 20 scores
        # Checkout faster_rcnn.py for inspiration
        rois = network.np_to_variable(rois, is_cuda=True)
        out = self.features(im_data)
        #print('feature shape:', out.shape)
        #print('rois shape:', rois.shape)
        out = self.roi_pool(out, rois)
        #print('roi_pool out shape:', out.shape)
        out = out.view(out.shape[0], -1)
        #print('reshape roi_pool out shape:', out.shape)
        out = self.classifier(out)
        #print('classifier out shape:', out.shape)
        score_cls = self.score_cls(out)
        softmax_cls = self.cls_softmax(score_cls)
        score_det = self.score_det(out)
        softmax_det = self.det_softmax(score_det)
        cls_prob = softmax_cls * softmax_det


        if self.training:
            label_vec = network.np_to_variable(gt_vec, is_cuda=True)
            label_vec = label_vec.view(self.n_classes,-1)
            self.cross_entropy = self.build_loss(cls_prob, label_vec)
        return cls_prob
Example #8
    def forward(self, im_data, rois, im_info, gt_vec=None,
                gt_boxes=None, gt_ishard=None, dontcare_areas=None):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)

        #TODO: Use im_data and rois as input
        # compute cls_prob which are N_roi X 20 scores
        # Checkout faster_rcnn.py for inspiration
        rois = network.np_to_variable(rois, is_cuda=True)

        x = self.features(im_data)
        x = self.roi_pool(x, rois)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        score_cls = self.score_cls(x)
        score_det = self.score_det(x)
        # print(score_cls)
        score_cls = F.softmax(score_cls, dim=1)
        score_det = F.softmax(score_det, dim=0)
        # print(score_det)
        cls_prob = score_cls * score_det          # check how to implement
        # print(cls_prob.shape)


        if self.training:
            label_vec = network.np_to_variable(gt_vec, is_cuda=True)
            # label_vec = label_vec.view(self.n_classes,-1)
            self.cross_entropy = self.build_loss(cls_prob, label_vec)
        return cls_prob
Example #9
    def proposal_target_layer(region_rois,
                              gt_regions,
                              n_classes_obj,
                              voc_sign,
                              is_training=False,
                              graph_generation=False):
        """
        ----------
        object_rois:  (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        region_rois:  (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        gt_objects:   (G_obj, 5) [x1 ,y1 ,x2, y2, obj_class] int
        gt_relationships: (G_obj, G_obj) [pred_class] int (-1 for no relationship)
        gt_regions:   (G_region, 4+40) [x1, y1, x2, y2, word_index] (-1 for padding)
        # gt_ishard: (G_region, 4+40) {0 | 1} 1 indicates hard
        # dontcare_areas: (D, 4) [ x1, y1, x2, y2]
        n_classes_obj
        n_classes_pred
        is_training to indicate whether in training scheme
        ----------
        Returns
        ----------
        rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        labels: (1 x H x W x A, 1) {0,1,...,_num_classes-1}
        bbox_targets: (1 x H x W x A, K x4) [dx1, dy1, dx2, dy2]
        bbox_inside_weights: (1 x H x W x A, Kx4) 0/1 masks for computing the loss
        bbox_outside_weights: (1 x H x W x A, Kx4) 0/1 masks for computing the loss
        """

        #object_rois = object_rois.data.cpu().numpy()
        region_rois = region_rois.data.cpu().numpy()

        region_seq, region_rois, \
            bbox_targets_region, bbox_inside_weights_region, bbox_outside_weights_region= \
            proposal_target_layer_py(region_rois,
                gt_regions, n_classes_obj, voc_sign, is_training, graph_generation=graph_generation)

        # print labels.shape, bbox_targets.shape, bbox_inside_weights.shape
        if is_training:
            # object_labels = network.np_to_variable(object_labels, is_cuda=True, dtype=torch.LongTensor)
            # bbox_targets = network.np_to_variable(bbox_targets, is_cuda=True)
            # bbox_inside_weights = network.np_to_variable(bbox_inside_weights, is_cuda=True)
            # bbox_outside_weights = network.np_to_variable(bbox_outside_weights, is_cuda=True)
            # phrase_label = network.np_to_variable(phrase_label, is_cuda=True, dtype=torch.LongTensor)
            region_seq = network.np_to_variable(region_seq,
                                                is_cuda=True,
                                                dtype=torch.LongTensor)
            bbox_targets_region = network.np_to_variable(bbox_targets_region,
                                                         is_cuda=True)
            bbox_inside_weights_region = network.np_to_variable(
                bbox_inside_weights_region, is_cuda=True)
            bbox_outside_weights_region = network.np_to_variable(
                bbox_outside_weights_region, is_cuda=True)

        #object_rois = network.np_to_variable(object_rois, is_cuda=True)
        #phrase_rois = network.np_to_variable(phrase_rois, is_cuda=True)
        region_rois = network.np_to_variable(region_rois, is_cuda=True)

        return (region_rois, region_seq, bbox_targets_region,
                bbox_inside_weights_region, bbox_outside_weights_region)
Example #10
 def forward(self,  im_data, gt_data=None):        
     im_data = network.np_to_variable(im_data, is_cuda=True, is_training=self.training)                
     density_map = self.DME(im_data)
     
     if self.training:                        
         gt_data = network.np_to_variable(gt_data, is_cuda=True, is_training=self.training)            
         self.loss_mse = self.build_loss(density_map, gt_data)
         
     return density_map
Example #11
    def forward(self,im_data,gt_data):
        im_data = network.np_to_variable(im_data,is_cuda=True,is_training=self.training)
        final = self.DA_Net(im_data)

        if self.training:
            gt_data = network.np_to_variable(gt_data, is_cuda=True, is_training=self.training)

            self.loss_mse = self.build_loss(final,gt_data)
            return final
        else:
            return final
Example #12
 def forward(self,  im_data, gt_data=None, gt_cls_label=None, ce_weights=None):        
     im_data = network.np_to_variable(im_data, is_cuda=True, is_training=self.training)                        
     density_map, density_cls_score = self.CCN(im_data)
     density_cls_prob = F.softmax(density_cls_score)
     
     if self.training:                        
         gt_data = network.np_to_variable(gt_data, is_cuda=True, is_training=self.training)            
         gt_cls_label = network.np_to_variable(gt_cls_label, is_cuda=True, is_training=self.training,dtype=torch.FloatTensor)                        
         self.loss_mse, self.cross_entropy = self.build_loss(density_map, density_cls_prob, gt_data, gt_cls_label, ce_weights)
         
         
     return density_map
Example #13
 def select_to_match_dimensions(a, b):
     if a.size()[2] > b.size()[2]:
         a = torch.index_select(a, 2,
                                network.np_to_variable(np.arange(0, b.size()[2]).astype(np.int32),
                                                       is_cuda=True, dtype=torch.LongTensor))
     if a.size()[3] > b.size()[3]:
         a = torch.index_select(a, 3,
                                network.np_to_variable(np.arange(0, b.size()[3]).astype(np.int32),
                                                       is_cuda=True, dtype=torch.LongTensor))

     return a
Example #14
    def forward(self, im_data, gt_data=None):
        im_data = network.np_to_variable(im_data,
                                         is_cuda=True,
                                         is_training=self.training)
        feature_map = self.mcnn_backbone(im_data)
        density_map = self.fuse(feature_map)

        if self.training:
            gt_data = network.np_to_variable(gt_data,
                                             is_cuda=True,
                                             is_training=self.training)
            self.loss_mse = nn.MSELoss()(density_map, gt_data)

        return density_map
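A typical way these crowd-counting modules are driven during training (a usage sketch only; net, optimizer, im_blob and gt_density are assumed to exist and to match what the data loader provides):

    net.train()
    density_map = net(im_blob, gt_density)   # training mode also fills net.loss_mse
    loss = net.loss_mse

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()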
Example #15
 def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                    _feat_stride, anchor_scales, anchor_ratios, is_region):
     rpn_cls_prob_reshape = rpn_cls_prob_reshape.data.cpu().numpy()
     rpn_bbox_pred = rpn_bbox_pred.data.cpu().numpy()
     x, scores = proposal_layer_py(rpn_cls_prob_reshape,
                                   rpn_bbox_pred,
                                   im_info,
                                   cfg_key,
                                   _feat_stride,
                                   anchor_scales,
                                   anchor_ratios,
                                   is_region=is_region)
     x = network.np_to_variable(x, is_cuda=True)
     scores = network.np_to_variable(scores, is_cuda=True)
     return x.view(-1, 5), scores.view(-1, )
Example #16
    def __index__(self, i):
        '''bid: image index; pid: patch index, to find slice'''
        bid, pid = self.patch_list[i]
        transform_img = []
        transform_den = []
        transform_raw = []

        transform_raw.append(transforms.Lambda(lambda img: i_crop(img, self.patches[bid][pid], self.crop_size)))
        transform_raw.append(transforms.Lambda(lambda img: i_flip(img, self.filps[i])))
        transform_raw.append(transforms.Lambda(lambda img: np.array(img)))
        transform_raw = transforms.Compose(transform_raw)

        transform_img.append(transforms.Lambda(lambda img: i_crop(img, self.patches[bid][pid], self.crop_size)))
        transform_img.append(transforms.Lambda(lambda img: i_flip(img, self.filps[i])))
        transform_img += [ transforms.ToTensor(),
                           transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
                           ]
        transform_img = transforms.Compose(transform_img)

        transform_den.append(transforms.Lambda(lambda img: d_crop(img, self.patches[bid][pid], self.crop_size)))
        transform_den.append(transforms.Lambda(lambda img: d_flip(img, self.filps[i])))
        transform_den += [transforms.Lambda(lambda den: network.np_to_variable(den, is_cuda=False, is_training=self.training))]
        transform_den = transforms.Compose(transform_den)

        img, den, gt_count = self.dataloader[bid]

        return transform_img(img.copy()), transform_den(den), transform_raw(img.copy()), gt_count, i
Example #17
    def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)
        features = self.features(im_data)

        rpn_conv1 = self.conv1(features)

        # rpn score
        rpn_cls_score = self.score_conv(rpn_conv1)
        rpn_cls_score_reshape = self.reshape_layer(rpn_cls_score, 2)
        rpn_cls_prob = F.softmax(rpn_cls_score_reshape)
        rpn_cls_prob_reshape = self.reshape_layer(rpn_cls_prob, len(self.anchor_scales)*3*2)

        # rpn boxes
        rpn_bbox_pred = self.bbox_conv(rpn_conv1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'
        rois = self.proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                   cfg_key, self._feat_stride, self.anchor_scales)

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None
            rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas,
                                                im_info, self._feat_stride, self.anchor_scales)
            self.cross_entropy, self.loss_box = self.build_loss(rpn_cls_score_reshape, rpn_bbox_pred, rpn_data)

        return features, rois
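The RPN forward above leans on a reshape_layer helper so that the softmax can act on the two bg/fg channels of every anchor. The helper is not included on this page; the sketch below shows the reshape commonly used for this purpose (an assumption about its implementation, written here as a plain function):

    def reshape_layer(x, d):
        # Regroup channels: (N, C, H, W) -> (N, d, C*H/d, W), so that a softmax
        # over dim 1 covers the d entries (e.g. bg/fg) at every anchor position.
        n, c, h, w = x.size()
        return x.view(n, int(d), int(c * h / d), w)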
Example #18
    def get_features(self, im_data):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)
        im_in = im_data  # self.input_conv(im_data)
        features = self.features(im_in)

        return features
Example #19
    def __index__(self, i): 
        bid, pid = self.patch_list[i]

        ncl = self.ncls[bid + 1]
        choice = self.choices[bid  + 1]
        idx, image, label = self.sample_dict[bid + 1][ncl][choice]
        img, den, gt_count = self.loaded[idx]


        transform_img = []
        transform_den = []
        transform_raw = []

        transform_raw.append(transforms.Lambda(lambda img: i_crop(img, self.patches[bid][pid], self.crop_size)))
        transform_raw.append(transforms.Lambda(lambda img: i_flip(img, self.filps[i])))
        transform_raw.append(transforms.Lambda(lambda img: np.array(img)))
        transform_raw = transforms.Compose(transform_raw)

        transform_img.append(transforms.Lambda(lambda img: i_crop(img, self.patches[bid][pid], self.crop_size)))
        transform_img.append(transforms.Lambda(lambda img: i_flip(img, self.filps[i])))
        transform_img += [ transforms.ToTensor(),
                           transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
                           ]
        transform_img = transforms.Compose(transform_img)

        transform_den.append(transforms.Lambda(lambda img: d_crop(img, self.patches[bid][pid], self.crop_size)))
        transform_den.append(transforms.Lambda(lambda img: d_flip(img, self.filps[i])))
        transform_den += [transforms.Lambda(lambda den: network.np_to_variable(den, is_cuda=False, is_training=self.training))]
        transform_den = transforms.Compose(transform_den)


        return transform_img(img.copy()), transform_den(den), transform_raw(img.copy()), gt_count, i
Example #20
    def build_roi_loss(self, rpn_cls_score_reshape, rpn_cls_prob_reshape,
                       scores, anchor_inds, labels):

        batch_size = rpn_cls_score_reshape.size()[0]
        rpn_cls_score = rpn_cls_score_reshape.permute(
            0, 2, 3, 1)  #.contiguous().view(-1, 2)
        bg_scores = torch.index_select(
            rpn_cls_score, 3,
            network.np_to_variable(np.arange(0, 9),
                                   is_cuda=True,
                                   dtype=torch.LongTensor))
        fg_scores = torch.index_select(
            rpn_cls_score, 3,
            network.np_to_variable(np.arange(9, 18),
                                   is_cuda=True,
                                   dtype=torch.LongTensor))
        bg_scores = bg_scores.contiguous().view(batch_size, -1, 1)
        fg_scores = fg_scores.contiguous().view(batch_size, -1, 1)

        all_rpn_cls_scores = None
        all_labels = None

        for batch_ind in range(batch_size):

            b_ind_var = network.np_to_variable(np.zeros(1) + batch_ind,
                                               is_cuda=True,
                                               dtype=torch.LongTensor)
            bg = torch.index_select(bg_scores, 0, b_ind_var)
            fg = torch.index_select(fg_scores, 0, b_ind_var)
            #rpn_cls_score = torch.cat([bg_scores[batch_ind,:,:],fg_scores[batch_ind,:,:]],1)
            rpn_cls_score = torch.cat([bg.squeeze(0), fg.squeeze(0)], 1)
            rpn_cls_score = torch.index_select(rpn_cls_score, 0,
                                               anchor_inds[batch_ind])
            if all_rpn_cls_scores is None:
                all_rpn_cls_scores = rpn_cls_score
                all_labels = labels[batch_ind]
            else:
                all_rpn_cls_scores = torch.cat(
                    [all_rpn_cls_scores, rpn_cls_score], 0)
                all_labels = torch.cat([all_labels, labels[batch_ind]], 0)

        #roi_cross_entropy = F.cross_entropy(rpn_cls_score, labels, size_average=False)
        roi_cross_entropy = F.cross_entropy(all_rpn_cls_scores,
                                            all_labels,
                                            size_average=False)

        return roi_cross_entropy
Example #21
    def forward(self,
                im_data,
                rois,
                im_info,
                gt_vec=None,
                gt_boxes=None,
                gt_ishard=None,
                dontcare_areas=None):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)

        #TODO: Use im_data and rois as input
        # compute cls_prob which are N_roi X 20 scores
        # Checkout faster_rcnn.py for inspiration

        #print('\n\nHERE HERE\n\n')
        #pdb.set_trace()

        rois = network.np_to_variable(rois, is_cuda=True)
        #print(rois.shape)

        conv_features = self.features(im_data)
        #print(conv_features.shape)
        roi_pooled_features = self.roi_pool(conv_features, rois)
        #print(roi_pooled_features.shape)

        roi_pooled_features = roi_pooled_features.view(
            roi_pooled_features.shape[0], -1)
        #print(roi_pooled_features.shape)

        classifier_features = self.classifier(roi_pooled_features)
        #print(classifier_features.shape)

        prob_clas = F.softmax(self.score_cls(classifier_features), dim=1)
        #print(prob_clas.shape)
        prob_det = F.softmax(self.score_det(classifier_features), dim=0)
        #print(prob_det.shape)

        cls_prob = prob_clas * prob_det
        #print(cls_prob.shape)

        if self.training:
            label_vec = network.np_to_variable(gt_vec, is_cuda=True)
            label_vec = label_vec.view(self.n_classes, -1)
            self.cross_entropy = self.build_loss(cls_prob, label_vec)
        return cls_prob
Example #22
 def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                    _feat_stride, anchor_scales):
     rpn_cls_prob_reshape = rpn_cls_prob_reshape.data.cpu().numpy()
     rpn_bbox_pred = rpn_bbox_pred.data.cpu().numpy()
     x = proposal_layer_py(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                           cfg_key, _feat_stride, anchor_scales)
     x = network.np_to_variable(x, is_cuda=False)
     return x.view(-1, 5)
Example #23
    def forward(self, im_data, gt_data=None):
        im_data = network.np_to_variable(im_data,
                                         is_cuda=True,
                                         is_training=self.training)
        feature_map = self.mcnn_backbone(im_data)
        x = self.conv3x3(im_data)
        x = self.downsample(x)
        x = torch.cat((feature_map, x), 1)
        x = self.amend(x)
        density_map = self.fuse(x)

        if self.training:
            gt_data = network.np_to_variable(gt_data,
                                             is_cuda=True,
                                             is_training=self.training)
            self.loss_mse = nn.MSELoss()(density_map, gt_data)

        return density_map
Example #24
    def forward(self,
                im_data,
                rois,
                im_info,
                gt_vec=None,
                gt_boxes=None,
                gt_ishard=None,
                dontcare_areas=None):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)

        #TODO: Use im_data and rois as input
        # compute cls_prob which are N_roi X 20 scores
        # Checkout faster_rcnn.py for inspiration
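        # NOTE: this stub never computes cls_prob, so the return below would raise
        # NameError as written; the next example shows a completed version.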

        if self.training:
            label_vec = network.np_to_variable(gt_vec, is_cuda=True)
            label_vec = label_vec.view(self.n_classes, -1)
            self.cross_entropy = self.build_loss(cls_prob, label_vec)
        return cls_prob
Example #25
    def forward(self,
                im_data,
                rois,
                im_info,
                gt_vec=None,
                gt_boxes=None,
                gt_ishard=None,
                dontcare_areas=None):

        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)

        # TODO: Use im_data and rois as input
        # compute cls_prob which are N_roi X 20 scores
        # Checkout faster_rcnn.py for inspiration

        features = self.features(im_data)

        # compute SPP and fc6, fc7
        rois_var = network.np_to_variable(rois, is_cuda=True)
        pooled_features = self.roi_pool(features, rois_var)
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = self.classifier(x)

        # compute classification stream and detection stream
        reg_cls_score = self.score_cls(x)
        reg_cls_prob = F.softmax(reg_cls_score, dim=1)
        det_score = self.score_det(x)
        det_prob = F.softmax(det_score, dim=0)
        # combine region scores and detection
        cls_prob = torch.mul(reg_cls_prob, det_prob)

        #if self.training:
        if gt_vec is not None:
            label_vec = network.np_to_variable(gt_vec, is_cuda=True)
            label_vec = label_vec.view(self.n_classes, -1)
            self.cross_entropy = self.build_loss(cls_prob, label_vec)
        return cls_prob
Example #26
    def remove_bad_roi(gt_objects):
        """remove the gt rois with heights or weights which are smaller than 1"""
        check_gt = gt_objects[:, :4]
        width_gt = (check_gt[:, 2] - check_gt[:, 0])
        height_gt = (check_gt[:, 3] - check_gt[:, 1])
        gt_objects_without_error = gt_objects[np.where(
            (width_gt > 1) * (height_gt > 1))]

        zeros = np.zeros((gt_objects_without_error.shape[0], 1),
                         dtype=gt_objects.dtype)
        object_rois_gt = np.hstack((zeros, gt_objects_without_error[:, :4]))
        object_rois_gt = network.np_to_variable(object_rois_gt, is_cuda=True)
        object_rois = object_rois_gt
        return object_rois
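To see what the width/height filter in remove_bad_roi keeps, here is its core replayed on a tiny hand-made array (pure NumPy, with made-up values and without the final np_to_variable wrapping):

    import numpy as np

    # Two ground-truth boxes [x1, y1, x2, y2, class]; the second is degenerate
    # (width 0.5 < 1) and is dropped by the check.
    gt = np.array([[10., 10., 100., 120., 3.],
                   [50., 50., 50.5, 90., 7.]])
    w = gt[:, 2] - gt[:, 0]
    h = gt[:, 3] - gt[:, 1]
    kept = gt[np.where((w > 1) * (h > 1))]
    rois = np.hstack((np.zeros((kept.shape[0], 1), dtype=gt.dtype), kept[:, :4]))
    print(rois)   # [[  0.  10.  10. 100. 120.]]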
Example #27
    def forward(self,
                im_data,
                rois,
                bbox_3d_targets=None,
                bbox_loss_3d_weights=None,
                labels=None):
        im_data = network.np_to_variable(im_data, is_cuda=True)
        #features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)
        #im_data = im_data.permute(0, 3, 1, 2)
        features = self.features(im_data)
        #rois = boxes
        #pdb.set_trace()
        if self.training:
            roi_data = self.proposal_target_layer(rois, labels,
                                                  bbox_3d_targets,
                                                  bbox_loss_3d_weights)
            rois = roi_data[0]
        else:
            rois = network.np_to_variable(rois, is_cuda=True)
            #pdb.set_trace()
        # roi pool
        pooled_features = self.roi_pool(features, rois)
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = self.fc6(x)
        x = F.dropout(x, training=self.training)
        x = self.fc7(x)
        x = F.dropout(x, training=self.training)

        cls_score = self.score_fc(x)
        cls_prob = F.softmax(cls_score)
        bbox_pred = self.bbox_fc(x)
        #pdb.set_trace()
        if self.training:
            self.cross_entropy, self.loss_box = self.build_loss(
                cls_score, bbox_pred, roi_data)

        return cls_prob, bbox_pred, rois
Example #28
    def forward(self, im_data, rois, im_info, gt_vec=None,
                gt_boxes=None, gt_ishard=None, dontcare_areas=None):
#         if rois.shape[0] > 256 and (self.training != True):
        if rois.shape[0] > 64:
            rois = rois[:64,:]
#             print("after clip{0}".format(rois.shape))
#         print(im_data.shape)
#         print(rois.shape)
        im_data = network.np_to_variable(im_data, is_cuda=True)
        im_data = im_data.permute(0, 3, 1, 2)
#         print im_data.shape
        #TODO: Use im_data and rois as input
        # compute cls_prob which are N_roi X 20 scores
        # Checkout faster_rcnn.py for inspiration
        feature = self.features(im_data)
        rois = network.np_to_variable(rois, is_cuda=True)
#         print("feature {0}".format(feature))
#         debug_draw(im_data, rois)
#         print("rois_shape{0}".format(rois.shape))
        pooled_features = self.roi_pool(feature, rois)
#         print("pooled_features range {0}-{1}".format(pooled_features.min(),pooled_features.max()))
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = self.classifier_share(x)
#         print("classifier_share_min {0}-{1}".format(x.min(), x.max()))
        output_c = self.classifier_c(x)
        output_d = self.classifier_d(x)
        pred_score = output_c*output_d
        cls_prob = pred_score.sum(0)
#         print("shape of out put {0} and {1}".format(output_c1.size(), output_d1.size()))

        if cls_prob.max().data.cpu().numpy() > 1 + 1e-3:
            print("cls_error")
        if self.training:
            label_vec = network.np_to_variable(gt_vec, is_cuda=True)
            label_vec = label_vec.squeeze()
            self.cross_entropy = -self.build_loss(cls_prob, label_vec)
        return cls_prob, pred_score
Example #29
    def forward(self,
                im_data,
                im_info,
                gt_boxes=None,
                gt_ishard=None,
                dontcare_areas=None):
        im_data = network.np_to_variable(im_data, is_cuda=False)
        im_data = im_data.permute(0, 3, 1, 2)
        features = self.features(im_data)

        rpn_conv1 = self.conv1(features)

        # rpn score
        rpn_cls_score = self.score_conv(rpn_conv1)
        rpn_cls_score_reshape = self.reshape_layer(rpn_cls_score, 2)
        rpn_cls_prob = F.softmax(rpn_cls_score_reshape)  # now probabilities! note the difference between score and prob
        rpn_cls_prob_reshape = self.reshape_layer(
            rpn_cls_prob,
            len(self.anchor_scales) * 3 * 2)

        # rpn boxes
        rpn_bbox_pred = self.bbox_conv(rpn_conv1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'
        rois = self.proposal_layer(
            rpn_cls_prob_reshape,
            rpn_bbox_pred,
            im_info,  # the rois here are the anchors transformed by bbox_pred, returned as the top-k ranked by fg probability (high to low)
            cfg_key,
            self._feat_stride,
            self.anchor_scales)
        # print("region proposal",len(rois)) #此处返回300+个region proposals

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None
            # print("gt_boxes",gt_boxes) #此处的gt_boxes仍使用绝对坐标
            rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes,
                                                gt_ishard, dontcare_areas,
                                                im_info, self._feat_stride,
                                                self.anchor_scales)
            self.cross_entropy, self.loss_box = self.build_loss(
                rpn_cls_score_reshape, rpn_bbox_pred, rpn_data)

        return features, rois
Example #30
	def proposal_target_layer(object_rois, gt_objects, gt_relationships, n_classes_obj, is_training=False):

		"""
		----------
		object_rois:  (1 x H x W x A, 5) [0, x1, y1, x2, y2], Variable, cuda
		phrase_rois:  (1 x H x W x A, 5) [0, x1, y1, x2, y2], Variable, cuda
		gt_objects:   (G_obj, 5) [x1 ,y1 ,x2, y2, obj_class] int, tensor
		gt_relationships: (G_obj, G_obj) [pred_class] int (-1 for no relationship), tensor
		gt_regions:   (G_region, 4+40) [x1, y1, x2, y2, word_index] (-1 for padding), tensor
		# gt_ishard: (G_region, 4+40) {0 | 1} 1 indicates hard
		# dontcare_areas: (D, 4) [ x1, y1, x2, y2]
		n_classes_obj
		n_classes_pred
		is_training to indicate whether in training scheme
		----------
		Returns
		----------
		rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
		labels: (1 x H x W x A, 1) {0,1,...,_num_classes-1}
		bbox_targets: (1 x H x W x A, K x4) [dx1, dy1, dx2, dy2]
		bbox_inside_weights: (1 x H x W x A, Kx4) 0/1 masks for computing the loss
		bbox_outside_weights: (1 x H x W x A, Kx4) 0/1 masks for computing the loss
		"""

		object_rois = object_rois.data.cpu().numpy()

		object_labels, object_rois, bbox_targets_object, bbox_inside_weights_object, bbox_outside_weights_object, \
		phrase_labels, phrase_rois, \
		mat_object, mat_phrase, keep_inds = \
			proposal_target_layer_py(object_rois, gt_objects, gt_relationships, n_classes_obj, is_training)

		# print labels.shape, bbox_targets.shape, bbox_inside_weights.shape
		if is_training:
			object_labels = network.np_to_variable(object_labels, is_cuda=True, dtype=torch.LongTensor)
			bbox_targets_object = network.np_to_variable(bbox_targets_object, is_cuda=True)
			bbox_inside_weights_object = network.np_to_variable(bbox_inside_weights_object, is_cuda=True)
			bbox_outside_weights_object = network.np_to_variable(bbox_outside_weights_object, is_cuda=True)
			phrase_labels = network.np_to_variable(phrase_labels, is_cuda=True, dtype=torch.LongTensor)

		object_rois = network.np_to_variable(object_rois, is_cuda=True)
		phrase_rois = network.np_to_variable(phrase_rois, is_cuda=True)

		return (object_rois, object_labels, bbox_targets_object, bbox_inside_weights_object,
				bbox_outside_weights_object), \
			   (phrase_rois, phrase_labels), \
			   mat_object, mat_phrase, keep_inds