def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor, image_width: int, image_height: int) -> Tensor:
    """Turn per-anchor regressions into a padded batch of NMS-filtered proposals.

    Returns a (batch, max_proposals, 4) tensor; rows are zero-padded so every
    image in the batch carries the same number of proposal boxes.
    """
    num_images = anchor_bboxes.shape[0]

    # Decode regression outputs into absolute boxes and keep them inside the image.
    boxes = BBox.apply_transformer(anchor_bboxes, transformers)
    boxes = BBox.clip(boxes, left=0, top=0, right=image_width, bottom=image_height)

    # Softmax over the foreground logits is monotone, so the ranking it induces
    # matches the ranking of the raw logits.
    fg_probs = F.softmax(objectnesses[:, :, 1], dim=-1)
    _, order = torch.sort(fg_probs, dim=-1, descending=True)

    kept_per_image = []
    for i in range(num_images):
        top_boxes = boxes[i][order[i]][:self._pre_nms_top_n]
        top_probs = fg_probs[i][order[i]][:self._pre_nms_top_n]
        threshold = 0.7
        keep = nms(top_boxes, top_probs, threshold)
        kept_per_image.append(top_boxes[keep][:self._post_nms_top_n])

    # Zero-pad every image's proposals up to the longest list so they stack.
    widest = max(len(it) for it in kept_per_image)
    padded = [
        torch.cat([kept, torch.zeros(widest - len(kept), 4).to(kept)])
        for kept in kept_per_image
    ]
    return torch.stack(padded, dim=0)
def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor, image_width: int, image_height: int) -> Tensor:
    """Decode anchors into score-ordered proposals and filter them with NMS.

    Returns at most `self._post_nms_top_n` clipped proposal boxes, shape (n, 4).
    """
    proposal_score = objectnesses[:, 1]
    _, sorted_indices = torch.sort(proposal_score, dim=0, descending=True)

    sorted_transformers = transformers[sorted_indices]
    sorted_anchor_bboxes = anchor_bboxes[sorted_indices]

    # detach(): proposal generation must not backprop through the regression head.
    proposal_bboxes = BBox.apply_transformer(sorted_anchor_bboxes, sorted_transformers.detach())
    proposal_bboxes = BBox.clip(proposal_bboxes, 0, 0, image_width, image_height)

    # BUG FIX: the scores must be reordered by `sorted_indices` the same way the
    # boxes were before slicing; the original sliced the UNSORTED scores, so
    # nms() received scores that belonged to different boxes.
    proposal_bboxes = proposal_bboxes[:self._pre_nms_top_n]
    proposal_score = proposal_score[sorted_indices][:self._pre_nms_top_n]

    threshold = 0.7
    kept_indices = nms(proposal_bboxes, proposal_score, threshold)
    proposal_bboxes = proposal_bboxes[kept_indices]
    return proposal_bboxes[:self._post_nms_top_n]
def generate_detections(
        self, proposal_bboxes: Tensor, proposal_classes: Tensor,
        proposal_transformers: Tensor, image_width: int,
        image_height: int) -> Tuple[Tensor, Tensor, Tensor]:
    """Decode per-class box regressions into final detections.

    For every foreground class, applies the class-specific transformer to each
    proposal, clips the result to the image, and suppresses overlaps with NMS.

    Returns:
        (bboxes, classes, probs) — flat tensors over all surviving detections.
    """
    proposal_transformers = proposal_transformers.view(
        -1, self.num_classes, 4)
    mean = self._transformer_normalize_mean.repeat(1, self.num_classes, 1)
    std = self._transformer_normalize_std.repeat(1, self.num_classes, 1)
    # BUG FIX: de-normalization is `* std + mean` (the inverse of
    # `(x - mean) / std`), as the sibling implementations in this file do;
    # the original subtracted the mean.
    proposal_transformers = proposal_transformers * std + mean

    proposal_bboxes = proposal_bboxes.view(-1, 1, 4).repeat(
        1, self.num_classes, 1)
    detection_bboxes = BBox.apply_transformer(
        proposal_bboxes.view(-1, 4), proposal_transformers.view(-1, 4))
    detection_bboxes = detection_bboxes.view(-1, self.num_classes, 4)

    # Clamp x and y coordinates independently so boxes stay inside the image.
    detection_bboxes[:, :, [0, 2]] = detection_bboxes[:, :, [0, 2]].clamp(
        min=0, max=image_width)
    detection_bboxes[:, :, [1, 3]] = detection_bboxes[:, :, [1, 3]].clamp(
        min=0, max=image_height)

    detection_probs = F.softmax(proposal_classes, dim=1)

    generated_bboxes = []
    generated_classes = []
    generated_probs = []
    # Class 0 is the background class and is skipped.
    for c in range(1, self.num_classes):
        detection_class_bboxes = detection_bboxes[:, c, :]
        detection_class_probs = detection_probs[:, c]
        threshold = 0.3
        kept_indices = nms(detection_class_bboxes, detection_class_probs,
                           threshold)
        generated_bboxes.append(detection_class_bboxes[kept_indices])
        generated_classes.append(
            torch.full((len(kept_indices), ), c, dtype=torch.int))
        generated_probs.append(detection_class_probs[kept_indices])

    generated_bboxes = torch.cat(generated_bboxes, dim=0)
    generated_classes = torch.cat(generated_classes, dim=0)
    generated_probs = torch.cat(generated_probs, dim=0)
    return generated_bboxes, generated_classes, generated_probs
def generate_detections(self, proposal_bboxes: Tensor,
                        proposal_classes: Tensor,
                        proposal_transformers: Tensor, image_width: int,
                        image_height: int
                        ):  # -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """Decode class-specific deltas on each proposal and run per-class NMS.

    Returns flat (bboxes, classes, probs, batch_indices) tensors over all
    surviving detections across the batch.
    """
    batch_size = proposal_bboxes.shape[0]

    deltas = proposal_transformers.view(batch_size, -1, self.num_classes, 4)
    std = self._transformer_normalize_std.to(device=deltas.device)
    mean = self._transformer_normalize_mean.to(device=deltas.device)
    deltas = deltas * std + mean  # undo training-time normalization

    # One copy of every proposal per class so each class decodes its own deltas.
    tiled = proposal_bboxes.unsqueeze(dim=2).repeat(1, 1, self.num_classes, 1)
    decoded = BBox.apply_transformer(tiled, deltas)
    decoded = BBox.clip(decoded, left=0, top=0, right=image_width,
                        bottom=image_height)
    probs = F.softmax(proposal_classes, dim=-1)

    bboxes_out, classes_out, probs_out, batch_out = [], [], [], []
    for image_idx in range(batch_size):
        for cls in range(1, self.num_classes):  # class 0 is background
            cls_boxes = decoded[image_idx, :, cls, :]
            cls_probs = probs[image_idx, :, cls]
            threshold = 0.3
            keep = nms(cls_boxes, cls_probs, threshold)
            bboxes_out.append(cls_boxes[keep])
            classes_out.append(torch.full((len(keep), ), cls,
                                          dtype=torch.int))
            probs_out.append(cls_probs[keep])
            batch_out.append(torch.full((len(keep), ), image_idx,
                                        dtype=torch.long))

    return (torch.cat(bboxes_out, dim=0),
            torch.cat(classes_out, dim=0),
            torch.cat(probs_out, dim=0),
            torch.cat(batch_out, dim=0))
def _generate_detections(
        self, proposal_bboxes: Tensor, proposal_classes: Tensor,
        proposal_transformers: Tensor, image_width: int,
        image_height: int) -> Tuple[Tensor, Tensor, Tensor]:
    """Decode per-class regressions into final detections (CUDA NMS variant).

    Returns:
        (bboxes, labels, probs) — flat tensors over all surviving detections.
    """
    proposal_transformers = proposal_transformers.view(
        -1, Model.NUM_CLASSES, 4)
    mean = self._transformer_normalize_mean.repeat(1, Model.NUM_CLASSES,
                                                   1).cuda()
    std = self._transformer_normalize_std.repeat(1, Model.NUM_CLASSES,
                                                 1).cuda()
    # BUG FIX: de-normalization is `* std + mean` (the inverse of
    # `(x - mean) / std`), as the sibling implementations in this file do;
    # the original subtracted the mean.
    proposal_transformers = proposal_transformers * std + mean

    proposal_bboxes = proposal_bboxes.view(-1, 1, 4).repeat(
        1, Model.NUM_CLASSES, 1)
    detection_bboxes = BBox.apply_transformer(
        proposal_bboxes.view(-1, 4), proposal_transformers.view(-1, 4))
    detection_bboxes = detection_bboxes.view(-1, Model.NUM_CLASSES, 4)

    # Clamp x and y coordinates independently so boxes stay inside the image.
    detection_bboxes[:, :, [0, 2]] = detection_bboxes[:, :, [0, 2]].clamp(
        min=0, max=image_width)
    detection_bboxes[:, :, [1, 3]] = detection_bboxes[:, :, [1, 3]].clamp(
        min=0, max=image_height)

    proposal_probs = F.softmax(proposal_classes, dim=1)
    detection_bboxes = detection_bboxes.cpu()
    proposal_probs = proposal_probs.cpu()

    generated_bboxes = []
    generated_labels = []
    generated_probs = []
    # Class 0 is the background class and is skipped.
    for c in range(1, Model.NUM_CLASSES):
        detection_class_bboxes = detection_bboxes[:, c, :]
        proposal_class_probs = proposal_probs[:, c]
        # NMS.suppress expects boxes in descending-score order.
        _, sorted_indices = proposal_class_probs.sort(descending=True)
        detection_class_bboxes = detection_class_bboxes[sorted_indices]
        proposal_class_probs = proposal_class_probs[sorted_indices]
        keep_indices = NMS.suppress(detection_class_bboxes.cuda(),
                                    threshold=0.3)
        detection_class_bboxes = detection_class_bboxes[keep_indices]
        proposal_class_probs = proposal_class_probs[keep_indices]
        generated_bboxes.append(detection_class_bboxes)
        generated_labels.append(torch.ones(len(keep_indices)) * c)
        generated_probs.append(proposal_class_probs)

    generated_bboxes = torch.cat(generated_bboxes, dim=0)
    generated_labels = torch.cat(generated_labels, dim=0)
    generated_probs = torch.cat(generated_probs, dim=0)
    return generated_bboxes, generated_labels, generated_probs
def generate_detections(self, proposal_bboxes, proposal_classes, proposal_transformers, image_width, image_height):
    """Decode class-specific regression deltas on each proposal into detections.

    Steps: reshape the deltas per class, de-normalize them, tile the proposals
    once per class, apply the deltas, clip the resulting boxes to the image,
    then run NMS on each foreground class of each image independently.
    """
    batch_size = proposal_bboxes.shape[0]
    proposal_transformers = proposal_transformers.view(batch_size, -1, self.num_classes, 4)
    std = self._transformer_normalize_std.to(device=proposal_transformers.device)
    mean = self._transformer_normalize_mean.to(device=proposal_transformers.device)
    proposal_transformers = proposal_transformers * std + mean  # undo normalization

    proposal_bboxes = proposal_bboxes.unsqueeze(dim=2).repeat(1, 1, self.num_classes, 1)
    detection_bboxes = BBox.apply_transformer(proposal_bboxes, proposal_transformers)
    detection_bboxes = BBox.clip(detection_bboxes, left=0, top=0, right=image_width, bottom=image_height)
    detection_probs = F.softmax(proposal_classes, dim=-1)

    boxes, labels, scores, image_ids = [], [], [], []
    for batch_index in range(batch_size):
        # Class iteration starts at 1: class 0 is the background class.
        for label in range(1, self.num_classes):
            class_bboxes = detection_bboxes[batch_index, :, label, :]
            class_probs = detection_probs[batch_index, :, label]
            threshold = 0.3
            kept = nms(class_bboxes, class_probs, threshold)
            boxes.append(class_bboxes[kept])
            labels.append(torch.full((len(kept),), label, dtype=torch.int))
            scores.append(class_probs[kept])
            image_ids.append(torch.full((len(kept),), batch_index, dtype=torch.long))

    return (torch.cat(boxes, dim=0), torch.cat(labels, dim=0),
            torch.cat(scores, dim=0), torch.cat(image_ids, dim=0))
def _generate_proposals(anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor, image_width: int, image_height: int) -> Tensor:
    """Decode anchors into clipped, score-ordered proposals, dropping tiny boxes."""
    scores = objectnesses[:, 1]
    _, order = torch.sort(scores, dim=0, descending=True)

    # detach(): box decoding here must not feed gradients back into the regressor.
    proposals = BBox.apply_transformer(anchor_bboxes[order], transformers[order].detach())
    proposals = BBox.clip(proposals, 0, 0, image_width, image_height)

    # Discard boxes narrower or shorter than the minimum side length (16 px).
    min_side = 16
    wide_enough = proposals[:, 2] - proposals[:, 0] >= min_side
    tall_enough = proposals[:, 3] - proposals[:, 1] >= min_side
    keep = (wide_enough & tall_enough).nonzero().view(-1)
    return proposals[keep]
def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor, image_width: int, image_height: int) -> Tensor:
    """Decode anchors into score-ordered proposals and thin them out with NMS."""
    fg_scores = objectnesses[:, 1]
    _, order = torch.sort(fg_scores, dim=0, descending=True)

    ranked_transformers = transformers[order]
    ranked_anchors = anchor_bboxes[order]
    # detach(): proposal generation must not backprop through the regression head.
    decoded = BBox.apply_transformer(ranked_anchors, ranked_transformers.detach())
    decoded = BBox.clip(decoded, 0, 0, image_width, image_height)

    # Boxes arrive score-ordered, so a plain head slice keeps the best ones.
    candidates = decoded[:self._pre_nms_top_n]
    kept = NMS.suppress(candidates, threshold=0.7)
    return candidates[kept][:self._post_nms_top_n]
def create_bboxes(self, proposal_bboxes, proposal_transformers, image_width, image_height, batch_size):
    """Apply de-normalized regression deltas to proposals and clip to the image."""
    deltas = proposal_transformers.view(batch_size, -1, 4)
    # Add a singleton class axis so the shapes line up with apply_transformer.
    expanded_proposals = proposal_bboxes.unsqueeze(dim=2).repeat(1, 1, 1, 1)

    std = self._transformer_normalize_std.to(device=deltas.device)
    mean = self._transformer_normalize_mean.to(device=deltas.device)
    denormalized = deltas * std + mean  # undo training-time normalization

    bboxes = BBox.apply_transformer(expanded_proposals, denormalized.unsqueeze(2))
    return BBox.clip(bboxes, left=0, top=0, right=image_width, bottom=image_height)
def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor, image_width: int, image_height: int) -> Tensor:
    # ga_n = anchors_n = anchors_x * anchors_y * 9
    # anchor_bboxes: (bn, ga_n, 4); objectnesses: (bn, ga_n, 2); transformers: (bn, ga_n, 4)
    batch_size = anchor_bboxes.shape[0]
    # Transform all anchors. Anchors arrive in left-top-right-bottom form; inside
    # the helper they are converted to center-width-height, and converted back to
    # left-top-right-bottom before returning.
    proposal_bboxes = BBox.apply_transformer(anchor_bboxes, transformers)
    proposal_bboxes = BBox.clip(proposal_bboxes, left=0, top=0,
                                right=image_width,
                                bottom=image_height)  # (bn, ga_n, 4)
    # objectnesses[:, :, 1] is the foreground confidence column, while
    # objectnesses[:, :, 0] is the background confidence column.
    proposal_probs = F.softmax(
        objectnesses[:, :, 1],
        dim=-1)  # (bn, ga_n). softmax is monotone, so applying it across all
                 # foreground confidences preserves their ranking.
    _, sorted_indices = torch.sort(
        proposal_probs, dim=-1,
        descending=True)  # (bn, ga_n): foreground confidences, descending.
    nms_proposal_bboxes_batch = []
    for batch_index in range(batch_size):
        sorted_bboxes = proposal_bboxes[batch_index][sorted_indices[
            batch_index]][:self._pre_nms_top_n]  # (pre_nms_n, 4)
        sorted_probs = proposal_probs[batch_index][sorted_indices[
            batch_index]][:self._pre_nms_top_n]  # (pre_nms_n,)
        threshold = 0.7
        kept_indices = nms(
            sorted_bboxes, sorted_probs, threshold
        )  # (nms_n,) — run once here in the RPN; also run in
           # detection.generate_detections at inference time.
        nms_bboxes = sorted_bboxes[
            kept_indices][:self.
                          _post_nms_top_n]  # (post_nms_n, 4), post_nms_n <= _post_nms_top_n
        nms_proposal_bboxes_batch.append(nms_bboxes)
    # Find the image in the batch with the most proposal boxes (max_nms_n); every
    # other image's boxes are zero-padded up to max_nms_n rows (along dim 0),
    # i.e. every image in the batch ends up with the same number of proposals.
    max_nms_proposal_bboxes_length = max(
        [len(it) for it in nms_proposal_bboxes_batch])
    padded_proposal_bboxes = []
    for nms_proposal_bboxes in nms_proposal_bboxes_batch:
        padded_proposal_bboxes.append(
            torch.cat(
                [
                    nms_proposal_bboxes,
                    torch.zeros(
                        max_nms_proposal_bboxes_length -
                        len(nms_proposal_bboxes), 4).to(nms_proposal_bboxes)
                ]
            )  # pad each image's boxes with `delta` extra rows of 4 zeros, where
               # delta = max_nms_proposal_bboxes_length - len(nms_proposal_bboxes)
        )
    padded_proposal_bboxes = torch.stack(
        padded_proposal_bboxes, dim=0
    )  # (bn, max_nms_n, 4) -> referred to externally as pb@(bn, gp_n, 4), where
       # gp_n denotes the count after generate_proposals processing
    return padded_proposal_bboxes
def generate_detections(
        self, proposal_bboxes: Tensor, proposal_classes: Tensor,
        proposal_transformers: Tensor, image_width: int,
        image_height: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    batch_size = proposal_bboxes.shape[0]
    proposal_transformers = proposal_transformers.view(
        batch_size, -1, self.num_classes, 4)  # (bn, gp_n, num_cls, 4)
    # The transformers were normalized when the loss was computed, so the
    # normalization must be inverted here.
    transformer_normalize_std = self._transformer_normalize_std.to(
        device=proposal_transformers.device)
    transformer_normalize_mean = self._transformer_normalize_mean.to(
        device=proposal_transformers.device)
    proposal_transformers = proposal_transformers * transformer_normalize_std + transformer_normalize_mean  # (bn, gp_n, num_cls, 4)
    proposal_bboxes = proposal_bboxes.unsqueeze(dim=2).repeat(
        1, 1, self.num_classes, 1)  # (bn, gp_n, num_cls, 4)
    detection_bboxes = BBox.apply_transformer(
        proposal_bboxes, proposal_transformers)  # (bn, gp_n, num_cls, 4)
    detection_bboxes = BBox.clip(detection_bboxes, left=0, top=0,
                                 right=image_width, bottom=image_height)
    detection_probs = F.softmax(proposal_classes,
                                dim=-1)  # (bn, gp_n, num_cls)
    all_detection_bboxes = []
    all_detection_classes = []
    all_detection_probs = []
    all_detection_batch_indices = []
    for batch_index in range(batch_size):
        for c in range(1, self.num_classes):
            # 1. Select the boxes/probabilities of one class (class 0 is background).
            class_bboxes = detection_bboxes[batch_index, :, c, :]  # (gp_n, 4)
            class_probs = detection_probs[batch_index, :, c]  # (gp_n,)
            # 2. Run NMS over the boxes of this single class.
            threshold = 0.3
            kept_indices = nms(class_bboxes, class_probs, threshold)  # (nc,)
            class_bboxes = class_bboxes[kept_indices]  # (nc, 4)
            class_probs = class_probs[kept_indices]  # (nc,)
            # 3. Record the surviving boxes together with their class label,
            #    confidence and in-batch image index.
            all_detection_bboxes.append(class_bboxes)
            all_detection_classes.append(
                torch.full((len(kept_indices), ), c, dtype=torch.int))
            all_detection_probs.append(class_probs)
            all_detection_batch_indices.append(
                torch.full((len(kept_indices), ), batch_index,
                           dtype=torch.long))
    # Returned in this flat form, zip(boxes, classes, ...) yields one
    # (box, cls, prob, batch_index) tuple per detection.
    all_detection_bboxes = torch.cat(all_detection_bboxes, dim=0)  # (gd_n, 4)
    all_detection_classes = torch.cat(all_detection_classes, dim=0)  # (gd_n,)
    all_detection_probs = torch.cat(all_detection_probs, dim=0)  # (gd_n,)
    all_detection_batch_indices = torch.cat(
        all_detection_batch_indices, dim=0)  # (gd_n,)
    return all_detection_bboxes, all_detection_classes, all_detection_probs, all_detection_batch_indices