def forward(self, loc_data, conf_data, prior_data):
    """Decode box regressions and run per-class NMS for every image in a batch.

    Args:
        loc_data: Location predictions, indexed per image as ``loc_data[i]``.
            Moved to the CPU before processing.
        conf_data: Class confidences, viewed here as
            ``(batch, num_priors, num_classes)``. Moved to the CPU as well.
        prior_data: Prior boxes; ``prior_data.size(0)`` is the prior count.

    Returns:
        A ``(batch, num_classes, top_k, 5)`` tensor. Each filled row is
        ``(score, box[0], box[1], box[2], box[3])`` for a detection kept by
        ``nms``; rows that are never written (including all of class 0)
        remain zero.
    """
    loc_data = loc_data.cpu()
    conf_data = conf_data.cpu()

    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    # (batch, num_priors, num_classes) -> (batch, num_classes, num_priors)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Process every image of the batch independently.
    for i in range(num):
        # Decode the prior boxes into predicted boxes.
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        conf_scores = conf_preds[i].clone()

        # Class index 0 is skipped (presumably background -- confirm).
        for cl in range(1, self.num_classes):
            # Keep only the priors whose score exceeds the threshold.
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # Non-maximum suppression for this class.
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    # NOTE: the former trailing block
    #     flt[(rank < top_k)...expand_as(flt)].fill_(0)
    # has been removed. Boolean-mask indexing returns a copy, so the
    # fill_ never modified `output`; the two sorts were wasted work and
    # the `view(num, -1, 5)` raised for an empty batch.
    return output
def forward(self, loc_data, conf_data, prior_data):
    """Turn raw location/confidence predictions into per-class detections.

    Both prediction tensors are moved to the CPU first. For each image and
    each class index from 1 upwards, priors whose confidence exceeds
    ``self.conf_thresh`` are decoded and passed through ``nms``.

    Returns:
        A ``(batch, num_classes, top_k, 5)`` tensor whose filled rows are
        ``(score, 4 box values)``; rows never written stay zero.
    """
    # Run everything on the CPU.
    loc_data = loc_data.cpu()
    conf_data = conf_data.cpu()

    batch_size = loc_data.size(0)
    prior_count = prior_data.size(0)

    output = torch.zeros(batch_size, self.num_classes, self.top_k, 5)

    # Rearrange the class predictions to
    # (batch, num_classes, num_priors).
    conf_preds = conf_data.view(
        batch_size, prior_count, self.num_classes
    ).transpose(2, 1)

    # One iteration per image (the original notes that ordinary
    # prediction uses a single image, so this usually runs once).
    for img_idx in range(batch_size):
        # Decode the priors into predicted boxes.
        decoded_boxes = decode(loc_data[img_idx], prior_data, self.variance)
        image_scores = conf_preds[img_idx].clone()

        for cls_idx in range(1, self.num_classes):
            class_scores = image_scores[cls_idx]

            # Threshold the scores and keep the ones that pass.
            keep_mask = class_scores.gt(self.conf_thresh)
            scores = class_scores[keep_mask]
            if scores.size(0) == 0:
                continue

            # Pull out the boxes that passed the threshold.
            box_mask = keep_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[box_mask].view(-1, 4)

            # Non-maximum suppression over the surviving boxes.
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            kept = ids[:count]
            output[img_idx, cls_idx, :count] = torch.cat(
                (scores[kept].unsqueeze(1), boxes[kept]), 1)

    return output
def forward(self, loc_data, conf_data, prior_data):
    """Decode predictions and apply hard or soft NMS per class.

    Args:
        loc_data: (tensor) Loc preds from loc layers
            Shape: [batch,num_priors*4]
        conf_data: (tensor) Conf preds from conf layers
            Shape: [batch*num_priors,num_classes]
        prior_data: (tensor) Prior boxes and variances from priorbox layers
            Shape: [1,num_priors,4]
        (Shapes above are quoted from the original docstring.)

    Returns:
        ``self.output``, reallocated on every call with shape
        ``(batch, num_classes, top_k, 5)``; filled rows are
        ``(score, 4 box values)`` and the rest are zero.
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)

    # Allocate a fresh zeroed buffer with the dtype/device of the cached
    # one. The previous `zero_()` + in-place `expand_()` relied on an
    # in-place expand that current PyTorch does not provide, and an
    # expanded size-1 batch dimension would make every image share the
    # same storage.
    self.output = self.output.new_zeros(
        num, self.num_classes, self.top_k, 5)

    if num == 1:
        # size batch x num_classes x num_priors
        conf_preds = conf_data.t().contiguous().unsqueeze(0)
    else:
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()

        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            # `numel()` detects an empty selection regardless of how many
            # dimensions the empty tensor reports; `dim() == 0` does not.
            if scores.numel() == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)

            # idx of highest scoring and non-overlapping boxes per class
            if self.soft_nms == -1:
                # Hard NMS.
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                self.output[i, cl, :count] = torch.cat(
                    (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]),
                    1)
            else:
                # Soft NMS; `self.soft_nms` is forwarded as its `type`.
                # NOTE(review): assumes soft_nms returns exactly `count`
                # rows -- confirm against its implementation.
                count = boxes.size(0) if boxes.size(0) < self.top_k \
                    else self.top_k
                new_scores, new_boxes = soft_nms(
                    boxes, scores, self.nms_thresh, self.top_k,
                    type=self.soft_nms)
                self.output[i, cl, :count] = torch.cat(
                    (new_scores.unsqueeze(1), new_boxes), 1)

    return self.output
def predict(self, image, top_k=-1, prob_threshold=None):
    """Run the detector on one image and post-process with per-class NMS.

    Arguments:
        image: image input for the predictor; ``image.shape`` must unpack
            to ``(height, width, channels)``.
        top_k: keep top_k results. If k <= 0, keep all the results
            (forwarded unchanged to ``box_utils.nms``).
        prob_threshold: threshold for probability. ``None`` selects
            ``self.filter_threshold``; any explicit number, including
            ``0``, is honoured.

    Returns:
        predicted boxes, labels and their probability. Box columns 0/2 are
        multiplied by the image width and columns 1/3 by its height. Three
        empty tensors are returned when nothing passes the threshold.
    """
    cpu_device = torch.device("cpu")
    height, width, _ = image.shape
    image = self.transform(image)
    images = image.unsqueeze(0)
    images = images.to(self.device)
    with torch.no_grad():
        self.timer.start()
        scores, boxes = self.net.forward(images)
        print("Inference time: ", self.timer.end())
    boxes = boxes[0]
    scores = scores[0]
    # `is None` rather than a truthiness test, so that an explicit
    # threshold of 0 is not silently replaced by the default.
    if prob_threshold is None:
        prob_threshold = self.filter_threshold
    # this version of nms is slower on GPU, so we move data to CPU.
    boxes = boxes.to(cpu_device)
    scores = scores.to(cpu_device)
    picked_box_probs = []
    picked_labels = []
    # Class index 0 is skipped.
    for class_index in range(1, scores.size(1)):
        probs = scores[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.size(0) == 0:
            continue
        subset_boxes = boxes[mask, :]
        # Rows are (4 box values, probability).
        box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
        box_probs = box_utils.nms(box_probs, self.nms_method,
                                  score_threshold=prob_threshold,
                                  iou_threshold=self.iou_threshold,
                                  sigma=self.sigma,
                                  top_k=top_k,
                                  candidate_size=self.candidate_size)
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.size(0))
    if not picked_box_probs:
        return torch.tensor([]), torch.tensor([]), torch.tensor([])
    picked_box_probs = torch.cat(picked_box_probs)
    # Scale box values by the original image size.
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return (picked_box_probs[:, :4], torch.tensor(picked_labels),
            picked_box_probs[:, 4])
def forward(ctx, loc_data, conf_data, prior_data):
    """Decode box regressions and run per-class NMS using ``Detect`` settings.

    Example shapes recorded in the original comments (3 classes, 8732
    priors, batch of 1):
        loc_data:   torch.Size([1, 8732, 4])
        conf_data:  torch.Size([1, 8732, 3])
        prior_data: torch.Size([8732, 4])

    Returns:
        A ``(batch, Detect.num_classes, Detect.top_k, 5)`` tensor, e.g.
        torch.Size([1, 3, 200, 5]); each filled row is one confidence
        followed by four box values.
    """
    loc_data = loc_data.cpu()
    conf_data = conf_data.cpu()

    num = loc_data.size(0)  # batch size (1 in the example)
    num_priors = prior_data.size(0)  # 8732 in the example
    output = torch.zeros(num, Detect.num_classes, Detect.top_k, 5)
    # -> (batch, num_classes, num_priors), e.g. torch.Size([1, 3, 8732])
    conf_preds = conf_data.view(num, num_priors,
                                Detect.num_classes).transpose(2, 1)

    # Process every image of the batch.
    for i in range(num):
        # Decode the prior boxes into predicted boxes,
        # e.g. torch.Size([8732, 4]).
        decoded_boxes = decode(loc_data[i], prior_data, Detect.variance)
        conf_scores = conf_preds[i].clone()  # e.g. torch.Size([3, 8732])

        # Start from 1 because class 0 represents the background.
        for cl in range(1, Detect.num_classes):
            # Mask of priors whose score exceeds the confidence threshold.
            c_mask = conf_scores[cl].gt(Detect.conf_thresh)
            # Confidence scores of the selected priors.
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            # Boxes of the selected priors, shape (n_selected, 4).
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # Non-maximum suppression for this class.
            ids, count = nms(boxes, scores, Detect.nms_thresh, Detect.top_k)
            output[i, cl, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    # The former trailing sort + `flt[mask].fill_(0)` block was removed:
    # as the original comments already observed, masked indexing yields a
    # copy, so the fill never affected `output`.
    return output
def forward(self, loc_data, conf_data, prior_data):
    """Decode box regressions and run per-class NMS using ``Config`` values.

    Args:
        loc_data: Location predictions, indexed per image; moved to CPU.
        conf_data: Class confidences, viewed as
            ``(batch, num_priors, num_classes)``; moved to CPU.
        prior_data: Prior boxes; ``prior_data.size(0)`` is the prior count.

    Returns:
        A ``(batch, num_classes, top_k, 5)`` tensor whose filled rows are
        ``(score, 4 box values)``.

    Raises:
        ValueError: If ``Config['nms_thresh']`` is zero or negative.
    """
    if Config['nms_thresh'] <= 0:
        # The check rejects zero as well, so "positive" is the accurate
        # wording (the message used to say "non negative").
        raise ValueError('nms_threshold must be positive.')

    loc_data = loc_data.cpu()
    conf_data = conf_data.cpu()

    # Number of images; prediction normally uses a single image.
    num = loc_data.size(0)
    # Number of prior boxes.
    num_priors = prior_data.size(0)
    # Result buffer.
    output = torch.zeros(num, Config['num_classes'], Config["top_k"], 5)
    # Swap the last two axes of the class predictions. `transpose`
    # returns a view that shares memory with its input.
    conf_preds = conf_data.view(num, num_priors,
                                Config['num_classes']).transpose(2, 1)

    # Process every image of the batch.
    for i in range(num):
        # Decode the prior boxes into predicted boxes.
        decoded_boxes = decode(loc_data[i], prior_data, Config['variance'])
        # Per-class scores of this image (cloned, so the view is untouched).
        conf_scores = conf_preds[i].clone()

        for cl in range(1, Config['num_classes']):
            # gt(a, b) is a > b; yields a boolean mask.
            c_mask = conf_scores[cl].gt(Config["conf_thresh"])
            # Keep only the scores selected by the mask.
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # Non-maximum suppression for this class.
            ids, count = nms(boxes, scores, Config['nms_thresh'],
                             Config["top_k"])
            output[i, cl, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    # The former trailing sort + `flt[mask].fill_(0)` block was removed:
    # masked indexing yields a copy, so it never changed `output`.
    return output
def predict(self, image, top_k=-1, prob_threshold=None):
    """Run the detector on one image and post-process with per-class NMS.

    Arguments:
        image: input image; ``image.shape`` must unpack to
            ``(height, width, channels)``.
        top_k: forwarded unchanged to ``box_utils.nms``.
        prob_threshold: minimum class probability. ``None`` selects
            ``self.filter_threshold``; any explicit number, including
            ``0``, is honoured.

    Returns:
        ``(boxes, labels, probs)``. Box columns 0/2 are multiplied by the
        image width and columns 1/3 by its height. Three empty tensors are
        returned when nothing passes the threshold.
    """
    cpu_device = torch.device("cpu")
    height, width, _ = image.shape
    image = self.transform(image)
    images = image.unsqueeze(0)
    images = images.to(self.device)
    with torch.no_grad():
        # NOTE(review): the timer is started here but never stopped in
        # this method -- confirm that is intended.
        self.timer.start()
        scores, boxes = self.net.forward(images)
    boxes = boxes[0]
    scores = scores[0]
    # `is None` rather than a truthiness test, so that an explicit
    # threshold of 0 is not silently replaced by the default.
    if prob_threshold is None:
        prob_threshold = self.filter_threshold
    # this version of nms is slower on GPU, so we move data to CPU.
    boxes = boxes.to(cpu_device)
    scores = scores.to(cpu_device)
    picked_box_probs = []
    picked_labels = []
    # Class index 0 is skipped.
    for class_index in range(1, scores.size(1)):
        probs = scores[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.size(0) == 0:
            continue
        subset_boxes = boxes[mask, :]
        # Rows are (4 box values, probability).
        box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
        box_probs = box_utils.nms(box_probs, self.nms_method,
                                  score_threshold=prob_threshold,
                                  iou_threshold=self.iou_threshold,
                                  sigma=self.sigma,
                                  top_k=top_k,
                                  candidate_size=self.candidate_size)
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.size(0))
    if not picked_box_probs:
        return torch.tensor([]), torch.tensor([]), torch.tensor([])
    picked_box_probs = torch.cat(picked_box_probs)
    # Scale box values by the original image size.
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return (picked_box_probs[:, :4], torch.tensor(picked_labels),
            picked_box_probs[:, 4])
def forward(self, loc_p, class_p, priors):
    """Decode box regressions and run per-class NMS for a batch.

    Args:
        loc_p: Location predictions, indexed per image as ``loc_p[idx]``.
        class_p: Class scores; axes 1 and 2 are swapped here so that
            ``class_p[idx][c]`` is the score vector of class ``c``.
        priors: Prior boxes handed to ``decode``.

    Returns:
        A ``(batch, num_classes, top_k, 5)`` tensor whose filled rows are
        ``(score, 4 box values)``; rows never written stay zero.
    """
    batch_size = loc_p.size(0)
    output = torch.zeros(batch_size, self.num_classes, self.top_k, 5)
    class_p = class_p.transpose(1, 2)

    for idx in range(batch_size):
        decoded_boxes = decode(loc_p[idx], priors, self.variance)

        # Class index 0 is skipped.
        for c in range(1, self.num_classes):
            class_scores = class_p[idx][c]
            c_mask = class_scores > self.conf_thresh
            scores = class_scores[c_mask]
            # Bail out before gathering boxes when nothing passed.
            if len(scores) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[idx, c, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    return output
def forward(self, loc_data, conf_data, prior_data):
    """Decode predictions into boxes and apply per-class NMS.

    Args:
        loc_data: (tensor) Loc preds from loc layers
            Shape: [batch,num_priors*4]
        conf_data: (tensor) Conf preds from conf layers
            Shape: [batch*num_priors,num_classes]
        prior_data: (tensor) Prior boxes and variances from priorbox layers
            Shape: [1,num_priors,4]
        (Shapes above are quoted from the original docstring.)

    Returns:
        Tensor of shape ``batch_num * num_classes * top_k * 5``, where the
        5 values are score, x_min, y_min, x_max and y_max (per the original
        comments). ``output[batch_index][class_index]`` holds the rows kept
        by ``nms`` for that class; when fewer than ``top_k`` boxes are kept
        the remaining rows are zero.
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()

        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    # NOTE: the former trailing block
    #     flt[(rank < top_k)...expand_as(flt)].fill_(0)
    # has been removed. Boolean-mask indexing returns a copy, so the
    # fill_ never modified `output`; the two sorts were wasted work and
    # the `reshape(num, -1, 5)` raised for an empty batch.
    return output
def compute_nms(self, scores, boxes, original_size):
    """Apply per-class hard NMS and scale the surviving boxes.

    Args:
        scores: 2-D tensor indexed ``[prior, class]``.
        boxes: 2-D tensor indexed ``[prior, box value]``.
        original_size: Triple unpacked as ``(height, width, _)``.

    Returns:
        ``(boxes, labels, probs)``. Box columns 0/2 are multiplied by
        ``width`` and columns 1/3 by ``height``. Three empty tensors are
        returned when no score exceeds the 0.01 threshold.
    """
    height, width, _ = original_size
    # Bring both tensors to the CPU and out of the autograd graph, so the
    # mask computed from `scores` can index `boxes` and the two can be
    # concatenated (previously only `boxes` was moved).
    boxes = boxes.cpu().detach()
    scores = scores.cpu().detach()

    # Fixed confidence threshold.
    prob_threshold = 0.01

    picked_box_probs = []
    picked_labels = []
    # Class index 0 is skipped.
    for class_index in range(1, scores.size(1)):
        probs = scores[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.size(0) == 0:
            continue
        subset_boxes = boxes[mask, :]
        # Rows are (4 box values, probability).
        box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
        box_probs = nms(box_probs, "hard",
                        score_threshold=prob_threshold,
                        iou_threshold=self.config.iou_threshold,
                        sigma=0.5,
                        top_k=-1,
                        candidate_size=200)
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.size(0))

    if not picked_box_probs:
        return torch.tensor([]), torch.tensor([]), torch.tensor([])

    picked_box_probs = torch.cat(picked_box_probs)
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return (picked_box_probs[:, :4], torch.tensor(picked_labels),
            picked_box_probs[:, 4])
def forward(self, loc_data, conf_data, prior_data):
    """Decode predictions into boxes and apply per-class NMS.

    Args:
        loc_data: (tensor) Loc preds from loc layers
            Shape: [batch,num_priors*4]
        conf_data: (tensor) Conf preds from conf layers
            Shape: [batch*num_priors,num_classes]
        prior_data: (tensor) Prior boxes and variances from priorbox layers
            Shape: [1,num_priors,4]
        (Shapes above are quoted from the original docstring.)

    Returns:
        A ``(batch, num_classes, top_k, 5)`` tensor whose filled rows are
        ``(score, 4 box values)``; rows never written (including all of
        class 0) remain zero.
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()

        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    # NOTE: the former trailing block
    #     flt[(rank < top_k)...expand_as(flt)].fill_(0)
    # has been removed. Boolean-mask indexing returns a copy, so the
    # fill_ never modified `output`; the two sorts were wasted work and
    # the `view(num, -1, 5)` raised for an empty batch.
    return output
# Discard detections whose score does not exceed the confidence threshold.
inds = np.where(scores > args.confidence_threshold)[0]
boxes, landms, scores = boxes[inds], landms[inds], scores[inds]

# Sort by descending score and keep at most top_k candidates before NMS.
order = scores.argsort()[::-1][:args.top_k]
boxes, landms, scores = boxes[order], landms[order], scores[order]

# Run NMS on rows of (box values, score) stored as float32.
dets = np.hstack((boxes, scores[:, np.newaxis]))
dets = dets.astype(np.float32, copy=False)
keep = nms(dets, args.nms_threshold)
dets = dets[keep, :]
landms = landms[keep]

# Cap the number of survivors, then append the landmark columns.
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)

# Draw a rectangle for every detection at or above the visual threshold.
for b in dets:
    if b[4] < args.vis_thres:
        continue
    text = "{:.4f}".format(b[4])
    b = [int(value) for value in b]
    cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
# Discard detections whose score does not exceed the confidence threshold.
inds = np.where(scores > cfg["conf_threshold"])[0]
boxes, scores = boxes[inds], scores[inds]

# Sort by descending score and keep at most top_k candidates before NMS.
order = scores.argsort()[::-1][:cfg["top_k"]]
boxes, scores = boxes[order], scores[order]

# Rows of (box values, score) as float32; NMS itself runs on torch tensors.
dets = np.hstack((boxes, scores[:, np.newaxis]))
dets = dets.astype(np.float32, copy=False)
keep_ind, keep_count = nms(
    torch.from_numpy(boxes.astype(np.float32)),
    torch.from_numpy(scores.astype(np.float32)),
    cfg["nms_threshold"],
    cfg['keep_top_k'],
)
dets = dets[keep_ind[:keep_count], :]
if dets.ndim == 1:
    # A single detection comes back 1-D; restore the row dimension.
    dets = dets[np.newaxis, :]

# Cap the number of survivors.
dets = dets[:cfg["keep_top_k"], :]

# Append the image path to the results file, then walk the detections.
# NOTE(review): the loop nesting is inferred from a collapsed line and the
# loop body appears truncated here -- confirm against the full source.
with open(save_file, 'a') as f:
    f.write(str(img_path) + ":\n")
    for b in dets:
        if b[4] < cfg['min_for_visual']:
            continue
def predict(self, image, full_processing=True, inter_tensor=None):
    """Run the model on one image and post-process with per-class hard NMS.

    Args:
        image: Input image; ``image.shape`` must unpack to
            ``(height, width, channels)``.
        full_processing: Forwarded to ``self.forward`` (and used as the
            ``resize`` flag of ``self.prediction_transform``). When false,
            empty results are returned right after the forward pass.
        inter_tensor: When given, the image is not transformed and
            ``self.forward`` is called with ``None`` plus this tensor.

    Returns:
        ``(boxes, labels, probs, 0)``. Box columns 0/2 are multiplied by
        the image width and columns 1/3 by its height. The three tensors
        are empty when post-processing is skipped or nothing passes the
        0.01 threshold.
    """
    height, width, _ = image.shape

    use_intermediate = inter_tensor is not None
    if not use_intermediate:
        image = self.prediction_transform(image, resize=full_processing)
        images = image.unsqueeze(0)
        images = images.to(self.device)

    with torch.no_grad():
        if use_intermediate:
            scores, boxes = self.forward(None,
                                         full_processing=full_processing,
                                         inter_tensor=inter_tensor)
        else:
            scores, boxes = self.forward(images,
                                         full_processing=full_processing)

    if not full_processing:
        return torch.tensor([]), torch.tensor([]), torch.tensor([]), 0

    # Work on the first batch element, on the CPU.
    cpu = torch.device("cpu")
    boxes = boxes[0]
    scores = scores[0]
    boxes = boxes.to(cpu)
    scores = scores.to(cpu)

    # Fixed confidence threshold.
    score_floor = 0.01

    kept_per_class = []
    kept_labels = []
    # Class index 0 is skipped.
    for class_index in range(1, scores.size(1)):
        class_probs = scores[:, class_index]
        selected = class_probs > score_floor
        class_probs = class_probs[selected]
        if class_probs.size(0) == 0:
            continue
        # Rows are (4 box values, probability).
        candidates = torch.cat(
            [boxes[selected, :], class_probs.reshape(-1, 1)], dim=1)
        survivors = nms(candidates, "hard",
                        score_threshold=score_floor,
                        iou_threshold=self.config.iou_threshold,
                        sigma=0.5,
                        top_k=-1,
                        candidate_size=200)
        kept_per_class.append(survivors)
        kept_labels.extend([class_index] * survivors.size(0))

    if not kept_per_class:
        return torch.tensor([]), torch.tensor([]), torch.tensor([]), 0

    merged = torch.cat(kept_per_class)
    # Scale box values by the original image size.
    for column, factor in enumerate((width, height, width, height)):
        merged[:, column] *= factor
    return merged[:, :4], torch.tensor(kept_labels), merged[:, 4], 0
def validation_step(self, batch, batch_nb):
    """Validate one batch: compute the loss, run NMS, and store detections.

    The first sample's predictions are thresholded at 0.01, passed through
    per-class hard NMS, scaled by the original image size, optionally drawn
    with OpenCV, and appended to ``self.results`` as rows of
    ``(val_index, label, prob, box + 1)``.

    Returns:
        ``{'val_loss': loss, 'log': {'val_loss': loss}}``.
    """
    if self.hparams.net == 'lstm':
        # Detach hidden states from graph
        self.detach_hidden()
        lstm = self.pred_dec.bottleneck_lstm1
        if (int(lstm.hidden_state.shape[0]) != 1
                or int(lstm.cell_state.shape[0]) != 1):
            # Adjust hidden state due to batch size
            (h, c) = lstm.cell.init_hidden(
                1, hidden=lstm.hidden_channels, shape=(10, 10))
            lstm.hidden_state = h
            lstm.cell_state = c

    # Ground truth comes with the batch.
    images, boxes_batch, labels_batch, original_size = batch
    scores, boxes = self.forward(images)
    regression_loss, classification_loss = self.loss_criterion(
        self.pred_dec.confidences, self.pred_dec.locations,
        labels_batch, boxes_batch)
    loss = regression_loss + classification_loss
    self.accum_val_loss += loss.item()

    # Apply inverse transform (first sample of the batch only).
    boxes = boxes[0]
    scores = scores[0]
    image, _, _ = self.inverse_val_transform(images[0], None, None)
    height, width, _ = original_size
    height = height.item()
    width = width.item()
    original_size = (width, height)

    # Fixed confidence threshold.
    score_floor = 0.01

    # Per-class NMS; class index 0 is skipped.
    kept_per_class = []
    kept_labels = []
    for class_index in range(1, scores.size(1)):
        class_probs = scores[:, class_index]
        selected = class_probs > score_floor
        class_probs = class_probs[selected]
        if class_probs.size(0) == 0:
            continue
        candidates = torch.cat(
            [boxes[selected, :], class_probs.reshape(-1, 1)], dim=1)
        survivors = nms(candidates, "hard",
                        score_threshold=score_floor,
                        iou_threshold=self.config.iou_threshold,
                        sigma=0.5,
                        top_k=-1,
                        candidate_size=200)
        kept_per_class.append(survivors)
        kept_labels.extend([class_index] * survivors.size(0))

    if kept_per_class:
        merged = torch.cat(kept_per_class)
        # Scale box values by the original image size.
        for column, factor in enumerate((width, height, width, height)):
            merged[:, column] *= factor
        boxes = merged[:, :4]
        labels = torch.tensor(kept_labels)
        probs = merged[:, 4]
    else:
        boxes = torch.tensor([])
        labels = torch.tensor([])
        probs = torch.tensor([])

    if self.plot_image:
        # NOTE(review): nesting of the display calls is inferred from a
        # collapsed line -- confirm against the full source.
        img_draw = cv2.resize(image.copy(), original_size)
        for j, box in enumerate(boxes):
            if probs[j].item() > 0.01:  # Threshold
                x1 = int(box[0].cpu().item())
                y1 = int(box[1].cpu().item())
                x2 = int(box[2].cpu().item())
                y2 = int(box[3].cpu().item())
                cv2.rectangle(img_draw, (x1, y1), (x2, y2),
                              (36, 255, 12), 2)
                caption = (self.val_dataset._classes_names[labels[j]]
                           + " " + str(probs[j].cpu().item()))
                cv2.putText(img_draw, caption, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                            (36, 255, 12), 2)
        cv2.imshow("img", img_draw)
        key = cv2.waitKey(0)
        if key == 27:  # if ESC is pressed, stop plotting
            self.plot_image = False
            cv2.destroyAllWindows()

    # After prediction: tag every detection with the running sample index.
    indexes = torch.ones(labels.size(0), 1,
                         dtype=torch.float32) * self.val_index
    self.val_index += 1
    tmprslt = torch.cat(
        [
            indexes.reshape(-1, 1).to(self.device),
            labels.reshape(-1, 1).float().to(self.device),
            probs.reshape(-1, 1).to(self.device),
            (boxes + 1.0).to(self.device),  # matlab's indexes start from 1
        ],
        dim=1)
    if tmprslt.shape[0] > 0:
        self.results.append(tmprslt)

    return {'val_loss': loss, 'log': {'val_loss': loss}}
def detect_faces(img_path, save_path=None):
    """Detect faces in an image file and draw them onto the image.

    Args:
        img_path: Path of the image, read with ``cv2.imread``.
        save_path: When not ``None``, the annotated image is written there.

    Returns:
        The image as loaded by OpenCV, with a rectangle and a percentage
        label drawn for every detection scoring at least
        ``cfg['min_for_visual']``.

    Raises:
        RuntimeError: If the image cannot be read.
    """
    print("Starting detection...")

    device = torch.device("cuda:0" if cfg['gpu_train'] else "cpu")

    # Loading pretrained model
    net = FaceDetectionSSD("test", cfg['img_dim'], cfg['num_classes'])
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    net.load_state_dict(
        torch.load(cfg['pretrained_model'], map_location=device))
    net.eval()
    cudnn.benchmark = True
    net.to(device)
    rgb_mean = (104, 117, 123)  # BGR order

    # Processing image
    init_im = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if init_im is None:
        raise RuntimeError("Image does not exist!")

    image = np.float32(init_im)
    im_height, im_width, _ = image.shape
    # (width, height, width, height): rescales decoded boxes to pixels.
    scale = torch.Tensor(
        [image.shape[1], image.shape[0], image.shape[1], image.shape[0]])
    image -= rgb_mean
    image = image.transpose(2, 0, 1)  # HWC -> CHW
    image = torch.from_numpy(image).unsqueeze(0)
    image = image.to(device)
    scale = scale.to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        loc, conf = net(image)

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale
    boxes = boxes.cpu().numpy()
    # Column 1 of the confidences is used as the score.
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > cfg["conf_threshold"])[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:cfg["top_k"]]
    boxes = boxes[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep_ind, keep_count = nms(torch.from_numpy(boxes.astype(np.float32)),
                               torch.from_numpy(scores.astype(np.float32)),
                               cfg["nms_threshold"], cfg['keep_top_k'])
    dets = dets[keep_ind[:keep_count], :]
    # A single detection can come back 1-D; restore the row dimension
    # explicitly instead of relying on a bare `except`.
    if dets.ndim == 1:
        dets = dets[np.newaxis, :]

    # keep top-K after NMS
    dets = dets[:cfg["keep_top_k"], :]

    face_count = 0
    # Show image and saving image results
    for b in dets:
        if b[4] < cfg['min_for_visual']:
            continue
        face_count += 1
        text = "{:.2f}%".format(b[4] * 100)
        x1, y1, x2, y2 = (int(value) for value in b[:4])
        cv2.rectangle(init_im, (x1, y1), (x2, y2), (0, 255, 255), 2)
        # Put the label just above the box's first corner.
        cv2.putText(init_im, text, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

    if save_path is not None:
        cv2.imwrite(save_path, init_im)

    print("Detection completed!")
    print(f"Found {face_count} faces!")
    return init_im