def _pnet_detect(self, inputs, minsize=20, scale_factor=0.709):
    """Run P-Net over an image pyramid and return the surviving candidates.

    The input is first rescaled by ``12 / minsize`` so that a face of
    ``minsize`` pixels matches the 12x12 unit P-Net works on. The image is
    then shrunk by ``scale_factor`` after every pass until its shorter side
    drops below 12 pixels.

    Args:
        inputs: Source image array; ``inputs.shape[0]`` is used to compute
            the ratio passed to ``self._get_box``.
        minsize: Smallest face size (in pixels) to look for.
        scale_factor: Multiplicative shrink applied between pyramid levels.

    Returns:
        ``(bboxes, scores)`` after a per-level NMS (0.5, ``'union'``) and a
        final NMS across levels (0.7, ``'min'``). When no level yields a
        candidate, empty arrays of shape ``(0, 4)`` and ``(0, 1)``.
    """
    level_boxes, level_scores = [], []

    # Rescale so the smallest face of interest becomes 12x12.
    pyramid = self._img_resize(inputs, 12 / minsize)

    # Walk the pyramid and gather the candidates of every level.
    while min(pyramid.shape[:2]) >= 12:
        cls, reg = self.pnet.predict(tf.expand_dims(pyramid, 0))
        ratio = pyramid.shape[0] / inputs.shape[0]
        bbox, score = self._get_box(reg, cls[0, :, :, 1], ratio)
        # Shrink before the emptiness check so the loop always advances.
        pyramid = self._img_resize(pyramid, scale_factor)
        if len(bbox) != 0:
            kept = nms(bbox, score, 0.5, 'union')
            level_boxes.append(bbox[kept])
            level_scores.append(score[kept])

    if len(level_boxes) == 0:
        return np.empty((0, 4)), np.empty((0, 1))

    merged_boxes = np.vstack(level_boxes)
    merged_scores = np.hstack(level_scores)
    # One more suppression across levels, mainly to remove overlaps.
    kept = nms(merged_boxes, merged_scores, 0.7, 'min')
    return merged_boxes[kept], merged_scores[kept]
def detOnet(self, img, boxes):
    """Refine candidate boxes with ``self._ro_net`` and suppress duplicates.

    ``self._ro_net`` is called with 48 as its third argument (presumably the
    crop size of the final stage -- confirm against ``_ro_net``). The result
    goes through ``utils.nms`` twice: first with threshold 0.7, then with
    threshold 0.3 and ``is_min=True``.

    Returns:
        Whatever the second ``utils.nms`` call returns.
    """
    refined = self._ro_net(img, boxes, 48)
    first_pass = utils.nms(refined, 0.7)
    return utils.nms(first_pass, 0.3, is_min=True)
def textline_extract(image, prediction, threshold=0.3):
    """Turn raw network output into text-line boxes.

    Args:
        image: Image array of shape ``(h, w, channels)``; only the shape is
            read.
        prediction: Indexable network output. ``prediction[1]`` holds the
            regression values and ``prediction[2]`` the class
            probabilities, indexed as ``[0, anchor, 1]`` for the foreground
            score. ``prediction[0]`` is not used.
        threshold: Minimum foreground probability for an anchor to be kept.

    Returns:
        A list of int32 rows produced by
        ``TextProposalConnector.get_text_lines``.
    """
    h, w, _ = image.shape
    regr = np.array(prediction[1])
    cls_prod = np.array(prediction[2])

    # One anchor set per 16-pixel cell, decoded and clipped to the image.
    anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
    bbox = utils.bbox_transfor_inv(anchor, regr)
    bbox = utils.clip_box(bbox, [h, w])

    # Keep anchors whose foreground score exceeds ``threshold``.
    fg = np.where(cls_prod[0, :, 1] > threshold)[0]
    select_anchor = bbox[fg, :].astype('int32')
    select_score = cls_prod[0, fg, 1]

    # Filter by size (second argument 16; semantics live in utils.filter_bbox).
    keep_index = utils.filter_bbox(select_anchor, 16)
    select_anchor = select_anchor[keep_index]
    select_score = select_score[keep_index]
    select_score = np.reshape(select_score, (select_score.shape[0], 1))

    # NMS on rows of [box..., score].
    nmsbox = np.hstack((select_anchor, select_score))
    keep = utils.nms(nmsbox, 0.3)
    select_anchor = select_anchor[keep]
    select_score = select_score[keep]

    # Connect the surviving proposals into text lines.
    connector = text_connect.TextProposalConnector()
    text = connector.get_text_lines(select_anchor, select_score, [h, w])
    return list(text.astype('int32'))
def detect_pnet(self, image):
    """Collect P-Net candidate boxes over an image pyramid.

    The image is repeatedly shrunk by a factor of 0.7 while its shorter
    side stays above 12 pixels. At each level, positions whose confidence
    exceeds 0.6 are converted to boxes by ``self.offset_to_boxes`` and
    filtered with ``utils.nms`` (threshold 0.5, ``isMin=False``).

    Args:
        image: Object exposing ``.size`` as ``(w, h)`` and ``.resize``.

    Returns:
        ``np.array`` built from the boxes kept on every level; an empty
        array when the image is too small for a single pass.
    """
    full_w, full_h = image.size
    factor = 1
    shortest = min(full_w, full_h)
    candidates = []
    current = image
    while shortest > 12:
        batch = transform(current)
        batch.unsqueeze_(0)  # prepend a leading dimension of size 1
        batch = batch.to(self.device)
        cond, offset = self.pnet(batch)
        # Detach from the graph and move to CPU before indexing.
        offset_map = offset.detach()[0].cpu()
        conf_map = cond.detach()[0][0].cpu()

        mask = torch.gt(conf_map, 0.6)
        picked_conf = conf_map[mask]
        hits = torch.nonzero(mask)  # (N, 2) positions above the threshold
        picked_offset = offset_map[:, hits[:, 0], hits[:, 1]].T  # (N, 4)

        level_boxes = self.offset_to_boxes(
            hits.numpy(), picked_conf.numpy(), picked_offset.numpy(), factor)

        # 0.7 matters here: the original author notes 0.9 misses faces.
        factor *= 0.7
        next_w, next_h = int(full_w * factor), int(full_h * factor)
        shortest = min(next_w, next_h)
        current = current.resize((next_w, next_h))

        candidates.extend(utils.nms(level_boxes, i=0.5, isMin=False))
    return np.array(candidates)
def __pnet_detect(self, img):
    """Run P-Net on every pyramid level and stack the boxes kept by NMS.

    The network is fully convolutional, so any image size is accepted. The
    image is shrunk by 0.7 per level and the loop stops once the shorter
    side is no longer above 12 pixels. The confidence threshold ``p_cls``
    and the NMS threshold ``p_nms`` are read from the enclosing module.

    Args:
        img: Object exposing ``.size`` as ``(w, h)`` and ``.resize``.

    Returns:
        A 2-D array with the boxes of all levels stacked row-wise, or an
        empty ``np.array([])`` when nothing was kept.
    """
    width, height = img.size
    shortest = min(width, height)
    factor = 1  # current scale relative to the original image
    collected = np.array([])
    while shortest > 12:
        tensor = self.__image_transform(img)
        if self.isCuda:
            tensor = tensor.cuda()
        tensor.unsqueeze_(0)  # prepend a leading dimension of size 1

        _cls, _offest, _ = self.pnet(tensor)
        conf = _cls[0][0].cpu().data
        offset = _offest[0].cpu().data

        mask = torch.gt(conf, p_cls)  # positions above the confidence threshold
        positions = torch.nonzero(mask, as_tuple=False)
        level = self.__box(positions, offset[:, mask], conf[mask], factor)
        level = utils.nms(np.array(level), p_nms)

        # Prepare the next, smaller pyramid level.
        factor *= 0.7
        next_w = int(width * factor)
        next_h = int(height * factor)
        img = img.resize((next_w, next_h))
        shortest = min(next_w, next_h)

        if level.shape[0] != 0:
            if collected.size:
                collected = np.vstack([collected, level])
            else:
                collected = level
    return collected
def show_boxes(curimg, ancs, thresh, name):
    """Decode anchor activations into boxes, draw them and display the image.

    The first three channels of ``ancs`` are treated as scores. For score
    channel ``k``, channels ``3 + 4*k`` to ``3 + 4*k + 3`` hold the centre
    offset along axis 0, the centre offset along axis 1, the extent along
    axis 0 and the extent along axis 1, all in units of one grid cell.

    Args:
        curimg: Image array; converted to float32 before drawing.
        ancs: Array indexed as ``[channel, row, col]``.
        thresh: Minimum score for a cell to produce a box.
        name: Unused; kept for interface compatibility.

    Returns:
        Always ``1``. Blocks in ``cv2.waitKey()`` until a key is pressed.
    """
    hits = zip(*np.where(ancs[:3, :, :] > thresh))
    curimg = curimg.astype(np.float32)

    # Size of one grid cell along each image axis (not OpenCV's x/y order).
    xstep = float(curimg.shape[0]) / float(ancs.shape[1])
    ystep = float(curimg.shape[1]) / float(ancs.shape[2])

    rects = []
    for pair in hits:
        base = 3 + pair[0] * 4
        row, col = pair[1], pair[2]
        xcenter = xstep * row + xstep * ancs[base, row, col]
        ycenter = ystep * col + ystep * ancs[base + 1, row, col]
        dx = xstep * ancs[base + 2, row, col]
        dy = ystep * ancs[base + 3, row, col]
        A = (int(xcenter - dx / 2), int(ycenter - dy / 2))
        B = (int(xcenter + dx / 2), int(ycenter + dy / 2))
        rects.append(np.array([[A[0], A[1], B[0], B[1], ancs[pair]]]))

    if len(rects) > 0:
        rects = np.concatenate(rects, 0)
        rects = rects[utils.nms(rects, 0.18)]
        rects = rects.astype(int)
        for rect in rects:
            # Swap to OpenCV's (x, y) point order when drawing.
            cv2.rectangle(curimg, (rect[1], rect[0]), (rect[3], rect[2]),
                          (255, 0, 0), 4)

    curimg = cv2.cvtColor(curimg, cv2.COLOR_BGR2RGB)
    cv2.imshow('img', curimg)
    cv2.waitKey()
    return 1
def is_img(img_cv, color):
    """Send each image to the YOLOv3 serving endpoint and save rich detections.

    Every image is resized to 1300x414, converted to grayscale, blurred
    (Gaussian 3x3, then median 3) and posted to the prediction endpoint.
    The three prediction heads are merged, post-processed and filtered with
    NMS. Only when more than 6 boxes remain is the annotated image written
    to ``./pre_out/<color>im<index>.jpg``.

    Args:
        img_cv: Sequence of BGR images.
        color: String prefix used in the output file name.

    Returns:
        None.
    """
    if len(img_cv) == 0:
        return
    print("---1312--------------")

    headers = {"content-type": "application/json"}
    num_classes = 65
    row_width = 5 + num_classes

    for i, frame in enumerate(img_cv):
        im_cv_r = cv2.resize(frame, (1300, 414))
        gray = cv2.cvtColor(im_cv_r, cv2.COLOR_BGR2GRAY)
        # NOTE(review): the original also computed cv2.equalizeHist(gray) but
        # never used the result; the blur has always run on ``gray``.
        gaussian = cv2.GaussianBlur(gray, (3, 3), 0, 0, cv2.BORDER_DEFAULT)
        original_image = cv2.medianBlur(gaussian, 3)
        original_image_size = original_image.shape[:2]

        image_data = utils.image_preporcess(
            np.copy(original_image), [input_size, input_size])
        image_data = image_data[np.newaxis, ...]
        data = json.dumps({"signature_name": "serving_default",
                           "instances": image_data.tolist()})

        # NOTE(review): no timeout is set, so a stalled server blocks forever.
        json_response = requests.post(
            'http://tf:port/v1/models/yolov3:predict',
            data=data, headers=headers)
        prediction = json.loads(json_response.text)['predictions'][0]

        pred_bbox = np.concatenate(
            [np.reshape(prediction[head], (-1, row_width))
             for head in ('pred_sbbox', 'pred_mbbox', 'pred_lbbox')],
            axis=0)
        bboxes = utils.postprocess_boxes(
            pred_bbox, original_image_size, input_size, 0.3)
        bboxes = utils.nms(bboxes, 0.45, method='nms')

        if np.array(bboxes).shape[0] > 6:
            image = utils.draw_bbox(im_cv_r, bboxes)
            name = color + 'im' + str(i) + '.jpg'
            path = os.path.join("./pre_out/", name)
            cv2.imwrite(path, image)
            print("-------------")
def evaluate(path, nms_threshold, conf_threshold):
    """Compute mAP of stored predictions against ``data/val.json``.

    Predictions are reshaped to rows of 5 values, filtered with ``nms`` and
    matched against the ground truth at IoU thresholds 0.5 to 0.75 in steps
    of 0.05. AP, precision and recall are recorded only for thresholds
    contained in the module-level ``plot`` collection.

    Args:
        path: JSON file mapping image keys to predictions.
        nms_threshold: Passed as the third argument of ``nms``.
        conf_threshold: Passed as the second argument of ``nms``.

    Returns:
        The mean of the per-threshold AP values. All recorded metrics are
        also printed.
    """
    # Context managers close both files; the original left them open.
    with open('data/val.json') as gt_file:
        gts = json.load(gt_file)
    thresholds = np.around(np.arange(0.5, 0.76, 0.05), 2)
    with open(path) as pred_file:
        pds = json.load(pred_file)

    mAP = 0
    batch_metrics = {th: [] for th in thresholds}
    n_gt = 0
    for img in tqdm(gts.keys()):
        pred = torch.tensor(pds[img]).reshape(-1, 5)
        gt = gen_gts(gts[img])
        n_gt += gt.shape[0]
        pred_nms = nms(pred, conf_threshold, nms_threshold)
        for th in batch_metrics:
            batch_metrics[th].append(cal_tp_per_item(pred_nms, gt, th))

    metrics = {}
    for th in batch_metrics:
        # Transpose the per-image (tps, scores) pairs and concatenate each.
        tps, scores = [np.concatenate(x, 0) for x in zip(*batch_metrics[th])]
        precision, recall, AP = ap_per_class(tps, scores, n_gt)
        mAP += np.mean(AP)
        if th in plot:
            metrics['AP/' + str(th)] = np.mean(AP)
            metrics['Precision/' + str(th)] = np.mean(precision)
            metrics['Recall/' + str(th)] = np.mean(recall)
    metrics['mAP'] = mAP / len(thresholds)

    for k in metrics:
        print(k, ':', metrics[k])
    return metrics['mAP']
def test():
    """Evaluate the module-level ``model`` on ``test_loader`` and log P/R/F.

    A ground-truth box counts as correct when its best-overlapping
    prediction has IoU above ``iou_thresh`` and the same class. Relies on
    the module-level names ``model``, ``test_loader``, ``use_cuda``,
    ``conf_thresh``, ``nms_thresh``, ``iou_thresh``, ``eps`` and
    ``logging``.
    """
    def truths_length(truths):
        # Labels are padded to 50 rows; the first row whose second value is
        # zero marks the end of the real boxes.
        for i in range(50):
            if truths[i][1] == 0:
                return i
        return 50

    model.eval()
    num_classes = model.num_classes
    total = 0.0
    proposals = 0.0
    correct = 0.0
    device = torch.device("cuda" if use_cuda else "cpu")

    if model.net_name() == 'region':  # region_layer
        shape = (0, 0)
    else:
        shape = (model.width, model.height)

    # Evaluation needs no gradients; skipping autograd saves memory.
    with torch.no_grad():
        for data, target, org_w, org_h in test_loader:
            print("======")
            data = data.to(device)
            output = model(data)
            all_boxes = get_all_boxes(output, shape, conf_thresh,
                                      num_classes, use_cuda=use_cuda)

            for k in range(len(all_boxes)):
                boxes = all_boxes[k]
                correct_yolo_boxes(boxes, org_w[k], org_h[k],
                                   model.width, model.height)
                boxes = np.array(nms(boxes, nms_thresh))

                truths = target[k].view(-1, 5)
                num_gts = truths_length(truths)
                total = total + num_gts
                num_pred = len(boxes)
                if num_pred == 0:
                    continue

                proposals += int((boxes[:, 4] > conf_thresh).sum())
                # Same for every ground truth of this image, so build once.
                # Transposed (like gt_boxes below) for torch.max.
                pred_boxes = torch.FloatTensor(boxes).t()
                for i in range(num_gts):
                    gt_boxes = torch.FloatTensor([
                        truths[i][1], truths[i][2], truths[i][3],
                        truths[i][4], 1.0, 1.0, truths[i][0]
                    ])
                    gt_boxes = gt_boxes.repeat(num_pred, 1).t()
                    best_iou, best_j = torch.max(
                        multi_bbox_ious(gt_boxes, pred_boxes, x1y1x2y2=False),
                        0)
                    same_class = pred_boxes[6][best_j] == gt_boxes[6][0]
                    if best_iou > iou_thresh and same_class:
                        correct += 1

    precision = 1.0 * correct / (proposals + eps)
    recall = 1.0 * correct / (total + eps)
    fscore = 2.0 * precision * recall / (precision + recall + eps)
    logging("correct: %d, precision: %f, recall: %f, fscore: %f" %
            (correct, precision, recall, fscore))
def visualize(self, image_name, depth_name, flow_name, box_name, figure_path):
    """Predict on one sample and either show or save the visualisations.

    Args:
        image_name: Path of the image; its last two path components are
            used to build the saved figure prefix.
        depth_name: Depth input passed to ``self.data.get_one_sample``.
        flow_name: Flow input passed to ``self.data.get_one_sample``.
        box_name: Box input passed to ``self.data.get_one_sample``.
        figure_path: Output directory. An empty string shows the figures
            interactively instead of saving them.
    """
    im, orig_im, dp, orig_dp, fl, orig_fl, box, lb, of = \
        self.data.get_one_sample(image_name, depth_name, flow_name, box_name)
    pred, pred_of, loss = self.predict(im, dp, fl, lb, of)
    pred_box = nms(pred, pred_of,
                   self.data.orig_im_size[0], self.data.orig_im_size[1])

    # Compare by value: ``is ''`` tests identity and only works by accident
    # of string interning.
    if figure_path == '':
        self.visualize_groundtruth(orig_im, im, orig_dp, dp, orig_fl, fl,
                                   box, lb, of)
        self.visualize_prediction(im, dp, fl, pred, pred_of, loss)
        self.visualize_box(orig_im, orig_dp, orig_fl, pred_box, loss)
        plt.show()
        plt.close('all')
        return

    if not os.path.exists(figure_path):
        os.makedirs(figure_path)

    # Figure prefix is "<parent dir>_<file id>", where the file id is the
    # part of the file name before the first underscore.
    dirs = image_name.split('/')
    sub_dir, image_name = dirs[-2], dirs[-1]
    file_name, file_ext = os.path.splitext(image_name)
    file_id = file_name.split('_')[0]
    figure_prefix = os.path.join(figure_path, sub_dir + '_' + file_id)

    self.visualize_groundtruth(orig_im, im, orig_dp, dp, orig_fl, fl,
                               box, lb, of, figure_prefix)
    self.visualize_prediction(im, dp, fl, pred, pred_of, loss, figure_prefix)
    self.visualize_box(orig_im, orig_dp, orig_fl, pred_box, loss,
                       figure_prefix)
def model_reponse(self, data_string, original_image_size):
    """Query the serving model over gRPC and return the boxes kept by NMS.

    Args:
        data_string: Image data; wrapped into a tensor proto of shape
            ``[1, 416, 416, 3]``.
        original_image_size: Forwarded to ``utils.postprocess_boxes``.

    Returns:
        The result of ``utils.nms`` (threshold 0.15, ``method='nms'``) on
        the post-processed predictions.
    """
    # Open the channel and build the service stub generated from the .proto.
    channel = implementations.insecure_channel(self.host, int(self.port))
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel._channel)

    # Describe which model and signature to evaluate, then attach the input.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = self.model_name
    request.model_spec.signature_name = 'serving_default'
    tensor_proto = tf.contrib.util.make_tensor_proto(
        data_string, shape=[1, 416, 416, 3])
    request.inputs['images'].CopyFrom(tensor_proto)

    # Second argument is the call timeout.
    result = stub.Predict(request, 10.0)

    # Each head is a flat float list; reshape to rows of 85 values and stack.
    heads = [
        np.reshape(np.array(list(result.outputs[key].float_val)), (-1, 85))
        for key in ('out1', 'out2', 'out3')
    ]
    pred_bbox = np.concatenate(heads, axis=0)

    candidates = utils.postprocess_boxes(
        pred_bbox, original_image_size, 416, 0.3)
    return utils.nms(candidates, 0.15, method='nms')
def nms_box(self, boxes):
    """Apply class-wise NMS to predicted boxes.

    Args:
        boxes: 2-D tensor whose first five columns are kept as-is and whose
            remaining columns are class scores, reduced to the arg-max class
            index. Columns 1-4 are treated as centre/size values: the code
            subtracts half of column 3 from column 1 and half of column 4
            from column 2 to get one corner, then adds the full size to get
            the opposite corner.

    Returns:
        Array of shape ``(n, 6)`` holding the kept boxes of classes 0-9 in
        corner form with the class index in the last column. Shape
        ``(0, 6)`` when the input is empty or no box was kept.
    """
    if boxes.shape[0] == 0:
        return np.array([]).reshape(-1, 6)

    class_index = torch.argmax(boxes[:, 5:], dim=1).float().reshape(-1, 1)
    merged = torch.cat((boxes[:, 0:5], class_index), dim=1)  # (n, 6)
    merged = merged.cpu().detach().numpy()

    kept = []
    for i in range(10):
        members = merged[np.where(merged[:, 5] == i)]
        # Convert centre/size to corner coordinates; columns 0 and 5 are
        # carried over unchanged by the copy.
        corners = members.copy()
        corners[:, 1] = members[:, 1] - members[:, 3] * 0.5
        corners[:, 2] = members[:, 2] - members[:, 4] * 0.5
        corners[:, 3] = corners[:, 1] + members[:, 3]
        corners[:, 4] = corners[:, 2] + members[:, 4]

        survivors = utils.nms(corners, i=0.3, isMin=False)
        if survivors.shape[0] > 0:
            kept.extend(survivors)

    # np.stack raises on an empty list (e.g. every class index >= 10), so
    # return the same empty result as the early exit above.
    if not kept:
        return np.array([]).reshape(-1, 6)

    nms_boxes = np.stack(kept)
    print(nms_boxes)
    return nms_boxes
def __detect_pnet(self, image):
    """Collect P-Net candidates over the whole image pyramid and run NMS.

    The image is shrunk by 0.7 per level while its shorter side stays above
    12 pixels. Positions with confidence above 0.6 are turned into boxes by
    ``_box``.

    Args:
        image: Object exposing ``.size`` as ``(w, h)`` and ``.resize``.

    Returns:
        The result of ``nms`` (threshold 0.6) on the stacked boxes, or an
        empty ``np.array([])`` when no level produced a box.
    """
    boxes = []
    img = image
    w, h = img.size
    min_side_len = min(w, h)
    scale = 1
    while min_side_len > 12:
        img_data = self.__image_transform(img)
        img_data.unsqueeze_(0)
        img_data = img_data.to(self.device)

        _cls, _offset, _landmark = self.pnet(img_data)
        cls = _cls[0][0].cpu().data
        offset = _offset[0].cpu().data
        landmark = _landmark[0].cpu().data

        idxs = torch.nonzero(torch.gt(cls, 0.6))
        boxes.extend(
            _box(idxs, offset, landmark, cls[idxs[:, 0], idxs[:, 1]], scale))

        scale *= 0.7
        _w, _h = int(w * scale), int(h * scale)
        img = img.resize((_w, _h))
        min_side_len = min(_w, _h)

    # Check only after all levels have run: an empty first level must not
    # stop the smaller levels from being searched. This also covers images
    # too small for a single pass, where np.stack([]) would raise.
    if not boxes:
        return np.array([])
    return nms(np.stack(boxes), 0.6)
def predict(self):
    """Run the detector on ``./414162.jpg`` and write ``./test.jpg``.

    The image is preprocessed to ``self.input_size``, fed through
    ``self.sess``, post-processed (score threshold 0.5), filtered with NMS
    (threshold 0.45) and drawn with labels. Shapes and the final boxes are
    printed along the way.
    """
    np.set_printoptions(threshold=np.inf)

    source = np.array(cv2.imread('./414162.jpg'))
    image_shape = source.shape
    print("image_shape: ", image_shape)
    image = np.copy(source)

    side = self.input_size
    batch = utils.image_preprocess(image, [side, side])[np.newaxis, ...]

    fetched = self.sess.run(
        [self.pred_bbox],
        feed_dict={self.input: batch, self.training: False})
    detections = np.array(fetched[0])
    detections = utils.postprocess_boxes(detections, image_shape, 416, 0.5)
    print("pred_bbox shape: ", detections.shape)

    detections = utils.nms(detections, 0.45)
    print("pred_bbox after: ", detections)

    annotated = utils.draw_bbox(image, detections, show_label=True)
    cv2.imwrite('./test.jpg', annotated)
def decode(self, loc_preds, cls_preds, input_size):
    """Decode outputs back to bounding box locations and class labels.

    Args:
        loc_preds: (tensor) predicted locations, sized [#anchors, 4].
        cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        boxes: (tensor) decode box locations, sized [#obj,4].
        labels: (tensor) class labels for each box, sized [#obj,].
    """
    CLS_THRESH = 0.5
    NMS_THRESH = 0.5

    if isinstance(input_size, int):
        input_size = torch.FloatTensor([input_size, input_size])
    else:
        input_size = torch.FloatTensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)

    # Offsets are relative to the anchor size; sizes are log-scaled.
    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

    score, labels = cls_preds.sigmoid().max(1)  # [#anchors,]
    # squeeze(1) keeps the index 1-D even for exactly one detection; a bare
    # squeeze() collapses it to 0-d and makes boxes[ids] lose its row axis.
    ids = (score > CLS_THRESH).nonzero().squeeze(1)  # [#obj,]
    keep = nms(boxes[ids], score[ids], threshold=NMS_THRESH)
    return boxes[ids][keep], labels[ids][keep]
def forward(self, input, thresh, anchors):
    """Detect objects on a batch and run NMS per image and per class.

    Args:
        input: Batch tensor; ``input.size(0)`` is the number of images.
        thresh: Confidence threshold forwarded to ``self._filter``.
        anchors: Mapping from feature-map size (13, 26, 52) to anchors.

    Returns:
        A list with one tensor per image holding the kept boxes stacked
        row-wise. An image without any kept box gets an empty tensor with
        the same column count as the parsed boxes.
    """
    # Network outputs for the three feature-map sizes.
    output_13, output_26, output_52 = self.net(input.to(device))

    # Per scale: keep positions above the threshold, then decode them. The
    # code below relies on column 5 being the class and column 6 the image
    # index.
    per_scale = []
    for output, stride, key in ((output_13, 32, 13),
                                (output_26, 16, 26),
                                (output_52, 8, 52)):
        idxs, vecs = self._filter(output, thresh)
        per_scale.append(self._parse(idxs, vecs, stride, anchors[key]))
    boxes_all = torch.cat(per_scale, dim=0)

    # NMS is done separately for each class of each image.
    last_boxes = []
    for n in range(input.size(0)):
        n_boxes = []
        boxes_n = boxes_all[boxes_all[:, 6] == n]
        print(boxes_n)
        for cls in range(cfg.class_num):
            boxes_c = boxes_n[boxes_n[:, 5] == cls]
            if boxes_c.size(0) > 0:
                n_boxes.extend(utils.nms(boxes_c, 0.3))
        # torch.stack raises on an empty list; fall back to an empty slice
        # so an image without detections does not crash the whole batch.
        last_boxes.append(torch.stack(n_boxes) if n_boxes else boxes_n[:0])
    return last_boxes
def forward(self, face_conf, face_locdata):
    """Decode face predictions and keep the top boxes after NMS.

    The face score is the softmax over two values: the maximum of the first
    three confidence channels and the fourth channel. Index 0 of that pair
    is compared against ``self.confidence_thred``.

    Args:
        face_conf: Tensor indexed as ``[batch, prior, channel]`` with at
            least four channels.
        face_locdata: Per-image location predictions passed to ``decode``.

    Returns:
        Tensor of shape ``(batch, self.top_k, 5)``; each filled row is
        ``[score, box...]`` and unused rows stay zero.
    """
    priors = pyramidAnchors(640)

    face_confdata_0, _ = torch.max(face_conf[:, :, 0:3], dim=2, keepdim=True)
    face_confdata_1 = face_conf[:, :, 3:4]
    face_confdata = F.softmax(
        torch.cat((face_confdata_0, face_confdata_1), dim=2),
        dim=2)  # [n, prior_num, 2]
    conf_pred = face_confdata.transpose(2, 1)

    num = face_conf.size(0)
    output = torch.zeros(num, self.top_k, 5)

    # Concatenate all pyramid levels in one call instead of growing the
    # tensor level by level.
    prs = torch.cat([torch.Tensor(level) for level in priors],
                    0).to(self.device)  # [prior_num, 4]

    for i in range(num):
        conf_scores = conf_pred[i].clone()
        c_mask = conf_scores[0].gt(self.confidence_thred)
        scores = conf_scores[0][c_mask]
        # Mask indexing yields a 1-D tensor even when nothing matches, so
        # test the element count rather than the number of dimensions.
        if scores.numel() == 0:
            continue
        decoded_boxes = decode(face_locdata[i], prs)
        l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
        boxes = decoded_boxes[l_mask].view(-1, 4)
        ids, count = nms(boxes, scores, self.nms_thred, self.top_k)
        output[i, :count] = torch.cat(
            (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)
    return output
def __pnet_detect(self, image):
    """Gather P-Net boxes from all pyramid levels and apply NMS once.

    The image is shrunk by 0.7 per level while its shorter side stays above
    12 pixels. Every position with confidence above 0.6 is converted to a
    box by ``self.__box``.

    Args:
        image: Object exposing ``.size`` as ``(w, h)`` and ``.resize``.

    Returns:
        The result of ``utils.nms`` (threshold 0.5) on all collected boxes.
    """
    width, height = image.size
    shortest = min(width, height)
    factor = 1
    current = image
    collected = []
    while shortest > 12:
        tensor = self.__image_transform(current)
        if self.isCuda:
            tensor = tensor.cuda()
        tensor.unsqueeze_(0)

        _cls, _offest = self.pnet(tensor)
        conf = _cls[0][0].cpu().data
        offest = _offest[0].cpu().data

        for position in torch.nonzero(torch.gt(conf, 0.6)):
            score = conf[position[0], position[1]]
            collected.append(self.__box(position, offest, score, factor))

        factor *= 0.7
        next_w = int(width * factor)
        next_h = int(height * factor)
        current = current.resize((next_w, next_h))
        shortest = min(next_w, next_h)
    return utils.nms(np.array(collected), 0.5)
def save_json(self, epoch, test_split, nms_mode, nms_thresh):
    """Apply NMS to ``self.preds`` and write one JSON file per entry.

    Args:
        epoch: Used as the last component of the output directory.
        test_split: Sub-directory of ``self.out_dir`` for this split.
        nms_mode: ``"new"`` selects ``nms``, ``"standard"`` selects
            ``standard_nms``. Any other value prints an error and exits.
        nms_thresh: Threshold forwarded to the selected NMS function.

    Returns:
        The directory ``<out_dir>/<test_split>/<epoch>`` under which
        ``<key>/results_spotting.json`` was written for every key.
    """
    def _ensure_dir(path):
        # Create the directory, tolerating the case where it already exists.
        try:
            os.makedirs(path)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise

    predictions_path = os.path.join(self.out_dir, test_split, str(epoch))
    _ensure_dir(predictions_path)

    if nms_mode == "new":
        filtered = nms(self.preds, nms_thresh)
    elif nms_mode == "standard":
        filtered = standard_nms(self.preds, nms_thresh)
    else:
        print(
            "Error: invalid NMS mode specified, must be 'standard' or 'new'"
        )
        sys.exit()

    print("Saving prediction json at epoch: " + str(epoch) + "...")
    for key, pred in filtered.items():
        target_dir = os.path.join(predictions_path, key)
        _ensure_dir(target_dir)
        out_file = os.path.join(target_dir, "results_spotting.json")
        with open(out_file, "w") as outfile:
            json.dump(pred, outfile)
    return predictions_path
def yuNetDetection(self, frame):
    """Detect faces in ``frame`` and return them as ``[x, y, w, h]`` rows.

    On the first call a ``PriorBox`` is created for the frame size and
    cached on ``self.pb``. Detections whose last column does not exceed
    ``self.confidence`` are dropped before NMS.

    Args:
        frame: Image array; ``frame.shape[:2]`` is ``(rows, cols)``.

    Returns:
        Integer array with one ``[start_x, start_y, width, height]`` row per
        face, or an empty tuple when nothing passes the confidence filter.
    """
    if self.init == 0:
        # shape[:2] is (rows, cols); PriorBox gets (cols, rows) like the
        # (640, 480) input shape. The original had the two names swapped.
        frame_rows, frame_cols = frame.shape[:2]
        self.pb = PriorBox(input_shape=(640, 480),
                           output_shape=(frame_cols, frame_rows))
        self.init = 1

    blob = cv2.dnn.blobFromImage(frame, size=(640, 480))
    self.detector.setInput(blob)
    loc, conf, iou = self.detector.forward(['loc', 'conf', 'iou'])

    dets = self.pb.decode(np.squeeze(loc, axis=0),
                          np.squeeze(conf, axis=0),
                          np.squeeze(iou, axis=0))
    dets = dets[np.where(dets[:, -1] > self.confidence)[0]]
    if not dets.shape[0]:
        return ()

    kept = nms(dets, self.threshold)
    # ``np.int`` was removed from NumPy; the builtin ``int`` is the same
    # dtype the alias pointed to.
    corners = np.array(kept[:, :4]).astype(int)
    start_xy = corners[:, :2]
    width_height = corners[:, 2:4] - start_xy
    return np.hstack((start_xy, width_height))
def main(_argv):
    """Build YOLOv3, run it on ``FLAGS.image`` and write ``FLAGS.output``.

    Args:
        _argv: Unused command-line remainder.
    """
    input_layer = tf.keras.layers.Input([FLAGS.size, FLAGS.size, 3])
    feature_maps = YOLOv3(input_layer)
    bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]

    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)

    # Read through a context manager so the file handle is closed.
    with open(FLAGS.image, 'rb') as image_file:
        raw_image = image_file.read()
    test_img = tf.image.decode_image(raw_image, channels=3)
    img_size = test_img.shape[:2]
    test_img = tf.expand_dims(test_img, 0)
    test_img = utils.transform_images(test_img, FLAGS.size)

    # Flatten every output head to rows and merge them.
    pred_bbox = model.predict(test_img)
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)

    boxes = utils.postprocess_boxes(pred_bbox, img_size, FLAGS.size, 0.3)
    boxes = utils.nms(boxes, 0.45, method='nms')

    original_image = cv2.imread(FLAGS.image)
    img = utils.draw_outputs(original_image, boxes)
    cv2.imwrite(FLAGS.output, img)
def rcnn_detect(imidx, imdb, rcnn_model):
    """Score cached region features and run per-class NMS.

    Each box is assigned to its arg-max class; class 0 is skipped. For every
    other class the assigned boxes are joined with their score for that
    class and filtered with ``nms`` (threshold 0.3).

    Args:
        imidx: Image index passed to the feature cache loader.
        imdb: Image database; only ``imdb.name`` is read here.
        rcnn_model: Model providing ``cache_name``, ``opts.layer``,
            ``classifier`` and ``classes``.

    Returns:
        A list with one entry per class: an array of ``[box..., score]``
        rows, or an empty list when no box was assigned to the class.
    """
    d = rcnn_load_cached_pool5_features(
        rcnn_model.cache_name, imdb.name, imidx)
    d['feat'] = rcnn_pool5_to_fcX(d['feat'], rcnn_model.opts.layer, rcnn_model)

    scores = rcnn_model.classifier.predict_proba(d['feat'].astype('f'))
    scores_idx = np.argmax(scores, 1)
    num_classes = len(rcnn_model.classes)

    # ``range`` exists on Python 2 and 3; ``xrange`` is undefined on 3.
    dets = [[] for _ in range(num_classes)]
    for i in range(1, num_classes):
        I = np.where(scores_idx == i)[0]
        if I.size == 0:
            continue
        class_scores = scores[I, i]
        scored_boxes = np.concatenate(
            (d['boxes'][I, :], class_scores.reshape((class_scores.size, 1))),
            1)
        keep = nms(scored_boxes, 0.3)
        dets[i] = scored_boxes[keep, :]
    return dets
def forward(self, input, thresh, anchors):
    """Detect objects on a batch and run NMS per image and per class.

    Args:
        input: Batch tensor; ``input.size(0)`` is the number of images.
        thresh: Confidence threshold forwarded to ``self._filter``.
        anchors: Mapping from feature-map size (13, 26, 52) to anchors.

    Returns:
        A list with one tensor per image holding the kept boxes stacked
        row-wise. An image without any kept box gets an empty tensor with
        the same column count as the parsed boxes.
    """
    output_13, output_26, output_52 = self.net(input)

    # Per scale: filter by confidence, then decode. The code below relies on
    # column 5 being the class and column 6 the image index.
    per_scale = []
    for output, stride, key in ((output_13, 32, 13),
                                (output_26, 16, 26),
                                (output_52, 8, 52)):
        idxs, vecs = self._filter(output, thresh)
        per_scale.append(self._parase(idxs, vecs, stride, anchors[key]))
    boxes_all = torch.cat(per_scale, dim=0)

    last_boxes = []
    for n in range(input.size(0)):
        n_boxes = []
        boxes_n = boxes_all[boxes_all[:, 6] == n]
        for cls in range(cfg.CLASS_NUM):
            boxes_c = boxes_n[boxes_n[:, 5] == cls]
            if boxes_c.size(0) > 0:
                n_boxes.extend(nms(boxes_c, 0.3))
        # torch.stack raises on an empty list; fall back to an empty slice
        # so an image without detections does not crash the whole batch.
        last_boxes.append(torch.stack(n_boxes) if n_boxes else boxes_n[:0])
    return last_boxes
def detect(self, im, conf_thresh=0.7):
    """Detect objects in a BGR image and return pixel-space corner boxes.

    The image is resized to ``self.__shape``, converted to RGB, scaled to
    ``[0, 1]`` and run through the network on CUDA. Boxes come back as
    centre/size fractions of the image and are converted to corner
    coordinates in the original image.

    Args:
        im: BGR image array of shape ``(h, w, channels)``.
        conf_thresh: Confidence threshold forwarded to ``get_all_boxes``.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer lists. ``x1``/``y1`` are
        clamped at 0 and ``x2``/``y2`` at ``w - 1``/``h - 1``.
    """
    resized = cv2.resize(im, self.__shape)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    tensor = torch.from_numpy(rgb.transpose(2, 0, 1))
    tensor = tensor.float().div(255.0).unsqueeze(0)
    tensor = tensor.to(torch.device("cuda"))

    output = self.__net(tensor)
    candidates = get_all_boxes(output, self.__shape, conf_thresh,
                               self.__net.num_classes, use_cuda=True)[0]
    kept = nms(candidates, self.__nms_thresh)

    h, w = im.shape[0], im.shape[1]
    result = []
    for box in kept:
        half_w = box[2] / 2.0
        half_h = box[3] / 2.0
        x1 = max(int(round((box[0] - half_w) * w)), 0)
        y1 = max(int(round((box[1] - half_h) * h)), 0)
        x2 = min(int(round((box[0] + half_w) * w)), w - 1)
        y2 = min(int(round((box[1] + half_h) * h)), h - 1)
        result.append([x1, y1, x2, y2])
    return result
def filter_results(self, scores, boxes):
    """Threshold and NMS-filter the detections of the first batch element.

    NMS is implemented in pure Python to avoid custom C++ extensions. The
    original author notes this implementation is faster on the CPU, so both
    inputs are moved there first.

    Args:
        scores: Tensor indexed as ``[batch, box, class]``; class 0 is
            skipped.
        boxes: Tensor indexed as ``[batch, box, coord]``.

    Returns:
        ``(boxes, labels, probs)``: the first four columns of the kept rows,
        the class index of each row (int64) and the fifth column.
    """
    cpu = torch.device("cpu")
    boxes = boxes[0].to(cpu)
    scores = scores[0].to(cpu)

    kept_rows = []
    kept_labels = []
    for class_index in range(1, scores.size(1)):
        class_probs = scores[:, class_index]
        above = class_probs > self.score_threshold
        candidates = torch.cat(
            [boxes[above, :], class_probs[above].reshape(-1, 1)], dim=1)
        survivors = nms(candidates, self.nms_threshold)
        kept_rows.append(survivors)
        kept_labels.append(
            torch.full((survivors.size(0), ), class_index, dtype=torch.int64))

    merged = torch.cat(kept_rows)
    return merged[:, :4], torch.cat(kept_labels), merged[:, 4]
def parse(self, idxs, vecs, t, anchors):
    """Decode filtered feature-map cells into boxes and apply NMS.

    Args:
        idxs: Integer tensor with one row per hit: column 0 is the image
            index, columns 1 and 2 the cell row and column, column 3 the
            anchor index.
        vecs: Tensor with one row per hit: confidence, x offset, y offset,
            log-width, log-height, then the class scores.
        t: Stride of the feature map, i.e. pixels per cell.
        anchors: Anchor ``(w, h)`` pairs for this feature map.

    Returns:
        An empty ``torch.Tensor`` when there are no hits. Otherwise the
        result of ``utils.nms`` on rows of
        ``[image, conf, x1, y1, x2, y2, class]``, using the module-level
        ``cls_nms`` threshold.
    """
    if idxs.size(0) == 0:
        return torch.Tensor([])

    anchors = torch.Tensor(anchors)
    image_idx = idxs[:, 0]
    anchor_idx = idxs[:, 3]
    conf = vecs[:, 0]

    # (cell index + predicted offset) * stride gives the centre in pixels.
    cy = (idxs[:, 1].float() + vecs[:, 2]) * t
    cx = (idxs[:, 2].float() + vecs[:, 1]) * t
    w = anchors[anchor_idx, 0] * torch.exp(vecs[:, 3])
    h = anchors[anchor_idx, 1] * torch.exp(vecs[:, 4])

    half_w = w / 2
    half_h = h / 2
    x1, y1 = cx - half_w, cy - half_h
    x2, y2 = cx + half_w, cy + half_h

    class_scores = vecs[:, 5:]
    if class_scores.shape[0] != 0:
        label = torch.argmax(class_scores, dim=1).float()
    else:
        label = class_scores.reshape(-1)

    rows = torch.stack(
        [image_idx.float(), conf, x1, y1, x2, y2, label], dim=1).numpy()
    return utils.nms(rows, cls_nms, False)
def detect_pnet(self, image):
    """Gather P-Net boxes from all pyramid levels and apply NMS once.

    The image is shrunk by 0.7 per level while its shorter side stays above
    12 pixels. Each position with confidence above 0.6 is converted to a
    box by ``self.offset_to_boxes``.

    Args:
        image: Object exposing ``.size`` as ``(w, h)`` and ``.resize``.

    Returns:
        An empty ``np.array([])`` when no box was found, otherwise the
        result of ``utils.nms`` (threshold 0.5, ``isMin=False``).
    """
    full_w, full_h = image.size
    factor = 1
    shortest = min(full_w, full_h)
    current = image
    collected = []
    while shortest > 12:
        batch = transform(current)
        batch.unsqueeze_(0)
        batch = batch.to(self.device)

        cond, offset = self.pnet(batch)
        offset_map = offset.detach()[0].cpu()
        conf_map = cond.detach()[0][0].cpu()

        for position in torch.nonzero(torch.gt(conf_map, 0.6)):
            score = conf_map[position[0], position[1]]
            collected.append(
                self.offset_to_boxes(position, score, offset_map, factor))

        factor *= 0.7
        next_w, next_h = int(full_w * factor), int(full_h * factor)
        shortest = min(next_w, next_h)
        current = current.resize((next_w, next_h))

    if not collected:
        return np.array([])
    return utils.nms(np.array(collected), i=0.5, isMin=False)
def pNetDetect(self, imge):
    """Gather P-Net boxes from all pyramid levels and apply NMS once.

    The image is shrunk by ``self.pScale`` per level while its shorter side
    stays above 12 pixels. Positions whose confidence exceeds ``self.pCon``
    are converted to boxes by ``self.returnBox``.

    Args:
        imge: Object exposing ``.size`` as ``(w, h)`` and ``.resize``.

    Returns:
        The result of ``utils.nms`` (threshold ``self.pNms``) on the stacked
        boxes.
    """
    width, height = imge.size
    shortest = min(width, height)
    factor = 1
    collected = []
    while shortest > 12:
        batch = self.imgTransform(imge).unsqueeze(0).to(self.device)
        cons, offsets, _ = self.pNet(batch)
        idxs = torch.nonzero(torch.gt(cons[0][0], self.pCon))
        collected.extend(
            self.returnBox(idxs, offsets[0], cons[0][0], factor))

        factor *= self.pScale
        next_w = int(width * factor)
        next_h = int(height * factor)
        imge = imge.resize((next_w, next_h))
        shortest = min(next_w, next_h)

        # Drop this level's tensors and collect before the next pass.
        del batch, cons, offsets, idxs, _
        gc.collect()

    stacked = torch.stack(collected)
    return utils.nms(stacked, self.pNms)
def __call__(self, loc, score, anchor, img_size, scale=1.):
    """Turn RPN predictions into a limited set of region proposals.

    Args:
        loc: Predicted offsets, decoded against ``anchor`` by ``loc2bbox``.
        score: Score of each anchor.
        anchor: Anchor boxes.
        img_size: Two-element size; index 1 bounds columns 0 and 2 of the
            boxes and index 0 bounds columns 1 and 3.
        scale: Multiplier applied to ``self.min_size``.

    Returns:
        A ``torch.Tensor`` with at most ``n_post_nms`` proposals, where the
        pre/post-NMS limits depend on ``self.mode``.
    """
    training = self.mode == "training"
    n_pre_nms = self.n_train_pre_nms if training else self.n_test_pre_nms
    n_post_nms = self.n_train_post_nms if training else self.n_test_post_nms

    # Convert the RPN predictions into proposal boxes.
    roi = loc2bbox(anchor, loc)

    # Clip the proposals so they do not extend past the image border.
    roi[:, 0:4:2] = np.clip(roi[:, 0:4:2], 0, img_size[1])
    roi[:, 1:4:2] = np.clip(roi[:, 1:4:2], 0, img_size[0])

    # Discard proposals whose width or height is below the minimum size.
    min_size = self.min_size * scale
    widths = roi[:, 2] - roi[:, 0]
    heights = roi[:, 3] - roi[:, 1]
    keep = np.where((heights >= min_size) & (widths >= min_size))[0]
    roi = roi[keep, :]
    score = score[keep]

    # Keep the best-scoring proposals, highest first.
    order = score.ravel().argsort()[::-1]
    if n_pre_nms > 0:
        order = order[:n_pre_nms]
    roi = roi[order, :]

    roi = torch.Tensor(nms(roi, self.nms_thresh))
    return roi[:n_post_nms]
def test():
    """Plot the ground-truth boxes of the dataset for visual inspection.

    For every sample the targets of all scales are converted back to boxes,
    filtered with ``nms`` and drawn on the image with ``plot_image``.
    Anchor and target shapes and the final boxes are printed.
    """
    anchors = config.ANCHORS
    transform = config.test_transforms

    dataset = YOLODataset(
        "COCO/train.csv",
        "COCO/images/images/",
        "COCO/labels/labels_new/",
        S=[13, 26, 52],
        anchors=anchors,
        transform=transform,
    )
    S = [13, 26, 52]
    # Per-scale grid sizes expanded to (3, 3, 2) and applied to the anchors.
    grid = torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
    scaled_anchors = torch.tensor(anchors) / (1 / grid)
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

    for x, y in loader:
        boxes = []
        for i in range(y[0].shape[1]):
            anchor = scaled_anchors[i]
            target = y[i]
            print(anchor.shape)
            print(target.shape)
            boxes += cells_to_bboxes(target, is_preds=False,
                                     S=target.shape[2], anchors=anchor)[0]
        boxes = nms(boxes, iou_threshold=1, threshold=0.7,
                    box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
def rcnn_detect(imidx, imdb, rcnn_model):
    """Score cached region features and run per-class NMS.

    Each box is assigned to its arg-max class; class 0 is skipped. For every
    other class the assigned boxes are joined with their score for that
    class and filtered with ``nms`` (threshold 0.3).

    Args:
        imidx: Image index passed to the feature cache loader.
        imdb: Image database; only ``imdb.name`` is read here.
        rcnn_model: Model providing ``cache_name``, ``opts.layer``,
            ``classifier`` and ``classes``.

    Returns:
        A list with one entry per class: an array of ``[box..., score]``
        rows, or an empty list when no box was assigned to the class.
    """
    d = rcnn_load_cached_pool5_features(
        rcnn_model.cache_name, imdb.name, imidx)
    d['feat'] = rcnn_pool5_to_fcX(d['feat'], rcnn_model.opts.layer, rcnn_model)

    scores = rcnn_model.classifier.predict_proba(d['feat'].astype('f'))
    scores_idx = np.argmax(scores, 1)
    num_classes = len(rcnn_model.classes)

    # ``range`` exists on Python 2 and 3; ``xrange`` is undefined on 3.
    dets = [[] for _ in range(num_classes)]
    for i in range(1, num_classes):
        I = np.where(scores_idx == i)[0]
        if I.size == 0:
            continue
        class_scores = scores[I, i]
        scored_boxes = np.concatenate(
            (d['boxes'][I, :], class_scores.reshape((class_scores.size, 1))),
            1)
        keep = nms(scored_boxes, 0.3)
        dets[i] = scored_boxes[keep, :]
    return dets
# Smoke test: train on one random image for 100 solver steps and save a
# figure of the detections every 10 steps.
coder = Coder()
image = np.random.rand(224, 224, 3)
# NOTE(review): this is a plain reshape, not an axis transpose, so the
# channel layout is scrambled -- harmless only because the image is noise.
data = np.reshape(image, (1, 3, 224, 224))
gt = coder._generate_boxes(1)
inputs = prepare_inputs(data, gt)
caffe.set_mode_gpu()
net = solver.net
utils.set_inputs(net, **inputs)
for step in range(100):
    solver.step(1)
    delta = unpack_outputs(net.blobs['preds_reshape'].data)
    probs = unpack_outputs(net.blobs['final_probs'].data)
    # Rows are [decoded box (4 values), probability from column 1 of probs].
    bboxes = np.zeros((100, 5))
    bboxes[:, 0:4] = coder.decode(delta)
    bboxes[:, 4] = probs[:, 1]
    dets = utils.nms(bboxes)
    if step % 10 == 0:
        ax = utils.draw_image(image)
        # Boxes are multiplied by the 224-pixel image size before drawing;
        # detections in red, ground truth in green.
        utils.vis_bboxes(ax, dets * 224, 'red')
        utils.vis_bboxes(ax, gt * 224, 'green')
        plt.axis('off')
        plt.tight_layout()
        plt.savefig('%04d.png'%step)
        plt.close()