def forward(self, x): """ The input should be of size [batch_size, 3, img_h, img_w] """ _, _, img_h, img_w = x.size() cfg._tmp_img_h = img_h cfg._tmp_img_w = img_w with timer.env('backbone'): outs = self.backbone(x) if cfg.fpn is not None: with timer.env('fpn'): # Use backbone.selected_layers because we overwrote self.selected_layers outs = [outs[i] for i in cfg.backbone.selected_layers] outs = self.fpn(outs) proto_out = None if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch: with timer.env('proto'): proto_x = x if self.proto_src is None else outs[self.proto_src] if self.num_grids > 0: grids = self.grid.repeat(proto_x.size(0), 1, 1, 1) proto_x = torch.cat([proto_x, grids], dim=1) proto_out = self.proto_net(proto_x) proto_out = cfg.mask_proto_prototype_activation(proto_out) if cfg.mask_proto_prototypes_as_features: # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary proto_downsampled = proto_out.clone() if cfg.mask_proto_prototypes_as_features_no_grad: proto_downsampled = proto_out.detach() # Move the features last so the multiplication is easy proto_out = proto_out.permute(0, 2, 3, 1).contiguous() if cfg.mask_proto_bias: bias_shape = [x for x in proto_out.size()] bias_shape[-1] = 1 proto_out = torch.cat( [proto_out, torch.ones(*bias_shape)], -1) with timer.env('pred_heads'): pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []} if cfg.use_mask_scoring: pred_outs['score'] = [] if cfg.use_instance_coeff: pred_outs['inst'] = [] for idx, pred_layer in zip(self.selected_layers, self.prediction_layers): pred_x = outs[idx] if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features: # Scale the prototypes down to the current prediction layer's size and add it as inputs proto_downsampled = F.interpolate( proto_downsampled, size=outs[idx].size()[2:], mode='bilinear', align_corners=False) pred_x = torch.cat([pred_x, proto_downsampled], dim=1) # A hack for the way dataparallel works if cfg.share_prediction_module and pred_layer is not self.prediction_layers[ 0]: pred_layer.parent = [self.prediction_layers[0]] p = pred_layer(pred_x) for k, v in p.items(): pred_outs[k].append(v) for k, v in pred_outs.items(): pred_outs[k] = torch.cat(v, -2) if proto_out is not None: pred_outs['proto'] = proto_out if self.training: # For the extra loss functions if cfg.use_class_existence_loss: pred_outs['classes'] = self.class_existence_fc( outs[-1].mean(dim=(2, 3))) if cfg.use_semantic_segmentation_loss: pred_outs['segm'] = self.semantic_seg_conv(outs[0]) return pred_outs else: if cfg.use_mask_scoring: pred_outs['score'] = torch.sigmoid(pred_outs['score']) if cfg.use_focal_loss: if cfg.use_sigmoid_focal_loss: # Note: even though conf[0] exists, this mode doesn't train it so don't use it pred_outs['conf'] = torch.sigmoid(pred_outs['conf']) if cfg.use_mask_scoring: pred_outs['conf'] *= pred_outs['score'] elif cfg.use_objectness_score: # See focal_loss_sigmoid in multibox_loss.py for details objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) pred_outs['conf'][:, :, 1:] = objectness[:, :, None] * F.softmax( pred_outs['conf'][:, :, 1:], -1) pred_outs['conf'][:, :, 0] = 1 - objectness else: pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) else: if cfg.use_objectness_score: objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) pred_outs['conf'][:, :, 1:] = (objectness > 0.10)[..., None] \ * F.softmax(pred_outs['conf'][:, :, 1:], dim=-1) else: pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) return self.detect(pred_outs, self)
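# --- Illustration (not part of the original file) -------------------------
# A minimal sketch of how the training-mode output of forward() above could be
# inspected. It assumes a constructed Yolact model named `net`; the name and
# the 550-pixel input size are assumptions, not taken from this snippet. In
# training mode forward() returns a dict whose per-head tensors have already
# been concatenated along the prior dimension; in eval mode it instead returns
# self.detect(pred_outs, self).
import torch

def inspect_train_outputs(net, img_size=550):
    net.train()
    dummy = torch.zeros((1, 3, img_size, img_size))
    if next(net.parameters()).is_cuda:
        dummy = dummy.cuda()
    with torch.no_grad():
        pred_outs = net(dummy)
    # Expected keys include 'loc', 'conf', 'mask', 'priors' and, when the
    # lincomb mask branch is enabled, 'proto' (plus optional 'score'/'inst').
    for k, v in pred_outs.items():
        print(k, tuple(v.size()))
    return pred_outs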
def prep_display_mod(dets_out, img, h, w, depth_map, rel_depth,
                     undo_transform=True, mask_alpha=1.0):  # was mask_alpha=0.45
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    score_threshold = 0.15
    top_k = 15

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, score_threshold=score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:top_k]  # top_k = 15

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < score_threshold:
            num_dets_to_consider = j
            break

    classes = classes[:num_dets_to_consider]  # added

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        # color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)  # original
        color_idx = j  # black

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if num_dets_to_consider > 0:  # was ...>0
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]
        # print("masks_og.shape", masks.shape)

        # begin added // filter out the person masks and class indices
        people_masks_idxs = []
        classes_to_mask = []
        x = []  # save the center points of the boxes in the same order as the masks
        y = []
        for i, j in enumerate(classes):
            if j == 0:  # j = 0 for person class; filter out only people's masks
                people_masks_idxs.append(i)
                classes_to_mask.append(j)
                x1, y1, x2, y2 = boxes[i, :]
                x.append(int((x1 + x2) / 2))
                y.append(int((y1 + y2) / 2))

        num_dets_to_consider = len(classes_to_mask)
        if num_dets_to_consider == 0:
            # If no people, return a black image
            return (img_gpu * 0).byte().cpu().numpy()  # make it black before returning

        # depth_map is indexed [row, col], so the box-center y coordinate comes first
        x_arr = np.array(y)
        y_arr = np.array(x)

        obj_depths = []
        for i in range(x_arr.size):  # store the depths of the people
            obj_depths.append(depth_map[x_arr[i], y_arr[i], 0])
            # print("depth at object i: ", x_arr[i], y_arr[i], " : ", obj_depths[i])
        obj_depths = np.array(obj_depths)
        people_masks_idxs = np.array(people_masks_idxs)

        # Sort the masks and people locations by depth in descending order
        sorted_idx_by_depth = np.array(np.argsort(-obj_depths))
        # x = x[sorted_idx_by_depth]
        # y = y[sorted_idx_by_depth]
        obj_depths = obj_depths[sorted_idx_by_depth]
        people_masks_idxs = people_masks_idxs[sorted_idx_by_depth]

        # Filter out the people within the depth threshold
        depth_thres = obj_depths[0] * (1.0 - rel_depth)
        people_masks_idxs = people_masks_idxs[[
            i for i, v in enumerate(obj_depths) if v >= depth_thres
        ]]
        np.array(people_masks_idxs).T.tolist()  # (result unused)

        masks = masks[people_masks_idxs]
        num_dets_to_consider = len(people_masks_idxs)

        colors = torch.cat(
            [get_color(0, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)],
            dim=0)

        # Merge the remaining person masks into one binary mask
        tmp = masks[0]
        if num_dets_to_consider > 1:
            for msk in masks[1:]:
                tmp = tmp + msk
        # print("masks.shape: ", masks.shape)
        # print("tmp.shape: ", (tmp.unsqueeze(0)).shape)
        masks = tmp.unsqueeze(0)
        masks[masks != 0.0] = 1.0

        inv_alph_masks = masks * (-mask_alpha) + 1
        masks_color = (inv_alph_masks.repeat(1, 1, 1, 3)) * colors * mask_alpha
        inv_alph_masks = masks.repeat(1, 1, 1, 3)
        # inv_alph_masks = masks
        # print("masks : ", masks)
        # masks = (masks-1.)*-1.
        # inv_alph_masks = masks * (-mask_alpha)+1
        # masks_color = masks_color*0.5
        # end added

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        # masks_color_summand = masks_color[0]
        # if num_dets_to_consider > 1:
        #     inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
        #     masks_color_cumul = masks_color[1:] * inv_alph_cumul
        #     masks_color_summand += masks_color_cumul.sum(dim=0)
        # img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand  # original

        # print("inv_alph_masks.shape: ", (torch.squeeze(inv_alph_masks,0)).shape)
        # print("masks_color.shape: ", (torch.squeeze(masks_color,0)).shape)
        img_gpu = img_gpu * torch.squeeze(inv_alph_masks, 0) + torch.squeeze(
            masks_color, 0)  # added
        # img_gpu = img_gpu

    img_numpy = (img_gpu * 255.0).byte().cpu().numpy()
    return img_numpy
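# --- Illustration (not part of the original file) -------------------------
# A self-contained numpy sketch of the relative-depth filter used in
# prep_display_mod() above: sample the depth map at each person's box centre,
# sort by depth (descending), and keep everyone whose depth is at least
# reference_depth * (1 - rel_depth). Names and shapes here are illustrative.
import numpy as np

def filter_people_by_depth(boxes, depth_map, rel_depth):
    # boxes: [N, 4] array of (x1, y1, x2, y2); depth_map: [H, W, C]
    cx = ((boxes[:, 0] + boxes[:, 2]) / 2).astype(int)
    cy = ((boxes[:, 1] + boxes[:, 3]) / 2).astype(int)
    depths = depth_map[cy, cx, 0]                 # depth_map is indexed [row, col]
    order = np.argsort(-depths)                   # descending by depth
    depth_thres = depths[order[0]] * (1.0 - rel_depth)
    keep = order[depths[order] >= depth_thres]    # indices into the original boxes
    return keep

# Example with a synthetic 480x640 depth map and three people:
depth_map = np.random.rand(480, 640, 1)
boxes = np.array([[10, 10, 50, 100], [200, 50, 260, 200], [400, 300, 470, 460]])
print(filter_people_by_depth(boxes, depth_map, rel_depth=0.2))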
# GPU
net = net.cuda()
torch.set_default_tensor_type('torch.cuda.FloatTensor')

x = torch.zeros((1, 3, cfg.max_size, cfg.max_size))
y = net(x)

for p in net.prediction_layers:
    print(p.last_conv_size)

print()
for k, a in y.items():
    print(k + ': ', a.size(), torch.sum(a))
exit()

net(x)
# timer.disable('pass2')
avg = MovingAverage()
try:
    while True:
        timer.reset()
        with timer.env('everything else'):
            net(x)
        avg.add(timer.total_time())

        print('\033[2J')  # Moves console cursor to 0,0
        timer.print_stats()
        print('Avg fps: %.2f\tAvg ms: %.2f ' %
              (1 / avg.get_avg(), avg.get_avg() * 1000))
except KeyboardInterrupt:
    pass
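# --- Illustration (not part of the original file) -------------------------
# CUDA kernels launch asynchronously, so wall-clock timing around net(x) can
# under-report the real cost unless the device is synchronized (whether the
# project's `timer` already does this is not shown in this snippet). A minimal
# sketch of an explicitly synchronized timing loop, assuming a callable `net`
# and an input `x` already on the GPU:
import time
import torch

def time_forward(net, x, iters=100, warmup=10):
    with torch.no_grad():
        for _ in range(warmup):          # warm up cuDNN / build the graph
            net(x)
        torch.cuda.synchronize()
        start = time.perf_counter()
        for _ in range(iters):
            net(x)
        torch.cuda.synchronize()         # wait for all queued kernels to finish
        elapsed = time.perf_counter() - start
    print('Avg fps: %.2f\tAvg ms: %.2f' % (iters / elapsed, 1000 * elapsed / iters))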
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False,
                 mask_alpha=0.45):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    # print(img.shape)  # torch.Size([480, 360, 3])
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        # Take the top-k highest-scoring detections; k is set by the passed-in argument
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]
        # We now have the top-k classes, scores and boxes, so they can be modified here
        # print(classes)  # Class reference: class 0: person, class 2: car
        # print(scores)
        # print(boxes)

        # Define a variable (area_b) for the box area
        person_index = (classes == 0)  # person_index marks which boxes belong to the person class
        if person_index.any():  # the person class is present
            boxes = boxes[person_index]
            scores = scores[person_index]

            # Compute the area of each person box
            area = np.zeros(len(scores))
            for i in range(person_index.sum()):
                box = boxes[i]
                area[i] = (box[2] - box[0]) * (box[3] - box[1])

            # Filter the person boxes by area
            # Assume the smallest person covers roughly 25*100 pixels, and apply that threshold
            # valid_person_index = ((area >= 2500) and (scores < 0.01))
            valid_person_index = (area >= 2500)
            boxes = boxes[valid_person_index]
            scores = scores[valid_person_index]

            if valid_person_index.any():
                # People remain after the area and score filtering
                print('----- Person detected -----')
            else:
                # No people remain after the area and score filtering
                print('----- No person -----')
            num_dets_to_consider = valid_person_index.sum()
        else:
            # No person class detected at all
            print('----- No person -----')
            num_dets_to_consider = 0

    if num_dets_to_consider == 0:
        # No person detected; return the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()
    # img_numpy = (masks * 255).byte().cpu().numpy()

    # Boxes were detected: draw them and their label text
    for j in reversed(range(num_dets_to_consider)):
        # Inside this loop, boxes, scores and classes are each indexed down by one dimension
        x1, y1, x2, y2 = boxes[j][:]
        color = get_color(classes[j])
        score = scores[j]

        # Draw the detection box
        cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        # Render the detection result text
        _class = cfg.dataset.class_names[classes[j]]
        text_str = '%s: %.2f' % (_class, score)  # if args.display_scores else _class

        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                         font_thickness)[0]
        text_pt = (x1, y1 - 3)
        text_color = [255, 255, 255]

        cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4),
                      color, -1)
        cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
images = glob.glob("/home/alex/Yolact_pytorch/results/images/10.png")
num = len(images)
print(num)

for i, one_img in enumerate(images):
    img_name = one_img.split('/')[-1]
    img_origin = torch.from_numpy(cv2.imread(one_img)).float()
    if cuda:
        img_origin = img_origin.cuda()
    img_h, img_w = img_origin.shape[0], img_origin.shape[1]
    img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
    net_outs = net(img_trans)
    nms_outs = NMS(net_outs, args.traditional_nms)
    show_lincomb = bool(args.show_lincomb and args.image_path)

    with timer.env('after nms'):
        results = after_nms(nms_outs, img_h, img_w,
                            show_lincomb=show_lincomb,
                            crop_masks=not args.no_crop,
                            visual_thre=args.visual_thre,
                            img_name=img_name)
        if cuda:
            torch.cuda.synchronize()

    img_numpy = draw_img(results, img_origin, args)

    cv2.imwrite(f'results/images/{img_name}', img_numpy)
    print(f'\r{i + 1}/{num}', end='')
def _mask_iou(mask1, mask2, iscrowd=False):
    with timer.env('Mask IoU'):
        ret = mask_iou(mask1, mask2, iscrowd)
    return ret.cpu()
def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections: Detections = None): """ Returns a list of APs for this image, with each element being for a class """ if not args.output_coco_json: with timer.env('Prepare gt'): gt_boxes = torch.Tensor(gt[:, :4]) gt_boxes[:, [0, 2]] *= w gt_boxes[:, [1, 3]] *= h gt_classes = list(gt[:, 4].astype(int)) gt_masks = torch.Tensor(gt_masks).view(-1, h * w) if num_crowd > 0: split = lambda x: (x[-num_crowd:], x[:-num_crowd]) crowd_boxes, gt_boxes = split(gt_boxes) crowd_masks, gt_masks = split(gt_masks) crowd_classes, gt_classes = split(gt_classes) with timer.env('Postprocess'): classes, scores, boxes, masks = postprocess( dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) if classes.size(0) == 0: return classes = list(classes.cpu().numpy().astype(int)) scores = list(scores.cpu().numpy().astype(float)) masks = masks.view(-1, h * w).cuda() boxes = boxes.cuda() if args.output_coco_json: with timer.env('JSON Output'): boxes = boxes.cpu().numpy() masks = masks.view(-1, h, w).cpu().numpy() for i in range(masks.shape[0]): # Make sure that the bounding box actually makes sense and a mask was produced if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0: detections.add_bbox(image_id, classes[i], boxes[i, :], scores[i]) detections.add_mask(image_id, classes[i], masks[i, :, :], scores[i]) return with timer.env('Eval Setup'): num_pred = len(classes) num_gt = len(gt_classes) mask_iou_cache = _mask_iou(masks, gt_masks) bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()) if num_crowd > 0: crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True) crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True) else: crowd_mask_iou_cache = None crowd_bbox_iou_cache = None iou_types = [('box', lambda i, j: bbox_iou_cache[i, j].item(), lambda i, j: crowd_bbox_iou_cache[i, j].item()), ('mask', lambda i, j: mask_iou_cache[i, j].item(), lambda i, j: crowd_mask_iou_cache[i, j].item())] timer.start('Main loop') for _class in set(classes + gt_classes): ap_per_iou = [] num_gt_for_class = sum([1 for x in gt_classes if x == _class]) for iouIdx in range(len(iou_thresholds)): iou_threshold = iou_thresholds[iouIdx] for iou_type, iou_func, crowd_func in iou_types: gt_used = [False] * len(gt_classes) ap_obj = ap_data[iou_type][iouIdx][_class] ap_obj.add_gt_positives(num_gt_for_class) for i in range(num_pred): if classes[i] != _class: continue max_iou_found = iou_threshold max_match_idx = -1 for j in range(num_gt): if gt_used[j] or gt_classes[j] != _class: continue iou = iou_func(i, j) if iou > max_iou_found: max_iou_found = iou max_match_idx = j if max_match_idx >= 0: gt_used[max_match_idx] = True ap_obj.push(scores[i], True) else: # If the detection matches a crowd, we can just ignore it matched_crowd = False if num_crowd > 0: for j in range(len(crowd_classes)): if crowd_classes[j] != _class: continue iou = crowd_func(i, j) if iou > iou_threshold: matched_crowd = True break # All this crowd code so that we can make sure that our eval code gives the # same result as COCOEval. There aren't even that many crowd annotations to # begin with, but accuracy is of the utmost importance. if not matched_crowd: ap_obj.push(scores[i], False) timer.stop('Main loop')
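# --- Illustration (not part of the original file) -------------------------
# prep_metrics() above feeds each APDataObject with (score, is_true_positive)
# pairs via push() and the ground-truth count via add_gt_positives().
# APDataObject itself is defined elsewhere; as a rough standalone sketch, the
# average precision for one class at one IoU threshold is typically computed
# from those pairs like this (101-point interpolation is one common choice):
import numpy as np

def average_precision(score_tp_pairs, num_gt):
    if num_gt == 0 or not score_tp_pairs:
        return 0.0
    pairs = sorted(score_tp_pairs, key=lambda p: -p[0])       # by descending score
    tp = np.cumsum([1 if is_tp else 0 for _, is_tp in pairs])
    fp = np.cumsum([0 if is_tp else 1 for _, is_tp in pairs])
    precision = tp / (tp + fp)
    recall = tp / num_gt
    ap = 0.0
    for r in np.linspace(0, 1, 101):                           # sampled recall levels
        prec_at_r = precision[recall >= r]
        ap += prec_at_r.max() if prec_at_r.size else 0.0
    return ap / 101

print(average_precision([(0.9, True), (0.8, False), (0.7, True)], num_gt=2))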
def instance_logit(dets, w, h, interpolation_mode='bilinear', visualize_lincomb=False, crop_masks=True, score_threshold=0, overlap_thr=0.5, mask_prune=False): with timer.env('Postprocess'): classes, scores, boxes, masks = postprocess( dets, w, h, score_threshold=score_threshold, mask_score=False) if classes.size(0) == 0: #no predicted mask return None, None, None classes = classes.cpu().numpy().astype(int) scores = scores.cpu().numpy().astype(float) masks = masks.view(-1, h, w).cuda() boxes = boxes used = np.zeros((np.max(classes) + 1, h, w), dtype=np.uint8) # used = np.zeros((h,w), dtype=np.uint8) keep_masks = [] keep_boxes = [] keep_classes = [] # mask_prune = True # if mask_prune is False: # return masks, boxes, classes # else: with timer.env('things mask pruning'): org_boxes = boxes.clone( ) #after sanitization, the bbox became absolute coord, but we want to keep it relative to apply in crop function boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, cast=False) boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, cast=False) boxes = boxes.cpu().long().numpy() for i in range(masks.size(0)): if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) <= 0: continue mask_crop = masks[i, boxes[i, 1]:boxes[i, 3], boxes[i, 0]:boxes[ i, 2]].cpu().numpy() #mask logit , before activation mask_crop = np.array(mask_crop > 0, dtype=np.uint8) used_crop = used[classes[i], boxes[i, 1]:boxes[i, 3], boxes[i, 0]:boxes[i, 2]] area = mask_crop.sum() if area == 0 or (np.logical_and(used_crop >= 1, mask_crop == 1).sum() / area > overlap_thr): continue used[classes[i], boxes[i, 1]:boxes[i, 3], boxes[i, 0]:boxes[i, 2]] += mask_crop keep_masks.append(masks[i, :, :]) keep_boxes.append(org_boxes[i, :]) keep_classes.append(classes[i]) if len(keep_masks) > 0: ins_logits = torch.stack(keep_masks, dim=0) keep_boxes = torch.stack(keep_boxes, dim=0) return ins_logits, keep_boxes, np.array(keep_classes) else: return None, None, None
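# --- Illustration (not part of the original file) -------------------------
# A compact numpy sketch of the per-class overlap pruning done in
# instance_logit() above: walk the detections in order, rasterize each mask
# into a per-class "used" canvas, and drop a detection when too much of it is
# already covered by earlier detections of the same class. The threshold and
# shapes are illustrative only.
import numpy as np

def prune_overlapping(masks, classes, overlap_thr=0.5):
    # masks: [N, H, W] binary arrays, classes: [N] ints
    used = np.zeros((classes.max() + 1,) + masks.shape[1:], dtype=np.uint8)
    keep = []
    for i, (m, c) in enumerate(zip(masks, classes)):
        area = m.sum()
        if area == 0:
            continue
        already = np.logical_and(used[c] >= 1, m == 1).sum()
        if already / area > overlap_thr:
            continue                       # mostly covered by an earlier instance
        used[c] += m
        keep.append(i)
    return keep

masks = np.zeros((2, 4, 4), dtype=np.uint8)
masks[0, :, :2] = 1
masks[1, :, :3] = 1                        # overlaps 2/3 with the first mask
print(prune_overlapping(masks, np.array([0, 0])))   # -> [0]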
def evaluate(net, dataset, max_num=-1, during_training=False, benchmark=False, cocoapi=False, traditional_nms=False): frame_times = MovingAverage() dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset)) dataset_indices = list(range(len(dataset))) dataset_indices = dataset_indices[:dataset_size] progress_bar = ProgressBar(40, dataset_size) if benchmark: timer.disable('Data loading') else: # For each class and iou, stores tuples (score, isPositive) # Index ap_data[type][iouIdx][classIdx] ap_data = { 'box': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds], 'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds] } make_json = Make_json() for i, image_idx in enumerate(dataset_indices): timer.reset() with timer.env('Data loading'): img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) batch = Variable(img.unsqueeze(0)) if cuda: batch = batch.cuda() with timer.env('Network forward'): net_outs = net(batch) nms_outs = NMS(net_outs, traditional_nms) if benchmark: prep_benchmark(nms_outs, h, w) else: prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], make_json, cocoapi) # First couple of images take longer because we're constructing the graph. # Since that's technically initialization, don't include those in the FPS calculations. fps = 0 if i > 1 and not during_training: frame_times.add(timer.total_time()) fps = 1 / frame_times.get_avg() progress = (i + 1) / dataset_size * 100 progress_bar.set_val(i + 1) print('\rProcessing: %s %d / %d (%.2f%%) %.2f fps ' % (repr(progress_bar), i + 1, dataset_size, progress, fps), end='') if benchmark: print('\n\nStats for the last frame:') timer.print_stats() avg_seconds = frame_times.get_avg() print('Average: %5.2f fps, %5.2f ms' % (1 / frame_times.get_avg(), 1000 * avg_seconds)) else: if cocoapi: make_json.dump() print(f'\nJson files dumped, saved in: {json_path}.') return table = calc_map(ap_data) print(table) return table
def process(): try: destFile = "" if request.method == 'POST': file = request.files['file'] if file and allowed_file(file.filename): filename = secure_filename(file.filename) destFile = os.path.join(app.config['UPLOAD_FOLDER'], filename) file.save(destFile) app.logger.warning('filename=(%s)', filename) else: app.logger.warning("Request dictionary data: {}".format(request.data)) app.logger.warning("Request dictionary form: {}".format(request.form)) url = request.form["url"] print("url:", url) # download file destFile = download_file(url) # app.logger.error('An error occurred') app.logger.warning('destFile=(%s)', destFile) img_name = destFile.split('/')[-1] app.logger.warning('img_name=(%s)', img_name) img_origin = torch.from_numpy(cv2.imread(destFile)).float() if cuda: img_origin = img_origin.cuda() img_h, img_w = img_origin.shape[0], img_origin.shape[1] img_trans = FastBaseTransform()(img_origin.unsqueeze(0)) net_outs = net(img_trans) nms_outs = NMS(net_outs, args.traditional_nms) app.logger.warning('img_h=(%s)', img_h) app.logger.warning('img_w=(%s)', img_w) app.logger.warning('cuda=(%s)', cuda) app.logger.warning('args.show_lincomb=(%s)', args.show_lincomb) app.logger.warning('args.no_crop=(%s)', args.no_crop) app.logger.warning('args.visual_thre=(%s)', args.visual_thre) app.logger.warning('args=(%s)', args) show_lincomb = bool(args.show_lincomb) with timer.env('after nms'): results = after_nms(nms_outs, img_h, img_w, show_lincomb=show_lincomb, crop_masks=not args.no_crop, visual_thre=args.visual_thre, img_name=img_name) if cuda: torch.cuda.synchronize() # app.logger.warning('results=(%s)', results) img_numpy = draw_img(results, img_origin, args) cv2.imwrite(f'results/images/{img_name}', img_numpy) # print(f'\r{i + 1}/{num}', end='') try: im = Image.open(f'results/images/{img_name}') # im = Image.open(destFile) io = BytesIO() im.save(io, format='JPEG') return Response(io.getvalue(), mimetype='image/jpeg') except IOError: abort(404) # return send_from_directory('.', filename), 200 callback = json.dumps({"results": results}) return callback, 200 except: traceback.print_exc() return {'message': 'input error'}, 400
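# --- Illustration (not part of the original file) -------------------------
# A minimal client-side sketch for exercising the file-upload branch of
# process() above. The route path and port are not shown in this snippet, so
# the URL below is purely an assumption. (The else-branch instead downloads
# the image named in the form field 'url'.)
import requests

ENDPOINT = 'http://localhost:5000/process'   # assumed route and port

with open('test.jpg', 'rb') as f:
    resp = requests.post(ENDPOINT, files={'file': ('test.jpg', f, 'image/jpeg')})

print(resp.status_code, resp.headers.get('Content-Type'))
if resp.ok:
    with open('result.jpg', 'wb') as out:
        out.write(resp.content)              # the endpoint streams back a JPEG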
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''): """ Note: If undo_transform=False then im_h and im_w are allowed to be None. """ global first_frame, old_obj_info name = [] mask_img = [] if undo_transform: img_numpy = undo_image_transformation(img, w, h) img_gpu = torch.Tensor(img_numpy).cuda() else: img_gpu = img / 255.0 h, w, _ = img.shape with timer.env('Postprocess'): save = cfg.rescore_bbox cfg.rescore_bbox = True t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb, crop_masks=args.crop, score_threshold=args.score_threshold) cfg.rescore_bbox = save with timer.env('Copy'): #idx = t[1].argsort(0, descending=True)[:args.top_k] idx1 = t[1].argsort() idx = idx1.argsort() if cfg.eval_mask_branch: # Masks are drawn on the GPU, so don't copy masks = t[3][idx] mask_picture = t[3][idx] classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]] for i in range(len(classes)): name.append(cfg.dataset.class_names[classes[i]]) mask_img.append(mask_picture[i:i + 1, :, :, None]) #obj_info, obj_num = data_save(mask_img, classes, scores, boxes) start = time.time() obj_info, obj_num = sort_info.data_save(mask_img, classes, name, scores, boxes, first_frame, old_obj_info) end = time.time() print('aaaaaaaaaa', end - start) first_frame = True num_dets_to_consider = min(args.top_k, classes.shape[0]) for j in range(num_dets_to_consider): if scores[j] < args.score_threshold: num_dets_to_consider = j break # Quick and dirty lambda for selecting the color for a particular index # Also keeps track of a per-gpu color cache for maximum speed def get_color(j, on_gpu=None): global color_cache color_idx = (obj_info[j][0] * 5 if class_color else j * 5) % len(COLORS) if on_gpu is not None and color_idx in color_cache[on_gpu]: return color_cache[on_gpu][color_idx] else: color = COLORS[color_idx] if not undo_transform: # The image might come in as RGB or BRG, depending color = (color[2], color[1], color[0]) if on_gpu is not None: color = torch.Tensor(color).to(on_gpu).float() / 255. color_cache[on_gpu][color_idx] = color return color # First, draw the masks on the GPU where we can do it really fast # Beware: very fast but possibly unintelligible mask-drawing code ahead # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0: # After this, mask is of size [num_dets, h, w, 1] masks = masks[:num_dets_to_consider, :, :, None] #img_gpu = img_gpu * (masks.sum(dim=0) > 0.5).float() #only show mask #img_gpu = img_gpu * masks[0] #mike0225 mask_img = img_gpu * (masks.sum(dim=0) > 0.5).float() #0209 global mask_numpy mask_numpy = (mask_img * 255).byte().cpu().numpy() #0209 mask_numpy = cv2.cvtColor(mask_numpy, cv2.COLOR_BGR2GRAY) # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1]) colors = torch.cat([ get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider) ], dim=0) masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha # This is 1 everywhere except for 1-mask_alpha where the mask is inv_alph_masks = masks * (-mask_alpha) + 1 # I did the math for this on pen and paper. 
This whole block should be equivalent to: # for j in range(num_dets_to_consider): # img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j] masks_color_summand = masks_color[0] if num_dets_to_consider > 1: inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0) masks_color_cumul = masks_color[1:] * inv_alph_cumul masks_color_summand += masks_color_cumul.sum(dim=0) img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand if args.display_fps: # Draw the box for the fps on the GPU font_face = cv2.FONT_HERSHEY_DUPLEX font_scale = 0.6 font_thickness = 1 text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0] img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6 # 1 - Box alpha # Then draw the stuff that needs to be done on the cpu # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason img_numpy = (img_gpu * 255).byte().cpu().numpy() if args.display_fps: # Draw the text on the CPU text_pt = (4, text_h + 2) text_color = [255, 255, 255] cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) if num_dets_to_consider == 0: return img_numpy if args.display_text or args.display_bboxes: global frame_count, state_pre, flag, predict_pos, centerX, centerY, degree, mask_color, mask_flag, pub_Flag frame_count += 1 pub_array_msg = obj_array() for j in range(obj_num): global img_num, temp_x, temp_y, yhat if obj_info[j][2] != 0: #0502------------------------------------------------------------------- mask_image = img_gpu * (obj_info[j][3].sum(dim=0) > 0.5).float() mask_numpy1 = (mask_image * 255).byte().cpu().numpy() mask_color = cv2.cvtColor(mask_numpy1, cv2.COLOR_BGR2GRAY) ''' kernel = np.ones((5,5), np.uint8) mask_color = cv2.erode(mask_color, kernel, iterations = 1) mask_color = cv2.dilate(mask_color, kernel, iterations = 1) ''' mask_flag = False #------------------------------------------------------------------------- if frame_count % 20 == 3: #----------------------------- obj_info[j][5].append(mask_color) mask_flag = True #cv2.imwrite('/home/chien/123/test_{}.jpg'.format(j),mask_numpy1) if len(obj_info[j][5]) > 2: ''' for k in range(len(obj_info[j][5])): cv2.imwrite('/home/chien/123/test_{}.jpg'.format(k),obj_info[j][5][k]) ''' obj_msg = obj_infomsg() obj_msg.id = obj_info[j][0] obj_msg.object_name = obj_info[j][1] imagedata1 = np.array(obj_info[j][5]) imagedata1 = imagedata1.reshape((-1, 3, 480, 640, 1)) imagedata1 = imagedata1 / 255. 
start = time.time() yhat = model.predict(imagedata1, verbose=0) end = time.time() ''' print(end-start) print('---------------') ''' if obj_info[j][6] == []: for i in range(5): x1 = yhat[1][0][i][1] * 320 + 320 y1 = yhat[1][0][i][2] * 240 + 240 degree1 = arctan_recovery( yhat[1][0][i][3], yhat[1][0][i][4]) temp_x1, temp_y1 = trans_degree( x1, y1, degree1) obj_info[j][6].append( (x1, y1, temp_x1, temp_y1)) else: for i in range(5): x1 = yhat[1][0][i][1] * 320 + 320 y1 = yhat[1][0][i][2] * 240 + 240 degree1 = arctan_recovery( yhat[1][0][i][3], yhat[1][0][i][4]) temp_x1, temp_y1 = trans_degree( x1, y1, degree1) obj_info[j][6][i] = (x1, y1, temp_x1, temp_y1) ''' obj_info[j][6].pop(0) x1 = yhat[1][0][4][1]*320+320 y1 = yhat[1][0][4][2]*240+240 degree1 = arctan_recovery(yhat[1][0][4][3],yhat[1][0][4][4]) temp_x1,temp_y1=trans_degree(x1,y1,degree1) obj_info[j][6].append((x1,y1,temp_x1,temp_y1)) ''' obj_msg.x = yhat[1][0][4][ 1] * 320 + 320 #yhat[1][0][3][1]*320+320 obj_msg.y = yhat[1][0][4][2] * 240 + 240 obj_msg.degree = arctan_recovery( yhat[1][0][4][3], yhat[1][0][4][4]) tx1, ty1 = trans_degree(obj_msg.x, obj_msg.y, obj_msg.degree) ''' print( obj_msg.degree) cv2.circle(img_numpy, (int(obj_msg.x),int(obj_msg.y)),5,(0, 0, 255),5) cv2.line(img_numpy,(int(obj_msg.x+tx1),int(obj_msg.y+ty1)),(int(obj_msg.x-tx1),int(obj_msg.y-ty1)),(0,0,255),5) ''' #print( obj_msg.degree) pub_array_msg.Obj_list.append(obj_msg) pub_Flag = True obj_info[j][5].pop(0) #0->1 ''' global pointx,pointy,real_pointx,real_pointy, point_count ,use_count use_count+=1 if use_count >=10: pointx.append(obj_info[j][6][4][0]) pointy.append(obj_info[j][6][4][1]) point_count += 1 if point_count >= 5: real_pointx.append(yhat[0][0][2][1]*320+320) real_pointy.append(yhat[0][0][2][2]*240+240) ''' if obj_info[j][6] != []: for i in range(5): px = obj_info[j][6][i][0] py = obj_info[j][6][i][1] temp_px = obj_info[j][6][i][2] temp_py = obj_info[j][6][i][3] cv2.circle(img_numpy, (int(px), int(py)), 5, (0, 0, 255), 5) cv2.line(img_numpy, (int(px + temp_px), int(py + temp_py)), (int(px - temp_px), int(py - temp_py)), (0, 0, 255), 5) color = get_color(obj_info[j][0]) score = obj_info[j][3] if args.display_bboxes: cv2.rectangle(img_numpy, (obj_info[j][4][2], obj_info[j][4][4]), (obj_info[j][4][3], obj_info[j][4][5]), color, 1) if args.display_text: _class = obj_info[j][1] #text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class text_str = '%s: %s' % (obj_info[j][0], _class ) if args.display_scores else _class #text_str = '%s: %s' % (_class, obj_info[j][2]) if args.display_scores else _class font_face = cv2.FONT_HERSHEY_DUPLEX font_scale = 0.6 font_thickness = 1 text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0] text_pt = (obj_info[j][4][2], obj_info[j][4][4] - 3) text_color = [255, 255, 255] cv2.rectangle(img_numpy, (obj_info[j][4][2], obj_info[j][4][4]), (obj_info[j][4][2] + text_w, obj_info[j][4][4] - text_h - 4), color, -1) cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) if pub_Flag == True: #print(pub_array_msg) array_pub.publish(pub_array_msg) pub_Flag = False old_obj_info = obj_info return img_numpy
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''): """ Note: If undo_transform=False then im_h and im_w are allowed to be None. """ lineThickness = 2 if undo_transform: img_numpy = undo_image_transformation(img, w, h) img_gpu = torch.Tensor(img_numpy).cuda() else: img_gpu = img / 255.0 h, w, _ = img.shape with timer.env('Postprocess'): save = cfg.rescore_bbox cfg.rescore_bbox = True t = postprocess(dets_out, w, h, visualize_lincomb=self.display_lincomb, crop_masks=self.crop, score_threshold=self.score_threshold) cfg.rescore_bbox = save with timer.env('Copy'): # idx = t[1].argsort(0, descending=True)[top_k] if cfg.eval_mask_branch: # Masks are drawn on the GPU, so don't copy masks = t[3][:self.top_k] classes, scores, boxes = [ x[:self.top_k].cpu().detach().numpy() for x in t[:3] ] num_dets_to_consider = min(self.top_k, classes.shape[0]) for j in range(num_dets_to_consider): if scores[j] < self.score_threshold: num_dets_to_consider = j break # Quick and dirty lambda for selecting the color for a particular index # Also keeps track of a per-gpu color cache for maximum speed def get_color(j, on_gpu=None): global color_cache color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS) if on_gpu is not None and color_idx in color_cache[on_gpu]: return color_cache[on_gpu][color_idx] else: color = COLORS[color_idx] if not undo_transform: # The image might come in as RGB or BRG, depending color = (color[2], color[1], color[0]) if on_gpu is not None: color = torch.Tensor(color).to(on_gpu).float() / 255. color_cache[on_gpu][color_idx] = color return color # First, draw the masks on the GPU where we can do it really fast # Beware: very fast but possibly unintelligible mask-drawing code ahead # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice if self.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0: # After this, mask is of size [num_dets, h, w, 1] masks = masks[:num_dets_to_consider, :, :, None] # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1]) colors = torch.cat([ get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider) ], dim=0) masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha # This is 1 everywhere except for 1-mask_alpha where the mask is inv_alph_masks = masks * (-mask_alpha) + 1 # I did the math for this on pen and paper. 
This whole block should be equivalent to: # for j in range(num_dets_to_consider): # img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j] masks_color_summand = masks_color[0] if num_dets_to_consider > 1: inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0) masks_color_cumul = masks_color[1:] * inv_alph_cumul masks_color_summand += masks_color_cumul.sum(dim=0) img_gpu = img_gpu * inv_alph_masks.prod( dim=0) + masks_color_summand if self.display_fps: # Draw the box for the fps on the GPU font_face = cv2.FONT_HERSHEY_DUPLEX font_scale = 0.6 font_thickness = 1 text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0] img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6 # 1 - Box alpha # Then draw the stuff that needs to be done on the cpu # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason img_numpy = (img_gpu * 255).byte().cpu().detach().numpy() if self.display_fps: # Draw the text on the CPU text_pt = (4, text_h + 2) text_color = [255, 255, 255] cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) if num_dets_to_consider == 0: return img_numpy if self.display_text or self.display_bboxes: distance_boxes = [] def all_subsets(ss): return chain( *map(lambda x: combinations(ss, x), range(0, len(ss) + 1))) def draw_distance(boxes): """ input : boxes(type=list) Make all possible combinations between the detected boxes of persons perform distance measurement between the boxes to measure distancing """ red_counter = 0 ## Countting people who are in high risk green_counter = 0 for subset in all_subsets(boxes): if len(subset) == 2: a = np.array((subset[0][2], subset[0][3])) b = np.array((subset[1][2], subset[1][3])) dist = np.linalg.norm( a - b ) ## Eucledian distance if you want differnt ways to measure distance b/w two boxes you can use the following options # dist = spatial.distance.cosine(a, b) # # print ('Eucledian distance is version-1', dist) # # print ('Eucledian distance is', spatial.distance.euclidean(a, b)) # print ('Cosine distance is', dist) if dist < 250: red_counter += len(subset) cv2.line(img_numpy, (subset[0][2], subset[0][3]), (subset[1][2], subset[1][3]), (0, 0, 255), lineThickness) elif dist < 300: green_counter += len(subset) cv2.line(img_numpy, (subset[0][2], subset[0][3]), (subset[1][2], subset[1][3]), (0, 255, 0), lineThickness) log["total_person_in_red_zone"] = red_counter // 2 log["total_person_in_green_zone"] = green_counter // 2 for j in reversed(range(num_dets_to_consider)): x1, y1, x2, y2 = boxes[j, :] color = get_color(j) score = scores[j] if self.display_bboxes: cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1) if self.display_text: _class = cfg.dataset.class_names[classes[j]] if _class == "person": log["total_person"] = num_dets_to_consider distance_boxes.append(boxes[j, :].tolist()) draw_distance(distance_boxes) text_str = '%s: %.2f' % ( _class, score) if self.display_scores else _class font_face = cv2.FONT_HERSHEY_DUPLEX font_scale = 0.6 font_thickness = 1 text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0] text_pt = (x1, y1 - 3) text_color = [255, 255, 255] cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1) cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) return img_numpy
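# --- Illustration (not part of the original file) -------------------------
# A standalone sketch of the pairwise distancing check performed by
# draw_distance() above: every pair of person boxes is formed with
# itertools.combinations and the Euclidean distance between the chosen box
# points (the code above uses the (x2, y2) corner) is compared against fixed
# pixel thresholds. Threshold values and box data here are illustrative.
from itertools import combinations
import numpy as np

def count_risky_pairs(person_boxes, red_thresh=250, green_thresh=300):
    red, green = 0, 0
    for box_a, box_b in combinations(person_boxes, 2):
        a = np.array((box_a[2], box_a[3]), dtype=float)
        b = np.array((box_b[2], box_b[3]), dtype=float)
        dist = np.linalg.norm(a - b)
        if dist < red_thresh:
            red += 1
        elif dist < green_thresh:
            green += 1
    return red, green

boxes = [(0, 0, 100, 200), (50, 0, 180, 210), (400, 0, 600, 220)]
print(count_risky_pairs(boxes))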
def predictions_to_rois(dets_out, width, height, top_k, score_threshold, output_polygons, mask_threshold, mask_nth, output_minrect, view_margin, fully_connected, fit_bbox_to_polygon, bbox_as_fallback, scale, output_mask_image): """ Turns the predictions into ROI objects :param dets_out: the predictions :param width: the width of the image :type width: int :param height: the height of the image :type height: int :param top_k: the maximum number of top predictions to use :type top_k: int :param score_threshold: the minimum score predictions have to have :type score_threshold: float :param output_polygons: whether the model predicts masks and polygons should be stored in the CSV files :type output_polygons: bool :param mask_threshold: the threshold to use for determining the contour of a mask :type mask_threshold: float :param mask_nth: to speed up polygon computation, use only every nth row and column from mask :type mask_nth: int :param output_minrect: when predicting polygons, whether to output the minimal rectangles around the objects as well :type output_minrect: bool :param view_margin: the margin in pixels to use around the masks :type view_margin: int :param fully_connected: whether regions of 'high' or 'low' values should be fully-connected at isthmuses :type fully_connected: str :param fit_bbox_to_polygon: whether to fit the bounding box to the polygon :type fit_bbox_to_polygon: bool :param bbox_as_fallback: if ratio between polygon-bbox and bbox is smaller than this value, use bbox as fallback polygon, ignored if < 0 :type bbox_as_fallback: float :param scale: the scale to use for the image (0-1) :type scale: float :param output_mask_image: when generating masks, whether to output a combined mask image as well :type output_mask_image: bool :return: the list of ROIObjects and output_mask image :rtype: tuple """ result = [] mask_comb = None with timer.env('Postprocess'): save = cfg.rescore_bbox cfg.rescore_bbox = True t = postprocess(dets_out, width, height, crop_masks=False, score_threshold=score_threshold) cfg.rescore_bbox = save with timer.env('Copy'): idx = t[1].argsort(0, descending=True)[:top_k] if output_polygons or output_mask_image: classes, scores, boxes, masks = [x[idx].cpu().numpy() for x in t] else: classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]] num_dets_to_consider = min(top_k, classes.shape[0]) for j in range(num_dets_to_consider): if scores[j] < score_threshold: num_dets_to_consider = j break # the class labels if isinstance(cfg.dataset.class_names, list): class_labels = cfg.dataset.class_names elif isinstance(cfg.dataset.class_names, tuple): class_labels = list(cfg.dataset.class_names) else: class_labels = [cfg.dataset.class_names] if num_dets_to_consider > 0: # After this, mask is of size [num_dets, h, w, 1] if output_polygons or output_mask_image: masks = masks[:num_dets_to_consider, :, :, None] mask = masks[j, :, :][:, :, 0] for j in range(num_dets_to_consider): x0, y0, x1, y1 = boxes[j, :] x0n = x0 / width y0n = y0 / height x1n = x1 / width y1n = y1 / height if scale != 1.0: x0 = int(x0 / scale) y0 = int(y0 / scale) x1 = int(x1 / scale) y1 = int(y1 / scale) label = classes[j] score = scores[j] label_str = class_labels[classes[j]] px = None py = None pxn = None pyn = None bw = None bh = None if output_polygons: px = [] py = [] pxn = [] pyn = [] poly = mask_to_polygon(mask, mask_threshold=mask_threshold, mask_nth=mask_nth, view=(int(x0 * scale), int(y0 * scale), int(x1 * scale), int(y1 * scale)), view_margin=view_margin, 
fully_connected=fully_connected) if len(poly) > 0: px, py = polygon_to_lists(poly[0], swap_x_y=True, normalize=False) if scale != 1.0: px = [x / scale for x in px] py = [y / scale for y in py] pxn, pyn = polygon_to_lists(poly[0], swap_x_y=True, normalize=True, img_width=width, img_height=height) if output_minrect: bw, bh = polygon_to_minrect(poly[0]) if scale != 1.0: bw = bw / scale bh = bh / scale if bbox_as_fallback >= 0: if len(px) >= 3: p_x0n, p_y0n, p_x1n, p_y1n = polygon_to_bbox( lists_to_polygon(pxn, pyn)) p_area = (p_x1n - p_x0n) * (p_y1n - p_y0n) b_area = (x1n - x0n) * (y1n - y0n) if (b_area > 0) and (p_area / b_area < bbox_as_fallback): px = [float(i) for i in [x0, x1, x1, x0]] py = [float(i) for i in [y0, y0, y1, y1]] pxn = [float(i) for i in [x0n, x1n, x1n, x0n]] pyn = [float(i) for i in [y0n, y0n, y1n, y1n]] else: px = [float(i) for i in [x0, x1, x1, x0]] py = [float(i) for i in [y0, y0, y1, y1]] pxn = [float(i) for i in [x0n, x1n, x1n, x0n]] pyn = [float(i) for i in [y0n, y0n, y1n, y1n]] if output_minrect: bw = x1 - x0 + 1 bh = y1 - y0 + 1 if fit_bbox_to_polygon: if len(px) >= 3: x0, y0, x1, y1 = polygon_to_bbox( lists_to_polygon(px, py)) x0n, y0n, x1n, y1n = polygon_to_bbox( lists_to_polygon(pxn, pyn)) if output_mask_image: mask_img = mask.copy() # apply threshold mask_img[mask_img < mask_threshold] = 0 # mask out everything outside detected box m = np.zeros(mask.shape) s = np.ones((y1 - y0, x1 - x0)) m[y0:y0 + s.shape[0], x0:x0 + s.shape[1]] = s mask_img = np.where(m == 1, mask_img, 0) # use label for color mask_img[mask_img < mask_threshold] = 0 mask_img[ mask_img >= mask_threshold] = label + 1 # first label is 0 if mask_comb is None: mask_comb = mask_img else: tmp = np.where(mask_comb == 0, mask_img, mask_comb) mask_comb = tmp roiobj = ROIObject(x0, y0, x1, y1, x0n, y0n, x1n, y1n, label, label_str, score=score, poly_x=px, poly_y=py, poly_xn=pxn, poly_yn=pyn, minrect_w=bw, minrect_h=bh) result.append(roiobj) return result, mask_comb
def prep_display(dets_out, img, gt, gt_masks, h, w, undo_transform=True, class_color=False): """ Note: If undo_transform=False then im_h and im_w are allowed to be None. gt and gt_masks are also allowed to be none (until I reimplement that functionality). """ if undo_transform: img_numpy = undo_image_transformation(img, w, h) img_gpu = torch.Tensor(img_numpy).cuda() else: img_gpu = img / 255.0 h, w, _ = img.shape with timer.env('Postprocess'): t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb, crop_masks=args.crop, score_threshold=args.score_threshold) torch.cuda.synchronize() with timer.env('Copy'): if cfg.eval_mask_branch: masks = t[3][:args.top_k] # We'll need this later classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]] if classes.shape[0] == 0: return (img_gpu * 255).byte().cpu().numpy() def get_color(j): color = COLORS[(classes[j] * 5 if class_color else j * 5) % len(COLORS)] if not undo_transform: color = (color[2], color[1], color[0]) return color # Draw masks first on the gpu if args.display_masks and cfg.eval_mask_branch: for j in reversed(range(min(args.top_k, classes.shape[0]))): if scores[j] >= args.score_threshold: color = get_color(j) mask = masks[j, :, :, None] mask_color = mask @ (torch.Tensor(color).view(1, 3) / 255.0) mask_alpha = 0.45 # Alpha only the region of the image that contains the mask img_gpu = img_gpu * (1 - mask) \ + img_gpu * mask * (1-mask_alpha) + mask_color * mask_alpha # Then draw the stuff that needs to be done on the cpu # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason img_numpy = (img_gpu * 255).byte().cpu().numpy() if args.display_text or args.display_bboxes: for j in reversed(range(min(args.top_k, classes.shape[0]))): score = scores[j] if scores[j] >= args.score_threshold: x1, y1, x2, y2 = boxes[j, :] color = get_color(j) if args.display_bboxes: cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1) if args.display_text: _class = cfg.dataset.class_names[classes[j]] text_str = '%s: %.2f' % ( _class, score) if args.display_scores else _class font_face = cv2.FONT_HERSHEY_DUPLEX font_scale = 0.6 font_thickness = 1 text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0] text_pt = (x1, y1 - 3) text_color = [255, 255, 255] cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1) cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) return img_numpy
def evaluate(net: Yolact, dataset, train_mode=False): net.detect.use_fast_nms = args.fast_nms net.detect.use_cross_class_nms = args.cross_class_nms cfg.mask_proto_debug = args.mask_proto_debug if args.image is not None: if ':' in args.image: inp, out = args.image.split(':') evalimage(net, inp, out) else: evalimage(net, args.image) return elif args.images is not None: inp, out = args.images.split(':') evalimages(net, inp, out) return elif args.video is not None: if ':' in args.video: inp, out = args.video.split(':') evalvideo(net, inp, out) else: evalvideo(net, args.video) return frame_times = MovingAverage() dataset_size = len(dataset) if args.max_images < 0 else min( args.max_images, len(dataset)) progress_bar = ProgressBar(30, dataset_size) print() if not args.display and not args.benchmark: # For each class and iou, stores tuples (score, isPositive) # Index ap_data[type][iouIdx][classIdx] ap_data = { 'box': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds], 'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds] } detections = Detections() else: timer.disable('Load Data') dataset_indices = list(range(len(dataset))) if args.shuffle: random.shuffle(dataset_indices) elif not args.no_sort: # Do a deterministic shuffle based on the image ids # # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's # the order of insertion. That means on python 3.6, the images come in the order they are in # in the annotations file. For some reason, the first images in the annotations file are # the hardest. To combat this, I use a hard-coded hash function based on the image ids # to shuffle the indices we use. That way, no matter what python version or how pycocotools # handles the data, we get the same result every time. hashed = [badhash(x) for x in dataset.ids] dataset_indices.sort(key=lambda x: hashed[x]) dataset_indices = dataset_indices[:dataset_size] try: # Main eval loop for it, image_idx in enumerate(dataset_indices): timer.reset() with timer.env('Load Data'): img, gt, gt_masks, h, w, num_crowd = dataset.pull_item( image_idx) # Test flag, do not upvote if cfg.mask_proto_debug: with open('scripts/info.txt', 'w') as f: f.write(str(dataset.ids[image_idx])) np.save('scripts/gt.npy', gt_masks) batch = Variable(img.unsqueeze(0)) if args.cuda: batch = batch.cuda() with timer.env('Network Extra'): preds = net(batch) # Perform the meat of the operation here depending on our mode. if args.display: img_numpy = prep_display(preds, img, h, w) elif args.benchmark: prep_benchmark(preds, h, w) else: prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections) # First couple of images take longer because we're constructing the graph. # Since that's technically initialization, don't include those in the FPS calculations. 
if it > 1: frame_times.add(timer.total_time()) if args.display: if it > 1: print('Avg FPS: %.4f' % (1 / frame_times.get_avg())) plt.imshow(img_numpy) plt.title(str(dataset.ids[image_idx])) plt.show() elif not args.no_bar: if it > 1: fps = 1 / frame_times.get_avg() else: fps = 0 progress = (it + 1) / dataset_size * 100 progress_bar.set_val(it + 1) print( '\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' % (repr(progress_bar), it + 1, dataset_size, progress, fps), end='') if not args.display and not args.benchmark: print() if args.output_coco_json: print('Dumping detections...') if args.output_web_json: detections.dump_web() else: detections.dump() else: if not train_mode: print('Saving data...') with open(args.ap_data_file, 'wb') as f: pickle.dump(ap_data, f) return calc_map(ap_data) elif args.benchmark: print() print() print('Stats for the last frame:') timer.print_stats() avg_seconds = frame_times.get_avg() print('Average: %5.2f fps, %5.2f ms' % (1 / frame_times.get_avg(), 1000 * avg_seconds)) except KeyboardInterrupt: print('Stopping...')
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=True, mask_alpha=0.45, fps_str=''): """ Note: If undo_transform=False then im_h and im_w are allowed to be None. """ if undo_transform: img_numpy = undo_image_transformation(img, w, h) img_gpu = torch.Tensor(img_numpy).cuda() else: img_gpu = img / 255.0 h, w, _ = img.shape with timer.env('Postprocess'): t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb, crop_masks=args.crop, score_threshold=args.score_threshold) torch.cuda.synchronize() with timer.env('Copy'): if cfg.eval_mask_branch: # Masks are drawn on the GPU, so don't copy masks = t[3][:args.top_k] classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]] num_dets_to_consider = min(args.top_k, classes.shape[0]) for j in range(num_dets_to_consider): if scores[j] < args.score_threshold: num_dets_to_consider = j break # Quick and dirty lambda for selecting the color for a particular index # Also keeps track of a per-gpu color cache for maximum speed def get_color(j, on_gpu=None): global color_cache color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS) #color_idx = classes[j] if on_gpu is not None and color_idx in color_cache[on_gpu]: return color_cache[on_gpu][color_idx] else: color = COLORS[color_idx] if not undo_transform: # The image might come in as RGB or BRG, depending color = (color[2], color[1], color[0]) if on_gpu is not None: color = torch.Tensor(color).to(on_gpu).float() / 255. color_cache[on_gpu][color_idx] = color return color # First, draw the masks on the GPU where we can do it really fast # Beware: very fast but possibly unintelligible mask-drawing code ahead # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0: # After this, mask is of size [num_dets, h, w, 1] masks = masks[:num_dets_to_consider, :, :, None] # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1]) colors = torch.cat([ get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider) ], dim=0) masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha # This is 1 everywhere except for 1-mask_alpha where the mask is inv_alph_masks = masks * (-mask_alpha) + 1 # I did the math for this on pen and paper. 
This whole block should be equivalent to: # for j in range(num_dets_to_consider): # img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j] masks_color_summand = masks_color[0] if num_dets_to_consider > 1: inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0) masks_color_cumul = masks_color[1:] * inv_alph_cumul masks_color_summand += masks_color_cumul.sum(dim=0) img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand if args.display_fps: # Draw the box for the fps on the GPU font_face = cv2.FONT_HERSHEY_DUPLEX font_scale = 0.6 font_thickness = 1 text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0] img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6 # 1 - Box alpha # Then draw the stuff that needs to be done on the cpu # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason img_numpy = (img_gpu * 255).byte().cpu().numpy() if args.display_fps: # Draw the text on the CPU text_pt = (4, text_h + 2) text_color = [255, 255, 255] cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) if num_dets_to_consider == 0: return img_numpy if args.display_text or args.display_bboxes: for j in reversed(range(num_dets_to_consider)): x1, y1, x2, y2 = boxes[j, :] color = get_color(j) score = scores[j] if args.display_bboxes: cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1) if args.display_text: _class = cfg.dataset.class_names[classes[j]] text_str = '%s: %.2f' % ( _class, score) if args.display_scores else _class font_face = cv2.FONT_HERSHEY_DUPLEX font_scale = 0.6 font_thickness = 1 text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0] text_pt = (x1, y1 - 3) text_color = [255, 255, 255] cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1) cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) return img_numpy
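# --- Illustration (not part of the original file) -------------------------
# A small self-contained check of the cumprod compositing trick used above
# ("I did the math for this on pen and paper"): for masks that do not overlap
# it produces the same image as blending the masks one at a time. Shapes and
# values here are synthetic.
import torch

def composite_vectorized(img, masks, colors, alpha=0.45):
    # img: [h, w, 3]; masks: [N, h, w, 1]; colors: [N, 1, 1, 3]
    masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha
    inv_alph = masks * (-alpha) + 1
    summand = masks_color[0]
    if masks.size(0) > 1:
        inv_cumul = inv_alph[:-1].cumprod(dim=0)
        summand = summand + (masks_color[1:] * inv_cumul).sum(dim=0)
    return img * inv_alph.prod(dim=0) + summand

def composite_loop(img, masks, colors, alpha=0.45):
    masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha
    inv_alph = masks * (-alpha) + 1
    for j in range(masks.size(0)):
        img = img * inv_alph[j] + masks_color[j]
    return img

h, w = 8, 8
img = torch.rand(h, w, 3)
masks = torch.zeros(2, h, w, 1)
masks[0, :, :4] = 1.0          # left half
masks[1, :, 4:] = 1.0          # right half (disjoint from mask 0)
colors = torch.rand(2, 1, 1, 3)
print(torch.allclose(composite_vectorized(img, masks, colors),
                     composite_loop(img, masks, colors), atol=1e-6))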
def forward(self, x):
    """ The input should be of size [batch_size, 3, img_h, img_w] """
    # plt.imshow(x.permute(0,2,3,1)[0,:,:,:].detach().cpu().numpy())
    # plt.savefig('visual_test/input.png')
    # plt.cla()

    with timer.env('backbone'):
        outs = self.backbone(x)

    if cfg.fpn is not None:
        with timer.env('fpn'):
            # Use backbone.selected_layers because we overwrote self.selected_layers
            outs = [outs[i] for i in cfg.backbone.selected_layers]
            outs = self.fpn(outs)

    proto_out = None

    if cfg.fpn_fusion is True:
        # Fuse the feature maps of all selected FPN levels into a single map
        fusion_maps = self.fusion_module(outs[:self.fusion_layers])

    if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
        with timer.env('proto'):
            proto_x = x if self.proto_src is None else outs[self.proto_src]

            # FPN fusion
            if cfg.proto_src_fusion is True:
                proto_x = fusion_maps

            if cfg.cross_attention_fusion is True:
                P_query = outs[0]
                proto_x = P_query
                for layer in range(self.fusion_layers):
                    z = self.CALayer(x_query=P_query, x_key=outs[layer]) - P_query
                    proto_x = proto_x + z

            if self.num_grids > 0:
                grids = self.grid.repeat(proto_x.size(0), 1, 1, 1)
                proto_x = torch.cat([proto_x, grids], dim=1)

            if cfg.proto_coordconv:
                proto_x = self.addcoords(proto_x)

            proto_out = self.proto_net(proto_x)
            proto_out = cfg.mask_proto_prototype_activation(proto_out)

            if cfg.mask_proto_prototypes_as_features:
                # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary
                proto_downsampled = proto_out.clone()

                if cfg.mask_proto_prototypes_as_features_no_grad:
                    proto_downsampled = proto_out.detach()

            # Move the features last so the multiplication is easy
            proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

            if cfg.mask_proto_bias:
                bias_shape = [x for x in proto_out.size()]
                bias_shape[-1] = 1
                proto_out = torch.cat([proto_out, torch.ones(*bias_shape)], -1)

    with timer.env('pred_heads'):
        pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []}

        if cfg.use_instance_coeff:
            pred_outs['inst'] = []

        for idx, pred_layer in zip(self.selected_layers, self.prediction_layers):
            pred_x = outs[idx]

            if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features:
                # Scale the prototypes down to the current prediction layer's size and add it as inputs
                proto_downsampled = F.interpolate(proto_downsampled, size=outs[idx].size()[2:],
                                                  mode='bilinear', align_corners=False)
                pred_x = torch.cat([pred_x, proto_downsampled], dim=1)

            # A hack for the way dataparallel works
            if cfg.share_prediction_module and pred_layer is not self.prediction_layers[0]:
                pred_layer.parent = [self.prediction_layers[0]]

            if cfg.ins_coordconv:
                pred_x = self.addcoords(pred_x)

            p = pred_layer(pred_x)

            for k, v in p.items():
                pred_outs[k].append(v)

    # ===revised===
    # Record how many priors each prediction layer contributes before concatenating
    num_priors = []
    for k, v in pred_outs.items():
        if k == 'loc':
            for _v in v:
                num_priors.append(_v.size(1))
        pred_outs[k] = torch.cat(v, -2)
    pred_outs['layer'] = num_priors

    if proto_out is not None:
        pred_outs['proto'] = proto_out

    if self.training:
        # For the extra loss functions
        if cfg.use_class_existence_loss:
            pred_outs['classes'] = self.class_existence_fc(outs[-1].mean(dim=(2, 3)))

        with timer.env('segm'):
            if cfg.use_semantic_segmentation_loss:
                sem_in = None
                if cfg.sem_src_fusion is True:
                    sem_in = fusion_maps
                elif cfg.sem_lincomb is True:
                    sem_in = outs[-1]
                if cfg.sem_coordconv:
                    sem_in = self.addcoords(sem_in)
                pred_outs['segm'] = self.semantic_seg_conv(sem_in)
                # pred_outs['segm'] = self.semantic_seg_conv(outs[-1])  # lincomb version

        return pred_outs
    else:
        if cfg.use_sigmoid_focal_loss:
            # Note: even though conf[0] exists, this mode doesn't train it so don't use it
            pred_outs['conf'] = torch.sigmoid(pred_outs['conf'])
        elif cfg.use_objectness_score:
            # See focal_loss_sigmoid in multibox_loss.py for details
            objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])
            pred_outs['conf'][:, :, 1:] = objectness[:, :, None] * F.softmax(pred_outs['conf'][:, :, 1:], -1)
            pred_outs['conf'][:, :, 0] = 1 - objectness
        else:
            pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

        if cfg.use_sem_output is True:
            sem_in = None
            if cfg.sem_src_fusion is True:
                sem_in = fusion_maps
            elif cfg.sem_lincomb is True:
                sem_in = outs[-1]
            if cfg.sem_coordconv:
                sem_in = self.addcoords(sem_in)
            pred_outs['segm'] = self.semantic_seg_conv(sem_in)

        return self.detect(pred_outs)
def _bbox_iou(bbox1, bbox2, iscrowd=False):
    with timer.env('BBox IoU'):
        ret = jaccard(bbox1, bbox2, iscrowd)
    return ret.cpu()
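# Minimal usage sketch (illustrative, not part of the original script): _bbox_iou
# expects point-form [x1, y1, x2, y2] box tensors and returns the pairwise IoU
# matrix on the CPU. The boxes below are made-up values.
def _bbox_iou_example():
    import torch
    gt_boxes = torch.tensor([[0., 0., 10., 10.], [5., 5., 15., 15.]])
    det_boxes = torch.tensor([[0., 0., 10., 10.]])
    ious = _bbox_iou(gt_boxes, det_boxes)   # shape [2, 1], values in [0, 1]
    return ious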
def postprocess(det_output, w, h, batch_idx=0, interpolation_mode='bilinear',
                visualize_lincomb=False, crop_masks=True, score_threshold=0):
    """
    Postprocesses the output of Sewer in testing mode into a format that makes sense,
    accounting for all the possible configuration settings.

    Args:
        - det_output: The list of dicts that Detect outputs.
        - w: The real width of the image.
        - h: The real height of the image.
        - batch_idx: If you have multiple images for this batch, the image's index in the batch.
        - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear' (see torch.nn.functional.interpolate)

    Returns 4 torch Tensors (in the following order):
        - classes [num_det]: The class idx for each detection.
        - scores  [num_det]: The confidence score for each detection.
        - boxes   [num_det, 4]: The bounding box for each detection in absolute point form.
        - masks   [num_det, h, w]: Full image masks for each detection.
    """
    dets = det_output[batch_idx]
    net = dets['net']
    dets = dets['detection']

    if dets is None:
        return [torch.Tensor()] * 4  # Warning, this is 4 copies of the same thing

    if score_threshold > 0:
        keep = dets['score'] > score_threshold

        for k in dets:
            if k != 'proto':
                dets[k] = dets[k][keep]

        if dets['score'].size(0) == 0:
            return [torch.Tensor()] * 4

    # Actually extract everything from dets now
    classes = dets['class']
    boxes = dets['box']
    scores = dets['score']
    masks = dets['mask']

    if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
        # At this point masks is only the coefficients
        proto_data = dets['proto']

        # Test flag, do not upvote
        if cfg.mask_proto_debug:
            np.save('scripts/proto.npy', proto_data.cpu().numpy())

        if visualize_lincomb:
            display_lincomb(proto_data, masks)

        masks = proto_data @ masks.t()
        masks = cfg.mask_proto_mask_activation(masks)

        # Crop masks before upsampling because you know why
        if crop_masks:
            masks = crop(masks, boxes)

        # Permute into the correct output shape [num_dets, proto_h, proto_w]
        masks = masks.permute(2, 0, 1).contiguous()

        if cfg.use_maskiou:
            with timer.env('maskiou_net'):
                with torch.no_grad():
                    maskiou_p = net.maskiou_net(masks.unsqueeze(1))
                    maskiou_p = torch.gather(maskiou_p, dim=1, index=classes.unsqueeze(1)).squeeze(1)
                    if cfg.rescore_mask:
                        if cfg.rescore_bbox:
                            scores = scores * maskiou_p
                        else:
                            scores = np.concatenate((scores, scores * maskiou_p))

        # Scale masks up to the full image
        masks = F.interpolate(masks.unsqueeze(0), (h, w), mode=interpolation_mode,
                              align_corners=False).squeeze(0)

        # Binarize the masks
        masks.gt_(0.5)

    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, cast=False)
    boxes = boxes.long()

    if cfg.mask_type == mask_type.direct and cfg.eval_mask_branch:
        # Upscale masks
        full_masks = torch.zeros(masks.size(0), h, w)

        for jdx in range(masks.size(0)):
            x1, y1, x2, y2 = boxes[jdx, :]

            mask_w = x2 - x1
            mask_h = y2 - y1

            # Just in case
            if mask_w * mask_h <= 0 or mask_w < 0:
                continue

            mask = masks[jdx, :].view(1, 1, cfg.mask_size, cfg.mask_size)
            mask = F.interpolate(mask, (mask_h, mask_w), mode=interpolation_mode, align_corners=False)
            mask = mask.gt(0.5).float()
            full_masks[jdx, y1:y2, x1:x2] = mask

        masks = full_masks

    return classes, scores, boxes, masks
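# Hedged usage sketch (mirrors how postprocess() is called in the ROS callback below):
# run the network on one BGR frame, postprocess at the frame's real resolution, and
# unpack the four tensors described in the docstring. `net` and `FastBaseTransform`
# are assumed to be set up as elsewhere in this file; the threshold is an example value.
def _postprocess_example(cv_image):
    with torch.no_grad():
        frame = torch.from_numpy(cv_image).cuda().float()
        preds = net(FastBaseTransform()(frame.unsqueeze(0)))
        h, w, _ = frame.shape
        classes, scores, boxes, masks = postprocess(preds, w, h, score_threshold=0.3)
    return classes, scores, boxes, masks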
def image_callback(image_data):
    time_start = time.time()
    global cv_image
    cv_image = np.frombuffer(image_data.data, dtype=np.uint8).reshape(image_data.height, image_data.width, -1)

    # region_output is an 8x4 array; row i stores the information of region i.
    # Column 1: pollution grade (0, 1, 2, 3, 4); column 2: vegetation type (0 none, 1 grass, 2 shrub, 3 flower);
    # column 3: pedestrian flag (0 no, 1 yes); column 4: area ID (1-8).
    region_output = np.zeros((8, 4))
    for region_i in range(8):
        region_output[region_i, 3] = region_i + 1

    with torch.no_grad():
        # Object detection
        frame = torch.from_numpy(cv_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        # Build the one-to-one correspondence between each target's mask (target_masks),
        # class (target_classes), confidence (target_scores) and bounding box (target_boxes)
        h, w, _ = frame.shape
        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            # Detection results
            t = postprocess(preds, w, h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:args.top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider > 0:
            target_masks = masks[:num_dets_to_consider, :, :]
            target_classes = classes[:num_dets_to_consider]
            target_scores = scores[:num_dets_to_consider]
            target_boxes = boxes[:num_dets_to_consider, :]

            # Display the detection results
            if display_switch:
                result_image = result_display(frame, target_masks, target_classes, target_scores,
                                              target_boxes, num_dets_to_consider)
            else:
                result_image = frame.byte().cpu().numpy()

            # Store rubbish targets and vegetation targets separately
            check_k = 0
            rubbish_remain_list = []
            vegetation_remain_list = []
            rubbish_items = [
                'ads', 'cigarette', 'firecracker', 'glass bottle', 'leaves', 'metal',
                'paper', 'peel', 'plastic', 'solid clod', 'solid crumb'
            ]
            vegetation_items = ['grass', 'shrub', 'flower']
            while check_k < target_classes.shape[0]:
                if cfg.dataset.class_names[target_classes[check_k]] in rubbish_items:
                    rubbish_remain_list.append(check_k)
                if cfg.dataset.class_names[target_classes[check_k]] in vegetation_items:
                    vegetation_remain_list.append(check_k)
                check_k += 1

            rubbish_masks = target_masks[rubbish_remain_list, :, :]
            rubbish_classes = target_classes[rubbish_remain_list]
            rubbish_scores = target_scores[rubbish_remain_list]
            rubbish_boxes = target_boxes[rubbish_remain_list, :]
            vegetation_masks = target_masks[vegetation_remain_list, :, :]
            vegetation_classes = target_classes[vegetation_remain_list]
            vegetation_scores = target_scores[vegetation_remain_list]
            vegetation_boxes = target_boxes[vegetation_remain_list, :]
            rubbish_num = len(rubbish_remain_list)
            vegetation_num = len(vegetation_remain_list)

            # Processing of rubbish targets
            if rubbish_num > 0:
                # Sample points along the mask boundaries
                result_image, rubbish_boundary_pts = get_boundary(result_image, rubbish_num,
                                                                  rubbish_masks, cpt_num=10)

                # s_polygon stores each rubbish target's ground-projected area in the world frame
                s_polygon = np.zeros((rubbish_num, 1))
                rubbish_list = [
                    'ads', 'cigarette', 'firecracker', 'glass bottle', 'leaves', 'metal',
                    'paper', 'peel', 'plastic', 'solid clod', 'solid crumb'
                ]
                rubbish_weight_coefficient_list = [
                    80, 200, 200, 8000, 80, 1050, 80, 6000, 775, 15750, 4000
                ]
                # region_w stores the total estimated rubbish mass in each region
                region_w = np.zeros((8, 1))
                for i in range(rubbish_boundary_pts.shape[0]):
                    effective_pt_num = 0
                    b_x, b_z = [], []
                    b_area_id = []
                    for b_pt in range(rubbish_boundary_pts.shape[1]):
                        b_pt_u = rubbish_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = rubbish_boundary_pts[i, b_pt, 0, 1]
                        # Skip invalid pixel coordinates (u=0, v=0)
                        if b_pt_u or b_pt_v:
                            loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                            # Skip invalid world coordinates (x=0, z=0)
                            if loc_b_pt[0] or loc_b_pt[1]:
                                effective_pt_num += 1
                                b_x.append(loc_b_pt[0])
                                b_z.append(loc_b_pt[1])
                                b_area_id.append(CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    if effective_pt_num >= 3:
                        s_sum = 0
                        for b_pt in range(effective_pt_num):
                            s_sum += b_x[b_pt] * b_z[(b_pt + 1) % effective_pt_num] \
                                     - b_z[b_pt] * b_x[(b_pt + 1) % effective_pt_num]
                        s_polygon[i, 0] = abs(s_sum) / 2
                        for b_pt in range(effective_pt_num):
                            # Skip invalid area IDs (ID=0)
                            if b_area_id[b_pt]:
                                rubbish_weight = s_polygon[i, 0] * rubbish_weight_coefficient_list[
                                    rubbish_list.index(cfg.dataset.class_names[rubbish_classes[i]])]
                                region_w[b_area_id[b_pt] - 1, 0] += rubbish_weight / effective_pt_num

                # Determine the pollution grade of each region
                for region_i in range(8):
                    if region_w[region_i, 0] > 0 and region_w[region_i, 0] <= 50:
                        region_output[region_i, 0] = 1
                    elif region_w[region_i, 0] > 50 and region_w[region_i, 0] <= 100:
                        region_output[region_i, 0] = 2
                    elif region_w[region_i, 0] > 100 and region_w[region_i, 0] <= 150:
                        region_output[region_i, 0] = 3
                    elif region_w[region_i, 0] > 150:
                        region_output[region_i, 0] = 4

                if display_switch:
                    print('region_w')
                    print(region_w)
                    result_image = w_display(result_image, region_w,
                                             font_face=cv2.FONT_HERSHEY_DUPLEX,
                                             font_scale=0.5, font_thickness=1)

            # Processing of vegetation targets
            if vegetation_num > 0:
                # Sample points along the mask boundaries
                result_image, vegetation_boundary_pts = get_boundary(result_image, vegetation_num,
                                                                     vegetation_masks, cpt_num=20)

                # region_vegetation_type stores the vegetation type of each region
                region_vegetation_type = np.zeros((8, 1))
                for i in range(vegetation_boundary_pts.shape[0]):
                    effective_pt_num = 0
                    b_area_id = []
                    for b_pt in range(vegetation_boundary_pts.shape[1]):
                        b_pt_u = vegetation_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = vegetation_boundary_pts[i, b_pt, 0, 1]
                        # Skip invalid pixel coordinates (u=0, v=0)
                        if b_pt_u or b_pt_v:
                            loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                            # Skip invalid world coordinates (x=0, z=0)
                            if loc_b_pt[0] or loc_b_pt[1]:
                                effective_pt_num += 1
                                b_area_id.append(CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    for b_pt in range(effective_pt_num):
                        # Skip invalid area IDs (ID=0)
                        if b_area_id[b_pt]:
                            # Priority order (higher value overrides): grass=1, shrub=2, flower=3
                            vegetation_list = ['grass', 'shrub', 'flower']
                            v_type = vegetation_list.index(
                                cfg.dataset.class_names[vegetation_classes[i]]) + 1
                            current_v_type = region_vegetation_type[b_area_id[b_pt] - 1, 0]
                            if v_type > current_v_type:
                                region_vegetation_type[b_area_id[b_pt] - 1, 0] = v_type

                for region_i in range(8):
                    region_output[region_i, 1] = region_vegetation_type[region_i, 0]
        else:
            result_image = frame.byte().cpu().numpy()

    areasinfo_msg = AreasInfo()
    for region_i in range(8):
        region_output_msg = AreaInfo()
        region_output_msg.rubbish_grade = int(region_output[region_i, 0])
        region_output_msg.has_person = bool(region_output[region_i, 2])
        region_output_msg.vegetation_type = int(region_output[region_i, 1])
        region_output_msg.area_id = int(region_output[region_i, 3])
        areasinfo_msg.infos.append(region_output_msg)
    pub.publish(areasinfo_msg)

    if display_switch:
        print('region_output')
        print(region_output)
        result_image = CameraT.drawLine(result_image, w=1)
        result_image = output_display(result_image, region_output,
                                      font_face=cv2.FONT_HERSHEY_DUPLEX,
                                      font_scale=0.5, font_thickness=1)
        cv2.putText(result_image, str(time.time()), (5, 20), cv2.FONT_HERSHEY_DUPLEX,
                    0.5, (0, 0, 255), 1, cv2.LINE_AA)
        cv2.imshow("result_image", result_image)
        if record_switch:
            video_out.write(result_image)
        if cv2.waitKey(1) == 27:
            if record_switch:
                video_out.release()
            cv2.destroyAllWindows()
            rospy.signal_shutdown("It's over.")

    time_end_all = time.time()
    print("totally time cost:", time_end_all - time_start)
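# Hedged sketch of the node wiring that image_callback() assumes (the node name,
# topic names, and message import below are illustrative, not taken from the
# original source): subscribe to the camera image, publish the per-region
# AreasInfo message via the global `pub`, and hand control to rospy.
def _node_setup_sketch():
    global pub
    import rospy
    from sensor_msgs.msg import Image
    # AreasInfo / AreaInfo come from this project's ROS message package (path not shown here)

    rospy.init_node('region_rubbish_detection')
    pub = rospy.Publisher('/areas_info', AreasInfo, queue_size=1)
    rospy.Subscriber('/camera/image_raw', Image, image_callback,
                     queue_size=1, buff_size=2 ** 24)
    rospy.spin()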
# detect images
if args.image is not None:
    images = glob.glob(args.image + '/*.jpg')
    num = len(images)

    for i, one_img in enumerate(images):
        img_name = one_img.split('/')[-1]
        img_origin = torch.from_numpy(cv2.imread(one_img)).cuda().float()
        img_h, img_w = img_origin.shape[0], img_origin.shape[1]
        img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
        net_outs = net(img_trans)
        nms_outs = NMS(net_outs, args.traditional_nms)
        show_lincomb = bool(args.show_lincomb and args.image_path)

        with timer.env('after nms'):
            results = after_nms(nms_outs, img_h, img_w, show_lincomb=show_lincomb,
                                crop_masks=not args.no_crop, visual_thre=args.visual_thre,
                                img_name=img_name)
            torch.cuda.synchronize()

        img_numpy = draw_img(results, img_origin, args)

        cv2.imwrite(f'{img_path}/{img_name}', img_numpy)
        print(f'{i + 1}/{num}', end='\r')