def loop_body(b, ignore_mask):
    true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                               object_mask_bool[b, ..., 0])
    iou = box_iou(pred_box[b], true_box)
    best_iou = tf.keras.backend.max(iou, axis=-1)
    ignore_mask = ignore_mask.write(
        b, tf.cast(best_iou < ignore_thresh, tf.keras.backend.dtype(true_box)))
    return b + 1, ignore_mask
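# --- Illustrative usage sketch (not from the original source). loop_body is the
# body of a tf.while_loop over the batch dimension, with y_true, l,
# object_mask_bool, pred_box and ignore_thresh captured from the enclosing loss
# function; the batch size `m` and the TensorArray below are assumptions about
# that enclosing scope. ---
ignore_mask = tf.TensorArray(tf.keras.backend.dtype(y_true[0]), size=1, dynamic_size=True)
_, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
ignore_mask = ignore_mask.stack()              # one ignore-mask slice per image
ignore_mask = tf.expand_dims(ignore_mask, -1)  # add a trailing channel dimension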
def assign_detections_to_trackers(trackers_obj, detections_obj, iou_thrd=0.3):
    '''From the current list of trackers and new detections, output matched
    tracker/detection pairs, unmatched detections and unmatched trackers.
    '''
    trackers = [temp_obj['bbox'] for temp_obj in trackers_obj]
    detections = [temp_obj['bbox'] for temp_obj in detections_obj]
    IOU_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    Motion_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    Shape_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)

    for t, trk in enumerate(trackers):
        # trk = convert_to_cv2bbox(trk)
        for d, det in enumerate(detections):
            # det = convert_to_cv2bbox(det)
            IOU_mat[t, d] = utils.box_iou(trk, det)
            Motion_mat[t, d] = get_motion_score(trk, det)
            Shape_mat[t, d] = get_shape_score(trk, det)

    # Produce matches: solve the assignment problem of maximizing the summed IoU
    # using the Hungarian algorithm (also known as the Munkres algorithm).
    matched_idx = linear_assignment(-IOU_mat)

    unmatched_trackers, unmatched_detections = [], []
    for t, trk in enumerate(trackers):
        if t not in matched_idx[:, 0]:
            unmatched_trackers.append(t)

    for d, det in enumerate(detections):
        if d not in matched_idx[:, 1]:
            unmatched_detections.append(d)

    matches = []

    # For creating trackers we consider any detection with an overlap less than
    # iou_thrd to signify the existence of an untracked object.
    for m in matched_idx:
        if IOU_mat[m[0], m[1]] < iou_thrd:
            unmatched_trackers.append(m[0])
            unmatched_detections.append(m[1])
        else:
            matches.append(m.reshape(1, 2))

    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
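# --- Illustrative usage sketch (the dict layout is an assumption based on the
# function above; only the 'bbox' key is actually read here) ---
trackers_obj = [{'bbox': np.array([50, 60, 120, 200])}]
detections_obj = [{'bbox': np.array([55, 62, 125, 205])},
                  {'bbox': np.array([300, 300, 360, 380])}]
matches, unmatched_dets, unmatched_trks = assign_detections_to_trackers(
    trackers_obj, detections_obj, iou_thrd=0.3)
# matches is a [K, 2] array of (tracker_index, detection_index) pairs; the
# second detection overlaps no tracker, so it ends up in unmatched_dets and
# would typically seed a new tracker.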
def nms_fine_tune(detected_objects_list, th=0.5):
    ret_list = []
    for i in range(len(detected_objects_list)):
        is_append = True
        for j in range(len(detected_objects_list)):
            if i == j:
                continue
            iou = box_iou(detected_objects_list[i]['bbox'],
                          detected_objects_list[j]['bbox'])
            if iou > th and _box_area(
                    detected_objects_list[i]['bbox']) <= _box_area(
                        detected_objects_list[j]['bbox']):
                is_append = False
                break
        if is_append:
            ret_list.append(detected_objects_list[i])
    return ret_list
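# --- Illustrative usage sketch (detection dicts are assumptions; only 'bbox'
# is required by nms_fine_tune) ---
detections = [{'bbox': np.array([10, 10, 100, 100])},    # larger box
              {'bbox': np.array([12, 12, 95, 95])},      # heavily overlapping, smaller
              {'bbox': np.array([200, 200, 260, 260])}]  # disjoint box
kept = nms_fine_tune(detections, th=0.5)
# A detection is suppressed only when it overlaps a box of equal or larger area
# by more than `th`, so the smaller of the two overlapping boxes is dropped and
# `kept` holds the first and third detections.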
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    We obey the Faster RCNN box coder:
      tx = (x - anchor_x) / anchor_w
      ty = (y - anchor_y) / anchor_h
      tw = log(w / anchor_w)
      th = log(h / anchor_h)

    Args:
      boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax), sized [#obj, 4].
      labels: (tensor) object class labels, sized [#obj,].
      input_size: (int/tuple) model input size of (w, h).

    Returns:
      loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 4].
      cls_targets: (tensor) encoded class labels, sized [#anchors,].
    '''
    input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
        else torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)
    boxes = change_box_order(boxes, 'xyxy2xywh')

    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)
    boxes = boxes[max_ids]

    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
    loc_targets = torch.cat([loc_xy, loc_wh], 1)
    cls_targets = 1 + labels[max_ids]

    cls_targets[max_ious < 0.5] = 0
    ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious in (0.4, 0.5)
    cls_targets[ignore] = -1  # for now just mark ignored anchors with -1
    return loc_targets, cls_targets
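# --- Illustrative usage sketch; `DataEncoder` is an assumed name for the class
# this encode() method belongs to (a RetinaNet-style anchor encoder) ---
encoder = DataEncoder()
boxes = torch.tensor([[20., 30., 220., 330.]])  # one ground-truth box, xyxy
labels = torch.tensor([0])                      # its class index
loc_targets, cls_targets = encoder.encode(boxes, labels, input_size=600)
# loc_targets holds one (tx, ty, tw, th) offset per anchor; cls_targets holds
# label + 1 for positive anchors, 0 for background (IoU < 0.5) and -1 for
# anchors in the ignored IoU band (0.4, 0.5).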
def validation_step(self, opt, outputs, batch, batch_idx, epoch):
    imgs, targets, paths, shapes, pad = batch
    _, _, height, width = imgs.shape
    inf_out, train_out = outputs
    whwh = torch.Tensor([width, height, width, height]).to(imgs.device)

    losses = compute_loss(train_out, targets, self.model)[1][:3]  # GIoU, obj, cls

    output = non_max_suppression(
        inf_out,
        conf_thres=opt.conf_thres,
        iou_thres=opt.iou_thres,
        multi_label=self.calc_ni(batch_idx, epoch) > self.n_burn)

    # Statistics per image
    for si, pred in enumerate(output):
        labels = targets[targets[:, 0] == si, 1:]
        nl = len(labels)
        tcls = labels[:, 0].tolist() if nl else []  # target class
        self.seen += 1

        if pred is None:
            if nl:
                self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool),
                                   torch.Tensor(), torch.Tensor(), tcls))
            continue

        # Append to text file
        # with open('test.txt', 'a') as file:
        #     [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

        # Clip boxes to image bounds
        clip_coords(pred, (height, width))

        # Assign all predictions as incorrect
        correct = torch.zeros(pred.shape[0], self.niou, dtype=torch.bool,
                              device=imgs.device)
        if nl:
            detected = []  # target indices
            tcls_tensor = labels[:, 0]

            # target boxes
            tbox = xywh2xyxy(labels[:, 1:5]) * whwh

            # Per target class
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                # Search for detections
                if pi.shape[0]:
                    # Prediction to target ious
                    ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                    # Append detections
                    for j in (ious > self.iouv[0].to(ious.device)).nonzero():
                        d = ti[i[j]]  # detected target
                        if d not in detected:
                            detected.append(d)
                            correct[pi[j]] = ious[j] > self.iouv  # iou_thres is 1xn
                            if len(detected) == nl:  # all targets already located in image
                                break

        # Append statistics (correct, conf, pcls, tcls)
        self.stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
    return losses
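# --- Illustrative follow-up sketch (names are assumptions): at the end of the
# validation epoch the accumulated self.stats tuples are typically concatenated
# and reduced to per-class precision/recall/AP, mirroring what main() below
# does with its local `stats` list. ---
stats = [np.concatenate(x, 0) for x in zip(*self.stats)]  # to numpy
if len(stats):
    p, r, ap, f1, ap_class = ap_per_class(*stats)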
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = './yolov3.onnx'
    engine_file_path = "yolov3.trt"
    data_path = "./data/unrel.data"

    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    iouv = torch.linspace(0.5, 0.95, 1, dtype=torch.float32)  # iou vector for mAP@0.5:0.95
    niou = 1

    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Generate custom dataloader
    img_size = 448  # copied from the PyTorch source
    batch_size = 16

    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]

    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@0.5', 'F1')
        p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
        pbar = tqdm.tqdm(dataloader, desc=s)
        stats, ap, ap_class = [], [], []
        seen = 0

        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):
            imgs = imgs.astype(np.float32) / 255.0
            nb, _, height, width = imgs.shape  # batch size, channels, height, width
            whwh = np.array([width, height, width, height])

            inputs[0].host = imgs

            postprocessor_args = {
                # A list of 3 three-dimensional tuples for the YOLO masks
                "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                # A list of 9 two-dimensional tuples for the YOLO anchors
                "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                 (59, 119), (116, 90), (156, 198), (373, 326)],
                "num_classes": 37,
                "stride": [32, 16, 8]
            }
            postprocessor = PostprocessYOLO(**postprocessor_args)

            # Do layers before yolo
            t = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]
            trt_outputs = [
                np.ascontiguousarray(
                    otpt[:, :, :int(imgs.shape[2] * (2**i) / 32),
                         :int(imgs.shape[3] * (2**i) / 32)],
                    dtype=np.float32) for i, otpt in enumerate(trt_outputs)
            ]

            output_list = postprocessor.process(trt_outputs)
            t0 += time.time() - t

            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out, conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1

                if pred is None:
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue

                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # target indices
                    tcls_tensor = labels[:, 0]

                    # target boxes
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)

                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                        pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                        # Search for detections
                        if pi.shape[0]:
                            # Prediction to target ious
                            ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                    if len(detected) == nl:  # all targets already located in image
                                        break

                # Append statistics (correct, conf, pcls, tcls)
                stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

            # Plot images
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names, fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs, output_to_target(output, width, height),
                            paths=paths, names=names, fname=f)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0]  # [P, R, mAP@0.5:0.95, mAP@0.5]
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose:
        t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (img_size, img_size, batch_size)
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
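# --- For reference, one common pairwise IoU definition that matches the
# box_iou(pred, target).max(1) usage above (xyxy boxes). This is a sketch, not
# necessarily the exact helper imported by these snippets. ---
def box_iou_xyxy(boxes1, boxes2):
    """boxes1: [N, 4], boxes2: [M, 4] -> IoU matrix of shape [N, M]."""
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])  # top-left of intersection
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])  # bottom-right of intersection
    wh = (rb - lt).clamp(min=0)                               # intersection width/height
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area1[:, None] + area2[None, :] - inter)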
def attack_video(params,
                 video_path=None,
                 attack_det_id_dict=None,
                 patch_bbox=None,
                 moving_direction=None,
                 verbose=0,
                 is_return=False):
    detector = KerasYOLOv3Model_plus(sess=K.get_session())
    n_attacks = None
    videogen = skvideo.io.FFmpegReader(video_path)
    virtual_attack = False
    detected_objects_list_prev = None
    match_info_prev = None
    cal_dx_dy_flag = True
    attack_frame_list = [*attack_det_id_dict]
    attack_frame_list.sort()
    attacking_flag = False
    attack_count_idx = 0
    is_init = True
    params_min_hits = params['min_hits']

    for frame_count, image in enumerate(videogen.nextFrame()):
        if frame_count > 1:
            is_init = False
        image_yolo, _ = letterbox_image(image, shape=(416, 416),
                                        data_format='channels_last')
        image = bgr2rgb((image_yolo * 255).astype(np.uint8))
        image_yolo_pil = Image.fromarray((image_yolo * 255).astype(np.uint8))

        detected_objects_list = detector.detect_image(image_yolo_pil)
        detected_objects_list = nms_fine_tune(detected_objects_list)
        detected_objects_list = sort_bbox_by_area(detected_objects_list)
        if len(detected_objects_list) != 0:
            nat_detected_objects_list = copy.deepcopy(detected_objects_list)

        if frame_count in attack_frame_list or attacking_flag == True:
            target_det_id = attack_det_id_dict[
                frame_count - attack_count_idx][attack_count_idx]

            if attack_count_idx == 0:
                attacking_flag = True
                target_trk_id = find_match_trk(match_info_prev, target_det_id)
                target_init_bbox = detected_objects_list[target_det_id]['bbox']
                target_init_trk_bbox = (
                    params_prev['tracker_list'][target_trk_id].obj)['bbox']
                print("Attack starts at frame {}".format(frame_count))
                print("Target bbox location in the original frame {}: {} ".format(
                    frame_count, target_init_bbox))

            if attack_count_idx != 0:
                _, _, match_info_nat = pipeline(image,
                                                nat_detected_objects_list,
                                                frame_count,
                                                params_prev,
                                                detect_output=True,
                                                verbose=0,
                                                virtual_attack=virtual_attack,
                                                return_match_info=True)
                attacking_flag = is_match(target_trk_id, target_det_id,
                                          match_info_nat)
                if not attacking_flag:
                    detection_missing = is_missing_detection(
                        nat_detected_objects_list, target_init_bbox,
                        target_det_id)
                    try:
                        tracking_missing = is_missing_detection(
                            tracker_bbox_list(params_prev['tracker_list']),
                            target_init_trk_bbox, target_trk_id)
                    except:
                        pdb.set_trace()
                    if detection_missing and not tracking_missing:
                        attacking_flag = True
                    else:
                        print('Attack finished with {0} attacks.'.format(
                            attack_count_idx))
                        n_attacks = attack_count_idx
                        cal_dx_dy_flag = True
                        attack_count_idx = 0
                        return n_attacks

            if attacking_flag:
                temp_attack_obj = detected_objects_list_prev[target_det_id]
                target_det_prev = temp_attack_obj
                target_trk_prev = params_prev['tracker_list'][target_trk_id].obj
                translation_vecter_center = calculate_translation_center(
                    target_trk_prev['bbox'], target_det_prev['bbox'])
                attack_bbox = temp_attack_obj['bbox']
                attack_param = params_prev

                L = 5  # bbox moving pixel length
                if cal_dx_dy_flag and moving_direction is None:
                    if translation_vecter_center[0] == 0.:
                        ratio = 1000.0
                    else:
                        ratio = abs(translation_vecter_center[1] /
                                    translation_vecter_center[0])
                    dx = L * 1 / math.sqrt((1 + ratio * ratio))
                    dy = dx * ratio
                    if translation_vecter_center[0] > 0:
                        dx *= -1
                    if translation_vecter_center[1] > 0:
                        dy *= -1
                    cal_dx_dy_flag = False

                if attack_count_idx == 0:
                    for sub_attack_count in range(100):
                        if moving_direction is None:
                            fake_det_bbox = (
                                target_trk_prev['bbox'] +
                                np.array([dx, dy, dx, dy]) *
                                (sub_attack_count + 1)).astype(int)
                        else:
                            fake_det_bbox = (
                                target_trk_prev['bbox'] +
                                np.array(moving_direction) *
                                (sub_attack_count + 1)).astype(int)
                        detected_objects_list[target_det_id]['bbox'] = fake_det_bbox
                        _, param_attack, match_info = pipeline(
                            image,
                            detected_objects_list,
                            frame_count,
                            params,
                            detect_output=True,
                            verbose=0,
                            virtual_attack=virtual_attack,
                            return_match_info=True)
                        if is_match(target_trk_id, target_det_id, match_info):
                            attack_bbox = fake_det_bbox
                            attack_param = param_attack
                            if box_iou(patch_bbox, fake_det_bbox) <= 0.0:
                                break
                        else:
                            break
                    detected_objects_list[target_det_id]['bbox'] = attack_bbox
                else:
                    del detected_objects_list[target_det_id]

                print("Fabricate bbox location {} at frame {}".format(
                    attack_bbox, frame_count))
                image_yolo_pil.save('./output/' + 'ori_' + str(frame_count) + '.png')
                attack_count_idx += 1

        image_track, params, match_info = pipeline(image,
                                                   detected_objects_list,
                                                   frame_count,
                                                   params,
                                                   detect_output=True,
                                                   verbose=verbose,
                                                   virtual_attack=virtual_attack,
                                                   return_match_info=True,
                                                   is_init=is_init)
        cv2.imwrite('./output/track/' + str(frame_count) + '.png', image_track)

        match_info_prev = copy.deepcopy(match_info)
        detected_objects_list_prev = copy.deepcopy(nat_detected_objects_list)
        params_prev = copy.deepcopy(params)

    return n_attacks
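# --- Illustrative invocation sketch (all argument values are assumptions based
# on the signature and the code above; `params` is the tracking pipeline's
# parameter dict used elsewhere in this codebase) ---
n_attacks = attack_video(
    params,
    video_path='./data/demo.mp4',
    attack_det_id_dict={10: [0, 0, 0]},    # target detection id for each consecutive attacked frame, starting at frame 10
    patch_bbox=np.array([50, 50, 150, 150]),
    moving_direction=None,                 # let the function derive dx/dy from the track motion
    verbose=1)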