def __init__(self, transform, opt, save_img=False):
    self.opt = opt
    self.debug = opt.debug
    self.save_txt = opt.save_txt
    self.view_img = opt.view_img
    self.transform = transform
    self.save_img = save_img
    self.fourcc = 'mp4v'
    self.resolution = (1920, 1080)  # can also be set to False to use the camera's native resolution
    self.vid_writer = None
    self.vid_path = None
    self.out = self.create_dir(opt.output)
    self.webcam = self.check_webcam(opt.source)
    self.device = select_device(opt.device)
    self.half = self.device.type != 'cpu'  # half precision only supported on CUDA
    self.model, self.imgsz, self.class_names = self.load_model()
    self.overlay_images = self.get_overlay_icons()
    self.banner_icon = self.get_banner_icon()
    self.dataset = self.set_dataloader()
    self.meta_data_writer = self.get_meta_data_writer()
    set_logging()
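# `check_webcam` and the other helpers above are defined elsewhere in the class.
# For reference, a minimal sketch of the webcam heuristic used repeatedly in this
# codebase (numeric camera IDs, stream URLs, or a .txt list of sources) -- an
# assumed implementation, not the original helper:
def check_webcam(self, source):
    return (source.isnumeric()
            or source.startswith(('rtsp://', 'rtmp://', 'http://'))
            or source.endswith('.txt'))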
def custom(path_or_model='path/to/model.pt', autoshape=True, verbose=True):
    """YOLOv5 custom model https://github.com/ultralytics/yolov5

    Arguments (3 options):
        path_or_model (str): 'path/to/model.pt'
        path_or_model (dict): torch.load('path/to/model.pt')
        path_or_model (nn.Module): torch.load('path/to/model.pt')['model']

    Returns:
        pytorch model
    """
    set_logging(verbose=verbose)

    model = torch.load(path_or_model) if isinstance(path_or_model, str) else path_or_model  # load checkpoint
    if isinstance(model, dict):
        model = model['ema' if model.get('ema') else 'model']  # load model

    hub_model = Model(model.yaml).to(next(model.parameters()).device)  # create
    hub_model.load_state_dict(model.float().state_dict())  # load state_dict
    hub_model.names = model.names  # class names
    if autoshape:
        hub_model = hub_model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
    device = select_device('0' if torch.cuda.is_available() else 'cpu')  # default to GPU if available
    return hub_model.to(device)
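# Usage sketch for custom(); 'weights/best.pt' and 'data/images/bus.jpg' are
# illustrative paths, not part of the original code:
model = custom(path_or_model='weights/best.pt')  # load a local checkpoint
results = model('data/images/bus.jpg')           # autoshape accepts file/URI/PIL/cv2/np inputs
results.print()                                  # summary of detections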
def load_model(self, dev="cpu"):
    # Initialize
    set_logging()
    device = select_device(dev)

    # Load model
    # model = attempt_load("yolov5_model/yolov5x.pt", map_location=device)  # load FP32 model
    model = torch.hub.load('ultralytics/yolov5', 'custom', path_or_model='yolov5_model/yolov5x.pt')
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(640, s=stride)  # check img_size

    # Run inference once to warm up
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))
    return model, stride, device, imgsz
def load_model(model_path, device=None, autoshape=True, verbose=False):
    """Creates a specified YOLOv5 model

    Arguments:
        model_path (str): path of the model weights
        device (str): device the model will be loaded on (cpu, cuda)
        autoshape (bool): make model ready for inference
        verbose (bool): if False, yolov5 logs will be silent

    Returns:
        pytorch model

    (Adapted from yolov5.hubconf.create)
    """
    # set logging
    set_logging(verbose=verbose)

    # set device if not given
    if not device:
        device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # add yolov5 folder to system path
    here = Path(__file__).parent.absolute()
    yolov5_folder_dir = str(here)
    sys.path.insert(0, yolov5_folder_dir)

    attempt_download(model_path)  # download if not found locally
    model = torch.load(model_path, map_location=torch.device(device))
    if isinstance(model, dict):
        model = model["model"]  # load model

    hub_model = Model(model.yaml).to(next(model.parameters()).device)  # create
    hub_model.load_state_dict(model.float().state_dict())  # load state_dict
    hub_model.names = model.names  # class names
    model = hub_model

    # remove yolov5 folder from system path
    sys.path.remove(yolov5_folder_dir)

    if autoshape:
        model = model.autoshape()

    return model
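# Usage sketch for this loader; 'yolov5s.pt' is an illustrative weights path:
model = load_model("yolov5s.pt", device="cpu", autoshape=True)
print(type(model).__name__, len(model.names), "classes")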
def create(name, pretrained, channels, classes, autoshape, verbose):
    """Creates a specified YOLOv5 model

    Arguments:
        name (str): name of model, i.e. 'yolov5s'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes

    Returns:
        pytorch model
    """
    try:
        set_logging(verbose=verbose)
        cfg = list((Path(__file__).parent / 'models').rglob(f'{name}.yaml'))[0]  # model.yaml path
        model = Model(cfg, channels, classes)
        if pretrained:
            fname = f'{name}.pt'  # checkpoint filename
            attempt_download(fname)  # download if not found locally
            ckpt = torch.load(fname, map_location=torch.device('cpu'))  # load
            msd = model.state_dict()  # model state_dict
            csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
            csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape}  # filter
            model.load_state_dict(csd, strict=False)  # load
            if len(ckpt['model'].names) == classes:
                model.names = ckpt['model'].names  # set class names attribute
        if autoshape:
            model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
        device = select_device('0' if torch.cuda.is_available() else 'cpu')  # default to GPU if available
        return model.to(device)

    except Exception as e:
        help_url = 'https://github.com/ultralytics/yolov5/issues/36'
        s = 'Cache may be out of date, try force_reload=True. See %s for help.' % help_url
        raise Exception(s) from e
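# Usage sketch for create(); arguments mirror the docstring (COCO defaults),
# and the pretrained checkpoint is downloaded if it is not found locally:
model = create('yolov5s', pretrained=True, channels=3, classes=80, autoshape=True, verbose=True)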
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    # parser.add_argument('--data', type=str, default='yolov5/data/coco128.yaml', help='data.yaml path')
    # parser.add_argument('--hyp', type=str, default='yolov5/data/hyp.scratch.yaml', help='hyperparameters path')
    parser.add_argument('--data', type=str, default='', help='data.yaml path')
    parser.add_argument('--hyp', type=str, default='', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
    parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
    parser.add_argument('--project', default='runs/train', help='save to project/name')
    parser.add_argument('--entity', default=None, help='W&B entity')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--linear-lr', action='store_true', help='linear LR')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B')
    parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch')
    parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
    opt = parser.parse_args()

    # Set DDP variables
    opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
    set_logging(opt.global_rank)
    if opt.global_rank in [-1, 0]:
        check_git_status()
        # check_requirements()

    # Resume
    wandb_run = check_wandb_resume(opt)
    if opt.resume and not wandb_run:  # resume an interrupted run
        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
        apriori = opt.global_rank, opt.local_rank
        with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
        opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = \
            '', ckpt, True, opt.total_batch_size, *apriori  # reinstate
        logger.info('Resuming training from %s' % ckpt)
    else:
        opt.hyp = opt.hyp or str(Path(__file__).parent / 'data' /
                                 ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml'))
        opt.data = opt.data or str(Path(__file__).parent / 'data/coco128.yaml')
        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)  # check files
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
        opt.name = 'evolve' if opt.evolve else opt.name
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve))

    # DDP mode
    opt.total_batch_size = opt.batch_size
    device = select_device(opt.device, batch_size=opt.batch_size)
    if opt.local_rank != -1:
        assert torch.cuda.device_count() > opt.local_rank
        torch.cuda.set_device(opt.local_rank)
        device = torch.device('cuda', opt.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://')  # distributed backend
        assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
        opt.batch_size = opt.total_batch_size // opt.world_size

    # Hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.safe_load(f)  # load hyps

    # Train
    logger.info(opt)
    if not opt.evolve:
        tb_writer = None  # init loggers
        if opt.global_rank in [-1, 0]:
            prefix = colorstr('tensorboard: ')
            logger.info(f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/")
            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
        train(hyp, opt, device, tb_writer)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
                'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
                'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
                'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
                'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
                'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
                'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
                'box': (1, 0.02, 0.2),  # box loss gain
                'cls': (1, 0.2, 4.0),  # cls loss gain
                'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
                'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
                'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
                'iou_t': (0, 0.1, 0.7),  # IoU training threshold
                'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
                'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
                'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
                'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
                'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
                'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
                'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
                'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
                'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
                'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
                'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
                'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
                'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
                'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
                'mixup': (1, 0.0, 1.0)}  # image mixup (probability)

        assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
        opt.notest, opt.nosave = True, True  # only test/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml'  # save best result here
        if opt.bucket:
            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists

        for _ in range(300):  # generations to evolve
            if Path('evolve.txt').exists():  # if evolve.txt exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt('evolve.txt', ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min()  # weights
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device)

            # Write mutation results
            print_mutation(hyp.copy(), results, yaml_file, opt.bucket)

        # Plot results
        plot_evolution(yaml_file)
        print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
              f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
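# The evolve step above is dense; the following is a minimal, self-contained
# sketch of the same math (weighted parent selection, then a clipped
# multiplicative Gaussian mutation). `history` is assumed to hold one row per
# generation as [fitness, hyp values in meta-key order]; yolov5's real
# evolve.txt prefixes 7 result columns instead.
import random
import numpy as np

def mutate_hyps(hyp, meta, history, mp=0.8, s=0.2):
    fit = history[:, 0]
    n = min(5, len(history))
    top = history[np.argsort(-fit)][:n]                 # best n generations
    w = top[:, 0] - fit.min() + 1e-6                    # selection weights
    parent = top[random.choices(range(n), weights=w)[0]][1:]

    g = np.array([m[0] for m in meta.values()])         # per-key mutation gains (0 freezes a key)
    v = np.ones(len(meta))
    while all(v == 1):                                  # retry until something actually changes
        v = (g * (np.random.random(len(meta)) < mp) * np.random.randn(len(meta))
             * np.random.random() * s + 1).clip(0.3, 3.0)
    for i, k in enumerate(meta):
        hyp[k] = float(np.clip(parent[i] * v[i], meta[k][1], meta[k][2]))  # mutate + constrain
    return hyp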
def mainFunc(args):
    # Set the main function flag
    print("Main Function Start...")

    # Check the GPU device
    print("Number of available GPUs: {}".format(torch.cuda.device_count()))

    # Check whether distributed running is used for the network
    is_distributed = initDistributed(args)
    master = True
    if is_distributed and os.environ["RANK"]:
        master = int(os.environ["RANK"]) == 0  # check whether this node is the master node

    # Configuration for device setting
    set_logging()
    if is_distributed:
        device = torch.device('cuda:{}'.format(args.local_rank))
    else:
        device = select_device(args.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the configuration
    config = loadConfig(args.config)

    # CuDNN related setting
    if torch.cuda.is_available():
        cudnn.benchmark = config.DEVICE.CUDNN.BENCHMARK
        cudnn.deterministic = config.DEVICE.CUDNN.DETERMINISTIC
        cudnn.enabled = config.DEVICE.CUDNN.ENABLED

    # Configurations for directories
    save_img, save_dir, source, yolov5_weights, view_img, save_txt, imgsz = \
        False, Path(args.save_dir), args.source, args.weights, args.view_img, args.save_txt, args.img_size
    webcam = source.isnumeric() or source.startswith(('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')
    if save_dir == Path('runs/detect'):  # if default
        os.makedirs('runs/detect', exist_ok=True)  # make base
        save_dir = Path(increment_dir(save_dir / 'exp', args.name))  # increment run
    os.makedirs(save_dir / 'labels' if save_txt else save_dir, exist_ok=True)  # make new dir

    # Load yolov5 model for human detection
    model_yolov5 = attempt_load(config.MODEL.PRETRAINED.YOLOV5, map_location=device)
    imgsz = check_img_size(imgsz, s=model_yolov5.stride.max())  # check img_size
    if half:
        model_yolov5.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        model_classifier = load_classifier(name='resnet101', n=2)  # initialize
        model_classifier.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        model_classifier.to(device).eval()

    # Load resnet model for human keypoints estimation
    model_resnet = eval('pose_models.' + config.MODEL.NAME.RESNET + '.get_pose_net')(config, is_train=False)
    if config.EVAL.RESNET.MODEL_FILE:
        print('=> loading model from {}'.format(config.EVAL.RESNET.MODEL_FILE))
        model_resnet.load_state_dict(torch.load(config.EVAL.RESNET.MODEL_FILE), strict=False)
    else:
        print('expected model defined in config at EVAL.RESNET.MODEL_FILE')
    model_resnet.to(device)
    model_resnet.eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    pose_transform = transforms.Compose([  # input transformation for 2d human pose estimation
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Get names and colors
    names = model_yolov5.module.names if hasattr(model_yolov5, 'module') else model_yolov5.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Construct filters for filtering 2D/3D human keypoints
    # filters_2d = constructFilters((1, 16, 2), freq=25, mincutoff=1, beta=0.01)  # for test
    # filters_3d = constructFilters((1, 16, 3), freq=25, mincutoff=1, beta=0.01)

    # Run the yolov5 and resnet for 2d human pose estimation
    # with torch.no_grad():
    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model_yolov5(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # Process every video frame
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred_boxes = model_yolov5(img, augment=args.augment)[0]

        # Apply NMS
        pred_boxes = non_max_suppression(pred_boxes, args.conf_thres, args.iou_thres,
                                         classes=args.classes, agnostic=args.agnostic_nms)
        t2 = time_synchronized()

        # Cannot find people: show the frame with no human detected, move to next frame
        if pred_boxes[0] is None:
            cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
            cv2.imshow("2D Human Pose Estimation", im0s[0].copy())
            # wait manual operations
            # with kb.Listener(on_press=on_press) as listener:
            #     listener.join()
            # return
            # if kb.is_pressed('t'):
            #     return
            print("No Human Detected and Move on.")
            print("-" * 30)
            continue

        # Print time (inference + NMS)
        detect_time = t2 - t1
        detect_fps = 1.0 / detect_time
        print("Human Detection Time: {}, Human Detection FPS: {}".format(detect_time, detect_fps))

        # Apply Classifier
        if classify:  # false
            pred_boxes = apply_classifier(pred_boxes, model_classifier, img, im0s)

        # Estimate 2d human pose (multiple persons)
        centers = []
        scales = []
        for id, boxes in enumerate(pred_boxes):
            if boxes is not None and len(boxes):
                boxes[:, :4] = scale_coords(img.shape[2:], boxes[:, :4], im0s[id].copy().shape).round()
                # convert tensor to list format
                boxes = np.delete(boxes.cpu().numpy(), [-2, -1], axis=1).tolist()
                for l in range(len(boxes)):
                    boxes[l] = [tuple(boxes[l][0:2]), tuple(boxes[l][2:4])]
                # convert box to center and scale
                for box in boxes:
                    center, scale = box_to_center_scale(box, imgsz, imgsz)
                    centers.append(center)
                    scales.append(scale)
        t3 = time_synchronized()
        pred_pose_2d = get_pose_estimation_prediction(config, model_resnet, im0s[0], centers, scales,
                                                      transform=pose_transform, device=device)
        t4 = time_synchronized()

        # Print time (2d human pose estimation)
        estimate_time = t4 - t3
        estimate_fps = 1.0 / estimate_time
        print("Pose Estimation Time: {}, Pose Estimation FPS: {}".format(estimate_time, estimate_fps))

        # Filter the predicted 2d human pose (multiple persons)
        t5 = time_synchronized()
        # if False:  # for test
        if config.EVAL.RESNET.USE_FILTERS_2D:
            # construct filters for every keypoint of every person in 2D
            filters_2d = constructFilters(pred_pose_2d.shape, freq=1, mincutoff=1, beta=0.01)
            print("Shape of filters_2d: ({}, {}, {})".format(
                len(filters_2d), len(filters_2d[0]), len(filters_2d[0][0])))  # for test
            for per in range(pred_pose_2d.shape[0]):
                for kp in range(pred_pose_2d.shape[1]):
                    for coord in range(pred_pose_2d.shape[2]):
                        pred_pose_2d[per][kp][coord] = filters_2d[per][kp][coord](pred_pose_2d[per][kp][coord])
        t6 = time_synchronized()

        # Print time (filter 2d human pose)
        filter_time_2d = t6 - t5
        filter_fps_2d = 1.0 / filter_time_2d
        print("Filter 2D Pose Time: {}, Filter 2D Pose FPS: {}".format(filter_time_2d, filter_fps_2d))

        # Process detections and estimations in 2D
        for i, box in enumerate(pred_boxes):
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if box is not None and len(box):
                # Rescale boxes from img_size to im0 size
                box[:, :4] = scale_coords(img.shape[2:], box[:, :4], im0.shape).round()

                # Print results
                for c in box[:, -1].unique():
                    n = (box[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(box):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if args.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    # Add bbox to image
                    if save_img or view_img:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                # Draw joint keypoints, number orders and human skeletons for every detected person in 2D
                for person in pred_pose_2d:
                    # draw the human keypoints
                    for idx, coord in enumerate(person):
                        x_coord, y_coord = int(coord[0]), int(coord[1])
                        cv2.circle(im0, (x_coord, y_coord), 1, (0, 0, 255), 5)
                        cv2.putText(im0, str(idx), (x_coord, y_coord), cv2.FONT_HERSHEY_SIMPLEX,
                                    0.6, (255, 255, 255), 2, cv2.LINE_AA)

                    # draw the human skeletons in PACIFIC mode
                    for skeleton in PACIFIC_SKELETON_INDEXES:
                        cv2.line(im0,
                                 (int(person[skeleton[0]][0]), int(person[skeleton[0]][1])),
                                 (int(person[skeleton[1]][0]), int(person[skeleton[1]][1])),
                                 skeleton[2], 2)

            # Print time (inference + NMS + estimation)
            print('%sDone. (%.3fs)' % (s, t4 - t1))

            # Stream results
            if view_img:
                detect_text = "Detect FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(detect_fps, detect_time * 1000)
                estimate_text = "Estimate FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(estimate_fps, estimate_time * 1000)
                cv2.putText(im0, detect_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)
                cv2.putText(im0, estimate_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)
                cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
                cv2.imshow("2D Human Pose Estimation", im0)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # q to quit
                    return  # goto .mainFunc

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

        # Print time (inference + NMS + estimation + 2d filtering)
        all_process_time = t6 - t1
        all_process_fps = 1.0 / all_process_time
        print("All Process Time: {}, All Process FPS: {}".format(all_process_time, all_process_fps))
        print("-" * 30)

    # Goto label
    # label .mainFunc

    # Print saving results
    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    # Release video reader and writer, then destroy all opencv windows
    dataset.vid_cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()
    print('Present 2D Human Pose Inference Done. Total Time:(%.3f seconds)' % (time.time() - t0))
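# The timing calls above use yolov5's time_synchronized helper. A minimal
# equivalent (matching the upstream utility, included here for reference):
import time
import torch

def time_synchronized():
    # Wait for pending CUDA kernels so wall-clock deltas reflect real GPU work
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()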
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                # cv2.imshow(p, im0)
                cv2.imwrite("C:/Users/lenovo/Desktop/server/output/camera.jpg", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('X', '2', '6', '4'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
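# plot_one_box comes from yolov5's plotting utilities. A minimal cv2 sketch of
# what the calls above rely on (box rectangle plus a filled label banner);
# the function name is ours, not the original:
import cv2

def plot_one_box_sketch(xyxy, img, label=None, color=(0, 255, 0), line_thickness=3):
    c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
    cv2.rectangle(img, c1, c2, color, thickness=line_thickness, lineType=cv2.LINE_AA)
    if label:
        tf = max(line_thickness - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=line_thickness / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled label background
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, line_thickness / 3, (255, 255, 255),
                    thickness=tf, lineType=cv2.LINE_AA)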
def detect():
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    set_logging()
    device = select_device(opt.device)

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        pred = model(img, augment=opt.augment)[0]
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        pred = sort_pred(pred, iou_tr=0.6)  # drops overlapping boxes with the lower confidence

        for i, det in enumerate(pred):
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0s.shape).round()
                for k in range(det.size(0)):
                    box = det[k].tolist()
                    x1 = int(box[0])
                    y1 = int(box[1])
                    x2 = int(box[2])
                    y2 = int(box[3])
                    text = names[int(box[5])] + ' ' + str(round(box[4], 4))
                    print('classes:', names[int(box[5])])
                    cv2.rectangle(im0s, (x1, y1), (x2, y2), colors[int(box[5])], 2)
                    cv2.putText(im0s, text, (x1, y2), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=1)
        cv2.imshow('Image', im0s)
        if cv2.waitKey(0) & 0xFF == ord('q'):
            break
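# sort_pred is not defined in this file; the following is a hedged sketch of
# what the call site implies (cross-class IoU suppression over the final
# detections, keeping the higher-confidence box of any overlapping pair). The
# name and signature come from the call above; the body is an assumption.
import torch
from torchvision.ops import box_iou

def sort_pred(pred, iou_tr=0.6):
    out = []
    for det in pred:  # per-image (n, 6) tensors: x1, y1, x2, y2, conf, cls
        if det is None or not len(det):
            out.append(det)
            continue
        det = det[det[:, 4].argsort(descending=True)]  # highest confidence first
        iou = box_iou(det[:, :4], det[:, :4])
        idx = torch.arange(len(det), device=det.device)
        keep = torch.ones(len(det), dtype=torch.bool, device=det.device)
        for i in range(len(det)):
            if keep[i]:  # suppress later (lower-confidence) boxes overlapping box i
                keep &= (iou[i] <= iou_tr) | (idx <= i)
        out.append(det[keep])
    return out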
def test(data,
         weights=None,  # model.pt path(s)
         batch_size=32,  # batch size
         imgsz=640,  # inference size (pixels)
         conf_thres=0.001,  # confidence threshold
         iou_thres=0.6,  # NMS IoU threshold
         task='val',  # train, val, test, speed or study
         device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
         single_cls=False,  # treat as single-class dataset
         augment=False,  # augmented inference
         verbose=False,  # verbose output
         save_txt=False,  # save results to *.txt
         save_hybrid=False,  # save label+prediction hybrid results to *.txt
         save_conf=False,  # save confidences in --save-txt labels
         save_json=False,  # save a cocoapi-compatible JSON results file
         project='runs/test',  # save to project/name
         name='exp',  # save to project/name
         exist_ok=False,  # existing project/name ok, do not increment
         half=True,  # use FP16 half-precision inference
         model=None,
         dataloader=None,
         save_dir=Path(''),
         plots=True,
         wandb_logger=None,
         compute_loss=None,
         ):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
        imgsz = check_img_size(imgsz, s=gs)  # check image size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half &= device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    if isinstance(data, str):
        with open(data) as f:
            data = yaml.safe_load(f)
    check_dataset(data)  # check
    is_coco = data['val'].endswith('coco/val2017.txt')  # COCO dataset
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs = 0
    if wandb_logger and wandb_logger.wandb:
        log_imgs = min(wandb_logger.log_imgs, 100)

    # Dataloader
    if not training:
        if device.type != 'cpu':
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls,
                                       pad=0.5, rect=True, prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        t_ = time_synchronized()
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        t = time_synchronized()
        t0 += t - t_

        # Run model
        out, train_out = model(img, augment=augment)  # inference and training outputs
        t1 += time_synchronized() - t

        # Compute loss
        if compute_loss:
            loss += compute_loss([x.float() for x in train_out], targets)[1][:3]  # box, obj, cls

        # Run NMS
        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
        t = time_synchronized()
        out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
        t2 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging - Media Panel plots
            if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0:  # Check for test operation
                if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
                    box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                                 "class_id": int(cls),
                                 "box_caption": "%s %.3f" % (names[cls], conf),
                                 "scores": {"class_score": conf},
                                 "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
                    boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                    wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name))
            wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%11i' * 2 + '%11.3g' * 4  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t2))  # speeds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb_logger and wandb_logger.wandb:
            val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name)
                           for f in sorted(save_dir.glob('test*.jpg'))]
            wandb_logger.log({"Validation": val_batches})
    if wandb_images:
        wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            check_requirements(['pycocotools'])
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
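# The `correct` matrix above scores each prediction against all ten IoU
# thresholds at once via broadcasting. A small self-contained example with
# made-up IoU values:
import torch

iouv = torch.linspace(0.5, 0.95, 10)     # the ten mAP thresholds (0.50, 0.55, ..., 0.95)
ious = torch.tensor([0.62, 0.48, 0.91])  # illustrative best-match IoUs for three predictions
correct = ious.unsqueeze(1) > iouv       # (3, 10) bool: True where the match survives each threshold
print(correct.sum(1))                    # tensor([3, 0, 9]) -> thresholds passed per prediction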
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Find index corresponding to a person
    idx_person = names.index("person")

    # SORT: initialize the tracker
    mot_tracker = sort_module.Sort(max_age=opt.max_age, min_hits=opt.min_hits, iou_threshold=opt.iou_threshold)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # SORT: number of people detected
                idxs_ppl = (det[:, -1] == idx_person).nonzero(as_tuple=False).squeeze(dim=1)  # 1. indices of 'person' class detections
                dets_ppl = det[idxs_ppl, :-1].to("cpu")  # 2. torch.Tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers = mot_tracker.update(dets_ppl)
                    for d in trackers:
                        plot_one_box(d[:-1], im0, label='ID' + str(int(d[-1])), color=colors[1], line_thickness=1)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
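# The `gn` normalization gain used in these detect/test loops converts a
# pixel-space xyxy box into YOLO's normalized xywh label format. A small
# self-contained version of that conversion (function name is ours):
import torch

def xyxy_to_normalized_xywh(xyxy, im_h, im_w):
    x1, y1, x2, y2 = xyxy
    gn = torch.tensor([im_w, im_h, im_w, im_h], dtype=torch.float)  # whwh gain
    xywh = torch.tensor([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1], dtype=torch.float)
    return (xywh / gn).tolist()

# e.g. a 100x200 box at the top-left of a 640x480 frame:
# xyxy_to_normalized_xywh((0, 0, 100, 200), im_h=480, im_w=640)
# -> [0.078125, 0.2083..., 0.15625, 0.4166...]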
def test(
        weights=None,
        data="yolov5/data/coco128.yaml",
        batch_size=32,
        image_size=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        task="val",
        device="",
        single_cls=False,
        augment=False,
        verbose=False,
        save_txt=False,  # for auto-labelling
        save_hybrid=False,  # for hybrid auto-labelling
        save_conf=False,  # save auto-label confidences
        save_json=False,
        project="runs/test",
        name="exp",
        exist_ok=False,
        model=None,
        dataloader=None,
        save_dir=Path(""),  # for saving images
        plots=True,
        log_imgs=0,  # number of logged images
):
    arguments = locals()

    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = Path(increment_path(Path(project) / name, exist_ok=exist_ok))  # increment run
        (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        image_size = check_img_size(image_size, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != "cpu"  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    is_coco = data.endswith("coco.yaml")  # is COCO dataset
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data["nc"])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs, wandb = min(log_imgs, 100), None  # ceil
    try:
        import wandb  # Weights & Biases
    except ImportError:
        log_imgs = 0

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, image_size, image_size), device=device)  # init img
        _ = (model(img.half() if half else img) if device.type != "cpu" else None)  # run once
        path = data["test"] if task == "test" else data["val"]  # path to val/test images
        opt = OptFactory(arguments)
        dataloader = create_dataloader(path, image_size, batch_size, model.stride.max(), opt, pad=0.5, rect=True)[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, "names") else model.module.names)}
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%12s" * 6) % ("Class", "Images", "Targets", "P", "R", "mAP@.5", "mAP@.5:.95")
    p, r, f1, mp, mr, map50, map, t0, t1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
            lb = ([targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [])  # for autolabelling
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / "labels" / (path.stem + ".txt"), "a") as f:
                        f.write(("%g " * len(line)).rstrip() % line + "\n")

            # W&B logging
            if plots and len(wandb_images) < log_imgs:
                box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                             "class_id": int(cls),
                             "box_caption": "%s %.3f" % (names[cls], conf),
                             "scores": {"class_score": conf},
                             "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
                boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({"image_id": image_id,
                                  "category_id": coco91class[int(p[5])] if is_coco else int(p[5]),
                                  "bbox": [round(x, 3) for x in b],
                                  "score": round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f"test_batch{batch_i}_labels.jpg"  # labels
            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
            f = save_dir / f"test_batch{batch_i}_pred.jpg"  # predictions
            Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = "%20s" + "%12.3g" * 6  # print format
    print(pf % ("all", seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1e3 for x in (t0, t1, t0 + t1)) + (image_size, image_size, batch_size)  # tuple
    if not training:
        print("Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g" % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb and wandb.run:
            wandb.log({"Images": wandb_images})
            wandb.log({"Validation": [wandb.Image(str(f), caption=f.name)
                                      for f in sorted(save_dir.glob("test*.jpg"))]})

    # Save JSON
    if save_json and len(jdict):
        w = (Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else "")  # weights
        anno_json = "../coco/annotations/instances_val2017.json"  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print("\nEvaluating pycocotools mAP... saving %s..." % pred_json)
        with open(pred_json, "w") as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, "bbox")
            if is_coco:
                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f"pycocotools unable to run: {e}")

    # Return results
    if not training:
        s = (f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else "")
        print(f"Results saved to {save_dir}{s}")
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
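# An invocation sketch for the standalone path of test() above. The weights
# are illustrative, and helpers such as OptFactory must be importable from the
# surrounding project:
results, maps, times = test(
    weights="yolov5s.pt",              # illustrative checkpoint
    data="yolov5/data/coco128.yaml",   # same dataset yaml as the default argument
    batch_size=16,
    image_size=640,
)
mp, mr, map50, map_ = results[:4]      # mean precision/recall, mAP@.5, mAP@.5:.95
print(f"mAP@.5 = {map50:.3f}, mAP@.5:.95 = {map_:.3f}")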
def __init__(self, master, opt):
    self.parent = master
    self.parent.title("Semi Automatic Image Annotation Tool")
    self.frame = Frame(self.parent)
    self.frame.pack(fill=BOTH, expand=1)
    self.parent.resizable(width=False, height=False)

    # Set up the YOLOv5 model
    self.opt = opt
    opt.augment = True
    opt.update = False
    opt.agnostic_nms = False

    # Initialize
    set_logging()
    device = select_device(opt.device)
    self.device = device

    # Load the YOLOv5 object detection model
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    self.img_size = check_img_size(opt.img_size, s=model.stride.max())  # check img_size
    half = device.type != 'cpu'  # half precision only supported on CUDA
    self.half = half
    if half:
        model.half()  # to FP16
    self.object_model = model

    # Object detection class names
    self.names = ['person']

    # Load the face detection model
    self.face_model = MTCNN(keep_all=True, margin=opt.face_margin, device=device)
    self.face_landmarks = opt.face_landmarks

    # Initialize class variables
    self.img = None
    self.tkimg = None
    self.imageDir = ''
    self.imageDirPathBuffer = ''
    self.imageList = []
    self.imageTotal = 0
    self.imageCur = 0
    self.cur = 0
    self.bboxIdList = []
    self.bboxList = []
    self.bboxPointList = []
    self.o1 = None
    self.o2 = None
    self.o3 = None
    self.o4 = None
    self.bboxId = None
    self.currLabel = None
    self.editbboxId = None
    self.currBboxColor = None
    self.zoomImgId = None
    self.zoomImg = None
    self.zoomImgCrop = None
    self.tkZoomImg = None
    self.hl = None
    self.vl = None
    self.editPointId = None
    self.filename = None
    self.filenameBuffer = None
    self.objectLabelList = []
    self.EDIT = False

    # Record image names so previous results can be saved
    self.annoList = {}

    # Initialize mouse state
    self.STATE = {'x': 0, 'y': 0}
    self.STATE_COCO = {'click': 0}

    # Initialize (truncate) the annotation file
    self.anno_filename = 'annotations.csv'
    self.annotation_file = open('annotations/' + self.anno_filename, 'w+')
    self.annotation_file.write("")
    self.annotation_file.close()

    # ------------------ GUI ---------------------
    # Control panel
    self.ctrlPanel = Frame(self.frame)
    self.ctrlPanel.grid(row=0, column=0, sticky=W + N)
    self.openBtn = Button(self.ctrlPanel, text='Open', command=self.open_image)
    self.openBtn.pack(fill=X, side=TOP)
    self.openDirBtn = Button(self.ctrlPanel, text='Open Dir', command=self.open_image_dir)
    self.openDirBtn.pack(fill=X, side=TOP)
    self.nextBtn = Button(self.ctrlPanel, text='Next -->', command=self.open_next)
    self.nextBtn.pack(fill=X, side=TOP)
    self.previousBtn = Button(self.ctrlPanel, text='<-- Previous', command=self.open_previous)
    self.previousBtn.pack(fill=X, side=TOP)
    self.saveBtn = Button(self.ctrlPanel, text='Save', command=self.save)
    self.saveBtn.pack(fill=X, side=TOP)
    self.semiAutoBtn = Button(self.ctrlPanel, text="Show Suggestions", command=self.automate)
    self.semiAutoBtn.pack(fill=X, side=TOP)
    self.disp = Label(self.ctrlPanel, text='Coordinates:')
    self.disp.pack(fill=X, side=TOP)
    self.mb = Menubutton(self.ctrlPanel, text="COCO Classes for Suggestions", relief=RAISED)
    self.mb.pack(fill=X, side=TOP)
    self.mb.menu = Menu(self.mb, tearoff=0)
    self.mb["menu"] = self.mb.menu
    self.addCocoBtn = Button(self.ctrlPanel, text="+", command=self.add_labels_coco)
    self.addCocoBtn.pack(fill=X, side=TOP)
    self.zoomPanelLabel = Label(self.ctrlPanel, text="Precision View Panel")
    self.zoomPanelLabel.pack(fill=X, side=TOP)
    self.zoomcanvas = Canvas(self.ctrlPanel, width=150, height=150)
    self.zoomcanvas.pack(fill=X, side=TOP, anchor='center')

    # Image editing region
    self.canvas = Canvas(self.frame, width=self.img_size, height=self.img_size)
    self.canvas.grid(row=0, column=1, sticky=W + N)
    self.canvas.bind("<Button-1>", self.mouse_click)
    self.canvas.bind("<Motion>", self.mouse_move, "+")
    self.canvas.bind("<B1-Motion>", self.mouse_drag)
    self.canvas.bind("<ButtonRelease-1>", self.mouse_release)
    self.parent.bind("<Key-Left>", self.open_previous)
    self.parent.bind("<Key-Right>", self.open_next)
    self.parent.bind("<Escape>", self.cancel_bbox)  # was "Escape", an invalid Tk event pattern

    # Labels and bounding-box lists panel
    self.listPanel = Frame(self.frame)
    self.listPanel.grid(row=0, column=2, sticky=W + N)
    self.listBoxNameLabel = Label(self.listPanel, text="List of Objects")
    self.listBoxNameLabel.pack(fill=X, side=TOP)  # pack() returns None, so assign the widget first
    self.objectListBox = Listbox(self.listPanel, width=40)
    self.objectListBox.pack(fill=X, side=TOP)
    self.delObjectBtn = Button(self.listPanel, text="Delete", command=self.del_bbox)
    self.delObjectBtn.pack(fill=X, side=TOP)
    self.clearAllBtn = Button(self.listPanel, text="Clear All", command=self.clear_bbox)
    self.clearAllBtn.pack(fill=X, side=TOP)
    self.classesNameLabel = Label(self.listPanel, text="Classes")
    self.classesNameLabel.pack(fill=X, side=TOP)
    self.textBox = Entry(self.listPanel, text="Enter label")
    self.textBox.pack(fill=X, side=TOP)
    self.addLabelBtn = Button(self.listPanel, text="+", command=self.add_label)
    self.addLabelBtn.pack(fill=X, side=TOP)
    self.delLabelBtn = Button(self.listPanel, text="-", command=self.del_label)
    self.delLabelBtn.pack(fill=X, side=TOP)
    self.labelListBox = Listbox(self.listPanel)
    self.labelListBox.pack(fill=X, side=TOP)
    for name in self.names + ['face']:
        self.labelListBox.insert(END, str(name))
    self.cocoLabels = config.labels_to_names.values()
    self.cocoIntVars = []
    for idxcoco, label_coco in enumerate(self.cocoLabels):
        self.cocoIntVars.append(IntVar())
        self.mb.menu.add_checkbutton(label=label_coco, variable=self.cocoIntVars[idxcoco])
    # print(self.cocoIntVars)

    # Status bar
    self.statusBar = Frame(self.frame, width=500)
    self.statusBar.grid(row=1, column=1, sticky=W + N)
    self.processingLabel = Label(self.statusBar, text=" ")
    self.processingLabel.pack(side="left", fill=X)
    self.imageIdxLabel = Label(self.statusBar, text=" ")
    self.imageIdxLabel.pack(side="right", fill=X)
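# A minimal launch sketch for the annotation tool above, assuming the __init__
# belongs to a class named MainGUI (a hypothetical name; the real class name is
# not shown here) and that opt carries the attributes referenced above
# (weights, img_size, device, face_margin, face_landmarks).
if __name__ == '__main__':
    import argparse
    from tkinter import Tk

    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='yolov5s.pt', help='model weights path')
    parser.add_argument('--img-size', dest='img_size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--device', type=str, default='', help='cuda device, i.e. 0, or cpu')
    parser.add_argument('--face-margin', dest='face_margin', type=int, default=0, help='MTCNN face crop margin')
    parser.add_argument('--face-landmarks', dest='face_landmarks', action='store_true', help='draw face landmarks')
    opt = parser.parse_args()

    root = Tk()
    app = MainGUI(root, opt)  # MainGUI is an assumed class name for the __init__ above
    root.mainloop()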
def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_conf=False,
         plots=True):
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        save_txt = opt.save_txt  # save *.txt labels

        # Remove previous
        if os.path.exists(save_dir):
            shutil.rmtree(save_dir)  # delete dir
        os.makedirs(save_dir)  # make new dir

        if save_txt:
            out = save_dir / 'autolabels'
            if os.path.exists(out):
                shutil.rmtree(out)  # delete dir
            os.makedirs(out)  # make new dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
                                       hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                x = pred.clone()
                x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1])  # to original
                for *xyxy, conf, cls in x:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, conf, *xywh) if save_conf else (cls, *xywh)  # label format
                    with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f:
                        f.write(('%g ' * len(line) + '\n') % line)

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id,
                                  'category_id': coco91class[int(p[5])],
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and save_dir and batch_i < 1:
            f = save_dir / f'test_batch{batch_i}_gt.jpg'  # filename
            plot_images(img, targets, paths, str(f), names)  # ground truth
            f = save_dir / f'test_batch{batch_i}_pred.jpg'
            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots,
                                              fname=os.path.join(save_dir, 'precision-recall_curve.png'))
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@.5, mAP@.5:.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        file = save_dir / f"detections_val2017_{w}_results.json"  # predicted annotations file
        print('\nCOCO mAP with pycocotools... saving %s...' % file)
        with open(file, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(str(file))  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
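# A minimal sketch of calling test() directly. It assumes a module-level `opt`
# namespace (the function reads opt.device, opt.task and opt.save_txt when not
# called from train.py), a yolov5s.pt checkpoint, and a dataset YAML in
# yolov5's format; all three names here are illustrative.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--device', default='', help='cuda device, i.e. 0, or cpu')
    parser.add_argument('--task', default='val', help="'val' or 'test'")
    parser.add_argument('--save-txt', action='store_true', help='save *.txt labels')
    opt = parser.parse_args()

    results, maps, times = test('data/coco128.yaml',
                                weights='yolov5s.pt',
                                batch_size=16,
                                imgsz=640,
                                save_dir=Path('runs/test/exp'))
    mp, mr, map50, map_, *losses = results  # mean P, mean R, mAP@.5, mAP@.5:.95, losses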
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
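# A hedged usage sketch for the detect() above: it reads its options from a
# module-level `opt`, so a Namespace carrying the fields the function accesses
# can stand in for the usual argparse block. The field values are illustrative.
if __name__ == '__main__':
    import argparse

    opt = argparse.Namespace(
        source='data/images',  # file/folder, '0' for webcam
        weights='yolov5s.pt',
        view_img=False,
        save_txt=False,
        save_conf=False,
        nosave=False,
        img_size=640,
        conf_thres=0.25,
        iou_thres=0.45,
        device='',
        classes=None,
        agnostic_nms=False,
        augment=False,
        project='runs/detect',
        name='exp',
        exist_ok=False,
    )
    with torch.no_grad():
        detect()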
def detect(
    weights="yolov5s.pt",
    source="yolov5/data/images",
    img_size=640,
    conf_thres=0.75,
    iou_thres=0.45,
    device="",
    view_img=False,
    save_txt=False,
    save_conf=False,
    classes=None,
    agnostic_nms=False,
    augment=False,
    update=False,
    project="runs/detect",
    name="exp",
    exist_ok=False,
    save_img=False,
):
    """
    Args:
        weights: str        model.pt path(s)
        source: str         file/folder, 0 for webcam
        img_size: int       inference size (pixels)
        conf_thres: float   object confidence threshold
        iou_thres: float    IOU threshold for NMS
        device: str         cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img: bool      display results
        save_txt: bool      save results to *.txt
        save_conf: bool     save confidences in save_txt labels
        classes: list[int]  filter by class: [0], or [0, 2, 3]
        agnostic_nms: bool  class-agnostic NMS
        augment: bool       augmented inference
        update: bool        update all models
        project: str        save results to project/name
        name: str           save results to project/name
        exist_ok: bool      existing project/name ok, do not increment
    """
    imgsz = img_size  # the original re-assigned every argument to itself here; only img_size needs renaming
    webcam = (
        source.isnumeric()
        or source.endswith(".txt")
        or source.lower().startswith(("rtsp://", "rtmp://", "http://"))
    )

    # Directories
    save_dir = Path(increment_path(Path(project) / name, exist_ok=exist_ok))  # increment run
    (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != "cpu"  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name="resnet101", n=2)  # initialize
        modelc.load_state_dict(
            torch.load("weights/resnet101.pt", map_location=device)["model"]
        ).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, "module") else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != "cpu" else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], "%g: " % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, "", im0s, getattr(dataset, "frame", 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}")  # img.txt
            s += "%gx%g " % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}s, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + ".txt", "a") as f:
                            f.write(("%g " * len(line)).rstrip() % line + "\n")

                    if save_img or view_img:  # Add bbox to image
                        label = f"{names[int(cls)]} {conf:.2f}"
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f"{s}Done. ({t2 - t1:.3f}s)")

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == "image":
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = "mp4v"  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = (
            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
            if save_txt
            else ""
        )
        print(f"Results saved to {save_dir}{s}")

    print(f"Done. ({time.time() - t0:.3f}s)")
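# Because this variant takes keyword arguments instead of a global opt, it can
# be driven directly from Python. A minimal sketch, assuming a yolov5s.pt
# checkpoint and a local images folder exist (both names illustrative):
if __name__ == '__main__':
    with torch.no_grad():
        detect(
            weights="yolov5s.pt",
            source="data/images",
            img_size=640,
            conf_thres=0.5,
            save_txt=True,
            save_img=True,
        )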
    parser.add_argument('--logdir', type=str, default='runs/', help='logging directory')
    parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
    opt = parser.parse_args()

    # Set DDP variables
    opt.total_batch_size = opt.batch_size
    opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
    set_logging(opt.global_rank)

    # Resume
    if opt.resume:  # resume an interrupted run
        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
        log_dir = Path(ckpt).parent.parent  # runs/exp0
        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
        with open(log_dir / 'opt.yaml') as f:
            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader))  # replace
        opt.cfg, opt.weights, opt.resume = '', ckpt, True
        logger.info('Resuming training from %s' % ckpt)
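# The resume path above falls back to get_latest_run() when --resume is passed
# without an explicit checkpoint. A minimal sketch of such a helper, assuming
# checkpoints are written as last*.pt somewhere under the runs/ directory:
import glob
import os


def get_latest_run(search_dir='./runs'):
    """Return the most recently modified last*.pt checkpoint under search_dir."""
    last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    return max(last_list, key=os.path.getctime) if last_list else ''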
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.lower().startswith('intel')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    folder_main = out.split('/')[0]
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    folder_features = folder_main + '/features'
    if os.path.exists(folder_features):
        shutil.rmtree(folder_features)  # delete features output folder
    folder_crops = folder_main + '/image_crops'
    if os.path.exists(folder_crops):
        shutil.rmtree(folder_crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(folder_features)  # make new output folder
    os.makedirs(folder_crops)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights[0], map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        if source.lower().startswith('intel'):
            dataset = LoadRealSense2()
            save_img = True
        else:
            dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Frames per second
    # TODO: distinguish Intel RealSense input from given footage
    fps = 30  # dataset.cap.get(cv2.CAP_PROP_FPS)
    critical_time_frames = opt.time * fps

    # COUNTER: initialization
    counter = VoteCounter(critical_time_frames, fps)
    print('CRITICAL TIME IS ', opt.time, 'sec, or ', counter.critical_time, ' frames')

    # Find the index corresponding to the 'person' class
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # AlphaPose: initialization
    # args_p = update_config(opt.config_alphapose)
    # cfg_p = update_config(args_p.ALPHAPOSE.cfg)
    #
    # args_p.ALPHAPOSE.tracking = args_p.ALPHAPOSE.pose_track or args_p.ALPHAPOSE.pose_flow
    #
    # demo = SingleImageAlphaPose(args_p.ALPHAPOSE, cfg_p, device)
    # output_pose = opt.output.split('/')[0] + '/pose'
    # if not os.path.exists(output_pose):
    #     os.mkdir(output_pose)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # TODO => COUNTER: draw queueing ROI
        # compute urn centroid (1st frame only) and plot a bounding box around it
        # if dataset.frame == 1:
        #     counter.read_urn_coordinates(opt.urn, im0s, opt.radius)
        # counter.plot_urn_bbox(im0s)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                if source.lower().startswith('intel'):
                    p, s, im0, frame = path, '%g: ' % i, im0s[i].copy(), dataset.count
                else:
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            print(save_path)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (det[:, -1] == idx_person).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :-1]  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    # tracks inside a critical sphere
                    trackers_inside = []
                    for i, d in enumerate(trackers):
                        plot_one_box(d[:-1], im0, label='ID' + str(int(d[-1])), color=colors[1], line_thickness=1)
                        # TODO: queue COUNTER
                        # d_include = counter.centroid_distance(d, im0, colors[1], dataset.frame)
                        # if d_include:
                        #     trackers_inside.append(d)

                    # ALPHAPOSE: show skeletons for bounding boxes inside the critical sphere
                    # if len(trackers_inside) > 0:
                    #     pose = demo.process('frame_' + str(dataset.frame), im0, trackers_inside)
                    #     im0 = demo.vis(im0, pose)
                    #     demo.writeJson([pose], output_pose, form=args_p.ALPHAPOSE.format, for_eval=args_p.ALPHAPOSE.eval)
                    #
                    #     counter.save_features_and_crops(im0, dataset.frame, trackers_inside, features, folder_main)

            cv2.putText(im0, 'Voted ' + str(len(counter.voters_count)), (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)

            print('NUM VOTERS', len(counter.voters))
            print(list(counter.voters.keys()))

            # COUNTER
            if len(counter.voters) > 0:
                counter.save_voter_trajectory(dataset.frame, folder_main)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        if type(vid_cap) is dict:  # RealSense stream; no cv2.VideoCapture to query
                            # TODO: hard-coded stream parameters
                            w, h, fps = 640, 480, 6
                        else:
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # macOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
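# The Deep SORT hand-off above relies on xyxy2xywh() to turn corner-format
# boxes into the center-format boxes the tracker expects. A minimal sketch of
# that conversion, matching the [x1, y1, x2, y2] -> [x_center, y_center, w, h]
# convention used in the loop above:
def xyxy2xywh(x):
    # x: torch.Tensor or np.ndarray of shape (n, 4) in corner format
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y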
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
"""

from pathlib import Path

import torch

from yolov5.models.yolo import Model
from yolov5.utils.general import check_requirements, set_logging
from yolov5.utils.google_utils import attempt_download
from yolov5.utils.torch_utils import select_device

dependencies = ['torch', 'yaml']
check_requirements(Path(__file__).parent / 'requirements.txt', exclude=('pycocotools', 'thop'))
set_logging()


def create(name, pretrained, channels, classes, autoshape):
    """Creates a specified YOLOv5 model

    Arguments:
        name (str): name of model, i.e. 'yolov5s'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): apply the .autoshape() wrapper for inference

    Returns:
        pytorch model
    """
    config = Path(
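# A hedged usage sketch for the hub entry point above (the create() body is
# truncated here, so this only illustrates the intended call shape): a
# pretrained COCO model would be requested with 3 input channels and 80
# classes, and the autoshaped model accepts paths/URLs/arrays directly.
if __name__ == '__main__':
    model = create('yolov5s', pretrained=True, channels=3, classes=80, autoshape=True)
    results = model('https://ultralytics.com/images/zidane.jpg')  # autoshaped inference
    results.print()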
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')  # ONNX-only
    parser.add_argument('--simplify', action='store_true', help='simplify ONNX model')  # ONNX-only
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)

    # add yolov5 folder to system path
    here = Path(__file__).parents[1].absolute()
    yolov5_folder_dir = str(here)
    sys.path.insert(0, yolov5_folder_dir)

    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    for _ in range(2):
        y = model(img)  # dry runs
    print(f"\n{colorstr('PyTorch:')} starting from {opt.weights} ({file_size(opt.weights):.1f} MB)")

    # remove yolov5 folder from system path
    sys.path.remove(yolov5_folder_dir)

    # TorchScript export -----------------------------------------------------------------------------------------------
    prefix = colorstr('TorchScript:')
    try:
        print(f'\n{prefix} starting export with torch {torch.__version__}...')
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts = optimize_for_mobile(ts)  # https://pytorch.org/tutorials/recipes/script_optimized.html
        ts.save(f)
        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # ONNX export ------------------------------------------------------------------------------------------------------
    prefix = colorstr('ONNX:')
    try:
        import onnx

        print(f'{prefix} starting export with onnx {onnx.__version__}...')
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model
        # print(onnx.helper.printable_graph(model_onnx.graph))  # print

        # Simplify
        if opt.simplify:
            try:
                check_requirements(['onnx-simplifier'])
                import onnxsim

                print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(model_onnx,
                                                     dynamic_input_shape=opt.dynamic,
                                                     input_shapes={'images': list(img.shape)} if opt.dynamic else None)
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
                print(f'{prefix} simplifier failure: {e}')
        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # CoreML export ----------------------------------------------------------------------------------------------------
    prefix = colorstr('CoreML:')
    try:
        import coremltools as ct

        print(f'{prefix} starting export with coremltools {ct.__version__}...')
        # convert model from torchscript and apply pixel scaling as per detect.py
        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
    except Exception as e:
        print(f'{prefix} export failure: {e}')

    # Finish
    print(f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.')
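# After the ONNX export above, a quick sanity check with onnxruntime confirms
# the exported graph loads and runs. A minimal sketch, assuming main() produced
# yolov5s.onnx with the default 640x640 static input ('images' is the input
# name set in the torch.onnx.export call above):
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('yolov5s.onnx', providers=['CPUExecutionProvider'])
input_name = session.get_inputs()[0].name  # 'images'
dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)  # batch of one blank image
outputs = session.run(None, {input_name: dummy})
print([o.shape for o in outputs])  # inspect output tensor shapes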