def __init__(self, weights, service_name, image_size=640, nms_conf_thresh=0.4, nms_iou_thresh=0.5, device=""):
    """Load a YOLOv5 model, then hand control to the base detection server.

    Args:
        weights: value passed to ``attempt_load`` to locate the model weights.
        service_name: forwarded to ``BaseDetectionServer.__init__``.
        image_size: requested inference size; adjusted by ``check_img_size``
            against the model's maximum stride.
        nms_conf_thresh: stored as ``self.conf_thresh``.
        nms_iou_thresh: stored as ``self.iou_thresh``.
        device: device string handed to ``select_device``.

    Raises:
        ImportError: if the ``yolov5`` package cannot be imported.
    """
    # The yolov5 helpers are imported lazily inside the constructor; a failed
    # import simply propagates to the caller.
    from yolov5.utils.general import set_logging
    from yolov5.utils.torch_utils import select_device
    from yolov5.models.experimental import attempt_load
    from yolov5.utils.general import check_img_size
    from yolov5.utils.general import non_max_suppression
    from yolov5.utils.general import scale_coords
    from yolov5.utils.datasets import letterbox

    self.device = select_device(device)
    self.conf_thresh = nms_conf_thresh
    self.iou_thresh = nms_iou_thresh
    self.half = self.device.type != 'cpu'

    self.model = attempt_load(weights, map_location=self.device)
    self.image_size = check_img_size(image_size, s=self.model.stride.max())
    if self.half:
        self.model.half()  # to FP16

    # A wrapped model exposes its class names through ``.module``.
    self.names = getattr(self.model, 'module', self.model).names
    self.currently_busy = Event()

    # Base class must be called at the end due to self.service_server.spin()
    BaseDetectionServer.__init__(self, service_name=service_name)
def run(
        weights=ROOT / 'yolov5s.pt',  # weights path
        imgsz=(640, 640),  # inference size h,w
        batch_size=1,  # batch size
        dynamic=False,  # dynamic batch size
):
    """Run one dummy inference through the PyTorch, TensorFlow and Keras models.

    Args:
        weights: checkpoint path given to ``attempt_load``.
        imgsz: (height, width) of the dummy input.
        batch_size: batch dimension of the dummy inputs.
        dynamic: when true the Keras input is built with ``batch_size=None``.
    """
    # PyTorch model, fed a zero BCHW image
    torch_input = torch.zeros((batch_size, 3, *imgsz))
    model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False)
    model(torch_input)  # inference
    model.info()

    # TensorFlow model, fed a zero BHWC image
    tf_input = tf.zeros((batch_size, *imgsz, 3))
    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
    tf_model.predict(tf_input)  # inference

    # Keras model wrapping the TensorFlow prediction graph
    keras_batch = None if dynamic else batch_size
    keras_input = keras.Input(shape=(*imgsz, 3), batch_size=keras_batch)
    keras_model = keras.Model(inputs=keras_input, outputs=tf_model.predict(keras_input))
    keras_model.summary()

    LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
def process_sample(image_filepath):
    """Run the trained detector on a single image file and log every prediction.

    Args:
        image_filepath: path handed to ``cv2.imread``. When the image cannot
            be read a message is logged and the function returns early.
    """
    rospy.loginfo('Starting yolo sample node...')
    rospy.init_node('sample', anonymous=True)

    rospy.loginfo('Loading model...')
    torch_device = select_device('')
    use_half = torch_device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    weights_path = f'{os.path.dirname(__file__)}/yolov5/runs/train/exp4/weights/best.pt'
    model = attempt_load(weights_path, map_location=torch_device)
    # The checked size is not used further in this function.
    check_img_size(224, s=model.stride.max())
    if use_half:
        model.half()

    rospy.loginfo('Reading image...')
    image = cv2.imread(image_filepath)
    if image is None:
        rospy.loginfo('Failed to read image.')
        return

    rospy.loginfo('Running inference...')
    predictions = detect(model, image)

    rospy.loginfo('Predictions:')
    for prediction in predictions:
        rospy.loginfo(f' Class: {prediction[0]}')
        rospy.loginfo(f' Confidence: {prediction[1]}')
        rospy.loginfo(f' Bounds: {prediction[2]}')
def load_model(self, weights: str, device: str = 'cuda') -> None:
    """Load the weights onto the selected device and store the result on ``self``.

    Args:
        weights: value passed to ``attempt_load``.
        device: device string passed to ``select_device``.

    Sets ``self.model``, ``self.device`` and ``self.half``.
    """
    torch_device = select_device(device)
    net = attempt_load(weights, map_location=torch_device)  # load FP32 model

    # half precision only supported on CUDA
    use_half = torch_device.type != 'cpu'
    if use_half:
        net.half()  # to FP16

    self.model, self.device, self.half = net, torch_device, use_half
def load_model(self):
    """Load the model named by ``self.opt.weights``.

    The parent directory of the weights path is created first if it is missing.

    Returns:
        tuple: ``(model, imgsz, class_names)`` where ``imgsz`` is
        ``self.opt.img_size`` after ``check_img_size`` and ``class_names`` is
        the model's ``names`` attribute.
    """
    weights = self.opt.weights
    Path(weights).parent.mkdir(parents=True, exist_ok=True)

    net = attempt_load(weights, map_location=self.device)  # load FP32 model
    if self.half:
        net.half()  # to FP16

    checked_size = check_img_size(self.opt.img_size, s=net.stride.max())
    # A wrapped model exposes its class names through ``.module``.
    labels = getattr(net, 'module', net).names
    return net, checked_size, labels
def load_models(self, weights_file_path, **kwargs):
    """Load the model.

    :param weights_file_path: path to the weights file
    :param kwargs: other control parameters (not read by this method)
    """
    # Pick the device, then load the weights onto it.
    chosen_device = torch_utils.select_device()
    net = experimental.attempt_load([weights_file_path], map_location=chosen_device)
    net.float().eval()

    self.model = net
    self.device = chosen_device
def __init__(self):
    """Start the 'yolo' ROS node, load the detector and set up its topics.

    Subscribes to the ``camera/depth_registered/points`` point cloud with
    ``self.process_pointcloud`` as callback and creates publishers for
    ``yolo/detections`` and ``visualization_marker``.
    """
    rospy.loginfo('Starting yolo video node...')
    rospy.init_node('yolo', anonymous=True)

    self.bridge = CvBridge()
    self.pointcloud = None
    self.saved = False

    rospy.loginfo('Loading model...')
    torch_device = select_device('')
    use_half = torch_device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    weights_path = f'{os.path.dirname(__file__)}/yolov5/runs/train/exp4/weights/best.pt'
    self.model = attempt_load(weights_path, map_location=torch_device)
    # The checked size is not used further in this constructor.
    check_img_size(224, s=self.model.stride.max())
    if use_half:
        self.model.half()

    # "Can you recycle" lookup, keyed by class name
    self.class_dict = {
        'Cardboard': True,
        'Chips Bag': False,
        'Disposable Cup': False,
        'Napkin': False,
        'Plastic Bottle': True,
        'Plastic Container': True,
        'Soda Can': True,
    }

    # Subscribe to pointcloud to make predictions
    rospy.loginfo('Subscribing to camera pointcloud topic...')
    rospy.Subscriber('camera/depth_registered/points', PointCloud2, self.process_pointcloud)
    rospy.loginfo('Awaiting images...')

    # Publisher for predictions
    self.predict_pub = rospy.Publisher('yolo/detections', String, queue_size=10)

    # Publisher for RViz markers
    self.rviz_pub = rospy.Publisher('visualization_marker', Marker, queue_size=10)
def __init__(self, video_folder, transform, resize_height, resize_width, dataset='', time_step=4, num_pred=1,
             bbox_folder=None, device=None, flow_folder=None):
    """Set up the video dataset and, where needed, the YOLO and FlowNet models.

    Args:
        video_folder: stored as ``self.dir``.
        transform: stored as ``self.transform``.
        resize_height: stored as ``self._resize_height``.
        resize_width: stored as ``self._resize_width``.
        dataset: dataset name (ped2, avenue or ShanghaiTech per the original note).
        time_step: stored as ``self._time_step``.
        num_pred: stored as ``self._num_pred``.
        bbox_folder: folder of pre-computed boxes; when ``None`` a YOLOv5
            model is loaded so boxes can be computed on the fly.
        device: device used for the YOLO and FlowNet models.
        flow_folder: folder of pre-computed optical flow; when ``None`` a
            FlowNet2 model is loaded.
    """
    self.dir = video_folder
    self.transform = transform
    self.videos = OrderedDict()
    self._resize_height = resize_height
    self._resize_width = resize_width
    self._time_step = time_step
    self._num_pred = num_pred

    self.dataset = dataset  # ped2 or avenue or ShanghaiTech

    self.bbox_folder = bbox_folder
    # Per the original note: pre-processed boxes are read straight from the
    # npy data; otherwise they are computed in get_item.
    if bbox_folder is None:
        # Load the YOLO model
        self.yolo_weights = 'yolov5/weights/yolov5s.pt'
        self.yolo_device = device
        self.yolo_model = attempt_load(self.yolo_weights, map_location=self.yolo_device)  # load FP32 model

    self.flow_folder = flow_folder
    if self.flow_folder is None:
        # Load FlowNet
        parser = argparse.ArgumentParser()
        parser.add_argument('--fp16', action='store_true',
                            help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
        parser.add_argument("--rgb_max", type=float, default=255.)
        # parse_known_args: the hosting script usually defines its own CLI
        # flags, and parse_args() would exit the process on any of them.
        args, _unknown = parser.parse_known_args()

        self.device = device
        self.flownet = FlowNet2(args).to(self.device)
        # map_location keeps the checkpoint loadable on a machine without the
        # device it was saved from (None keeps torch's default behaviour).
        checkpoint = torch.load("flownet2/FlowNet2_checkpoint.pth.tar", map_location=self.device)
        self.flownet.load_state_dict(checkpoint["state_dict"])

    self.setup()
    self.samples = self.get_all_samples()
def __init__(self):
    """Load the detector, classifier and retrieval models, timing the work.

    A status line is printed first, depending on whether
    ``ModelLoader._instance`` is already set. The elapsed loading time in
    seconds is kept in a private attribute.
    """
    if ModelLoader._instance:
        print("ModelLoader instance already created", self.getInstance())
    else:
        print("ModelLoader.__init__ method called but nothing is created")

    started = time.time()

    self.__device = select_device('0')
    weights = 'yolov5/runs/train/exp/weights/best.pt'
    self.__model = attempt_load(weights, map_location=self.__device)
    self.__modelc = load_classifier(name="resnet50", n=2)
    self.__retrival = ImageRetrievalClass("MobileNetV3", True, False)
    self.__retrival.buildModel(shape_img=(256, 256, 3))

    finished = time.time()
    self.__elapsed_time = finished - started
def __init__(self, weights_path, img_size=640, device='', conf_thres=0.5, iou_thres=0.5, classes=None):
    """Store the detection settings and load the YOLOv5 model.

    Args:
        weights_path: value passed to ``attempt_load``.
        img_size: requested inference size; replaced by the value returned
            from ``check_img_size`` for the model's maximum stride.
        device: device string passed to ``select_device``.
        conf_thres: stored as ``self._conf_thres``.
        iou_thres: stored as ``self._iou_thres``.
        classes: stored as ``self._classes``.
    """
    self._weights_path = weights_path
    self._img_size = img_size
    self._device = select_device(device)
    self._conf_thres = conf_thres
    self._iou_thres = iou_thres
    self._classes = classes
    # half precision only supported on CUDA
    self._half = self._device.type != 'cpu'

    # Load the FP32 model, then validate the image size against its stride.
    self._model = attempt_load(self._weights_path, map_location=self._device)
    max_stride = self._model.stride.max()
    self._img_size = check_img_size(self._img_size, s=max_stride)

    if self._half:
        self._model.half()  # to FP16
def detect(
    weights="yolov5s.pt",
    source="yolov5/data/images",
    img_size=640,
    conf_thres=0.75,
    iou_thres=0.45,
    device="",
    view_img=False,
    save_txt=False,
    save_conf=False,
    classes=None,
    agnostic_nms=False,
    augment=False,
    update=False,
    project="runs/detect",
    name="exp",
    exist_ok=False,
    save_img=False,
):
    """Run YOLOv5 detection over a source and save or show the annotated results.

    Args:
        weights: str, model.pt path(s).
        source: str, file/folder, or a numeric string / .txt / stream URL for
            streamed input.
        img_size: int, inference size (pixels).
        conf_thres: float, object confidence threshold.
        iou_thres: float, IOU threshold for NMS.
        device: str, cuda device, i.e. 0 or 0,1,2,3 or cpu.
        view_img: bool, display results (forced on for streamed input).
        save_txt: bool, save results to *.txt.
        save_conf: bool, save confidences in save_txt labels.
        classes: class filter passed to NMS: [0], or [0, 2, 3].
        agnostic_nms: bool, class-agnostic NMS.
        augment: bool, augmented inference.
        update: bool, accepted for compatibility; not read by this function.
        project: str, save results to project/name.
        name: str, save results to project/name.
        exist_ok: bool, existing project/name ok, do not increment.
        save_img: bool, save annotated output (forced on for non-stream input).
    """
    imgsz = img_size
    webcam = (
        source.isnumeric()
        or source.endswith(".txt")
        or source.lower().startswith(("rtsp://", "rtmp://", "http://"))
    )

    # Directories
    save_dir = Path(increment_path(Path(project) / name, exist_ok=exist_ok))  # increment run
    (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != "cpu"  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name="resnet101", n=2)  # initialize
        # load_state_dict returns a key report, not the module, so the
        # device move / eval switch must be called on the module itself.
        modelc.load_state_dict(torch.load("weights/resnet101.pt", map_location=device)["model"])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, "module") else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != "cpu" else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference (no autograd graph is needed for prediction)
        t1 = time_synchronized()
        with torch.no_grad():
            pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred,
            conf_thres,
            iou_thres,
            classes=classes,
            agnostic=agnostic_nms,
        )
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], "%g: " % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, "", im0s, getattr(dataset, "frame", 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / "labels" / p.stem) + (
                "" if dataset.mode == "image" else f"_{frame}"
            )  # img.txt
            s += "%gx%g " % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}s, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (
                            (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn)
                            .view(-1)
                            .tolist()
                        )  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + ".txt", "a") as f:
                            f.write(("%g " * len(line)).rstrip() % line + "\n")

                    if save_img or view_img:  # Add bbox to image
                        label = f"{names[int(cls)]} {conf:.2f}"
                        plot_one_box(
                            xyxy,
                            im0,
                            label=label,
                            color=colors[int(cls)],
                            line_thickness=3,
                        )

            # Print time (inference + NMS)
            print(f"{s}Done. ({t2 - t1:.3f}s)")

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                # HighGUI only paints and handles events inside waitKey;
                # without it the window never shows the frame.
                cv2.waitKey(1)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == "image":
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = "mp4v"  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)
                        )
                    vid_writer.write(im0)

    # Flush and close the last output video.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        s = (
            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
            if save_txt
            else ""
        )
        print(f"Results saved to {save_dir}{s}")

    print(f"Done. ({time.time() - t0:.3f}s)")
def load_model(weights=WEIGHTS, device=DEVICE, imgsz=IMGSZ):
    """Load the YoloV5 model.

    Args:
        weights: value passed to ``attempt_load``.
        device: used as ``map_location`` when loading.
        imgsz: requested image size, validated against the model stride.

    Returns:
        tuple: ``(model, stride, imgsz)`` where ``stride`` is the model's
        maximum stride as an int and ``imgsz`` is the checked image size.
    """
    net = attempt_load(weights, map_location=device)
    max_stride = int(net.stride.max())
    checked_size = check_img_size(imgsz, s=max_stride)
    return (net, max_stride, checked_size)
def predict(self, src_image):
    """Detect objects in ``src_image`` and fill the graphics and numeric outputs.

    The model is (re)loaded when none is cached or when the parameters ask
    for an update. Output 1 receives one rectangle and one text label per
    detection on a layer named "YoloV5"; output 2 receives a table of
    confidences labelled by class name. ``emitStepProgress`` is called after
    each stage.

    Args:
        src_image: source image, passed to ``letterbox`` and used for its
            ``shape`` when rescaling boxes.
    """
    settings = self.getParam()

    # Initialize
    init_logging()
    use_fp16 = self.device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    reload_model = self.model is None or settings.update
    if reload_model:
        self.model = attempt_load(settings.model_path, map_location=self.device)  # load FP32 model
    stride = int(self.model.stride.max())  # model stride
    if reload_model:
        settings.input_size = check_img_size(settings.input_size, s=stride)  # check img_size
        if use_fp16:
            self.model.half()  # to FP16

        # Get names and colors
        self.names = getattr(self.model, 'module', self.model).names
        self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names]
        settings.update = False

    # Resize image and move the channel axis first
    resized = letterbox(src_image, settings.input_size, stride)[0]
    resized = np.ascontiguousarray(resized.transpose(2, 0, 1))
    self.emitStepProgress()

    # Prepare the input tensor
    tensor = torch.from_numpy(resized).to(self.device)
    tensor = tensor.half() if use_fp16 else tensor.float()  # uint8 to fp16/32
    tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if tensor.ndimension() == 3:
        tensor = tensor.unsqueeze(0)
    self.emitStepProgress()

    # Inference
    raw_pred = self.model(tensor, augment=settings.augment)[0]
    self.emitStepProgress()

    # Apply NMS
    detections = non_max_suppression(raw_pred, settings.conf_thres, settings.iou_thres,
                                     agnostic=settings.agnostic_nms)
    self.emitStepProgress()

    graphics_output = self.getOutput(1)
    graphics_output.setNewLayer("YoloV5")
    graphics_output.setImageIndex(0)

    detected_names = []
    detected_conf = []

    # Process detections, one entry per image
    for det in detections:
        if not len(det):
            continue

        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(tensor.shape[2:], det[:, :4], src_image.shape).round()

        # Results
        for *xyxy, conf, cls in reversed(det):
            class_index = int(cls)
            left, top = float(xyxy[0]), float(xyxy[1])

            # Box
            width = float(xyxy[2] - xyxy[0])
            height = float(xyxy[3] - xyxy[1])
            rect_style = core.GraphicsRectProperty()
            rect_style.pen_color = self.colors[class_index]
            graphics_box = graphics_output.addRectangle(left, top, width, height, rect_style)
            graphics_box.setCategory(self.names[class_index])

            # Label
            label = self.names[class_index]
            text_style = core.GraphicsTextProperty()
            text_style.font_size = 8
            text_style.color = self.colors[class_index]
            graphics_output.addText(label, left, top, text_style)

            detected_names.append(label)
            detected_conf.append(conf.item())

    # Init numeric output
    numeric_output = self.getOutput(2)
    numeric_output.clearData()
    numeric_output.setOutputType(dataprocess.NumericOutputType.TABLE)
    numeric_output.addValueList(detected_conf, "Confidence", detected_names)
    self.emitStepProgress()
def _fps_from_elapsed(elapsed):
    """Return frames-per-second for an elapsed time, or inf for a zero interval."""
    return 1.0 / elapsed if elapsed > 0 else float('inf')


def _release_video_resources(dataset, vid_writer):
    """Release the capture/writer handles that exist and close all OpenCV windows."""
    vid_cap = getattr(dataset, 'vid_cap', None)
    if vid_cap is not None:
        vid_cap.release()
    if vid_writer is not None:
        vid_writer.release()
    cv2.destroyAllWindows()


def mainFunc(args):
    """Detect people with YOLOv5 and estimate their 2D pose on every frame.

    For each frame of ``args.source`` the function runs YOLOv5 + NMS, feeds
    the detected boxes to the pose network, optionally filters the keypoints,
    draws boxes/keypoints/skeletons, and shows and/or saves the result.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``device``, ``local_rank``, ``config``, ``save_dir``, ``name``,
            ``source``, ``weights``, ``view_img``, ``save_txt``, ``save_conf``,
            ``img_size``, ``augment``, ``conf_thres``, ``iou_thres``,
            ``classes`` and ``agnostic_nms``.
    """
    # Set the main function flag
    print("Main Function Start...")

    # Check the GPU device
    print("Number of available GPUs: {}".format(torch.cuda.device_count()))

    # Check whether using the distributed running for the network
    is_distributed = initDistributed(args)
    master = True
    if is_distributed and os.environ["RANK"]:
        master = int(os.environ["RANK"]) == 0  # check whether this node is master node

    # Configuration for device setting
    set_logging()
    if is_distributed:
        device = torch.device('cuda:{}'.format(args.local_rank))
    else:
        device = select_device(args.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the configuration
    config = loadConfig(args.config)

    # CuDNN related setting
    if torch.cuda.is_available():
        cudnn.benchmark = config.DEVICE.CUDNN.BENCHMARK
        cudnn.deterministic = config.DEVICE.CUDNN.DETERMINISTIC
        cudnn.enabled = config.DEVICE.CUDNN.ENABLED

    # Configurations for directories
    save_img, save_dir, source, yolov5_weights, view_img, save_txt, imgsz = \
        False, Path(args.save_dir), args.source, args.weights, args.view_img, args.save_txt, args.img_size
    webcam = source.isnumeric() or source.startswith(('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    if save_dir == Path('runs/detect'):  # if default
        os.makedirs('runs/detect', exist_ok=True)  # make base
        save_dir = Path(increment_dir(save_dir / 'exp', args.name))  # increment run
    os.makedirs(save_dir / 'labels' if save_txt else save_dir, exist_ok=True)  # make new dir

    # Load yolov5 model for human detection
    model_yolov5 = attempt_load(config.MODEL.PRETRAINED.YOLOV5, map_location=device)
    imgsz = check_img_size(imgsz, s=model_yolov5.stride.max())  # check img_size
    if half:
        model_yolov5.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        model_classifier = load_classifier(name='resnet101', n=2)  # initialize
        model_classifier.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        model_classifier.to(device).eval()

    # Load resnet model for human keypoints estimation. The sub-module is
    # resolved by attribute lookup so the config string is never evaluated
    # as Python code.
    pose_module = pose_models
    for attr_name in config.MODEL.NAME.RESNET.split('.'):
        pose_module = getattr(pose_module, attr_name)
    model_resnet = pose_module.get_pose_net(config, is_train=False)
    if config.EVAL.RESNET.MODEL_FILE:
        print('=> loading model from {}'.format(config.EVAL.RESNET.MODEL_FILE))
        model_resnet.load_state_dict(torch.load(config.EVAL.RESNET.MODEL_FILE, map_location=device),
                                     strict=False)
    else:
        print('expected model defined in config at EVAL.RESNET.MODEL_FILE')
    model_resnet.to(device)
    model_resnet.eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    pose_transform = transforms.Compose([  # input transformation for 2d human pose estimation
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Get names and colors
    names = model_yolov5.module.names if hasattr(model_yolov5, 'module') else model_yolov5.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model_yolov5(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # Process every video frame
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred_boxes = model_yolov5(img, augment=args.augment)[0]

        # Apply NMS
        pred_boxes = non_max_suppression(pred_boxes, args.conf_thres, args.iou_thres,
                                         classes=args.classes, agnostic=args.agnostic_nms)
        t2 = time_synchronized()

        # Can not find people and move to next frame
        if pred_boxes[0] is None:
            # show the frame with no human detected
            cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
            cv2.imshow("2D Human Pose Estimation", im0s[0].copy())
            print("No Human Detected and Move on.")
            print("-" * 30)
            continue

        # Print time (inference + NMS)
        detect_time = t2 - t1
        detect_fps = _fps_from_elapsed(detect_time)
        print("Human Detection Time: {}, Human Detection FPS: {}".format(detect_time, detect_fps))

        # Apply Classifier
        if classify:  # false
            pred_boxes = apply_classifier(pred_boxes, model_classifier, img, im0s)

        # Estimate 2d human pose(multiple person)
        centers = []
        scales = []
        for idx, boxes in enumerate(pred_boxes):
            if boxes is not None and len(boxes):
                # Rescale a copy: the drawing loop below rescales pred_boxes
                # itself, so scaling the shared tensor here would apply the
                # transform twice.
                scaled = boxes.clone()
                scaled[:, :4] = scale_coords(img.shape[2:], scaled[:, :4], im0s[idx].shape).round()

                # convert tensor to list format, dropping the last two columns
                corners = np.delete(scaled.cpu().numpy(), [-2, -1], axis=1).tolist()
                corner_pairs = [[tuple(row[0:2]), tuple(row[2:4])] for row in corners]

                # convert box to center and scale
                for corner_pair in corner_pairs:
                    center, scale = box_to_center_scale(corner_pair, imgsz, imgsz)
                    centers.append(center)
                    scales.append(scale)

        t3 = time_synchronized()
        pred_pose_2d = get_pose_estimation_prediction(config, model_resnet, im0s[0], centers, scales,
                                                      transform=pose_transform, device=device)
        t4 = time_synchronized()

        # Print time (2d human pose estimation)
        estimate_time = t4 - t3
        estimate_fps = _fps_from_elapsed(estimate_time)
        print("Pose Estimation Time: {}, Pose Estimation FPS: {}".format(estimate_time, estimate_fps))

        # Filter the predicted 2d human pose(multiple person)
        t5 = time_synchronized()
        if config.EVAL.RESNET.USE_FILTERS_2D:
            # construct filters for every keypoints of every person in 2D
            filters_2d = constructFilters(pred_pose_2d.shape, freq=1, mincutoff=1, beta=0.01)
            print("Shape of filters_2d: ({}, {}, {})".format(
                len(filters_2d), len(filters_2d[0]), len(filters_2d[0][0])))  # for test
            for per in range(pred_pose_2d.shape[0]):
                for kp in range(pred_pose_2d.shape[1]):
                    for coord in range(pred_pose_2d.shape[2]):
                        pred_pose_2d[per][kp][coord] = filters_2d[per][kp][coord](pred_pose_2d[per][kp][coord])
        t6 = time_synchronized()

        # Print time (filter 2d human pose); the interval can be zero when
        # filtering is disabled, hence the guarded division.
        filter_time_2d = t6 - t5
        filter_fps_2d = _fps_from_elapsed(filter_time_2d)
        print("Filter 2D Pose Time: {}, Filter 2D Pose FPS: {}".format(filter_time_2d, filter_fps_2d))

        # Process detections and estimations in 2D
        for i, box in enumerate(pred_boxes):
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if box is not None and len(box):
                # Rescale boxes from img_size to im0 size
                box[:, :4] = scale_coords(img.shape[2:], box[:, :4], im0.shape).round()

                # Print results
                for c in box[:, -1].unique():
                    n = (box[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(box):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if args.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    # Add bbox to image
                    if save_img or view_img:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Draw joint keypoints, number orders and human skeletons for every detected people in 2D
            for person in pred_pose_2d:
                # draw the human keypoints
                for kp_index, kp_coord in enumerate(person):
                    x_coord, y_coord = int(kp_coord[0]), int(kp_coord[1])
                    cv2.circle(im0, (x_coord, y_coord), 1, (0, 0, 255), 5)
                    cv2.putText(im0, str(kp_index), (x_coord, y_coord), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                (255, 255, 255), 2, cv2.LINE_AA)

                # draw the human skeletons in PACIFIC mode
                for skeleton in PACIFIC_SKELETON_INDEXES:
                    cv2.line(im0,
                             (int(person[skeleton[0]][0]), int(person[skeleton[0]][1])),
                             (int(person[skeleton[1]][0]), int(person[skeleton[1]][1])),
                             skeleton[2], 2)

            # Print time (inference + NMS + estimation)
            print('%sDone. (%.3fs)' % (s, t4 - t1))

            # Stream results
            if view_img:
                detect_text = "Detect FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(detect_fps, detect_time * 1000)
                estimate_text = "Estimate FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(estimate_fps, estimate_time * 1000)
                cv2.putText(im0, detect_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)
                cv2.putText(im0, estimate_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2,
                            cv2.LINE_AA)
                cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
                cv2.imshow("2D Human Pose Estimation", im0)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # q to quit
                    # Close the capture/writer before leaving so the output
                    # video is finalized.
                    _release_video_resources(dataset, vid_writer)
                    return

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

        # Print time (inference + NMS + estimation + 2d filtering)
        all_process_time = t6 - t1
        all_process_fps = _fps_from_elapsed(all_process_time)
        print("All Process Time: {}, All Process FPS: {}".format(all_process_time, all_process_fps))
        print("-" * 30)

    # Print saving results
    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    # Release video reader and writer, then destroy all opencv windows.
    # vid_writer stays None when no video was written, so this is guarded.
    _release_video_resources(dataset, vid_writer)

    print('Present 2D Human Pose Inference Done. Total Time:(%.3f seconds)' % (time.time() - t0))
def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_conf=False,
         plots=True):
    """Evaluate a detection model on a dataset and report precision/recall/mAP.

    Called either from training (``model`` and ``dataloader`` supplied) or
    directly (``weights`` supplied; device, task and ``save_txt`` then come
    from the module-level ``opt``).

    Args:
        data: path to the dataset YAML file.
        weights: checkpoint path(s), used only when ``model`` is None.
        batch_size: batch size for the dataloader built when called directly.
        imgsz: inference image size.
        conf_thres: confidence threshold for NMS.
        iou_thres: IoU threshold for NMS.
        save_json: collect detections and score them with pycocotools.
        single_cls: treat the dataset as having one class.
        augment: augmented inference.
        verbose: print per-class results.
        model: already-loaded model (training mode) or None.
        dataloader: dataloader to iterate (training mode).
        save_dir: output directory (a ``Path``). When called directly its
            previous contents are deleted, unless it is the working directory.
        save_txt: write one auto-label text file per image.
        save_conf: include the confidence in the auto-label lines.
        plots: save ground-truth/prediction plots for the first batch.

    Returns:
        tuple: ``((mp, mr, map50, map, *losses), maps, t)`` where ``maps`` is
        the per-class AP array and ``t`` holds per-image timings plus the
        image size and batch size.
    """
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
        if save_txt:
            # The auto-label directory is needed in this mode as well;
            # nothing is deleted here.
            out = save_dir / 'autolabels'
            os.makedirs(out, exist_ok=True)

    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        save_txt = opt.save_txt  # save *.txt labels

        # Remove previous. The default save_dir of Path('') is the current
        # working directory, which must never be wiped.
        if os.path.exists(save_dir):
            if Path(save_dir).resolve() == Path.cwd().resolve():
                print('WARNING: save_dir is the working directory, not deleting it')
            else:
                shutil.rmtree(save_dir)  # delete dir
        os.makedirs(save_dir, exist_ok=True)  # make new dir

        if save_txt:
            out = save_dir / 'autolabels'
            if os.path.exists(out):
                shutil.rmtree(out)  # delete dir
            os.makedirs(out)  # make new dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        # NOTE(review): FullLoader can build Python objects; only point this
        # at trusted dataset files.
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
                                       hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                x = pred.clone()
                x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1])  # to original
                for *xyxy, conf, cls in x:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, conf, *xywh) if save_conf else (cls, *xywh)  # label format
                    with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f:
                        f.write(('%g ' * len(line) + '\n') % line)

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id,
                                  'category_id': coco91class[int(p[5])],
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and save_dir and batch_i < 1:
            f = save_dir / f'test_batch{batch_i}_gt.jpg'  # filename
            plot_images(img, targets, paths, str(f), names)  # ground truth
            f = save_dir / f'test_batch{batch_i}_pred.jpg'
            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots,
                                              fname=os.path.join(save_dir, 'precision-recall_curve.png'))
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        file = save_dir / f"detections_val2017_{w}_results.json"  # predicted annotations file
        print('\nCOCO mAP with pycocotools... saving %s...' % file)
        with open(file, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(str(file))  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            # COCO scoring is best-effort: report the failure and keep the
            # metrics computed above.
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
@torch.no_grad()  # evaluation never backpropagates; avoids building autograd graphs for every batch
def run(data,
        weights=None,  # model.pt path(s)
        batch_size=32,  # batch size
        imgsz=640,  # inference size (pixels)
        conf_thres=0.001,  # confidence threshold
        iou_thres=0.6,  # NMS IoU threshold
        task='val',  # train, val, test, speed or study
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        single_cls=False,  # treat as single-class dataset
        augment=False,  # augmented inference
        verbose=False,  # verbose output
        save_txt=False,  # save results to *.txt
        save_hybrid=False,  # save label+prediction hybrid results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_json=False,  # save a COCO-JSON results file
        project=ROOT / 'runs/val',  # save to project/name
        name='exp',  # save to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=True,  # use FP16 half-precision inference
        model=None,
        dataloader=None,
        save_dir=Path(''),
        callbacks=Callbacks(),
        compute_loss=None,
        ):
    """Evaluate a detection model on a dataset split and report P, R and mAP.

    The function runs in one of two modes:

    * ``model`` is given ("training" mode, e.g. called from a training loop):
      the supplied ``model`` and ``dataloader`` are used as-is and ``data`` must
      already be a dict (it is accessed with ``data.get`` / ``data['nc']``).
    * ``model`` is ``None`` (called directly): the device is selected, a run
      directory is created under ``project/name``, ``weights`` are loaded,
      ``data`` is passed through ``check_dataset`` and a dataloader is built
      for ``task``.

    Args:
        data: Dataset description (see the two modes above).
        weights: Path(s) of the ``.pt`` checkpoint to load when ``model`` is None.
        batch_size: Batch size used for the dataloader built in direct mode.
        imgsz: Inference image size in pixels.
        conf_thres: Confidence threshold passed to NMS.
        iou_thres: IoU threshold passed to NMS.
        task: Dataset split; anything other than train/val/test falls back to
            ``'val'`` (``'speed'`` additionally disables padding).
        device: Device string handed to ``select_device`` in direct mode.
        single_cls: Treat the dataset as single-class.
        augment: Forwarded to the model call as ``augment``.
        verbose: Log per-class results.
        save_txt: Save predictions as ``labels/*.txt``.
        save_hybrid: Pass the ground-truth labels to NMS (autolabelling).
        save_conf: Forwarded to ``save_one_txt``.
        save_json: Collect predictions into a COCO-style JSON and score it with
            pycocotools.
        project, name, exist_ok: Control the run directory in direct mode.
        half: Use FP16; forced off on CPU.
        model: Already-constructed model (training mode) or None.
        dataloader: Dataloader to iterate in training mode.
        save_dir: Output directory in training mode.
        callbacks: Object whose ``run('on_val_image_end', ...)`` is invoked per image.
        compute_loss: Optional callable; when given, its second return value is
            accumulated as the validation loss.

    Returns:
        A 3-tuple ``((mp, mr, map50, map, *loss), maps, t)`` where ``loss`` is
        the accumulated loss divided by ``len(dataloader)``, ``maps`` is a
        length-``nc`` array of per-class AP (filled with the mean for classes
        without results) and ``t`` holds the per-image pre-process, inference
        and NMS times in milliseconds.
    """
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device

    else:  # called directly
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        check_suffix(weights, '.pt')
        model = attempt_load(weights, map_location=device)  # load FP32 model
        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
        imgsz = check_img_size(imgsz, s=gs)  # check image size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

        # Data
        data = check_dataset(data)  # check

    # Half
    half &= device.type != 'cpu'  # half precision only supported on CUDA
    model.half() if half else model.float()

    # Configure
    model.eval()
    is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt')  # COCO dataset
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        if device.type != 'cpu':
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
        pad = 0.0 if task == 'speed' else 0.5
        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=pad, rect=True,
                                       prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)  # constructed but not used further in this function
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
    # Column header, also used as the progress-bar description
    s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        t1 = time_sync()
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        t2 = time_sync()
        dt[0] += t2 - t1

        # Run model
        out, train_out = model(img, augment=augment)  # inference and training outputs
        dt[1] += time_sync() - t2

        # Compute loss
        if compute_loss:
            loss += compute_loss([x.float() for x in train_out], targets)[1]  # box, obj, cls

        # Run NMS
        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
        t3 = time_sync()
        out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
        dt[2] += time_sync() - t3

        # Statistics per image
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path, shape = Path(paths[si]), shapes[si][0]
            seen += 1

            if len(pred) == 0:
                # No detections: only record the image if it has labels (all of them are misses)
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred

            # Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
                scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                correct = process_batch(predn, labelsn, iouv)
            else:
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))  # (correct, conf, pcls, tcls)

            # Save/log
            if save_txt:
                save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
            if save_json:
                save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
            callbacks.run('on_val_image_end', pred, predn, path, names, img[si])

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%11i' * 2 + '%11.3g' * 4  # print format
    LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json')  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...')
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        # pycocotools scoring is best-effort: any failure is logged and the in-house metrics are kept
        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            check_requirements(['pycocotools'])
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            coco_eval = COCOeval(anno, pred, 'bbox')  # named to avoid shadowing the builtin eval()
            if is_coco:
                coco_eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
            map, map50 = coco_eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            LOGGER.info(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
def train(hyp, opt, device, tb_writer=None):
    """Train a detection model and return the metrics of the last evaluation.

    Args:
        hyp: Mapping of hyperparameters. Keys such as ``'lr0'``, ``'lrf'``,
            ``'momentum'``, ``'weight_decay'``, ``'warmup_epochs'``,
            ``'warmup_momentum'``, ``'warmup_bias_lr'``, ``'anchor_t'``,
            ``'box'``, ``'cls'`` and ``'obj'`` are read; ``'anchors'`` is
            optional. The mapping is modified in place (``weight_decay``,
            ``box``, ``cls`` and ``obj`` are rescaled, ``label_smoothing`` is set).
        opt: Options object whose attributes (``save_dir``, ``epochs``,
            ``batch_size``, ``weights``, ``data``, ``global_rank`` ...) configure
            the run. ``opt.hyp`` is set on rank -1/0.
        device: Device the model is created on; ``device.type`` decides whether
            CUDA-only features (AMP, DP/DDP, SyncBatchNorm) are enabled.
        tb_writer: Optional writer; when truthy, ``add_histogram`` and
            ``add_scalar`` are called on it.

    Returns:
        ``results``: the tuple returned by the most recent ``test.test`` call
        (P, R, mAP@.5, mAP@.5-.95, val box/obj/cls loss), or seven zeros if no
        evaluation ran.

    Side effects:
        Writes ``hyp.yaml``, ``opt.yaml``, ``results.txt`` and the
        ``weights/last.pt`` / ``weights/best.pt`` checkpoints under
        ``opt.save_dir``; optionally uploads via ``gsutil`` when ``opt.bucket``
        is set.
    """
    logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
    save_dir, epochs, batch_size, total_batch_size, weights, rank = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / 'last.pt'
    best = wdir / 'best.pt'
    results_file = save_dir / 'results.txt'

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(2 + rank)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
    is_coco = opt.data.endswith('coco.yaml')

    # Logging- Doing this before checking the dataset. Might update data_dict
    loggers = {'wandb': None}  # loggers dict
    if rank in [-1, 0]:
        opt.hyp = hyp  # add hyperparameters
        run_id = torch.load(weights).get('wandb_id') if weights.endswith('.pt') and os.path.isfile(weights) else None
        wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, run_id, data_dict)
        loggers['wandb'] = wandb_logger.wandb
        data_dict = wandb_logger.data_dict
        if wandb_logger.wandb:
            weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp  # WandbLogger might update weights, epochs if resuming

    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check

    # Model
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
        exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else []  # exclude keys
        state_dict = ckpt['model'].float().state_dict()  # to FP32
        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
    else:
        model = Model(opt.cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']

    # Freeze
    freeze = []  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print('freezing %s' % k)
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay
    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    if opt.linear_lr:
        # NOTE: closes over `epochs`, so a later change of `epochs` (fine-tuning below) is picked up
        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
    else:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # EMA
        if ema and ckpt.get('ema'):
            ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
            ema.updates = ckpt['updates']

        # Results
        if ckpt.get('training_results') is not None:
            results_file.write_text(ckpt['training_results'])  # write results.txt

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        if opt.resume:
            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
        if epochs < start_epoch:
            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                        (weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # Trainloader
    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
                                            world_size=opt.world_size, workers=opt.workers,
                                            image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '))
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

    # Process 0
    if rank in [-1, 0]:
        testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt,  # testloader
                                       hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
                                       world_size=opt.world_size, workers=opt.workers,
                                       pad=0.5, prefix=colorstr('val: '))[0]

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, names, save_dir, loggers)
                if tb_writer:
                    tb_writer.add_histogram('classes', c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
            model.half().float()  # pre-reduce anchor precision

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Model parameters
    hyp['box'] *= 3. / nl  # scale to layers
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    compute_loss = ComputeLoss(model)  # init loss class
    logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n'
                f'Using {dataloader.num_workers} dataloader workers\n'
                f'Logging results to {save_dir}\n'
                f'Starting training for {epochs} epochs...')
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
                iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
                dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'labels', 'img_size'))
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # randrange() requires integer bounds (float arguments raise TypeError on Python 3.12+)
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                if rank != -1:
                    loss *= opt.world_size  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                s = ('%10s' * 2 + '%10.4g' * 6) % (
                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
                pbar.set_description(s)

                # Plot
                if plots and ni < 3:
                    f = save_dir / f'train_batch{ni}.jpg'  # filename
                    Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
                    # if tb_writer:
                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
                elif plots and ni == 10 and wandb_logger.wandb:
                    wandb_logger.log({"Mosaics": [wandb_logger.wandb.Image(str(x), caption=x.name) for x in
                                                  save_dir.glob('train*.jpg') if x.exists()]})

            # end batch ------------------------------------------------------------------------------------------------
        # end epoch ----------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                wandb_logger.current_epoch = epoch + 1
                results, maps, times = test.test(data_dict,
                                                 batch_size=batch_size * 2,
                                                 imgsz=imgsz_test,
                                                 model=ema.ema,
                                                 single_cls=opt.single_cls,
                                                 dataloader=testloader,
                                                 save_dir=save_dir,
                                                 verbose=nc < 50 and final_epoch,
                                                 plots=plots and final_epoch,
                                                 wandb_logger=wandb_logger,
                                                 compute_loss=compute_loss,
                                                 is_coco=is_coco)

            # Write
            with open(results_file, 'a') as f:
                f.write(s + '%10.4g' * 7 % results + '\n')  # append metrics, val_loss
            if len(opt.name) and opt.bucket:
                # NOTE(review): shell command is built from opt values; only safe while those are trusted
                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

            # Log
            tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
                    'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
                    'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
                    'x/lr0', 'x/lr1', 'x/lr2']  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if tb_writer:
                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
                if wandb_logger.wandb:
                    wandb_logger.log({tag: x})  # W&B

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            if fi > best_fitness:
                best_fitness = fi
            wandb_logger.end_epoch(best_result=best_fitness == fi)

            # Save model
            if (not opt.nosave) or (final_epoch and not opt.evolve):  # if save
                ckpt = {'epoch': epoch,
                        'best_fitness': best_fitness,
                        'training_results': results_file.read_text(),
                        'model': deepcopy(model.module if is_parallel(model) else model).half(),
                        'ema': deepcopy(ema.ema).half(),
                        'updates': ema.updates,
                        'optimizer': optimizer.state_dict(),
                        'wandb_id': wandb_logger.wandb_run.id if wandb_logger.wandb else None}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if wandb_logger.wandb:
                    if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1:
                        wandb_logger.log_model(
                            last.parent, opt, epoch, fi, best_model=best_fitness == fi)
                del ckpt

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training
    if rank in [-1, 0]:
        # Plots
        if plots:
            plot_results(save_dir=save_dir)  # save as results.png
            if wandb_logger.wandb:
                files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
                wandb_logger.log({"Results": [wandb_logger.wandb.Image(str(save_dir / f), caption=f) for f in files
                                              if (save_dir / f).exists()]})
        # Test best.pt
        logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
        if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
            # (last,) must be a one-element tuple: a bare Path is not iterable
            for m in (last, best) if best.exists() else (last,):  # speed, mAP tests
                results, _, _ = test.test(opt.data,
                                          batch_size=batch_size * 2,
                                          imgsz=imgsz_test,
                                          conf_thres=0.001,
                                          iou_thres=0.7,
                                          model=attempt_load(m, device).half(),
                                          single_cls=opt.single_cls,
                                          dataloader=testloader,
                                          save_dir=save_dir,
                                          save_json=True,
                                          plots=False,
                                          is_coco=is_coco)

        # Strip optimizers
        final = best if best.exists() else last  # final model
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
        if opt.bucket:
            os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload
        if wandb_logger.wandb and not opt.evolve:  # Log the stripped model
            wandb_logger.wandb.log_artifact(str(final), type='model',
                                            name='run_' + wandb_logger.wandb_run.id + '_model',
                                            aliases=['last', 'best', 'stripped'])
        wandb_logger.finish_run()
    else:
        dist.destroy_process_group()
    torch.cuda.empty_cache()
    return results
def __init__(self, master, opt):
    """Build the annotation tool window, load the detectors and reset all state.

    Args:
        master: Top-level Tk window; its title, resizability and key bindings
            are configured here.
        opt: Options object. ``device``, ``weights``, ``img_size``,
            ``face_margin`` and ``face_landmarks`` are read; ``augment``,
            ``update`` and ``agnostic_nms`` are overwritten on it.

    Side effects:
        Truncates (or creates) ``annotations/annotations.csv``; the
        ``annotations`` directory must already exist.
    """
    self.parent = master
    self.parent.title("Semi Automatic Image Annotation Tool")
    self.frame = Frame(self.parent)
    self.frame.pack(fill=BOTH, expand=1)
    self.parent.resizable(width=False, height=False)

    # setup yolov5 model
    self.opt = opt
    opt.augment = True
    opt.update = False
    opt.agnostic_nms = False

    # initialize
    set_logging()
    device = select_device(opt.device)
    self.device = device

    # load yolo5 object detection model
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    self.img_size = check_img_size(opt.img_size, s=model.stride.max())  # check img_size
    half = device.type != 'cpu'  # half precision only supported on CUDA
    self.half = half
    if half:
        model.half()  # to FP16
    self.object_model = model
    # get object detection names
    self.names = ['person']

    # load face detect model
    self.face_model = MTCNN(keep_all=True, margin=opt.face_margin, device=device)
    self.face_landmarks = opt.face_landmarks

    # Initialize class variables
    self.img = None
    self.tkimg = None
    self.imageDir = ''
    self.imageDirPathBuffer = ''
    self.imageList = []
    self.imageTotal = 0
    self.imageCur = 0
    self.cur = 0
    self.bboxIdList = []
    self.bboxList = []
    self.bboxPointList = []
    self.o1 = None
    self.o2 = None
    self.o3 = None
    self.o4 = None
    self.bboxId = None
    self.currLabel = None
    self.editbboxId = None
    self.currBboxColor = None
    self.zoomImgId = None
    self.zoomImg = None
    self.zoomImgCrop = None
    self.tkZoomImg = None
    self.hl = None
    self.vl = None
    self.editPointId = None
    self.filename = None
    self.filenameBuffer = None
    self.objectLabelList = []
    self.EDIT = False

    # record image names for saving previous results
    self.annoList = {}

    # initialize mouse state
    self.STATE = {'x': 0, 'y': 0}
    self.STATE_COCO = {'click': 0}

    # initialize annotation file (opening with 'w+' truncates any previous content)
    self.anno_filename = 'annotations.csv'
    with open('annotations/' + self.anno_filename, 'w+') as annotation_file:
        annotation_file.write("")
    # The (now closed) handle stays available under its original attribute name
    self.annotation_file = annotation_file

    # ------------------ GUI ---------------------

    # Control Panel
    self.ctrlPanel = Frame(self.frame)
    self.ctrlPanel.grid(row=0, column=0, sticky=W + N)
    self.openBtn = Button(self.ctrlPanel, text='Open', command=self.open_image)
    self.openBtn.pack(fill=X, side=TOP)
    self.openDirBtn = Button(self.ctrlPanel, text='Open Dir', command=self.open_image_dir)
    self.openDirBtn.pack(fill=X, side=TOP)
    self.nextBtn = Button(self.ctrlPanel, text='Next -->', command=self.open_next)
    self.nextBtn.pack(fill=X, side=TOP)
    self.previousBtn = Button(self.ctrlPanel, text='<-- Previous', command=self.open_previous)
    self.previousBtn.pack(fill=X, side=TOP)
    self.saveBtn = Button(self.ctrlPanel, text='Save', command=self.save)
    self.saveBtn.pack(fill=X, side=TOP)
    self.semiAutoBtn = Button(self.ctrlPanel, text="Show Suggestions", command=self.automate)
    self.semiAutoBtn.pack(fill=X, side=TOP)
    self.disp = Label(self.ctrlPanel, text='Coordinates:')
    self.disp.pack(fill=X, side=TOP)
    self.mb = Menubutton(self.ctrlPanel, text="COCO Classes for Suggestions", relief=RAISED)
    self.mb.pack(fill=X, side=TOP)
    self.mb.menu = Menu(self.mb, tearoff=0)
    self.mb["menu"] = self.mb.menu
    self.addCocoBtn = Button(self.ctrlPanel, text="+", command=self.add_labels_coco)
    self.addCocoBtn.pack(fill=X, side=TOP)
    self.zoomPanelLabel = Label(self.ctrlPanel, text="Precision View Panel")
    self.zoomPanelLabel.pack(fill=X, side=TOP)
    self.zoomcanvas = Canvas(self.ctrlPanel, width=150, height=150)
    self.zoomcanvas.pack(fill=X, side=TOP, anchor='center')

    # Image Editing Region
    self.canvas = Canvas(self.frame, width=self.img_size, height=self.img_size)
    self.canvas.grid(row=0, column=1, sticky=W + N)
    self.canvas.bind("<Button-1>", self.mouse_click)
    self.canvas.bind("<Motion>", self.mouse_move, "+")
    self.canvas.bind("<B1-Motion>", self.mouse_drag)
    self.canvas.bind("<ButtonRelease-1>", self.mouse_release)
    self.parent.bind("<Key-Left>", self.open_previous)
    self.parent.bind("<Key-Right>", self.open_next)
    # Key events need angle brackets; a bare "Escape" would not bind the Esc key
    self.parent.bind("<Escape>", self.cancel_bbox)

    # Labels and Bounding Box Lists Panel
    # Widgets are created first and packed second: pack() returns None, so
    # chaining it onto the constructor would store None in the attribute.
    self.listPanel = Frame(self.frame)
    self.listPanel.grid(row=0, column=2, sticky=W + N)
    self.listBoxNameLabel = Label(self.listPanel, text="List of Objects")
    self.listBoxNameLabel.pack(fill=X, side=TOP)
    self.objectListBox = Listbox(self.listPanel, width=40)
    self.objectListBox.pack(fill=X, side=TOP)
    self.delObjectBtn = Button(self.listPanel, text="Delete", command=self.del_bbox)
    self.delObjectBtn.pack(fill=X, side=TOP)
    self.clearAllBtn = Button(self.listPanel, text="Clear All", command=self.clear_bbox)
    self.clearAllBtn.pack(fill=X, side=TOP)
    self.classesNameLabel = Label(self.listPanel, text="Classes")
    self.classesNameLabel.pack(fill=X, side=TOP)
    # NOTE(review): Entry has no documented 'text' option; confirm what this argument is meant to do
    self.textBox = Entry(self.listPanel, text="Enter label")
    self.textBox.pack(fill=X, side=TOP)

    self.addLabelBtn = Button(self.listPanel, text="+", command=self.add_label)
    self.addLabelBtn.pack(fill=X, side=TOP)
    self.delLabelBtn = Button(self.listPanel, text="-", command=self.del_label)
    self.delLabelBtn.pack(fill=X, side=TOP)

    self.labelListBox = Listbox(self.listPanel)
    self.labelListBox.pack(fill=X, side=TOP)
    for name in self.names + ['face']:
        self.labelListBox.insert(END, str(name))

    # One checkbutton (backed by its own IntVar) per COCO label in the suggestions menu
    self.cocoLabels = config.labels_to_names.values()
    self.cocoIntVars = []
    for label_coco in self.cocoLabels:
        coco_var = IntVar()
        self.cocoIntVars.append(coco_var)
        self.mb.menu.add_checkbutton(label=label_coco, variable=coco_var)

    # STATUS BAR
    self.statusBar = Frame(self.frame, width=500)
    self.statusBar.grid(row=1, column=1, sticky=W + N)
    self.processingLabel = Label(self.statusBar, text="                       ")
    self.processingLabel.pack(side="left", fill=X)
    self.imageIdxLabel = Label(self.statusBar, text="                       ")
    self.imageIdxLabel.pack(side="right", fill=X)
def __init__(self, model_path):
    """Load the YOLO weights at ``model_path`` and set the detection thresholds.

    The model is placed on ``device``, a name resolved from the enclosing
    scope rather than passed in.
    """
    detector = attempt_load(weights=model_path, map_location=device)
    self.yolo_model = detector
    print("Yolo model loaded!")

    # Confidence and IoU thresholds stored for later use
    self.conf_thres, self.iou_thres = 0.75, 0.7
def RoI(frames, dataset, model, device):
    """Return the merged regions of interest for a short clip of frames.

    Detector boxes are computed on the middle frame only, filtered with
    ``delCoverBboxes``, used to derive motion boxes over all ``frames``, and
    the concatenation of both sets is filtered once more.

    Args:
        frames: Indexable sequence of frames; the middle one is sent to the detector.
        dataset: Dataset identifier forwarded unchanged to every helper.
        model: Detection model forwarded to ``get_yolo_roi``.
        device: Device forwarded to ``get_yolo_roi``.

    Returns:
        Whatever ``delCoverBboxes`` returns for the combined boxes
        (presumably a list of boxes, since the two sets are joined with ``+``).
    """
    middle_frame = frames[len(frames) // 2]
    yolo_boxes = get_yolo_roi(middle_frame, model, device, dataset)
    yolo_boxes = delCoverBboxes(yolo_boxes, dataset)

    motion_boxes = get_motion_roi(frames, yolo_boxes, dataset)

    bboxes = delCoverBboxes(yolo_boxes + motion_boxes, dataset)
    return bboxes


if __name__ == "__main__":
    # load YOLOv5 model
    weights = 'yolov5/weights/yolov5s.pt'
    # NOTE(review): GPU index 3 is hard-coded; machines with CUDA but fewer GPUs will fail here
    device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
    model = attempt_load(weights, map_location=device)  # load FP32 model

    frame_path = "../AllDatasets/avenue/testing/frames"  # frame path
    # os.listdir() returns entries in arbitrary order; sort so processing is deterministic
    clips = sorted(os.listdir(frame_path))

    # record bounding boxes
    for clip in clips:
        path = os.path.join(frame_path, clip)
        # Frames are combined by adjacent index below, so they must be in a stable order.
        # Lexicographic order equals temporal order only if frame names are zero-padded - TODO confirm.
        filenames = sorted(os.listdir(path))
        save_file = "./bboxes/avenue/test/" + str(clip) + ".npy"
        clips_roi = []
        # read the images and start predicting
        for index in range(2, len(filenames) - 2):
            img1 = os.path.join(path, filenames[index - 2])
            img2 = os.path.join(path, filenames[index - 1])
            img3 = os.path.join(path, filenames[index])
def test(
    weights=None,
    data="yolov5/data/coco128.yaml",
    batch_size=32,
    image_size=640,
    conf_thres=0.001,
    iou_thres=0.6,  # for NMS
    task="val",
    device="",
    single_cls=False,
    augment=False,
    verbose=False,
    save_txt=False,  # for auto-labelling
    save_hybrid=False,  # for hybrid auto-labelling
    save_conf=False,  # save auto-label confidences
    save_json=False,
    project="runs/test",
    name="exp",
    exist_ok=False,
    model=None,
    dataloader=None,
    save_dir=Path(""),  # for saving images
    plots=True,
    log_imgs=0,  # number of logged images
):
    """Evaluate a detection model on a validation/test split.

    When ``model`` is given (the call made from ``train``), it is evaluated
    in place on ``dataloader`` and the loss is accumulated as well. When
    ``model`` is ``None`` the weights are loaded with ``attempt_load``, a
    run directory is created under ``project/name`` and a dataloader is
    built from the ``val`` (or ``test`` when ``task == "test"``) entry of
    the ``data`` YAML file.

    Args:
        weights: Weights path(s) for ``attempt_load``; only used when
            ``model`` is ``None`` (and for naming the predictions JSON).
        data: Path to the dataset YAML file.
        batch_size: Batch size for the dataloader built here.
        image_size: Inference size, checked with ``check_img_size`` when
            the model is loaded here.
        conf_thres: Confidence threshold passed to NMS.
        iou_thres: IoU threshold passed to NMS.
        task: ``"test"`` selects ``data["test"]``; anything else selects
            ``data["val"]``.
        device: Device string for ``select_device`` (standalone mode).
        single_cls: Treat the dataset as having one class.
        augment: Forwarded to the model call.
        verbose: Print one result row per class.
        save_txt: Write one label file per image under ``save_dir/labels``.
        save_hybrid: Pass the ground-truth labels to NMS.
        save_conf: Append the confidence to each saved label line.
        save_json: Collect predictions and run pycocotools on them.
        project, name, exist_ok: Run-directory settings (standalone mode).
        model: Already-built model; its presence selects "training" mode.
        dataloader: Dataloader to use in "training" mode.
        save_dir: Output directory (replaced in standalone mode).
        plots: Save batch plots and the confusion matrix.
        log_imgs: Maximum number of images logged to W&B (capped at 100,
            forced to 0 when ``wandb`` cannot be imported).

    Returns:
        ``((mp, mr, map50, map, *mean_losses), maps, t)`` where ``maps`` is
        a per-class array and ``t`` holds the per-image inference/NMS/total
        times in ms followed by ``image_size`` (twice) and ``batch_size``.
    """
    arguments = locals()  # snapshot of the call arguments for OptFactory

    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = Path(increment_path(Path(project) / name, exist_ok=exist_ok))  # increment run
        (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        image_size = check_img_size(image_size, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != "cpu"  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    is_coco = data.endswith("coco.yaml")  # is COCO dataset
    with open(data) as f:
        # NOTE(review): FullLoader can build more than plain data types;
        # prefer yaml.safe_load if the dataset file may be untrusted.
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data["nc"])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs, wandb = min(log_imgs, 100), None  # ceil
    try:
        import wandb  # Weights & Biases
    except ImportError:
        log_imgs = 0

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, image_size, image_size), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != "cpu" else None  # run once
        path = data["test"] if task == "test" else data["val"]  # path to val/test images
        opt = OptFactory(arguments)
        dataloader = create_dataloader(path, image_size, batch_size, model.stride.max(), opt,
                                       pad=0.5, rect=True)[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, "names") else model.module.names)}
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%12s" * 6) % ("Class", "Images", "Targets", "P", "R", "mAP@.5", "mAP@.5:.95")
    p, r, f1, mp, mr, map50, map, t0, t1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
            lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / "labels" / (path.stem + ".txt"), "a") as f:
                        f.write(("%g " * len(line)).rstrip() % line + "\n")

            # W&B logging
            if plots and len(wandb_images) < log_imgs:
                box_data = [{
                    "position": {
                        "minX": xyxy[0],
                        "minY": xyxy[1],
                        "maxX": xyxy[2],
                        "maxY": xyxy[3],
                    },
                    "class_id": int(cls),
                    "box_caption": "%s %.3f" % (names[cls], conf),
                    "scores": {
                        "class_score": conf
                    },
                    "domain": "pixel",
                } for *xyxy, conf, cls in pred.tolist()]
                boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                # Distinct loop name so the precision variable ``p`` is not clobbered.
                for pred_row, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        "image_id": image_id,
                        "category_id": coco91class[int(pred_row[5])] if is_coco else int(pred_row[5]),
                        "bbox": [round(x, 3) for x in b],
                        "score": round(pred_row[4], 5),
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    # tbox has just been rescaled to native image space, so
                    # it must be compared with the native-space predictions
                    # (predn), not the inference-space ones (pred).
                    confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f"test_batch{batch_i}_labels.jpg"  # labels
            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
            f = save_dir / f"test_batch{batch_i}_pred.jpg"  # predictions
            Thread(
                target=plot_images,
                args=(img, output_to_target(output), paths, f, names),
                daemon=True,
            ).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = "%20s" + "%12.3g" * 6  # print format
    print(pf % ("all", seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1e3 for x in (t0, t1, t0 + t1)) + (image_size, image_size, batch_size)  # tuple
    if not training:
        print("Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g" % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb and wandb.run:
            wandb.log({"Images": wandb_images})
            wandb.log({
                "Validation": [
                    wandb.Image(str(f), caption=f.name)
                    for f in sorted(save_dir.glob("test*.jpg"))
                ]
            })

    # Save JSON
    if save_json and len(jdict):
        w = (Path(weights[0] if isinstance(weights, list) else weights).stem
             if weights is not None else "")  # weights
        anno_json = "../coco/annotations/instances_val2017.json"  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print("\nEvaluating pycocotools mAP... saving %s..." % pred_json)
        with open(pred_json, "w") as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            coco_eval = COCOeval(anno, pred, "bbox")
            if is_coco:
                coco_eval.params.imgIds = [
                    int(Path(x).stem) for x in dataloader.dataset.img_files
                ]  # image IDs to evaluate
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
            map, map50 = coco_eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            # Best effort: a missing or failing pycocotools must not abort the run.
            print(f"pycocotools unable to run: {e}")

    # Return results
    if not training:
        s = (f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
             if save_txt else "")
        print(f"Results saved to {save_dir}{s}")
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
def detect(save_img=False):
    """Run detection on ``opt.source`` and track 'person' detections.

    All settings are read from the module-level ``opt`` object. The output
    directory ``opt.output`` is deleted and recreated, the model is loaded
    with ``attempt_load``, and every frame yielded by the dataset goes
    through the model and NMS. Detections whose class equals the index of
    ``"person"`` in the model's class names are passed to a SORT tracker
    and the returned tracks are drawn on the frame with their ID. Frames
    are shown and/or written to the output directory depending on the
    source type.

    Args:
        save_img: Initial value of the save flag; it is forced to ``True``
            for non-stream sources.

    Raises:
        StopIteration: When ``q`` is pressed in the preview window.
        ValueError: From ``names.index`` when the model has no
            ``"person"`` class.
    """
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    # Numeric ids, rtsp/http URLs and .txt files select the stream loader.
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled: ``classify`` is hard-coded to False)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Find index corresponding to a person
    idx_person = names.index("person")

    # SORT: initialize the tracker
    mot_tracker = sort_module.Sort(max_age=opt.max_age,
                                   min_hits=opt.min_hits,
                                   iou_threshold=opt.iou_threshold)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch dimension

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            # txt_path and gn are computed but not used further in this function.
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # SORT: number of people detected
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :-1].to(
                    "cpu")  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # SORT: feed detections to the tracker
                # NOTE(review): the tracker is only updated on frames that
                # contain at least one person; the reference SORT
                # implementation asks for update() on every frame -- confirm
                # whether sort_module accepts an empty detection array.
                if len(dets_ppl) != 0:
                    trackers = mot_tracker.update(dets_ppl)
                    for d in trackers:
                        # The last element of each track is used as its ID.
                        # NOTE(review): every track is drawn with colors[1];
                        # confirm a single fixed colour is intended.
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        # NOTE(review): save_path is only bound inside the loop above, so
        # this line fails if the dataset yielded nothing.
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def train(hyp, opt, device, tb_writer=None, wandb=None):
    """Train a detection model and return the last evaluation results.

    The function writes the run settings to ``opt.save_dir``, builds the
    model (from a ``.pt`` checkpoint when ``opt.weights`` ends with
    ``.pt``, otherwise from ``opt.cfg``), sets up the optimizer, scheduler,
    EMA and dataloaders, runs the epoch loop with warmup and optional
    multi-scale resizing, evaluates with ``test`` after each epoch and
    saves ``last.pt`` / ``best.pt`` under ``save_dir/weights``.

    Args:
        hyp: Hyperparameter dict. It is modified in place
            (``weight_decay``, ``cls`` and ``obj`` are rescaled).
        opt: Options namespace read throughout (paths, epochs, batch
            sizes, rank, flags, ...). ``opt.hyp`` is set when a W&B run is
            started here.
        device: Torch device the model and batches are moved to.
        tb_writer: Optional TensorBoard writer for histograms and scalars.
        wandb: Optional ``wandb`` module; falsy disables W&B logging.

    Returns:
        The ``results`` tuple of the most recent ``test`` call, or the
        initial all-zero tuple if ``test`` was never called in this
        process.
    """
    logger.info(f"Hyperparameters {hyp}")
    save_dir, epochs, batch_size, total_batch_size, weights, rank = (
        Path(opt.save_dir),
        opt.epochs,
        opt.batch_size,
        opt.total_batch_size,
        opt.weights,
        opt.global_rank,
    )

    # Directories
    wdir = save_dir / "weights"
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / "last.pt"
    best = wdir / "best.pt"
    results_file = save_dir / "results.txt"

    # Save run settings
    with open(save_dir / "hyp.yaml", "w") as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / "opt.yaml", "w") as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = device.type != "cpu"
    init_seeds(2 + rank)
    with open(opt.data) as f:
        # NOTE(review): FullLoader can build more than plain data types;
        # prefer yaml.safe_load if the dataset file may be untrusted.
        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict["train"]
    test_path = data_dict["val"]
    nc = 1 if opt.single_cls else int(data_dict["nc"])  # number of classes
    names = (
        ["item"]
        if opt.single_cls and len(data_dict["names"]) != 1
        else data_dict["names"]
    )  # class names
    assert len(names) == nc, "%g names found for nc=%g dataset in %s" % (
        len(names),
        nc,
        opt.data,
    )  # check

    # Model
    pretrained = weights.endswith(".pt")
    if pretrained:
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally

        # add yolov5 folder to system path
        here = Path(__file__).parent.absolute()
        yolov5_folder_dir = str(here)
        sys.path.insert(0, yolov5_folder_dir)
        try:
            # load checkpoint
            # NOTE(review): torch.load unpickles the file; only load
            # checkpoints from trusted sources.
            ckpt = torch.load(weights, map_location=device)
        finally:
            # remove yolov5 folder from system path, even if loading failed
            sys.path.remove(yolov5_folder_dir)

        if hyp.get("anchors"):
            ckpt["model"].yaml["anchors"] = round(hyp["anchors"])  # force autoanchor
        model = Model(opt.cfg or ckpt["model"].yaml, ch=3, nc=nc).to(device)  # create
        exclude = ["anchor"] if opt.cfg or hyp.get("anchors") else []  # exclude keys
        state_dict = ckpt["model"].float().state_dict()  # to FP32
        state_dict = intersect_dicts(
            state_dict, model.state_dict(), exclude=exclude
        )  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info(
            "Transferred %g/%g items from %s"
            % (len(state_dict), len(model.state_dict()), weights)
        )  # report
    else:
        model = Model(opt.cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create

    # Freeze
    freeze = []  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print("freezing %s" % k)
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(
        round(nbs / total_batch_size), 1
    )  # accumulate loss before optimizing
    hyp["weight_decay"] *= total_batch_size * accumulate / nbs  # scale weight_decay
    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay

    if opt.adam:
        optimizer = optim.Adam(
            pg0, lr=hyp["lr0"], betas=(hyp["momentum"], 0.999)
        )  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(
            pg0, lr=hyp["lr0"], momentum=hyp["momentum"], nesterov=True
        )

    optimizer.add_param_group(
        {"params": pg1, "weight_decay": hyp["weight_decay"]}
    )  # add pg1 with weight_decay
    optimizer.add_param_group({"params": pg2})  # add pg2 (biases)
    logger.info(
        "Optimizer groups: %g .bias, %g conv.weight, %g other"
        % (len(pg2), len(pg1), len(pg0))
    )
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    lf = one_cycle(1, hyp["lrf"], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # Logging
    # Start from any run that is already active so that wandb_run is always
    # bound when the checkpoint dict is built further down.
    wandb_run = wandb.run if wandb else None
    if rank in [-1, 0] and wandb and wandb.run is None:
        opt.hyp = hyp  # add hyperparameters
        wandb_run = wandb.init(
            config=opt,
            resume="allow",
            project="YOLOv5" if opt.project == "runs/train" else Path(opt.project).stem,
            name=save_dir.stem,
            id=ckpt.get("wandb_id") if "ckpt" in locals() else None,
        )
    loggers = {"wandb": wandb}  # loggers dict

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt["optimizer"] is not None:
            optimizer.load_state_dict(ckpt["optimizer"])
            best_fitness = ckpt["best_fitness"]

        # Results
        if ckpt.get("training_results") is not None:
            with open(results_file, "w") as file:
                file.write(ckpt["training_results"])  # write results.txt

        # Epochs
        start_epoch = ckpt["epoch"] + 1
        if opt.resume:
            assert (
                start_epoch > 0
            ), "%s training to %g epochs is finished, nothing to resume." % (
                weights,
                epochs,
            )
        if epochs < start_epoch:
            logger.info(
                "%s has been trained for %g epochs. Fine-tuning for %g additional epochs."
                % (weights, ckpt["epoch"], epochs)
            )
            epochs += ckpt["epoch"]  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = int(model.stride.max())  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz, imgsz_test = [
        check_img_size(x, gs) for x in opt.img_size
    ]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info("Using SyncBatchNorm()")

    # EMA
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(
        train_path,
        imgsz,
        batch_size,
        gs,
        opt,
        hyp=hyp,
        augment=True,
        cache=opt.cache_images,
        rect=opt.rect,
        rank=rank,
        world_size=opt.world_size,
        workers=opt.workers,
        image_weights=opt.image_weights,
        quad=opt.quad,
    )
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert (
        mlc < nc
    ), "Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g" % (
        mlc,
        nc,
        opt.data,
        nc - 1,
    )

    # Process 0
    if rank in [-1, 0]:
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        testloader = create_dataloader(
            test_path,
            imgsz_test,
            total_batch_size,
            gs,
            opt,  # testloader
            hyp=hyp,
            cache=opt.cache_images and not opt.notest,
            rect=True,
            rank=-1,
            world_size=opt.world_size,
            workers=opt.workers,
            pad=0.5,
        )[0]

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, save_dir, loggers)
                if tb_writer:
                    tb_writer.add_histogram("classes", c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz)

    # Model parameters
    hyp["cls"] *= nc / 80.0  # scale hyp['cls'] to class count
    hyp["obj"] *= (
        imgsz ** 2 / 640.0 ** 2 * 3.0 / nl
    )  # scale hyp['obj'] to image size and output layers
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = (
        labels_to_class_weights(dataset.labels, nc).to(device) * nc
    )  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(
        round(hyp["warmup_epochs"] * nb), 1000
    )  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    logger.info(
        "Image sizes %g train, %g test\n"
        "Using %g dataloader workers\nLogging results to %s\n"
        "Starting training for %g epochs..."
        % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs)
    )
    for epoch in range(
        start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = (
                    model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc
                )  # class weights
                iw = labels_to_image_weights(
                    dataset.labels, nc=nc, class_weights=cw
                )  # image weights
                dataset.indices = random.choices(
                    range(dataset.n), weights=iw, k=dataset.n
                )  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (
                    torch.tensor(dataset.indices)
                    if rank == 0
                    else torch.zeros(dataset.n)
                ).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(
            ("\n" + "%10s" * 8)
            % ("Epoch", "gpu_mem", "box", "obj", "cls", "total", "targets", "img_size")
        )
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (
            imgs,
            targets,
            paths,
            _,
        ) in (
            pbar
        ):  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = (
                imgs.to(device, non_blocking=True).float() / 255.0
            )  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(
                    1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()
                )
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x["lr"] = np.interp(
                        ni,
                        xi,
                        [
                            hyp["warmup_bias_lr"] if j == 2 else 0.0,
                            x["initial_lr"] * lf(epoch),
                        ],
                    )
                    if "momentum" in x:
                        x["momentum"] = np.interp(
                            ni, xi, [hyp["warmup_momentum"], hyp["momentum"]]
                        )

            # Multi-scale
            if opt.multi_scale:
                # randrange() needs integer bounds: float arguments are
                # rejected with TypeError on Python 3.12+.
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [
                        math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                    ]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(
                        imgs, size=ns, mode="bilinear", align_corners=False
                    )

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(
                    pred, targets.to(device), model
                )  # loss scaled by batch_size
                if rank != -1:
                    loss *= (
                        opt.world_size
                    )  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.0

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = "%.3gG" % (
                    torch.cuda.memory_reserved() / 1e9
                    if torch.cuda.is_available()
                    else 0
                )  # (GB)
                s = ("%10s" * 2 + "%10.4g" * 6) % (
                    "%g/%g" % (epoch, epochs - 1),
                    mem,
                    *mloss,
                    targets.shape[0],
                    imgs.shape[-1],
                )
                pbar.set_description(s)

                # Plot
                if plots and ni < 3:
                    f = save_dir / f"train_batch{ni}.jpg"  # filename
                    Thread(
                        target=plot_images, args=(imgs, targets, paths, f), daemon=True
                    ).start()
                    # if tb_writer:
                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
                elif plots and ni == 3 and wandb:
                    wandb.log(
                        {
                            "Mosaics": [
                                wandb.Image(str(x), caption=x.name)
                                for x in save_dir.glob("train*.jpg")
                            ]
                        }
                    )

            # end batch ------------------------------------------------------------------------------------------------
        # end epoch ----------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x["lr"] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            if ema:
                ema.update_attr(
                    model,
                    include=[
                        "yaml",
                        "nc",
                        "hyp",
                        "gr",
                        "names",
                        "stride",
                        "class_weights",
                    ],
                )
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                results, maps, times = test(
                    data=opt.data,
                    batch_size=total_batch_size,
                    image_size=imgsz_test,
                    model=ema.ema,
                    single_cls=opt.single_cls,
                    dataloader=testloader,
                    save_dir=save_dir,
                    plots=plots and final_epoch,
                    log_imgs=opt.log_imgs if wandb else 0,
                )

            # Write
            with open(results_file, "a") as f:
                f.write(
                    s + "%10.4g" * 7 % results + "\n"
                )  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
            if len(opt.name) and opt.bucket:
                # NOTE(review): shell command built from option values;
                # keep opt.bucket / opt.name trusted.
                os.system(
                    "gsutil cp %s gs://%s/results/results%s.txt"
                    % (results_file, opt.bucket, opt.name)
                )

            # Log
            tags = [
                "train/box_loss",
                "train/obj_loss",
                "train/cls_loss",  # train loss
                "metrics/precision",
                "metrics/recall",
                "metrics/mAP_0.5",
                "metrics/mAP_0.5:0.95",
                "val/box_loss",
                "val/obj_loss",
                "val/cls_loss",  # val loss
                "x/lr0",
                "x/lr1",
                "x/lr2",
            ]  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if tb_writer:
                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
                if wandb:
                    wandb.log({tag: x})  # W&B

            # Update best mAP
            fi = fitness(
                np.array(results).reshape(1, -1)
            )  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, "r") as f:  # create checkpoint
                    ckpt = {
                        "epoch": epoch,
                        "best_fitness": best_fitness,
                        "training_results": f.read(),
                        "model": ema.ema,
                        "optimizer": None if final_epoch else optimizer.state_dict(),
                        "wandb_id": wandb_run.id if wandb_run is not None else None,
                    }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        final = best if best.exists() else last  # final model
        for f in [last, best]:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
        if opt.bucket:
            os.system(f"gsutil cp {final} gs://{opt.bucket}/weights")  # upload

        # Plots
        if plots:
            plot_results(save_dir=save_dir)  # save as results.png
            if wandb:
                files = [
                    "results.png",
                    "precision_recall_curve.png",
                    "confusion_matrix.png",
                ]
                wandb.log(
                    {
                        "Results": [
                            wandb.Image(str(save_dir / f), caption=f)
                            for f in files
                            if (save_dir / f).exists()
                        ]
                    }
                )
                if opt.log_artifacts:
                    wandb.log_artifact(
                        artifact_or_path=str(final), type="model", name=save_dir.stem
                    )

        # Test best.pt
        logger.info(
            "%g epochs completed in %.3f hours.\n"
            % (epoch - start_epoch + 1, (time.time() - t0) / 3600)
        )
        if opt.data.endswith("coco.yaml") and nc == 80:  # if COCO
            for conf, iou, save_json in (
                [0.25, 0.45, False],
                [0.001, 0.65, True],
            ):  # speed, mAP tests
                results, _, _ = test(
                    data=opt.data,
                    batch_size=total_batch_size,
                    image_size=imgsz_test,
                    conf_thres=conf,
                    iou_thres=iou,
                    model=attempt_load(final, device).half(),
                    single_cls=opt.single_cls,
                    dataloader=testloader,
                    save_dir=save_dir,
                    save_json=save_json,
                    plots=False,
                )

    else:
        dist.destroy_process_group()

    wandb.run.finish() if wandb and wandb.run else None
    torch.cuda.empty_cache()
    return results
from yolov5.utils.activations import Hardswish
from yolov5.utils.general import set_logging, check_img_size

# Script entry point: parse the options, load the PyTorch weights on CPU and
# prepare the model and a dummy input tensor.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')  # from yolov5/models/
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    # A single size value is repeated so that img_size always has two entries.
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
    set_logging()
    t = time.time()  # start timestamp (not read within this part of the script)

    # Load PyTorch model
    model = attempt_load(opt.weights, map_location=torch.device('cpu'))  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        # Replace nn.Hardswish activations inside Conv modules with the
        # project's own Hardswish implementation.
        if isinstance(m, Conv) and isinstance(m.act, nn.Hardswish):
            m.act = Hardswish()  # assign activation
        # if isinstance(m, models.yolo.Detect):
def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
    """Load a model for inference, choosing the backend from the weights suffix.

    Supported weights (suffix -> backend):
        PyTorch:              *.pt
        TorchScript:          *.torchscript
        CoreML:               *.mlmodel
        OpenVINO:             *.xml
        TensorFlow:           *_saved_model
        TensorFlow:           *.pb
        TensorFlow Lite:      *.tflite
        TensorFlow Edge TPU:  *_edgetpu.tflite
        ONNX Runtime:         *.onnx
        OpenCV DNN:           *.onnx with dnn=True
        TensorRT:             *.engine

    Args:
        weights: Weights path, or a list of paths. The first entry selects the
            backend; for PyTorch weights a list is forwarded whole to
            ``attempt_load``.
        device: Forwarded as ``map_location`` to ``attempt_load`` (PyTorch) and
            used as the target device of the TensorRT binding buffers.
        dnn: When true, and the weights are neither PyTorch nor TorchScript,
            load them with ``cv2.dnn.readNetFromONNX``.
        data: Optional path to a YAML file whose ``names`` entry replaces the
            default class names (PyTorch/TorchScript metadata may override it
            again below).

    Note:
        The last statement copies *every* local variable onto ``self`` via
        ``self.__dict__.update(locals())``. Adding, renaming or removing a
        local therefore changes the attributes of the instance.
    """
    from yolov5.models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

    super().__init__()
    w = str(weights[0] if isinstance(weights, list) else weights)
    suffix = Path(w).suffix.lower()
    suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel', '.xml']
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    # One boolean per backend, in the same order as `suffixes`
    pt, jit, onnx, engine, tflite, pb, saved_model, coreml, xml = (suffix == x for x in suffixes)
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    w = attempt_download(w)  # download if not local
    if data:  # data.yaml path (optional)
        with open(data, errors='ignore') as f:
            names = yaml.safe_load(f)['names']  # class names

    if pt:  # PyTorch
        model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
        stride = max(int(model.stride.max()), 32)  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
    elif jit:  # TorchScript
        LOGGER.info(f'Loading {w} for TorchScript inference...')
        extra_files = {'config.txt': ''}  # model metadata
        model = torch.jit.load(w, _extra_files=extra_files)
        if extra_files['config.txt']:
            d = json.loads(extra_files['config.txt'])  # extra_files dict
            stride, names = int(d['stride']), d['names']
    elif dnn:  # ONNX OpenCV DNN
        LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
        check_requirements(('opencv-python>=4.5.4', ))
        net = cv2.dnn.readNetFromONNX(w)
    elif onnx:  # ONNX Runtime
        LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
        cuda = torch.cuda.is_available()
        check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
        import onnxruntime
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
        session = onnxruntime.InferenceSession(w, providers=providers)
    elif xml:  # OpenVINO
        LOGGER.info(f'Loading {w} for OpenVINO inference...')
        check_requirements(('openvino-dev', ))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
        import openvino.inference_engine as ie
        core = ie.IECore()
        network = core.read_network(model=w, weights=Path(w).with_suffix('.bin'))  # *.xml, *.bin paths
        executable_network = core.load_network(network, device_name='CPU', num_requests=1)
    elif engine:  # TensorRT
        LOGGER.info(f'Loading {w} for TensorRT inference...')
        import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
        check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
        Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
        logger = trt.Logger(trt.Logger.INFO)
        with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
            model = runtime.deserialize_cuda_engine(f.read())
        # One pre-allocated tensor per engine binding, keyed by binding name
        bindings = OrderedDict()
        for index in range(model.num_bindings):
            name = model.get_binding_name(index)
            dtype = trt.nptype(model.get_binding_dtype(index))
            shape = tuple(model.get_binding_shape(index))
            data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
            bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
        binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
        context = model.create_execution_context()
        batch_size = bindings['images'].shape[0]
    elif coreml:  # CoreML
        LOGGER.info(f'Loading {w} for CoreML inference...')
        import coremltools as ct
        model = ct.models.MLModel(w)
    else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
        if saved_model:  # SavedModel
            LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
            import tensorflow as tf
            model = tf.keras.models.load_model(w)
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
            import tensorflow as tf

            def wrap_frozen_graph(gd, inputs, outputs):
                # Import the GraphDef into a wrapped function and prune it to
                # the requested input/output tensors.
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            # read_bytes() opens, reads and closes the file; the previous bare
            # open(w, 'rb').read() never closed the handle. No new local is
            # introduced, so the attributes copied onto self stay the same.
            graph_def.ParseFromString(Path(w).read_bytes())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                # Fall back to the interpreter bundled with full TensorFlow
                import tensorflow as tf
                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
            if 'edgetpu' in w.lower():  # Edge TPU https://coral.ai/software/#edgetpu-runtime
                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                delegate = {
                    'Linux': 'libedgetpu.so.1',
                    'Darwin': 'libedgetpu.1.dylib',
                    'Windows': 'edgetpu.dll'
                }[platform.system()]
                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
            else:  # Lite
                LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
    self.__dict__.update(locals())  # assign all variables to self
def detect(save_img=False):
    """Run detection on ``opt.source`` and save/stream the annotated frames.

    All settings are read from the module-level ``opt`` namespace (output
    folder, source, weights, thresholds, ...). The output folder is deleted
    and recreated on every call.

    Args:
        save_img: Save annotated images/videos. Forced to True for any source
            that is not treated as a stream.

    Raises:
        StopIteration: When 'q' is pressed while results are being streamed.
    """
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by the hard-coded flag)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                # Instead of showing a window, the latest frame is written to a
                # fixed file. NOTE(review): the absolute Windows path is
                # hard-coded; the target folder must already exist.
                cv2.imwrite("C:/Users/lenovo/Desktop/server/output/camera.jpg", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    # Was `StopIterationq`, an undefined name that raised
                    # NameError instead of the intended StopIteration.
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        # Output codec is fixed to X264
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('X', '2', '6', '4'), fps,
                                                     (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def test(
        data,
        weights=None,  # model.pt path(s)
        batch_size=32,  # batch size
        imgsz=640,  # inference size (pixels)
        conf_thres=0.001,  # confidence threshold
        iou_thres=0.6,  # NMS IoU threshold
        task='val',  # train, val, test, speed or study
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        single_cls=False,  # treat as single-class dataset
        augment=False,  # augmented inference
        verbose=False,  # verbose output
        save_txt=False,  # save results to *.txt
        save_hybrid=False,  # save label+prediction hybrid results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_json=False,  # save a cocoapi-compatible JSON results file
        project='runs/test',  # save to project/name
        name='exp',  # save to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=True,  # use FP16 half-precision inference
        model=None,
        dataloader=None,
        save_dir=Path(''),
        plots=True,
        wandb_logger=None,
        compute_loss=None,
):
    """Evaluate a detection model on a dataset and report P/R/mAP.

    Two calling modes exist. When ``model`` is given (the "training" mode) the
    model, ``dataloader`` and ``save_dir`` supplied by the caller are used as
    they are. Otherwise the weights are loaded from ``weights``, a fresh run
    directory is created under ``project/name`` and a dataloader is built for
    ``task``.

    Args:
        data: Dataset dict, or path to a dataset YAML that is loaded here.
        model: Already-built model; its presence selects the training mode.
        dataloader: Dataloader to iterate in training mode.
        save_dir: Output directory in training mode.
        plots: Produce confusion-matrix and batch plots.
        wandb_logger: Optional logger object used for W&B image logging.
        compute_loss: Optional callable; when given, the first three loss
            components are accumulated over all batches.
        (remaining arguments are described next to their defaults above)

    Returns:
        A 3-tuple ``(results, maps, t)`` where ``results`` is
        ``(mp, mr, map50, map, *losses)`` with the accumulated losses divided
        by ``len(dataloader)``, ``maps`` is an array of length ``nc`` holding
        the per-class AP (classes without results get the overall mAP), and
        ``t`` is the per-image (pre-process, inference, NMS) time in ms.
    """
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device

    else:  # called directly
        set_logging()
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
        imgsz = check_img_size(imgsz, s=gs)  # check image size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half &= device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    if isinstance(data, str):
        with open(data) as f:
            data = yaml.safe_load(f)
    check_dataset(data)  # check
    # data['val'] may be a list of paths; calling .endswith() on it directly
    # raised AttributeError, so only a plain string can mark the COCO dataset.
    val_path = data['val']
    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs = 0
    if wandb_logger and wandb_logger.wandb:
        log_imgs = min(wandb_logger.log_imgs, 100)
    # Dataloader
    if not training:
        if device.type != 'cpu':
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
        dataloader = create_dataloader(data[task],
                                       imgsz,
                                       batch_size,
                                       gs,
                                       single_cls,
                                       pad=0.5,
                                       rect=True,
                                       prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    # Column header of the progress bar. The two metric names had been garbled
    # into '[email protected]' placeholders and are restored here.
    s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        t_ = time_synchronized()
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        t = time_synchronized()
        t0 += t - t_

        # Run model
        out, train_out = model(img, augment=augment)  # inference and training outputs
        t1 += time_synchronized() - t

        # Compute loss
        if compute_loss:
            loss += compute_loss([x.float() for x in train_out], targets)[1][:3]  # box, obj, cls

        # Run NMS
        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
        t = time_synchronized()
        out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
        t2 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging - Media Panel plots
            # (log_imgs stays 0 without a logger, so wandb_logger is not dereferenced then)
            if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0:  # Check for test operation
                if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
                    box_data = [{
                        "position": {
                            "minX": xyxy[0],
                            "minY": xyxy[1],
                            "maxX": xyxy[2],
                            "maxY": xyxy[3]
                        },
                        "class_id": int(cls),
                        "box_caption": "%s %.3f" % (names[cls], conf),
                        "scores": {
                            "class_score": conf
                        },
                        "domain": "pixel"
                    } for *xyxy, conf, cls in pred.tolist()]
                    boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                    wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name))
            wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
                        'bbox': [round(x, 3) for x in b],
                        'score': round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # mAP@0.5, mAP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%11i' * 2 + '%11.3g' * 4  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t2))  # speeds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb_logger and wandb_logger.wandb:
            val_batches = [
                wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))
            ]
            wandb_logger.log({"Validation": val_batches})
    if wandb_images:
        wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        # Best effort: a missing or failing pycocotools only prints a message
        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            check_requirements(['pycocotools'])
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
def detect(opt, device, save_img=False):
    """Detect and track objects on ``opt.source`` and record "catches".

    Detections from the YOLOv5 model are fed to a DeepSort tracker (or are
    replaced by ground-truth boxes when ``groundtruths_path`` does not contain
    'disable'); the tracked boxes are handed to ``solution.detect_catches``
    and the collected pairs are finally written with
    ``solution.write_catches``. The output folder is deleted and recreated on
    every call.

    Relies on the module-level names ``colorDict``, ``id_mapping`` and
    ``groundtruths_path``.

    Args:
        opt: Namespace with the run settings (output, source, weights, data,
            config_deepsort, thresholds, fourcc, ...).
        device: torch device the model and tensors are placed on.
        save_img: Save annotated images/videos to the output folder.

    Raises:
        StopIteration: When 'q' is pressed while results are being shown.
    """
    global groundtruths_path
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    colorOrder = ['red', 'purple', 'blue', 'green', 'yellow', 'orange']
    frame_num = 0
    framestr = 'Frame {frame}'
    fpses = []
    frame_catch_pairs = []
    ball_person_pairs = {}

    for color in colorDict:
        ball_person_pairs[color] = 0

    # Read Class Name Yaml
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    names = data_dict['names']

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # `half` was read below without ever being assigned in this function;
    # derive it from the device as the other detect() variants do.
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by the hard-coded flag)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so .to()/.eval() must
        # be called on the model itself rather than chained onto its result.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors (the model's class names replace the YAML ones)
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            # End-of-tracking timestamp; filled in below on every path so the
            # FPS computation never sees an unbound or stale value.
            t3 = None
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                bbox_xywh = []
                confs = []
                clses = []

                # Convert detections to the tracker's input format
                img_h, img_w, _ = im0.shape  # get image shape
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    bbox_xywh.append([x_c, y_c, bbox_w, bbox_h])
                    confs.append([conf.item()])
                    clses.append([cls.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                clses = torch.Tensor(clses)

                # Pass detections to deepsort
                outputs = []
                if 'disable' not in groundtruths_path:
                    groundtruths = solution.load_labels(groundtruths_path, img_w, img_h, frame_num)

                    if groundtruths.shape[0] == 0:
                        outputs = deepsort.update(xywhs, confss, clses, im0)
                    else:
                        # Ground truth available: track its boxes with a
                        # confidence of 1 instead of the detections.
                        xywhs = groundtruths[:, 2:]
                        tensor = torch.tensor((), dtype=torch.int32)
                        confss = tensor.new_ones((groundtruths.shape[0], 1))
                        clses = groundtruths[:, 0:1]
                        outputs = deepsort.update(xywhs, confss, clses, im0)

                    if frame_num >= 2:
                        # Map tracker ids to ground-truth ids where the boxes
                        # agree within 0.5% of the image size.
                        # NOTE(review): the height difference is normalised by
                        # img_w, not img_h; looks like a copy/paste slip, left
                        # unchanged until confirmed.
                        for real_ID in groundtruths[:, 1:].tolist():
                            for DS_ID in xyxy2xywh(outputs[:, :5]):
                                if (abs(DS_ID[0] - real_ID[1]) / img_w < 0.005
                                        and abs(DS_ID[1] - real_ID[2]) / img_h < 0.005
                                        and abs(DS_ID[2] - real_ID[3]) / img_w < 0.005
                                        and abs(DS_ID[3] - real_ID[4]) / img_w < 0.005):
                                    id_mapping[DS_ID[4]] = int(real_ID[0])
                else:
                    outputs = deepsort.update(xywhs, confss, clses, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    clses = outputs[:, 5]
                    scores = outputs[:, 6]

                    # Temp solution to get correct id's
                    mapped_id_list = [int(id_mapping[ids]) if ids in id_mapping else ids for ids in identities]

                    ball_detect, frame_catch_pairs, ball_person_pairs = solution.detect_catches(
                        im0, bbox_xyxy, clses, mapped_id_list, frame_num, colorDict, frame_catch_pairs,
                        ball_person_pairs, colorOrder, save_img)

                    t3 = time_synchronized()  # taken before drawing
                    draw_boxes(im0, bbox_xyxy, [names[i] for i in clses], scores, ball_detect, identities)

            if t3 is None:  # no detections, or the tracker returned nothing
                t3 = time_synchronized()

            # Draw frame number
            tmp = framestr.format(frame=frame_num)
            t_size = cv2.getTextSize(tmp, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            cv2.putText(im0, tmp, (0, (t_size[1] + 10)), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

            # Inference Time
            fps = (1 / (t3 - t1))
            fpses.append(fps)
            print('FPS=%.2f' % fps)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)
        frame_num += 1

    # An empty source leaves fpses empty; skip the average instead of
    # dividing by zero.
    if fpses:
        print('Average FPS = %.2f' % (sum(fpses) / len(fpses)))

    # <source basename minus its last 4 characters>_out.csv inside the output folder
    outpath = os.path.basename(source)
    outpath = outpath[:-4]
    outpath = out + '/' + outpath + '_out.csv'
    solution.write_catches(outpath, frame_catch_pairs, colorOrder)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
def detect(opt, save_img=False):
    """Detect objects on ``opt.source``, track them with DeepSort and save results.

    Each frame is run through the YOLOv5 model and NMS; the surviving boxes
    are passed to the DeepSort tracker, the tracked boxes are drawn on the
    frame and, when ``opt.save_txt`` is set, appended to ``results.txt`` in
    the output folder. The output folder is deleted and recreated on every
    call.

    Args:
        opt: Namespace with the run settings (output, source, weights,
            config_deepsort, device, thresholds, fourcc, ...).
        save_img: Save annotated images/videos. Forced to True for any source
            that is not treated as a stream.

    Raises:
        StopIteration: When 'q' is pressed while results are being shown.
    """
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader (results are always shown; files are also always saved)
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    bbox_xywh.append([x_c, y_c, bbox_w, bbox_h])
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    # Open the file once per frame instead of once per track
                    with open(txt_path, 'a') as f:
                        for output in outputs:
                            # Track rows start with x1, y1, x2, y2 (they are
                            # used as bbox_xyxy above), while the MOT columns
                            # are left, top, width, height. Writing x2/y2 as
                            # width/height was wrong.
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            identity = output[-1]
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h,
                                                           -1, -1, -1, -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def train(hyp,  # path/to/hyp.yaml or hyp dictionary
          opt,
          device,
          callbacks
          ):
    """Train a detection model and return the final validation results.

    Args:
        hyp: Either a path to a hyperparameter YAML file (loaded with
            ``yaml.safe_load``) or an already-loaded hyperparameter dict.
            The dict is modified in place (``weight_decay``, ``box``, ``cls``,
            ``obj`` are rescaled and ``label_smoothing`` is set).
        opt: Options namespace. Attributes read here include ``save_dir``,
            ``epochs``, ``batch_size``, ``weights``, ``single_cls``, ``evolve``,
            ``data``, ``cfg``, ``resume``, ``noval``, ``nosave``, ``workers``,
            ``freeze``, ``imgsz``, ``optimizer``, ``linear_lr``, ``sync_bn``,
            ``cache``, ``rect``, ``image_weights``, ``quad``, ``noautoanchor``,
            ``label_smoothing``, ``patience``, ``multi_scale`` and
            ``save_period``.
        device: Device object the model and batches are moved to; only its
            ``type`` attribute is inspected directly (``'cpu'`` disables AMP,
            DP/DDP and SyncBatchNorm).
        callbacks: Callback registry exposing ``register_action``, ``run`` and
            a ``stop_training`` flag.

    Returns:
        The ``results`` tuple (P, R, mAP@0.5, mAP@0.5:0.95, val box/obj/cls
        loss). It stays at its all-zero initial value on ranks other than
        -1/0, which never run validation. Returns ``None`` if
        ``callbacks.stop_training`` is set after a batch.

    Side effects:
        Creates the weights directory, writes ``hyp.yaml``/``opt.yaml`` (unless
        evolving), and saves ``last.pt``/``best.pt``/periodic checkpoints.
    """
    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze

    # Directories
    w = save_dir / 'weights'  # weights dir
    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'

    # Hyperparameters
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))

    # Save run settings
    if not evolve:
        with open(save_dir / 'hyp.yaml', 'w') as f:
            yaml.safe_dump(hyp, f, sort_keys=False)
        with open(save_dir / 'opt.yaml', 'w') as f:
            yaml.safe_dump(vars(opt), f, sort_keys=False)

    # Loggers (only created on the main process: RANK -1 or 0)
    data_dict = None
    if RANK in [-1, 0]:
        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
        if loggers.wandb:
            data_dict = loggers.wandb.data_dict
            if resume:
                weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size

        # Register actions
        for k in methods(loggers):
            callbacks.register_action(k, callback=getattr(loggers, k))

    # Config
    plots = not evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)
    with torch_distributed_zero_first(LOCAL_RANK):
        data_dict = data_dict or check_dataset(data)  # check if None
    train_path, val_path = data_dict['train'], data_dict['val']
    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset

    # Model
    check_suffix(weights, '.pt')  # check weights
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(LOCAL_RANK):
            weights = attempt_download(weights)  # download if not found locally
        # NOTE(review): torch.load unpickles the checkpoint; only load weights from trusted sources.
        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(csd, strict=False)  # load
        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create

    # Freeze
    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            LOGGER.info(f'freezing {k}')
            v.requires_grad = False

    # Image size
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

    # Batch size
    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
        batch_size = check_train_batch_size(model, imgsz)
        loggers.on_params_update({"batch_size": batch_size})

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    g0, g1, g2 = [], [], []  # optimizer parameter groups
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
            g1.append(v.weight)

    if opt.optimizer == 'Adam':
        optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    elif opt.optimizer == 'AdamW':
        optimizer = AdamW(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})  # add g1 with weight_decay
    optimizer.add_param_group({'params': g2})  # add g2 (biases)
    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
                f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias")
    del g0, g1, g2

    # Scheduler
    if opt.linear_lr:
        # max(..., 1) keeps a single-epoch run from dividing by zero; the
        # result is unchanged whenever epochs > 1.
        lf = lambda x: (1 - x / max(epochs - 1, 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
    else:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if RANK in [-1, 0] else None

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # EMA
        if ema and ckpt.get('ema'):
            ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
            ema.updates = ckpt['updates']

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        if resume:
            assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
        if epochs < start_epoch:
            LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, csd

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
                       'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Trainloader
    train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
                                              hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=LOCAL_RANK,
                                              workers=workers, image_weights=opt.image_weights, quad=opt.quad,
                                              prefix=colorstr('train: '), shuffle=True)
    mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
    nb = len(train_loader)  # number of batches
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'

    # Process 0
    if RANK in [-1, 0]:
        val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls,
                                       hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1,
                                       workers=workers, pad=0.5,
                                       prefix=colorstr('val: '))[0]

        if not resume:
            labels = np.concatenate(dataset.labels, 0)
            # c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, names, save_dir)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
            model.half().float()  # pre-reduce anchor precision

        callbacks.run('on_pretrain_routine_end')

    # DDP mode
    if cuda and RANK != -1:
        model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

    # Model attributes
    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
    hyp['box'] *= 3 / nl  # scale to layers
    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    last_opt_step = -1
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    stopper = EarlyStopping(patience=opt.patience)
    compute_loss = ComputeLoss(model)  # init loss class
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting training for {epochs} epochs...')
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional, single-GPU only)
        if opt.image_weights:
            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx

        # Update mosaic border (optional)
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(3, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # random.randrange rejects float bounds on Python >= 3.12, so cast
                # explicitly. imgsz is a multiple of gs, so the casts are lossless.
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
                    f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
                callbacks.run('on_train_batch_end', ni, model, imgs, targets, paths, plots, opt.sync_bn)
                if callbacks.stop_training:
                    return
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
        scheduler.step()

        if RANK in [-1, 0]:
            # mAP
            callbacks.run('on_train_epoch_end', epoch=epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
            if not noval or final_epoch:  # Calculate mAP
                results, maps, _ = val.run(data_dict,
                                           batch_size=batch_size // WORLD_SIZE * 2,
                                           imgsz=imgsz,
                                           model=ema.ema,
                                           single_cls=single_cls,
                                           dataloader=val_loader,
                                           save_dir=save_dir,
                                           plots=False,
                                           callbacks=callbacks,
                                           compute_loss=compute_loss)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@0.5, mAP@0.5:0.95]
            if fi > best_fitness:
                best_fitness = fi
            log_vals = list(mloss) + list(results) + lr
            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)

            # Save model
            if (not nosave) or (final_epoch and not evolve):  # if save
                ckpt = {'epoch': epoch,
                        'best_fitness': best_fitness,
                        'model': deepcopy(de_parallel(model)).half(),
                        'ema': deepcopy(ema.ema).half(),
                        'updates': ema.updates,
                        'optimizer': optimizer.state_dict(),
                        'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
                        'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                del ckpt
                callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)

            # Stop Single-GPU
            if RANK == -1 and stopper(epoch=epoch, fitness=fi):
                break

            # Stop DDP TODO: known issues https://github.com/ultralytics/yolov5/pull/4576
            # stop = stopper(epoch=epoch, fitness=fi)
            # if RANK == 0:
            #    dist.broadcast_object_list([stop], 0)  # broadcast 'stop' to all ranks

        # Stop DPP
        # with torch_distributed_zero_first(RANK):
        # if stop:
        #    break  # must break all DDP ranks

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training -----------------------------------------------------------------------------------------------------
    if RANK in [-1, 0]:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    results, _, _ = val.run(data_dict,
                                            batch_size=batch_size // WORLD_SIZE * 2,
                                            imgsz=imgsz,
                                            model=attempt_load(f, device).half(),
                                            iou_thres=0.65 if is_coco else 0.60,  # best pycocotools results at 0.65
                                            single_cls=single_cls,
                                            dataloader=val_loader,
                                            save_dir=save_dir,
                                            save_json=is_coco,
                                            verbose=True,
                                            plots=True,
                                            callbacks=callbacks,
                                            compute_loss=compute_loss)  # val best model with plots
                    if is_coco:
                        callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)

        callbacks.run('on_train_end', last, best, plots, epoch, results)
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    torch.cuda.empty_cache()
    return results