def __init__(self, cfg, weight, batchSize=1):
    """Build the Darknet detector, profile it and optionally export a trace.

    Reads the module-level names ``input_size``, ``device`` and ``libtorch``.

    Args:
        cfg: Passed to ``Darknet`` to construct the network.
        weight: Passed to ``load_weights`` on the constructed network.
        batchSize: Stored unchanged as ``self.batchSize``.

    Raises:
        AssertionError: If the input size is not a multiple of 32 or is
            not greater than 32.

    Side effects:
        Prints one line of profiling figures. When ``libtorch`` is truthy,
        writes a traced copy of the model to ``det_lib.pt``.
    """
    self.det_model = Darknet(cfg)
    self.det_model.load_weights(weight)
    self.det_model.net_info['height'] = input_size
    self.det_inp_dim = int(self.det_model.net_info['height'])
    assert self.det_inp_dim % 32 == 0
    assert self.det_inp_dim > 32

    if device != "cpu":
        self.det_model.cuda()

    inf_time = get_inference_time(self.det_model, height=input_size, width=input_size)
    flops = print_model_param_flops(self.det_model, input_width=input_size, input_height=input_size)
    params = print_model_param_nums(self.det_model)
    print("Detection: Inference time {}s, Params {}, FLOPs {}".format(
        inf_time, params, flops))

    if libtorch:
        # A trace permanently records the train/eval behaviour that was active
        # while tracing, so switch to eval mode before exporting.
        self.det_model.eval()
        # The example must live on the same device as the weights; a CPU
        # tensor fed to a CUDA model raises a device-mismatch error.
        model_device = next(self.det_model.parameters()).device
        # NOTE(review): the trace uses a fixed 224x224 input rather than
        # input_size -- confirm this is the intended export resolution.
        example = torch.rand(2, 3, 224, 224, device=model_device)
        traced_model = torch.jit.trace(self.det_model, example)
        traced_model.save("det_lib.pt")

    self.det_model.eval()
    self.im_dim_list = []
    self.batchSize = batchSize
    self.mul_img = False
def __init__(self, batchSize=1, cfg="src/yolo/cfg/yolov3-spp.cfg",
             weight='models/yolo/yolov3-spp.weights'):
    """Build the Darknet detector and move it to the GPU in eval mode.

    Reads ``config.input_size`` for the network input size.

    Args:
        batchSize: Stored unchanged as ``self.batchSize``.
        cfg: Passed to ``Darknet`` to construct the network. Defaults to
            the path that used to be hard-coded here.
        weight: Passed to ``load_weights``. Defaults to the path that used
            to be hard-coded here.

    Raises:
        AssertionError: If the input size is not a multiple of 32 or is
            not greater than 32.
    """
    self.det_model = Darknet(cfg)
    self.det_model.load_weights(weight)
    self.det_model.net_info['height'] = config.input_size
    self.det_inp_dim = int(self.det_model.net_info['height'])
    assert self.det_inp_dim % 32 == 0
    assert self.det_inp_dim > 32
    # This variant moves the model to CUDA unconditionally.
    self.det_model.cuda()
    self.det_model.eval()

    self.stopped = False
    self.batchSize = batchSize
def __init__(self, batchSize=1):
    """Create the Darknet detector described by the ``config`` module.

    Reads ``config.yolo_cfg``, ``config.yolo_model`` and
    ``config.input_size``, plus the module-level ``device``.

    Args:
        batchSize: Stored unchanged as ``self.batchSize``.

    Raises:
        AssertionError: If the input size is not a multiple of 32 or is
            not greater than 32.
    """
    self.det_model = net = Darknet(config.yolo_cfg)
    net.load_weights(config.yolo_model)

    # The input size is written into the network's own metadata and read
    # back from there.
    net.net_info['height'] = config.input_size
    self.det_inp_dim = int(net.net_info['height'])
    assert self.det_inp_dim % 32 == 0
    assert self.det_inp_dim > 32

    if device != "cpu":
        net.cuda()
    net.eval()

    self.stopped = False
    self.batchSize = batchSize
def __init__(self, cfg, weight, batchSize=1):
    """Create the Darknet detector and print its profiling figures.

    Reads ``opt.input_size`` and the module-level ``device``.

    Args:
        cfg: Passed to ``Darknet`` to construct the network.
        weight: Passed to ``load_weights`` on the constructed network.
        batchSize: Stored unchanged as ``self.batchSize``.

    Raises:
        AssertionError: If the input size is not a multiple of 32 or is
            not greater than 32.
    """
    self.det_model = net = Darknet(cfg)
    net.load_weights(weight)

    net.net_info['height'] = opt.input_size
    self.det_inp_dim = int(net.net_info['height'])
    assert self.det_inp_dim % 32 == 0
    assert self.det_inp_dim > 32

    if device != "cpu":
        net.cuda()

    # Profiling happens before the model is switched to eval mode.
    inf_time = get_inference_time(
        net, height=opt.input_size, width=opt.input_size)
    flops = print_model_param_flops(
        net, input_width=opt.input_size, input_height=opt.input_size)
    params = print_model_param_nums(net)
    summary = "Detection: Inference time {}s, Params {}, FLOPs {}".format(inf_time, params, flops)
    print(summary)

    net.eval()
    self.im_dim_list = []
    self.batchSize = batchSize
def __init__(self, cfg, weight, batchSize=1):
    """Create the Darknet detector and print its profiling figures.

    Reads ``config.input_size`` and the module-level ``device``.

    Args:
        cfg: Passed to ``Darknet`` to construct the network.
        weight: Passed to ``load_weights`` on the constructed network.
        batchSize: Stored unchanged as ``self.batchSize``.

    Raises:
        AssertionError: If the input size is not a multiple of 32 or is
            not greater than 32.
    """
    self.det_model = net = Darknet(cfg)
    net.load_weights(weight)

    net.net_info['height'] = config.input_size
    self.det_inp_dim = int(net.net_info['height'])
    assert self.det_inp_dim % 32 == 0
    assert self.det_inp_dim > 32

    if device != "cpu":
        net.cuda()

    # Profiling happens before the model is switched to eval mode.
    inf_time = get_inference_time(
        net, height=config.input_size, width=config.input_size)
    flops = print_model_param_flops(
        net, input_width=config.input_size, input_height=config.input_size)
    params = print_model_param_nums(net)
    summary = "Detection: Inference time {}s, Params {}, FLOPs {}".format(
        inf_time, params, flops)
    print(summary)

    net.eval()
    self.im_dim_list = []
    self.batchSize = batchSize
class ObjectDetectionYolo(object):
    """Darknet-based detector that turns one frame into boxes and crops.

    ``process`` is the public entry point: it prepares the frame, runs the
    detector with NMS, maps boxes back to the original frame and crops one
    patch per box.
    """

    def __init__(self, batchSize=1):
        """Load the network named in ``config`` and put it in eval mode.

        Args:
            batchSize: Stored unchanged as ``self.batchSize``.

        Raises:
            AssertionError: If the input size is not a multiple of 32 or
                is not greater than 32.
        """
        self.det_model = net = Darknet(config.yolo_cfg)
        net.load_weights(config.yolo_model)
        net.net_info['height'] = config.input_size
        self.det_inp_dim = int(net.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        if device != "cpu":
            net.cuda()
        net.eval()

        self.stopped = False
        self.batchSize = batchSize

    def __video_process(self, frame):
        """Wrap a single frame as a one-element batch.

        Returns:
            ``(img, orig_img, im_name, im_dim_list)``: the concatenated
            network input, a one-element list with the original image, the
            fixed name list ``['0.jpg']``, and the frame dimensions
            repeated twice along the last axis.
        """
        net_input, original, dims = prep_frame(frame, int(config.input_size))
        with torch.no_grad():
            batch = torch.cat([net_input])
            batch_dims = torch.FloatTensor([dims]).repeat(1, 2)
        return batch, [original], ['0.jpg'], batch_dims

    def __get_bbox(self, img, orig_img, im_name, im_dim_list):
        """Run detection + NMS and map boxes back to the original frame.

        Returns:
            ``(orig_img[0], im_name[0], boxes, scores, inps, pt1, pt2)``.
            The last five entries are ``None`` when nothing is detected;
            otherwise ``inps``, ``pt1`` and ``pt2`` are zero-filled buffers
            with one row per kept box.
        """
        with torch.no_grad():
            if device != "cpu":
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
            else:
                prediction = self.det_model(img, CUDA=False)

            # NMS; the result may be an int or an empty tensor when nothing
            # survives.
            dets = dynamic_write_results(
                prediction, config.confidence, config.num_classes,
                nms=True, nms_conf=config.nms_thresh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                return orig_img[0], im_name[0], None, None, None, None, None

            dets = dets.cpu()
            # Column 0 of dets indexes into the per-image dimension table.
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            side = self.det_inp_dim
            scaling_factor = torch.min(side / im_dim_list, 1)[0].view(-1, 1)

            # Undo the padding offset, then the resize scale.
            offset_a = (side - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            offset_b = (side - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, [1, 3]] -= offset_a
            dets[:, [2, 4]] -= offset_b
            dets[:, 1:5] /= scaling_factor

            # Clamp every box to its own image bounds.
            for row, dims in enumerate(im_dim_list):
                dets[row, [1, 3]] = torch.clamp(dets[row, [1, 3]], 0.0, dims[0])
                dets[row, [2, 4]] = torch.clamp(dets[row, [2, 4]], 0.0, dims[1])

            # Keep only detections that belong to batch entry 0.
            in_first = dets[:, 0] == 0
            boxes_k = dets[:, 1:5][in_first]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                return orig_img[0], im_name[0], None, None, None, None, None

            count = boxes_k.size(0)
            inps = torch.zeros(count, 3, config.input_height, config.input_width)
            pt1 = torch.zeros(count, 2)
            pt2 = torch.zeros(count, 2)
            return (orig_img[0], im_name[0], boxes_k,
                    dets[:, 5:6][in_first], inps, pt1, pt2)

    def __crop_bbox(self, orig_img, im_name, boxes, scores, inps, pt1, pt2):
        """Fill ``inps``/``pt1``/``pt2`` with crops for every box.

        Returns:
            ``(inps, orig_img, im_name, boxes, scores, pt1, pt2)``. All
            seven are ``None`` when ``orig_img`` is ``None``; ``inps``,
            ``pt1`` and ``pt2`` are ``None`` when there are no boxes.
        """
        with torch.no_grad():
            if orig_img is None:
                return None, None, None, None, None, None, None
            if boxes is None or boxes.nelement() == 0:
                return None, orig_img, im_name, boxes, scores, None, None

            rgb = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
            inps, pt1, pt2 = self.__crop_from_dets(
                im_to_torch(rgb), boxes, inps, pt1, pt2)
            return inps, orig_img, im_name, boxes, scores, pt1, pt2

    @staticmethod
    def __crop_from_dets(img, boxes, inps, pt1, pt2):
        """Crop one enlarged patch per box out of ``img``.

        ``img`` is modified in place: fixed offsets are subtracted from its
        first three leading-axis slices before cropping. Each box is grown
        by 30% in both directions, limited to the image, and kept at least
        5 units wide and tall.

        Returns:
            The same ``inps``, ``pt1`` and ``pt2`` objects, filled row by row.
        """
        imght = img.size(1)
        imgwidth = img.size(2)

        for channel, offset in enumerate((0.406, 0.457, 0.480)):
            img[channel].add_(-offset)

        scaleRate = 0.3
        for idx, box in enumerate(boxes):
            upLeft = torch.Tensor((float(box[0]), float(box[1])))
            bottomRight = torch.Tensor((float(box[2]), float(box[3])))

            # Measure the box before the corners are moved.
            ht = bottomRight[1] - upLeft[1]
            width = bottomRight[0] - upLeft[0]
            grow_x = width * scaleRate / 2
            grow_y = ht * scaleRate / 2

            upLeft[0] = max(0, upLeft[0] - grow_x)
            upLeft[1] = max(0, upLeft[1] - grow_y)
            bottomRight[0] = max(min(imgwidth - 1, bottomRight[0] + grow_x),
                                 upLeft[0] + 5)
            bottomRight[1] = max(min(imght - 1, bottomRight[1] + grow_y),
                                 upLeft[1] + 5)

            try:
                inps[idx] = cropBox(img.clone(), upLeft, bottomRight,
                                    config.input_height, config.input_width)
            except IndexError:
                # Best effort: report the offending geometry and carry on.
                print(img.shape)
                print(upLeft)
                print(bottomRight)
                print('===')
            pt1[idx] = upLeft
            pt2[idx] = bottomRight

        return inps, pt1, pt2

    def process(self, frame):
        """Detect objects in ``frame`` and crop them.

        Returns:
            ``(inps, orig_img, boxes, scores, pt1, pt2)``.
        """
        prepared = self.__video_process(frame)
        # __get_bbox yields its values in exactly the order __crop_bbox
        # expects, so the tuple is forwarded as-is.
        detected = self.__get_bbox(*prepared)
        inps, orig_img, im_name, boxes, scores, pt1, pt2 = self.__crop_bbox(*detected)
        return inps, orig_img, boxes, scores, pt1, pt2
class ObjectDetectionYolo(object):
    """Darknet-based detector that returns rescaled detections for a frame.

    ``process`` prepares the frame and runs detection; ``cut_box_score``
    can afterwards clamp the result and split it into boxes and scores.
    """

    def __init__(self, cfg, weight, batchSize=1):
        """Create the network, print profiling figures and enter eval mode.

        Args:
            cfg: Passed to ``Darknet`` to construct the network.
            weight: Passed to ``load_weights`` on the constructed network.
            batchSize: Stored unchanged as ``self.batchSize``.

        Raises:
            AssertionError: If the input size is not a multiple of 32 or
                is not greater than 32.
        """
        self.det_model = net = Darknet(cfg)
        net.load_weights(weight)
        net.net_info['height'] = config.input_size
        self.det_inp_dim = int(net.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        if device != "cpu":
            net.cuda()

        inf_time = get_inference_time(
            net, height=config.input_size, width=config.input_size)
        flops = print_model_param_flops(
            net, input_width=config.input_size, input_height=config.input_size)
        params = print_model_param_nums(net)
        print("Detection: Inference time {}s, Params {}, FLOPs {}".format(
            inf_time, params, flops))

        net.eval()
        self.im_dim_list = []
        self.batchSize = batchSize

    def __preprocess(self, frame):
        """Wrap a single frame as a one-element batch.

        Returns:
            ``(img, im_dim_list)``: the concatenated network input and the
            frame dimensions repeated twice along the last axis.
        """
        net_input, _original, dims = prep_frame(frame, int(config.input_size))
        with torch.no_grad():
            batch = torch.cat([net_input])
            batch_dims = torch.FloatTensor([dims]).repeat(1, 2)
        return batch, batch_dims

    def __detect(self, img, im_dim_list):
        """Run detection + NMS and map boxes back to the original frame.

        Stores the per-detection image dimensions in ``self.im_dim_list``.

        Returns:
            The detections without their leading index column, or ``None``
            when nothing is detected.
        """
        self.im_dim_list = im_dim_list
        with torch.no_grad():
            if device != "cpu":
                img = img.cuda()
            prediction = self.det_model(img)

            # NMS; the result may be an int or an empty tensor when nothing
            # survives.
            dets = dynamic_write_results(
                prediction, config.confidence, config.num_classes,
                nms=True, nms_conf=config.nms_thresh)
            if isinstance(dets, int) or dets.shape[0] == 0:
                return None

            dets = dets.cpu()
            # Column 0 of dets indexes into the per-image dimension table.
            self.im_dim_list = torch.index_select(
                self.im_dim_list, 0, dets[:, 0].long())
            dims = self.im_dim_list
            side = self.det_inp_dim
            scaling_factor = torch.min(side / dims, 1)[0].view(-1, 1)

            # Undo the padding offset, then the resize scale.
            offset_a = (side - scaling_factor * dims[:, 0].view(-1, 1)) / 2
            offset_b = (side - scaling_factor * dims[:, 1].view(-1, 1)) / 2
            dets[:, [1, 3]] -= offset_a
            dets[:, [2, 4]] -= offset_b
            dets[:, 1:5] /= scaling_factor
            return dets[:, 1:]

    def process(self, frame):
        """Detect objects in ``frame``; returns what ``__detect`` returns."""
        img, im_dim_list = self.__preprocess(frame)
        return self.__detect(img, im_dim_list)

    def cut_box_score(self, results):
        """Clamp detections to the image and split them into boxes and scores.

        ``results`` is modified in place. Row ``j`` is clamped against row
        ``j`` of ``self.im_dim_list``.

        Returns:
            ``(boxes, scores)`` taken from columns 0-3 and column 4, or
            ``(None, None)`` when ``results`` is ``None``.
        """
        if results is None:
            return None, None

        for row in range(len(results)):
            limits = self.im_dim_list[row]
            results[row, [0, 2]] = torch.clamp(results[row, [0, 2]], 0.0, limits[0])
            results[row, [1, 3]] = torch.clamp(results[row, [1, 3]], 0.0, limits[1])

        return results[:, 0:4], results[:, 4:5]