class YOLO(object):
    """YOLOv4 inference wrapper that draws labelled boxes onto an image.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor. Constructing an instance
    reads the class-name and anchor files and loads the network weights.
    """

    _defaults = {
        "model_path": 'model_data/yolo4_weights.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Create the detector.

        Args:
            **kwargs: Optional overrides for any key of ``_defaults``
                (e.g. ``model_path``, ``confidence``, ``cuda``).
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    # ---------------------------------------------------#
    #   Load the class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list of names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    # ---------------------------------------------------#
    #   Load the anchor boxes
    # ---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path`` into anchor groups.

        The comma-separated numbers are reshaped to ``(-1, 3, 2)`` and the
        order of the groups is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Build the model, decoders and colour table
    # ---------------------------------------------------#
    def generate(self):
        """Build the network, load its weights and prepare helpers.

        Sets ``self.net``, ``self.yolo_decodes`` (one decoder per output
        head) and ``self.colors`` (one RGB tuple per class).
        """
        num_classes = len(self.class_names)

        # eval() switches the network to inference mode.
        self.net = YoloBody(len(self.anchors[0]), num_classes).eval()

        print('Loading weights into state dict...')
        # map_location lets weights saved on a GPU load on a CPU-only host.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        self.yolo_decodes = [
            DecodeBox(self.anchors[i], num_classes,
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class, converted to 0-255 RGB.
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the results onto it.

        Args:
            image: Image to process; it is passed to ``letterbox_image`` and
                ``ImageDraw.Draw`` and is modified in place.

        Returns:
            The same ``image`` object, annotated when detections were found
            and untouched otherwise.
        """
        image_height, image_width = np.shape(image)[0:2]
        image_shape = np.array([image_height, image_width])

        # Letterbox to the network input size, scale to [0, 1], make it CHW.
        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        # Wrapping in a list adds the batch dimension.
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence,
                nms_thres=0.3)

        # No usable detections for the image: hand back the original.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # Score = column 4 * column 5; the last column is the class index.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map boxes from the letterboxed input back to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * image_width + 0.5).astype('int32'))

        # At least one pixel, otherwise small images get no outline at all.
        thickness = max(
            (image_height + image_width) // self.model_image_size[0], 1)

        draw = ImageDraw.Draw(image)
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            color = self.colors[c]

            # Pad the box by 5 px, round, and clamp to the image bounds.
            top, left, bottom, right = boxes[i]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image_height,
                         np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image_width,
                        np.floor(right + 5 + 0.5).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
            # switch to textbbox when the Pillow dependency is upgraded.
            label_size = draw.textsize(label, font)
            print(label.encode('utf-8'))

            # Put the label above the box unless it would leave the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give an outline of the wanted thickness.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
        del draw
        return image
class YOLO(object):
    """YOLOv4 inference wrapper with a selectable backbone.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor. Constructing an instance
    reads the class-name and anchor files and loads the network weights.
    """

    _defaults = {
        "model_path": 'model_data/yolov4_mobilenet_v1_map76.62.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/voc_classes.txt',
        "backbone": 'mobilenetv1',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Create the detector.

        Args:
            **kwargs: Optional overrides for any key of ``_defaults``
                (e.g. ``model_path``, ``backbone``, ``iou``, ``cuda``).
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    # ---------------------------------------------------#
    #   Load the class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list of names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    # ---------------------------------------------------#
    #   Load the anchor boxes
    # ---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path`` into anchor groups.

        The comma-separated numbers are reshaped to ``(-1, 3, 2)`` and the
        order of the groups is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Build the model
    # ---------------------------------------------------#
    def generate(self):
        """Build the network, load its weights and prepare helpers.

        Sets ``self.net``, ``self.yolo_decodes`` (one decoder per output
        head) and ``self.colors`` (one RGB tuple per class).
        """
        num_classes = len(self.class_names)

        # ---------------------------------------------------#
        #   Create the YOLOv4 model in inference mode
        # ---------------------------------------------------#
        self.net = YoloBody(len(self.anchors[0]), num_classes,
                            backbone=self.backbone).eval()

        # ---------------------------------------------------#
        #   Load the YOLOv4 weights
        # ---------------------------------------------------#
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        print('Finished!')

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # ---------------------------------------------------#
        #   One decoder per feature layer
        # ---------------------------------------------------#
        self.yolo_decodes = [
            DecodeBox(self.anchors[i], num_classes,
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class, converted to 0-255 RGB.
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the results onto it.

        Args:
            image: Image to process; it is passed to ``letterbox_image`` and
                ``ImageDraw.Draw`` and is modified in place.

        Returns:
            The same ``image`` object, annotated when detections were found
            and untouched otherwise.
        """
        image_height, image_width = np.shape(image)[0:2]
        image_shape = np.array([image_height, image_width])

        # ---------------------------------------------------------#
        #   Pad with gray bars so the resize does not distort
        # ---------------------------------------------------------#
        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))

        # ---------------------------------------------------------#
        #   Add the batch dimension
        # ---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            # ---------------------------------------------------------#
            #   Forward pass, then decode every output head
            # ---------------------------------------------------------#
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]

            # ---------------------------------------------------------#
            #   Stack the predictions and apply non-maximum suppression
            # ---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence,
                nms_thres=self.iou)

        # ---------------------------------------------------------#
        #   Nothing detected: return the original image
        # ---------------------------------------------------------#
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # ---------------------------------------------------------#
        #   Keep boxes whose score (column 4 * column 5) is high enough;
        #   the last column is used as the class index
        # ---------------------------------------------------------#
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # -----------------------------------------------------------------#
        #   The boxes are relative to the letterboxed (gray-padded) input,
        #   so convert them back to coordinates of the original image.
        # -----------------------------------------------------------------#
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * image_width + 0.5).astype('int32'))

        thickness = max(
            (image_height + image_width) // self.model_image_size[0], 1)

        draw = ImageDraw.Draw(image)
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            color = self.colors[c]

            # Pad the box by 5 px, round, and clamp to the image bounds.
            top, left, bottom, right = boxes[i]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image_height,
                         np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image_width,
                        np.floor(right + 5 + 0.5).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
            # switch to textbbox when the Pillow dependency is upgraded.
            label_size = draw.textsize(label, font)
            print(label.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box unless it would leave the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give an outline of the wanted thickness.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
        del draw
        return image
class YOLO(object):
    """YOLOv4 inference wrapper that can annotate detections with depth.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor. Constructing an instance
    reads the class-name and anchor files and loads the network weights.
    """

    _defaults = {
        "model_path": 'logs/Epoch92-Total_Loss2.1432-Val_Loss4.1385.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Create the detector.

        Args:
            **kwargs: Optional overrides for any key of ``_defaults``
                (e.g. ``model_path``, ``confidence``, ``cuda``).
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    # ---------------------------------------------------#
    #   Load the class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list of names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    # ---------------------------------------------------#
    #   Load the anchor boxes
    # ---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path`` into anchor groups.

        The comma-separated numbers are reshaped to ``(-1, 3, 2)`` and the
        order of the groups is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Build the model, decoders and colour table
    # ---------------------------------------------------#
    def generate(self):
        """Build the network, load its weights and prepare helpers.

        Sets ``self.net``, ``self.yolo_decodes`` (one decoder per output
        head) and ``self.colors`` (one RGB tuple per class).
        """
        num_classes = len(self.class_names)

        # eval() switches the network to inference mode.
        self.net = YoloBody(len(self.anchors[0]), num_classes).eval()

        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        self.yolo_decodes = [
            DecodeBox(self.anchors[i], num_classes,
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class, converted to 0-255 RGB.
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image, aligned_depth_frame=None, color_intrin_part=None, mode=1):
        """Run detection on ``image``, draw the results and report depth.

        Args:
            image: Image to process; it is passed to ``letterbox_image`` and
                ``ImageDraw.Draw`` and is modified in place.
            aligned_depth_frame: Optional depth frame exposing ``width``,
                ``height`` and ``get_distance(x, y)``.
            color_intrin_part: Optional sequence read as
                ``(ppx, ppy, fx, fy)`` camera intrinsics.
            mode: Depth strategy. Only ``1`` (depth at the box centre pixel)
                is implemented; any other value uses the "0 m" fallback.

        Returns:
            The same ``image`` object, annotated when detections were found
            and untouched otherwise.
        """
        image_height, image_width = np.shape(image)[0:2]
        image_shape = np.array([image_height, image_width])

        # Letterbox to the network input size, scale to [0, 1], make it CHW.
        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        # Wrapping in a list adds the batch dimension.
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence,
                nms_thres=0.3)

        # No usable detections for the image: hand back the original.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # Score = column 4 * column 5; the last column is the class index.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map boxes from the letterboxed input back to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * image_width + 0.5).astype('int32'))

        # At least one pixel, otherwise small images get no outline at all.
        thickness = max(
            (image_height + image_width) // self.model_image_size[0], 1)

        # Depth can only be measured with a frame, intrinsics and mode 1.
        # NOTE: earlier revisions sketched modes 2-4 (mean depth over the
        # box, mean after trimming the 10% extremes, mean over non-zero
        # pixels); they were disabled and are not implemented here.
        use_depth = bool(aligned_depth_frame and color_intrin_part) and mode == 1

        draw = ImageDraw.Draw(image)
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            color = self.colors[c]

            # Pad the box by 5 px, round, and clamp to the image bounds.
            top, left, bottom, right = boxes[i]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image_height,
                         np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image_width,
                        np.floor(right + 5 + 0.5).astype('int32'))

            center_x = int(round((left + right) / 2))
            center_y = int(round((top + bottom) / 2))

            # Fallback values, so the label and the log line below never hit
            # an undefined name when depth is unavailable.
            strDistance = "\n 0 m"
            target_depth = 0.0
            target_xy_pixel = [center_x, center_y]
            target_xy_true = [0.0, 0.0]

            if use_depth:
                ppx = color_intrin_part[0]
                ppy = color_intrin_part[1]
                fx = color_intrin_part[2]
                fy = color_intrin_part[3]
                width = aligned_depth_frame.width
                height = aligned_depth_frame.height

                # ----------------------------------------------------#
                #   Mode 1: depth of the box centre pixel
                # ----------------------------------------------------#
                # Keep the sample inside the depth frame.
                center_x = min(max(1, center_x), width - 1)
                center_y = min(max(1, center_y), height - 1)
                target_xy_pixel = [center_x, center_y]
                target_depth = aligned_depth_frame.get_distance(
                    target_xy_pixel[0], target_xy_pixel[1])
                strDistance = "\n%.2f m" % target_depth
                # Scale the pixel offset from (ppx, ppy) by depth / (fx, fy).
                target_xy_true = [
                    (target_xy_pixel[0] - ppx) * target_depth / fx,
                    (target_xy_pixel[1] - ppy) * target_depth / fy]

            # Draw the box -----------------------------------------------
            label = '{} {:.2f}'.format(predicted_class, score)
            label = label + strDistance
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
            # switch to textbbox when the Pillow dependency is upgraded.
            label_size = draw.textsize(label, font)
            print('检测出目标:{} ;实际坐标为(m):({:.3f}, {:.3f}, {:.3f}) \n中心点像素坐标(pixel):({}, {}) ;中心点相机坐标(m):({},{});深度: {} m\n'.format(
                predicted_class,
                target_xy_true[0], target_xy_true[1], target_depth,
                target_xy_pixel[0], target_xy_pixel[1],
                target_xy_true[0], target_xy_true[1], target_depth))

            # Put the label above the box unless it would leave the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give an outline of the wanted thickness.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
        del draw
        return image
class YOLO(object):
    """YOLOv4 inference wrapper that returns detections as plain lists.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor. Constructing an instance
    reads the class-name and anchor files and loads the network weights.
    """

    _defaults = {
        "model_path": './logs/best110.pth',
        "anchors_path": './data/anchors/yolov4_anchors.txt',
        "classes_path": './data/classes/yolov4_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.6,
        "cuda": False
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Create the detector.

        Args:
            **kwargs: Optional overrides for any key of ``_defaults``
                (e.g. ``model_path``, ``confidence``, ``cuda``).
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    # ---------------------------------------------------#
    #   Load the class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list of names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    # ---------------------------------------------------#
    #   Load the anchor boxes
    # ---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path`` into anchor groups.

        The comma-separated numbers are reshaped to ``(-1, 3, 2)`` and the
        order of the groups is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Build the model, decoders and colour table
    # ---------------------------------------------------#
    def generate(self):
        """Build the network, load its weights and prepare helpers.

        Sets ``self.net``, ``self.yolo_decodes`` (one decoder per output
        head) and ``self.colors`` (one RGB tuple per class).
        """
        num_classes = len(self.class_names)

        # eval() switches the network to inference mode.
        self.net = YoloBody(len(self.anchors[0]), num_classes).eval()

        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        self.yolo_decodes = [
            DecodeBox(self.anchors[i], num_classes,
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class, converted to 0-255 RGB.
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and return the boxes that were found.

        Args:
            image: Image to process; it is passed to ``letterbox_image`` and
                is not modified.

        Returns:
            A list with one ``[left, top, right, bottom, score, class_name]``
            entry per kept detection, or ``[]`` when nothing was detected.
        """
        image_height, image_width = np.shape(image)[0:2]
        image_shape = np.array([image_height, image_width])

        # Letterbox to the network input size, scale to [0, 1], make it CHW.
        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        # Wrapping in a list adds the batch dimension.
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence,
                nms_thres=0.3)

        # No usable detections for the image: report an empty result.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return []

        # Score = column 4 * column 5; the last column is the class index.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map boxes from the letterboxed input back to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        detections = []
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            # Pad the box by 5 px, round, and clamp to the image bounds.
            top, left, bottom, right = boxes[i]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image_height,
                         np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image_width,
                        np.floor(right + 5 + 0.5).astype('int32'))

            detections.append(
                [left, top, right, bottom, score, predicted_class])
        return detections
class YOLO(object):
    """YOLOv4 inference wrapper that crops, saves and labels detections.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor. Constructing an instance
    reads the class-name and anchor files and loads the network weights.
    """

    _defaults = {
        # "model_path": 'model_data/yolo4_weights.pth',
        "model_path": 'logs/Epoch14-Total_Loss16.0980-Val_Loss0.0000.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        # "classes_path": 'model_data/coco_classes.txt',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.1,
        "cuda": True
    }

    # Text shown on the image for known class names; every other class is
    # displayed with _DEFAULT_DISPLAY_NAME.
    _DISPLAY_NAMES = {
        'person': "Ren",
        'chair': "椅子",
        'clock': "钟",
        'tie': "厂牌吗??",
        'cell phone': "手机",
        'laptop': "笔记本电脑",
        'QR': "2维码",
    }
    _DEFAULT_DISPLAY_NAME = "单号"

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Create the detector.

        Args:
            **kwargs: Optional overrides for any key of ``_defaults``
                (e.g. ``model_path``, ``confidence``, ``cuda``).
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    # ---------------------------------------------------#
    #   Load the class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list of names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    # ---------------------------------------------------#
    #   Load the anchor boxes
    # ---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path`` into anchor groups.

        The comma-separated numbers are reshaped to ``(-1, 3, 2)`` and the
        order of the groups is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Build the model, decoders and colour table
    # ---------------------------------------------------#
    def generate(self):
        """Build the network, load its weights and prepare helpers.

        Sets ``self.net``, ``self.yolo_decodes`` (one decoder per output
        head) and ``self.colors`` (one RGB tuple per class).
        """
        num_classes = len(self.class_names)

        # eval() switches the network to inference mode.
        self.net = YoloBody(len(self.anchors[0]), num_classes).eval()

        print('Loading weights into state dict...')
        # map_location lets weights saved on a GPU load on a CPU-only host.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        self.yolo_decodes = [
            DecodeBox(self.anchors[i], num_classes,
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class, converted to 0-255 RGB.
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the results onto it.

        For every kept detection the padded box is also cropped from the
        image and written to ``lena2.jpg`` in the working directory, so the
        file ends up holding the crop of the last detection.

        Args:
            image: Image to process; it must support ``crop`` and is passed
                to ``letterbox_image`` and ``ImageDraw.Draw``. It is modified
                in place.

        Returns:
            The same ``image`` object, annotated when detections were found
            and untouched otherwise.
        """
        image_height, image_width = np.shape(image)[0:2]
        image_shape = np.array([image_height, image_width])

        # Letterbox to the network input size, scale to [0, 1], make it CHW.
        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        # Wrapping in a list adds the batch dimension.
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence,
                nms_thres=0.3)

        # No usable detections for the image: hand back the original.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # Score = column 4 * column 5; the last column is the class index.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map boxes from the letterboxed input back to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * image_width + 0.5).astype('int32'))

        # At least one pixel, otherwise small images get no outline at all.
        thickness = max(
            (image_height + image_width) // self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            color = self.colors[c]

            # Pad the box by 5 px on every side.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Save the padded (not yet clamped) region before drawing on it.
            img2 = image.crop((left, top, right, bottom))
            img2.save("lena2.jpg")

            # Truncate to integers and clamp to the image bounds.
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(image_height, np.floor(bottom).astype('int32'))
            right = min(image_width, np.floor(right).astype('int32'))

            predicted_class_ch = self._DISPLAY_NAMES.get(
                predicted_class, self._DEFAULT_DISPLAY_NAME)

            label = '{} {} {:.2f} {}'.format(
                predicted_class_ch, '置信度', score, '%')
            draw = ImageDraw.Draw(image)
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
            # switch to textbbox when the Pillow dependency is upgraded.
            label_size = draw.textsize(label, font)
            print(label.encode('utf-8'))

            # Put the label above the box unless it would leave the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 3])

            # Nested 1-px rectangles give an outline of the wanted thickness.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
            del draw
        return image
class YOLO4_inference(object):
    """YOLOv4 inference wrapper: loads a ``YoloBody`` network and runs detection.

    ``predict`` returns the raw detections, ``detect_image`` draws them onto
    the input image.  Class names and anchors come from the module-level
    ``ID2CLASS`` and ``anchors`` objects.
    """

    # ---------------------------------------------------#
    #   Initialise the detector
    # ---------------------------------------------------#
    def __init__(self, model_path, input_shape=416, confidence=0.5, cuda=True):
        """Store the configuration and build the network.

        Args:
            model_path: Path of the trained weights file.
            input_shape: Side length of the square network input.
            confidence: Minimum (objectness * class score) kept as a detection.
            cuda: Move the network and the inputs to the GPU when true.
        """
        self.class_names = ID2CLASS
        self.anchors = anchors
        self.model_path = model_path
        self.input_shape = (input_shape, input_shape, 3)
        self.confidence = confidence
        self.cuda = cuda
        # Palette used for the boxes; indexed modulo its length when drawing.
        self.colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255)]
        self.generate()

    @staticmethod
    def _select_compatible_weights(pretrained_dict, model_dict):
        """Return the pretrained entries whose name and shape match the model.

        Keys that the model does not have are dropped instead of raising
        ``KeyError``.
        """
        return {
            name: tensor
            for name, tensor in pretrained_dict.items()
            if name in model_dict
            and np.shape(model_dict[name]) == np.shape(tensor)
        }

    @staticmethod
    def _text_size(draw, text, font):
        """Return ``(width, height)`` of ``text``.

        ``ImageDraw.textsize`` is used when the installed Pillow still has it;
        otherwise the size is derived from ``textbbox``.
        """
        if hasattr(draw, 'textsize'):
            return draw.textsize(text, font)
        x0, y0, x1, y1 = draw.textbbox((0, 0), text, font=font)
        return (x1 - x0, y1 - y0)

    # ---------------------------------------------------#
    #   Load the trained model
    # ---------------------------------------------------#
    def generate(self):
        """Build the network, load the weights and create the three decoders."""
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        print('Loading pretrained weights.')
        model_dict = self.net.state_dict()
        # Load onto the CPU first so GPU-saved checkpoints also open on
        # CPU-only hosts; the network is moved to the GPU below if requested.
        pretrained_dict = torch.load(self.model_path, map_location='cpu')
        model_dict.update(
            self._select_compatible_weights(pretrained_dict, model_dict))
        self.net.load_state_dict(model_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
        print('Finish loading!')

        self.yolo_decodes = [
            DecodeBox(self.anchors[i], len(self.class_names),
                      (self.input_shape[1], self.input_shape[0]))
            for i in range(3)
        ]
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

    def _infer(self, image):
        """Run the network on ``image``.

        Returns:
            ``(boxes, top_label, top_conf)`` or ``None`` when the NMS step
            produced nothing usable for the image.
        """
        image_shape = np.array(np.shape(image)[0:2])
        crop_img = np.array(letterbox_image(
            image, (self.input_shape[0], self.input_shape[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        # HWC -> CHW, then add the batch dimension.
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)
            output = torch.cat(
                [self.yolo_decodes[i](outputs[i]) for i in range(3)], 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=0.3)

        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # No tensor for this image (e.g. a None entry): nothing detected.
            return None

        scores = detections[:, 4] * detections[:, 5]
        keep = scores > self.confidence
        top_conf = scores[keep]
        top_label = np.array(detections[keep, -1], np.int32)
        top_bboxes = np.array(detections[keep, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map the boxes from the letterboxed input back onto the source image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.input_shape[0], self.input_shape[1]]), image_shape)
        return boxes, top_label, top_conf

    def predict(self, image):
        """Detect objects and return ``(boxes, top_label, top_conf)``.

        Each row of ``boxes`` is unpacked as (top, left, bottom, right) by
        ``detect_image``.  When nothing is detected three empty arrays are
        returned instead of raising.
        """
        result = self._infer(image)
        if result is None:
            return (np.zeros((0, 4), dtype=np.float32),
                    np.zeros((0,), dtype=np.int32),
                    np.zeros((0,), dtype=np.float32))
        return result

    # ---------------------------------------------------#
    #   Detect and draw
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Draw every detection on ``image`` (in place) and return it.

        The image is returned untouched when nothing is detected.
        """
        result = self._infer(image)
        if result is None:
            return image
        boxes, top_label, top_conf = result

        # np.shape converts the whole image to an array, so do it only once.
        image_h, image_w = (int(v) for v in np.shape(image)[0:2])
        font = ImageFont.truetype(
            font='simhei.ttf', size=int(np.floor(3e-2 * image_w + 0.5)))
        # At least one pixel, otherwise small images would get no outline.
        thickness = max((image_h + image_w) // self.input_shape[0], 1)

        draw = ImageDraw.Draw(image)
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            top, left, bottom, right = boxes[i]

            # Pad the box by 5 px, round to the nearest pixel, clamp to image.
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image_h, np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image_w, np.floor(right + 5 + 0.5).astype('int32'))

            label = '{}: {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)
            print(label.encode('utf-8'))

            # Put the label above the box when it fits, otherwise inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Wrap around the palette so extra classes do not raise IndexError.
            color = self.colors[c % len(self.colors)]
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw
        return image
class YOLO(object):
    """YOLOv4 detector that draws its detections and also returns the boxes."""

    # Default configuration; any entry can be overridden through the keyword
    # arguments of __init__.  model_image_size is the size the network input
    # is letterboxed to.
    _defaults = {
        "model_path": 'model_data/yolo4_weights.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.3,
        "iou": 0.3,
        "cuda": False
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for ``n``, or a message string if it is unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise the detector
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply the defaults, then the caller's overrides, then build the model.

        Args:
            **kwargs: Overrides for entries of ``_defaults``
                (e.g. ``confidence=0.5``).
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   Read the class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as the class names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    #---------------------------------------------------#
    #   Read the anchor boxes
    #---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path``.

        The comma-separated values are grouped as (-1, 3, 2) and the group
        order is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            values = [float(x) for x in f.readline().split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    @staticmethod
    def _text_size(draw, text, font):
        """Return ``(width, height)`` of ``text``.

        ``ImageDraw.textsize`` is used when the installed Pillow still has it;
        otherwise the size is derived from ``textbbox``.
        """
        if hasattr(draw, 'textsize'):
            return draw.textsize(text, font)
        x0, y0, x1, y1 = draw.textbbox((0, 0), text, font=font)
        return (x1 - x0, y1 - y0)

    #---------------------------------------------------#
    #   Build the model
    #---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights, the decoders and the colours."""
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        print('Finished!')

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # One decoder per network output (three outputs).
        self.yolo_decodes = [
            DecodeBox(self.anchors[i], len(self.class_names),
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class for the box colours.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects, draw them on ``image`` and return ``(image, boxes)``.

        Each row of ``boxes`` is unpacked as (top, left, bottom, right).
        When nothing is detected the untouched image is returned together
        with an empty ``(0, 4)`` array, so callers can always unpack two
        values.
        """
        # np.shape converts the whole image to an array, so do it only once.
        image_shape = np.array(np.shape(image)[0:2])
        image_h, image_w = int(image_shape[0]), int(image_shape[1])

        # Letterbox to the network input size, scale to [0, 1], HWC -> CHW.
        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = [photo]  # adds the batch dimension

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)
            # Decode each of the three outputs, stack them, then run NMS.
            output = torch.cat(
                [self.yolo_decodes[i](outputs[i]) for i in range(3)], 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=self.iou)

        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # No tensor for this image (e.g. a None entry): nothing detected.
            return image, np.zeros((0, 4), dtype=np.float32)

        # Keep detections whose objectness * class score passes the threshold.
        scores = detections[:, 4] * detections[:, 5]
        keep = scores > self.confidence
        top_conf = scores[keep]
        top_label = np.array(detections[keep, -1], np.int32)
        top_bboxes = np.array(detections[keep, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # The boxes are relative to the letterboxed input; map them back onto
        # the source image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=int(np.floor(3e-2 * image_w + 0.5)))
        thickness = max((image_h + image_w) // self.model_image_size[0], 1)

        draw = ImageDraw.Draw(image)
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            top, left, bottom, right = boxes[i]

            # Pad the box by 5 px, round to the nearest pixel, clamp to image.
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image_h, np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image_w, np.floor(right + 5 + 0.5).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)
            print(label.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box when it fits, otherwise inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            color = self.colors[c]
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw
        return image, boxes
anchors_path = os.path.expanduser(anchors_path) with open(anchors_path) as f: anchors = f.readline() anchors = [float(x) for x in anchors.split(',')] anchors = np.array(anchors).reshape([-1, 3, 2])[::-1, :, :] net = YoloBody(len(anchors[0]), len(class_names)).eval() print('Loading weights into state dict...') is_cuda = 'cuda' if torch.cuda.is_available() else 'cpu' device = torch.device(is_cuda) state_dict = torch.load(model_path, map_location=device) net.load_state_dict(state_dict) if is_cuda == "cuda": os.environ["CUDA_VISIBLE_DEVICES"] = '0' net = net.cuda() print("model set!") # -------------- export the model input_names = ["input_0"] output_names = ["output_0", "output_1", "output_2"] output_path = "deploy/models/yolov4.onnx" print('exporting model to ONNX...') torch.onnx.export(net, example, output_path, verbose=True, input_names=input_names, output_names=output_names,
class YOLO(object):
    """YOLOv4 detector that summarises detections as a 12-value vector.

    ``detect_image`` does not draw anything; it returns the image together
    with the statistics computed by ``calsquare``.
    """

    # Default configuration; any entry can be overridden through the keyword
    # arguments of __init__.
    _defaults = {
        "model_path": 'model_data/yolo4_weights.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for ``n``, or a message string if it is unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise the detector
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply the defaults, then the caller's overrides, then build the model.

        Args:
            **kwargs: Overrides for entries of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   Read the class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as the class names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    #---------------------------------------------------#
    #   Read the anchor boxes
    #---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path``.

        The comma-separated values are grouped as (-1, 3, 2) and the group
        order is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            values = [float(x) for x in f.readline().split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    #---------------------------------------------------#
    #   Build the model
    #---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights, the decoders and the colours."""
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        self.yolo_decodes = [
            DecodeBox(self.anchors[i], len(self.class_names),
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Run detection and return ``(image, predict)``.

        ``predict`` is a 12-element array filled by ``calsquare``; it stays
        all zeros when nothing is detected.  The image is not modified.
        """
        predict = np.zeros(12)
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        # HWC -> CHW, then add the batch dimension.
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)
            output = torch.cat(
                [self.yolo_decodes[i](outputs[i]) for i in range(3)], 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=0.3)

        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # No tensor for this image (e.g. a None entry): nothing detected.
            return image, predict

        keep = detections[:, 4] * detections[:, 5] > self.confidence
        top_label = np.array(detections[keep, -1], np.int32)
        top_bboxes = np.array(detections[keep, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map the boxes from the letterboxed input back onto the source image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        predict = self.calsquare(image, boxes, top_label, predict)
        return image, predict

    def calsquare(self, image, boxes, top_label, res):
        """Accumulate per-class counts and area ratios into ``res``.

        The "main" region is the image with a 1/8 margin removed on each
        side.  Only class ids 2, 7, 5, 3 and 1 are counted; each owns two
        consecutive slots of ``res`` (2/3, 4/5, 6/7, 8/9, 10/11).  The first
        slot counts boxes with at least half of their area inside the main
        region, the second slot counts all other boxes.

        Args:
            image: Object exposing ``size`` as ``(width, height)``.
            boxes: Rows of (top, left, bottom, right).
            top_label: Class id of each row of ``boxes``.
            res: Array with at least 12 slots; updated in place.

        Returns:
            ``res``, where ``res[0]`` is the overlap area with the main
            region divided by ``h * w / 4`` and ``res[1]`` is the remaining
            box area divided by ``3 * h * w / 4``.
        """
        slot_of_class = {2: 2, 7: 4, 5: 6, 3: 8, 1: 10}
        w, h = image.size
        top_main, bottom_main = h / 8, 7 * h / 8
        left_main, right_main = w / 8, 7 * w / 8
        box_main = [top_main, left_main, bottom_main, right_main]

        area_inside = 0
        area_outside = 0
        accepted = []
        for i, c in enumerate(top_label):
            if c not in slot_of_class:
                continue
            box = np.asarray(boxes[i])
            top, left, bottom, right = box
            area = (bottom - top) * (right - left)

            # Skip boxes covering 40% or more of the image.
            if area >= 0.4 * w * h:
                continue
            # Skip boxes spanning almost the full width at the bottom edge.
            if bottom > h - 30 and left < 30 and right > w - 30:
                continue
            # Skip near-duplicates of an already accepted box.
            if any(np.abs(prev - box).sum() < 20 for prev in accepted):
                continue
            accepted.append(box)

            slot = slot_of_class[c]
            if self.mat_inter(box_main, box):
                overlap_w = min(right, right_main) - max(left, left_main)
                overlap_h = min(bottom, bottom_main) - max(top, top_main)
                overlap = overlap_w * overlap_h
                area_inside = area_inside + overlap
                area_outside = area_outside + (area - overlap)
                if overlap < 0.5 * area:
                    slot += 1
            else:
                area_outside = area_outside + area
                slot += 1
            res[slot] = res[slot] + 1

        res[0] = area_inside / (h * w / 4)
        res[1] = area_outside / (3 * h * w / 4)
        return res

    def mat_inter(self, box1, box2):
        """Return True when two (y0, x0, y1, x1) rectangles intersect or touch."""
        y01, x01, y02, x02 = box1
        y11, x11, y12, x12 = box2

        # Centre distance against half of the summed extents, per axis.
        centre_dx = abs((x01 + x02) / 2 - (x11 + x12) / 2)
        centre_dy = abs((y01 + y02) / 2 - (y11 + y12) / 2)
        extent_x = abs(x01 - x02) + abs(x11 - x12)
        extent_y = abs(y01 - y02) + abs(y11 - y12)
        return bool(centre_dx <= extent_x / 2 and centre_dy <= extent_y / 2)
class YOLO(object):
    """YOLOv4 detector that draws labelled boxes on the input image."""

    # Default configuration; any entry can be overridden through the keyword
    # arguments of __init__.
    _defaults = {
        "model_path": 'logs/Epoch100-Train_loss6.1517-Val_Loss4.5064.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/voc_class.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, attribute_name):
        """Return the default for ``attribute_name``.

        Unknown names yield a message string instead of raising.
        """
        if attribute_name in cls._defaults:
            return cls._defaults[attribute_name]
        else:
            return '没有定义的属性:' + attribute_name

    #---------------------------------------------------#
    #   Initialise the detector
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply the defaults, then the caller's overrides, then build the model.

        Args:
            **kwargs: Overrides for entries of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   Read the class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as the class names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    #---------------------------------------------------#
    #   Read the anchor boxes
    #---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path``.

        The comma-separated values are grouped as (-1, 3, 2) and the group
        order is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            values = [float(x) for x in f.readline().split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    @staticmethod
    def _text_size(draw, text, font):
        """Return ``(width, height)`` of ``text``.

        ``ImageDraw.textsize`` is used when the installed Pillow still has it;
        otherwise the size is derived from ``textbbox``.
        """
        if hasattr(draw, 'textsize'):
            return draw.textsize(text, font)
        x0, y0, x1, y1 = draw.textbbox((0, 0), text, font=font)
        return (x1 - x0, y1 - y0)

    #---------------------------------------------------#
    #   Build the model
    #---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights, the decoders and the colours."""
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            # Restrict the process to the first GPU.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
        print('Finished!')

        # One DecodeBox per anchor group / network output.
        self.yolo_decodes = [
            DecodeBox(self.anchors[i], len(self.class_names),
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class for the box colours.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects, draw them on ``image`` (in place) and return it.

        The image is returned untouched when nothing is detected.
        """
        # np.shape converts the whole image to an array, so do it only once.
        image_shape = np.array(np.shape(image)[0:2])
        image_h, image_w = int(image_shape[0]), int(image_shape[1])

        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0  # scale pixel values to [0, 1]
        # HWC -> CHW, then add the batch dimension.
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)
            # Decode each output with its own DecodeBox and concatenate.
            output = torch.cat(
                [self.yolo_decodes[i](outputs[i]) for i in range(3)], 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=self.iou)

        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # No tensor for this image (e.g. a None entry): nothing detected.
            return image

        # Keep detections whose objectness * class score passes the threshold.
        scores = detections[:, 4] * detections[:, 5]
        keep = scores > self.confidence
        top_score = scores[keep]
        top_label = np.array(detections[keep, -1], np.int32)
        top_bboxes = np.array(detections[keep, :4])
        # Split (x1, y1, x2, y2) into four n x 1 columns.
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the letterbox padding; rows are (top, left, bottom, right).
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=int(np.floor(3e-2 * image_w + 0.5)))
        # Outline width grows with the image size relative to the model input.
        thickness = int(
            max(np.ceil(image_h / self.model_image_size[0]),
                np.ceil(image_w / self.model_image_size[0]))) + 1

        draw = ImageDraw.Draw(image)
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_score[i]
            top, left, bottom, right = boxes[i]

            # Round up to whole pixels and clamp to the image.
            top = max(0, np.ceil(top).astype('int32'))
            left = max(0, np.ceil(left).astype('int32'))
            bottom = min(image_h, np.ceil(bottom).astype('int32'))
            right = min(image_w, np.ceil(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)

            # Label goes above the box when it fits, otherwise just inside
            # the top-left corner.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left + 1, top + 1])

            color = self.colors[c]
            # Hollow rectangle for the detection.
            draw.rectangle([left, top, right, bottom],
                           outline=color, width=thickness)
            # Filled rectangle behind the label, then the label in black.
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw
        return image
class YOLO(object):
    """YOLOv4 wrapper whose ``detect_image`` returns the decoded outputs."""

    # Default configuration; any entry can be overridden through the keyword
    # arguments of __init__.
    _defaults = {
        "model_path": 'model_data/Epoch102-Total_Loss11.0130-Val_Loss8.8086.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/helmet_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": False,
        #---------------------------------------------------------------------#
        #   Controls whether letterbox_image is used for an undistorted
        #   resize of the input.  After repeated testing, a plain resize
        #   (letterbox_image disabled) was found to work better.
        #---------------------------------------------------------------------#
        "letterbox_image": False,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for ``n``, or a message string if it is unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise the detector
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply the defaults, then the caller's overrides, then build the model.

        Args:
            **kwargs: Overrides for entries of ``_defaults``
                (e.g. ``letterbox_image=True``).
        """
        self.__dict__.update(self._defaults)
        # Overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   Read the class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as the class names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    #---------------------------------------------------#
    #   Read the anchor boxes
    #---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path``.

        The comma-separated values are grouped as (-1, 3, 2) and the group
        order is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            values = [float(x) for x in f.readline().split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    #---------------------------------------------------#
    #   Build the model
    #---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights, the decoders and the colours."""
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        print('Finished!')

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # One decoder per network output (three outputs).
        self.yolo_decodes = [
            DecodeBox(self.anchors[i], len(self.class_names),
                      (self.model_image_size[1], self.model_image_size[0]))
            for i in range(3)
        ]
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One evenly spaced hue per class.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Run the network on an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Return the three decoded network outputs for ``image``.

        The image is either letterboxed or plainly resized (bicubic, after
        conversion to RGB) to the model input size, depending on
        ``self.letterbox_image``.  No NMS and no drawing is done here.
        """
        target_size = (self.model_image_size[1], self.model_image_size[0])
        if self.letterbox_image:
            crop_img = np.array(letterbox_image(image, target_size))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(target_size, Image.BICUBIC)

        # Scale to [0, 1] and reorder HWC -> CHW.
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = [photo]  # adds the batch dimension

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        return output_list
anchors = get_anchors(anchors_path)
num_classes = len(class_names)

# %%
# Build the model
model = YoloBody(len(anchors[0]), num_classes)
model_path = "/data/zihaosh/hw2_pretrain/yolov4_coco_pretrained_weights.pth"

# Start from the pretrained weights, keeping only tensors whose name exists in
# the model and whose shape matches (heads sized for another class count are
# skipped).
print('Loading pretrained model weights.')
model_dict = model.state_dict()
# Load onto the CPU first so the checkpoint opens regardless of the device it
# was saved from; the model is moved to the GPU below.
pretrained_dict = torch.load(model_path, map_location='cpu')
pretrained_dict = {
    k: v for k, v in pretrained_dict.items()
    if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')

model = model.cuda()

# Build the loss functions (three YOLOLoss instances)
yolo_losses = [
    YOLOLoss(np.reshape(anchors, [-1, 2]), num_classes,
             (input_shape[1], input_shape[0]), smoooth_label, Cuda)
    for _ in range(3)
]

# read train lines and val lines
with open(train_annotation_path) as f:
    train_lines = f.readlines()
with open(val_annotation_path) as f:
    val_lines = f.readlines()
num_train = len(train_lines)
num_val = len(val_lines)

# ------------------------------------#
#   Train with the backbone frozen first