class YOLO(object):
    """YOLOv3 inference wrapper that draws labelled boxes on an image.

    Every entry of ``_defaults`` is copied onto the instance and can be
    overridden by passing a keyword argument of the same name to the
    constructor.
    """

    _defaults = {
        "model_path": 'model_data/yolo_weights.pth',
        "classes_path": 'model_data/coco_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value stored under ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults, then keyword overrides, then build the model."""
        self.__dict__.update(self._defaults)
        # Overrides must land before the class file and weights are read.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.config = Config
        self.generate()

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    # ---------------------------------------------------#
    #   Build the model, the decoders and the colour table
    # ---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights and prepare decoding tools."""
        # NOTE: self.config is the module-level Config object, so this write
        # is visible to every other user of Config.
        self.config["yolo"]["classes"] = len(self.class_names)
        self.net = YoloBody(self.config)

        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()

        if self.cuda:
            # NOTE(review): CUDA has already been queried above; confirm that
            # setting CUDA_VISIBLE_DEVICES this late still has an effect.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # One decoder per output feature layer.
        decode_size = (self.model_image_size[1], self.model_image_size[0])
        self.yolo_decodes = [
            DecodeBox(self.config["yolo"]["anchors"][i],
                      self.config["yolo"]["classes"],
                      decode_size)
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A distinct box colour for every class, spread evenly over the hue wheel.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    @staticmethod
    def _line_thickness(image_height, image_width, model_height):
        """Return the box outline thickness in pixels, never less than 1."""
        return max((image_height + image_width) // model_height, 1)

    @staticmethod
    def _expand_and_clip(box, image_height, image_width, margin=5):
        """Grow a (top, left, bottom, right) box by ``margin``, round it and clip it to the image."""
        top, left, bottom, right = box
        top = max(0, np.floor(top - margin + 0.5).astype('int32'))
        left = max(0, np.floor(left - margin + 0.5).astype('int32'))
        bottom = min(image_height, np.floor(bottom + margin + 0.5).astype('int32'))
        right = min(image_width, np.floor(right + margin + 0.5).astype('int32'))
        return top, left, bottom, right

    @staticmethod
    def _text_size(draw, label, font):
        """Return (width, height) of ``label``; works with and without ``ImageDraw.textsize``."""
        if hasattr(draw, 'textsize'):
            return draw.textsize(label, font)
        # Newer Pillow releases dropped textsize; the right/bottom edge of the
        # bounding box anchored at (0, 0) is the closest equivalent.
        _, _, right, bottom = draw.textbbox((0, 0), label, font=font)
        return right, bottom

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the results onto it.

        Returns the same image object; it is returned unmodified when
        nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image_height, image_width = int(image_shape[0]), int(image_shape[1])
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        # letterbox_image pads with grey bars so the resize does not distort.
        crop_img = np.array(letterbox_image(image, (model_h, model_w)))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))

        # Add the batch dimension.
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=self.iou)

        # A missing / None first entry is treated as "nothing detected";
        # any other failure is allowed to surface.
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # Keep detections whose objectness * class score clears the threshold.
        scores = detections[:, 4] * detections[:, 5]
        top_index = scores > self.confidence
        if not top_index.any():
            return image
        top_conf = scores[top_index]
        top_label = np.array(detections[top_index, -1], np.int32)
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = top_bboxes[:, 0:1]
        top_ymin = top_bboxes[:, 1:2]
        top_xmax = top_bboxes[:, 2:3]
        top_ymax = top_bboxes[:, 3:4]

        # Remove the grey bars: map boxes back onto the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([model_h, model_w]), image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=int(np.floor(3e-2 * image_width + 0.5)))
        thickness = self._line_thickness(image_height, image_width, model_h)

        draw = ImageDraw.Draw(image)
        for index, class_id in enumerate(top_label):
            predicted_class = self.class_names[class_id]
            score = top_conf[index]
            top, left, bottom, right = self._expand_and_clip(
                boxes[index], image_height, image_width)

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)
            print(label.encode('utf-8'))

            # Put the label above the box when there is room, otherwise inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            color = self.colors[self.class_names.index(predicted_class)]
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
        del draw
        return image
annotation_path = '2007_train.txt' model = YoloBody(Config) Cuda = True print('Loading weights into state dict...') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model_dict = model.state_dict() pretrained_dict = torch.load("model_data/yolo_weights.pth", map_location=device) pretrained_dict = { k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v) } model_dict.update(pretrained_dict) model.load_state_dict(model_dict) print('Finished!') net = model.train() if Cuda: net = torch.nn.DataParallel(model) cudnn.benchmark = True net = net.cuda() # 建立loss函数 yolo_losses = [] for i in range(3): yolo_losses.append( YOLOLoss(np.reshape(Config["yolo"]["anchors"], [-1, 2]), Config["yolo"]["classes"],
class YOLO(object):
    """YOLOv3 inference wrapper with file-based anchors and optional letterboxing.

    Every entry of ``_defaults`` is copied onto the instance and can be
    overridden by passing a keyword argument of the same name to the
    constructor.
    """

    _defaults = {
        "model_path": 'logs/NewEpoch14-Total_Loss230.2813-Val_Loss222.9243.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/homework_class.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": False,
        # ---------------------------------------------------------------------#
        #   Controls whether letterbox_image is used for a distortion-free
        #   resize of the input. After repeated testing, a plain resize with
        #   letterbox_image switched off was found to work better.
        # ---------------------------------------------------------------------#
        "letterbox_image": False,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value stored under ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults, then keyword overrides, then build the model."""
        self.__dict__.update(self._defaults)
        # Overrides must land before the class/anchor files and weights are read.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    # ---------------------------------------------------#
    #   Read all prior (anchor) boxes
    # ---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``self.anchors_path`` into an (N, 3, 2) array.

        The comma-separated numbers are grouped into pairs, three pairs per
        group, and the group order is reversed.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Build the model
    # ---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights and prepare decoding tools."""
        self.num_classes = len(self.class_names)
        # ---------------------------------------------------#
        #   Build the yolov3 model
        # ---------------------------------------------------#
        self.net = YoloBody(self.anchors, self.num_classes)

        # ---------------------------------------------------#
        #   Load the yolov3 weights
        # ---------------------------------------------------#
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()

        if self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # ---------------------------------------------------#
        #   One decoder per output feature layer
        # ---------------------------------------------------#
        decode_size = (self.model_image_size[1], self.model_image_size[0])
        self.yolo_decodes = [
            DecodeBox(self.anchors[i], self.num_classes, decode_size)
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A distinct box colour for every class, spread evenly over the hue wheel.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    @staticmethod
    def _expand_and_clip(box, image_height, image_width, margin=5):
        """Grow a (top, left, bottom, right) box by ``margin``, round it and clip it to the image."""
        top, left, bottom, right = box
        top = max(0, np.floor(top - margin + 0.5).astype('int32'))
        left = max(0, np.floor(left - margin + 0.5).astype('int32'))
        bottom = min(image_height, np.floor(bottom + margin + 0.5).astype('int32'))
        right = min(image_width, np.floor(right + margin + 0.5).astype('int32'))
        return top, left, bottom, right

    @staticmethod
    def _text_size(draw, label, font):
        """Return (width, height) of ``label``; works with and without ``ImageDraw.textsize``."""
        if hasattr(draw, 'textsize'):
            return draw.textsize(label, font)
        # Newer Pillow releases dropped textsize; the right/bottom edge of the
        # bounding box anchored at (0, 0) is the closest equivalent.
        _, _, right, bottom = draw.textbbox((0, 0), label, font=font)
        return right, bottom

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and return an RGB image with the results drawn.

        The input is converted to RGB first; the converted image is returned
        without drawings when nothing is detected.
        """
        # ---------------------------------------------------------#
        #   Convert to RGB so greyscale inputs do not fail at
        #   prediction time.
        # ---------------------------------------------------------#
        image = image.convert('RGB')

        image_shape = np.array(np.shape(image)[0:2])
        image_height, image_width = int(image_shape[0]), int(image_shape[1])
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        # ---------------------------------------------------------#
        #   Either pad with grey bars for a distortion-free resize,
        #   or resize directly.
        # ---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(letterbox_image(image, (model_w, model_h)))
        else:
            crop_img = image.resize((model_w, model_h), Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))

        with torch.no_grad():
            # Add the batch dimension.
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # ---------------------------------------------------------#
            #   Feed the image through the network.
            # ---------------------------------------------------------#
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]

            # ---------------------------------------------------------#
            #   Stack the predictions, then apply non-maximum suppression.
            # ---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, self.num_classes,
                conf_thres=self.confidence,
                nms_thres=self.iou)

        # ---------------------------------------------------------#
        #   Nothing detected: return the image as is. A missing / None
        #   first entry is treated as "no detection"; any other
        #   failure is allowed to surface.
        # ---------------------------------------------------------#
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # ---------------------------------------------------------#
        #   Filter the boxes by score.
        # ---------------------------------------------------------#
        scores = detections[:, 4] * detections[:, 5]
        top_index = scores > self.confidence
        if not top_index.any():
            return image
        top_conf = scores[top_index]
        top_label = np.array(detections[top_index, -1], np.int32)
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = top_bboxes[:, 0:1]
        top_ymin = top_bboxes[:, 1:2]
        top_xmax = top_bboxes[:, 2:3]
        top_ymax = top_bboxes[:, 3:4]

        # -----------------------------------------------------------------#
        #   With letterboxing the boxes are relative to the padded image,
        #   so the grey bars have to be removed. Without it the boxes only
        #   need rescaling from model size to image size.
        # -----------------------------------------------------------------#
        if self.letterbox_image:
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([model_h, model_w]), image_shape)
        else:
            top_xmin = top_xmin / model_w * image_shape[1]
            top_ymin = top_ymin / model_h * image_shape[0]
            top_xmax = top_xmax / model_w * image_shape[1]
            top_ymax = top_ymax / model_h * image_shape[0]
            boxes = np.concatenate(
                [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=int(np.floor(3e-2 * image_width + 0.5)))
        thickness = max((image_height + image_width) // model_h, 1)

        draw = ImageDraw.Draw(image)
        for index, class_id in enumerate(top_label):
            predicted_class = self.class_names[class_id]
            score = top_conf[index]
            top, left, bottom, right = self._expand_and_clip(
                boxes[index], image_height, image_width)

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)
            print(label.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box when there is room, otherwise inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            color = self.colors[self.class_names.index(predicted_class)]
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
        del draw
        return image
class YOLO(object):
    """YOLO-based detector that returns a box and three angles per detection.

    Every entry of ``_defaults`` is copied onto the instance and can be
    overridden by passing a keyword argument of the same name to the
    constructor.
    """

    _defaults = {
        "model_path": 'weight_logs/Epoch96-Total_Loss0.0286-Val_Loss0.1654.pth',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value stored under ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   define YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults, then keyword overrides, then build the model."""
        self.__dict__.update(self._defaults)
        self.num_classes = 3
        # Overrides must land before the weights are read.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.config = Config
        self.generate()

    # ---------------------------------------------------#
    #   generate all information about model and data
    # ---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights and prepare decoding tools."""
        # NOTE: self.config is the module-level Config object, so this write
        # is visible to every other user of Config.
        self.config["yolo"]["classes"] = self.num_classes
        self.net = YoloBody(self.config)

        # load state of model
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()

        if self.cuda:
            # NOTE(review): CUDA has already been queried above; confirm that
            # setting CUDA_VISIBLE_DEVICES this late still has an effect.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # One decoder per output feature layer.
        decode_size = (self.model_image_size[1], self.model_image_size[0])
        self.yolo_decodes = [
            DecodeBox(self.config["yolo"]["anchors"][i],
                      self.config["yolo"]["classes"],
                      decode_size)
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A distinct frame colour per class, spread evenly over the hue wheel.
        self.colors = []
        for x in range(self.num_classes):
            rgb = colorsys.hsv_to_rgb(x / self.num_classes, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    @staticmethod
    def _build_predictions(boxes, top_angle, image_height, image_width, margin=5):
        """Turn corrected boxes and raw angle outputs into prediction dicts.

        Each box is (top, left, bottom, right); it is grown by ``margin``,
        rounded and clipped to the image. Each angle triple is multiplied
        by 90. Returns a list of ``{"box": [left, top, right, bottom],
        "angle": [yaw, pitch, roll]}``.
        """
        predictions = []
        for box, angle in zip(boxes, top_angle):
            top, left, bottom, right = box
            top = max(0, np.floor(top - margin + 0.5).astype('int32'))
            left = max(0, np.floor(left - margin + 0.5).astype('int32'))
            bottom = min(image_height,
                         np.floor(bottom + margin + 0.5).astype('int32'))
            right = min(image_width,
                        np.floor(right + margin + 0.5).astype('int32'))
            yaw, pitch, roll = angle * 90
            predictions.append({
                "box": [left, top, right, bottom],
                "angle": [yaw, pitch, roll],
            })
        return predictions

    # ---------------------------------------------------#
    #   detect object in image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and return a list of predictions.

        Each prediction is a dict with ``"box"`` ([left, top, right, bottom])
        and ``"angle"`` ([yaw, pitch, roll]). An empty list is returned when
        nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image_height, image_width = int(image_shape[0]), int(image_shape[1])
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        crop_img = np.array(letterbox_image(image, (model_h, model_w)))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))

        # Add the batch dimension.
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=self.iou)

        # A missing / None first entry is treated as "nothing detected".
        # The shape print lives inside the guard so it cannot crash that path.
        try:
            raw_detections = batch_detections[0]
            print(f'[INFO] batch_detections: {raw_detections.shape}')
            detections = raw_detections.cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return []

        # Column 4 is compared against the confidence threshold; columns 5..7
        # carry the three angle outputs.
        top_index = detections[:, 4] > self.confidence
        top_angle = detections[top_index, 5:8]
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = top_bboxes[:, 0:1]
        top_ymin = top_bboxes[:, 1:2]
        top_xmax = top_bboxes[:, 2:3]
        top_ymax = top_bboxes[:, 3:4]

        # Remove the grey bars: map boxes back onto the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([model_h, model_w]), image_shape)

        # NOTE: drawing boxes/axes onto the image was only ever sketched in
        # commented-out code here; this method returns data only.
        return self._build_predictions(
            boxes, top_angle, image_height, image_width)
class YOLO(object):
    """YOLOv3 inference wrapper whose network is built with a ``phi`` setting.

    Every entry of ``_defaults`` is copied onto the instance and can be
    overridden by passing a keyword argument of the same name to the
    constructor.
    """

    _defaults = {
        # --------------------------------------------#
        #   To predict with your own trained model, three parameters
        #   must be changed: phi, model_path and classes_path.
        #   If a shape mismatch occurs, check how model_path and
        #   classes_path were set during training.
        # --------------------------------------------#
        "model_path": 'model_data/efficientnet-b2-voc.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.3,
        "iou": 0.3,
        "phi": 2,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value stored under ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults, then keyword overrides, then build the model."""
        self.__dict__.update(self._defaults)
        # Overrides must land before the class file and weights are read.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.config = Config
        self.generate()

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    # ---------------------------------------------------#
    #   Build the model
    # ---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights and prepare decoding tools."""
        # NOTE: self.config is the module-level Config object, so this write
        # is visible to every other user of Config.
        self.config["yolo"]["classes"] = len(self.class_names)
        # ---------------------------------------------------#
        #   Build the yolov3 model
        # ---------------------------------------------------#
        self.net = YoloBody(self.config, phi=self.phi)

        # ---------------------------------------------------#
        #   Load the yolov3 weights
        # ---------------------------------------------------#
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()

        if self.cuda:
            # NOTE(review): CUDA has already been queried above; confirm that
            # setting CUDA_VISIBLE_DEVICES this late still has an effect.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # ---------------------------------------------------#
        #   One decoder per output feature layer
        # ---------------------------------------------------#
        decode_size = (self.model_image_size[1], self.model_image_size[0])
        self.yolo_decodes = [
            DecodeBox(self.config["yolo"]["anchors"][i],
                      self.config["yolo"]["classes"],
                      decode_size)
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A distinct box colour for every class, spread evenly over the hue wheel.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    @staticmethod
    def _expand_and_clip(box, image_height, image_width, margin=5):
        """Grow a (top, left, bottom, right) box by ``margin``, round it and clip it to the image."""
        top, left, bottom, right = box
        top = max(0, np.floor(top - margin + 0.5).astype('int32'))
        left = max(0, np.floor(left - margin + 0.5).astype('int32'))
        bottom = min(image_height, np.floor(bottom + margin + 0.5).astype('int32'))
        right = min(image_width, np.floor(right + margin + 0.5).astype('int32'))
        return top, left, bottom, right

    @staticmethod
    def _text_size(draw, label, font):
        """Return (width, height) of ``label``; works with and without ``ImageDraw.textsize``."""
        if hasattr(draw, 'textsize'):
            return draw.textsize(label, font)
        # Newer Pillow releases dropped textsize; the right/bottom edge of the
        # bounding box anchored at (0, 0) is the closest equivalent.
        _, _, right, bottom = draw.textbbox((0, 0), label, font=font)
        return right, bottom

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the results onto it.

        Returns the same image object; it is returned unmodified when
        nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image_height, image_width = int(image_shape[0]), int(image_shape[1])
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        # ---------------------------------------------------------#
        #   Pad with grey bars for a distortion-free resize.
        # ---------------------------------------------------------#
        crop_img = np.array(letterbox_image(image, (model_w, model_h)))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))

        with torch.no_grad():
            # Add the batch dimension.
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # ---------------------------------------------------------#
            #   Feed the image through the network.
            # ---------------------------------------------------------#
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]

            # ---------------------------------------------------------#
            #   Stack the predictions, then apply non-maximum suppression.
            # ---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=self.iou)

        # ---------------------------------------------------------#
        #   Nothing detected: return the image as is. A missing / None
        #   first entry is treated as "no detection"; any other
        #   failure is allowed to surface.
        # ---------------------------------------------------------#
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # ---------------------------------------------------------#
        #   Filter the boxes by score.
        # ---------------------------------------------------------#
        scores = detections[:, 4] * detections[:, 5]
        top_index = scores > self.confidence
        if not top_index.any():
            return image
        top_conf = scores[top_index]
        top_label = np.array(detections[top_index, -1], np.int32)
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = top_bboxes[:, 0:1]
        top_ymin = top_bboxes[:, 1:2]
        top_xmax = top_bboxes[:, 2:3]
        top_ymax = top_bboxes[:, 3:4]

        # -----------------------------------------------------------------#
        #   The boxes are relative to the letterboxed image, so the grey
        #   bars have to be removed.
        # -----------------------------------------------------------------#
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([model_h, model_w]), image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=int(np.floor(3e-2 * image_width + 0.5)))
        thickness = max((image_height + image_width) // model_h, 1)

        draw = ImageDraw.Draw(image)
        for index, class_id in enumerate(top_label):
            predicted_class = self.class_names[class_id]
            score = top_conf[index]
            top, left, bottom, right = self._expand_and_clip(
                boxes[index], image_height, image_width)

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)
            print(label.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box when there is room, otherwise inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            color = self.colors[self.class_names.index(predicted_class)]
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
        del draw
        return image
class YOLO(object):
    """YOLOv3 inference wrapper that draws labelled boxes on an image.

    Every entry of ``_defaults`` is copied onto the instance and can be
    overridden by passing a keyword argument of the same name to the
    constructor.
    """

    _defaults = {
        # The backslash is escaped explicitly; the path value is unchanged.
        "model_path": 'logs\\Epoch1-Total_Loss63.1416-Val_Loss15.9550.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        # NMS threshold; same value that used to be hard-coded in detect_image.
        "iou": 0.3,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value stored under ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults, then keyword overrides, then build the model."""
        self.__dict__.update(self._defaults)
        # Overrides must land before the class file and weights are read.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.config = Config
        self.generate()

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    # ---------------------------------------------------#
    #   Build the model, the decoders and the colour table
    # ---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights and prepare decoding tools."""
        # NOTE: self.config is the module-level Config object, so this write
        # is visible to every other user of Config.
        self.config["yolo"]["classes"] = len(self.class_names)
        self.net = YoloBody(self.config)

        print('Loading weights into state dict...')
        # Load the weight file onto the GPU when one is available.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()

        if self.cuda:
            # NOTE(review): CUDA has already been queried above; confirm that
            # setting CUDA_VISIBLE_DEVICES this late still has an effect.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # One decoder per output feature layer.
        decode_size = (self.model_image_size[1], self.model_image_size[0])
        self.yolo_decodes = [
            DecodeBox(self.config["yolo"]["anchors"][i],
                      self.config["yolo"]["classes"],
                      decode_size)
            for i in range(3)
        ]

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A distinct box colour for every class, spread evenly over the hue wheel.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    @staticmethod
    def _line_thickness(image_height, image_width, model_height):
        """Return the box outline thickness in pixels, never less than 1."""
        return max((image_height + image_width) // model_height, 1)

    @staticmethod
    def _expand_and_clip(box, image_height, image_width, margin=5):
        """Grow a (top, left, bottom, right) box by ``margin``, round it and clip it to the image."""
        top, left, bottom, right = box
        top = max(0, np.floor(top - margin + 0.5).astype('int32'))
        left = max(0, np.floor(left - margin + 0.5).astype('int32'))
        bottom = min(image_height, np.floor(bottom + margin + 0.5).astype('int32'))
        right = min(image_width, np.floor(right + margin + 0.5).astype('int32'))
        return top, left, bottom, right

    @staticmethod
    def _text_size(draw, label, font):
        """Return (width, height) of ``label``; works with and without ``ImageDraw.textsize``."""
        if hasattr(draw, 'textsize'):
            return draw.textsize(label, font)
        # Newer Pillow releases dropped textsize; the right/bottom edge of the
        # bounding box anchored at (0, 0) is the closest equivalent.
        _, _, right, bottom = draw.textbbox((0, 0), label, font=font)
        return right, bottom

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the results onto it.

        Returns the same image object; it is returned unmodified when
        nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image_height, image_width = int(image_shape[0]), int(image_shape[1])
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        # Image preparation: resize with grey bars, normalise to [0, 1] and
        # move the channel axis first, as PyTorch expects.
        crop_img = np.array(letterbox_image(image, (model_h, model_w)))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))

        # Add the batch dimension and convert from numpy to a tensor.
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        # Run the network and decode its outputs.
        with torch.no_grad():
            outputs = self.net(images)
            # The feature pyramid has three output scales, so three feature
            # layers are decoded (prior boxes adjusted) and then stacked.
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            # Non-maximum suppression.
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=self.iou)

        # A missing / None first entry is treated as "nothing detected";
        # any other failure is allowed to surface.
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # Box confidence times class confidence decides which boxes are kept.
        scores = detections[:, 4] * detections[:, 5]
        top_index = scores > self.confidence
        if not top_index.any():
            return image
        top_conf = scores[top_index]
        top_label = np.array(detections[top_index, -1], np.int32)
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = top_bboxes[:, 0:1]
        top_ymin = top_bboxes[:, 1:2]
        top_xmax = top_bboxes[:, 2:3]
        top_ymax = top_bboxes[:, 3:4]

        # Remove the grey bars. The boxes are currently relative to the
        # top-left corner of the padded image; yolo_correct_boxes converts
        # them to be relative to the top-left corner of the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([model_h, model_w]), image_shape)

        # Label font and box outline width.
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=int(np.floor(3e-2 * image_width + 0.5)))
        thickness = self._line_thickness(image_height, image_width, model_h)

        # Everything below is drawing.
        draw = ImageDraw.Draw(image)
        for index, class_id in enumerate(top_label):
            predicted_class = self.class_names[class_id]
            score = top_conf[index]
            top, left, bottom, right = self._expand_and_clip(
                boxes[index], image_height, image_width)

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)
            print(label.encode('utf-8'))

            # Put the label above the box when there is room, otherwise inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            color = self.colors[self.class_names.index(predicted_class)]
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            # Write the label text on the box.
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
        del draw
        return image
class YOLO(object):
    """YOLOv3 inference wrapper that returns the box of the last kept detection.

    Every entry of ``_defaults`` is copied onto the instance and can be
    overridden by passing a keyword argument of the same name to the
    constructor.
    """

    _defaults = {
        "model_path": 'logs/Epoch48-Total_Loss1.6398-Val_Loss1.4981.pth',
        "classes_path": 'model_data/new_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        # NMS threshold; same value that used to be hard-coded in detect_image.
        "iou": 0.3,
        "cuda": False
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value stored under ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults, then keyword overrides, then build the model."""
        self.__dict__.update(self._defaults)
        # Overrides must land before the class file and weights are read.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.config = Config
        self.generate()

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    # ---------------------------------------------------#
    #   Build the model, the decoders and the colour table
    # ---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights and prepare decoding tools."""
        # NOTE: self.config is the module-level Config object, so this write
        # is visible to every other user of Config.
        self.config["yolo"]["classes"] = len(self.class_names)
        self.net = YoloBody(self.config)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()

        if self.cuda:
            # NOTE(review): CUDA has already been queried above; confirm that
            # setting CUDA_VISIBLE_DEVICES this late still has an effect.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        # One decoder per output feature layer.
        decode_size = (self.model_image_size[1], self.model_image_size[0])
        self.yolo_decodes = [
            DecodeBox(self.config["yolo"]["anchors"][i],
                      self.config["yolo"]["classes"],
                      decode_size)
            for i in range(3)
        ]

        # A distinct colour for every class, spread evenly over the hue wheel.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    @staticmethod
    def _expand_and_clip(box, image_height, image_width, margin=5):
        """Grow a (top, left, bottom, right) box by ``margin``, round it and clip it to the image."""
        top, left, bottom, right = box
        top = max(0, np.floor(top - margin + 0.5).astype('int32'))
        left = max(0, np.floor(left - margin + 0.5).astype('int32'))
        bottom = min(image_height, np.floor(bottom + margin + 0.5).astype('int32'))
        right = min(image_width, np.floor(right + margin + 0.5).astype('int32'))
        return top, left, bottom, right

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and return one bounding box.

        Returns ``(top, left, bottom, right)`` of the last detection that
        passes the score filter, grown by 5 pixels and clipped to the image.
        When nothing is detected, or nothing passes the filter, the input
        image is returned unchanged.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image_height, image_width = int(image_shape[0]), int(image_shape[1])
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        crop_img = np.array(letterbox_image(image, (model_h, model_w)))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))

        # Add the batch dimension.
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=self.iou)

        # A missing / None first entry is treated as "nothing detected";
        # any other failure is allowed to surface.
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # Keep detections whose objectness * class score clears the threshold.
        top_index = detections[:, 4] * detections[:, 5] > self.confidence
        if not top_index.any():
            # Same result as the no-detection path; without this guard there
            # would be no box to return below.
            return image
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = top_bboxes[:, 0:1]
        top_ymin = top_bboxes[:, 1:2]
        top_xmax = top_bboxes[:, 2:3]
        top_ymax = top_bboxes[:, 3:4]

        # Remove the grey bars: map boxes back onto the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([model_h, model_w]), image_shape)

        # Only the last kept box is reported, so only that one is processed.
        return self._expand_and_clip(boxes[-1], image_height, image_width)