# ---------------------------------------------------------------------------
# Training setup: build the detector, warm-start it from a checkpoint and
# split the annotation lines into training / validation subsets.
# ---------------------------------------------------------------------------
EPOCH_LENGTH = 2000
NUM_CLASSES = 20
IMAGE_SHAPE = [600, 600, 3]
BACKBONE = "resnet50"

model = FasterRCNN(NUM_CLASSES, backbone=BACKBONE).cuda()

# See the README for where to download the weight file.
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
pretrained_dict = torch.load("model_data/voc_weights_resnet.pth",
                             map_location=device)
# Keep only checkpoint tensors that exist in the model AND have the same
# shape. The membership test matters: indexing model_dict[k] for a key the
# model does not have would raise KeyError before the shape comparison.
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items()
    if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')

cudnn.benchmark = True

# 10% of the annotation lines are used for validation, 90% for training.
val_split = 0.1
with open(annotation_path) as f:
    lines = f.readlines()
# Shuffle with a fixed seed so the split is reproducible, then re-seed the
# global NumPy RNG unpredictably for everything that follows.
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)
num_val = int(len(lines) * val_split)
num_train = len(lines) - num_val
class FRCNN2(object):
    """Faster R-CNN wrapper that exports boxes and per-box features.

    Unlike the drawing variants, ``detect_image`` returns a dict holding
    base64-encoded boxes and a fixed-size ``(4, 2048)`` feature block.
    """

    _defaults = {
        "model_path": 'logs/Epoch49-Total_Loss0.3838-Val_Loss0.4336.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.6,
        "backbone": "resnet50"
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise Faster R-CNN
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load class names and model weights.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (e.g. ``model_path``, ``confidence``).
        """
        self.__dict__.update(self._defaults)
        # Apply caller overrides; previously they were accepted but ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()
        # Both tensors have shape (1, 4 * (num_classes + 1)) and are handed
        # to DecodeBox in detect_image.
        self.mean = torch.Tensor([0, 0, 0, 0]).cuda().repeat(
            self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).cuda().repeat(
            self.num_classes + 1)[None]

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    # ---------------------------------------------------#
    #   Build the model and the colour table
    # ---------------------------------------------------#
    def generate(self):
        """Build the network on the GPU, load its weights, build colours."""
        # Total number of classes.
        self.num_classes = len(self.class_names)

        # Build the model first, then load the weights into it.
        self.model = FasterRCNN(self.num_classes, "predict",
                                backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        # Inference must run in eval mode so BatchNorm/Dropout layers use
        # their inference behaviour (matches the other FRCNN variants).
        self.model = self.model.eval()

        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One distinct colour per class, evenly spaced in hue.
        count = len(self.class_names)
        self.colors = [
            tuple(int(channel * 255)
                  for channel in colorsys.hsv_to_rgb(idx / count, 1., 1.))
            for idx in range(count)
        ]

    # ---------------------------------------------------#
    #   Detect one image
    # ---------------------------------------------------#
    def detect_image(self, image_id, image, savepath):
        """Run detection and return boxes plus features for one image.

        Args:
            image_id: Identifier copied verbatim into the result.
            image: Input image; it must support ``resize`` and conversion
                via ``np.array`` (presumably a PIL image -- confirm).
            savepath: Unused here; kept so existing callers keep working
                (the drawing/saving code that used it is disabled).

        Returns:
            A dict with keys ``image_id``, ``image_h``, ``image_w``,
            ``num_boxes``, ``boxes`` and ``features`` (the last two
            base64-encoded). NOTE(review): when nothing is detected a copy
            of the input image is returned instead of a dict -- callers
            must handle both.

        Raises:
            ValueError: If fewer than 4 detections exist and none of them
                has label 1, 3 or 5, the features cannot be reshaped to
                ``(4, 2048)``. NOTE(review): confirm the intended fallback.
        """
        image_shape = np.array(np.shape(image)[0:2])
        old_height, old_width = image_shape[0], image_shape[1]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])

        # Scale to [0, 1] and move channels first.
        photo = np.transpose(np.array(image, dtype=np.float32) / 255,
                             (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo])).cuda()
            roi_cls_locs, roi_scores, rois, roi_indices, feature = \
                self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs, roi_scores, rois,
                                        feature, height=height, width=width,
                                        score_thresh=self.confidence)

        if len(outputs) == 0:
            return old_image

        # Keep at most the 4 rows with the highest value in column 4.
        if np.size(outputs, 0) > 4:
            outputs = outputs[np.argsort(outputs[:, 4])]
            outputs = outputs[-4:, :]

        # Columns are read as: 0-3 box, 4 confidence, 5 label, 6+ features.
        bbox = outputs[:, :4]
        label = outputs[:, 5]
        features = outputs[:, 6:]

        # With fewer than 4 rows, pad the features by repeating the row of
        # the first detection whose label is 1, 3 or 5. Boxes are NOT
        # padded, so num_boxes can be smaller than the feature row count.
        missing = 4 - np.size(features, 0)
        if missing > 0:
            for row, lab in enumerate(label):
                if lab in (1, 3, 5):
                    filler = features[row, :].reshape(1, 2048)
                    features = np.append(
                        features, np.repeat(filler, missing, axis=0), axis=0)
                    break

        # Map boxes from the resized image back to the original size.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)

        features = np.array(features, np.float32).reshape((4, 2048))
        print(np.size(features))

        return {
            'image_id': image_id,
            'image_h': height,
            'image_w': width,
            'num_boxes': np.size(bbox, 0),
            'boxes': base64.b64encode(bbox),
            'features': base64.b64encode(features)
        }
class FRCNN(object):
    """Faster R-CNN wrapper that draws detections onto the input image."""

    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "backbone": "resnet50"
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load class names and model weights.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (e.g. ``model_path``, ``confidence``).
        """
        self.__dict__.update(self._defaults)
        # Apply caller overrides; previously they were accepted but ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()
        # Both tensors have shape (1, 4 * (num_classes + 1)) and are handed
        # to DecodeBox in detect_image.
        self.mean = torch.Tensor([0, 0, 0, 0]).cuda().repeat(
            self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).cuda().repeat(
            self.num_classes + 1)[None]

    #---------------------------------------------------#
    #   Read all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    #---------------------------------------------------#
    #   Build the model and the colour table
    #---------------------------------------------------#
    def generate(self):
        """Build the network on the GPU, load its weights, build colours."""
        # Total number of classes.
        self.num_classes = len(self.class_names)

        # Build the model first, then load the weights into it.
        self.model = FasterRCNN(self.num_classes, "predict",
                                backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        # Inference must run in eval mode so BatchNorm/Dropout layers use
        # their inference behaviour (matches the later FRCNN variants).
        self.model = self.model.eval()

        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One distinct colour per class, evenly spaced in hue.
        count = len(self.class_names)
        self.colors = [
            tuple(int(channel * 255)
                  for channel in colorsys.hsv_to_rgb(idx / count, 1., 1.))
            for idx in range(count)
        ]

    #---------------------------------------------------#
    #   Detect one image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects and draw labelled boxes on a copy of ``image``.

        Args:
            image: Input image; it must support ``resize`` and be drawable
                with ``ImageDraw`` (presumably a PIL image -- confirm).

        Returns:
            A deep copy of the input with boxes and captions drawn on it;
            the copy is returned unmodified when nothing is detected.
        """
        start_time = time.time()
        image_shape = np.array(np.shape(image)[0:2])
        old_height, old_width = image_shape[0], image_shape[1]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])

        # Scale to [0, 1] and move channels first.
        photo = np.transpose(np.array(image, dtype=np.float32) / 255,
                             (2, 0, 1))

        # No gradients are needed for inference; skipping autograd avoids
        # building a graph for every image.
        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo])).cuda()
            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs, roi_scores, rois,
                                        height=height, width=width,
                                        score_thresh=self.confidence)

        if len(outputs) == 0:
            return old_image

        # Columns are read as: 0-3 box, 4 confidence, 5 label.
        bbox = outputs[:, :4]
        conf = outputs[:, 4]
        label = outputs[:, 5]

        # Map boxes from the resized image back to the original size.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)

        image = old_image
        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        for idx, cls_id in enumerate(label):
            predicted_class = self.class_names[int(cls_id)]
            score = conf[idx]

            # Grow the box by 5 px on every side, then clamp to the image.
            left, top, right, bottom = bbox[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0],
                         np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1],
                        np.floor(right + 0.5).astype('int32'))

            # Draw the box and its caption.
            caption = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(caption, font)
            caption = caption.encode('utf-8')
            print(caption)

            # Put the caption above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give the outline its thickness.
            for step in range(thickness):
                draw.rectangle(
                    [left + step, top + step, right - step, bottom - step],
                    outline=self.colors[int(cls_id)])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[int(cls_id)])
            draw.text(text_origin, str(caption, 'UTF-8'), fill=(0, 0, 0),
                      font=font)
            del draw

        print("time:", time.time() - start_time)
        return image
class FRCNN(object):
    """Faster R-CNN wrapper with optional CUDA that draws class labels."""

    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "iou": 0.3,
        "backbone": "resnet50",
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # Initialisation
    def __init__(self, **kwargs):
        """Load class names and model weights.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (e.g. ``cuda=False``, ``confidence=0.3``).
        """
        self.__dict__.update(self._defaults)
        # Apply caller overrides; previously they were accepted but ignored,
        # so e.g. cuda=False could never take effect.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()

        # Both tensors have shape (1, 4 * (num_classes + 1)) and are handed
        # to DecodeBox in detect_image.
        self.mean = torch.Tensor([0, 0, 0, 0]).repeat(
            self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(
            self.num_classes + 1)[None]
        if self.cuda:
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    def generate(self):
        """Build the network, load its weights and build the colour table."""
        self.num_classes = len(self.class_names)

        # Build the model and load the weights.
        self.model = FasterRCNN(self.num_classes, "predict",
                                backbone=self.backbone)
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.model.load_state_dict(state_dict)

        self.model = self.model.eval()

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.model = nn.DataParallel(self.model)
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One distinct colour per class, evenly spaced in hue.
        count = len(self.class_names)
        self.colors = [
            tuple(int(channel * 255)
                  for channel in colorsys.hsv_to_rgb(idx / count, 1., 1.))
            for idx in range(count)
        ]

    # Detect one image
    def detect_image(self, image):
        """Detect objects and draw labelled boxes on a copy of ``image``.

        Args:
            image: Input image; it must support ``resize`` and be drawable
                with ``ImageDraw`` (presumably a PIL image -- confirm).

        Returns:
            A deep copy of the input with boxes and class names drawn on
            it; the copy is returned unmodified when nothing is detected.
        """
        with torch.no_grad():
            start_time = time.time()
            image_shape = np.array(np.shape(image)[0:2])
            old_height, old_width = image_shape[0], image_shape[1]
            old_image = copy.deepcopy(image)

            width, height = get_new_img_size(old_width, old_height)
            image = image.resize([width, height], Image.BICUBIC)

            # Scale to [0, 1] and move channels first.
            photo = np.transpose(np.array(image, dtype=np.float32) / 255,
                                 (2, 0, 1))

            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # Run the network, then decode its raw outputs into boxes.
            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs, roi_scores, rois,
                                        height=height, width=width,
                                        nms_iou=self.iou,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return old_image

            # Columns are read as: 0-3 box, 4 confidence, 5 label.
            bbox = outputs[:, :4]
            conf = outputs[:, 4]
            label = outputs[:, 5]

            # Map boxes from the resized image back to the original size.
            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)

        image = old_image
        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        for idx, cls_id in enumerate(label):
            predicted_class = self.class_names[int(cls_id)]
            score = conf[idx]

            # Grow the box by 5 px on every side, then clamp to the image.
            left, top, right, bottom = bbox[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0],
                         np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1],
                        np.floor(right + 0.5).astype('int32'))

            # Caption is the class name only (no score) in this variant.
            caption = '{}'.format(predicted_class)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(caption, font)
            caption = caption.encode('utf-8')
            print(caption)

            # Put the caption above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give the outline its thickness.
            for step in range(thickness):
                draw.rectangle(
                    [left + step, top + step, right - step, bottom - step],
                    outline=self.colors[int(cls_id)])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[int(cls_id)])
            draw.text(text_origin, str(caption, 'UTF-8'), fill=(0, 0, 0),
                      font=font)
            del draw

        print("time:", time.time() - start_time)
        return image
class FRCNN(object):
    """Faster R-CNN wrapper with optional CUDA and a cached box decoder."""

    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.8,
        "iou": 0.3,
        "backbone": "vgg16",
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load class names, model weights and build the box decoder.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (e.g. ``cuda=False``, ``backbone="resnet50"``).
        """
        self.__dict__.update(self._defaults)
        # Apply caller overrides; previously they were accepted but ignored,
        # so e.g. cuda=False could never take effect.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()

        # Both tensors have shape (1, 4 * (num_classes + 1)).
        self.mean = torch.Tensor([0, 0, 0, 0]).repeat(
            self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(
            self.num_classes + 1)[None]
        if self.cuda:
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

        # Built once here and reused for every detect_image call.
        self.decodebox = DecodeBox(self.std, self.mean, self.num_classes)

    #---------------------------------------------------#
    #   Read all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``self.classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        """Build the network, load its weights and build the colour table."""
        #-------------------------------#
        #   Total number of classes
        #-------------------------------#
        self.num_classes = len(self.class_names)

        #-------------------------------#
        #   Build the model, load weights
        #-------------------------------#
        self.model = FasterRCNN(self.num_classes, "predict",
                                backbone=self.backbone).eval()
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.model.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = "0"
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One distinct colour per class, evenly spaced in hue.
        count = len(self.class_names)
        self.colors = [
            tuple(int(channel * 255)
                  for channel in colorsys.hsv_to_rgb(idx / count, 1., 1.))
            for idx in range(count)
        ]

    #---------------------------------------------------#
    #   Detect one image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects and draw labelled boxes on a copy of ``image``.

        Args:
            image: Input image; it must support ``resize`` and be drawable
                with ``ImageDraw`` (presumably a PIL image -- confirm).

        Returns:
            A deep copy of the input with boxes and captions drawn on it;
            the copy is returned unmodified when nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   Resize the input to the size chosen by get_new_img_size
        #   (the original note says: short side becomes 600).
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)

        #-----------------------------------------------------------#
        #   Pre-process: scale to [0, 1] and move channels first.
        #-----------------------------------------------------------#
        photo = np.transpose(np.array(image, dtype=np.float32) / 255,
                             (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.model(images)
            #-------------------------------------------------------------#
            #   Decode the classifier predictions for the proposals into
            #   final boxes.
            #-------------------------------------------------------------#
            outputs = self.decodebox.forward(roi_cls_locs[0], roi_scores[0],
                                             rois, height=height, width=width,
                                             nms_iou=self.iou,
                                             score_thresh=self.confidence)
            #---------------------------------------------------------#
            #   Nothing detected: return the untouched copy.
            #---------------------------------------------------------#
            if len(outputs) == 0:
                return old_image
            outputs = np.array(outputs)
            # Columns are read as: 0-3 box, 4 label, 5 confidence.
            bbox = outputs[:, :4]
            label = outputs[:, 4]
            conf = outputs[:, 5]

            # Map boxes from the resized image back to the original size.
            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height

        # NOTE(review): `image` is still the resized image here, so the font
        # size follows the resized width, not the original -- confirm intent.
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for idx, cls_id in enumerate(label):
            predicted_class = self.class_names[int(cls_id)]
            score = conf[idx]

            # Grow the box by 5 px on every side, then clamp to the image.
            left, top, right, bottom = bbox[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0],
                         np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1],
                        np.floor(right + 0.5).astype('int32'))

            # Draw the box and its caption.
            caption = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(caption, font)
            caption = caption.encode('utf-8')
            print(caption, top, left, bottom, right)

            # Put the caption above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give the outline its thickness.
            for step in range(thickness):
                draw.rectangle(
                    [left + step, top + step, right - step, bottom - step],
                    outline=self.colors[int(cls_id)])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[int(cls_id)])
            draw.text(text_origin, str(caption, 'UTF-8'), fill=(0, 0, 0),
                      font=font)
            del draw

        return image