def generate(self):
    """Build the Faster R-CNN predictor, load its weights and pick box colours.

    Reads ``self.class_names``, ``self.backbone``, ``self.model_path`` and
    ``self.cuda``. Sets ``self.num_classes``, ``self.model`` (in eval mode)
    and ``self.colors`` (one RGB tuple of ints per class name).
    """
    class_count = len(self.class_names)
    self.num_classes = class_count

    # Build the network in "predict" mode, then load the checkpoint into it.
    self.model = FasterRCNN(class_count, "predict", backbone=self.backbone)
    print('Loading weights into state dict...')
    if torch.cuda.is_available():
        load_target = torch.device('cuda')
    else:
        load_target = torch.device('cpu')
    weights = torch.load(self.model_path, map_location=load_target)
    self.model.load_state_dict(weights)
    self.model = self.model.eval()

    if self.cuda:
        # NOTE(review): this is set after CUDA availability was already
        # queried above; confirm it still has the intended effect.
        os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        self.model = nn.DataParallel(self.model).cuda()

    print('{} model, anchors, and classes loaded.'.format(self.model_path))

    # One colour per class: evenly spaced hues at full saturation/value,
    # converted to 0-255 integer RGB.
    palette = []
    for index in range(class_count):
        red, green, blue = colorsys.hsv_to_rgb(index / class_count, 1., 1.)
        palette.append((int(red * 255), int(green * 255), int(blue * 255)))
    self.colors = palette
def generate(self):
    """Create the prediction model, load its checkpoint and set box colours.

    Reads ``self.class_names``, ``self.backbone``, ``self.model_path`` and
    ``self.cuda``. Sets ``self.num_classes``, ``self.model`` and
    ``self.colors`` (one RGB tuple of ints per class name).
    """
    # Total number of classes.
    class_count = len(self.class_names)
    self.num_classes = class_count

    # Build the model (put straight into eval mode) and load the weights.
    network = FasterRCNN(class_count, "predict", backbone=self.backbone)
    self.model = network.eval()
    print('Loading weights into state dict...')
    if torch.cuda.is_available():
        load_target = torch.device('cuda')
    else:
        load_target = torch.device('cpu')
    weights = torch.load(self.model_path, map_location=load_target)
    self.model.load_state_dict(weights)

    if self.cuda:
        # The original has nn.DataParallel wrapping commented out here.
        self.model = self.model.cuda()

    print('{} model, anchors, and classes loaded.'.format(self.model_path))

    # A different colour for each class: evenly spaced hues converted to
    # 0-255 integer RGB.
    palette = []
    for index in range(class_count):
        red, green, blue = colorsys.hsv_to_rgb(index / class_count, 1., 1.)
        palette.append((int(red * 255), int(green * 255), int(blue * 255)))
    self.colors = palette
def generate(self):
    """Create the CUDA prediction model, load weights and set box colours.

    Reads ``self.class_names``, ``self.backbone`` and ``self.model_path``.
    Sets ``self.num_classes`` (number of class names plus one),
    ``self.model`` and ``self.colors``. The model is moved to the GPU
    unconditionally.
    """
    name_count = len(self.class_names)
    # Total number of categories: one more than the number of class names.
    self.num_classes = name_count + 1

    # Build the model first, then load the saved state dict into it.
    network = FasterRCNN(self.num_classes, "predict", backbone=self.backbone)
    self.model = network.cuda()
    saved_state = torch.load(self.model_path)
    self.model.load_state_dict(saved_state)
    cudnn.benchmark = True

    print('{} model, anchors, and classes loaded.'.format(self.model_path))

    # A different colour for each class name: evenly spaced hues converted
    # to 0-255 integer RGB.
    palette = []
    for index in range(name_count):
        red, green, blue = colorsys.hsv_to_rgb(index / name_count, 1., 1.)
        palette.append((int(red * 255), int(green * 255), int(blue * 255)))
    self.colors = palette
val_toal_loss += val_total print('Finish Validation') print('\nEpoch:'+ str(epoch+1) + '/' + str(Epoch)) print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_toal_loss/(epoch_size_val+1))) print('Saving state, iter:', str(epoch+1)) torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch+1),total_loss/(epoch_size+1),val_toal_loss/(epoch_size_val+1))) if __name__ == "__main__": # 参数初始化 annotation_path = '2007_train.txt' EPOCH_LENGTH = 2000 NUM_CLASSES = 20 IMAGE_SHAPE = [600,600,3] BACKBONE = "resnet50" model = FasterRCNN(NUM_CLASSES,backbone=BACKBONE).cuda() #-------------------------------------------# # 权值文件的下载请看README #-------------------------------------------# print('Loading weights into state dict...') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model_dict = model.state_dict() pretrained_dict = torch.load("model_data/voc_weights_resnet.pth", map_location=device) pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)} model_dict.update(pretrained_dict) model.load_state_dict(model_dict) print('Finished!') cudnn.benchmark = True
class FRCNN2(object):
    """Faster R-CNN predictor that exports boxes and RoI features per image.

    Settings start from ``_defaults`` and can be overridden with keyword
    arguments to the constructor. The model and tensors are moved to the GPU
    unconditionally, so a CUDA device is required.
    """

    # Default settings copied onto every instance by ``__init__``.
    _defaults = {
        "model_path": 'logs/Epoch49-Total_Loss0.3838-Val_Loss0.4336.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.6,
        "backbone": "resnet50"
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a message string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Initialise the predictor.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (for example ``model_path`` or ``confidence``).
        """
        self.__dict__.update(self._defaults)
        # Keyword overrides were previously accepted but silently ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()
        # Per-class offset/scale vectors handed to DecodeBox in detect_image.
        self.mean = torch.Tensor([0, 0, 0, 0]).cuda().repeat(
            self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).cuda().repeat(
            self.num_classes + 1)[None]

    def _get_class(self):
        """Read the class names file and return one stripped name per line."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    def generate(self):
        """Build the model on the GPU, load its weights and set box colours."""
        # Total number of classes.
        self.num_classes = len(self.class_names)

        # Build the model first, then load the saved state dict into it.
        self.model = FasterRCNN(self.num_classes, "predict",
                                backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different colour per class: evenly spaced hues as 0-255 RGB.
        class_count = len(self.class_names)
        self.colors = []
        for index in range(class_count):
            red, green, blue = colorsys.hsv_to_rgb(index / class_count, 1., 1.)
            self.colors.append(
                (int(red * 255), int(green * 255), int(blue * 255)))

    def detect_image(self, image_id, image, savepath):
        """Run detection and return base64-encoded boxes and features.

        Args:
            image_id: Identifier copied into the result unchanged.
            image: Input image; must support ``resize`` and ``np.shape``
                (presumably a PIL image -- TODO confirm against callers).
            savepath: Unused; kept so existing callers keep working. The
                drawing/saving code that used it was commented out in the
                original and has been removed.

        Returns:
            A deep copy of the input image when nothing is detected,
            otherwise a dict with keys ``image_id``, ``image_h``,
            ``image_w``, ``num_boxes``, ``boxes`` and ``features``.

        Raises:
            ValueError: From the final reshape when fewer than four
                detections remain and none is labelled 1, 3 or 5, because
                the features then cannot be padded to four rows.
        """
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        resized = image.resize([width, height])
        # Scale pixels to [0, 1] and move the channel axis first.
        photo = np.transpose(np.array(resized, dtype=np.float32) / 255,
                             (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo])).cuda()
            roi_cls_locs, roi_scores, rois, roi_indices, feature = self.model(
                images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs, roi_scores, rois,
                                        feature, height=height, width=width,
                                        score_thresh=self.confidence)

        if len(outputs) == 0:
            return old_image

        # Keep only the four rows with the highest value in column 4.
        if np.size(outputs, 0) > 4:
            outputs = outputs[np.argsort(outputs[:, 4])]
            outputs = outputs[-4:, :]

        bbox = outputs[:, :4]
        label = outputs[:, 5]
        f = outputs[:, 6:]

        # With fewer than four rows, pad the features by repeating the row
        # of the first detection labelled 1, 3 or 5. The boxes are not
        # padded, so ``num_boxes`` can be smaller than the feature count.
        missing = 4 - np.size(f, 0)
        if missing > 0:
            for row, lab in enumerate(label):
                if (lab == 1) or (lab == 3) or (lab == 5):
                    filler = f[row, :].reshape(1, 2048)
                    for _ in range(missing):
                        f = np.append(f, filler, axis=0)
                    break

        # Map the boxes from the resized image back to the original size.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)
        # The original printed a marker when the row count was not 4 after
        # this reshape; that check could never fire and has been dropped.
        f = np.array(f, np.float32).reshape((4, 2048))
        print(np.size(f))

        # NOTE(review): image_h/image_w are the resized dimensions while the
        # boxes are in original-image coordinates -- confirm this is intended.
        return {
            'image_id': image_id,
            'image_h': height,
            'image_w': width,
            'num_boxes': np.size(bbox, 0),
            'boxes': base64.b64encode(bbox),
            'features': base64.b64encode(f)
        }
# NUM_CLASSES must be updated before training: set it to the number of
# classes to distinguish.
NUM_CLASSES = 20
# input_shape is the input image size (default 800,800,3); GPU memory use
# grows with the input size. The video uses 600,600,3; in practice 800,800,3
# was found to work better.
input_shape = [800, 800, 3]
# Backbone feature-extraction network: vgg or resnet50.
backbone = "resnet50"

model = FasterRCNN(NUM_CLASSES, backbone=backbone)
weights_init(model)

# See the README for the weight file (Baidu Netdisk download).
model_path = 'model_data/voc_weights_resnet.pth'
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
pretrained_dict = torch.load(model_path, map_location=device)
# Keep only checkpoint tensors whose name exists in the model and whose shape
# matches. The membership test avoids a KeyError when the checkpoint contains
# a key this model does not have.
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items()
    if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')
class FRCNN(object):
    """Faster R-CNN predictor that draws labelled boxes on an image.

    Settings start from ``_defaults`` and can be overridden with keyword
    arguments to the constructor.
    """

    # Default settings copied onto every instance by ``__init__``.
    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "iou": 0.3,
        "backbone": "resnet50",
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a message string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Initialise the predictor.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (for example ``cuda=False`` or ``confidence=0.7``).
        """
        self.__dict__.update(self._defaults)
        # Keyword overrides were previously accepted but silently ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()
        # Per-class offset/scale vectors handed to DecodeBox in detect_image.
        self.mean = torch.Tensor([0, 0, 0, 0]).repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(
            self.num_classes + 1)[None]
        if self.cuda:
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

    def _get_class(self):
        """Read the class names file and return one stripped name per line."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    def generate(self):
        """Build the model, load its weights and set the box colours."""
        self.num_classes = len(self.class_names)

        # Load the model.
        self.model = FasterRCNN(self.num_classes, "predict",
                                backbone=self.backbone)
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.model.load_state_dict(state_dict)
        self.model = self.model.eval()

        if self.cuda:
            # NOTE(review): this is set after CUDA availability was already
            # queried above; confirm it still has the intended effect.
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.model = nn.DataParallel(self.model)
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different colour per class: evenly spaced hues as 0-255 RGB.
        class_count = len(self.class_names)
        self.colors = []
        for index in range(class_count):
            red, green, blue = colorsys.hsv_to_rgb(index / class_count, 1., 1.)
            self.colors.append(
                (int(red * 255), int(green * 255), int(blue * 255)))

    def detect_image(self, image):
        """Detect objects in ``image`` and draw class-labelled boxes.

        Args:
            image: Input image; must support ``resize`` and be drawable by
                ``ImageDraw`` (presumably a PIL image -- TODO confirm).

        Returns:
            A deep copy of the input, unchanged when nothing is detected,
            otherwise with one box and class-name caption per detection.
        """
        start_time = time.time()
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        resized = image.resize([width, height], Image.BICUBIC)
        # Scale pixels to [0, 1] and move the channel axis first.
        photo = np.transpose(np.array(resized, dtype=np.float32) / 255,
                             (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # Decode the network output into boxes.
            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs, roi_scores, rois,
                                        height=height, width=width,
                                        nms_iou=self.iou,
                                        score_thresh=self.confidence)

        if len(outputs) == 0:
            return old_image

        bbox = outputs[:, :4]
        class_ids = outputs[:, 5]
        # Map the boxes from the resized image back to the original size.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)

        image = old_image
        # The canvas size does not change while drawing, so read it once
        # instead of converting the image to an array for every box.
        canvas_height = np.shape(image)[0]
        canvas_width = np.shape(image)[1]
        thickness = (canvas_height + canvas_width) // old_width * 2
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * canvas_width + 0.5).astype('int32'))
        draw = ImageDraw.Draw(image)

        for row, class_id in enumerate(class_ids):
            class_index = int(class_id)
            predicted_class = self.class_names[class_index]

            # Grow each box by 5 pixels per side, clamped to the canvas.
            left, top, right, bottom = bbox[row]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(canvas_height,
                         np.floor(bottom + 0.5).astype('int32'))
            right = min(canvas_width, np.floor(right + 0.5).astype('int32'))

            # Caption output.
            caption = '{}'.format(predicted_class)
            label_size = draw.textsize(caption, font)
            caption_bytes = caption.encode('utf-8')
            print(caption_bytes)

            # Put the caption above the box, or just inside it when the box
            # touches the top of the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=self.colors[class_index])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[class_index])
            draw.text(text_origin, str(caption_bytes, 'UTF-8'),
                      fill=(0, 0, 0), font=font)

        print("time:", time.time() - start_time)
        return image
# Suggested settings (English summary of the note below):
#   VGG: SGD optimizer, learning rate 1e-3 while frozen and 1e-4 when
#     unfrozen; n_train_post_nms=2000 for ProposalCreator in nets.rpn;
#     pos_ratio=0.25 for ProposalTargetCreator in utils.utils.
#   RESNET50: Adam optimizer, learning rate 1e-4 while frozen and 1e-5 when
#     unfrozen; n_train_post_nms=300 for ProposalCreator in nets.rpn;
#     pos_ratio=0.5 for ProposalTargetCreator in utils.utils.
'''
一些建议的参数设置:
VGG:SGD优化器,冻结时学习率1e-3,解冻时学习率1e-4
nets.rpn中ProposalCreator的n_train_post_nms=2000;
utils.utils中ProposalTargetCreator的pos_ratio=0.25;
RESNET50:Adam优化器,冻结时学习率1e-4,解冻时学习率1e-5
nets.rpn中ProposalCreator的n_train_post_nms=300;
utils.utils中ProposalTargetCreator的pos_ratio=0.5;
'''
if __name__ == "__main__":
    # Parameter initialisation.
    annotation_path = '2007_train.txt'
    NUM_CLASSES = 20
    IMAGE_SHAPE = [600, 600, 3]
    BACKBONE = "resnet50"
    model = FasterRCNN(NUM_CLASSES, backbone=BACKBONE).cuda()

    # Whether to use a DataLoader.
    Use_Data_Loader = True

    model_path = r'model_data/voc_weights_resnet.pth'
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    # Keep only checkpoint tensors whose name exists in the model and whose
    # shape matches. The membership test avoids a KeyError when the
    # checkpoint contains a key this model does not have.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
    }
from nets.frcnn import FasterRCNN
from torchsummary import summary
from PIL import Image
import numpy as np
from utils.utils import loc2bbox
import torch
from thop import profile
from thop import clever_format
import torch

# Scratch script: push a two-image batch through an untrained Faster R-CNN
# on the GPU and convert the returned RoIs to a tensor.
model = FasterRCNN(20, backbone="resnet50").cuda()
# model.load_state_dict(torch.load("logs/Epoch7-Total_Loss0.9575.pth"))

# Load the sample picture at 600x600 and move the channel axis first.
a = np.transpose(
    np.array(Image.open("img/street.jpg").resize([600, 600])), [2, 0, 1])
# Duplicate the picture along a new leading axis to form a batch of two.
a = torch.Tensor(np.stack([a, a], axis=0)).cuda()

roi_cls_locs, roi_scores, rois, roi_indices = model(a)
rois = torch.Tensor(rois)

# Box decoding kept disabled, as in the original:
# mean = torch.Tensor([0,0,0,0]).cuda(). \
#     repeat(21)[None]
# std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).cuda(). \
#     repeat(21)[None]
# roi_cls_loc = (roi_cls_locs * std + mean)
# roi_cls_loc = roi_cls_loc.view([-1, 21, 4])
# roi = rois.view((-1, 1, 4)).expand_as(roi_cls_loc)
# cls_bbox = loc2bbox((roi.cpu().detach().numpy()).reshape((-1, 4)),
#                     (roi_cls_loc.cpu().detach().numpy()).reshape((-1, 4)))
# cls_bbox = torch.Tensor(cls_bbox)
# cls_bbox = cls_bbox.view([-1, 21 * 4])
# print(cls_bbox)
class FRCNN(object):
    """Faster R-CNN predictor that draws labelled, scored boxes on an image.

    Settings start from ``_defaults`` and can be overridden with keyword
    arguments to the constructor. The model and tensors are moved to the GPU
    unconditionally, so a CUDA device is required.
    """

    # Default settings copied onto every instance by ``__init__``.
    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "backbone": "resnet50"
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a message string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Initialise the predictor.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (for example ``model_path`` or ``confidence``).
        """
        self.__dict__.update(self._defaults)
        # Keyword overrides were previously accepted but silently ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()
        # Per-class offset/scale vectors handed to DecodeBox in detect_image.
        self.mean = torch.Tensor([0, 0, 0, 0]).cuda().repeat(
            self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).cuda().repeat(
            self.num_classes + 1)[None]

    def _get_class(self):
        """Read the class names file and return one stripped name per line."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    def generate(self):
        """Build the model on the GPU, load its weights and set box colours."""
        # Total number of classes.
        self.num_classes = len(self.class_names)

        # Build the model first, then load the saved state dict into it.
        self.model = FasterRCNN(self.num_classes, "predict",
                                backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different colour per class: evenly spaced hues as 0-255 RGB.
        class_count = len(self.class_names)
        self.colors = []
        for index in range(class_count):
            red, green, blue = colorsys.hsv_to_rgb(index / class_count, 1., 1.)
            self.colors.append(
                (int(red * 255), int(green * 255), int(blue * 255)))

    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled, scored boxes.

        Args:
            image: Input image; must support ``resize`` and be drawable by
                ``ImageDraw`` (presumably a PIL image -- TODO confirm).

        Returns:
            A deep copy of the input, unchanged when nothing is detected,
            otherwise with one box and ``"<class> <score>"`` caption per
            detection.
        """
        start_time = time.time()
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        resized = image.resize([width, height])
        # Scale pixels to [0, 1] and move the channel axis first.
        photo = np.transpose(np.array(resized, dtype=np.float32) / 255,
                             (2, 0, 1))

        # Inference only: skip autograd bookkeeping, as the other predictor
        # variants do.
        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo])).cuda()
            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs, roi_scores, rois,
                                        height=height, width=width,
                                        score_thresh=self.confidence)

        if len(outputs) == 0:
            return old_image

        bbox = outputs[:, :4]
        conf = outputs[:, 4]
        class_ids = outputs[:, 5]
        # Map the boxes from the resized image back to the original size.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)

        image = old_image
        # The canvas size does not change while drawing, so read it once
        # instead of converting the image to an array for every box.
        canvas_height = np.shape(image)[0]
        canvas_width = np.shape(image)[1]
        thickness = (canvas_height + canvas_width) // old_width * 2
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * canvas_width + 0.5).astype('int32'))
        draw = ImageDraw.Draw(image)

        for row, class_id in enumerate(class_ids):
            class_index = int(class_id)
            predicted_class = self.class_names[class_index]
            score = conf[row]

            # Grow each box by 5 pixels per side, clamped to the canvas.
            left, top, right, bottom = bbox[row]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(canvas_height,
                         np.floor(bottom + 0.5).astype('int32'))
            right = min(canvas_width, np.floor(right + 0.5).astype('int32'))

            # Draw the box and its caption.
            caption = '{} {:.2f}'.format(predicted_class, score)
            label_size = draw.textsize(caption, font)
            caption_bytes = caption.encode('utf-8')
            print(caption_bytes)

            # Put the caption above the box, or just inside it when the box
            # touches the top of the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=self.colors[class_index])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[class_index])
            draw.text(text_origin, str(caption_bytes, 'UTF-8'),
                      fill=(0, 0, 0), font=font)

        print("time:", time.time() - start_time)
        return image
#----------------------------------------------------# # 训练之前一定要修改NUM_CLASSES # 修改成所需要区分的类的个数。 #----------------------------------------------------# NUM_CLASSES = 20 #-------------------------------------------------------------------------------------# # input_shape是输入图片的大小,默认为800,800,3,随着输入图片的增大,占用显存会增大 # 视频上为600,600,3,实际测试中发现800,800,3效果更好 #-------------------------------------------------------------------------------------# input_shape = [800,800,3] #----------------------------------------------------# # 使用到的主干特征提取网络 # vgg或者resnet50 #----------------------------------------------------# backbone = "resnet50" model = FasterRCNN(NUM_CLASSES,backbone=backbone) # #------------------------------------------------------# # 权值文件请看README,百度网盘下载 #------------------------------------------------------# # model_path = 'model_data/voc_weights_resnet.pth' # print('Loading weights into state dict...') # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # model_dict = model.state_dict() # pretrained_dict = torch.load(model_path, map_location=device) # pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)} # model_dict.update(pretrained_dict) # model.load_state_dict(model_dict) print('Finished!') net = model.train()
class FRCNN(object):
    """Faster R-CNN predictor that draws labelled, scored boxes on an image.

    Settings start from ``_defaults`` and can be overridden with keyword
    arguments to the constructor.
    """

    # Default settings copied onto every instance by ``__init__``.
    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.8,
        "iou": 0.3,
        "backbone": "vgg16",
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a message string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Initialise the predictor.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (for example ``cuda=False`` or ``backbone="resnet50"``).
        """
        self.__dict__.update(self._defaults)
        # Keyword overrides were previously accepted but silently ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()

        # Per-class offset/scale vectors used by the box decoder.
        self.mean = torch.Tensor([0, 0, 0, 0]).repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(
            self.num_classes + 1)[None]
        if self.cuda:
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

        # Built once here and reused by every detect_image call.
        self.decodebox = DecodeBox(self.std, self.mean, self.num_classes)

    def _get_class(self):
        """Read the class names file and return one stripped name per line."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    def generate(self):
        """Build the model, load its weights and set the box colours."""
        # Total number of classes.
        self.num_classes = len(self.class_names)

        # Build the model in eval mode and load the weights.
        self.model = FasterRCNN(self.num_classes, "predict",
                                backbone=self.backbone).eval()
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.model.load_state_dict(state_dict)

        if self.cuda:
            # NOTE(review): this is set after CUDA availability was already
            # queried above; confirm it still has the intended effect.
            os.environ["CUDA_VISIBLE_DEVICES"] = "0"
            # The original has nn.DataParallel wrapping commented out here.
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different colour per class: evenly spaced hues as 0-255 RGB.
        class_count = len(self.class_names)
        self.colors = []
        for index in range(class_count):
            red, green, blue = colorsys.hsv_to_rgb(index / class_count, 1., 1.)
            self.colors.append(
                (int(red * 255), int(green * 255), int(blue * 255)))

    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled, scored boxes.

        Args:
            image: Input image; must support ``resize`` and be drawable by
                ``ImageDraw`` (presumably a PIL image -- TODO confirm).

        Returns:
            A deep copy of the input, unchanged when nothing is detected,
            otherwise with one box and ``"<class> <score>"`` caption per
            detection.
        """
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        # Resize the picture to the size returned by get_new_img_size (the
        # original comment says the short side becomes 600).
        width, height = get_new_img_size(old_width, old_height)
        resized = image.resize([width, height], Image.BICUBIC)

        # Preprocessing: scale pixels to [0, 1] and move channels first.
        photo = np.transpose(np.array(resized, dtype=np.float32) / 255,
                             (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.model(images)
            # Decode the classifier output for the proposals into boxes.
            outputs = self.decodebox.forward(roi_cls_locs[0], roi_scores[0],
                                             rois, height=height, width=width,
                                             nms_iou=self.iou,
                                             score_thresh=self.confidence)

        # Nothing detected: hand back the untouched copy.
        if len(outputs) == 0:
            return old_image

        outputs = np.array(outputs)
        bbox = outputs[:, :4]
        class_ids = outputs[:, 4]
        conf = outputs[:, 5]
        # Map the boxes from the resized image back to the original size.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height

        # The font size is derived from the width of the resized image, as
        # in the original code.
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(resized)[1] + 0.5).astype('int32'))

        image = old_image
        # The canvas size does not change while drawing, so read it once
        # instead of converting the image to an array for every box.
        canvas_height = np.shape(image)[0]
        canvas_width = np.shape(image)[1]
        thickness = max((canvas_height + canvas_width) // old_width * 2, 1)
        draw = ImageDraw.Draw(image)

        for row, class_id in enumerate(class_ids):
            class_index = int(class_id)
            predicted_class = self.class_names[class_index]
            score = conf[row]

            # Grow each box by 5 pixels per side, round, clamp to the canvas.
            left, top, right, bottom = bbox[row]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(canvas_height,
                         np.floor(bottom + 0.5).astype('int32'))
            right = min(canvas_width, np.floor(right + 0.5).astype('int32'))

            # Draw the box and its caption.
            caption = '{} {:.2f}'.format(predicted_class, score)
            label_size = draw.textsize(caption, font)
            caption_bytes = caption.encode('utf-8')
            print(caption_bytes, top, left, bottom, right)

            # Put the caption above the box, or just inside it when the box
            # touches the top of the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=self.colors[class_index])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[class_index])
            draw.text(text_origin, str(caption_bytes, 'UTF-8'),
                      fill=(0, 0, 0), font=font)

        return image
(epoch_size_val + 1))) print('Saving state, iter:', str(epoch + 1)) torch.save( model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' % ((epoch + 1), total_loss / (epoch_size + 1), val_toal_loss / (epoch_size_val + 1))) if __name__ == "__main__": # 参数初始化 annotation_path = 'traindata.txt' NUM_CLASSES = 1 IMAGE_SHAPE = [600, 600, 3] BACKBONE = "resnet50" model = FasterRCNN(NUM_CLASSES, backbone=BACKBONE) #-------------------------------# # Dataloder的使用 #-------------------------------# Use_Data_Loader = True Cuda = True model_path = r'model_data/voc_weights_resnet.pth' print('Loading weights into state dict...') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model_dict = model.state_dict() pretrained_dict = torch.load(model_path, map_location=device) pretrained_dict = { k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)
Cuda = True
# NUM_CLASSES must be updated before training: set it to the number of
# classes to distinguish.
NUM_CLASSES = 6
# input_shape is the input image size; GPU memory use grows with the input
# size.
input_shape = [600, 600, 3]
# Backbone feature-extraction network: vgg or resnet50.
backbone = "resnet50"
model = FasterRCNN(NUM_CLASSES, backbone=backbone)

# See the README for the weight file (Baidu Netdisk download).
model_path = 'model_data/voc_weights_resnet.pth'
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The original author's notes record both dicts below as OrderedDicts with
# 328 entries.
model_dict = model.state_dict()
pretrained_dict = torch.load(model_path, map_location=device)
# Keep only checkpoint tensors whose name exists in the model and whose shape
# matches. The membership test avoids a KeyError when the checkpoint contains
# a key this model does not have.
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items()
    if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
}
parser.add_argument("--lr", type=float, help="learning_rate", default=1e-3)
# The help text here used to be a copy of the --freeze help string.
parser.add_argument("-b", "--backbone", type=str, help="backbone network",
                    default='resnet50', choices=['resnet50', 'vgg'])
parser.add_argument("-f", "--freeze", type=int, help="frozen_epoches", default=5)
parser.add_argument("-u", "--unfreeze", type=int, help="free_epoches", default=5)
# parser.add_argument('-v', "--version", type=int, help='you want to use efficientdet-dX?', default=0)
# parser.add_argument('-b', "--batch_size", type=int, help='just batch_size', default=64)
parser.add_argument('-p', "--pre_model", type=str,
                    help='just pretrained_model_path default = \'\'', default="")
parser.add_argument('-s', '--val_split', type=float, help='验证集的比例', default=0.1)
args = parser.parse_args()

# Parameter initialisation.
annotation_path = '2007_train.txt'
EPOCH_LENGTH = 2000
IMAGE_SHAPE = [600, 600, 3]
model = FasterRCNN(num_classes, backbone=args.backbone).cuda()
# Download locations for pretrained weights, keyed by backbone name.
# NOTE(review): the 'vgg' entry is the placeholder string 'none', so a vgg
# run only works when --pre_model is given.
model_urls = {'resnet50': 'https://github.com/you-bowen/tutorical_myDL/releases/download/1.0/frcnn_resnet50.pth',
              'vgg': 'none'}

# See the README for how to download the weight file.
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
# A local checkpoint takes precedence over the download. Both paths now pass
# map_location; the local path used to omit it.
if args.pre_model:
    pretrained_dict = torch.load(args.pre_model, map_location=device)
else:
    pretrained_dict = load_url(model_urls[args.backbone], map_location=device)
# Keep tensors whose name exists in the model and whose shape matches, and
# drop the BatchNorm num_batches_tracked counters. The membership test avoids
# a KeyError on checkpoint keys this model does not have.
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items()
    if k in model_dict
    and np.shape(model_dict[k]) == np.shape(v)
    and not k.endswith('num_batches_tracked')
}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)

# Debug output: show one counter that was deliberately not loaded. Only
# printed when the key exists, so a backbone without it no longer raises.
tracked_key = 'extractor.4.0.bn1.num_batches_tracked'
loaded_state = model.state_dict()
if tracked_key in loaded_state:
    print(loaded_state[tracked_key])
print('Finished!')