num_classes = len(class_names)

# ------------------------------------------------------#
#   Build the YOLO model.
#   classes_path and its txt file must be updated before
#   training.
# ------------------------------------------------------#
model = YoloBody(len(anchors[0]), num_classes)
weights_init(model)

# ------------------------------------------------------#
#   See the README for the weight file (Baidu Netdisk
#   download).
# ------------------------------------------------------#
model_path = "model_data/yolo4_weights.pth"
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
pretrained_dict = torch.load(model_path, map_location=device)
# Keep only tensors that exist in the model under the same name AND have the
# same shape. The membership test matters: indexing model_dict with a key the
# model does not have would raise KeyError instead of skipping the tensor.
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items()
    if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')

net = model.train()

if Cuda:
    net = torch.nn.DataParallel(model)
    cudnn.benchmark = True
    net = net.cuda()

yolo_loss = YOLOLoss(np.reshape(anchors, [-1, 2]), num_classes,
                     (input_shape[1], input_shape[0]), smoooth_label, Cuda, normalize)
loss_history = LossHistory("logs/")
class YOLO(object):
    """Single-image detector built around ``YoloBody``.

    Configuration starts from ``_defaults`` and may be overridden through
    keyword arguments to the constructor. Construction reads the class and
    anchor files, builds the network and loads the weights.
    """

    _defaults = {
        "model_path": '/data/zihaosh/hw2_load/final.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/mask_classes.txt',
        "model_image_size": (608, 608, 3),
        "confidence": 0.01,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for option ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Create the detector.

        Args:
            **kwargs: overrides for any key of ``_defaults``
                (e.g. ``confidence=0.5``, ``cuda=False``).
        """
        self.__dict__.update(self._defaults)
        # Apply caller overrides; previously kwargs were accepted but ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    def _get_class(self):
        """Read ``classes_path`` and return one stripped class name per line."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    def _get_anchors(self):
        """Read the first line of ``anchors_path`` as comma-separated floats.

        Returns:
            Array of shape (-1, 3, 2) with the groups in reversed order.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape([-1, 3, 2])[::-1, :, :]

    @staticmethod
    def _matching_weights(pretrained_dict, model_dict):
        """Return the checkpoint entries whose name and shape match the model."""
        # The membership test avoids a KeyError for checkpoint keys that the
        # model does not define; such entries are skipped like shape mismatches.
        return {
            name: tensor
            for name, tensor in pretrained_dict.items()
            if name in model_dict
            and np.shape(model_dict[name]) == np.shape(tensor)
        }

    @staticmethod
    def _text_size(draw, text, font):
        """Return (width, height) of ``text``, on old and new Pillow alike."""
        if hasattr(draw, "textsize"):
            return draw.textsize(text, font)
        # ImageDraw.textsize no longer exists in recent Pillow releases.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        return right - left, bottom - top

    def generate(self):
        """Build the network, load the weights and prepare decoders/colours."""
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        print('Loading pretrained weights.')
        model_dict = self.net.state_dict()
        # Load onto the CPU first so a checkpoint saved on a GPU can still be
        # read on a machine without one; .cuda() below moves the model.
        checkpoint = torch.load(self.model_path, map_location='cpu')
        model_dict.update(self._matching_weights(checkpoint, model_dict))
        self.net.load_state_dict(model_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
        print('Finish loading!')

        decode_size = (self.model_image_size[1], self.model_image_size[0])
        self.yolo_decodes = [
            DecodeBox(self.anchors[i], len(self.class_names), decode_size)
            for i in range(3)
        ]
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One distinct colour per class for drawing boxes.
        class_count = len(self.class_names)
        hsv_tuples = [(x / class_count, 1., 1.) for x in range(class_count)]
        rgb_unit = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
        self.colors = [
            (int(r * 255), int(g * 255), int(b * 255)) for r, g, b in rgb_unit
        ]

    def detect_image(self, image):
        """Detect objects in ``image`` and draw them onto it.

        Args:
            image: the picture to process; it is drawn on in place.

        Returns:
            ``(image, boxes, top_conf, top_label)``. When the detections
            cannot be extracted the placeholder
            ``(image, [(1, 1, 1, 1)], [1], [1])`` is returned instead.
        """
        image_shape = np.array(np.shape(image)[0:2])
        net_size = (self.model_image_size[0], self.model_image_size[1])

        photo = np.array(letterbox_image(image, net_size), dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1)).astype(np.float32)
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(decoded, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # NOTE(review): presumably non_max_suppression yields None / an
            # empty result when nothing survives -- confirm against its
            # definition. Other errors now propagate instead of being hidden.
            return image, [(1, 1, 1, 1)], [1], [1]

        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the grey letterbox bars from the coordinates.
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array(net_size), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf', size=10)
        image_h, image_w = np.shape(image)[0], np.shape(image)[1]
        draw = ImageDraw.Draw(image)

        for idx, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[idx]
            color = self.colors[self.class_names.index(predicted_class)]

            top, left, bottom, right = boxes[idx]
            top = max(0, np.floor(top + 0.5).astype('int64'))
            left = max(0, np.floor(left + 0.5).astype('int64'))
            bottom = min(image_h, np.floor(bottom + 0.5).astype('int64'))
            right = min(image_w, np.floor(right + 0.5).astype('int64'))

            # Draw the box and its caption.
            label = '{}: {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)
            print(label.encode('utf-8'))

            # Put the caption above the box unless it would leave the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for inset in range(2):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)

        del draw
        return image, boxes, top_conf, top_label
def train():
    """Train ``YoloBody`` and keep the checkpoint with the lowest test loss.

    Reads the module-level ``args`` (``input_shape``, ``gpu``, ``batch_size``,
    ``num_workers``, ``nepoch``), ``anchors``, ``train_root`` and
    ``test_root``. Weights are initialised from
    ``weights/yolov4_coco_pretrained_weights.pth``; the best model is written
    to ``weights/face_mask_weights0.pth`` and the loss curves are plotted at
    the end.
    """
    num_classes = 3

    # Build the loss functions, one per output scale.
    yolo_losses = [
        YOLOLoss(np.reshape(anchors, [-1, 2]), num_classes,
                 (args.input_shape, args.input_shape), 0.3, args.gpu)
        for _ in range(3)
    ]

    train_data = MyDataset(train_root, input_shape=(args.input_shape, args.input_shape))
    test_data = MyDataset(test_root, input_shape=(args.input_shape, args.input_shape))
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                              num_workers=args.num_workers, collate_fn=my_collate)
    test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=True,
                             num_workers=args.num_workers, collate_fn=my_collate)

    # Select the device.
    if args.gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    # Build the model.
    model = YoloBody(len(anchors[0]), num_classes)
    model_path = "weights/yolov4_coco_pretrained_weights.pth"
    # Alternative checkpoint: "model_data/yolov4_maskdetect_weights0.pth"

    # Start from pretrained weights to speed up training.
    print('Loading pretrained model weights.')
    model_dict = model.state_dict()
    # map_location lets a GPU-saved checkpoint load when running on the CPU.
    pretrained_dict = torch.load(model_path, map_location=device)
    # Skip tensors the model does not define (would otherwise raise KeyError)
    # as well as tensors whose shape differs.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('Finished!')

    net = model.to(device)

    optimizer = optim.Adam(net.parameters(), lr=1e-3, weight_decay=5e-4)
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)

    # Freeze the backbone.
    for param in model.backbone.parameters():
        param.requires_grad = False

    # Training loop.
    train_losses = []
    test_losses = []
    min_loss = 1e10
    for epoch in range(args.nepoch):
        train_loss = 0
        # The evaluation pass below switches to eval mode, so re-enable
        # training mode at the start of every epoch.
        net.train()
        for i, data in enumerate(train_loader):
            inputs, labels = data[0].to(device), data[1]
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = sum(yolo_losses[j](outputs[j], labels)[0] for j in range(3))
            loss.backward()
            optimizer.step()
            # NOTE(review): the scheduler is stepped per batch with T_max=5,
            # so the learning rate cycles every few batches -- confirm this is
            # intended rather than one step per epoch.
            lr_scheduler.step()

            batch_loss = loss.item()
            train_loss += batch_loss
            train_losses.append(batch_loss)
            print("epoch:%d/%d, batch:%d/%d, train_loss:%f" %
                  (epoch, args.nepoch, i, len(train_loader), batch_loss))
        train_loss /= len(train_loader)

        # Evaluation pass.
        test_loss = 0
        net.eval()
        with torch.no_grad():
            for data_test in test_loader:
                inputs, labels = data_test[0].to(device), data_test[1]
                outputs = net(inputs)
                loss = sum(yolo_losses[j](outputs[j], labels)[0] for j in range(3))
                test_loss += loss.item()
        test_loss /= len(test_loader)
        test_losses.append(test_loss)

        if test_loss < min_loss:
            # Record the new best; without this every epoch overwrote the file
            # and the saved model was simply the last one.
            min_loss = test_loss
            torch.save(net.state_dict(), 'weights/face_mask_weights0.pth')
        print("epoch:%d/%d, train_loss:%f, test_loss:%f" %
              (epoch, args.nepoch, train_loss, test_loss))

    plot_loss_curve(train_losses, test_losses, len(train_loader))
class YOLO4_inference(object):
    """Inference wrapper around ``YoloBody``.

    ``predict`` returns the raw detections; ``detect_image`` additionally
    draws them onto the picture. Class names and anchors come from the
    module-level ``ID2CLASS`` and ``anchors``.
    """

    # ---------------------------------------------------#
    #   Initialise YOLO
    # ---------------------------------------------------#
    def __init__(self, model_path, input_shape=416, confidence=0.5, cuda=True):
        """Store the configuration and load the network.

        Args:
            model_path: path of the weight file passed to ``torch.load``.
            input_shape: side length of the square network input.
            confidence: score threshold for keeping a detection.
            cuda: when true, run the network on the GPU.
        """
        self.class_names = ID2CLASS
        self.anchors = anchors
        self.model_path = model_path
        self.input_shape = (input_shape, input_shape, 3)
        self.confidence = confidence
        self.cuda = cuda
        # Colours used for drawing boxes.
        self.colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255)]
        self.generate()

    @staticmethod
    def _matching_weights(pretrained_dict, model_dict):
        """Return the checkpoint entries whose name and shape match the model."""
        # The membership test avoids a KeyError for checkpoint keys that the
        # model does not define; such entries are skipped like shape mismatches.
        return {
            name: tensor
            for name, tensor in pretrained_dict.items()
            if name in model_dict
            and np.shape(model_dict[name]) == np.shape(tensor)
        }

    @staticmethod
    def _text_size(draw, text, font):
        """Return (width, height) of ``text``, on old and new Pillow alike."""
        if hasattr(draw, "textsize"):
            return draw.textsize(text, font)
        # ImageDraw.textsize no longer exists in recent Pillow releases.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        return right - left, bottom - top

    # ---------------------------------------------------#
    #   Load the trained model
    # ---------------------------------------------------#
    def generate(self):
        """Build the network, load the weights and create the box decoders."""
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        print('Loading pretrained weights.')
        model_dict = self.net.state_dict()
        # Load onto the CPU first so a checkpoint saved on a GPU can still be
        # read on a machine without one; .cuda() below moves the model.
        checkpoint = torch.load(self.model_path, map_location='cpu')
        model_dict.update(self._matching_weights(checkpoint, model_dict))
        self.net.load_state_dict(model_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
        print('Finish loading!')

        decode_size = (self.input_shape[1], self.input_shape[0])
        self.yolo_decodes = [
            DecodeBox(self.anchors[i], len(self.class_names), decode_size)
            for i in range(3)
        ]
        print('{} model, anchors, and classes loaded.'.format(self.model_path))

    def _run_network(self, image):
        """Letterbox ``image``, run the network and return the NMS output."""
        net_size = (self.input_shape[0], self.input_shape[1])
        photo = np.array(letterbox_image(image, net_size), dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1)).astype(np.float32)
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(decoded, 1)
        return non_max_suppression(output, len(self.class_names),
                                   conf_thres=self.confidence,
                                   nms_thres=0.3)

    def _select_boxes(self, detections, image_shape):
        """Threshold ``detections`` and map the boxes back to the image.

        Returns:
            ``(boxes, top_label, top_conf)``.
        """
        scores = detections[:, 4] * detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(detections[top_index, -1], np.int32)
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the grey letterbox bars from the coordinates.
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.input_shape[0], self.input_shape[1]]),
                                   image_shape)
        return boxes, top_label, top_conf

    def predict(self, image):
        """Return ``(boxes, top_label, top_conf)`` for ``image``.

        Unlike ``detect_image`` this does not guard the extraction of the
        detections, so a failure there propagates to the caller.
        """
        image_shape = np.array(np.shape(image)[0:2])
        batch_detections = self._run_network(image)
        detections = batch_detections[0].cpu().numpy()
        return self._select_boxes(detections, image_shape)

    # ---------------------------------------------------#
    #   Detect and annotate a picture
    # ---------------------------------------------------#
    def detect_image(self, image):
        """Draw the detections onto ``image`` and return it.

        The picture is returned unchanged when the detections cannot be
        extracted from the NMS output.
        """
        image_shape = np.array(np.shape(image)[0:2])
        batch_detections = self._run_network(image)
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # NOTE(review): presumably non_max_suppression yields None / an
            # empty result when nothing survives -- confirm against its
            # definition. Other errors now propagate instead of being hidden.
            return image

        boxes, top_label, top_conf = self._select_boxes(detections, image_shape)

        image_h, image_w = np.shape(image)[0], np.shape(image)[1]
        font = ImageFont.truetype(font='simhei.ttf',
                                  size=int(np.floor(3e-2 * image_w + 0.5)))
        # At least one pixel, otherwise small pictures get no outline at all.
        thickness = max(1, (image_h + image_w) // self.input_shape[0])
        draw = ImageDraw.Draw(image)

        for idx, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[idx]
            # Indexing by class id matches the former name lookup when class
            # names are unique, does not require ID2CLASS to be a list, and
            # the modulo keeps it inside the fixed palette.
            color = self.colors[c % len(self.colors)]

            # Pad the box by 5 pixels on every side, then clamp to the image.
            top, left, bottom, right = boxes[idx]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image_h, np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image_w, np.floor(right + 5 + 0.5).astype('int32'))

            # Draw the box and its caption.
            label = '{}: {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label, font)
            print(label.encode('utf-8'))

            # Put the caption above the box unless it would leave the image.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)

        del draw
        return image
# -------------------------------#
#   Load the anchor boxes and class names
# -------------------------------#
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/mask_classes.txt'
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
num_classes = len(class_names)

# %%
# Build the model
model = YoloBody(len(anchors[0]), num_classes)
model_path = "/data/zihaosh/hw2_pretrain/yolov4_coco_pretrained_weights.pth"

# Start from pretrained weights to speed up training.
print('Loading pretrained model weights.')
model_dict = model.state_dict()
# Load onto the CPU first; the model is moved to the GPU afterwards, so the
# checkpoint does not depend on the device it was saved from.
pretrained_dict = torch.load(model_path, map_location='cpu')
# Skip tensors the model does not define (would otherwise raise KeyError) as
# well as tensors whose shape differs.
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items()
    if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')

model = model.cuda()

# Build the loss functions, one per output scale.
yolo_losses = []
for i in range(3):
    yolo_losses.append(YOLOLoss(np.reshape(anchors, [-1, 2]), num_classes,
                                (input_shape[1], input_shape[0]), smoooth_label, Cuda))

# Read the training annotation lines.
with open(train_annotation_path) as f:
    train_lines = f.readlines()