def get_map_txt(self, image_id, image, class_names, map_out_path):
    """Detect objects in one image and write them to a mAP result file.

    One line per kept detection is written to
    ``<map_out_path>/detection-results/<image_id>.txt`` in the form
    ``class score left top right bottom`` (score truncated to 6 characters,
    coordinates truncated to int).

    Args:
        image_id: Base name (without extension) of the output text file.
        image: Input image. ``image.size`` is read as (width, height), so a
            PIL-style image is presumably expected -- confirm with callers.
        class_names: Class names to keep; detections of other classes are
            skipped.
        map_out_path: Directory that contains ``detection-results``.

    Returns:
        None.
    """
    result_path = os.path.join(map_out_path, "detection-results/" + image_id + ".txt")
    # The file is still created before inference, exactly as before, but the
    # context manager now guarantees it is closed even if inference or
    # writing raises.
    with open(result_path, "w") as f:
        # Convert to RGB so grayscale inputs do not break prediction.
        image = cvtColor(image)
        # Resize to the network input size (optionally letterboxed with gray
        # bars to avoid distortion, controlled by self.letterbox_image).
        image_data = resize_image(
            image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        # Normalise and add the batch dimension.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0,
            })

        for i, c in enumerate(out_classes):
            predicted_class = self.class_names[int(c)]
            if predicted_class not in class_names:
                continue
            score = str(out_scores[i])
            top, left, bottom, right = out_boxes[i]
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6],
                str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))
    return
def get_FPS(self, image, test_interval):
    """Return the mean wall-clock time of one predict + decode pass.

    The image is preprocessed once, one untimed warm-up pass is executed,
    and then ``test_interval`` passes are timed with ``time.time()``.

    Args:
        image: Input image; its first two ``np.shape`` entries are used as
            the original image shape.
        test_interval: Number of timed passes (also the divisor).

    Returns:
        Elapsed seconds divided by ``test_interval``.
    """
    original_shape = np.array(np.shape(image)[0:2])

    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    # Resize to the network input size (optionally letterboxed).
    target_size = (self.input_shape[1], self.input_shape[0])
    resized = resize_image(rgb_image, target_size, self.letterbox_image)
    # Add the batch dimension first, then preprocess / normalise.
    batch = np.expand_dims(np.array(resized, dtype='float32'), 0)
    batch = preprocess_input(batch)

    def run_once():
        # Forward pass followed by box decoding.
        raw_preds = self.get_pred(batch).numpy()
        return self.bbox_util.decode_box(
            raw_preds, self.anchors, original_shape, self.input_shape,
            self.letterbox_image, confidence=self.confidence)

    # Untimed warm-up pass.
    run_once()

    started = time.time()
    for _ in range(test_interval):
        run_once()
    finished = time.time()

    return (finished - started) / test_interval
def get_FPS(self, image, test_interval):
    """Return the mean wall-clock time of one ``self.get_pred`` call.

    The image is preprocessed once, one untimed warm-up call is made, and
    then ``test_interval`` calls are timed with ``time.time()``.

    Args:
        image: Input image; ``image.size`` is read as (width, height), so a
            PIL-style image is presumably expected -- confirm with callers.
        test_interval: Number of timed calls (also the divisor).

    Returns:
        Elapsed seconds divided by ``test_interval``.
    """
    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    # Resize to the network input size (optionally letterboxed).
    target_size = (self.input_shape[1], self.input_shape[0])
    resized = resize_image(rgb_image, target_size, self.letterbox_image)
    # Normalise, then add the batch dimension.
    batch = preprocess_input(np.array(resized, dtype='float32'))
    batch = np.expand_dims(batch, 0)
    # Original (height, width) with a leading batch dimension.
    height_width = np.array([rgb_image.size[1], rgb_image.size[0]], dtype='float32')
    shape_batch = np.expand_dims(height_width, 0)

    # Untimed warm-up call; unpacking keeps the three-output contract.
    _boxes, _scores, _classes = self.get_pred(batch, shape_batch)

    started = time.time()
    for _ in range(test_interval):
        _boxes, _scores, _classes = self.get_pred(batch, shape_batch)
    finished = time.time()

    return (finished - started) / test_interval
def get_FPS(self, image, test_interval):
    """Return the mean wall-clock time of one segmentation pass.

    A pass is: forward through ``self.net``, softmax + argmax over the class
    axis, and cropping away the letterbox padding. One untimed warm-up pass
    runs first, then ``test_interval`` passes are timed with ``time.time()``.

    Args:
        image: Input image accepted by ``cvtColor`` / ``resize_image``.
        test_interval: Number of timed passes (also the divisor).

    Returns:
        Elapsed seconds divided by ``test_interval``.
    """
    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    # Letterbox-resize; nw / nh are the resized content width / height.
    resized, nw, nh = resize_image(
        rgb_image, (self.input_shape[1], self.input_shape[0]))
    # Normalise, move channels first, add the batch dimension.
    chw = np.transpose(preprocess_input(np.array(resized, np.float32)), (2, 0, 1))
    batch = np.expand_dims(chw, 0)

    with torch.no_grad():
        images = torch.from_numpy(batch)
        if self.cuda:
            images = images.cuda()

    def run_once():
        with torch.no_grad():
            # Forward pass; keep the first element of the batch.
            logits = self.net(images)[0]
            # Per-pixel class index.
            labels = F.softmax(logits.permute(1, 2, 0), dim=-1).cpu().numpy().argmax(axis=-1)
            # Crop away the gray padding bars.
            row_start = int((self.input_shape[0] - nh) // 2)
            row_stop = int((self.input_shape[0] - nh) // 2 + nh)
            col_start = int((self.input_shape[1] - nw) // 2)
            col_stop = int((self.input_shape[1] - nw) // 2 + nw)
            return labels[row_start:row_stop, col_start:col_stop]

    # Untimed warm-up pass.
    run_once()

    started = time.time()
    for _ in range(test_interval):
        run_once()
    finished = time.time()

    return (finished - started) / test_interval
def get_map_txt(self, image_id, image, class_names, map_out_path):
    """Detect objects in one image and write them to a mAP result file.

    One line per kept detection is written to
    ``<map_out_path>/detection-results/<image_id>.txt`` in the form
    ``class score left top right bottom``. When nothing is detected the
    file is left empty.

    Args:
        image_id: Base name (without extension) of the output text file.
        image: Input image; its first two ``np.shape`` entries are used as
            the original image shape.
        class_names: Class names to keep; detections of other classes are
            skipped.
        map_out_path: Directory that contains ``detection-results``.

    Returns:
        None.
    """
    result_path = os.path.join(map_out_path, "detection-results/" + image_id + ".txt")
    # Using a context manager fixes a handle leak: previously the early
    # return on "no detections" (and any exception) left the file open.
    with open(result_path, "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        # Convert to RGB so grayscale inputs do not break prediction.
        image = cvtColor(image)
        # Resize to the network input size (optionally letterboxed).
        image_data = resize_image(
            image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        # Add the batch dimension, then preprocess / normalise.
        image_data = preprocess_input(
            np.expand_dims(np.array(image_data, dtype='float32'), 0))

        preds = self.m2det.predict(image_data)
        # Decode the raw predictions into boxes.
        results = self.bbox_util.decode_box(
            preds, self.anchors, image_shape, self.input_shape,
            self.letterbox_image, confidence=self.confidence)

        # Nothing detected: leave an empty result file.
        if len(results[0]) <= 0:
            return

        # Column layout as used by this decoder: 0-3 box, 4 label, 5 score.
        top_label = results[0][:, 4]
        top_conf = results[0][:, 5]
        top_boxes = results[0][:, :4]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[int(c)]
            if predicted_class not in class_names:
                continue
            score = str(top_conf[i])
            top, left, bottom, right = top_boxes[i]
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6],
                str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))
    return
def get_FPS(self, image, test_interval):
    """Return the mean wall-clock time of one predict + postprocess pass.

    The image is preprocessed once, one untimed warm-up pass is executed,
    and then ``test_interval`` passes are timed with ``time.time()``.

    Args:
        image: Input image; its first two ``np.shape`` entries are used as
            the original (height, width).
        test_interval: Number of timed passes (also the divisor).

    Returns:
        Elapsed seconds divided by ``test_interval``.
    """
    # Height and width of the input image.
    original_shape = np.array(np.shape(image)[0:2])

    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    # Resize to the network input size (optionally letterboxed).
    target_size = (self.input_shape[1], self.input_shape[0])
    resized = resize_image(rgb_image, target_size, self.letterbox_image)
    # Normalise, then add the batch dimension.
    batch = np.expand_dims(
        preprocess_input(np.array(resized, dtype='float32')), 0)

    def run_once():
        network_out = self.centernet.predict(batch)
        # Post-processing: confidence thresholding plus optional classic
        # NMS (switched by self.nms). Per the original notes, max-pool based
        # suppression alone cannot remove local boxes on large objects.
        return self.bbox_util.postprocess(
            network_out, self.nms, original_shape, self.input_shape,
            self.letterbox_image, confidence=self.confidence)

    # Untimed warm-up pass.
    run_once()

    started = time.time()
    for _ in range(test_interval):
        run_once()
    finished = time.time()

    return (finished - started) / test_interval
def get_map_txt(self, image_id, image, class_names, map_out_path):
    """Detect objects in one image and write them to a mAP result file.

    One line per kept detection is written to
    ``<map_out_path>/detection-results/<image_id>.txt`` in the form
    ``class score left top right bottom``. When nothing is detected the
    file is left empty.

    Args:
        image_id: Base name (without extension) of the output text file.
        image: Input image; its first two ``np.shape`` entries are used as
            the original (height, width).
        class_names: Class names to keep; detections of other classes are
            skipped.
        map_out_path: Directory that contains ``detection-results``.

    Returns:
        None.
    """
    result_path = os.path.join(map_out_path, "detection-results/" + image_id + ".txt")
    # Using a context manager fixes a handle leak: previously the early
    # return on "no detections" (and any exception) left the file open.
    with open(result_path, "w") as f:
        # Height and width of the input image.
        image_shape = np.array(np.shape(image)[0:2])
        # Convert to RGB so grayscale inputs do not break prediction.
        image = cvtColor(image)
        # Resize to the network input size (optionally letterboxed).
        image_data = resize_image(
            image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        # Normalise, then add the batch dimension.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        outputs = self.get_pred(image_data).numpy()
        # Post-processing: confidence thresholding plus optional classic
        # NMS (switched by self.nms).
        results = self.bbox_util.postprocess(
            outputs, self.nms, image_shape, self.input_shape,
            self.letterbox_image, confidence=self.confidence)

        # Nothing detected: leave an empty result file.
        if results[0] is None:
            return

        # Column layout: 0-3 box, 4 score, 5 label.
        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[int(c)]
            if predicted_class not in class_names:
                continue
            score = str(top_conf[i])
            top, left, bottom, right = top_boxes[i]
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6],
                str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_heatmap(self, image, heatmap_save_path):
    """Overlay an objectness heatmap on the image, save it and show it.

    For every output of ``self.yolo_model`` the sigmoid of channel 4 is
    taken per anchor, maximised over the 3 anchors, resized to the image
    size and merged into a single mask with an element-wise maximum.

    Args:
        image: Input image; ``image.size`` is read as (width, height), so a
            PIL-style image is presumably expected -- confirm with callers.
        heatmap_save_path: Path passed to ``plt.savefig``.

    Returns:
        None.
    """
    import cv2
    import matplotlib.pyplot as plt

    # Convert to RGB so grayscale inputs do not break prediction.
    image = cvtColor(image)
    # Resize to the network input size (optionally letterboxed).
    net_size = (self.input_shape[1], self.input_shape[0])
    resized = resize_image(image, net_size, self.letterbox_image)
    # Normalise, then add the batch dimension.
    batch = np.expand_dims(
        preprocess_input(np.array(resized, dtype='float32')), 0)

    feature_maps = self.yolo_model.predict(batch)

    plt.imshow(image, alpha=1)
    plt.axis('off')

    width, height = image.size[0], image.size[1]
    heat = np.zeros((height, width))
    for feature in feature_maps:
        # Unpacking requires a 4-D (batch, h, w, channels) output.
        batch_n, grid_h, grid_w, _channels = np.shape(feature)
        # Split channels per anchor (3 anchors) and keep the first sample.
        per_anchor = np.reshape(feature, [batch_n, grid_h, grid_w, 3, -1])[0]
        # Sigmoid of channel 4, maximised over the anchors.
        objectness = 1.0 / (1.0 + np.exp(-per_anchor[..., 4]))
        best = np.max(objectness, -1)
        best = cv2.resize(best, (width, height))
        heat = np.maximum(heat, (best * 255).astype('uint8'))

    plt.imshow(heat, alpha=0.5, interpolation='nearest', cmap="jet")
    plt.axis('off')
    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
    plt.margins(0, 0)
    plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches=-0.1)
    print("Save to the " + heatmap_save_path)
    plt.show()
def get_map_txt(self, image_id, image, class_names, map_out_path):
    """Detect objects in one image and write them to a mAP result file.

    One line per kept detection is written to
    ``<map_out_path>/detection-results/<image_id>.txt`` in the form
    ``class score left top right bottom``. When nothing is detected the
    file is left empty.

    Args:
        image_id: Base name (without extension) of the output text file.
        image: Input image; its first two ``np.shape`` entries are used as
            the original image shape.
        class_names: Class names to keep; detections of other classes are
            skipped.
        map_out_path: Directory that contains ``detection-results``.

    Returns:
        None.
    """
    result_path = os.path.join(map_out_path, "detection-results/" + image_id + ".txt")
    # Using a context manager fixes a handle leak: previously the early
    # return on "no detections" (and any exception) left the file open.
    with open(result_path, "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        # Convert to RGB so grayscale inputs do not break prediction.
        image = cvtColor(image)
        # Resize to the network input size (optionally letterboxed).
        image_data = resize_image(
            image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        # Normalise, move channels first, add the batch dimension.
        image_data = np.expand_dims(
            np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()
            # Forward pass.
            _, regression, classification, anchors = self.net(images)
            # Decode the regression output and run non-max suppression.
            outputs = decodebox(regression, anchors, self.input_shape)
            results = non_max_suppression(
                torch.cat([outputs, classification], axis=-1),
                self.input_shape, image_shape, self.letterbox_image,
                conf_thres=self.confidence, nms_thres=self.nms_iou)

        # Nothing detected: leave an empty result file.
        if results[0] is None:
            return

        # Column layout: 0-3 box, 4 score, 5 label.
        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[int(c)]
            if predicted_class not in class_names:
                continue
            score = str(top_conf[i])
            top, left, bottom, right = top_boxes[i]
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6],
                str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))
    return
def get_FPS(self, image, test_interval):
    """Return the mean wall-clock time of one segmentation pass.

    A pass is: ``self.model.predict``, per-pixel argmax over the class
    axis, and cropping away the letterbox padding. One untimed warm-up pass
    runs first, then ``test_interval`` passes are timed with ``time.time()``.

    Fix over the previous version: the prediction used to be cropped to
    (nh, nw) *before* being reshaped to the full ``input_shape``, which
    raises ``ValueError`` whenever the letterbox adds padding (nh or nw
    smaller than the network input). The class map is now built at the full
    input resolution first and cropped afterwards.

    Args:
        image: Input image accepted by ``cvtColor`` / ``resize_image``.
        test_interval: Number of timed passes (also the divisor).

    Returns:
        Elapsed seconds divided by ``test_interval``.
    """
    # Convert to RGB so grayscale inputs do not break prediction.
    image = cvtColor(image)
    # Letterbox-resize; nw / nh are the resized content width / height.
    image_data, nw, nh = resize_image(
        image, (self.input_shape[1], self.input_shape[0]))
    # Normalise, then add the batch dimension.
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, np.float32)), 0)

    def _segment_once():
        # Forward pass; keep the first element of the batch.
        pr = self.model.predict(image_data)[0]
        # Per-pixel class index at the full network input resolution.
        pr = pr.argmax(axis=-1).reshape(
            [self.input_shape[0], self.input_shape[1]])
        # Crop away the gray padding bars.
        return pr[int((self.input_shape[0] - nh) // 2): int((self.input_shape[0] - nh) // 2 + nh),
                  int((self.input_shape[1] - nw) // 2): int((self.input_shape[1] - nw) // 2 + nw)]

    # Untimed warm-up pass.
    _segment_once()

    t1 = time.time()
    for _ in range(test_interval):
        _segment_once()
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_FPS(self, image, test_interval):
    """Return the mean wall-clock time of one forward + decode + NMS pass.

    The image is preprocessed once, one untimed warm-up pass is executed,
    and then ``test_interval`` passes are timed with ``time.time()``.

    Args:
        image: Input image; its first two ``np.shape`` entries are used as
            the original image shape.
        test_interval: Number of timed passes (also the divisor).

    Returns:
        Elapsed seconds divided by ``test_interval``.
    """
    original_shape = np.array(np.shape(image)[0:2])

    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    # Resize to the network input size (optionally letterboxed).
    target_size = (self.input_shape[1], self.input_shape[0])
    resized = resize_image(rgb_image, target_size, self.letterbox_image)
    # Normalise, move channels first, add the batch dimension.
    chw = np.transpose(preprocess_input(np.array(resized, dtype='float32')), (2, 0, 1))
    batch = np.expand_dims(chw, 0)

    with torch.no_grad():
        images = torch.from_numpy(batch)
        if self.cuda:
            images = images.cuda()

    def run_once():
        with torch.no_grad():
            # Forward pass.
            _, regression, classification, anchors = self.net(images)
            # Decode the regression output and run non-max suppression.
            decoded = decodebox(regression, anchors, self.input_shape)
            return non_max_suppression(
                torch.cat([decoded, classification], axis=-1),
                self.input_shape, original_shape, self.letterbox_image,
                conf_thres=self.confidence, nms_thres=self.nms_iou)

    # Untimed warm-up pass.
    run_once()

    started = time.time()
    for _ in range(test_interval):
        run_once()
    finished = time.time()

    return (finished - started) / test_interval
def get_miou_png(self, image):
    """Predict a per-pixel class map and return it as an image.

    The network output is soft-maxed, cropped to remove letterbox padding,
    resized back to the original resolution with linear interpolation and
    finally reduced with argmax over the class axis.

    Args:
        image: Input image accepted by ``cvtColor`` / ``resize_image``.

    Returns:
        ``Image.fromarray`` of the uint8 class-index map, at the original
        image's height and width.
    """
    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    source_shape = np.array(rgb_image).shape
    source_h, source_w = source_shape[0], source_shape[1]

    # Letterbox-resize; nw / nh are the resized content width / height.
    resized, nw, nh = resize_image(
        rgb_image, (self.input_shape[1], self.input_shape[0]))
    # Normalise, move channels first, add the batch dimension.
    chw = np.transpose(preprocess_input(np.array(resized, np.float32)), (2, 0, 1))
    batch = np.expand_dims(chw, 0)

    with torch.no_grad():
        images = torch.from_numpy(batch)
        if self.cuda:
            images = images.cuda()

        # Forward pass; keep the first element of the batch.
        logits = self.net(images)[0]
        # Class probabilities with the class axis last.
        probs = F.softmax(logits.permute(1, 2, 0), dim=-1).cpu().numpy()

        # Crop away the gray padding bars.
        row_start = int((self.input_shape[0] - nh) // 2)
        row_stop = int((self.input_shape[0] - nh) // 2 + nh)
        col_start = int((self.input_shape[1] - nw) // 2)
        col_stop = int((self.input_shape[1] - nw) // 2 + nw)
        probs = probs[row_start:row_stop, col_start:col_stop]

        # Back to the original resolution.
        probs = cv2.resize(probs, (source_w, source_h), interpolation=cv2.INTER_LINEAR)
        # Per-pixel class index.
        class_map = probs.argmax(axis=-1)

    return Image.fromarray(np.uint8(class_map))
def detect_heatmap(self, image, heatmap_save_path):
    """Overlay an objectness heatmap on the image, save it and show it.

    For every output of ``self.net`` the sigmoid of channel 4 is taken per
    anchor, maximised over the 3 anchors, resized to the image size and
    merged into a single mask with an element-wise maximum.

    Args:
        image: Input image; ``image.size`` is read as (width, height), so a
            PIL-style image is presumably expected -- confirm with callers.
        heatmap_save_path: Path passed to ``plt.savefig``.

    Returns:
        None.
    """
    import cv2
    import matplotlib.pyplot as plt

    # Convert to RGB so grayscale inputs do not break prediction.
    image = cvtColor(image)
    # Resize to the network input size (optionally letterboxed).
    net_size = (self.input_shape[1], self.input_shape[0])
    resized = resize_image(image, net_size, self.letterbox_image)
    # Normalise, move channels first, add the batch dimension.
    chw = np.transpose(preprocess_input(np.array(resized, dtype='float32')), (2, 0, 1))
    batch = np.expand_dims(chw, 0)

    with torch.no_grad():
        images = torch.from_numpy(batch)
        if self.cuda:
            images = images.cuda()
        # Forward pass.
        feature_maps = self.net(images)

    plt.imshow(image, alpha=1)
    plt.axis('off')

    width, height = image.size[0], image.size[1]
    heat = np.zeros((height, width))
    for feature in feature_maps:
        feature = feature.cpu().numpy()
        # Unpacking requires a 4-D (batch, channels, h, w) output.
        batch_n, _channels, grid_h, grid_w = np.shape(feature)
        # Split channels per anchor (3 anchors), move them last, and keep
        # the first sample: result is (h, w, 3, values).
        per_anchor = np.reshape(feature, [batch_n, 3, -1, grid_h, grid_w])
        per_anchor = np.transpose(per_anchor, [0, 3, 4, 1, 2])[0]
        # Sigmoid of channel 4, maximised over the anchors.
        objectness = 1.0 / (1.0 + np.exp(-per_anchor[..., 4]))
        best = np.max(objectness, -1)
        best = cv2.resize(best, (width, height))
        heat = np.maximum(heat, (best * 255).astype('uint8'))

    plt.imshow(heat, alpha=0.5, interpolation='nearest', cmap="jet")
    plt.axis('off')
    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
    plt.margins(0, 0)
    plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches=-0.1)
    print("Save to the " + heatmap_save_path)
    plt.show()
def get_FPS(self, image, test_interval):
    """Return the mean wall-clock time of one predict + postprocess pass.

    The image is resized and anchors are generated once, one untimed
    warm-up pass is executed, and then ``test_interval`` passes are timed
    with ``time.time()``.

    Args:
        image: Input image; its first two ``np.shape`` entries are used as
            the original image shape.
        test_interval: Number of timed passes (also the divisor).

    Returns:
        Elapsed seconds divided by ``test_interval``.
    """
    original_shape = np.array(np.shape(image)[0:2])

    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    # Resize / pad according to self.config.
    batch, metas, windows = resize_image([np.array(rgb_image)], self.config)
    # Generate the prior boxes for the current input size.
    prior_boxes = np.expand_dims(get_anchors(self.config, batch[0].shape), 0)

    def run_once():
        # Forward pass; the unpacking keeps the 7-output contract.
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [batch, metas, prior_boxes], verbose=0)
        # Predictions are relative to the padded image; map them back onto
        # the original image.
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], original_shape, batch[0].shape, windows[0])
        return box_thre, class_thre, class_ids, masks_arg, masks_sigmoid

    # Untimed warm-up pass.
    run_once()

    started = time.time()
    for _ in range(test_interval):
        run_once()
    finished = time.time()

    return (finished - started) / test_interval
def get_FPS(self, image, test_interval):
    """Return the mean wall-clock time of one forward + decode + NMS pass.

    The image is preprocessed once, one untimed warm-up pass is executed,
    and then ``test_interval`` passes are timed with ``time.time()``.

    Args:
        image: Input image; its first two ``np.shape`` entries are used as
            the original image shape.
        test_interval: Number of timed passes (also the divisor).

    Returns:
        Elapsed seconds divided by ``test_interval``.
    """
    original_shape = np.array(np.shape(image)[0:2])

    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    # Resize to the network input size (optionally letterboxed).
    target_size = (self.input_shape[1], self.input_shape[0])
    resized = resize_image(rgb_image, target_size, self.letterbox_image)
    # Normalise, move channels first, add the batch dimension.
    chw = np.transpose(preprocess_input(np.array(resized, dtype='float32')), (2, 0, 1))
    batch = np.expand_dims(chw, 0)

    with torch.no_grad():
        images = torch.from_numpy(batch)
        if self.cuda:
            images = images.cuda()

    def run_once():
        with torch.no_grad():
            # Forward pass and box decoding.
            decoded = self.bbox_util.decode_box(self.net(images))
            # Stack the predictions, then run non-max suppression.
            return self.bbox_util.non_max_suppression(
                torch.cat(decoded, 1), self.num_classes, self.input_shape,
                original_shape, self.letterbox_image,
                conf_thres=self.confidence, nms_thres=self.nms_iou)

    # Untimed warm-up pass.
    run_once()

    started = time.time()
    for _ in range(test_interval):
        run_once()
    finished = time.time()

    return (finished - started) / test_interval
def detect_image(self, image_id, image, results, clsid2catid):
    """Detect objects in one image and append them to ``results``.

    Each detection is appended as a dict with the keys ``image_id``,
    ``category_id``, ``bbox`` ([left, top, width, height] as floats) and
    ``score``.

    Args:
        image_id: Image identifier; stored as ``int(image_id)``.
        image: Input image; ``image.size`` is read as (width, height), so a
            PIL-style image is presumably expected -- confirm with callers.
        results: List that is extended in place.
        clsid2catid: Mapping from predicted class index to category id.

    Returns:
        The same ``results`` list.
    """
    # Convert to RGB so grayscale inputs do not break prediction.
    image = cvtColor(image)
    # Resize to the network input size (optionally letterboxed).
    net_size = (self.input_shape[1], self.input_shape[0])
    resized = resize_image(image, net_size, self.letterbox_image)
    # Normalise, then add the batch dimension.
    batch = np.expand_dims(
        preprocess_input(np.array(resized, dtype='float32')), 0)
    # Original (height, width) with a leading batch dimension.
    height_width = np.array([image.size[1], image.size[0]], dtype='float32')
    shape_batch = np.expand_dims(height_width, 0)

    # Forward pass.
    out_boxes, out_scores, out_classes = self.yolo_model.predict(
        [batch, shape_batch])

    for index, class_id in enumerate(out_classes):
        top, left, bottom, right = out_boxes[index]
        results.append({
            "image_id": int(image_id),
            "category_id": clsid2catid[class_id],
            # Corner format converted to [x, y, width, height].
            "bbox": [
                float(left), float(top),
                float(right - left), float(bottom - top),
            ],
            "score": float(out_scores[index]),
        })
    return results
def detect_image(self, image):
    """Classify one image, display it with the result, and return the class.

    Args:
        image: Input image accepted by ``cvtColor`` / ``letterbox_image``.

    Returns:
        The entry of ``self.class_names`` at the argmax of the soft-maxed
        model output.
    """
    # Convert to RGB so grayscale inputs do not break prediction.
    image = cvtColor(image)
    # Distortion-free resize to the network input size.
    boxed = letterbox_image(
        image, [self.input_shape[1], self.input_shape[0]])
    # Normalise, add the batch dimension, then move channels first.
    batch = np.expand_dims(preprocess_input(np.array(boxed, np.float32)), 0)
    batch = np.transpose(batch, (0, 3, 1, 2))

    with torch.no_grad():
        photo = torch.from_numpy(batch)
        if self.cuda:
            photo = photo.cuda()
        # Forward pass; softmax over the first sample's outputs.
        logits = self.model(photo)[0]
        preds = torch.softmax(logits, dim=-1).cpu().numpy()

    # Winning class and its probability.
    best_index = np.argmax(preds)
    class_name = self.class_names[best_index]
    probability = np.max(preds)

    # Draw the image with the prediction as its title.
    plt.subplot(1, 1, 1)
    plt.imshow(np.array(image))
    plt.title('Class:%s Probability:%.3f' % (class_name, probability))
    plt.show()
    return class_name
def get_miou_png(self, image):
    """Predict a per-pixel class map and return it as an image.

    The model output is cropped to remove letterbox padding, resized back
    to the original resolution with linear interpolation and reduced with
    argmax over the last axis.

    Args:
        image: Input image accepted by ``cvtColor`` / ``resize_image``.

    Returns:
        ``Image.fromarray`` of the uint8 class-index map, at the original
        image's height and width.
    """
    # Convert to RGB so grayscale inputs do not break prediction.
    rgb_image = cvtColor(image)
    source_shape = np.array(rgb_image).shape
    source_h, source_w = source_shape[0], source_shape[1]

    # Letterbox-resize; nw / nh are the resized content width / height.
    resized, nw, nh = resize_image(
        rgb_image, (self.input_shape[1], self.input_shape[0]))
    # Normalise, then add the batch dimension.
    batch = np.expand_dims(
        preprocess_input(np.array(resized, np.float32)), 0)

    # Forward pass; keep the first element of the batch.
    scores = self.model.predict(batch)[0]

    # Crop away the gray padding bars.
    row_start = int((self.input_shape[0] - nh) // 2)
    row_stop = int((self.input_shape[0] - nh) // 2 + nh)
    col_start = int((self.input_shape[1] - nw) // 2)
    col_stop = int((self.input_shape[1] - nw) // 2 + nw)
    scores = scores[row_start:row_stop, col_start:col_stop]

    # Back to the original resolution.
    scores = cv2.resize(scores, (source_w, source_h), interpolation=cv2.INTER_LINEAR)
    # Per-pixel class index.
    class_map = scores.argmax(axis=-1)

    return Image.fromarray(np.uint8(class_map))
def get_map_out(self, image):
    """Run detection on *image* and return the raw results for mAP evaluation.

    Args:
        image: Input picture. It is converted with ``cvtColor`` before
            prediction.

    Returns:
        A tuple ``(outboxes, class_thre, class_ids, masks_arg, masks_sigmoid)``.
        ``outboxes`` is ``box_thre`` with columns 0/1 and 2/3 swapped, or
        ``None`` when ``postprocess`` reports no boxes.
    """
    image_shape = np.array(np.shape(image)[0:2])
    # Convert to RGB so that e.g. grayscale inputs do not break prediction.
    image = cvtColor(image)

    # Resize / pad the picture according to the model configuration.
    image_data, image_metas, windows = resize_image([np.array(image)],
                                                    self.config)
    padded_shape = image_data[0].shape

    # Anchors depend on the size of the (padded) network input.
    anchors = np.expand_dims(get_anchors(self.config, padded_shape), 0)

    # Forward pass; only the detections and the masks are used.
    detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
        [image_data, image_metas, anchors], verbose=0)

    # The predictions refer to the padded picture; map them back to the
    # original one.
    box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
        detections[0], mrcnn_mask[0], image_shape, padded_shape, windows[0])

    if box_thre is None:
        return None, class_thre, class_ids, masks_arg, masks_sigmoid

    # Swap the coordinate order of every box (columns 0<->1 and 2<->3).
    outboxes = np.zeros_like(box_thre)
    outboxes[:, [0, 2]] = box_thre[:, [1, 3]]
    outboxes[:, [1, 3]] = box_thre[:, [0, 2]]
    return outboxes, class_thre, class_ids, masks_arg, masks_sigmoid
def get_random_data_with_Mosaic(self, annotation_line, input_shape, max_boxes=100, hue=.1, sat=1.5, val=1.5):
    """Build one mosaic training sample from several annotation lines.

    Each annotation line is ``"<image path> x1,y1,x2,y2,cls ..."``. Every
    picture is randomly flipped, resized and pasted onto its own gray canvas;
    the canvases are then stitched together around a random split point and
    the result is colour-jittered in HSV space.

    Args:
        annotation_line: Iterable of annotation strings. The placement tables
            hold four entries, so at most four lines can be consumed.
        input_shape: ``(height, width)`` of the output picture.
        max_boxes: Unused; kept for interface compatibility.
        hue: Maximum hue shift, as a fraction of the full hue circle.
        sat: Maximum saturation gain (the inverse gain is equally likely).
        val: Maximum value gain (the inverse gain is equally likely).

    Returns:
        ``(new_image, new_boxes)``: the stitched float RGB picture in the
        0-255 range and the boxes returned by ``self.merge_bboxes``.
    """
    h, w = input_shape
    min_offset_x = self.rand(0.25, 0.75)
    min_offset_y = self.rand(0.25, 0.75)

    # Random size of each of the four tiles.
    nws = [int(w * self.rand(0.4, 1)) for _ in range(4)]
    nhs = [int(h * self.rand(0.4, 1)) for _ in range(4)]

    # Split point of the mosaic and the paste position of every tile.
    cutx = int(w * min_offset_x)
    cuty = int(h * min_offset_y)
    place_x = [cutx - nws[0], cutx - nws[1], cutx, cutx]
    place_y = [cuty - nhs[0], cuty, cuty, cuty - nhs[3]]

    image_datas = []
    box_datas = []
    for index, line in enumerate(annotation_line):
        # Split the line into the image path and the box descriptions.
        line_content = line.split()
        image = Image.open(line_content[0])
        image = cvtColor(image)
        iw, ih = image.size
        box = np.array([
            np.array(list(map(int, box.split(','))))
            for box in line_content[1:]
        ])

        # Random horizontal flip (only applied when the picture has boxes).
        flip = self.rand() < .5
        if flip and len(box) > 0:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            box[:, [0, 2]] = iw - box[:, [2, 0]]

        nw = nws[index]
        nh = nhs[index]
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste the tile at its position on a gray canvas.
        dx = place_x[index]
        dy = place_y[index]
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image)

        # Move the boxes with the tile, clip them to the canvas and drop the
        # ones that became degenerate.
        box_data = []
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        image_datas.append(image_data)
        box_datas.append(box_data)

    # Stitch the four quadrants together around (cutx, cuty).
    new_image = np.zeros([h, w, 3])
    new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
    new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
    new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
    new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

    # Colour jitter in HSV space.
    hue = self.rand(-hue, hue)
    sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
    val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
    x = cv2.cvtColor(np.array(new_image / 255, np.float32),
                     cv2.COLOR_RGB2HSV)
    # For float32 input OpenCV expresses hue in degrees (0..360), so the
    # shifted hue has to wrap around 360, not around 1.
    x[..., 0] += hue * 360
    x[..., 0][x[..., 0] > 360] -= 360
    x[..., 0][x[..., 0] < 0] += 360
    x[..., 1] *= sat
    x[..., 2] *= val
    x[x[:, :, 0] > 360, 0] = 360
    x[:, :, 1:][x[:, :, 1:] > 1] = 1
    x[x < 0] = 0
    new_image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

    # Clip / merge the boxes of the four tiles at the split point.
    new_boxes = self.merge_bboxes(box_datas, cutx, cuty)
    return new_image, new_boxes
def get_map_txt(self, image_id, image, class_names, map_out_path):
    """Write the detections for one image to ``detection-results/<image_id>.txt``.

    One line is written per detection whose class is in *class_names*:
    ``"<class> <score> <left> <top> <right> <bottom>"``, with the score
    truncated to its first six characters. The file is created (empty) even
    when nothing is detected.

    Args:
        image_id: Identifier used as the file name of the result file.
        image: Input picture. It is converted with ``cvtColor`` before
            prediction.
        class_names: Classes to keep; detections of other classes are skipped.
        map_out_path: Directory that contains the ``detection-results`` folder.

    Returns:
        None.
    """
    result_path = os.path.join(map_out_path,
                               "detection-results/" + image_id + ".txt")
    # The context manager guarantees the file is closed on every exit path,
    # including the early return below and any exception during inference.
    with open(result_path, "w") as f:
        # Height and width of the input picture and of the resized input.
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])

        # Convert to RGB so that e.g. grayscale inputs do not break prediction.
        image = cvtColor(image)

        # Resize the picture to the computed input size, given as (width, height).
        image_data = resize_image(image, [input_shape[1], input_shape[0]])

        # Normalise and add the batch dimension.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        # Region-proposal network predictions (element 2 is fed to the
        # classifier below).
        rpn_pred = [x.numpy() for x in self.model_rpn(image_data)]

        # Generate the anchors and decode the proposals.
        anchors = get_anchors(input_shape, self.backbone, self.anchors_size)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        # Classifier predictions for the proposals (coordinate columns are
        # reordered before being passed in).
        classifier_pred = self.model_classifier(
            [rpn_pred[2], rpn_results[:, :, [1, 0, 3, 2]]])
        classifier_pred = [x.numpy() for x in classifier_pred]

        # Decode the proposals with the classifier output to get final boxes.
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, rpn_results, image_shape, input_shape,
            self.confidence)

        # Nothing detected: leave the (empty) result file behind.
        if len(results[0]) <= 0:
            return

        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[int(c)]
            if predicted_class not in class_names:
                continue
            score = str(top_conf[i])
            top, left, bottom, right = top_boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image):
    """Detect instances in *image* and draw their masks, boxes and labels.

    Args:
        image: Input picture. It is converted with ``cvtColor`` before
            prediction.

    Returns:
        A new PIL image with coloured masks blended in and a labelled box per
        instance, or the RGB-converted input when ``postprocess`` reports no
        boxes.
    """
    image_shape = np.array(np.shape(image)[0:2])
    # Convert to RGB so that e.g. grayscale inputs do not break prediction.
    image = cvtColor(image)
    image_origin = np.array(image, np.uint8)

    # Resize / pad the picture according to the model configuration.
    image_data, image_metas, windows = resize_image([np.array(image)],
                                                    self.config)
    padded_shape = image_data[0].shape

    # Anchors depend on the size of the (padded) network input.
    anchors = np.expand_dims(get_anchors(self.config, padded_shape), 0)

    # Forward pass; only the detections and the masks are used.
    detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
        [image_data, image_metas, anchors], verbose=0)

    # The predictions refer to the padded picture; map them back to the
    # original one.
    box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
        detections[0], mrcnn_mask[0], image_shape, padded_shape, windows[0])

    if box_thre is None:
        return image

    # Per-pixel class map of shape [image_shape[0], image_shape[1]]: for each
    # pixel, take the (class id + 1)-weighted mask value of the instance
    # selected by masks_arg.
    instance_count = np.shape(masks_sigmoid)[-1]
    masks_class = masks_sigmoid * (class_ids[None, None, :] + 1)
    masks_class = np.reshape(masks_class, [-1, instance_count])
    pixel_rows = np.arange(np.shape(masks_class)[0])
    chosen = masks_class[pixel_rows, np.reshape(masks_arg, [-1])]
    masks_class = np.reshape(chosen, [image_shape[0], image_shape[1]])

    # Font settings and line thickness.
    scale = 0.6
    thickness = int(
        max((image.size[0] + image.size[1]) // self.IMAGE_MAX_DIM, 1))
    font = cv2.FONT_HERSHEY_DUPLEX

    # Blend the coloured masks over the original picture.
    color_masks = self.colors[masks_class].astype('uint8')
    image_fused = cv2.addWeighted(color_masks, 0.4, image_origin, 0.6,
                                  gamma=0)

    for i, class_id in enumerate(class_ids):
        top, left, bottom, right = np.array(box_thre[i, :], np.int32)

        # Box outline in the colour of the class.
        color = self.colors[class_id + 1].tolist()
        cv2.rectangle(image_fused, (left, top), (right, bottom), color,
                      thickness)

        # Class name and score on a filled background.
        class_name = self.class_names[class_id]
        print(class_name, top, left, bottom, right)
        text_str = f'{class_name}: {class_thre[i]:.2f}'
        text_w, text_h = cv2.getTextSize(text_str, font, scale, 1)[0]
        cv2.rectangle(image_fused, (left, top),
                      (left + text_w, top + text_h + 5), color, -1)
        cv2.putText(image_fused, text_str, (left, top + 15), font, scale,
                    (255, 255, 255), 1, cv2.LINE_AA)

    return Image.fromarray(np.uint8(image_fused))
def detect_image(self, image):
    """Detect objects in *image* and draw labelled boxes on it.

    Args:
        image: Input picture. It is converted with ``cvtColor`` before
            prediction.

    Returns:
        The RGB-converted picture. Boxes and labels are drawn onto it in
        place; it is returned untouched when suppression yields no result.
    """
    # Height and width of the input picture.
    image_shape = np.array(np.shape(image)[0:2])
    # Convert to RGB so that e.g. grayscale inputs do not break prediction.
    image = cvtColor(image)

    # Resize (optionally with gray padding) to the network input size.
    net_size = (self.input_shape[1], self.input_shape[0])
    image_data = resize_image(image, net_size, self.letterbox_image)

    # Normalise, move the last axis to the front, add the batch dimension.
    normalised = preprocess_input(np.array(image_data, dtype='float32'))
    image_data = np.expand_dims(np.transpose(normalised, (2, 0, 1)), 0)

    with torch.no_grad():
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()

        # Forward pass and box decoding.
        outputs = self.net(images)
        outputs = self.bbox_util.decode_box(outputs)

        # Stack the predictions and run non-maximum suppression.
        results = self.bbox_util.non_max_suppression(
            torch.cat(outputs, 1),
            self.num_classes,
            self.input_shape,
            image_shape,
            self.letterbox_image,
            conf_thres=self.confidence,
            nms_thres=self.nms_iou)

        if results[0] is None:
            return image

        detections = results[0]
        top_label = np.array(detections[:, 6], dtype='int32')
        top_conf = detections[:, 4] * detections[:, 5]
        top_boxes = detections[:, :4]

    # Font and line thickness scale with the picture size.
    font_size = np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=font_size)
    thickness = int(
        max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))

    # Draw every detection.
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        # Clamp the box to the picture.
        top, left, bottom, right = top_boxes[i]
        top = max(0, np.floor(top).astype('int32'))
        left = max(0, np.floor(left).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom).astype('int32'))
        right = min(image.size[0], np.floor(right).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        # Put the label above the box when it fits, otherwise just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw

    return image
def detect_image(self, image):
    """Segment *image* and render the result according to ``self.mix_type``.

    Args:
        image: Input picture. It is converted with ``cvtColor`` before
            prediction.

    Returns:
        A PIL image:
        * ``mix_type == 0``: colour-coded class map blended with the input.
        * ``mix_type == 1``: the colour-coded class map alone.
        * ``mix_type == 2``: the input with every pixel of class 0 set to 0.
        * any other value: the RGB-converted input, unchanged.
    """
    # Convert to RGB so that e.g. grayscale inputs do not break prediction.
    image = cvtColor(image)

    # Keep a copy of the input for blending / masking later on.
    old_img = copy.deepcopy(image)
    source_shape = np.array(image).shape
    orininal_h, orininal_w = source_shape[0], source_shape[1]
    net_h, net_w = self.input_shape[0], self.input_shape[1]

    # Resize with gray padding; nw / nh are returned by resize_image and are
    # used below as the size of the un-padded content.
    image_data, nw, nh = resize_image(image, (net_w, net_h))

    # Normalise and add the batch dimension.
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, np.float32)), 0)

    # Run the network and keep the first (only) sample.
    pr = self.model.predict(image_data)[0]

    # Cut away the centred padding.
    row_start = int((net_h - nh) // 2)
    row_stop = int((net_h - nh) // 2 + nh)
    col_start = int((net_w - nw) // 2)
    col_stop = int((net_w - nw) // 2 + nw)
    pr = pr[row_start:row_stop, col_start:col_stop]

    # Scale the scores back to the original resolution.
    pr = cv2.resize(pr, (orininal_w, orininal_h),
                    interpolation=cv2.INTER_LINEAR)

    # Class of each pixel.
    pr = pr.argmax(axis=-1)

    mix_type = self.mix_type
    if mix_type in (0, 1):
        # Look the colour of every pixel up in the palette.
        palette = np.array(self.colors, np.uint8)
        seg_img = np.reshape(palette[np.reshape(pr, [-1])],
                             [orininal_h, orininal_w, -1])
        image = Image.fromarray(np.uint8(seg_img))
        if mix_type == 0:
            # Blend the colour map with the original picture.
            image = Image.blend(old_img, image, 0.7)
    elif mix_type == 2:
        # Keep only the pixels whose class is not 0.
        keep = np.expand_dims(pr != 0, -1)
        seg_img = (keep * np.array(old_img, np.float32)).astype('uint8')
        image = Image.fromarray(np.uint8(seg_img))

    return image
def detect_image(self, image, crop=False, count=False):
    """Detect objects in *image* and draw labelled boxes on it.

    Args:
        image: Input picture. It is converted with ``cvtColor`` before
            prediction.
        crop: When true, save every detected box as
            ``img_crop/crop_<index>.png``.
        count: When true, print the number of detections per class.

    Returns:
        The RGB-converted picture. Boxes and labels are drawn onto it in
        place; it is returned untouched when post-processing yields no
        result.
    """
    # Height and width of the input picture.
    image_shape = np.array(np.shape(image)[0:2])
    # Convert to RGB so that e.g. grayscale inputs do not break prediction.
    image = cvtColor(image)

    # Resize (optionally with gray padding) to the network input size.
    net_size = (self.input_shape[1], self.input_shape[0])
    image_data = resize_image(image, net_size, self.letterbox_image)

    # Normalise and add the batch dimension.
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, dtype='float32')), 0)

    outputs = self.centernet.predict(image_data)

    # Post-processing: confidence thresholding and, depending on self.nms,
    # an additional non-maximum suppression pass.
    results = self.bbox_util.postprocess(outputs,
                                         self.nms,
                                         image_shape,
                                         self.input_shape,
                                         self.letterbox_image,
                                         confidence=self.confidence)

    # Nothing detected: hand the picture back as it is.
    if results[0] is None:
        return image

    detections = results[0]
    top_label = np.array(detections[:, 5], dtype='int32')
    top_conf = detections[:, 4]
    top_boxes = detections[:, :4]

    # Font and line thickness scale with the picture size.
    picture_shape = np.shape(image)
    font_size = np.floor(3e-2 * picture_shape[1] + 0.5).astype('int32')
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=font_size)
    thickness = max(
        (picture_shape[0] + picture_shape[1]) // self.input_shape[0], 1)

    def _clamp(box):
        # Floor the coordinates and clamp them to the picture.
        top, left, bottom, right = box
        top = max(0, np.floor(top).astype('int32'))
        left = max(0, np.floor(left).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom).astype('int32'))
        right = min(image.size[0], np.floor(right).astype('int32'))
        return top, left, bottom, right

    # Optional per-class counting.
    if count:
        print("top_label:", top_label)
        classes_nums = np.zeros([self.num_classes])
        for class_index in range(self.num_classes):
            num = np.sum(top_label == class_index)
            if num > 0:
                print(self.class_names[class_index], " : ", num)
            classes_nums[class_index] = num
        print("classes_nums:", classes_nums)

    # Optional cropping of every detection.
    if crop:
        for i, _ in enumerate(top_label):
            top, left, bottom, right = _clamp(top_boxes[i])

            dir_save_path = "img_crop"
            if not os.path.exists(dir_save_path):
                os.makedirs(dir_save_path)
            crop_image = image.crop([left, top, right, bottom])
            crop_image.save(os.path.join(dir_save_path,
                                         "crop_" + str(i) + ".png"),
                            quality=95,
                            subsampling=0)
            print("save crop_" + str(i) + ".png to " + dir_save_path)

    # Draw every detection.
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]
        top, left, bottom, right = _clamp(top_boxes[i])

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        # Put the label above the box when it fits, otherwise just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw

    return image
def detect_image(self, image, crop = False, count = False):
    """Detect objects in *image* and draw labelled boxes on it.

    Args:
        image: Input picture. It is converted with ``cvtColor`` before
            prediction.
        crop: When true, save every detected box as
            ``img_crop/crop_<index>.png``.
        count: When true, print the number of detections per class.

    Returns:
        The RGB-converted picture. Boxes and labels are drawn onto it in
        place; it is returned untouched when decoding yields no boxes.
    """
    image_shape = np.array(np.shape(image)[0:2])
    # Convert to RGB so that e.g. grayscale inputs do not break prediction.
    image = cvtColor(image)

    # Resize (optionally with gray padding) to the network input size.
    net_size = (self.input_shape[1], self.input_shape[0])
    image_data = resize_image(image, net_size, self.letterbox_image)

    # Add the batch dimension, then normalise.
    batch = np.expand_dims(np.array(image_data, dtype='float32'), 0)
    image_data = preprocess_input(batch)

    preds = self.get_pred(image_data).numpy()

    # Decode the raw predictions into boxes.
    results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
                                        self.input_shape, self.letterbox_image,
                                        confidence=self.confidence)

    # Nothing detected: hand the picture back as it is.
    if len(results[0]) <= 0:
        return image

    detections = results[0]
    top_label = np.array(detections[:, 4], dtype = 'int32')
    top_conf = detections[:, 5]
    top_boxes = detections[:, :4]

    # Font and line thickness scale with the picture size.
    picture_shape = np.shape(image)
    font_size = np.floor(3e-2 * picture_shape[1] + 0.5).astype('int32')
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=font_size)
    thickness = max(
        (picture_shape[0] + picture_shape[1]) // self.input_shape[0], 1)

    # Optional per-class counting.
    if count:
        print("top_label:", top_label)
        classes_nums = np.zeros([self.num_classes])
        for class_index in range(self.num_classes):
            num = np.sum(top_label == class_index)
            if num > 0:
                print(self.class_names[class_index], " : ", num)
            classes_nums[class_index] = num
        print("classes_nums:", classes_nums)

    # Optional cropping of every detection.
    if crop:
        for i, raw_box in enumerate(top_boxes):
            top, left, bottom, right = raw_box
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom).astype('int32'))
            right = min(image.size[0], np.floor(right).astype('int32'))

            dir_save_path = "img_crop"
            if not os.path.exists(dir_save_path):
                os.makedirs(dir_save_path)
            crop_image = image.crop([left, top, right, bottom])
            crop_image.save(
                os.path.join(dir_save_path, "crop_" + str(i) + ".png"),
                quality=95, subsampling=0)
            print("save crop_" + str(i) + ".png to " + dir_save_path)

    # Draw every detection.
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        # Clamp the box to the picture.
        top, left, bottom, right = top_boxes[i]
        top = max(0, np.floor(top).astype('int32'))
        left = max(0, np.floor(left).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom).astype('int32'))
        right = min(image.size[0], np.floor(right).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        # Put the label above the box when it fits, otherwise just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
        del draw

    return image
def detect_image(self, image, crop = False, count = False):
    """Detect objects in *image* and draw labelled boxes on it.

    Args:
        image: Input picture. It is converted with ``cvtColor`` before
            prediction.
        crop: When true, save every detected box as
            ``img_crop/crop_<index>.png``.
        count: When true, print the number of detections per class.

    Returns:
        The RGB-converted picture, with boxes and labels drawn onto it in
        place.
    """
    # Convert to RGB so that e.g. grayscale inputs do not break prediction.
    image = cvtColor(image)

    # Resize (optionally with gray padding) to the network input size.
    net_size = (self.input_shape[1], self.input_shape[0])
    image_data = resize_image(image, net_size, self.letterbox_image)

    # Normalise and add the batch dimension.
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, dtype='float32')), 0)

    # Run the graph; the original picture size is fed as (height, width).
    feed = {
        self.yolo_model.input: image_data,
        self.input_image_shape: [image.size[1], image.size[0]],
        K.learning_phase(): 0,
    }
    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes], feed_dict=feed)

    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    # Font and line thickness scale with the picture size.
    font_size = np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=font_size)
    thickness = int(
        max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))

    # Optional per-class counting.
    if count:
        print("top_label:", out_classes)
        classes_nums = np.zeros([self.num_classes])
        for class_index in range(self.num_classes):
            num = np.sum(out_classes == class_index)
            if num > 0:
                print(self.class_names[class_index], " : ", num)
            classes_nums[class_index] = num
        print("classes_nums:", classes_nums)

    # Optional cropping of every detection.
    if crop:
        for i, raw_box in enumerate(out_boxes):
            top, left, bottom, right = raw_box
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom).astype('int32'))
            right = min(image.size[0], np.floor(right).astype('int32'))

            dir_save_path = "img_crop"
            if not os.path.exists(dir_save_path):
                os.makedirs(dir_save_path)
            crop_image = image.crop([left, top, right, bottom])
            crop_image.save(
                os.path.join(dir_save_path, "crop_" + str(i) + ".png"),
                quality=95, subsampling=0)
            print("save crop_" + str(i) + ".png to " + dir_save_path)

    # Draw every detection.
    for i, c in enumerate(out_classes):
        predicted_class = self.class_names[int(c)]
        score = out_scores[i]

        # Clamp the box to the picture.
        top, left, bottom, right = out_boxes[i]
        top = max(0, np.floor(top).astype('int32'))
        left = max(0, np.floor(left).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom).astype('int32'))
        right = min(image.size[0], np.floor(right).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        # Put the label above the box when it fits, otherwise just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
        del draw

    return image
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
    """Load one annotated image and optionally apply random augmentation.

    The annotation line is ``"<image path> x1,y1,x2,y2,cls ..."``.

    Args:
        annotation_line: Annotation string for one picture.
        input_shape: ``(height, width)`` of the output picture.
        jitter: Strength of the random aspect-ratio distortion.
        hue: Maximum hue shift, as a fraction of the full hue circle.
        sat: Maximum saturation gain (the inverse gain is equally likely).
        val: Maximum value gain (the inverse gain is equally likely).
        random: When false, only a centred, distortion-free resize with gray
            padding is applied.

    Returns:
        ``(image_data, box)``: the float RGB picture in the 0-255 range and
        the boxes mapped into it; boxes that end up no larger than one pixel
        in width or height are dropped.
    """
    line = annotation_line.split()

    # Read the picture and convert it to RGB.
    image = Image.open(line[0])
    image = cvtColor(image)

    # Source size and target size.
    iw, ih = image.size
    h, w = input_shape

    # Ground-truth boxes.
    box = np.array(
        [np.array(list(map(int, box.split(',')))) for box in line[1:]])

    if not random:
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)
        dx = (w - nw) // 2
        dy = (h - nh) // 2

        # Centre the resized picture on a gray canvas.
        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image, np.float32)

        # Move the boxes with the picture and clip them to the canvas.
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box

        return image_data, box

    # Random scale and aspect-ratio distortion.
    new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(
        1 - jitter, 1 + jitter)
    scale = self.rand(.25, 2)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Paste at a random position on a gray canvas.
    dx = int(self.rand(0, w - nw))
    dy = int(self.rand(0, h - nh))
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, (dx, dy))
    image = new_image

    # Random horizontal flip.
    flip = self.rand() < .5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # Colour jitter in HSV space.
    hue = self.rand(-hue, hue)
    sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
    val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
    x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
    # For float32 input OpenCV expresses hue in degrees (0..360), so the
    # shifted hue has to wrap around 360, not around 1.
    x[..., 0] += hue * 360
    x[..., 0][x[..., 0] > 360] -= 360
    x[..., 0][x[..., 0] < 0] += 360
    x[..., 1] *= sat
    x[..., 2] *= val
    x[x[:, :, 0] > 360, 0] = 360
    x[:, :, 1:][x[:, :, 1:] > 1] = 1
    x[x < 0] = 0
    image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

    # Move the boxes with the picture, mirror them if needed and clip them.
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        if flip:
            box[:, [0, 2]] = w - box[:, [2, 0]]
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]

    return image_data, box
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
    """Load one annotated image and fit it, with its boxes, to ``input_shape``.

    Args:
        annotation_line: Whitespace-separated record. The first token is the
            image path; every following token is a comma-separated list of
            integers describing one box. Columns 0-3 are handled as
            ``x_min, y_min, x_max, y_max``; further columns pass through.
        input_shape: ``(height, width)`` of the output canvas.
        jitter: Half-width of the range the two random aspect-ratio factors
            are drawn from (``1 - jitter`` to ``1 + jitter``).
        hue: Maximum relative deviation from 1 of the random hue gain.
        sat: Maximum relative deviation from 1 of the random saturation gain.
        val: Maximum relative deviation from 1 of the random value gain.
        random: When false, only a centred letterbox resize is applied.

    Returns:
        ``(image_data, box)``. ``image_data`` is an ``(h, w, 3)`` RGB array:
        float32 when ``random`` is false, uint8 otherwise. ``box`` holds the
        shuffled boxes in canvas coordinates, clipped to the canvas, with
        rows whose width or height is not greater than one pixel removed.

    Note:
        ``self.rand`` is assumed to return a random float in the given
        range (defaults presumably 0..1) - confirm against its definition.
    """
    fields = annotation_line.split()
    # ------------------------------#
    #   Read the image and convert it to RGB
    # ------------------------------#
    image = cvtColor(Image.open(fields[0]))
    # ------------------------------#
    #   Source image size and target canvas size
    # ------------------------------#
    iw, ih = image.size
    h, w = input_shape
    # ------------------------------#
    #   Parse the ground-truth boxes
    # ------------------------------#
    box = np.array([np.array([int(v) for v in entry.split(',')]) for entry in fields[1:]])

    if not random:
        # ---------------------------------#
        #   Plain letterbox: uniform scale, centred, gray padding
        # ---------------------------------#
        ratio = min(w/iw, h/ih)
        nw, nh = int(iw*ratio), int(ih*ratio)
        dx, dy = (w-nw)//2, (h-nh)//2
        flip = False

        resized = image.resize((nw, nh), Image.BICUBIC)
        canvas = Image.new('RGB', (w, h), (128, 128, 128))
        canvas.paste(resized, (dx, dy))
        image_data = np.array(canvas, np.float32)
    else:
        # ------------------------------------------#
        #   Rescale the image and distort its aspect ratio
        # ------------------------------------------#
        ar_num = self.rand(1-jitter, 1+jitter)
        ar_den = self.rand(1-jitter, 1+jitter)
        new_ar = iw/ih * ar_num / ar_den
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        resized = image.resize((nw, nh), Image.BICUBIC)

        # ------------------------------------------#
        #   Paste at a random offset; the rest of the canvas stays gray
        # ------------------------------------------#
        dx = int(self.rand(0, w-nw))
        dy = int(self.rand(0, h-nh))
        canvas = Image.new('RGB', (w, h), (128, 128, 128))
        canvas.paste(resized, (dx, dy))

        # ------------------------------------------#
        #   Random horizontal flip
        # ------------------------------------------#
        flip = self.rand() < .5
        if flip:
            canvas = canvas.transpose(Image.FLIP_LEFT_RIGHT)

        image_data = np.array(canvas, np.uint8)
        # ---------------------------------#
        #   Colour jitter: one random gain per HSV channel
        # ---------------------------------#
        gains = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        # ---------------------------------#
        #   Split the image into its HSV channels
        # ---------------------------------#
        hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype = image_data.dtype
        # ---------------------------------#
        #   Apply the gains through 256-entry lookup tables
        # ---------------------------------#
        ramp = np.arange(0, 256, dtype=gains.dtype)
        lut_hue = ((ramp * gains[0]) % 180).astype(dtype)
        lut_sat = np.clip(ramp * gains[1], 0, 255).astype(dtype)
        lut_val = np.clip(ramp * gains[2], 0, 255).astype(dtype)

        jittered = cv2.merge((
            cv2.LUT(hue_ch, lut_hue),
            cv2.LUT(sat_ch, lut_sat),
            cv2.LUT(val_ch, lut_val),
        ))
        image_data = cv2.cvtColor(jittered, cv2.COLOR_HSV2RGB)

    # ---------------------------------#
    #   Map the ground-truth boxes onto the canvas
    # ---------------------------------#
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]]*nw/iw + dx
        box[:, [1, 3]] = box[:, [1, 3]]*nh/ih + dy
        if flip:
            # Mirror x coordinates; swapping the columns keeps x_min < x_max.
            box[:, [0, 2]] = w - box[:, [2, 0]]
        # Clip to the canvas.
        box[:, 0:2] = np.clip(box[:, 0:2], 0, None)
        box[:, 2] = np.clip(box[:, 2], None, w)
        box[:, 3] = np.clip(box[:, 3], None, h)
        # Discard boxes that are not wider and taller than one pixel.
        widths = box[:, 2] - box[:, 0]
        heights = box[:, 3] - box[:, 1]
        box = box[(widths > 1) & (heights > 1)]

    return image_data, box
def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
    """Build a mosaic training sample from several annotated images.

    Args:
        annotation_line: Iterable of annotation records (despite the
            singular name). Each record is a whitespace-separated string:
            image path first, then one comma-separated integer list per
            box. The first four records fill the top-left, bottom-left,
            bottom-right and top-right quadrants, in that order.
        input_shape: ``(height, width)`` of the output canvas.
        jitter: Half-width of the range the two random aspect-ratio factors
            are drawn from (``1 - jitter`` to ``1 + jitter``).
        hue: Maximum relative deviation from 1 of the random hue gain.
        sat: Maximum relative deviation from 1 of the random saturation gain.
        val: Maximum relative deviation from 1 of the random value gain.

    Returns:
        ``(new_image, new_boxes)``: the uint8 ``(h, w, 3)`` RGB mosaic and
        whatever ``self.merge_bboxes`` returns for the per-image boxes and
        the two cut coordinates.

    Note:
        ``self.rand`` is assumed to return a random float in the given
        range (defaults presumably 0..1) - confirm against its definition.
    """
    h, w = input_shape
    min_offset_x = self.rand(0.3, 0.7)
    min_offset_y = self.rand(0.3, 0.7)
    # Pixel position where the four quadrants meet.
    cutx = int(w * min_offset_x)
    cuty = int(h * min_offset_y)

    image_datas = []
    box_datas = []

    for index, line in enumerate(annotation_line):
        # ---------------------------------#
        #   Split the record and open the image
        # ---------------------------------#
        line_content = line.split()
        image = cvtColor(Image.open(line_content[0]))
        # ---------------------------------#
        #   Size of the source image
        # ---------------------------------#
        iw, ih = image.size
        # ---------------------------------#
        #   Parse the boxes
        # ---------------------------------#
        box = np.array([
            np.array([int(v) for v in entry.split(',')])
            for entry in line_content[1:]
        ])

        # ---------------------------------#
        #   Random horizontal flip (only applied when boxes are present)
        # ---------------------------------#
        flip = self.rand() < .5
        if flip and len(box) > 0:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            box[:, [0, 2]] = iw - box[:, [2, 0]]

        # ------------------------------------------#
        #   Rescale the image and distort its aspect ratio
        # ------------------------------------------#
        ar_num = self.rand(1 - jitter, 1 + jitter)
        ar_den = self.rand(1 - jitter, 1 + jitter)
        new_ar = iw / ih * ar_num / ar_den
        scale = self.rand(.4, 1)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # -----------------------------------------------#
        #   Anchor the image at the cut point of its quadrant.
        #   Only the first four records get a placement; any extra
        #   record keeps the previous offsets and its image is never
        #   composited below.
        # -----------------------------------------------#
        if index < 4:
            dx = cutx - nw if index in (0, 1) else cutx
            dy = cuty - nh if index in (0, 3) else cuty

        canvas = Image.new('RGB', (w, h), (128, 128, 128))
        canvas.paste(image, (dx, dy))
        image_data = np.array(canvas)

        # ---------------------------------#
        #   Map the boxes onto the canvas
        # ---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            # Clip to the canvas.
            box[:, 0:2] = np.clip(box[:, 0:2], 0, None)
            box[:, 2] = np.clip(box[:, 2], None, w)
            box[:, 3] = np.clip(box[:, 3], None, h)
            # Discard boxes that are not wider and taller than one pixel.
            widths = box[:, 2] - box[:, 0]
            heights = box[:, 3] - box[:, 1]
            box = box[(widths > 1) & (heights > 1)]
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box
        else:
            box_data = []

        image_datas.append(image_data)
        box_datas.append(box_data)

    # ---------------------------------#
    #   Stitch the four quadrants together
    # ---------------------------------#
    upper, lower = slice(None, cuty), slice(cuty, None)
    left, right = slice(None, cutx), slice(cutx, None)
    quadrants = ((upper, left), (lower, left), (lower, right), (upper, right))
    mosaic = np.zeros([h, w, 3])
    for idx, (rows, cols) in enumerate(quadrants):
        mosaic[rows, cols, :] = image_datas[idx][rows, cols, :]
    new_image = mosaic.astype(np.uint8)

    # ---------------------------------#
    #   Colour jitter: one random gain per HSV channel
    # ---------------------------------#
    gains = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    # ---------------------------------#
    #   Split the mosaic into its HSV channels
    # ---------------------------------#
    hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
    dtype = new_image.dtype
    # ---------------------------------#
    #   Apply the gains through 256-entry lookup tables
    # ---------------------------------#
    ramp = np.arange(0, 256, dtype=gains.dtype)
    lut_hue = ((ramp * gains[0]) % 180).astype(dtype)
    lut_sat = np.clip(ramp * gains[1], 0, 255).astype(dtype)
    lut_val = np.clip(ramp * gains[2], 0, 255).astype(dtype)

    jittered = cv2.merge((
        cv2.LUT(hue_ch, lut_hue),
        cv2.LUT(sat_ch, lut_sat),
        cv2.LUT(val_ch, lut_val),
    ))
    new_image = cv2.cvtColor(jittered, cv2.COLOR_HSV2RGB)

    # ---------------------------------#
    #   Merge the per-image boxes across the cut lines
    # ---------------------------------#
    new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

    return new_image, new_boxes