def get_depths(self):
    # return input images and predictions
    def detach_tensor(tensor):
        return tensor.cpu().detach().numpy()

    tensorImage, disparities, masks, imageNetTensor, dataset_ids = next(
        iter(self.data_loader))
    tensorImage = tensorImage.to(device, non_blocking=True)
    disparities = disparities.to(device, non_blocking=True)
    masks = masks.to(device, non_blocking=True)

    # pretrained networks from 3D KBE were trained with images normalized between 0 and 1
    if self.eval_pretrained:
        tensorImage = (tensorImage + 1) / 2

    tensorResized = resize_image(tensorImage)

    # retrieve parameters for the different sets of images
    tensorFocal = torch.Tensor([
        self.dataset_paths[int(id.item())]['params']['focal']
        for id in dataset_ids
    ])
    tensorBaseline = torch.Tensor([
        self.dataset_paths[int(id.item())]['params']['baseline']
        for id in dataset_ids
    ])
    tensorFocal = tensorFocal.view(-1, 1).repeat(
        1, 1, tensorImage.size(2) * tensorImage.size(3)).view(*disparities.size())
    tensorBaseline = tensorBaseline.view(-1, 1).repeat(
        1, 1, tensorImage.size(2) * tensorImage.size(3)).view(*disparities.size())
    tensorBaseline = tensorBaseline.to(device)
    tensorFocal = tensorFocal.to(device)

    # depth estimation
    tensorDisparity = self.moduleDisparity(
        tensorResized, self.moduleSemantics(tensorResized))
    # segment the image into masks using Mask R-CNN
    objectPredictions = self.moduleMaskrcnn(tensorImage)

    tensorDisparityAdjusted = tensorDisparity
    # increase resolution
    tensorDisparityRefined = self.moduleRefine(
        tensorImage[:2, :, :, :], tensorDisparityAdjusted[:2, :, :, :])

    return (detach_tensor(tensorDisparity),
            detach_tensor(tensorDisparityAdjusted),
            detach_tensor(tensorDisparityRefined),
            detach_tensor(disparities),
            detach_tensor(resize_image(disparities, max_size=256)),
            detach_tensor((tensorImage.permute(0, 2, 3, 1) + 1) / 2),
            objectPredictions,
            detach_tensor(masks),
            detach_tensor(resize_image(masks, max_size=256)))
def get_map_txt(self, image_id, image, class_names, map_out_path):
    f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension and normalize.
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, dtype='float32')), 0)

    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })

    for i, c in enumerate(out_classes):
        predicted_class = self.class_names[int(c)]
        score = str(out_scores[i])
        top, left, bottom, right = out_boxes[i]
        if predicted_class not in class_names:
            continue
        f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6], str(int(left)), str(int(top)),
            str(int(right)), str(int(bottom))))

    f.close()
    return
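# A minimal usage sketch for the get_map_txt variants above, assuming the
# standard mAP layout where map_out_path/detection-results/ must exist before
# writing. The class name `YOLO`, `VOCdevkit_path`, and `image_ids` are
# illustrative assumptions, not names from the source.
import os
from PIL import Image

def write_detection_results(model, image_ids, VOCdevkit_path, class_names, map_out_path):
    # the per-image txt files go into detection-results/
    os.makedirs(os.path.join(map_out_path, "detection-results"), exist_ok=True)
    for image_id in image_ids:
        image_path = os.path.join(VOCdevkit_path, "VOC2007/JPEGImages/" + image_id + ".jpg")
        image = Image.open(image_path)
        model.get_map_txt(image_id, image, class_names, map_out_path)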
def get_FPS(self, image, test_interval):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension, preprocess, and normalize.
    #---------------------------------------------------------#
    image_data = preprocess_input(
        np.expand_dims(np.array(image_data, dtype='float32'), 0))

    preds = self.get_pred(image_data).numpy()
    #-----------------------------------------------------------#
    #   Decode the predictions.
    #-----------------------------------------------------------#
    results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
                                        self.input_shape, self.letterbox_image,
                                        confidence=self.confidence)

    t1 = time.time()
    for _ in range(test_interval):
        # same prediction + decoding as above, timed
        preds = self.get_pred(image_data).numpy()
        results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
                                            self.input_shape, self.letterbox_image,
                                            confidence=self.confidence)
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_FPS(self, image, test_interval):
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension and normalize.
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, dtype='float32')), 0)
    #---------------------------------------------------------#
    #   Feed the image into the network for prediction!
    #---------------------------------------------------------#
    input_image_shape = np.expand_dims(
        np.array([image.size[1], image.size[0]], dtype='float32'), 0)
    out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape)

    t1 = time.time()
    for _ in range(test_interval):
        out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape)
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
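# A minimal sketch of how these get_FPS variants are typically driven. The
# class name `YOLO` and the test image path are assumptions; tact_time is
# seconds per forward pass, so FPS is its reciprocal.
from PIL import Image

if __name__ == "__main__":
    yolo = YOLO()
    img = Image.open("street.jpg")
    test_interval = 100  # average over 100 runs to smooth out timing jitter
    tact_time = yolo.get_FPS(img, test_interval)
    print(str(tact_time) + ' seconds, ' + str(1 / tact_time) + ' FPS, @batch_size 1')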
def get_FPS(self, image, test_interval):
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works.
    #---------------------------------------------------------#
    image_data, nw, nh = resize_image(
        image, (self.input_shape[1], self.input_shape[0]))
    #---------------------------------------------------------#
    #   Add the batch_size dimension
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)

    with torch.no_grad():
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        #---------------------------------------------------#
        #   Pass the image through the network for prediction
        #---------------------------------------------------#
        pr = self.net(images)[0]
        #---------------------------------------------------#
        #   Take the predicted class of each pixel
        #---------------------------------------------------#
        pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy().argmax(axis=-1)
        #--------------------------------------#
        #   Crop off the gray-bar padding
        #--------------------------------------#
        pr = pr[int((self.input_shape[0] - nh) // 2): int((self.input_shape[0] - nh) // 2 + nh),
                int((self.input_shape[1] - nw) // 2): int((self.input_shape[1] - nw) // 2 + nw)]

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            # same prediction, argmax, and crop as above, timed
            pr = self.net(images)[0]
            pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy().argmax(axis=-1)
            pr = pr[int((self.input_shape[0] - nh) // 2): int((self.input_shape[0] - nh) // 2 + nh),
                    int((self.input_shape[1] - nw) // 2): int((self.input_shape[1] - nw) // 2 + nw)]
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
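# The segmentation variants above rely on a resize_image helper that returns
# the letterboxed image plus the new content size (nw, nh), which is later
# used to crop the gray bars back off. A minimal sketch of such a helper for
# PIL images; this is an illustration of the letterbox idea, not necessarily
# the repository's exact implementation.
from PIL import Image

def resize_image(image, size):
    iw, ih = image.size
    w, h = size
    # scale so the whole image fits inside the target, preserving aspect ratio
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    image = image.resize((nw, nh), Image.BICUBIC)
    # paste the scaled image centered on a gray canvas
    new_image = Image.new('RGB', size, (128, 128, 128))
    new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
    return new_image, nw, nh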
def get_map_txt(self, image_id, image, class_names, map_out_path):
    f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension, preprocess, and normalize.
    #---------------------------------------------------------#
    image_data = preprocess_input(
        np.expand_dims(np.array(image_data, dtype='float32'), 0))

    preds = self.m2det.predict(image_data)
    #-----------------------------------------------------------#
    #   Decode the predictions.
    #-----------------------------------------------------------#
    results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
                                        self.input_shape, self.letterbox_image,
                                        confidence=self.confidence)
    #--------------------------------------#
    #   If no objects were detected, just return.
    #--------------------------------------#
    if len(results[0]) <= 0:
        f.close()
        return

    top_label = results[0][:, 4]
    top_conf = results[0][:, 5]
    top_boxes = results[0][:, :4]

    for i, c in list(enumerate(top_label)):
        predicted_class = self.class_names[int(c)]
        box = top_boxes[i]
        score = str(top_conf[i])
        top, left, bottom, right = box
        if predicted_class not in class_names:
            continue
        f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6], str(int(left)), str(int(top)),
            str(int(right)), str(int(bottom))))

    f.close()
    return
def get_FPS(self, image, test_interval):
    #---------------------------------------------------#
    #   Get the height and width of the input image
    #---------------------------------------------------#
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension, preprocess, and normalize.
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, dtype='float32')), 0)

    outputs = self.centernet.predict(image_data)
    #--------------------------------------------------------------------------------------------#
    #   CenterNet post-processing: confidence thresholding plus optional traditional NMS.
    #   For CenterNet, locating the center is critical. Large objects carry a lot of local
    #   information, which makes their center points hard to pin down, and the max-pooling
    #   style of NMS cannot remove those local boxes. A traditional NMS step is therefore
    #   provided here and can be toggled on or off. In practice, with an hourglass backbone
    #   the extra NMS makes little difference, while with a resnet backbone it matters more.
    #--------------------------------------------------------------------------------------------#
    results = self.bbox_util.postprocess(outputs, self.nms, image_shape,
                                         self.input_shape, self.letterbox_image,
                                         confidence=self.confidence)

    t1 = time.time()
    for _ in range(test_interval):
        # same prediction + post-processing as above, timed
        outputs = self.centernet.predict(image_data)
        results = self.bbox_util.postprocess(outputs, self.nms, image_shape,
                                             self.input_shape, self.letterbox_image,
                                             confidence=self.confidence)
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_map_txt(self, image_id, image, class_names, map_out_path):
    f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
    #---------------------------------------------------#
    #   Get the height and width of the input image
    #---------------------------------------------------#
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension, preprocess, and normalize.
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, dtype='float32')), 0)

    outputs = self.get_pred(image_data).numpy()
    #--------------------------------------------------------------------------------------------#
    #   CenterNet post-processing: confidence thresholding plus optional traditional NMS.
    #   For CenterNet, locating the center is critical. Large objects carry a lot of local
    #   information, which makes their center points hard to pin down, and the max-pooling
    #   style of NMS cannot remove those local boxes. A traditional NMS step is therefore
    #   provided here and can be toggled on or off. In practice, with an hourglass backbone
    #   the extra NMS makes little difference, while with a resnet backbone it matters more.
    #--------------------------------------------------------------------------------------------#
    results = self.bbox_util.postprocess(outputs, self.nms, image_shape,
                                         self.input_shape, self.letterbox_image,
                                         confidence=self.confidence)
    #--------------------------------------#
    #   If no objects were detected, just return.
    #--------------------------------------#
    if results[0] is None:
        f.close()
        return

    top_label = np.array(results[0][:, 5], dtype='int32')
    top_conf = results[0][:, 4]
    top_boxes = results[0][:, :4]

    for i, c in list(enumerate(top_label)):
        predicted_class = self.class_names[int(c)]
        box = top_boxes[i]
        score = str(top_conf[i])
        top, left, bottom, right = box
        if predicted_class not in class_names:
            continue
        f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6], str(int(left)), str(int(top)),
            str(int(right)), str(int(bottom))))

    f.close()
    return
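# The CenterNet comments above describe max-pooling-based peak picking: a
# center candidate is kept only where the heatmap equals its local maximum.
# A minimal NumPy sketch of that idea with a 3x3 window, independent of the
# repo's bbox_util.postprocess; the use of scipy here is an assumption.
import numpy as np
from scipy.ndimage import maximum_filter

def heatmap_peaks(heatmap, confidence=0.3):
    # keep pixels that are both above threshold and a 3x3 local maximum
    local_max = maximum_filter(heatmap, size=3, mode='constant')
    peaks = (heatmap == local_max) & (heatmap > confidence)
    ys, xs = np.nonzero(peaks)
    return ys, xs, heatmap[ys, xs]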
def detect_heatmap(self, image, heatmap_save_path):
    import cv2
    import matplotlib.pyplot as plt

    def sigmoid(x):
        y = 1.0 / (1.0 + np.exp(-x))
        return y
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension and normalize.
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, dtype='float32')), 0)

    output = self.yolo_model.predict(image_data)

    plt.imshow(image, alpha=1)
    plt.axis('off')
    mask = np.zeros((image.size[1], image.size[0]))
    for sub_output in output:
        b, h, w, c = np.shape(sub_output)
        sub_output = np.reshape(sub_output, [b, h, w, 3, -1])[0]
        # objectness per anchor -> max over anchors, then upscale to image size
        score = np.max(sigmoid(sub_output[..., 4]), -1)
        score = cv2.resize(score, (image.size[0], image.size[1]))
        normed_score = (score * 255).astype('uint8')
        mask = np.maximum(mask, normed_score)

    plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")
    plt.axis('off')
    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
    plt.margins(0, 0)
    plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches=-0.1)
    print("Saved to " + heatmap_save_path)
    plt.show()
def thread_function(input_image, width, height, out_path):
    image = utils.read_image(input_image)
    # resize if a target size was configured
    if resize_images is not None:
        image = utils.resize_image(image, width, height)
    # remove all backgrounds
    if should_remove_backgrounds:
        image = utils.remove_background(image)
    utils.save_image(out_path, image)
def postprocess_image(img_path, mask_path, output_dir=None):
    image = cv2.imread(img_path)
    mask = cv2.imread(mask_path, 0)

    resized_image = resize_image(image, expected_size=512, pad_value=0)
    # binarize the mask, then resize it the same way as the image
    ret, thresh1 = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
    resized_mask = resize_image(thresh1, expected_size=512, pad_value=0)
    masked_image = cv2.bitwise_and(resized_image, resized_image, mask=resized_mask)

    name, ext = osp.splitext(osp.basename(mask_path))
    if output_dir is None:
        output_dir = osp.split(mask_path)[0]
    no_bcg_path = osp.join(output_dir, (name + '_no_bcg' + ext))
    resized_mask_path = osp.join(output_dir, (name + '_no_bcg_mask' + ext))
    cv2.imwrite(str(resized_mask_path), resized_mask)
    cv2.imwrite(str(no_bcg_path), masked_image)
    print('Outputs were written to %s' % output_dir)
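# A minimal usage sketch for postprocess_image, assuming a paired image/mask
# on disk; the file names are illustrative. Given mask_path basename
# "person_mask.png", this writes person_mask_no_bcg.png (the masked image)
# and person_mask_no_bcg_mask.png (the binarized, resized mask).
postprocess_image('inputs/person.jpg', 'outputs/person_mask.png',
                  output_dir='outputs')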
def get_map_txt(self, image_id, image, class_names, map_out_path):
    f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension, preprocess, and normalize.
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

    with torch.no_grad():
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        #---------------------------------------------------------#
        #   Feed into the network for prediction
        #---------------------------------------------------------#
        _, regression, classification, anchors = self.net(images)
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        outputs = decodebox(regression, anchors, self.input_shape)
        results = non_max_suppression(torch.cat([outputs, classification], axis=-1),
                                      self.input_shape, image_shape,
                                      self.letterbox_image,
                                      conf_thres=self.confidence,
                                      nms_thres=self.nms_iou)

        if results[0] is None:
            f.close()
            return

        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

    for i, c in list(enumerate(top_label)):
        predicted_class = self.class_names[int(c)]
        box = top_boxes[i]
        score = str(top_conf[i])
        top, left, bottom, right = box
        if predicted_class not in class_names:
            continue
        f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6], str(int(left)), str(int(top)),
            str(int(right)), str(int(bottom))))

    f.close()
    return
def get_FPS(self, image, test_interval):
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free resize
    #---------------------------------------------------------#
    image_data, nw, nh = resize_image(
        image, (self.input_shape[1], self.input_shape[0]))
    #---------------------------------------------------------#
    #   Normalize and add the batch_size dimension
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, np.float32)), 0)

    #---------------------------------------------------#
    #   Pass the image through the network for prediction
    #---------------------------------------------------#
    pr = self.model.predict(image_data)[0]
    #--------------------------------------#
    #   Crop off the gray-bar padding
    #--------------------------------------#
    pr = pr[int((self.input_shape[0] - nh) // 2): int((self.input_shape[0] - nh) // 2 + nh),
            int((self.input_shape[1] - nw) // 2): int((self.input_shape[1] - nw) // 2 + nw)]
    #---------------------------------------------------#
    #   Take the predicted class of each pixel
    #---------------------------------------------------#
    pr = pr.argmax(axis=-1).reshape(
        [self.input_shape[0], self.input_shape[1]])

    t1 = time.time()
    for _ in range(test_interval):
        # same prediction, crop, and argmax as above, timed
        pr = self.model.predict(image_data)[0]
        pr = pr[int((self.input_shape[0] - nh) // 2): int((self.input_shape[0] - nh) // 2 + nh),
                int((self.input_shape[1] - nw) // 2): int((self.input_shape[1] - nw) // 2 + nw)]
        pr = pr.argmax(axis=-1).reshape(
            [self.input_shape[0], self.input_shape[1]])
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_FPS(self, image, test_interval):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension, preprocess, and normalize.
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

    with torch.no_grad():
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        #---------------------------------------------------------#
        #   Feed into the network for prediction
        #---------------------------------------------------------#
        _, regression, classification, anchors = self.net(images)
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        outputs = decodebox(regression, anchors, self.input_shape)
        results = non_max_suppression(torch.cat([outputs, classification], axis=-1),
                                      self.input_shape, image_shape,
                                      self.letterbox_image,
                                      conf_thres=self.confidence,
                                      nms_thres=self.nms_iou)

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            # same prediction, decoding, and NMS as above, timed
            _, regression, classification, anchors = self.net(images)
            outputs = decodebox(regression, anchors, self.input_shape)
            results = non_max_suppression(torch.cat([outputs, classification], axis=-1),
                                          self.input_shape, image_shape,
                                          self.letterbox_image,
                                          conf_thres=self.confidence,
                                          nms_thres=self.nms_iou)
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_miou_png(self, image):
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    original_h = np.array(image).shape[0]
    original_w = np.array(image).shape[1]
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works.
    #---------------------------------------------------------#
    image_data, nw, nh = resize_image(
        image, (self.input_shape[1], self.input_shape[0]))
    #---------------------------------------------------------#
    #   Add the batch_size dimension
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)

    with torch.no_grad():
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        #---------------------------------------------------#
        #   Pass the image through the network for prediction
        #---------------------------------------------------#
        pr = self.net(images)[0]
        #---------------------------------------------------#
        #   Per-pixel class probabilities
        #---------------------------------------------------#
        pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy()
        #--------------------------------------#
        #   Crop off the gray-bar padding
        #--------------------------------------#
        pr = pr[int((self.input_shape[0] - nh) // 2): int((self.input_shape[0] - nh) // 2 + nh),
                int((self.input_shape[1] - nw) // 2): int((self.input_shape[1] - nw) // 2 + nw)]
        #---------------------------------------------------#
        #   Resize back to the original size
        #---------------------------------------------------#
        pr = cv2.resize(pr, (original_w, original_h), interpolation=cv2.INTER_LINEAR)
        #---------------------------------------------------#
        #   Take the class of each pixel
        #---------------------------------------------------#
        pr = pr.argmax(axis=-1)

    image = Image.fromarray(np.uint8(pr))
    return image
def detect_image(self, image_1, image_2):
    image_1 = resize_image(image_1, [self.input_shape[1], self.input_shape[0]],
                           self.letterbox_image)
    image_2 = resize_image(image_2, [self.input_shape[1], self.input_shape[0]],
                           self.letterbox_image)

    photo_1 = np.expand_dims(preprocess_input(np.array(image_1, np.float32)), 0)
    photo_2 = np.expand_dims(preprocess_input(np.array(image_2, np.float32)), 0)

    #---------------------------------------------------#
    #   Pass both images through the network
    #---------------------------------------------------#
    output1 = self.model.predict(photo_1)
    output2 = self.model.predict(photo_2)

    #---------------------------------------------------#
    #   Compute the distance between the two embeddings
    #---------------------------------------------------#
    l1 = np.linalg.norm(output1 - output2, axis=1)
    # l1 = np.sum(np.square(output1 - output2), axis=-1)

    plt.subplot(1, 2, 1)
    plt.imshow(np.array(image_1))
    plt.subplot(1, 2, 2)
    plt.imshow(np.array(image_2))
    plt.text(-12, -12, 'Distance:%.3f' % l1, ha='center', va='bottom', fontsize=11)
    plt.show()
    return l1
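# A minimal sketch of using detect_image's L2 distance for pairwise
# verification: pairs closer than a cutoff are treated as the same identity.
# The class name `Siamese`, the image paths, and the threshold value 1.0 are
# assumptions; the cutoff must be tuned on a validation set.
from PIL import Image

model = Siamese()
distance = model.detect_image(Image.open("img_1.jpg"), Image.open("img_2.jpg"))
# smaller distance = more similar
same_identity = distance < 1.0
print(distance, same_identity)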
def _crop_bounding_boxes(self, image_array, assigned_data):
    data = self._select_samples(assigned_data)
    data = np.concatenate(data, axis=0)
    images = []
    classes = []
    for object_arg in range(len(data)):
        object_data = data[object_arg]
        cropped_array = self._crop_bounding_box(image_array, object_data)
        # skip degenerate crops with a zero-sized dimension
        if 0 in cropped_array.shape:
            continue
        cropped_array = resize_image(cropped_array, self.image_size)
        images.append(cropped_array.astype('float32'))
        classes.append(data[object_arg][4:])
    return images, classes
def get_FPS(self, image, test_interval):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data, image_metas, windows = resize_image([np.array(image)], self.config)
    #---------------------------------------------------------#
    #   Generate anchor (prior) boxes for the current input size
    #---------------------------------------------------------#
    anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape), 0)
    #---------------------------------------------------------#
    #   Feed the image into the network for prediction!
    #---------------------------------------------------------#
    detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
        [image_data, image_metas, anchors], verbose=0)
    #---------------------------------------------------#
    #   The predictions above are relative to the padded
    #   image; map them back to the original image.
    #---------------------------------------------------#
    box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
        detections[0], mrcnn_mask[0], image_shape, image_data[0].shape, windows[0])

    t1 = time.time()
    for _ in range(test_interval):
        # same prediction + postprocess as above, timed
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [image_data, image_metas, anchors], verbose=0)
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], image_shape, image_data[0].shape, windows[0])
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_heatmap(self, image, heatmap_save_path):
    import cv2
    import matplotlib.pyplot as plt

    def sigmoid(x):
        y = 1.0 / (1.0 + np.exp(-x))
        return y
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

    with torch.no_grad():
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        #---------------------------------------------------------#
        #   Feed the image into the network for prediction!
        #---------------------------------------------------------#
        outputs = self.net(images)

    plt.imshow(image, alpha=1)
    plt.axis('off')
    mask = np.zeros((image.size[1], image.size[0]))
    for sub_output in outputs:
        sub_output = sub_output.cpu().numpy()
        b, c, h, w = np.shape(sub_output)
        sub_output = np.transpose(np.reshape(sub_output, [b, 3, -1, h, w]), [0, 3, 4, 1, 2])[0]
        # objectness per anchor -> max over anchors, then upscale to image size
        score = np.max(sigmoid(sub_output[..., 4]), -1)
        score = cv2.resize(score, (image.size[0], image.size[1]))
        normed_score = (score * 255).astype('uint8')
        mask = np.maximum(mask, normed_score)

    plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")
    plt.axis('off')
    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
    plt.margins(0, 0)
    plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches=-0.1)
    print("Saved to " + heatmap_save_path)
    plt.show()
def load_img(path, new_size=None):
    """Load a single image from disk.

    path: image path
    new_size: if given, the size to resample the image to
    """
    sitk_t1 = sitk.ReadImage(path)
    # if we have a resample size:
    if new_size:
        sitk_t1 = resize_image(sitk_t1, new_size)
    img = sitk.GetArrayFromImage(sitk_t1)
    # save_img(img, '/homedtic/gmarti/test.nii.gz')  # debug write, disabled
    return img
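# load_img assumes a resize_image helper that resamples a SimpleITK volume to
# a target voxel grid. A minimal sketch using sitk.Resample; the spacing math
# (preserving physical extent) and the linear interpolator are assumptions,
# not necessarily what the original helper does.
import SimpleITK as sitk

def resize_image(itk_image, new_size):
    original_size = itk_image.GetSize()
    original_spacing = itk_image.GetSpacing()
    # adjust spacing so the physical extent is preserved on the new grid
    new_spacing = [osz * ospc / nsz for osz, ospc, nsz
                   in zip(original_size, original_spacing, new_size)]
    return sitk.Resample(itk_image, [int(s) for s in new_size],
                         sitk.Transform(), sitk.sitkLinear,
                         itk_image.GetOrigin(), new_spacing,
                         itk_image.GetDirection(), 0.0,
                         itk_image.GetPixelID())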
def get_FPS(self, image, test_interval):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

    with torch.no_grad():
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        #---------------------------------------------------------#
        #   Feed the image into the network for prediction!
        #---------------------------------------------------------#
        outputs = self.net(images)
        outputs = self.bbox_util.decode_box(outputs)
        #---------------------------------------------------------#
        #   Stack the predicted boxes, then apply non-maximum suppression
        #---------------------------------------------------------#
        results = self.bbox_util.non_max_suppression(
            torch.cat(outputs, 1), self.num_classes, self.input_shape,
            image_shape, self.letterbox_image,
            conf_thres=self.confidence, nms_thres=self.nms_iou)

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            # same prediction, decoding, and NMS as above, timed
            outputs = self.net(images)
            outputs = self.bbox_util.decode_box(outputs)
            results = self.bbox_util.non_max_suppression(
                torch.cat(outputs, 1), self.num_classes, self.input_shape,
                image_shape, self.letterbox_image,
                conf_thres=self.confidence, nms_thres=self.nms_iou)
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def representative_dataset_gen(CocoDataset, Config):
    for i in range(50000):
        image = CocoDataset.load_image(i)
        image, window, scale, padding, crop = resize_image(
            image,
            min_dim=Config.IMAGE_MIN_DIM,
            min_scale=Config.IMAGE_MIN_SCALE,
            max_dim=Config.IMAGE_MAX_DIM,
            mode=Config.IMAGE_RESIZE_MODE)
        # normalize with ImageNet mean/std
        mean = np.reshape(np.array([0.485, 0.456, 0.406]), [1, 1, 3])
        std = np.reshape(np.array([0.229, 0.224, 0.225]), [1, 1, 3])
        img = (image / 255. - mean) / std
        yield [img]
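# representative_dataset_gen is shaped for TFLite post-training quantization:
# the converter calls the generator to sample activation ranges. A minimal
# sketch of wiring it into a converter; `keras_model` is an assumption, and
# in practice a few hundred samples usually suffice rather than 50000.
import functools
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# representative_dataset must be a zero-argument callable returning a generator
converter.representative_dataset = functools.partial(
    representative_dataset_gen, CocoDataset, Config)
tflite_model = converter.convert()
open("quantized_model.tflite", "wb").write(tflite_model)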
def eval(self):
    # compute the metrics on the provided dataset with the provided networks
    measures = []
    metrics = {}
    metrics_list = ['Abs rel', 'Sq rel', 'RMSE', 'log RMSE', 's1', 's2', 's3']
    MSELoss = nn.MSELoss()

    # note: the original functools.reduce over dicts broke after the first
    # step (the accumulator becomes a string); a plain join is equivalent
    print('Starting evaluation on datasets: ',
          ', '.join(d['path'] for d in self.dataset_paths))

    for idx, (tensorImage, disparities, masks, imageNetTensor,
              dataset_ids) in enumerate(tqdm(self.data_loader)):
        tensorImage = tensorImage.to(device, non_blocking=True)
        disparities = disparities.to(device, non_blocking=True)
        masks = masks.to(device, non_blocking=True)
        N = tensorImage.size()[2] * tensorImage.size()[3]

        # pretrained networks from 3D KBE were trained with images normalized between 0 and 1
        if self.eval_pretrained:
            tensorImage = (tensorImage + 1) / 2

        tensorResized = resize_image(tensorImage)
        # depth estimation
        tensorDisparity = self.moduleDisparity(
            tensorResized, self.moduleSemantics(tensorResized))
        # increase resolution
        tensorDisparity = self.moduleRefine(tensorImage, tensorDisparity)
        tensorDisparity = F.threshold(tensorDisparity, threshold=0.0, value=0.0)

        masks = masks.clamp(0, 1)
        measures.append(
            np.array(compute_metrics(tensorDisparity, disparities, masks)))

    measures = np.array(measures).mean(axis=0)
    for i, name in enumerate(metrics_list):
        metrics[name] = measures[i]
    return metrics
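# eval() assumes a compute_metrics helper returning the seven values in
# metrics_list. A minimal sketch of the standard monocular-depth metrics,
# assuming pred/gt are same-shaped tensors masked to valid pixels; this is
# an illustration, not necessarily the repository's exact definition.
import torch

def compute_metrics(pred, gt, mask):
    valid = mask > 0
    pred = pred[valid].clamp(min=1e-6)
    gt = gt[valid].clamp(min=1e-6)
    abs_rel = torch.mean(torch.abs(pred - gt) / gt)
    sq_rel = torch.mean((pred - gt) ** 2 / gt)
    rmse = torch.sqrt(torch.mean((pred - gt) ** 2))
    log_rmse = torch.sqrt(torch.mean((torch.log(pred) - torch.log(gt)) ** 2))
    # threshold accuracies: fraction of pixels with max ratio under 1.25^k
    ratio = torch.max(pred / gt, gt / pred)
    s1 = torch.mean((ratio < 1.25).float())
    s2 = torch.mean((ratio < 1.25 ** 2).float())
    s3 = torch.mean((ratio < 1.25 ** 3).float())
    return [m.item() for m in (abs_rel, sq_rel, rmse, log_rmse, s1, s2, s3)]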
def prepare_image(self, image_id):
    """Use config to process a COCO image: resize it and build the targets.

    augment: (deprecated; use augmentation instead). If true, apply random
        image augmentation. Currently only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
        augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
        flips images right/left 50% of the time.

    Returns:
    image: [height, width, 3]
    image_meta: the original shape of the image plus resizing and cropping.
    class_ids: [instance_count] integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image.
    gt_y: [instance_count]
    gt_x: [instance_count]
    vector_mask: [height, width, 2*class_num]. Per-pixel vector relative to
        the instance center.
    """
    # Load image and mask
    image = self.load_image(image_id=image_id)
    mask, class_ids = self.load_mask(image_id=image_id)
    original_shape = image.shape
    image, window, scale, padding, crop = cocoutils.resize_image(
        image,
        min_dim=self.config.IMAGE_MIN_DIM,
        min_scale=self.config.IMAGE_MIN_SCALE,
        max_dim=self.config.IMAGE_MAX_DIM,
        mode=self.config.IMAGE_RESIZE_MODE)
    mask = cocoutils.resize_mask(mask, scale, padding, 0, crop)

    # drop instances whose resized mask covers 16 pixels or fewer
    _idx = np.sum(mask, axis=(0, 1)) > 16
    class_ids = class_ids[_idx]
    if len(class_ids) != 0:
        # [y, x, num_instance]
        mask = mask[:, :, _idx]
        # Bounding boxes. Note that some boxes might be all zeros
        # if the corresponding mask got cropped out.
        # bbox: [num_instances, (y1, x1, y2, x2)]
        bbox = cocoutils.extract_bboxes(mask)
        gt_cy, gt_cx = cocoutils.gravity_center(mask)
        return image, class_ids, bbox, mask, gt_cy, gt_cx
    print("return nothing")
    return None
def test_one_image(self, images, show=False):
    self.is_training = False
    image, window, scale, padding, crop = resize_image(
        images,
        min_dim=self.image_size,
        min_scale=0,
        max_dim=self.image_size,
        mode="square")
    # normalize with ImageNet mean/std
    mean = np.reshape(np.array([0.485, 0.456, 0.406]), [1, 1, 3])
    std = np.reshape(np.array([0.229, 0.224, 0.225]), [1, 1, 3])
    image = (image / 255. - mean) / std
    image = tf.convert_to_tensor(np.expand_dims(image, axis=0))

    # Export / TFLite conversion experiments, kept for reference:
    # self.CenterNetModel.save('./SAVE')
    # tf.saved_model.save(self.CenterNetModel, "./SAVE")
    # tf.keras.experimental.export_saved_model(self.CenterNetModel, "./SAVE")
    # self.CenterNetModel.save('my_model.h5')
    #
    # converter = tf.lite.TFLiteConverter.from_keras_model(self.CenterNetModel)
    # converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
    # tflite_model = converter.convert()
    # open("converted_model.tflite", "wb").write(tflite_model)
    #
    # float16 quantization:
    # converter = tf.lite.TFLiteConverter.from_keras_model(self.CenterNetModel)
    # converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
    #                                        tf.lite.OpsSet.SELECT_TF_OPS]
    # converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # converter.target_spec.supported_types = [tf.float16]
    # tflite_quant_model = converter.convert()
    # open("converted_model.tflite", "wb").write(tflite_quant_model)

    pred = self.CenterNetModel.predict(image, batch_size=1, verbose=0)
    return pred
def draw_normalized_box(self, box_coordinates, image_key=None, color='r'):
    if len(box_coordinates.shape) == 1:
        box_coordinates = np.expand_dims(box_coordinates, 0)
    if image_key is None:
        image_path = self.random_instance.choice(self.image_paths)
    else:
        image_path = self.image_prefix + image_key
    image_array = read_image(image_path)
    image_array = resize_image(image_array, self.image_size)
    figure, axis = plt.subplots(1)
    axis.imshow(image_array)

    # map normalized coordinates back to pixel space
    original_coordinates = self.denormalize_box(box_coordinates)
    x_min = original_coordinates[:, 0]
    y_min = original_coordinates[:, 1]
    x_max = original_coordinates[:, 2]
    y_max = original_coordinates[:, 3]
    width = x_max - x_min
    height = y_max - y_min

    # anything beyond the first four columns is a one-hot class vector
    if box_coordinates.shape[1] > 4:
        classes = box_coordinates[:, 4:]
        classes_flag = True
    else:
        classes_flag = False

    num_boxes = len(box_coordinates)
    for box_arg in range(num_boxes):
        x_min_box = x_min[box_arg]
        y_min_box = y_min[box_arg]
        box_width = width[box_arg]
        box_height = height[box_arg]
        x_text = x_min_box + (1 * box_width)
        y_text = y_min_box  # + (1 * box_height)
        rectangle = plt.Rectangle((x_min_box, y_min_box),
                                  box_width, box_height,
                                  linewidth=1, edgecolor=color,
                                  facecolor='none')
        axis.add_patch(rectangle)
        if self.classes_decoder is not None and classes_flag:
            box_class = classes[box_arg]
            class_name = self.classes_decoder[np.argmax(box_class)]
            axis.text(x_text, y_text, class_name, style='italic',
                      bbox={'facecolor': 'red', 'alpha': 0.5, 'pad': 10})
    plt.show()
def detect_image(self, image_id, image, results, clsid2catid):
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                              self.letterbox_image)
    #---------------------------------------------------------#
    #   Add the batch_size dimension and normalize.
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, dtype='float32')), 0)
    #---------------------------------------------------------#
    #   Feed the image into the network for prediction!
    #---------------------------------------------------------#
    input_image_shape = np.expand_dims(
        np.array([image.size[1], image.size[0]], dtype='float32'), 0)
    out_boxes, out_scores, out_classes = self.yolo_model.predict(
        [image_data, input_image_shape])

    for i, c in enumerate(out_classes):
        result = {}
        top, left, bottom, right = out_boxes[i]
        result["image_id"] = int(image_id)
        result["category_id"] = clsid2catid[c]
        # COCO format: [x, y, width, height]
        result["bbox"] = [float(left), float(top),
                          float(right - left), float(bottom - top)]
        result["score"] = float(out_scores[i])
        results.append(result)
    return results
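# A sketch of how detect_image's `results` list is typically consumed: dump
# it as a COCO-format json and score it with pycocotools. The file paths are
# assumptions.
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

with open("eval_results.json", "w") as f:
    json.dump(results, f)

cocoGt = COCO("instances_val2017.json")
cocoDt = cocoGt.loadRes("eval_results.json")
cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()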
def get_miou_png(self, image):
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    original_h = np.array(image).shape[0]
    original_w = np.array(image).shape[1]
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free resize
    #---------------------------------------------------------#
    image_data, nw, nh = resize_image(
        image, (self.input_shape[1], self.input_shape[0]))
    #---------------------------------------------------------#
    #   Normalize and add the batch_size dimension
    #---------------------------------------------------------#
    image_data = np.expand_dims(
        preprocess_input(np.array(image_data, np.float32)), 0)

    #--------------------------------------#
    #   Pass the image through the network
    #--------------------------------------#
    pr = self.model.predict(image_data)[0]
    #--------------------------------------#
    #   Crop off the gray-bar padding
    #--------------------------------------#
    pr = pr[int((self.input_shape[0] - nh) // 2): int((self.input_shape[0] - nh) // 2 + nh),
            int((self.input_shape[1] - nw) // 2): int((self.input_shape[1] - nw) // 2 + nw)]
    #--------------------------------------#
    #   Resize back to the original size
    #--------------------------------------#
    pr = cv2.resize(pr, (original_w, original_h), interpolation=cv2.INTER_LINEAR)
    #---------------------------------------------------#
    #   Take the class of each pixel
    #---------------------------------------------------#
    pr = pr.argmax(axis=-1)

    image = Image.fromarray(np.uint8(pr))
    return image
def get_map_out(self, image):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Convert the image to RGB here to prevent grayscale
    #   images from raising errors during prediction. The code
    #   only supports RGB images; all other types are converted.
    #---------------------------------------------------------#
    image = cvtColor(image)
    #---------------------------------------------------------#
    #   Pad the image with gray bars for a distortion-free
    #   resize; a direct resize also works for detection.
    #---------------------------------------------------------#
    image_data, image_metas, windows = resize_image([np.array(image)], self.config)
    #---------------------------------------------------------#
    #   Generate anchor (prior) boxes for the current input size
    #---------------------------------------------------------#
    anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape), 0)
    #---------------------------------------------------------#
    #   Feed the image into the network for prediction!
    #---------------------------------------------------------#
    detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
        [image_data, image_metas, anchors], verbose=0)
    #---------------------------------------------------#
    #   The predictions above are relative to the padded
    #   image; map them back to the original image.
    #---------------------------------------------------#
    box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
        detections[0], mrcnn_mask[0], image_shape, image_data[0].shape, windows[0])

    outboxes = None
    if box_thre is not None:
        # swap from (y1, x1, y2, x2) to (x1, y1, x2, y2)
        outboxes = np.zeros_like(box_thre)
        outboxes[:, [0, 2]] = box_thre[:, [1, 3]]
        outboxes[:, [1, 3]] = box_thre[:, [0, 2]]
    return outboxes, class_thre, class_ids, masks_arg, masks_sigmoid
def test_one_image(self, images):
    self.is_training = False
    image, window, scale, padding, crop = resize_image(
        images,
        min_dim=self.image_size,
        min_scale=0,
        max_dim=self.image_size,
        mode="square")
    # normalize with ImageNet mean/std
    mean = np.reshape(np.array([0.485, 0.456, 0.406]), [1, 1, 3])
    std = np.reshape(np.array([0.229, 0.224, 0.225]), [1, 1, 3])
    image = (image / 255. - mean) / std
    image = tf.convert_to_tensor(np.expand_dims(image, axis=0))

    # TFLite conversion experiment, kept for reference:
    # converter = tf.lite.TFLiteConverter.from_keras_model(self.CenterNetModel)
    # # converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    # converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
    # # converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
    # #                                        tf.lite.OpsSet.SELECT_TF_OPS]
    # # converter.representative_dataset = representative_dataset_gen
    # tf_lite_model = converter.convert()
    # open("converted_model.tflite", "wb").write(tf_lite_model)

    pred = self.CenterNetModel.predict(image, batch_size=1, verbose=0)
    return pred