def detect_hand(img1):
    """Run the hand-gesture detector on one image and draw the detections.

    The model is rebuilt and its weights are reloaded on every call.

    Args:
        img1: Image to analyse. It is converted with ``transforms.ToTensor()``
            and is also handed to ``draw_box``.

    Returns:
        The ``img1`` object that was passed in, after ``draw_box`` has been
        called on it (presumably annotated in place - confirm against
        ``draw_box``).

    Side effects:
        Sets the module-level ``device`` global, prints the device, and
        terminates the process with exit code -1 if the class file cannot
        be read.
    """
    global device
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # create model; num_classes has to match the number of gesture classes
    model = create_model(num_classes=12)

    # load train weights; map_location lets weights saved on a GPU load on a
    # CPU-only machine as well
    train_weights = "./save_hand_weights/resNetFpn-model-13.pth"
    model.load_state_dict(
        torch.load(train_weights, map_location=device)["model"])
    model.to(device)

    # read class_indict (gesture class dictionary), inverted to id -> name
    category_index = {}
    try:
        with open('./hand_classes.json', 'r') as json_file:
            class_dict = json.load(json_file)
        category_index = {v: k for k, v in class_dict.items()}
    except Exception as e:
        print(e)
        exit(-1)

    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    img = data_transform(img1)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()
    with torch.no_grad():
        predictions = model(img.to(device))[0]
        predict_boxes = predictions["boxes"].to("cpu").numpy()
        predict_classes = predictions["labels"].to("cpu").numpy()
        predict_scores = predictions["scores"].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        draw_box(img1,
                 predict_boxes,
                 predict_classes,
                 predict_scores,
                 category_index,
                 thresh=0.5,
                 line_thickness=5)

    return img1
def main():
    """Run the exported YOLOv3-SPP ONNX model on ``test.jpg`` and plot the result.

    Validates the ONNX file, preprocesses the image to a fixed
    ``img_size`` x ``img_size`` input, runs ONNX Runtime, post-processes the
    raw predictions, maps the boxes back to the original image and shows the
    annotated image with matplotlib.

    Raises:
        AssertionError: If the test image cannot be read.
    """
    img_size = 512
    save_path = "./logs/yolov3spp.onnx"
    img_path = "test.jpg"
    input_size = (img_size, img_size)  # h, w

    # check onnx model
    onnx_model = onnx.load(save_path)
    onnx.checker.check_model(onnx_model)
    ort_session = onnxruntime.InferenceSession(save_path)

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    # preprocessing img
    img, ratio, pad = scale_img(img_o,
                                new_shape=input_size,
                                auto=False,
                                color=(0, 0, 0))
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img).astype(np.float32)
    img /= 255.0  # scale (0, 255) to (0, 1)
    img = np.expand_dims(img, axis=0)  # add batch dimension

    # compute ONNX Runtime output prediction
    ort_inputs = {"images": img}
    t1 = time.time()
    # prediction: [num_obj, 85]
    pred = ort_session.run(None, ort_inputs)[0]
    t2 = time.time()
    print(t2 - t1)

    # process detections
    # the predicted coordinates are relative (0-1); multiply by the input size
    # to get absolute coordinates
    pred[:, [0, 2]] *= input_size[1]
    pred[:, [1, 3]] *= input_size[0]
    pred = post_process(pred)

    # scale the predicted boxes back to the original image
    p_boxes = turn_back_coords(img1_shape=img.shape[2:],
                               coords=pred[:, :4],
                               img0_shape=img_o.shape,
                               ratio_pad=[ratio, pad]).round()

    bboxes = p_boxes
    scores = pred[:, 4]
    # ``np.int`` was removed from NumPy; the builtin ``int`` is what it aliased
    classes = pred[:, 5].astype(int) + 1

    category_index = {i + 1: str(i + 1) for i in range(90)}
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()
def main():
    """Run YOLOv3-SPP (PyTorch) on ``test.jpg``, show and save the detections.

    Loads the Darknet model from the cfg/weights paths below, runs a warm-up
    pass, runs inference plus NMS on the letterboxed image, rescales the boxes
    to the original image, plots the annotated image and saves it as
    ``test_result.jpg``.

    Raises:
        AssertionError: If the test image cannot be read.
    """
    img_size = 512  # must be a multiple of 32 [416, 512, 608]
    cfg = "cfg/yolov3-spp.cfg"
    weights = "weights/yolov3-spp-ultralytics-{}.pt".format(img_size)
    img_path = "test.jpg"
    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()
    # inference only: do not build autograd graphs
    with torch.no_grad():
        # init (warm-up pass on a dummy input)
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o,
                                  new_shape=input_size,
                                  auto=True,
                                  color=(0, 0, 0))[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred,
                                         conf_thres=0.3,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        # NMS yields None when nothing survives the thresholds; indexing it
        # below would raise a TypeError
        if pred is None:
            print("No target detected.")
            return

        # process detections
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4],
                                         img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        # ``np.int`` was removed from NumPy; the builtin ``int`` is what it aliased
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

        category_index = {i + 1: str(i + 1) for i in range(90)}
        img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores,
                         category_index)
        plt.imshow(img_o)
        plt.show()

        img_o.save("test_result.jpg")
def main():
    """Run the detector on ``./test.jpg``, display the result and save it.

    Builds the model with 21 classes, loads ``./save_weights/model.pth``,
    reads the class-name mapping from ``./pascal_voc_classes.json``, runs one
    warm-up pass followed by a timed inference pass, draws the detections on
    the image, shows it with matplotlib and writes ``test_result.jpg``.

    Raises:
        AssertionError: If the weights file or the class JSON file is missing.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model = create_model(num_classes=21)

    # load train weights
    train_weights = "./save_weights/model.pth"
    assert os.path.exists(train_weights), "{} file dose not exist.".format(
        train_weights)
    model.load_state_dict(
        torch.load(train_weights, map_location=device)["model"])
    model.to(device)

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(
        label_json_path), "json file {} dose not exist.".format(
            label_json_path)
    # context manager so the file handle is released right after parsing
    with open(label_json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # invert name -> id into id -> name
    category_index = {v: k for k, v in class_dict.items()}

    # load image
    original_img = Image.open("./test.jpg")

    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # init: warm-up pass on a zero image of the same size so that the
        # timing below does not include one-off start-up cost
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        predictions = model(img.to(device))[0]
        t_end = time_synchronized()
        print("inference+NMS time: {}".format(t_end - t_start))

        predict_boxes = predictions["boxes"].to("cpu").numpy()
        predict_classes = predictions["labels"].to("cpu").numpy()
        predict_scores = predictions["scores"].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        draw_box(original_img,
                 predict_boxes,
                 predict_classes,
                 predict_scores,
                 category_index,
                 thresh=0.5,
                 line_thickness=3)
        plt.imshow(original_img)
        plt.show()
        # save the annotated prediction image
        original_img.save("test_result.jpg")
# Fragment of an inference routine: `original_img`, `model`, `device` and
# `category_index` are defined outside the visible lines.
# Preprocess with the project's transform pipeline; the composed transform
# returns a pair, of which only the image is used here.
data_transform = transform.Compose(
    [transform.Resize(),
     transform.ToTensor(),
     transform.Normalization()])
img, _ = data_transform(original_img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)

model.eval()
with torch.no_grad():
    predictions = model(
        img.to(device))[0]  # bboxes_out, labels_out, scores_out
    predict_boxes = predictions[0].to("cpu").numpy()
    # Scale x coordinates by the image width and y coordinates by the image
    # height (presumably the model outputs relative 0-1 boxes - TODO confirm).
    predict_boxes[:, [0, 2]] = predict_boxes[:, [0, 2]] * original_img.size[0]
    predict_boxes[:, [1, 3]] = predict_boxes[:, [1, 3]] * original_img.size[1]
    predict_classes = predictions[1].to("cpu").numpy()
    predict_scores = predictions[2].to("cpu").numpy()

    # Only reports the empty result; draw_box is still called afterwards.
    if len(predict_boxes) == 0:
        print("没有检测到任何目标!")

    draw_box(original_img,
             predict_boxes,
             predict_classes,
             predict_scores,
             category_index,
             thresh=0.5,
             line_thickness=5)
    plt.imshow(original_img)
    plt.show()
def main():
    """Run a custom-trained YOLOv3-SPP model on ``test.jpg`` and save the result.

    Checks that the cfg, weights, class JSON and image files exist, loads the
    model, runs a warm-up pass, runs inference plus NMS on the letterboxed
    image, rescales the boxes to the original image, plots the annotated
    image and saves it as ``test_result.jpg``. Returns early with a message
    when NMS yields no detections.

    Raises:
        AssertionError: If any of the required files is missing or the image
            cannot be read.
    """
    img_size = 512  # must be a multiple of 32 [416, 512, 608]
    cfg = "/home/mist/yolov3_spp/cfg/yolov3-spp.cfg"  # change to the generated .cfg file
    # change to your own trained weights file
    weights = "/home/mist/yolov3_spp/weights/yolov3spp-29.pt"
    json_path = "/home/mist/yolov3_spp/data/pascal_voc_classes.json"  # json label file
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} dose not exist.".format(cfg)
    assert os.path.exists(weights), "weights file {} dose not exist.".format(
        weights)
    assert os.path.exists(json_path), "json file {} dose not exist.".format(
        json_path)
    assert os.path.exists(img_path), "image file {} dose not exist.".format(
        img_path)

    # context manager so the file handle is released right after parsing
    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # invert name -> id into id -> name
    category_index = {v: k for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()
    with torch.no_grad():
        # init (warm-up pass on a dummy input)
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o,
                                  new_shape=input_size,
                                  auto=True,
                                  color=(0, 0, 0))[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred,
                                         conf_thres=0.1,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        if pred is None:
            print("No target detected.")
            return

        # process detections
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4],
                                         img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        # ``np.int`` was removed from NumPy; the builtin ``int`` is what it aliased
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

        img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores,
                         category_index)
        plt.imshow(img_o)
        plt.show()

        img_o.save("test_result.jpg")
def main():
    """Run the SSD300 detector on ``./test.jpg`` and display the detections.

    Builds the model for 20 object classes plus background, loads
    ``./save_weights/ssd300-14.pth``, reads the class-name mapping from
    ``./pascal_voc_classes.json``, runs one warm-up pass followed by a timed
    inference pass, scales the predicted boxes by the image width/height,
    draws them on the image and shows it with matplotlib.

    Raises:
        AssertionError: If the class JSON file is missing.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # create model
    # number of object classes + background
    num_classes = 20 + 1
    model = create_model(num_classes=num_classes)

    # load train weights
    train_weights = "./save_weights/ssd300-14.pth"
    train_weights_dict = torch.load(train_weights, map_location=device)['model']

    model.load_state_dict(train_weights_dict)
    model.to(device)

    # read class_indict
    json_path = "./pascal_voc_classes.json"
    assert os.path.exists(json_path), "file '{}' dose not exist.".format(json_path)
    # context manager so the file handle is released right after parsing
    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # invert name -> id into id -> name
    category_index = {v: k for k, v in class_dict.items()}

    # load image
    original_img = Image.open("./test.jpg")

    # from pil image to tensor: resize, convert and normalize with the
    # project's transform pipeline (it returns a pair; only the image is used)
    data_transform = transforms.Compose([transforms.Resize(),
                                         transforms.ToTensor(),
                                         transforms.Normalization()])
    img, _ = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()
    with torch.no_grad():
        # initial model: warm-up pass so that the timing below does not
        # include one-off start-up cost
        init_img = torch.zeros((1, 3, 300, 300), device=device)
        model(init_img)

        time_start = time_synchronized()
        predictions = model(img.to(device))[0]  # bboxes_out, labels_out, scores_out
        time_end = time_synchronized()
        print("inference+NMS time: {}".format(time_end - time_start))

        predict_boxes = predictions[0].to("cpu").numpy()
        # x coordinates are scaled by the image width, y by the image height
        predict_boxes[:, [0, 2]] = predict_boxes[:, [0, 2]] * original_img.size[0]
        predict_boxes[:, [1, 3]] = predict_boxes[:, [1, 3]] * original_img.size[1]
        predict_classes = predictions[1].to("cpu").numpy()
        predict_scores = predictions[2].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        draw_box(original_img,
                 predict_boxes,
                 predict_classes,
                 predict_scores,
                 category_index,
                 thresh=0.5,
                 line_thickness=5)
        plt.imshow(original_img)
        plt.show()
def detect(self):
    """Detect and track objects frame by frame and keep running catch statistics.

    For every frame grabbed from ``self.vidCap`` the method runs
    ``self.yolov3.predict``, feeds the detections to ``self.deepsort.update``,
    remembers the class of each track id the first time it is seen, and
    derives per-class counts and catch / by-catch ratios from that mapping.
    Depending on the instance settings it also writes one tracking line per
    tracked box to ``self.output_file``, shows the annotated frame
    (``self.display``) and appends it to ``self.output`` (``self.save_path``).

    Class ids are counted as 1 = flat fish, 2 = round fish, 3 = nephrops
    (all "catch") and 4 = other ("by-catch").

    The loop ends when no more frames can be grabbed or when 'q' is pressed
    in the display window.
    """
    json_path = './data/pascal_voc_classes.json'
    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # invert name -> id into id -> name
    category_index = {v: k for k, v in class_dict.items()}

    # track id -> class, stored the first time a track id appears
    all_obj_info = {}

    frame_no = -1

    # The original opened the undefined name ``output_file``; the attribute
    # on the instance is what the surrounding checks use.
    f = open(self.output_file, "w") if self.output_file else None
    try:
        while self.vidCap.grab():
            frame_no += 1

            # start time
            total_begin = time.time()
            _, img = self.vidCap.retrieve()

            # yolov3: bbox coordinates [x1, y1, x2, y2], confidences, classes
            yolo_begin = time.time()
            bbox_xyxy_ori, cls_conf, cls_ids = self.yolov3.predict(img)
            print(cls_ids)
            yolo_end = time.time()

            # deepsort
            ds_begin = time.time()
            if bbox_xyxy_ori is not None:
                # convert the coordinates for the tracker
                bbox_cxcywh = xyxy2xywh(bbox_xyxy_ori)
                outputs = self.deepsort.update(bbox_cxcywh, cls_conf, cls_ids,
                                               img)

                if len(outputs) > 0:
                    # each row: [x1, y1, x2, y2, ..., id, class]
                    bbox_xyxy = outputs[:, :4]
                    ids = outputs[:, -2]
                    object_class = outputs[:, -1]
                    print(ids)
                    print(object_class)

                    # NOTE: a track keeps the class from its first
                    # appearance; later class changes are ignored.
                    for track_id, track_class in zip(ids, object_class):
                        if track_id not in all_obj_info:
                            all_obj_info[track_id] = track_class
                    print(all_obj_info)

                    # draw the detector boxes
                    img = draw_box(img, bbox_xyxy_ori, cls_ids, cls_conf,
                                   category_index)

                    # record the info: frame,id,tlwh,1,-1,-1,-1
                    if f is not None:
                        # NOTE(review): xyxy2xywh is also used above to get
                        # centre-based boxes, so these may not be top-left
                        # coordinates - confirm against the helper.
                        bbox_tlwh = xyxy2xywh(bbox_xyxy)
                        for i, box in enumerate(bbox_tlwh):
                            # column -2 is the track id; the original wrote
                            # column -1, which is the class
                            write_line = "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                frame_no + 1, ids[i], int(box[0]),
                                int(box[1]), int(box[2]), int(box[3]))
                            f.write(write_line)
            ds_end = time.time()

            total_end = time.time()

            # count the current number of each category
            cur_categories = list(all_obj_info.values())
            flatfish_count = cur_categories.count(1)
            roundfish_count = cur_categories.count(2)
            nephrops_count = cur_categories.count(3)
            other_count = cur_categories.count(4)
            catch_count = flatfish_count + roundfish_count + nephrops_count
            total_count = catch_count + other_count

            # ratios start from frame 3; they stay None while nothing has
            # been counted, instead of dividing by zero
            if frame_no >= 3 and total_count > 0:
                catch_ratio = round(catch_count / total_count, 2)
                bycatch_ratio = round(other_count / total_count, 2)
            else:
                catch_ratio = None
                bycatch_ratio = None

            # guard the divisions against a zero elapsed time on coarse clocks
            total_time = total_end - total_begin
            safe_total_time = max(total_time, 1e-9)
            fps = 1 / safe_total_time

            # print info to the console
            print(
                "frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                % (frame_no, (yolo_end - yolo_begin), (ds_end - ds_begin),
                   total_time,
                   ((yolo_end - yolo_begin) * 100 / safe_total_time), fps))

            # display all the count info on the screen
            if self.display == True:
                img = np.uint8(img)
                displayNephropsCount(img, nephrops_count)
                displayFlatfishCount(img, flatfish_count)
                displayRoundfishCount(img, roundfish_count)
                displayOtherfishCount(img, other_count)
                displayCatchRatio(img, catch_ratio)
                displayByCatchRatio(img, bycatch_ratio)
                # NOTE(review): the last positional argument of putText is
                # the line type, but a font constant is passed - confirm
                # whether cv2.LINE_AA was intended.
                cv2.putText(img, 'FPS {:.1f}'.format(fps), (20, 280),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2,
                            cv2.FONT_HERSHEY_COMPLEX_SMALL)
                cv2.imshow("Test", img)
                # press Q to quit; a single waitKey call so the key press is
                # not consumed by an earlier, unchecked call
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            # determine if output the new video
            if self.save_path:
                self.output.write(img)
    finally:
        # close the tracking file even if a frame raised
        if f is not None:
            f.close()
transforms.Compose(
    [transforms.ToTensor(),
     transforms.RandomHorizontalFlip(0.5)]),
"val": transforms.Compose([transforms.ToTensor()])
}

# load train data set
train_data_set = VOC2012DataSet(os.getcwd(), data_transform["train"],
                                "train.txt")
# VOC2012DataSet is the custom dataset class: the first argument is the root
# directory that contains the VOC data (os.getcwd() for the current directory),
# the second argument is the preprocessing applied to the training set,
# data_transform["train"].
print(len(train_data_set))  # number of files in the training set
for index in random.sample(range(0, len(train_data_set)),
                           k=5):  # randomly sample 5 images
    img, target = train_data_set[
        index]  # indexing returns img and target because __getitem__ is implemented
    img = ts.ToPILImage()(img)  # preprocessing turned img into a tensor; convert back to PIL
    draw_box(
        img,  # the image to draw on
        target["boxes"].numpy(),
        target["labels"].numpy(),
        [1 for i in range(len(target["labels"].numpy()))
         ],  # presumably the class scores; all of them are passed as 1 here
        category_index,  # the json class mapping loaded earlier (keys and values swapped)
        thresh=0.5,  # threshold that drops low scores (all scores are 1 here)
        line_thickness=5)  # line width
    plt.imshow(img)
    plt.show()
def main():
    """Run the detector on every file of the test folder and save annotated copies.

    Builds the model with 2 classes, loads ``./save_weights/model.pth`` and
    the class mapping from ``./tgk_classes.json``, then for each file in the
    test folder runs a timed inference pass, draws the detections and saves
    the image into the result folder. The saved file name is the original
    stem, followed by the scores of the drawn detections (score >= 0.5) in
    square brackets, followed by ``_test_result.jpg``.

    Raises:
        AssertionError: If the weights file or the class JSON file is missing.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model = create_model(num_classes=2)

    # load train weights
    train_weights = "./save_weights/model.pth"
    assert os.path.exists(train_weights), "{} file dose not exist.".format(train_weights)
    model.load_state_dict(torch.load(train_weights, map_location=device)["model"])
    model.to(device)
    model.eval()  # switch to evaluation mode once, not per image

    # read class_indict
    label_json_path = './tgk_classes.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    # context manager so the file handle is released right after parsing
    with open(label_json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # invert name -> id into id -> name
    category_index = {v: k for k, v in class_dict.items()}

    files_path = r"C:\Users\Administrator\Desktop\tgk-test"  # test-set image folder
    save_path = r"C:\Users\Administrator\Desktop\tgk_result"
    # saving fails if the result folder does not exist yet
    os.makedirs(save_path, exist_ok=True)

    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    score_thresh = 0.5

    file_names = os.listdir(files_path)
    filelen = len(file_names)
    for index, file in enumerate(file_names):
        image_path = os.path.join(files_path, file)
        original_img = Image.open(image_path)

        img = data_transform(original_img)
        # expand batch dimension
        img = torch.unsqueeze(img, dim=0)

        with torch.no_grad():
            if index == 0:
                # init: one warm-up pass so that the first timing does not
                # include one-off start-up cost
                img_height, img_width = img.shape[-2:]
                init_img = torch.zeros((1, 3, img_height, img_width), device=device)
                model(init_img)

            t_start = time.time()
            predictions = model(img.to(device))[0]
            print("inference+NMS time: {}".format(time.time() - t_start))

            predict_boxes = predictions["boxes"].to("cpu").numpy()
            predict_classes = predictions["labels"].to("cpu").numpy()
            predict_scores = predictions["scores"].to("cpu").numpy()

            if len(predict_boxes) == 0:
                print("没有检测到任何目标!")

            draw_box(original_img,
                     predict_boxes,
                     predict_classes,
                     predict_scores,
                     category_index,
                     thresh=score_thresh,
                     line_thickness=3)

        # save the annotated prediction image
        # The raw string form of the score array can contain line breaks and
        # grows with the number of detections, which is not safe in a file
        # name; only the scores of the drawn boxes are embedded instead.
        score_tag = " ".join("{:.3f}".format(score)
                             for score in predict_scores
                             if score >= score_thresh)
        # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b")
        image_stem = os.path.splitext(file)[0]
        image_name = image_stem + "[" + score_tag + "]" + "_test_result.jpg"
        original_img.save(os.path.join(save_path, image_name))
        print("Successful save image[{}/{}]".format(index+1 ,filelen))