def detect_hand(img1):
    """Detect hand gestures in ``img1`` and draw the detections onto it.

    A 12-class model is built with ``create_model``, the checkpoint
    ``./save_hand_weights/resNetFpn-model-13.pth`` is loaded into it and the
    label map is read from ``./hand_classes.json``. Detections are drawn by
    ``draw_box`` using a score threshold of 0.5.

    Side effects: rebinds the module-level ``device`` and prints it. If the
    class file cannot be read or parsed, the error is printed and the
    process exits with status -1.

    Args:
        img1: Image accepted by ``transforms.ToTensor`` and ``draw_box``
            (presumably a PIL image -- confirm against the caller).

    Returns:
        The same ``img1`` object that was passed in, after ``draw_box`` has
        been called on it.
    """
    global device
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # create model; num_classes has to match the number of gesture classes
    # NOTE(review): the model is rebuilt and the weights are re-read on every
    # call; consider caching if this is invoked once per frame.
    model = create_model(num_classes=12)

    # load train weights
    train_weights = "./save_hand_weights/resNetFpn-model-13.pth"
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    checkpoint = torch.load(train_weights, map_location=device)
    model.load_state_dict(checkpoint["model"])
    model.to(device)

    # read the class dictionary and invert it to {index: name}
    try:
        # The context manager closes the file even when parsing fails.
        with open('./hand_classes.json', 'r') as json_file:
            class_dict = json.load(json_file)
        category_index = {v: k for k, v in class_dict.items()}
    except Exception as e:
        print(e)
        # Same effect as the interactive ``exit`` helper, without depending
        # on the ``site`` module having installed it.
        raise SystemExit(-1)

    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    img = data_transform(img1)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()
    with torch.no_grad():
        predictions = model(img.to(device))[0]
        predict_boxes = predictions["boxes"].to("cpu").numpy()
        predict_classes = predictions["labels"].to("cpu").numpy()
        predict_scores = predictions["scores"].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        draw_box(img1,
                 predict_boxes,
                 predict_classes,
                 predict_scores,
                 category_index,
                 thresh=0.5,
                 line_thickness=5)
    return img1
def main():
    """Run the exported YOLOv3-SPP ONNX model on ``test.jpg`` and plot the result.

    The ONNX file ``./logs/yolov3spp.onnx`` is validated with
    ``onnx.checker`` and executed through ONNX Runtime. The image is resized
    with ``scale_img`` to 512x512, the raw predictions are scaled to pixels,
    passed through ``post_process`` and mapped back to the original image with
    ``turn_back_coords`` before ``draw_box`` renders them.

    Raises:
        AssertionError: if ``test.jpg`` cannot be read by OpenCV.
    """
    img_size = 512
    save_path = "./logs/yolov3spp.onnx"
    img_path = "test.jpg"
    input_size = (img_size, img_size)  # h, w

    # check onnx model before building the inference session
    onnx_model = onnx.load(save_path)
    onnx.checker.check_model(onnx_model)
    ort_session = onnxruntime.InferenceSession(save_path)

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    # preprocessing img
    img, ratio, pad = scale_img(img_o,
                                new_shape=input_size,
                                auto=False,
                                color=(0, 0, 0))
    # Convert: BGR to RGB, then HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img).astype(np.float32)

    img /= 255.0  # scale (0, 255) to (0, 1)
    img = np.expand_dims(img, axis=0)  # add batch dimension

    # compute ONNX Runtime output prediction
    ort_inputs = {"images": img}

    t1 = time.time()
    # prediction: [num_obj, 85] according to the original author's note
    pred = ort_session.run(None, ort_inputs)[0]
    t2 = time.time()
    print(t2 - t1)

    # process detections
    # The predicted values are relative coordinates (between 0 and 1);
    # multiply by the network input size to get absolute coordinates.
    pred[:, [0, 2]] *= input_size[1]
    pred[:, [1, 3]] *= input_size[0]
    pred = post_process(pred)

    # scale the predicted boxes back to the original image
    p_boxes = turn_back_coords(img1_shape=img.shape[2:],
                               coords=pred[:, :4],
                               img0_shape=img_o.shape,
                               ratio_pad=[ratio, pad]).round()

    bboxes = p_boxes
    scores = pred[:, 4]
    # ``np.int`` was only an alias of the builtin and has been removed from
    # NumPy; the builtin ``int`` produces the same dtype.
    classes = pred[:, 5].astype(int) + 1

    # placeholder label map: class ids 1..90 are labelled with their number
    category_index = {i: str(i) for i in range(1, 91)}
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores,
                     category_index)
    plt.imshow(img_o)
    plt.show()
Esempio n. 3
0
def main():
    """Run the pretrained YOLOv3-SPP Darknet model on ``test.jpg``.

    Loads ``weights/yolov3-spp-ultralytics-512.pt`` into a ``Darknet`` built
    from ``cfg/yolov3-spp.cfg``, letterboxes the image, runs inference and
    NMS, rescales the boxes to the original image, draws them with
    ``draw_box``, shows the plot and saves ``test_result.jpg``.

    Raises:
        AssertionError: if ``test.jpg`` cannot be read by OpenCV.
    """
    img_size = 512  # must be a multiple of 32: [416, 512, 608]
    cfg = "cfg/yolov3-spp.cfg"
    weights = "weights/yolov3-spp-ultralytics-{}.pt".format(img_size)
    img_path = "test.jpg"
    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()
    # Inference only: without no_grad every forward pass records an autograd
    # graph that is never used.
    with torch.no_grad():
        # init: one dummy forward pass before the timed one
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o, new_shape=input_size, auto=True, color=(0, 0, 0))[0]
        # Convert: BGR to RGB, then HWC to CHW
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred, conf_thres=0.3, iou_thres=0.6, multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        # Guard against an empty NMS result so the indexing below cannot fail
        # with a TypeError on None.
        if pred is None:
            print("No target detected.")
            return

        # process detections: map boxes from network input size to the image
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4], img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        # ``np.int`` has been removed from NumPy; builtin ``int`` is equivalent.
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

    # placeholder label map: class ids 1..90 are labelled with their number
    category_index = {i: str(i) for i in range(1, 91)}
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()

    img_o.save("test_result.jpg")
Esempio n. 4
0
def main():
    """Run the 21-class detector on ``./test.jpg`` and save the annotated image.

    Loads ``./save_weights/model.pth`` into the model returned by
    ``create_model``, reads the label map from ``./pascal_voc_classes.json``,
    runs one timed forward pass, draws detections scoring at least 0.5 with
    ``draw_box``, shows the plot and writes ``test_result.jpg``.

    Raises:
        AssertionError: if the weights file or the label file is missing.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model = create_model(num_classes=21)

    # load train weights
    train_weights = "./save_weights/model.pth"
    assert os.path.exists(train_weights), "{} file dose not exist.".format(
        train_weights)
    model.load_state_dict(
        torch.load(train_weights, map_location=device)["model"])
    model.to(device)

    # read the class dictionary and invert it to {index: name}
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(
        label_json_path), "json file {} dose not exist.".format(
            label_json_path)
    # The context manager closes the file, which the bare open() never did.
    with open(label_json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    # load image
    original_img = Image.open("./test.jpg")

    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # init: dummy forward pass with the same spatial size, so that the
        # timed pass below does not include first-call overhead
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        predictions = model(img.to(device))[0]
        t_end = time_synchronized()
        print("inference+NMS time: {}".format(t_end - t_start))

        predict_boxes = predictions["boxes"].to("cpu").numpy()
        predict_classes = predictions["labels"].to("cpu").numpy()
        predict_scores = predictions["scores"].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        draw_box(original_img,
                 predict_boxes,
                 predict_classes,
                 predict_scores,
                 category_index,
                 thresh=0.5,
                 line_thickness=3)
        plt.imshow(original_img)
        plt.show()
        # save the annotated prediction result
        original_img.save("test_result.jpg")
Esempio n. 5
0
# Script fragment: preprocess ``original_img``, run ``model`` on it and draw
# the detections. ``original_img``, ``model``, ``device`` and
# ``category_index`` are expected to be defined earlier in the script.
data_transform = transform.Compose(
    [transform.Resize(),
     transform.ToTensor(),
     transform.Normalization()])
# The pipeline returns a pair; only the transformed image is used here.
img, _ = data_transform(original_img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)

model.eval()
with torch.no_grad():
    predictions = model(
        img.to(device))[0]  # bboxes_out, labels_out, scores_out
    predict_boxes = predictions[0].to("cpu").numpy()
    # Columns 0/2 are scaled by size[0] and columns 1/3 by size[1].
    # NOTE(review): presumably the boxes are relative (0-1) x/y coordinates
    # and ``original_img`` is a PIL image whose size is (width, height) --
    # confirm against the model's post-processing.
    predict_boxes[:, [0, 2]] = predict_boxes[:, [0, 2]] * original_img.size[0]
    predict_boxes[:, [1, 3]] = predict_boxes[:, [1, 3]] * original_img.size[1]
    predict_classes = predictions[1].to("cpu").numpy()
    predict_scores = predictions[2].to("cpu").numpy()

    # Only reports the empty result; draw_box is still called afterwards.
    if len(predict_boxes) == 0:
        print("没有检测到任何目标!")

    draw_box(original_img,
             predict_boxes,
             predict_classes,
             predict_scores,
             category_index,
             thresh=0.5,
             line_thickness=5)
    plt.imshow(original_img)
    plt.show()
Esempio n. 6
0
def main():
    """Run a custom-trained YOLOv3-SPP Darknet model on ``test.jpg``.

    Reads the cfg, weights and label files from ``/home/mist/yolov3_spp``,
    letterboxes the image to 512, runs inference and NMS (confidence
    threshold 0.1), rescales the boxes to the original image, draws them with
    ``draw_box``, shows the plot and saves ``test_result.jpg``. If NMS yields
    no detections the process exits with status 0.

    Raises:
        AssertionError: if the cfg, weights, label or image file is missing,
            or if the image cannot be read by OpenCV.
    """
    img_size = 512  # must be a multiple of 32: [416, 512, 608]
    cfg = "/home/mist/yolov3_spp/cfg/yolov3-spp.cfg"  # change to the generated .cfg file
    # change to your own trained weights file; the path has no placeholder, so
    # the former ``.format(img_size)`` call was a no-op and has been dropped
    weights = "/home/mist/yolov3_spp/weights/yolov3spp-29.pt"
    json_path = "/home/mist/yolov3_spp/data/pascal_voc_classes.json"  # json label file
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} dose not exist.".format(cfg)
    assert os.path.exists(weights), "weights file {} dose not exist.".format(
        weights)
    assert os.path.exists(json_path), "json file {} dose not exist.".format(
        json_path)
    assert os.path.exists(img_path), "image file {} dose not exist.".format(
        img_path)

    # The context manager closes the label file, which the bare open() leaked.
    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # invert {name: index} to {index: name}
    category_index = {v: k for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()
    with torch.no_grad():
        # init: one dummy forward pass before the timed one
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o,
                                  new_shape=input_size,
                                  auto=True,
                                  color=(0, 0, 0))[0]
        # Convert: BGR to RGB, then HWC to CHW
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred,
                                         conf_thres=0.1,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        if pred is None:
            print("No target detected.")
            # Same effect as the interactive ``exit`` helper, without relying
            # on the ``site`` module having installed it.
            raise SystemExit(0)

        # process detections: map boxes from network input size to the image
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4],
                                         img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        # ``np.int`` has been removed from NumPy; builtin ``int`` is equivalent.
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

        img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores,
                         category_index)
        plt.imshow(img_o)
        plt.show()

        img_o.save("test_result.jpg")
Esempio n. 7
0
def main():
    """Run the SSD300 detector on ``./test.jpg`` and plot the detections.

    Loads ``./save_weights/ssd300-14.pth`` into the 21-class model returned
    by ``create_model`` (20 object classes plus background), reads the label
    map from ``./pascal_voc_classes.json``, runs one timed forward pass,
    scales the returned boxes by the image size and draws those scoring at
    least 0.5 with ``draw_box``.

    Raises:
        AssertionError: if the label file is missing.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # create model
    # number of object classes + background
    num_classes = 20 + 1
    model = create_model(num_classes=num_classes)

    # load train weights
    train_weights = "./save_weights/ssd300-14.pth"
    train_weights_dict = torch.load(train_weights, map_location=device)['model']

    model.load_state_dict(train_weights_dict)
    model.to(device)

    # read the class dictionary and invert it to {index: name}
    json_path = "./pascal_voc_classes.json"
    assert os.path.exists(json_path), "file '{}' dose not exist.".format(json_path)
    # The context manager closes the file, which the bare open() never did.
    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    # load image
    original_img = Image.open("./test.jpg")

    # resize, convert to tensor and normalize; the pipeline returns a pair and
    # only the transformed image is used
    data_transform = transforms.Compose([transforms.Resize(),
                                         transforms.ToTensor(),
                                         transforms.Normalization()])
    img, _ = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()
    with torch.no_grad():
        # initial model: dummy 300x300 forward pass before the timed one
        init_img = torch.zeros((1, 3, 300, 300), device=device)
        model(init_img)

        time_start = time_synchronized()
        predictions = model(img.to(device))[0]  # bboxes_out, labels_out, scores_out
        time_end = time_synchronized()
        print("inference+NMS time: {}".format(time_end - time_start))

        predict_boxes = predictions[0].to("cpu").numpy()
        # NOTE(review): presumably the boxes are relative (0-1) coordinates and
        # PIL's size is (width, height) -- confirm against the model output.
        predict_boxes[:, [0, 2]] = predict_boxes[:, [0, 2]] * original_img.size[0]
        predict_boxes[:, [1, 3]] = predict_boxes[:, [1, 3]] * original_img.size[1]
        predict_classes = predictions[1].to("cpu").numpy()
        predict_scores = predictions[2].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        draw_box(original_img,
                 predict_boxes,
                 predict_classes,
                 predict_scores,
                 category_index,
                 thresh=0.5,
                 line_thickness=5)
        plt.imshow(original_img)
        plt.show()
Esempio n. 8
0
    def detect(self):
        """Detect, track and count objects over every frame of ``self.vidCap``.

        For each grabbed frame the method runs ``self.yolov3.predict``, feeds
        the detections to ``self.deepsort.update``, remembers the class each
        track id was first reported with, and derives per-class counts and
        catch / by-catch ratios from those remembered classes. Depending on
        the instance configuration it also writes one text line per tracked
        box to ``self.output_file``, shows the annotated frame in an OpenCV
        window (``self.display``) and appends it to ``self.output``
        (``self.save_path``).

        Class ids are counted as 1 = flat fish, 2 = round fish, 3 = nephrops
        and 4 = other; the first three form the "catch", the last one the
        "by-catch".

        Returns:
            None.
        """
        json_path = './data/pascal_voc_classes.json'
        # The context manager closes the label file, which used to be leaked.
        with open(json_path, 'r') as json_file:
            class_dict = json.load(json_file)
        # invert {name: index} to {index: name}
        category_index = {v: k for k, v in class_dict.items()}

        # track id -> class reported the first time that id was seen
        all_obj_info = {}
        frame_no = -1

        # The original opened the undefined name ``output_file`` (NameError);
        # the path lives on the instance.
        f = open(self.output_file, "w") if self.output_file else None
        try:
            while self.vidCap.grab():
                frame_no += 1

                # start time
                total_begin = time.time()

                _, img = self.vidCap.retrieve()

                # yolov3: bbox coordinates [x1,y1,x2,y2], confidences, classes
                yolo_begin = time.time()
                bbox_xyxy_ori, cls_conf, cls_ids = self.yolov3.predict(img)
                print(cls_ids)
                yolo_end = time.time()

                # deepsort
                ds_begin = time.time()
                if bbox_xyxy_ori is not None:
                    # convert the coordinates and let the tracker update
                    bbox_cxcywh = xyxy2xywh(bbox_xyxy_ori)
                    outputs = self.deepsort.update(bbox_cxcywh, cls_conf,
                                                   cls_ids, img)

                    if len(outputs) > 0:
                        # each row: [x1,y1,x2,y2] ... id class
                        bbox_xyxy = outputs[:, :4]
                        ids = outputs[:, -2]
                        object_class = outputs[:, -1]
                        print(ids)
                        print(object_class)

                        # Keep the first class seen for every track id.
                        for track_id, track_cls in zip(ids, object_class):
                            all_obj_info.setdefault(track_id, track_cls)
                        print(all_obj_info)

                        # draw the raw detections (not the tracked boxes)
                        img = draw_box(img, bbox_xyxy_ori, cls_ids, cls_conf,
                                       category_index)

                        # record the info: frame,id,<box>,1,-1,-1,-1
                        if f is not None:
                            # NOTE(review): named "tlwh" but produced by the
                            # same xyxy2xywh helper used for the tracker input
                            # above -- confirm which box format it returns.
                            bbox_tlwh = xyxy2xywh(bbox_xyxy)
                            # Write the track id; the original wrote the last
                            # column, which holds the class, into the id slot.
                            for track_id, box in zip(ids, bbox_tlwh):
                                f.write("%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                    frame_no + 1, track_id,
                                    int(box[0]), int(box[1]),
                                    int(box[2]), int(box[3])))
                ds_end = time.time()

                total_end = time.time()
                # Avoid a division by zero if the clock did not advance.
                elapsed = max(total_end - total_begin, 1e-9)

                # count the current number of each category
                cur_categories = list(all_obj_info.values())
                flatfish_count = cur_categories.count(1)
                roundfish_count = cur_categories.count(2)
                nephrops_count = cur_categories.count(3)
                other_count = cur_categories.count(4)
                catch_count = (flatfish_count + roundfish_count +
                               nephrops_count)
                total_count = catch_count + other_count

                # Ratios start from frame 3, and only once something has been
                # tracked; otherwise the denominator would be zero.
                if frame_no >= 3 and total_count > 0:
                    catch_ratio = round(catch_count / total_count, 2)
                    bycatch_ratio = round(other_count / total_count, 2)
                else:
                    catch_ratio = None
                    bycatch_ratio = None

                # print info to the console
                print(
                    "frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                    % (frame_no, (yolo_end - yolo_begin), (ds_end - ds_begin),
                       (total_end - total_begin),
                       ((yolo_end - yolo_begin) * 100 / elapsed),
                       (1 / elapsed)))

                # display all the count info on the screen
                if self.display == True:
                    img = np.uint8(img)
                    displayNephropsCount(img, nephrops_count)
                    displayFlatfishCount(img, flatfish_count)
                    displayRoundfishCount(img, roundfish_count)
                    displayOtherfishCount(img, other_count)
                    displayCatchRatio(img, catch_ratio)
                    displayByCatchRatio(img, bycatch_ratio)
                    # NOTE(review): the last positional argument of putText is
                    # the line type, yet a font constant is passed -- confirm
                    # whether cv2.LINE_AA was intended.
                    cv2.putText(img,
                                'FPS {:.1f}'.format(1 / elapsed),
                                (20, 280), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                (255, 255, 255), 2,
                                cv2.FONT_HERSHEY_COMPLEX_SMALL)
                    cv2.imshow("Test", img)

                    # press Q to quit; a single waitKey call so the key press
                    # cannot be swallowed by an earlier, unchecked call
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                # determine if output the new video
                if self.save_path:
                    self.output.write(img)
        finally:
            # Close the results file even if a frame raised.
            if f is not None:
                f.close()
Esempio n. 9
0
    transforms.Compose(
        [transforms.ToTensor(),
         transforms.RandomHorizontalFlip(0.5)]),
    "val":
    transforms.Compose([transforms.ToTensor()])
}

# Script fragment: load the training split and display five randomly chosen
# samples with their ground-truth boxes drawn on them. ``data_transform`` and
# ``category_index`` are expected to be defined earlier in the script.

# load train data set
train_data_set = VOC2012DataSet(os.getcwd(), data_transform["train"],
                                "train.txt")
# VOC2012DataSet is a custom dataset class. Per the original author's note,
# the first argument is the root directory that contains VOC (os.getcwd()
# when it is the current directory) and the second is the preprocessing
# pipeline for the training split, data_transform["train"].

print(len(train_data_set))  # number of samples in the training set
for index in random.sample(range(0, len(train_data_set)), k=5):  # sample 5 random images
    img, target = train_data_set[
        index]  # indexing returns (img, target) because __getitem__ is implemented
    img = ts.ToPILImage()(img)  # preprocessing made img a tensor; convert back to a PIL image

    draw_box(
        img,  # image to draw on
        target["boxes"].numpy(),
        target["labels"].numpy(),
        [1 for i in range(len(target["labels"].numpy()))
         ],  # takes the place of the class scores; every box gets a score of 1
        category_index,  # label map from the json file (keys and values swapped)
        thresh=0.5,  # score threshold; has no filtering effect here since all scores are 1
        line_thickness=5)  # line width
    plt.imshow(img)
    plt.show()
Esempio n. 10
0
def main():
    """Run the 2-class detector on every file of the test folder and save results.

    Loads ``./save_weights/model.pth`` into the model returned by
    ``create_model``, reads the label map from ``./tgk_classes.json`` and, for
    each file in the hard-coded test directory, runs one timed forward pass,
    draws detections scoring at least 0.5 with ``draw_box`` and saves the
    annotated image into the hard-coded result directory.

    Raises:
        AssertionError: if the weights file or the label file is missing.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model = create_model(num_classes=2)

    # load train weights
    train_weights = "./save_weights/model.pth"
    assert os.path.exists(train_weights), "{} file dose not exist.".format(train_weights)
    model.load_state_dict(torch.load(train_weights, map_location=device)["model"])
    model.to(device)

    # read the class dictionary and invert it to {index: name}
    label_json_path = './tgk_classes.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    # The context manager closes the file, which the bare open() never did.
    with open(label_json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    files_path = r"C:\Users\Administrator\Desktop\tgk-test"  # test image folder
    save_path = r"C:\Users\Administrator\Desktop\tgk_result"  # output folder
    # List the folder once so the reported total and the iteration agree.
    file_names = os.listdir(files_path)
    filelen = len(file_names)

    # Loop-invariant setup: the transform and the evaluation mode do not
    # depend on the image.
    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    model.eval()  # switch to evaluation mode

    for index, file in enumerate(file_names):
        image_path = os.path.join(files_path, file)
        # Closing each image releases its file handle instead of keeping one
        # open per processed file.
        with Image.open(image_path) as original_img:
            img = data_transform(original_img)
            # expand batch dimension
            img = torch.unsqueeze(img, dim=0)

            with torch.no_grad():
                # init: dummy forward pass with this image's spatial size
                img_height, img_width = img.shape[-2:]
                init_img = torch.zeros((1, 3, img_height, img_width), device=device)
                model(init_img)

                t_start = time.time()
                predictions = model(img.to(device))[0]
                print("inference+NMS time: {}".format(time.time() - t_start))

                predict_boxes = predictions["boxes"].to("cpu").numpy()
                predict_classes = predictions["labels"].to("cpu").numpy()
                predict_scores = predictions["scores"].to("cpu").numpy()

                if len(predict_boxes) == 0:
                    print("没有检测到任何目标!")

                draw_box(original_img,
                         predict_boxes,
                         predict_classes,
                         predict_scores,
                         category_index,
                         thresh=0.5,
                         line_thickness=3)

            # save the annotated prediction result
            # NOTE(review): the file name embeds the printed score array, which
            # can be long for many detections, and the stem is cut at the
            # first "." of the source name -- confirm this naming is wanted.
            image_name = file.split(".")[0] + str(predict_scores) + "_test_result.jpg"
            original_img.save(os.path.join(save_path, image_name))
        print("Successful save image[{}/{}]".format(index+1 ,filelen))