Beispiel #1
0
def main(args):
    """Run plate OCR on each frame of ``./video/test.mp4`` and display it.

    Every frame is passed to ``TextSystem``. A recognition is kept only when
    its score reaches ``drop_score``, its text is 7 or 8 characters long, its
    first character is in ``prov_list`` and its second is in ``city_list``.
    Kept results are printed and, when visualization is on, drawn onto the
    frame and shown in an OpenCV window.

    Args:
        args: Options object forwarded to ``TextSystem``; ``vis_font_path``
            is read from it for drawing.
    """
    cap = cv2.VideoCapture("./video/test.mp4")
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = 0.5
    try:
        while cap.isOpened():
            flag, img = cap.read()
            if not flag:
                # End of stream or read failure: img is not usable, so stop
                # before it reaches the OCR system or cvtColor.
                break
            starttime = time.time()
            dt_boxes, rec_res = text_sys(img)
            elapse = time.time() - starttime
            print("Predict time: %.3fs" % (elapse))

            boxes = []
            txts = []
            scores = []
            for box, (text, score) in zip(dt_boxes, rec_res):
                # The length test comes first so text[0] / text[1] are only
                # indexed on strings that are long enough.
                if (score >= drop_score and len(text) in (7, 8)
                        and text[0] in prov_list and text[1] in city_list):
                    txts.append(text)
                    scores.append(score)
                    boxes.append(box)
                    print("%s, %.3f" % (text, score))

            if is_visualize:
                image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                draw_img = draw_ocr_box_txt(image,
                                            boxes,
                                            txts,
                                            scores,
                                            drop_score=drop_score,
                                            font_path=font_path)
                # Reverse the channel axis again before handing the frame
                # back to OpenCV for display.
                cv2.imshow('Car Plate Recognition', draw_img[:, :, ::-1])
                cv2.waitKey(1)
    finally:
        # Always free the capture and the window, even if OCR raised.
        cap.release()
        cv2.destroyAllWindows()
def main(args):
    """Run OCR on frames from capture device 0 until ESC is pressed.

    Each frame is passed to ``TextSystem``. Recognitions whose score reaches
    ``drop_score`` are printed; all recognitions are drawn onto the frame,
    which is shown in an OpenCV window named ``img``.

    Args:
        args: Options object forwarded to ``TextSystem``.
    """
    cap = cv2.VideoCapture(0)
    text_sys = TextSystem(args)
    is_visualize = True
    drop_score = 0.5

    try:
        while True:
            success, img = cap.read()
            if not success:
                # No frame was delivered (device missing or stream ended);
                # img cannot be processed, so stop instead of crashing.
                break
            dt_boxes, rec_res = text_sys(img)
            for text, score in rec_res:
                if score >= drop_score:
                    print("%s, %.3f" % (text, score))

            if is_visualize:
                image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                txts = [res[0] for res in rec_res]
                scores = [res[1] for res in rec_res]
                draw_img = draw_ocr_box_txt(image, dt_boxes, txts, scores)
                cv2.imshow('img', draw_img[:, :, ::-1])
                # 27 is the key code of ESC.
                if cv2.waitKey(1) == 27:
                    break
    finally:
        # Release the device and close the window on every exit path.
        cap.release()
        cv2.destroyAllWindows()
Beispiel #3
0
def main(args):
    """Run OCR on every image in ``args.image_dir`` and save annotated copies.

    For each readable image the prediction time and every recognition whose
    score reaches ``drop_score`` are printed. When visualization is on, the
    annotated image is written to ``./results`` under the input's base name.

    Args:
        args: Options object forwarded to ``TextSystem``; ``image_dir`` and
            ``vis_font_path`` are read from it.
    """
    image_file_list = get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = 0.5
    draw_img_save = "./results"

    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        print("Predict time of %s: %.3fs" % (image_file, elapse))

        for text, score in rec_res:
            if score >= drop_score:
                print("%s, %.3f" % (text, score))

        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            txts = [res[0] for res in rec_res]
            scores = [res[1] for res in rec_res]

            draw_img = draw_ocr_box_txt(image,
                                        dt_boxes,
                                        txts,
                                        scores,
                                        drop_score=drop_score,
                                        font_path=font_path)
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(draw_img_save, exist_ok=True)
            save_path = os.path.join(draw_img_save,
                                     os.path.basename(image_file))
            cv2.imwrite(save_path, draw_img[:, :, ::-1])
            print("The visualized image saved in {}".format(save_path))
Beispiel #4
0
def draw_structure_result(image, result, font_path):
    """Draw the OCR content of all non-table regions onto ``image``.

    Args:
        image: A PIL image or a numpy array; arrays are converted with
            ``Image.fromarray`` first.
        result: Iterable of region dicts. Each has a ``'type'`` key and, for
            non-``'Table'`` regions, a ``'res'`` entry whose element 0 holds
            the boxes and element 1 the matching ``(text, score)`` records.
        font_path: Font file forwarded to ``draw_ocr_box_txt``.

    Returns:
        Whatever ``draw_ocr_box_txt`` returns for the collected boxes, texts
        and scores (called with ``drop_score=0``).
    """
    canvas = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    polygons = []
    texts = []
    confidences = []
    for region in result:
        # Table regions contribute nothing to the drawing.
        if region['type'] == 'Table':
            continue
        detections = region['res'][0]
        recognitions = region['res'][1]
        for quad, recognized in zip(detections, recognitions):
            # Each box is reshaped into rows of two values.
            polygons.append(np.array(quad).reshape(-1, 2))
            texts.append(recognized[0])
            confidences.append(recognized[1])

    return draw_ocr_box_txt(canvas,
                            polygons,
                            texts,
                            confidences,
                            font_path=font_path,
                            drop_score=0)
Beispiel #5
0
def main(args):
    """OCR every image under ``args.image_dir`` and store annotated copies.

    Prediction time and each ``(text, score)`` pair are logged. The annotated
    image is written to ``./inference_results/``; inputs that
    ``check_and_read_gif`` flagged get their last three characters replaced
    by ``png`` in the output name.

    Args:
        args: Options object forwarded to ``TextSystem``; ``image_dir``,
            ``vis_font_path`` and ``drop_score`` are read from it.
    """
    candidates = get_image_file_list(args.image_dir)
    ocr = TextSystem(args)
    visualize = True
    font = args.vis_font_path
    min_score = args.drop_score
    out_dir = "./inference_results/"

    for src_path in candidates:
        frame, is_gif = check_and_read_gif(src_path)
        if not is_gif:
            frame = cv2.imread(src_path)
        if frame is None:
            logger.info("error in loading image:{}".format(src_path))
            continue

        began = time.time()
        dt_boxes, rec_res = ocr(frame)
        took = time.time() - began
        logger.info("Predict time of %s: %.3fs" % (src_path, took))

        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))

        if not visualize:
            continue

        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        texts = [entry[0] for entry in rec_res]
        confidences = [entry[1] for entry in rec_res]
        rendered = draw_ocr_box_txt(pil_image,
                                    dt_boxes,
                                    texts,
                                    confidences,
                                    drop_score=min_score,
                                    font_path=font)

        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        # Flagged (GIF) inputs are saved under a .png name instead.
        named_as = (src_path[:-3] + "png") if is_gif else src_path
        target = os.path.join(out_dir, os.path.basename(named_as))
        cv2.imwrite(target, rendered[:, :, ::-1])
        logger.info("The visualized image saved in {}".format(target))
Beispiel #6
0
def main(args):
    """OCR a random sample of at most 20 images and optionally save the text.

    The image list is sharded by ``args.process_id`` /
    ``args.total_process_num`` and then sampled. Every recognition is logged.
    When ``args.is_save`` is set, the recognized texts of an image are written
    one per line to ``./final_results/20/<image stem>.txt``; images whose
    result file already exists are skipped.

    Args:
        args: Options object forwarded to ``TextSystem``; ``image_dir``,
            ``process_id``, ``total_process_num``, ``vis_font_path``,
            ``drop_score`` and ``is_save`` are read from it.
    """
    image_file_list = get_image_file_list(args.image_dir)
    image_file_list = image_file_list[args.process_id::args.total_process_num]
    text_sys = TextSystem(args)
    is_visualize = False
    font_path = args.vis_font_path
    drop_score = args.drop_score
    loop_count = 20
    dataset_dir = './final_results/20/'

    # random.sample raises ValueError when k exceeds the population, so cap
    # the sample size for shards with fewer than loop_count images.
    sample_size = min(loop_count, len(image_file_list))
    selected_imgs = random.sample(image_file_list, k=sample_size)
    for image_file in selected_imgs:
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            continue
        dt_boxes, rec_res = text_sys(img)

        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))

        if args.is_save:
            print(image_file)
            path = os.path.join(
                dataset_dir,
                os.path.splitext(os.path.basename(image_file))[0])
            if os.path.exists(path + '.txt'):
                continue
            # Create the output directory on demand; open() would fail
            # otherwise on a fresh checkout.
            os.makedirs(dataset_dir, exist_ok=True)
            # Write the recognized texts into the result file; the context
            # manager guarantees the handle is flushed and closed.
            with open(path + '.txt', 'w', encoding="utf-8") as res_txt:
                res_txt.writelines(res[0] + '\n' for res in rec_res)

        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            txts = [res[0] for res in rec_res]
            scores = [res[1] for res in rec_res]

            draw_img = draw_ocr_box_txt(image,
                                        dt_boxes,
                                        txts,
                                        scores,
                                        drop_score=drop_score,
                                        font_path=font_path)
            draw_img_save = "./inference_results/"
            os.makedirs(draw_img_save, exist_ok=True)
            save_path = os.path.join(draw_img_save,
                                     os.path.basename(image_file))
            cv2.imwrite(save_path, draw_img[:, :, ::-1])
            logger.info(
                "The visualized image saved in {}".format(save_path))
Beispiel #7
0
def _pop_name_box(dt_boxes, rec_res):
    """Remove the detection holding the name value and return its box.

    Scans ``rec_res`` for the first text containing "姓名" (the "full name"
    label) or "名". A very short match is treated as a bare label whose value
    is the next detection; a longer match is treated as label and value in
    one detection. The chosen entry is popped from both lists in place.

    Returns:
        The removed box, or ``None`` when no entry matched or the bare label
        was the last detection (so there is no following value to remove).
    """
    for i, val in enumerate(rec_res):
        text = val[0]
        if "姓名" in text:
            target = i + 1 if len(text) < 3 else i
        elif "名" in text and len(text) <= 2:
            target = i + 1
        elif "名" in text and len(text) <= 6:
            target = i
        else:
            continue
        if target >= min(len(rec_res), len(dt_boxes)):
            # Label found but nothing follows it: indexing would overflow.
            return None
        name_box = dt_boxes[target]
        rec_res.pop(target)
        dt_boxes.pop(target)
        return name_box
    return None


def main(args):
    """OCR every file under ``args.image_dir``, redact the name, dump JSON.

    For each image the detection judged to contain the person's name is
    removed from the results. The remaining texts are written as JSON under
    ``./inference_results_json/``. The annotated image, with the name region
    painted black when one was found, is written under
    ``./inference_results/``.

    Args:
        args: Options object forwarded to ``TextSystem``; ``image_dir``,
            ``vis_font_path`` and ``drop_score`` are read from it.
    """
    # Build the OCR system once; constructing it per file repeats the setup
    # work for every image.
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    for root, dirs, files in os.walk(args.image_dir):
        for file in files:
            imgpath = root + "/" + file
            image_file_list = get_image_file_list(imgpath)
            for image_file in image_file_list:
                img, flag = check_and_read_gif(image_file)
                if not flag:
                    img = cv2.imread(image_file)
                if img is None:
                    logger.info(
                        "error in loading image:{}".format(image_file))
                    continue
                starttime = time.time()
                dt_boxes, rec_res = text_sys(img)
                name_box = _pop_name_box(dt_boxes, rec_res)
                # Only build a paste region when a name box really exists;
                # there is nothing meaningful to black out otherwise.
                roi = None
                if name_box is not None:
                    roi = (int(name_box[0][0]), int(name_box[0][1]),
                           int(name_box[2][0]), int(name_box[2][1]))
                elapse = time.time() - starttime
                print("Predict time of %s: %.3fs" % (image_file, elapse))

                # NOTE(review): the prefix strip below uses a backslash
                # separator, so it only matches Windows-style roots.
                json_img_save = "./inference_results_json/" + \
                    root.replace(args.image_dir+"\\", "")
                os.makedirs(json_img_save, exist_ok=True)
                with open(json_img_save + "/" + file.replace(".jpg", "") +
                          ".json",
                          'w',
                          encoding='utf-8') as file_obj:
                    ans_json = {'data': [{'str': res[0]} for res in rec_res]}
                    json.dump(ans_json,
                              file_obj,
                              indent=4,
                              ensure_ascii=False)

                if is_visualize:
                    image = Image.fromarray(
                        cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    txts = [res[0] for res in rec_res]
                    scores = [res[1] for res in rec_res]

                    draw_img = draw_ocr_box_txt(image,
                                                dt_boxes,
                                                txts,
                                                scores,
                                                drop_score=args.drop_score,
                                                font_path=font_path)
                    if roi is not None:
                        draw_img.paste((0, 0, 0), roi)
                    draw_img = np.array(draw_img)
                    draw_img_save = "./inference_results/" + \
                        root.replace(args.image_dir+"\\", "")
                    os.makedirs(draw_img_save, exist_ok=True)
                    save_path = os.path.join(draw_img_save,
                                             os.path.basename(image_file))
                    cv2.imwrite(save_path, draw_img[:, :, ::-1])
                    print("The visualized image saved in {}".format(
                        save_path))
Beispiel #8
0
def main(args):
    """OCR every image, export a JSON field template and an annotated copy.

    For each readable image in ``args.image_dir`` the prediction time and all
    ``(text, score)`` pairs are logged. A JSON file named
    ``<image_file>.json`` is written next to the input. It lists every text
    box (1-based id, four floats taken from points 0 and 2 of the box, and
    the text) plus an empty template for six named fields. The annotated
    image goes to ``./inference_results/``.

    Args:
        args: Options object forwarded to ``TextSystem``; ``image_dir``,
            ``vis_font_path`` and ``drop_score`` are read from it.
    """
    candidates = get_image_file_list(args.image_dir)
    ocr = TextSystem(args)
    visualize = True
    font = args.vis_font_path
    min_score = args.drop_score
    field_names = ("customer_number", "name", "address", "amount", "date",
                   "content")
    out_dir = "./inference_results/"

    for image_file in candidates:
        img, is_gif = check_and_read_gif(image_file)
        if not is_gif:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue

        began = time.time()
        dt_boxes, rec_res = ocr(img)
        took = time.time() - began
        logger.info("Predict time of %s: %.3fs" % (image_file, took))

        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))

        if not visualize:
            continue

        pil_image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        texts = [entry[0] for entry in rec_res]
        confidences = [entry[1] for entry in rec_res]

        # One record per recognition; ids start at 1.
        text_boxes = []
        for idx, entry in enumerate(rec_res):
            first_pt = dt_boxes[idx][0]
            third_pt = dt_boxes[idx][2]
            text_boxes.append({
                'id': idx + 1,
                'bbox': [float(first_pt[0]), float(first_pt[1]),
                         float(third_pt[0]), float(third_pt[1])],
                'text': entry[0],
            })

        # Every field starts out as an empty template.
        fields = [{"field_name": field_name,
                   "value_id": [],
                   "value_text": [],
                   "key_id": [],
                   "key_text": []} for field_name in field_names]

        assorted_results = {
            "text_boxes": text_boxes,
            "fields": fields,
            "global_attributes": {"file_id": image_file.split('/')[-1]},
        }
        with open(image_file + '.json', 'w', encoding='utf-8') as outfile:
            json.dump(assorted_results, outfile)

        rendered = draw_ocr_box_txt(pil_image,
                                    dt_boxes,
                                    texts,
                                    confidences,
                                    drop_score=min_score,
                                    font_path=font)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        target = os.path.join(out_dir, os.path.basename(image_file))
        cv2.imwrite(target, rendered[:, :, ::-1])
        logger.info("The visualized image saved in {}".format(target))
Beispiel #9
0
def main(args):
    """OCR this process's share of the images and save annotated copies.

    The image list is sharded with ``args.process_id`` /
    ``args.total_process_num``. With ``args.warmup`` set, the OCR system is
    first called ten times on a random 640x640x3 uint8 array. Per-image
    details are logged at debug level and the overall wall time at info
    level. With ``args.benchmark`` set, the detector's and recognizer's
    ``autolog`` reports are emitted at the end.

    Args:
        args: Options object forwarded to ``TextSystem``; also supplies
            ``image_dir``, ``vis_font_path``, ``drop_score`` and
            ``draw_img_save_dir``.
    """
    all_images = get_image_file_list(args.image_dir)
    shard = all_images[args.process_id::args.total_process_num]
    ocr = TextSystem(args)
    visualize = True
    font = args.vis_font_path
    min_score = args.drop_score

    if args.warmup:
        # Ten throw-away predictions on random pixels.
        dummy = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for _ in range(10):
            ocr(dummy)

    wall_start = time.time()
    for idx, src_path in enumerate(shard):
        frame, is_gif = check_and_read_gif(src_path)
        if not is_gif:
            frame = cv2.imread(src_path)
        if frame is None:
            logger.debug("error in loading image:{}".format(src_path))
            continue

        began = time.time()
        dt_boxes, rec_res = ocr(frame)
        took = time.time() - began

        logger.debug(
            str(idx) + "  Predict time of %s: %.3fs" % (src_path, took))
        for text, score in rec_res:
            logger.debug("{}, {:.3f}".format(text, score))

        if not visualize:
            continue

        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        texts = [entry[0] for entry in rec_res]
        confidences = [entry[1] for entry in rec_res]
        rendered = draw_ocr_box_txt(pil_image,
                                    dt_boxes,
                                    texts,
                                    confidences,
                                    drop_score=min_score,
                                    font_path=font)

        out_dir = args.draw_img_save_dir
        os.makedirs(out_dir, exist_ok=True)
        # Flagged (GIF) inputs are saved under a .png name instead.
        named_as = (src_path[:-3] + "png") if is_gif else src_path
        target = os.path.join(out_dir, os.path.basename(named_as))
        cv2.imwrite(target, rendered[:, :, ::-1])
        logger.debug("The visualized image saved in {}".format(target))

    logger.info("The predict total time is {}".format(time.time() - wall_start))
    if args.benchmark:
        ocr.text_detector.autolog.report()
        ocr.text_recognizer.autolog.report()
Beispiel #10
0
def main(args):
    """OCR this process's share of the images and save results and drawings.

    The image list is sharded with ``args.process_id`` /
    ``args.total_process_num``. With ``args.warmup`` set, the OCR system is
    first called ten times on a random 640x640x3 uint8 array. For every
    readable image one line ``<image_file>\\t<json list>`` is collected,
    holding the transcription and box points of each recognition whose score
    reaches ``args.drop_score``. The collected lines are handed to
    ``save_results_to_txt`` together with ``args.draw_img_save_dir``. With
    ``args.is_visualize`` set, annotated images are written to that same
    directory.

    Args:
        args: Options object forwarded to ``TextSystem``; also supplies
            ``image_dir``, ``vis_font_path``, ``drop_score``,
            ``draw_img_save_dir`` and ``benchmark``.
    """
    image_file_list = get_image_file_list(args.image_dir)
    image_file_list = image_file_list[args.process_id::args.total_process_num]
    text_sys = TextSystem(args)
    is_visualize = args.is_visualize
    font_path = args.vis_font_path
    drop_score = args.drop_score
    draw_img_save_dir = args.draw_img_save_dir
    # The directory receives the results even when visualization is off, so
    # create it up front rather than only on the drawing path.
    os.makedirs(draw_img_save_dir, exist_ok=True)

    # warm up 10 times
    if args.warmup:
        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for _ in range(10):
            text_sys(img)

    _st = time.time()
    save_res = []
    for idx, image_file in enumerate(image_file_list):

        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.debug("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime

        # save results
        preds = [{
            "transcription": text,
            "points": np.array(box).tolist()
        } for box, (text, score) in zip(dt_boxes, rec_res)
                 if score >= drop_score]
        save_res.append(image_file + '\t' +
                        json.dumps(preds, ensure_ascii=False) + '\n')

        # print predicted results
        logger.debug(
            str(idx) + "  Predict time of %s: %.3fs" % (image_file, elapse))
        for text, score in rec_res:
            logger.debug("{}, {:.3f}".format(text, score))

        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            txts = [res[0] for res in rec_res]
            scores = [res[1] for res in rec_res]

            draw_img = draw_ocr_box_txt(image,
                                        dt_boxes,
                                        txts,
                                        scores,
                                        drop_score=drop_score,
                                        font_path=font_path)
            # Flagged (GIF) inputs are saved under a .png name instead.
            save_name = (image_file[:-3] + "png") if flag else image_file
            save_path = os.path.join(draw_img_save_dir,
                                     os.path.basename(save_name))
            cv2.imwrite(save_path, draw_img[:, :, ::-1])
            logger.debug(
                "The visualized image saved in {}".format(save_path))

    # The collected result lines are handed to save_results_to_txt together
    # with args.draw_img_save_dir (presumably the directory it writes into;
    # verify against that helper).
    save_results_to_txt(save_res, draw_img_save_dir)

    logger.info("The predict total time is {}".format(time.time() - _st))
    if args.benchmark:
        text_sys.text_detector.autolog.report()
        text_sys.text_recognizer.autolog.report()