Example #1
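Draws a label's bounding boxes on an image and writes the result to a parallel 'save' directory.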
    def save_label(self, img_path, label):
        # mirror the input path: write into a 'save' directory instead of 'img'
        save_path = img_path.replace('img', 'save')
        save_dir = os.path.split(save_path)[0]
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # draw the bounding boxes and persist the annotated image
        img = draw_bbox(img_path, label)
        cv2.imwrite(save_path, img)
        return img
Example #2
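Runs MTCNN face detection over a list of image files, optionally saving faces, drawing landmarks, and displaying each annotated frame.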
    def run_detection_from_image(self, filenames, save_faces=True, outdir='./',
                                 plot_landmarks=False, show_frame=False):

        for filename in tqdm(filenames):

            # read the image
            image = Image.open(filename).convert('RGB')
            # create an image array copy so that we can use OpenCV functions on it
            image_array = np.array(image, dtype=np.float32)
            # cv2 image color conversion
            image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)

            faces, probs, bounding_boxes, landmarks = self.fast_mtcnn(image_array,
                                                                      save_faces=save_faces,
                                                                      id=self.uuid,
                                                                      outdir=outdir,
                                                                      return_all=self.return_all)

            # draw the bounding boxes around the faces
            try:
                image_array = utils.draw_bbox(bounding_boxes, image_array, probs[0])
                if plot_landmarks:
                    image_array = utils.plot_landmarks(landmarks, image_array)
            except Exception:
                # drawing fails when no faces were detected; skip this image
                pass

            # show the image
            if show_frame:
                cv2.imshow('Image', image_array / 255.0)
                cv2.waitKey(0)
Example #3
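Runs CTPN text detection on one image, writes the detected boxes to a label file, and saves the annotated image.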
def detect(image_path=''):

    main_path = os.path.abspath(os.path.join(os.getcwd()))
    image_result_path = os.path.join(main_path, 'result/step2/image/')
    label_result_path = os.path.join(main_path, 'result/step2/label/')

    start = time.time()

    model_path = os.path.join(main_path, 'detection/output/best_loss0.000151.pth')

    image_name = os.path.basename(image_path)
    image_result_path += image_name
    label_result_path += os.path.splitext(image_name)[0] + '.txt'
    # initialize the network
    net = CTPN_Model(pretrained=False)
    model = Pytorch_model(model_path, net=net, gpu_id=None)
    boxes_list, t = model.predict(image_path)
    cost_time = (time.time() - start)
    print("cost time: {:.2f}s".format(cost_time))
    with open(label_result_path, "w") as f:
        for index, point in enumerate(boxes_list):
            box = point[0].astype(int)
            # nudge the left edge 10px outward, clamped at the image border
            box[0][0] = max(box[0][0] - 10, 0)
            box[3][0] = max(box[3][0] - 10, 0)
            line = ",".join(str(k) for lst in box for k in lst)
            boxes_list[index][0] = box
            f.write(line + "\r\n")
    image = draw_bbox(image_path, boxes_list, color=(0, 0, 255), mode=1)
    cv2.imwrite(image_result_path, image)
    print('Detect Finished.')
    print('Created Image: ', image_result_path)
    print('Created Text: ', label_result_path)
Example #4
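Synthesizes a training image: renders a word onto a background, optionally adds lines, then applies perspective, noise, blur, and color reversal, drawing the boxes in debug mode.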
    def gen_img(self):
        word, font, word_size = self.pick_font()

        # The background should be much taller than the raw word image so
        # that the full word can still be cropped after the perspective
        # transform is applied.
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)
        if self.vertical:
            word_img, text_box_pnts, word_color = self.draw_vertical_text_on_bg(
                word, font, bg)
        else:
            word_img, text_box_pnts, word_color = self.draw_horizontal_text_on_bg(
                word, font, bg)

        if apply(self.cfg.line):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts,
                                                       word_color)

        word_img, img_pnts_transformed, text_box_pnts_transformed = \
            self.apply_perspective_transform(word_img, text_box_pnts,
                                             max_x=self.cfg.perspective_transform.max_x,
                                             max_y=self.cfg.perspective_transform.max_y,
                                             max_z=self.cfg.perspective_transform.max_z,
                                             gpu=self.gpu)

        if self.debug:
            word_img = draw_box(word_img, img_pnts_transformed, (0, 255, 0))
            word_img = draw_box(word_img, text_box_pnts_transformed,
                                (0, 0, 255))
            _, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
            word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))
        else:
            word_img, crop_bbox = self.crop_img(word_img,
                                                text_box_pnts_transformed)

        if apply(self.cfg.noise):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)

        blurred = False
        if apply(self.cfg.blur):
            blurred = True
            word_img = self.apply_blur_on_output(word_img)

        if not blurred:
            if apply(self.cfg.prydown):
                word_img = self.apply_prydown(word_img)

        word_img = np.clip(word_img, 0., 255.)

        if apply(self.cfg.reverse_color):
            word_img = self.reverse_img(word_img)

        return word_img, word
Example #5
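A Flask endpoint that decodes a base64 image, runs text detection and recognition, logs per-stage timings, and returns the results as JSON.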
def predict():
    time_str = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(time.time()))
    if flask.request.method == 'POST':
        try:
            start = time.time()
            # request payload: {"imgData": img_base64_str}
            data = json.loads(flask.request.data)
            t1 = time.time()
            # BGR
            img = read_base64(data['imgData'], mode='opencv')
            t2 = time.time()
            preds, boxes_list, score_list, det_time = det_model.predict(img, is_output_polygon=False, short_size=args.det_short_size)
            if args.debug:
                draw_img = draw_bbox(img, boxes_list)
            result = []
            rec_time = 0
            for i, box in enumerate(boxes_list):
                rec_img = CropWordBox.crop_image_by_bbox(img, box, args.rec_crop_ratio)
                text, prob, t = rec_model.predict(rec_img)
                prob = round(prob, 3)
                rec_time += t
                result.append({'id':time_str+ '_' + str(i), 'box':box.tolist(), 'recognition':text, 'prob':prob})
                if args.debug:
                    draw_img = cv2ImgAddText(draw_img, text, (box[0][0], box[0][1]-40), textColor=(255, 255, 0), textSize=40)
                    draw_img = cv2ImgAddText(draw_img, f'{prob:.3f}', (box[3][0], box[3][1]+5), textColor=(255, 255, 0), textSize=40)
            if args.debug:
                cv2.imwrite(os.path.join('debug/draw_img', 'draw_' + time_str + '.jpg'), draw_img)
                cv2.imwrite(os.path.join('debug/org_img', time_str + '.png'), img)
            logger.info(f'get img time: {(t1-start)*1000: .1f}ms \n'
                        f'read base64 img time: {(t2-t1)*1000: .1f}ms \n'
                        f'det preprocess time: {det_time[0]*1000: .1f}ms \n'
                        f'det inference time: {det_time[1]*1000: .1f}ms \n'
                        f'det postprocess time: {det_time[2]*1000: .1f}ms \n'
                        f'det total time: {det_time[3]*1000: .1f}ms \n'
                        f'rec total time: {rec_time*1000: .1f}ms \n')
            end = time.time()
            out = {'data':result, 'code':1, 'message':'', 'getImageTime':time_str}
            logger.info(f'total cost time: {(end - start)*1000: .1f}ms')
            logger.info(pprint.pformat(out))
            logger.info('========================================================================')
            if args.file_record:
                file_record.write(str(out))
            return json.dumps(out, ensure_ascii=False)
        except Exception:
            out = {'code':0, 'message':traceback.format_exc(), 'getImageTime':time_str}
            logger.error(traceback.format_exc())
            logger.info('========================================================================')
            return json.dumps(out, ensure_ascii=False)
    else:
        out = {'code':0, 'message':'request method must be post', 'getImageTime':time_str}
        logger.error('request method must be post')
        logger.info('========================================================================')
        return json.dumps(out, ensure_ascii=False)
Example #6
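Runs MTCNN face detection frame by frame over a video file, drawing boxes and landmarks, with optional FPS profiling.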
    def run_detection_from_video_file(self, video_file, outdir='./', save_faces=True,
                                      profiling=False, plot_landmarks=False):

        print("[INFO] Loading video file")
        cap = cv2.VideoCapture(video_file)
        profiler = FPS()
        if not cap.isOpened():
            print('Error while trying to read video. Please check path again')
            return

        frame_count, total_fps, faces_detected = 0, 0, 0  # frame, FPS, and face counters

        # read until end of video
        while cap.isOpened():
            # capture each frame of the video
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                profiler.start()
                faces, probs, bounding_boxes, landmarks = self.fast_mtcnn(frame,
                                                                          save_faces=save_faces,
                                                                          id=self.uuid,
                                                                          outdir=outdir,
                                                                          return_all=self.return_all)
                if faces is not None:
                    faces_detected += len(faces)
                profiler.update(1)
                # color conversion for OpenCV
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                # draw the bounding boxes around the faces
                try:
                    frame = utils.draw_bbox(bounding_boxes, frame, probs[0])
                    if plot_landmarks:
                        frame = utils.plot_landmarks(landmarks, frame)
                except Exception:
                    # drawing fails when no faces were detected; skip this frame
                    pass

                cv2.imshow('Face detection frame', frame)
                # press `q` to exit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break

        profiler.stop()
        # release VideoCapture()
        print("[INFO] cleaning up...")
        cap.release()
        cv2.destroyAllWindows()
        # calculate and print the average FPS
        if profiling:
            print(f"Average FPS: {profiler.fps():.3f}")
Example #7
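A CTPN detection variant that falls back to default paths, writes the label file, and saves the annotated image as result.jpg.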
def detect(image_path='', label_path=''):
    main_path = os.path.abspath(os.path.join(os.getcwd()))
    image_result_path = os.path.join(main_path,
                                     'media/image/CEIR/result/step2/image/')
    label_result_path = os.path.join(main_path,
                                     'media/image/CEIR/result/step2/label/')
    output_path = image_result_path
    print(output_path)
    output_image_path = os.path.join(main_path,
                                     'media/image/CEIR/result/step2/reshape/')
    output_label_path = label_result_path

    import config
    from model import CTPN_Model
    import matplotlib.pyplot as plt
    from utils.utils import show_img, draw_bbox, draw_anchor

    # os.environ['CUDA_VISIBLE_DEVICES'] = str('2')

    model_path = 'output/ctpn_1_gpu1111/best_loss0.000151.pth'

    image_id = 1
    if image_path == '':
        image_path = '/home/dong/Downloads/receipt/blog/CEIR/result/step2/image/test.jpg'
    if label_path == '':
        label_path = '/home/dong/Downloads/receipt/blog/CEIR/result/step2/label/test.txt'

    # initialize the network
    net = CTPN_Model(pretrained=False)
    model = Pytorch_model(model_path, net=net, gpu_id=None)
    boxes_list, t = model.predict(image_path)
    with open(label_path, "w") as f:
        for index, point in enumerate(boxes_list):
            box = point[0].astype(int)
            # nudge the left edge 10px outward, clamped at the image border
            box[0][0] = max(box[0][0] - 10, 0)
            box[3][0] = max(box[3][0] - 10, 0)
            print(box)
            line = ",".join(str(k) for lst in box for k in lst)
            boxes_list[index][0] = box
            print(line)
            f.write(line + "\r\n")
    image = draw_bbox(image_path, boxes_list, color=(0, 0, 255), mode=1)
    cv2.imwrite('result.jpg', image)
    print('Finished.')
    return image_path, label_path
Example #8
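Runs MTCNN face detection on a live webcam stream, drawing boxes and landmarks until 'q' is pressed.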
    def run_detection_from_webcam_stream(self, save_faces=True, outdir='./', profiling=False,
                                         plot_landmarks=False):

        # initialize the video stream and allow the camera sensor to warm up
        print("[INFO] starting video stream...")
        v_cap = VideoStream().start()
        profiler = FPS().start()
        time.sleep(2.0)

        while True:
            # grab the frame from the threaded video stream and resize it
            # to have a maximum width of 600 pixels
            frame = v_cap.read()
            frame = imutils.resize(frame, width=600)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # grab the frame dimensions
            (self.frame_height, self.frame_width) = frame.shape[:2]
            faces, probs, bounding_boxes, landmarks = self.fast_mtcnn(frame, save_faces=save_faces,
                                                                      id=self.uuid,
                                                                      outdir=outdir,
                                                                      return_all=self.return_all)
            # color conversion for OpenCV
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            profiler.update(1)
            # draw the bounding boxes around the faces
            try:
                frame = utils.draw_bbox(bounding_boxes, frame, probs[0])
                if plot_landmarks:
                    frame = utils.plot_landmarks(landmarks, frame)
            except Exception:
                # drawing fails when no faces were detected; skip this frame
                pass
            # show the output frame
            cv2.imshow("Output", frame)
            # if the `q` key was pressed, break from the loop
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break

        # do a bit of cleanup
        profiler.stop()
        print("[INFO] cleaning up...")
        cv2.destroyAllWindows()
        v_cap.stop()
        if profiling:
            print(f"Average FPS: {profiler.fps(), profiler._numFrames}")
Example #9
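A Flask endpoint that accepts an uploaded image, runs detection and recognition, saves an annotated copy, and returns its URL as JSON.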
def predict():
    time_str = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(time.time()))
    if flask.request.method == 'POST':
        start = time.time()
        received_file = request.files['input_image']
        imageFileName = received_file.filename
        if received_file:
            # save the received image into the target folder
            received_dirPath = 'static/images'
            if not os.path.isdir(received_dirPath):
                os.makedirs(received_dirPath)
            imageFilePath = os.path.join(received_dirPath, time_str + '_' + imageFileName)
            received_file.save(imageFilePath)
            print('receive image and save: %s' % imageFilePath)
            usedTime = time.time() - start
            print('receive image and save cost time: %f' % usedTime)
            preds, boxes_list, score_list, det_time = det_model.predict(imageFilePath, is_output_polygon=False,
                                                                        short_size=args.det_short_size)
            img = cv2.imread(imageFilePath)
            draw_img = draw_bbox(img, boxes_list)
            drawed_imageFileName = time_str + '_draw_' + os.path.splitext(imageFileName)[0] + '.jpg'
            drawed_imageFilePath = os.path.join('static', drawed_imageFileName)
            result = []
            for i, box in enumerate(boxes_list):
                rec_img = CropWordBox.crop_image_by_bbox(img, box, args.rec_crop_ratio)
                text, prob, t = rec_model.predict(rec_img)
                prob = round(prob, 3)
                draw_img = cv2ImgAddText(draw_img, text, (box[0][0], box[0][1] - 40), textColor=(255, 255, 0),
                                         textSize=40)
                draw_img = cv2ImgAddText(draw_img, f'{prob:.3f}', (box[3][0], box[3][1] + 5),
                                         textColor=(255, 255, 0), textSize=40)
                result.append(text)
            print(f'draw image save: {drawed_imageFilePath}')
            cv2.imwrite(drawed_imageFilePath, draw_img)
            image_source_url = url_for('static', filename=drawed_imageFileName)
            return jsonify(src=image_source_url, count=f'{result}')
Example #10
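Picks a random test image, runs PSENet detection, draws the boxes, and saves the result as result.jpg.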
    # model_path = 'output/psenet_icd2015_new_loss/final.pth'
    image_root = 'Test Set/Image'
    annotation_root = 'Test Set/Annotation'

    Images = os.listdir(image_root)
    Labels = os.listdir(annotation_root)
    image_id = Images[np.random.randint(0, len(Images))].split('.jpg')[0]

    img_path = image_root + os.sep + '{}.jpg'.format(image_id)
    label_path = annotation_root + os.sep + '{}.txt'.format(image_id)

    print('Predicting for image', img_path)

    label = _get_annotation(label_path)

    # initialize the network
    net = PSENet(backbone='resnet18', pretrained=False, result_num=config.n)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = Pytorch_model(model_path, net=net, scale=1, device=device)
    # for i in range(100):
    #     model.predict(img_path)
    preds, boxes_list, t = model.predict(img_path)
    print(boxes_list)
    show_img(preds)
    img = draw_bbox(img_path, boxes_list, color=(0, 0, 255))
    cv2.imwrite('result.jpg', img)
    # img = draw_bbox(img, label,color=(0,0,255))
    show_img(img, color=True)

    plt.show()
Example #11
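Command-line batch inference: runs the detector over every .jpg in a folder and saves the annotated image, the prediction map, and a text file of boxes.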
if __name__ == '__main__':
    import pathlib
    from tqdm import tqdm
    import matplotlib.pyplot as plt
    from utils.utils import show_img, draw_bbox, save_result, get_file_list

    args = init_args()
    print(args)
    # initialize the network
    model = DetModel(args.model_path, post_p_thre=args.thre, gpu_id=0)
    img_folder = pathlib.Path(args.input_folder)
    for img_path in tqdm(get_file_list(args.input_folder, p_postfix=['.jpg'])):
        preds, boxes_list, score_list, t = model.predict(
            img_path, is_output_polygon=args.polygon)
        img = draw_bbox(cv2.imread(img_path)[:, :, ::-1], boxes_list)
        if args.show:
            show_img(preds)
            show_img(img, title=os.path.basename(img_path))
            plt.show()
        # save results to the output folder
        os.makedirs(args.output_folder, exist_ok=True)
        img_path = pathlib.Path(img_path)
        output_path = os.path.join(args.output_folder,
                                   img_path.stem + '_result.jpg')
        pred_path = os.path.join(args.output_folder,
                                 img_path.stem + '_pred.jpg')
        cv2.imwrite(output_path, img[:, :, ::-1])
        cv2.imwrite(pred_path, preds * 255)
        save_result(output_path.replace('_result.jpg', '.txt'), boxes_list,
                    score_list, args.polygon)
Example #12
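Batch PSENet inference over a folder of images, resizing and displaying each annotated result in an OpenCV window.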
                print('load label failed on {}'.format(label_path))
    return np.array(boxes, dtype=np.float32)


if __name__ == '__main__':
    from configs import config_tips as config
    from models import PSENet
    from utils.utils import show_img, draw_bbox

    #model_path = 'output/psenet_icd2015_resnet152_author_crop_adam_warm_up_myloss/best_r0.714011_p0.708214_f10.711100.pth'
    #model_path = 'output/psenet_icd2015_new_loss/final.pth'
    model_path = 'checkpoint/pse_epoch_30.pth'
    #image_files = Path("/home/peizhao/data/icdar/2019/tips/test/img").rglob('*.jpg')
    #image_files = Path("/home/peizhao/data/temp/doc").rglob("*.jpg")
    image_files = Path("/home/peizhao/data/temp").rglob("*.png")
    # image_files = Path("/home/peizhao/data/temp").rglob("*.jpg")
    #image_files = Path("/home/peizhao/data/temp/test").rglob("*.jpg")
    # initialize the network
    #net = PSENet(backbone='resnet152', pretrained=False, result_num=config.n)
    net = PSENet(backbone='resnet50', pretrained=False, result_num=3)
    model = Pytorch_model2(model_path, net=net, scale=1, gpu_id=0)

    for item in image_files:
        preds, boxes_list, t = model.predict(str(item))
        img = draw_bbox(str(item), boxes_list, color=(0, 0, 255))
        h, w = img.shape[:2]
        scale = 640 / max(h, w)
        img_size = cv2.resize(img, None, fx=scale, fy=scale)
        cv2.imshow("result", img_size)
        cv2.waitKey(0)
Example #13
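Fragments showing alternative YOLO post-processing paths: TensorFlow box decoding versus NumPy postprocess_boxes plus NMS on the raw predictions.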
    x = tf.Session().run(
        yolo_boxes_and_scores(features, anchors[anchor_mask[0]], nb_classes,
                              model_image_size, org_image_shape))
    boxes = np.concatenate(
        [x[0],
         np.reshape(x[2][0], (n_shape[1] * n_shape[1] * 3, 1)), x[1]],
        axis=1)
    all_boxes.extend(boxes)
boxes_, scores_, classes_ = postprocess_boxes_tf(all_boxes, score_threshold=.3)
image = draw_boxes_tf(boxes_, scores_, classes_, classes, org_image)
image.show()

#########################################################################################################
bboxes = postprocess_boxes(all_boxes, org_image, model_image_size[0], 0.3)
bboxes = nms(bboxes, 0.45, method='nms')
image = draw_bbox(org_image, bboxes, classes)
image = fromarray(image)
image.show()

#########################################################################################################
# concatenate the raw outputs from the three YOLO detection scales
pred_bbox = np.concatenate([
    np.reshape(predictions[0], (-1, 5 + nb_classes)),
    np.reshape(predictions[1], (-1, 5 + nb_classes)),
    np.reshape(predictions[2], (-1, 5 + nb_classes)),
], axis=0)

bboxes = postprocess_boxes(pred_bbox, org_image, model_image_size[0], 0.3)
bboxes = nms(bboxes, 0.45, method='nms')
image = draw_bbox(org_image, bboxes, classes)
image = fromarray(image)
Example #14
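A fragment from a webcam-capture loop that detects faces, draws boxes, and writes the annotated frames to a video file.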
    # grab the frame dimensions and convert it to a blob
    (frame_height, frame_width) = frame.shape[:2]
    out = cv2.VideoWriter(
        '/Users/igkinis/Desktop/projects/faceBIO/data/output5.avi', fourcc,
        20.0, (frame_width, frame_height))
    faces, probs, bounding_boxes = fast_mtcnn(
        pil_image,
        save_faces=True,
        id="test_video",
        outdir='/Users/igkinis/Desktop/projects/faceBIO/data',
        return_prob=True)
    # color conversion for OpenCV
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    # draw the bounding boxes around the faces
    try:
        frame = utils.draw_bbox(bounding_boxes, frame, probs[0])
    except Exception:
        # drawing fails when no faces were detected; skip this frame
        pass
    # show the output frame and append it to the output video
    cv2.imshow("Output", frame)
    output = frame
    out.write(output)
    # if the `q` key was pressed, break from the loop
    key = cv2.waitKey(1) & 0xFF
    if key == ord("q"):
        break

# do a bit of cleanup
print("[INFO] cleaning up...")
cv2.destroyAllWindows()
v_cap.stop()
Example #15
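Single-image PSENet inference: loads a checkpoint, predicts, and displays the annotated image with matplotlib.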

if __name__ == '__main__':
    import config
    from model import PSENet
    import matplotlib.pyplot as plt
    from utils.utils import show_img, draw_bbox

    os.environ['CUDA_VISIBLE_DEVICES'] = '2'
    model_path = 'output/psenet_icd2015_resnet152/best_r0.642754_p0.614924_f10.628531.pth'

    # model_path = 'output/psenet_icd2015_new_loss/final.pth'

    # img_path = '/data2/dataset/ICD15/img/img_1.jpg'
    img_path = '0.jpg'
    label_path = '/data2/dataset/ICD15/test/gt/gt_img_130.txt'
    # label = _get_annotation(label_path)

    # initialize the network
    net = PSENet(backbone='resnet152', pretrained=False, result_num=config.n)
    model = Pytorch_model(model_path, net=net, scale=1, gpu_id=0)
    # for i in range(100):
    #     model.predict(img_path)
    preds, boxes_list = model.predict(img_path)
    show_img(preds)
    img = draw_bbox(img_path, boxes_list)
    # img = draw_bbox(img, label,color=(0,0,255))
    show_img(img, color=True)

    plt.show()
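Note: the draw_bbox helpers used in these examples come from each repository's own utils module, and their signatures differ from example to example (image path vs. image array, extra color/mode/probability arguments). As a rough orientation only, a minimal OpenCV-based sketch, assuming each box is given as four (x, y) corner points, might look like the following (hypothetical, not any repository's actual implementation):

import cv2
import numpy as np

def draw_bbox(img_or_path, boxes, color=(0, 0, 255), thickness=2):
    # Accept either an image path or an already-loaded BGR array.
    if isinstance(img_or_path, str):
        img = cv2.imread(img_or_path)
    else:
        img = img_or_path.copy()
    # Each box is assumed to be four (x, y) corner points.
    for box in boxes:
        pts = np.asarray(box, dtype=np.int32).reshape(-1, 1, 2)
        cv2.polylines(img, [pts], isClosed=True, color=color, thickness=thickness)
    return img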