Example no. 1
import time

import cv2

# CENTER_MODEL, Detector_fields and OCR are project-specific classes
# imported from modules not shown in this snippet.


class CMND(object):
    def __init__(self):
        self.Detect_cmnd = CENTER_MODEL(
            weight_path="Detect_cmnd/weights/model_cmnd_best.pth")
        self.Detect_fields = Detector_fields()
        self.ocr = OCR()
        self.fields = ['id', 'name', 'date', 'ad1', 'ad2']

    def predict(self, img):
        restext = {}
        t1 = time.time()
        font = cv2.FONT_HERSHEY_SIMPLEX
        img_aligned = self.Detect_cmnd.detect(img)
        print("centernet ", time.time() - t1)
        t1 = time.time()
        if img_aligned is not None:
            img_aligned = cv2.resize(img_aligned, (800, 650))
            res, resimg = self.Detect_fields.detect(img_aligned, 0.3)
            print("yolo ", time.time() - t1)
            t1 = time.time()
            for x in self.fields:
                restext[x] = self.ocr.predict(resimg[x], res[x], x)
                if restext[x] is None:
                    restext[x] = ""
            print("ocr ", time.time() - t1)
        else:
            # Alignment failed: run field detection and OCR on the raw frame instead.
            img_aligned = cv2.resize(img, (750, 600))
            res, resimg = self.Detect_fields.detect(img_aligned, 0.3)
            for x in self.fields:
                restext[x] = self.ocr.predict(resimg[x], res[x], x)
                if restext[x] is None:
                    restext[x] = ""
        restext['name'] = restext['name'].upper()
        return img_aligned, restext
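
A minimal usage sketch for the class above; the image path, the output file name, and the driver block are illustrative assumptions, not part of the original snippet:

import cv2

if __name__ == '__main__':
    cmnd = CMND()
    img = cv2.imread("sample_cmnd.jpg")  # hypothetical input image
    img_aligned, restext = cmnd.predict(img)
    for field in ['id', 'name', 'date', 'ad1', 'ad2']:
        print(field, ":", restext[field])
    cv2.imwrite("sample_cmnd_aligned.jpg", img_aligned)  # hypothetical output path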
Example no. 2
    cap = cv.VideoCapture(args.input)

    while cap.isOpened():
        hasFrame, frame = cap.read()
        if hasFrame:
            # Detect regions of interest, then OCR each one and draw it on the frame.
            roi_imgs = yolo.detect(frame)
            for index, roi_img in enumerate(roi_imgs):
                box = [
                    yolo.bounding_boxes[index][0],
                    yolo.bounding_boxes[index][1],
                    yolo.bounding_boxes[index][2],
                    yolo.bounding_boxes[index][3]
                ]
                pred = ocr.predict(roi_img)
                draw_bounding_box(input_image=frame,
                                  bounding_box=box,
                                  label=pred,
                                  background_color=(0, 255, 0),
                                  ocr=ocr)

            if cv.waitKey(1) & 0xFF == ord('q'):
                break

            cv.imshow('frame', frame)
            out.write(frame)  # 'out' is assumed to be a cv.VideoWriter opened earlier
            yolo.clear()
        else:
            break
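
The loop above never releases the capture, the writer, or the display window. A minimal cleanup sketch one would typically place after the loop, assuming cap and out (a cv.VideoWriter opened earlier) exactly as in the snippet:

    cap.release()
    out.release()
    cv.destroyAllWindows()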
Example no. 3
    parser = argparse.ArgumentParser(description='Testing OCR.')
    parser.add_argument('--image', help='Path to image file.')
    args = parser.parse_args()

    logging.getLogger().setLevel(logging.DEBUG)

    # Open the image file
    if not os.path.isfile(args.image):
        logging.debug("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)

    hasFrame, frame = cap.read()

    if hasFrame:
        img, characteres = extract_chars(frame,
                                         prefix_label='test_ocr',
                                         min_countours_area_ration=0.01,
                                         debug=True)

        #characteres = [img / 255 for img in characteres]
        #display_images(characteres, 3, 3)

        ocr = OCR(model_filename="../config/emnist_model.pt",
                  use_cuda=False,
                  debug=True)
        pred = ocr.predict(characteres)
        logging.info(f'\nPrediction: {pred}')

    else:
        logging.debug("Frame not found!")
Example no. 4
            n += 1
            images[str(n)] = img
        plot_images(images, row, col, cmap='gray')

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Testing OCR.')
    parser.add_argument('--image', help='Path to image file.')
    args = parser.parse_args()

    logging.getLogger().setLevel(logging.DEBUG)

    # Open the image file
    if not os.path.isfile(args.image):
        logging.error("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)

    hasFrame, frame = cap.read()

    if hasFrame:
        images = {}
        images['frame'] = frame

        ocr = OCR(model_filename="../config/attention_ocr_model.pth", use_cuda=False)
        pred = ocr.predict(frame)
        logging.info(f'Prediction: {pred}')

        plot_images(images, 1, 3, cmap='gray')
        
    else:
        logging.debug("Frame not found!")