class CMND(object):
    """National ID card (CMND) reader.

    Pipeline: align the card with a CenterNet detector, locate text
    fields with a YOLO-style detector, then OCR each field.
    """

    def __init__(self):
        # CenterNet-based model that localizes and warps (aligns) the card.
        self.Detect_cmnd = CENTER_MODEL(
            weight_path="Detect_cmnd/weights/model_cmnd_best.pth")
        # Field detector for the aligned card image.
        self.Detect_fields = Detector_fields()
        self.ocr = OCR()
        # Field keys expected in the detector's output dictionaries.
        self.fields = ['id', 'name', 'date', 'ad1', 'ad2']

    def predict(self, img):
        """Detect the card in ``img``, crop its fields and OCR them.

        Parameters
        ----------
        img : BGR image (numpy array) as read by OpenCV.

        Returns
        -------
        tuple
            ``(img_aligned, restext)`` where ``restext`` maps each field
            name to its recognized text ("" when OCR returned nothing).
        """
        restext = {}
        t1 = time.time()
        img_aligned = self.Detect_cmnd.detect(img)
        print("centernet ", time.time() - t1)
        t1 = time.time()
        if img_aligned is not None:
            img_aligned = cv2.resize(img_aligned, (800, 650))
        else:
            # Alignment failed: fall back to the raw frame.
            # NOTE(review): fallback size (750x600) differs from the
            # aligned-path size (800x650) — confirm this is intentional.
            img_aligned = cv2.resize(img, (750, 600))
        res, resimg = self.Detect_fields.detect(img_aligned, 0.3)
        print("yolo ", time.time() - t1)
        t1 = time.time()
        for field in self.fields:
            restext[field] = self.ocr.predict(resimg[field], res[field], field)
            # BUG FIX: the original only applied this guard on the aligned
            # branch; without it a None OCR result crashes .upper() below.
            if restext[field] is None:
                restext[field] = ""
        print("ocr ", time.time() - t1)
        restext['name'] = restext['name'].upper()
        return img_aligned, restext
# Read the input video frame by frame, detect regions of interest with
# YOLO, OCR each region, and draw labeled boxes on the frame before
# displaying and writing it out. Press 'q' to stop early.
cap = cv.VideoCapture(args.input)
while cap.isOpened():
    hasFrame, frame = cap.read()
    if not hasFrame:
        break
    roi_imgs = yolo.detect(frame)
    # IDIOM FIX: iterate the ROIs and their bounding boxes in lockstep
    # instead of keeping a manual `index` counter.
    for roi_img, bbox in zip(roi_imgs, yolo.bounding_boxes):
        box = list(bbox[:4])  # [x1, y1, x2, y2]
        pred = ocr.predict(roi_img)
        draw_bounding_box(input_image=frame,
                          bounding_box=box,
                          label=pred,
                          background_color=(0, 255, 0),
                          ocr=ocr)
    if cv.waitKey(1) & 0xFF == ord('q'):
        break
    cv.imshow('frame', frame)
    out.write(frame)
    yolo.clear()
# Script entry: load one frame from --image, segment characters, and run
# the EMNIST OCR model on them, logging the prediction.
parser = argparse.ArgumentParser(description='Testing OCR.')
parser.add_argument('--image', help='Path to image file.')
args = parser.parse_args()

logging.getLogger().setLevel(logging.DEBUG)

# Open the image file.
if not os.path.isfile(args.image):
    # BUG FIX: logging takes %-style lazy format args, not print-style
    # commas (the original raised an internal "Logging error"). A missing
    # input is fatal (sys.exit follows), so log it at ERROR level.
    logging.error("Input image file %s doesn't exist", args.image)
    sys.exit(1)

cap = cv.VideoCapture(args.image)
hasFrame, frame = cap.read()
if hasFrame:
    img, characteres = extract_chars(frame,
                                     prefix_label='test_ocr',
                                     min_countours_area_ration=0.01,
                                     debug=True)
    ocr = OCR(model_filename="../config/emnist_model.pt",
              use_cuda=False,
              debug=True)
    pred = ocr.predict(characteres)
    logging.info(f'\nPrediction: {pred}')
else:
    logging.debug("Frame not found!")
# NOTE(review): the three statements below are the tail of a plotting
# helper whose `def` line lies outside this chunk; they are kept
# verbatim (n/images/img/row/col come from that enclosing scope).
n += 1
images[str(n)] = img
plot_images(images, row, col, cmap='gray')


if __name__ == '__main__':
    # Script entry: load one frame from --image, run the attention-OCR
    # model on it, and plot the frame.
    parser = argparse.ArgumentParser(description='Testing OCR.')
    parser.add_argument('--image', help='Path to image file.')
    args = parser.parse_args()

    logging.getLogger().setLevel(logging.DEBUG)

    # Open the image file.
    if not os.path.isfile(args.image):
        # BUG FIX: logging takes %-style lazy format args, not
        # print-style commas (the original raised an internal
        # "Logging error" instead of printing the message).
        logging.error("Input image file %s doesn't exist", args.image)
        sys.exit(1)

    cap = cv.VideoCapture(args.image)
    hasFrame, frame = cap.read()
    if hasFrame:
        images = {'frame': frame}
        ocr = OCR(model_filename="../config/attention_ocr_model.pth",
                  use_cuda=False)
        pred = ocr.predict(frame)
        logging.info(f'Prediction: {pred}')
        plot_images(images, 1, 3, cmap='gray')
    else:
        logging.debug("Frame not found!")