def train(self, image_file):
    """Feed ground-truth training data from *image_file* to the OCR.

    :param image_file: an ImageFile instance, or a file name to wrap in one
    :raises ValueError: if the image has no ground-truth data attached
    """
    if not isinstance(image_file, ImageFile):
        image_file = ImageFile(image_file)
    if not image_file.is_grounded():
        # ValueError is more precise than the bare Exception raised before,
        # and remains an Exception subclass for any existing callers.
        raise ValueError("The provided file is not grounded")
    features = self.extractor.extract(image_file.image,
                                      image_file.ground.segments)
    self.classifier.train(features, image_file.ground.classes)
class TestGrounding(unittest.TestCase):
    """Exercises the three grounder implementations against 'digits1'."""

    def setUp(self):
        self.img = ImageFile('digits1')
        self.img.remove_ground()
        # NOTE(review): elsewhere is_grounded is called as a method; if it is a
        # method here, a bound method is always truthy — confirm it is a property.
        self.assertFalse(self.img.is_grounded)
        self.segments = ContourSegmenter().process(self.img.image)

    def test_textgrounder(self):
        """TextGrounder assigns one class per segment from the given string."""
        grounder = TextGrounder()
        characters = "0" * len(self.segments)
        grounder.ground(self.img, self.segments, characters)
        self.assertTrue(self.img.is_grounded)
        # assertEquals is a deprecated alias; use assertEqual
        self.assertEqual(reconstruct_chars(self.img.ground.classes), characters)

    def test_textgrounder_wrong_len(self):
        """A text shorter than the segment list must raise and not ground."""
        grounder = TextGrounder()
        characters = "0" * len(self.segments)
        with self.assertRaises(ValueError):
            grounder.ground(self.img, self.segments, characters[:-4])
        self.assertFalse(self.img.is_grounded)

    def test_usergrounder(self):
        """UserGrounder reads one key per segment, terminated by ESC."""
        ESC_KEY = 27
        ZERO_KEY = 48
        keys = [ZERO_KEY] * len(self.segments) + [ESC_KEY]
        mock_generator = iter(keys)

        def mock_input(*args):
            return next(mock_generator)

        grounder = UserGrounder()
        with mock.patch('cv2.waitKey', mock_input):
            with mock.patch('cv2.imshow'):
                grounder.ground(self.img, self.segments)
        self.assertTrue(self.img.is_grounded)
        self.assertEqual(reconstruct_chars(self.img.ground.classes),
                         "0" * len(self.segments))

    def test_terminal_grounder(self):
        """TerminalGrounder prompts once per segment via raw_input (Python 2)."""
        terminal = TerminalGrounder()
        characters = "0" * len(self.segments)
        mock_input_gen = iter(characters)

        def mock_input(prompt):
            return next(mock_input_gen)

        with mock.patch('__builtin__.raw_input', mock_input):
            terminal.ground(self.img, self.segments)
        self.assertTrue(self.img.is_grounded)
        self.assertEqual(reconstruct_chars(self.img.ground.classes),
                         "0" * len(self.segments))
def test_ocr_digits(self):
    """End-to-end check: train on digits1, OCR digits2, compare to ground truth."""
    # load the training image and the (grounded) evaluation image
    training_image = ImageFile('digits1')
    test_image = ImageFile('digits2')
    expected_classes = test_image.ground.classes
    test_image.remove_ground()

    # assemble the OCR pipeline
    ocr = OCR(ContourSegmenter(), SimpleFeatureExtractor(), KNNClassifier())

    # train, then recognise the now-ungrounded image
    ocr.train(training_image)
    chars, classes, _ = ocr.ocr(test_image, show_steps=False)

    self.assertEqual(list(classes), list(expected_classes))
    self.assertEqual(chars, reconstruct_chars(expected_classes))
def ocr(self, image_file, show_steps=False):
    """Run the trained classifier over *image_file*.

    :param image_file: an ImageFile instance, or a file name to wrap in one
    :param show_steps: when True, ask the segmenter to display its work
    :return: (chars, classes, segments) — reconstructed text, raw class
             labels, and the segments they were extracted from
    """
    # accept either an ImageFile or a plain file name
    if not isinstance(image_file, ImageFile):
        image_file = ImageFile(image_file)

    segments = self.segmenter.process(image_file.image)
    if show_steps:
        self.segmenter.display()

    features = self.extractor.extract(image_file.image, segments)
    classes = self.classifier.classify(features)
    return reconstruct_chars(classes), classes, segments
def test_ground(self):
    """set_ground keeps an image grounded; remove_ground clears the flag."""
    image = ImageFile(TEST_FILE)
    self.assertEqual(image.is_grounded, True)
    # re-grounding with the existing data must not change the state
    image.set_ground(image.ground.segments, image.ground.classes,
                     write_file=False)
    self.assertEqual(image.is_grounded, True)
    image.remove_ground(remove_file=False)
    self.assertEqual(image.is_grounded, False)
def ground(self, image_file, text=None):
    """Ground an image file for use in the OCR object.

    :param image_file: The name of the image file or an ImageFile object
    :param text: The text, if self.grounder is a TextGrounder (defaults to None)
    :return:
    """
    if not isinstance(image_file, ImageFile):
        image_file = ImageFile(image_file)
    segments = self.segmenter.process(image_file.image)

    # TextGrounder is the only grounder that needs (and requires) the text
    if not isinstance(self.grounder, grounders.TextGrounder):
        self.grounder.ground(image_file, segments)
    else:
        if not text:
            raise ValueError("Trying to ground file with TextGrounder without specifying text argument.")
        self.grounder.ground(image_file, segments, text)

    image_file.ground.write()  # save to file
def test_opencv_brightness(self):
    """A brightness value inside the valid range processes without raising."""
    processor = opencv_utils.BrightnessProcessor(brightness=0.5)
    source = ImageFile('digits1')
    processor._process(source.image)
def setUp(self):
    """Load a known image, strip its ground truth, and segment it."""
    self.img = ImageFile('digits1')
    self.img.remove_ground()
    # the fixture must start out ungrounded
    self.assertFalse(self.img.is_grounded())
    segmenter = ContourSegmenter()
    self.segments = segmenter.process(self.img.image)
from files import ImageFile
from grounding import UserGrounder
from segmentation import ContourSegmenter, draw_segments

# Interactively assign ground-truth classes to the segments of 'digits1'
# and persist them alongside the image.
segmenter = ContourSegmenter(blur_y=5, blur_x=5, block_size=11, c=10)
image = ImageFile('digits1')
found_segments = segmenter.process(image.image)

grounder = UserGrounder()
grounder.ground(image, found_segments)
image.ground.write()
from files import ImageFile from segmentation import ContourSegmenter, draw_segments from feature_extraction import SimpleFeatureExtractor from classification import KNNClassifier from ocr import OCR, accuracy, show_differences, reconstruct_chars segmenter = ContourSegmenter(blur_y=5, blur_x=5, block_size=11, c=10) extractor = SimpleFeatureExtractor(feature_size=10, stretch=False) classifier = KNNClassifier() ocr = OCR(segmenter, extractor, classifier) ocr.train(ImageFile('digits1'), ImageFile('alpha')) test_image = ImageFile('alpha') test_classes, test_segments = ocr.ocr(test_image, show_steps=True) print "accuracy:", accuracy(test_image.ground.classes, test_classes) print "OCRed text:\n", reconstruct_chars(test_classes) show_differences(test_image.image, test_segments, test_image.ground.classes, test_classes)
# --- configuration pulled from the already-parsed command-line arguments ---
force_train = args.retrain        # re-ground/re-train even if ground truth exists
use_tesseract = args.tesseract
tesslangpath = args.tesslangpath

# segmentation/extraction/classification pipeline used for recognition
segmenter = MinContourSegmenter(blur_y=5, blur_x=5, min_width=5, block_size=17, c=6, max_ratio=4.0)
extractor = SimpleFeatureExtractor(feature_size=10, stretch=False)
classifier = KNNClassifier(k=3)
ocr = OCR(segmenter, extractor, classifier)

# ground (if needed) every requested training file
for file_to_train in args.trainfile:
    training_image = ImageFile(file_to_train)
    # NOTE(review): isGrounded() is camelCase here while other modules use
    # is_grounded — confirm which API this ImageFile version exposes.
    if not training_image.isGrounded() or force_train:
        #trainingsegmenter = ContourSegmenter(blur_y=1, blur_x=1, min_width=3, min_height=15, max_height=50, min_area=30, block_size=23, c=3) # tweaked for black font
        # separate, more aggressive segmenter used only to build training data
        trainingsegmenter = ContourSegmenter(
            blur_y=1, blur_x=1, min_width=3, min_height=15, max_height=50,
            min_area=30, block_size=3, c=5,
            nearline_tolerance=10.0)  # tweaked for white font
        segments = trainingsegmenter.process(training_image.image)
        if verbose:
            trainingsegmenter.display()
parser.add_argument('--terse', help='skip user prompts', action="store_true")
args = parser.parse_args()

# --- unpack command-line options ---
verbose = args.verbose
terse = args.terse
force_train = args.retrain        # re-ground/re-train even if ground truth exists
use_tesseract = args.tesseract
tesslangpath = args.tesslangpath

# recognition pipeline
segmenter = MinContourSegmenter(blur_y=5, blur_x=5, min_width=5, block_size=17, c=6, max_ratio=4.0)
extractor = SimpleFeatureExtractor(feature_size=10, stretch=False)
classifier = KNNClassifier(k=3)
ocr = OCR(segmenter, extractor, classifier)

# ground (if needed) and train on every requested training file
for file_to_train in args.trainfile:
    training_image = ImageFile(file_to_train)
    # NOTE(review): isGrounded() is camelCase here while other modules use
    # is_grounded — confirm which API this ImageFile version exposes.
    if not training_image.isGrounded() or force_train:
        #trainingsegmenter = ContourSegmenter(blur_y=1, blur_x=1, min_width=3, min_height=15, max_height=50, min_area=30, block_size=23, c=3) # tweaked for black font
        # separate, more aggressive segmenter used only to build training data
        trainingsegmenter = ContourSegmenter(blur_y=1, blur_x=1, min_width=3, min_height=15,
                                             max_height=50, min_area=30, block_size=3, c=5,
                                             nearline_tolerance=10.0)  # tweaked for white font
        segments = trainingsegmenter.process(training_image.image)
        if verbose:
            trainingsegmenter.display()
        # grounder = UserGrounder()   # interactive version; lets the user review, assign ground truth data
        grounder = TextGrounder()   # non-interactive ground-truth - assumes clean, ordered input
        # assumes the image contains exactly these characters, in this order
        grounder.ground(training_image, segments, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")  # writes out a .box file of image ground truths
    ocr.train(training_image)
# Classify given image(s) using training data
def test_ocr_unicode(self):
    """OCR round-trip on the unicode sample (trains and tests on itself)."""
    train_img = ImageFile('unicode1')
    test_img = ImageFile('unicode1')
    self._test_ocr(train_img, test_img)
def test_ocr_digits(self):
    """Train on digits1 and verify recognition against digits2."""
    train_img = ImageFile('digits1')
    test_img = ImageFile('digits2')
    self._test_ocr(train_img, test_img)
def test_opencv_brightness_raise(self):
    """A brightness outside the valid range must trip the processor's assertion."""
    image = ImageFile('digits1')
    processor = opencv_utils.BrightnessProcessor(brightness=2.0)
    with self.assertRaises(AssertionError):
        processor._process(image.image)
from files import ImageFile
from segmentation import ContourSegmenter
from feature_extraction import SimpleFeatureExtractor
from classification import KNNClassifier
from ocr import OCR, accuracy, show_differences

# Build the recognition pipeline, train it on the grounded digits1 image,
# then OCR digits2 and report how well the output matched the ground truth.
ocr = OCR(
    ContourSegmenter(blur_y=5, blur_x=5, block_size=11, c=10),
    SimpleFeatureExtractor(feature_size=10, stretch=False),
    KNNClassifier(),
)
ocr.train(ImageFile('digits1'))

test_image = ImageFile('digits2')
test_chars, test_classes, test_segments = ocr.ocr(test_image, show_steps=True)

print("accuracy:", accuracy(test_image.ground.classes, test_classes))
print("OCRed text:\n", test_chars)
TODO:
 * scan for letters only from left to right, not top to bottom
 * add ability to filter out letters with a triangle above the letter
'''
from os.path import isfile, getsize
from os import remove
from files import ImageFile
from grounding import UserGrounder
import segmentation_filters
from segmentation import ContourSegmenter, draw_segments

# input image file and output box file
imgfile = 'captcha'
boxfile = 'data/' + imgfile + '.box'
new_image = ImageFile(imgfile)

# delete the box file if it's empty
# (an empty .box would otherwise be treated as existing ground truth)
if (isfile(boxfile)):
    if (getsize(boxfile) == 0):
        remove(boxfile)

# define what to focus on and ignore in the image
# NOTE(review): `stack` is consumed further down this script, past this chunk.
stack = [
    segmentation_filters.LargeFilter(),
    segmentation_filters.SmallFilter(),
    segmentation_filters.LargeAreaFilter(),
    segmentation_filters.ContainedFilter()
]
# process image, defining useful-looking segments