def test_raw_prediction(self):
    """Run raw prediction with a single checkpoint over all test files.

    Every result yielded by ``predict_raw`` must report an
    ``avg_char_probability`` strictly greater than zero.
    """
    attrs = PredictionAttrs()
    quiet_params = PredictorParams(progress_bar=False, silent=True)
    # Only the first configured checkpoint is used for the single predictor.
    single = Predictor.from_checkpoint(quiet_params, checkpoint=attrs.checkpoint[0])
    pages = list(map(load_image, attrs.files))
    for prediction in single.predict_raw(pages):
        self.assertGreater(prediction.outputs.avg_char_probability, 0)
def test_raw_prediction_voted(self):
    """Run raw prediction with all checkpoints and print the voted output.

    For each sample, ``outputs`` unpacks into the per-model results and the
    voted result; the sentences of both are printed. No assertions are made.
    """
    attrs = PredictionAttrs()
    checkpoint_paths = attrs.checkpoint
    quiet_params = PredictorParams(progress_bar=False, silent=True)
    voter = MultiPredictor.from_paths(
        checkpoints=checkpoint_paths,
        predictor_params=quiet_params,
    )
    pages = list(map(load_image, attrs.files))
    for item in voter.predict_raw(pages):
        per_model, voted = item.outputs
        sentences = [single.sentence for single in per_model]
        print(sentences)
        print(voted.sentence)
def test_raw_prediction(self):
    """Predict each test file on its own and print the recognised sentence.

    All images are loaded up front; every image is then passed to
    ``predict_raw`` as a one-element batch, and the middle element of the
    first yielded triple is taken as the prediction.
    """
    attrs = PredictionAttrs()
    quiet_params = PredictorParams(progress_bar=False, silent=True)
    single = Predictor.from_checkpoint(quiet_params, checkpoint=attrs.checkpoint[0])
    pages = list(map(load_image, attrs.files))
    for path, page in zip(attrs.files, pages):
        # Exhaust the generator before picking the (only) result.
        results = list(single.predict_raw([page]))
        _, prediction, _ = results[0]
        print(path, prediction.sentence)
def _load_line(self, image_path):
    """Load the line image stored at ``image_path``.

    Args:
        image_path: Path of the image file, or ``None``.

    Returns:
        ``None`` if ``image_path`` is ``None`` or if ``load_image`` fails;
        a ``(1, 1)`` ``uint8`` zero array if the file is missing and
        ``self._non_existing_as_empty`` is truthy; otherwise whatever
        ``load_image`` returns.

    Raises:
        Exception: If the file does not exist and
            ``self._non_existing_as_empty`` is falsy.
    """
    if image_path is None:
        return None

    if not os.path.exists(image_path):
        if self._non_existing_as_empty:
            return np.zeros((1, 1), dtype=np.uint8)
        raise Exception("Image file at '{}' does not exist".format(image_path))

    try:
        return load_image(image_path)
    except Exception:
        # Best effort: an unreadable image is reported as "no image".
        # Deliberately not a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        return None
def _generate_epoch(self, text_only) -> Generator[InputSample, None, None]:
    """Yield one ``InputSample`` per format of every line of every page.

    The page image is loaded (and thresholded at ``0.9`` when
    ``self.binary`` is set) only if ``self.mode`` is in ``INPUT_PROCESSOR``.
    The text is taken from the format only if ``self.mode`` is in
    ``TARGETS_PROCESSOR``. ``fold_id`` is a running counter over all
    formats, starting at 0.

    Args:
        text_only: If truthy, samples are yielded without an image.

    Yields:
        ``InputSample(image_or_None, text_or_None, SampleMeta(...))``.
    """
    fold_id = -1
    for p, page in enumerate(self.book.pages):
        if self.mode in INPUT_PROCESSOR:
            img = load_image(page.imgFile)
            if self.binary:
                img = img > 0.9
        else:
            img = None

        for l, line in enumerate(page.getLines()):
            for f, fo in enumerate(line.formats):
                fold_id += 1
                source_file = page.xmlFile if page.xmlFile else page.imgFile
                sample_id = "{}_{}_{}_{}".format(
                    os.path.splitext(source_file)[0], p, l, f)
                meta = SampleMeta(id=sample_id, fold_id=fold_id)

                text = fo.text if self.mode in TARGETS_PROCESSOR else None

                if text_only:
                    yield InputSample(None, text, meta)
                    continue

                cut_img = None
                if self.mode in INPUT_PROCESSOR:
                    rect = line.rect
                    # Cut the image with absolute coordinates. The former
                    # ``-ly + bottom`` / ``-lx + right`` stops evaluated to 0
                    # when the rectangle touched the bottom/right border,
                    # which produced an empty crop.
                    cut_img = img[rect.top:rect.bottom, rect.left:rect.right]

                    # add padding as required from normal files
                    cut_img = np.pad(cut_img, ((3, 3), (0, 0)),
                                     mode='constant',
                                     constant_values=cut_img.max())

                yield InputSample(cut_img, text, meta)
def _load_sample(self, sample, text_only):
    """Yield ``(line_image, text)`` for every line of one PageXML page.

    Args:
        sample: Pair ``(image_path, xml_path)`` of the page.
        text_only: If truthy, no line images are cut and ``None`` is
            yielded in place of the image.

    Yields:
        ``(line_img, text)`` where ``line_img`` is ``None`` when
        ``text_only`` is set or ``self.mode`` is not one of PREDICT, TRAIN
        or PRED_AND_EVAL.
    """
    loader = PageXMLDatasetLoader(self.mode, self._non_existing_as_empty,
                                  self.text_index, self.skip_invalid)
    image_path, xml_path = sample

    needs_image = self.mode in (DataSetMode.PREDICT,
                                DataSetMode.TRAIN,
                                DataSetMode.PRED_AND_EVAL)
    img = load_image(image_path) if needs_image else None

    # Distinct loop name so the ``sample`` parameter is not shadowed.
    for line in loader.load(image_path, xml_path):
        text = line["text"]
        orientation = line["orientation"]

        if not text_only and needs_image:
            _, lx = img.shape[:2]
            line_img = PageXMLDataset.cutout(img, line['coords'],
                                             lx / line['img_width'])

            # rotate by orientation angle in clockwise direction to correct
            # present skew (skimage rotates in counter-clockwise direction)
            if orientation and orientation % 360 != 0:
                line_img = rotate(line_img, orientation * -1, resize=True,
                                  mode='constant', cval=line_img.max(),
                                  preserve_range=True).astype(np.uint8)

            # add padding as required from normal files
            # The padding belongs on the cut-out line. Padding the page
            # image instead would enlarge it on every iteration and shift
            # all subsequent crops.
            pad = self.args.get('pad', None)
            if pad:
                line_img = np.pad(line_img, pad, mode='constant',
                                  constant_values=line_img.max())
        else:
            line_img = None

        yield line_img, text
def _load_sample(self, sample, text_only) -> Generator[InputSample, None, None]:
    """Yield one ``InputSample`` for every line of one PageXML page.

    Args:
        sample: Pair ``(image_path, xml_path)`` of the page.
        text_only: If truthy, no line images are cut and the samples carry
            ``None`` as image.

    Yields:
        ``InputSample(line_img, text, SampleMeta(id=...))`` where
        ``line_img`` is ``None`` when ``text_only`` is set or ``self.mode``
        is not in ``INPUT_PROCESSOR``.
    """
    loader = PageXMLDatasetLoader(self.mode, self._non_existing_as_empty,
                                  self.text_index, self.skip_invalid)
    image_path, xml_path = sample

    needs_image = self.mode in INPUT_PROCESSOR
    img = load_image(image_path) if needs_image else None

    # Distinct loop name so the ``sample`` parameter is not shadowed.
    for line in loader.load(image_path, xml_path):
        text = line["text"]
        orientation = line["orientation"]

        if not text_only and needs_image:
            _, lx = img.shape[:2]
            line_img = PageXMLReader.cutout(img, line['coords'],
                                            lx / line['img_width'])

            # rotate by orientation angle in clockwise direction to correct
            # present skew (skimage rotates in counter-clockwise direction)
            if orientation and orientation % 360 != 0:
                line_img = rotate(line_img, orientation * -1, resize=True,
                                  mode='constant', cval=line_img.max(),
                                  preserve_range=True).astype(np.uint8)

            # add padding as required from normal files
            # The padding belongs on the cut-out line. Padding the page
            # image instead would enlarge it on every iteration and shift
            # all subsequent crops.
            pad = self.args.pad
            if pad:
                line_img = np.pad(line_img, pad, mode='constant',
                                  constant_values=line_img.max())
        else:
            line_img = None

        yield InputSample(line_img, text, SampleMeta(id=line['id']))
def _load_sample(self, sample, text_only) -> Generator[InputSample, None, None]:
    """Yield one ``InputSample`` for every line of one PageXML page.

    Args:
        sample: Triple ``(image_path, xml_path, idx)``; ``idx`` is the
            offset used to derive the fold id of each line.
        text_only: If truthy, no line images are cut and the samples carry
            ``None`` as image.

    Yields:
        ``InputSample(line_img, text, SampleMeta(id=..., fold_id=...))``.
        ``fold_id`` is ``(idx + i) % self.n_folds`` for the ``i``-th line,
        or ``-1`` when ``self.n_folds`` is not positive.
    """
    loader = PageXMLDatasetLoader(self.mode, self._non_existing_as_empty,
                                  self.text_index, self.skip_invalid)
    image_path, xml_path, idx = sample

    needs_image = self.mode in INPUT_PROCESSOR
    img = load_image(image_path) if needs_image else None

    # Distinct loop name so the ``sample`` parameter is not shadowed.
    for i, line in enumerate(loader.load(image_path, xml_path)):
        fold_id = (idx + i) % self.n_folds if self.n_folds > 0 else -1
        text = line["text"]
        orientation = line["orientation"]

        if not text_only and needs_image:
            _, lx = img.shape[:2]

            # rotate by orientation angle in clockwise direction to correct
            # present skew
            angle = orientation if orientation and orientation % 360 != 0 else 0

            line_img = PageXMLReader.cutout(img, line['coords'],
                                            mode=CutMode.POLYGON,
                                            angle=angle, cval=None,
                                            scale=lx / line['img_width'])

            # add padding as required from normal files
            # The padding belongs on the cut-out line. Padding the page
            # image instead would enlarge it on every iteration and shift
            # all subsequent crops.
            pad = self.args.pad
            if pad:
                line_img = np.pad(line_img, pad, mode='constant',
                                  constant_values=line_img.max())
        else:
            line_img = None

        yield InputSample(line_img, text,
                          SampleMeta(id=line['id'], fold_id=fold_id))
class DefaultDataAugmenter(DataAugmenterBase[DefaultDataAugmenterParams]):
    """Data augmenter that degrades a line image with the bundled ocrodeg."""

    def augment_single(self, data, gt_txt):
        """Return an augmented copy of ``data`` together with ``gt_txt``.

        The image is converted to float and scaled by its maximum, then
        passed through ocrodeg's ``random_pad``, two rounds of
        ``distort_with_noise`` (sigma 2 and 5) and ``printlike_multiscale``.
        The result is rescaled to a maximum of 255 and cast back to the
        input dtype.

        Args:
            data: Image array; ``data.shape[1]`` must exist.
            gt_txt: Ground-truth text, returned unchanged.

        Returns:
            Tuple ``(augmented_data, gt_txt)``.
        """
        import calamari_ocr.thirdparty.ocrodeg as ocrodeg

        original_dtype = data.dtype
        # ``np.float`` was only an alias of the builtin ``float`` and has
        # been removed from NumPy; ``np.float64`` is the equivalent dtype.
        data = data.astype(np.float64)
        m = data.max()
        data = data / (1 if m == 0 else m)
        data = ocrodeg.random_pad(data, (0, data.shape[1] * 2))
        # data = ocrodeg.transform_image(data, **ocrodeg.random_transform(rotation=(-0.1, 0.1), translation=(-0.1, 0.1)))
        for sigma in [2, 5]:
            noise = ocrodeg.bounded_gaussian_noise(data.shape, sigma, 3.0)
            data = ocrodeg.distort_with_noise(data, noise)

        data = ocrodeg.printlike_multiscale(data, blur=1, inverted=True)
        # Same zero guard as the first normalisation, so an all-zero result
        # does not turn into NaNs.
        peak = data.max()
        data = (data * 255 / (1 if peak == 0 else peak)).astype(original_dtype)
        return data, gt_txt


if __name__ == '__main__':
    # Visual smoke test: show the original line above four augmented variants.
    import matplotlib.pyplot as plt

    aug = DefaultDataAugmenterParams().create()
    img = 255 - np.mean(load_image("../../test/data/uw3_50lines/train/010001.bin.png")[:, :, 0:2], axis=-1)
    aug_img = [aug.augment_single(img.T, '')[0].T for _ in range(4)]
    f, ax = plt.subplots(5, 1)
    ax[0].imshow(255 - img, cmap='gray')
    for i, x in enumerate(aug_img):
        ax[i + 1].imshow(255 - x, cmap='gray')
    plt.show()
data = data.astype(np.float) m = data.max() data = data / (1 if m == 0 else m) data = ocrodeg.random_pad(data, (0, data.shape[1] * 2)) # data = ocrodeg.transform_image(data, **ocrodeg.random_transform(rotation=(-0.1, 0.1), translation=(-0.1, 0.1))) for sigma in [2, 5]: noise = ocrodeg.bounded_gaussian_noise(data.shape, sigma, 3.0) data = ocrodeg.distort_with_noise(data, noise) data = ocrodeg.printlike_multiscale(data, blur=1, inverted=True) data = (data * 255 / data.max()).astype(original_dtype) return data, gt_txt NoopDataAugmenter.register() SimpleDataAugmenter.register() if __name__ == '__main__': aug = SimpleDataAugmenter() img = 255 - np.mean( load_image("../../test/data/uw3_50lines/train/010001.bin.png")[:, :, 0:2], axis=-1) aug_img = [aug.augment_single(img.T, '')[0].T for _ in range(4)] import matplotlib.pyplot as plt f, ax = plt.subplots(5, 1) ax[0].imshow(255 - img, cmap='gray') for i, x in enumerate(aug_img): ax[i + 1].imshow(255 - x, cmap='gray') plt.show()