def apply(self, sample: Sample) -> Sample:
    """Turn a sample into the dict-based inputs/targets layout.

    In TRAINING and EVALUATION mode the targets are encoded with the codec of
    ``self.data_params`` (an empty int32 array when the sample has no
    targets). In every other mode no ground truth is produced.

    Args:
        sample: Sample whose ``inputs`` expose ``shape``/``astype`` and whose
            ``meta`` supports ``get``.

    Returns:
        ``sample.new_invalid()`` when ``self.is_valid_line`` rejects the line;
        otherwise a sample with inputs ``{"img", "img_len"}`` and, if ground
        truth was encoded, targets ``{"gt", "gt_len", "fold_id"}``.
    """
    params = self.data_params
    assert params.downscale_factor > 0  # Not instantiated yet
    codec = params.codec

    # Final preparation: ground truth is only encoded for the modes that use it.
    encoded = None
    if self.mode in {PipelineMode.TRAINING, PipelineMode.EVALUATION}:
        if sample.targets:
            raw = codec.encode(sample.targets)
        else:
            raw = np.zeros((0,), dtype="int32")
        encoded = np.array(raw)

    image = sample.inputs
    if len(image.shape) == 2:
        # Gray or binary input: add the missing trailing axis.
        image = np.expand_dims(image, axis=-1)

    # Check whether the line is usable before building any output dicts.
    line_ok = self.is_valid_line(
        encoded,
        len(image) // params.downscale_factor,
        len(image),
        sample.meta.get("id", "Unknown Sample ID"),
    )
    if not line_ok:
        return sample.new_invalid()

    if encoded is not None:
        targets = {
            "gt": np.asarray(encoded),
            "gt_len": np.asarray([len(encoded)]),
            "fold_id": np.asarray([sample.meta.get("fold_id", -1)]),
        }
        sample = sample.new_targets(targets)

    inputs = {"img": image.astype(np.uint8), "img_len": np.asarray([len(image)])}
    return sample.new_inputs(inputs)
def vote_prediction_result_tuple(self, predictions):
    """Vote over ``predictions`` and post-process the voted sentence.

    Args:
        predictions: Prediction results passed to ``self._apply_vote``; each
            must expose ``text_postproc`` when no voter-level post-processor
            is configured.

    Returns:
        A new ``Prediction`` flagged as voted result, with its ``sentence``
        post-processed and ``avg_char_probability`` set to the summed
        probability of the first char of each position divided by the number
        of positions (divisor 1 when there are none).
    """
    voted = Prediction()
    voted.is_voted_result = True
    self._apply_vote(predictions, voted)

    # Postprocessing after voting
    # option 1: Use custom text postprocessor
    # option 2: (Not implemented) Use only the first text postprocessor
    # option 3: Apply all known postprocessors and apply a sequence voting
    #           if different results are received
    if self.text_postproc:
        wrapped = Sample(inputs="", outputs=voted.sentence)
        voted.sentence = self.text_postproc.apply_on_sample(wrapped).outputs
    else:
        sentences = []
        for pred in predictions:
            wrapped = Sample(inputs="", outputs=voted.sentence)
            sentences.append(pred.text_postproc.apply_on_sample(wrapped).outputs)

        disagree = any(other != sentences[0] for other in sentences[1:])
        if not disagree:
            # Usually all post-processors yield the same result.
            voted.sentence = sentences[0]
        else:
            # The post-processors differ, so vote again on the sentences.
            from calamari_ocr.ocr.voting import SequenceVoter

            sequence_voter = SequenceVoter()
            voted.sentence = "".join(char for char, _ in sequence_voter.process_text(sentences))

    probability_sum = sum(pos.chars[0].probability for pos in voted.positions if len(pos.chars) > 0)
    n_positions = len(voted.positions)
    voted.avg_char_probability = probability_sum / (n_positions if n_positions > 0 else 1)
    return voted
def generate(self) -> Iterable[Sample]:
    """Yield the reader's samples, stripped down according to the mode.

    PREDICTION keeps only ``inputs`` and ``meta``; TARGETS keeps only
    ``targets`` and ``meta`` (forced for the future pipeline); any other mode
    passes the reader's samples through untouched.

    ``reader`` is not defined in this block; it is resolved from the
    surrounding scope.
    """
    mode = self.mode
    source = reader.generate()
    if mode == PipelineMode.PREDICTION:
        # Prediction must not see any targets.
        return (Sample(inputs=item.inputs, meta=item.meta) for item in source)
    if mode == PipelineMode.TARGETS:
        # Target-only mode must not produce images.
        return (Sample(targets=item.targets, meta=item.meta) for item in source)
    return source
def apply(self, sample: Sample) -> Sample:
    """Process the sample's inputs, marking the sample invalid on any error.

    Args:
        sample: Sample whose ``inputs`` and ``meta`` are handed to
            ``self._apply_single``.

    Returns:
        ``sample.new_inputs(...)`` with the processed inputs, or
        ``sample.new_invalid()`` if processing raised. The exception is
        logged and a warning with the sample's meta data is emitted, so a
        single broken line does not stop the pipeline.
    """
    try:
        processed = self._apply_single(sample.inputs, sample.meta)
        return sample.new_inputs(processed)
    except Exception as e:
        # Deliberate best effort: log everything and skip only this line.
        logger.exception(e)
        message = (
            "There was an unknown error when processing a line image. The line is skipped.\n"
            f"The error was caused by the line with meta data: {sample.meta}.\n"
            "Please report this as an issue including the meta data, stack trace, the respective "
            "image file and call.\n"
            "You can ignore this error if it occurs only very rarely, only this particular line will "
            "be skipped."
        )
        logger.warning(message)
        return sample.new_invalid()
def apply(self, sample: Sample) -> Sample:
    """Unwrap length entries and cut the network outputs to their length.

    ``inputs["img_len"]`` must have shape ``(1,)`` and is replaced by its
    single element. For the unsuffixed outputs and for each ensemble suffix
    ``_0 .. _{ensemble-1}``, an ``out_len`` entry of shape ``(1,)`` is
    unwrapped and every present logits/softmax entry is sliced to
    ``[:out_len]``. The sample's own dicts are not modified; shallow copies
    are edited instead.

    Returns:
        A sample carrying the edited input and output dicts.
    """
    sliced_names = ("logits", "softmax", "blank_last_logits", "blank_last_softmax")

    assert sample.inputs["img_len"].shape == (1,)
    new_inputs = sample.inputs.copy()
    new_outputs = sample.outputs.copy()
    new_inputs["img_len"] = new_inputs["img_len"][0]

    def trim(suffix):
        """Unwrap ``out_len<suffix>`` and slice the matching output arrays."""
        len_key = "out_len" + suffix
        if len_key in new_outputs and new_outputs[len_key].shape == (1,):
            new_outputs[len_key] = new_outputs[len_key][0]

        for base in sliced_names:
            key = base + suffix
            if key in new_outputs:
                # The length is looked up lazily, only when there is data to cut.
                new_outputs[key] = new_outputs[key][: new_outputs[len_key]]

    trim("")
    for index in range(self.data_params.ensemble):
        trim(f"_{index}")

    return sample.new_inputs(new_inputs).new_outputs(new_outputs)
def vote(self, sample: Sample) -> Sample:
    """Wrap every model prediction in a ``PredictionResult`` and vote on them.

    ``sample.outputs`` and ``sample.meta`` are iterated in lockstep with
    ``self.datas`` and ``self.post_proc``; the i-th prediction gets the id
    ``fold_{i}``.

    Args:
        sample: Sample whose ``outputs`` and ``meta`` are sequences with one
            entry per model and whose ``inputs`` contain ``"img_len"``.

    Returns:
        A new ``Sample`` with the original inputs, the tuple
        ``(prediction_results, voted_prediction)`` as outputs and ``meta[0]``
        as meta. The voted prediction gets the id ``"voted"``.
    """
    inputs, outputs, meta = sample.inputs, sample.outputs, sample.meta

    def make_out_to_in(bound_prediction):
        """Build a position mapper bound to one specific prediction.

        The callback is stored on the ``PredictionResult`` and may be called
        after this method has moved on. Binding the prediction here keeps it
        from silently switching to a later fold's (or the voted) prediction.
        """

        def out_to_in(x: int) -> int:
            # The factor is computed at call time, so predictions whose
            # callback is never invoked are not touched.
            return self.out_to_in_transformer.local_to_global(
                x,
                model_factor=inputs["img_len"] / bound_prediction.logits.shape[0],
                # NOTE(review): the whole meta sequence is passed here while the
                # per-prediction meta of the loop is unused - confirm intended.
                data_proc_params=meta,
            )

        return out_to_in

    prediction_results = []
    # `_meta` stays in the zip because it also bounds the iteration length.
    for i, (prediction, _meta, data, post_) in enumerate(zip(outputs, meta, self.datas, self.post_proc)):
        prediction.id = f"fold_{i}"
        prediction_results.append(
            PredictionResult(
                prediction,
                codec=data.params.codec,
                text_postproc=post_,
                out_to_in_trans=make_out_to_in(prediction),
            )
        )

    # Vote the results (if only one model is given, this will just return the sentences).
    voted = self.voter.vote_prediction_result(prediction_results)
    voted.id = "voted"
    return Sample(inputs=inputs, outputs=(prediction_results, voted), meta=meta[0])
def apply(self, sample: Sample) -> Sample:
    """Randomly replace the sample's inputs and targets by an augmented pair.

    Augmentation happens only if augmentations are enabled, the sample has
    inputs, an augmenter is configured, and a uniform random draw is at most
    ``self.data_aug_params.to_rel()``. Otherwise the sample is returned
    unchanged.
    """
    aug_params = self.data_aug_params

    # Guard clauses, checked in the same order as the draw depends on them.
    if aug_params.no_augs():
        return sample
    if sample.inputs is None:
        return sample
    if not self.data_augmenter:
        return sample
    if not (np.random.rand() <= aug_params.to_rel()):
        return sample

    augmented_line, augmented_text = self.augment(sample.inputs, sample.targets, sample.meta)
    return sample.new_inputs(augmented_line).new_targets(augmented_text)
def apply(self, sample: Sample) -> Sample:
    """Run ``self._apply_single`` on the text carried by the sample.

    The first matching case wins:

    1. outputs is a ``Prediction``: its ``sentence`` is rewritten in place.
    2. targets is a dict with ``"sentence"``: that entry is rewritten in place.
    3. outputs is a dict with ``"sentence"``: that entry is rewritten in place.
    4. otherwise truthy targets and/or outputs are processed as a whole and
       stored through ``new_targets`` / ``new_outputs``.

    Returns:
        The same sample object for cases 1-3, a possibly new sample for case 4.
    """
    targets = sample.targets
    outputs = sample.outputs
    meta = sample.meta

    if isinstance(outputs, Prediction):
        outputs.sentence = self._apply_single(outputs.sentence, meta)
        return sample

    if isinstance(targets, dict) and "sentence" in targets:
        targets["sentence"] = self._apply_single(targets["sentence"], meta)
        return sample

    if isinstance(outputs, dict) and "sentence" in outputs:
        outputs["sentence"] = self._apply_single(outputs["sentence"], meta)
        return sample

    # Plain values: process whichever side is present.
    if targets:
        sample = sample.new_targets(self._apply_single(targets, meta))
    if outputs:
        sample = sample.new_outputs(self._apply_single(outputs, meta))
    return sample
def multi_augment(self, sample: Sample, n_augmentations=1, include_non_augmented=True):
    """Create ``n_augmentations`` augmented variants of ``sample``.

    Args:
        sample: Source sample; its inputs and targets are passed to
            ``self.augment`` once per variant.
        n_augmentations: Number of augmented samples to create.
        include_non_augmented: If true, the original sample is the first
            element of the result.

    Returns:
        A list of samples. Every augmented sample gets its own deep copy of
        the source meta, made before ``self.augment`` is called with it.
    """
    results = [sample] if include_non_augmented else []
    for _ in range(n_augmentations):
        meta_copy = copy.deepcopy(sample.meta)
        line, text = self.augment(sample.inputs, sample.targets, meta_copy)
        results.append(Sample(inputs=line, targets=text, meta=meta_copy))
    return results
def apply(self, sample: Sample) -> Sample:
    """Decode ground truth and network outputs into sentences.

    If the targets contain ``'gt'``, the decoded string is stored in place as
    ``targets['sentence']``. If the sample has outputs, ``softmax`` and every
    ``softmax_{i}`` for ``i < ensemble`` are decoded with ``self.ctc_decoder``.
    The unsuffixed result gets the per-model results as
    ``voter_predictions`` and replaces the sample's outputs.
    """
    targets = sample.targets
    if targets and 'gt' in targets:
        targets['sentence'] = "".join(self.data_params.codec.decode(targets['gt']))

    if not sample.outputs:
        return sample

    def decode_head(suffix):
        """Decode ``softmax<suffix>`` into a result with labels and sentence."""
        probabilities = sample.outputs['softmax' + suffix].astype(float)
        decoded = self.ctc_decoder.decode(probabilities)
        # Labels are converted to plain ints before they reach the codec.
        decoded.labels = [int(label) for label in decoded.labels]
        decoded.sentence = "".join(self.data_params.codec.decode(decoded.labels))
        return decoded

    combined = decode_head("")
    combined.voter_predictions = [decode_head(f"_{i}") for i in range(self.data_params.ensemble)]
    return sample.new_outputs(combined)
def apply(self, sample: Sample) -> Sample:
    """Turn a sample into the dict-based inputs/targets layout.

    The targets are always encoded with the codec of ``self.data_params``
    (an empty int32 array when the sample has no targets). In TRAINING and
    EVALUATION mode the sample is rejected with a warning if
    ``self.is_valid_line`` fails or if the encoded ground truth is empty.

    Returns:
        ``sample.new_invalid()`` for rejected lines, otherwise a sample with
        inputs ``{'img', 'img_len'}`` and targets ``{'gt', 'gt_len', 'fold_id'}``.
    """
    params = self.data_params
    assert params.downscale_factor > 0  # Not instantiated yet
    codec = params.codec

    # Final preparation
    if sample.targets:
        raw_text = codec.encode(sample.targets)
    else:
        raw_text = np.zeros((0,), dtype='int32')
    text = np.array(raw_text)

    line = sample.inputs
    if len(line.shape) == 2:
        # Gray or binary input: add the missing trailing axis.
        line = np.expand_dims(line, axis=-1)

    if self.mode in {PipelineMode.TRAINING, PipelineMode.EVALUATION}:
        if not self.is_valid_line(text, len(line) // params.downscale_factor):
            # Skip longer outputs than inputs (also in evaluation due to loss computation).
            logger.warning(
                f"Skipping line with longer outputs than inputs (id={sample.meta['id']})"
            )
            return sample.new_invalid()

        if len(text) == 0:
            logger.warning(
                f"Skipping empty line with empty GT (id={sample.meta['id']})")
            return sample.new_invalid()

    new_inputs = {'img': line.astype(np.uint8), 'img_len': [len(line)]}
    new_targets = {
        'gt': text,
        'gt_len': [len(text)],
        'fold_id': [sample.meta.get('fold_id', -1)],
    }
    return sample.new_inputs(new_inputs).new_targets(new_targets)
def apply(self, sample: Optional[Sample] if False else Sample) -> Optional[Sample]:
    """Run the sample through every processor in ``self.processors``.

    A sample without meta first receives an empty dict as meta. Validity is
    checked with ``self.is_valid_sample`` before the first processor and
    after each one.

    Returns:
        The fully processed sample, or ``None`` as soon as a validity check
        fails.
    """
    current = sample if sample.meta is not None else sample.new_meta({})
    if not self.is_valid_sample(current):
        return None

    for processor in self.processors:
        current = processor(current)
        if not self.is_valid_sample(current):
            # Non-valid samples abort the remaining processors.
            return None

    return current
def __init__(
    self,
    prediction,
    codec,
    text_postproc,
    out_to_in_trans: Callable[[int], int],
    ground_truth=None,
):
    """Wrap a network prediction together with what is needed to interpret it.

    Besides storing its arguments, the constructor decodes the labels,
    post-processes the resulting sentence and annotates ``prediction`` in
    place: ``sentence``, ``avg_char_probability`` and, per position, the
    decoded chars plus ``global_start`` / ``global_end``.

    Parameters
    ----------
    prediction : PredictionProto
        prediction of the DNN; must provide ``logits``, ``labels`` and ``positions``
    codec : Codec
        codec required to decode the `prediction`
    text_postproc : TextPostprocessor
        text processor applied to the decoded `prediction` to obtain the final sentence
    out_to_in_trans : Callable[[int], int]
        maps a local start/end position of the prediction to a global one
    ground_truth : optional
        stored unchanged as ``self.ground_truth``
    """
    self.prediction = prediction
    self.logits = prediction.logits
    self.codec = codec
    self.text_postproc = text_postproc

    self.chars = codec.decode(prediction.labels)
    raw_sentence = "".join(self.chars)
    self.sentence = self.text_postproc.apply_on_sample(Sample(inputs="", outputs=raw_sentence)).outputs
    self.prediction.sentence = self.sentence

    self.out_to_in_trans = out_to_in_trans
    self.ground_truth = ground_truth

    # The average is accumulated directly on the prediction object.
    self.prediction.avg_char_probability = 0
    for position in self.prediction.positions:
        for char in position.chars:
            char.char = codec.code2char[char.label]

        position.global_start = int(self.out_to_in_trans(position.local_start))
        position.global_end = int(self.out_to_in_trans(position.local_end))

        if len(position.chars) > 0:
            # Only the first char candidate contributes to the average.
            self.prediction.avg_char_probability += position.chars[0].probability

    n_positions = len(self.prediction.positions)
    self.prediction.avg_char_probability /= n_positions if n_positions > 0 else 1
def apply(self, sample: Sample) -> Sample:
    """Strip the length wrappers and crop network outputs to their length.

    Works on shallow copies of the sample's input and output dicts.
    ``inputs['img_len']`` (shape ``(1,)``) becomes its single element. For
    the unsuffixed outputs and each ensemble suffix ``_{i}``, a shape-``(1,)``
    ``out_len`` is unwrapped and the logits/softmax entries that exist are
    cropped to ``[:out_len]``.

    Returns:
        A sample built via ``new_inputs(...).new_outputs(...)`` from the copies.
    """
    inputs = sample.inputs
    outputs = sample.outputs
    assert (inputs['img_len'].shape == (1,))
    inputs, outputs = inputs.copy(), outputs.copy()
    inputs['img_len'] = inputs['img_len'][0]

    cropped = ['logits', 'softmax', 'blank_last_logits', 'blank_last_softmax']

    def crop_for(suffix):
        """Handle ``out_len<suffix>`` and the output arrays with that suffix."""
        length_key = 'out_len' + suffix
        has_wrapped_length = length_key in outputs and outputs[length_key].shape == (1,)
        if has_wrapped_length:
            outputs[length_key] = outputs[length_key][0]

        present = [stem + suffix for stem in cropped if stem + suffix in outputs]
        for key in present:
            # Length lookup stays inside the loop: it is only needed when
            # there is something to crop.
            outputs[key] = outputs[key][:outputs[length_key]]

    crop_for('')
    for model_index in range(self.data_params.ensemble):
        crop_for(f"_{model_index}")

    return sample.new_inputs(inputs).new_outputs(outputs)
def apply_on_sample(self, sample: Sample) -> Sample:
    """Apply this processor to a copy of ``sample``.

    A sample without meta is first given an empty dict as meta. ``self.apply``
    always receives ``copy()`` of the sample, never the caller's object.
    """
    prepared = sample.new_meta({}) if sample.meta is None else sample
    return self.apply(prepared.copy())
def is_valid_sample(sample: Sample, mode: PipelineMode) -> bool:
    """Return whether ``sample`` exists and reports itself valid for ``mode``.

    ``None`` is never valid; otherwise the decision is delegated to
    ``sample.is_valid(mode)``.
    """
    return False if sample is None else sample.is_valid(mode)
def to_input_target_sample(self) -> Sample:
    """Build a ``Sample`` from this object's image, ground truth and meta.

    The image becomes the inputs, ``self.gt`` the targets, and the meta is
    converted with ``self.meta.to_dict()``.
    """
    fields = dict(inputs=self.image, targets=self.gt, meta=self.meta.to_dict())
    return Sample(**fields)
def apply(self, sample: Sample) -> Sample:
    """Replace the sample's inputs by the result of ``self._apply_single``.

    ``_apply_single`` receives the current inputs and the sample's meta; its
    return value is stored through ``sample.new_inputs``.
    """
    processed = self._apply_single(sample.inputs, sample.meta)
    return sample.new_inputs(processed)
# NOTE(review): `cls` is a static method whose owning class header lies outside
# this chunk (presumably the params class of the processor below). Re-indent it
# to that class's member level when splicing this block back.
@staticmethod
def cls() -> Type['TextProcessor']:
    """Return the processor class that belongs to these params."""
    return TextRegularizerProcessor


class TextRegularizerProcessor(TextProcessor[TextRegularizerProcessorParams]):
    """Text processor that applies a list of replacement rules to a string.

    Each rule provides ``old``, ``new`` and a ``regex`` flag. When the params
    carry no explicit ``replacements``, they are filled from
    ``default_text_regularizer_replacements(params.replacement_groups)``.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and resolve default rules."""
        # Keyword arguments must be forwarded with `**`. A single `*` would
        # pass the dict's keys as extra positional arguments.
        super().__init__(*args, **kwargs)
        if self.params.replacements is None:
            self.params.replacements = default_text_regularizer_replacements(
                self.params.replacement_groups)

    def _apply_single(self, txt, meta):
        """Apply every replacement rule to ``txt`` in list order.

        Args:
            txt: Text to regularize.
            meta: Sample meta data; not used by this processor.

        Returns:
            The text after all replacements.
        """
        for replacement in self.params.replacements:
            if replacement.regex:
                # `old` is a regular expression pattern.
                txt = re.sub(replacement.old, replacement.new, txt)
            else:
                # `old` is a literal substring.
                txt = txt.replace(replacement.old, replacement.new)
        return txt


if __name__ == "__main__":
    # Minimal self-check of the "quotes" and "spaces" replacement groups.
    n = TextRegularizerProcessorParams(
        replacement_groups=["quotes", "spaces"]).create(
            None, mode=PipelineMode.TRAINING)
    assert (n(
        Sample(targets="“Resolve quotes”")).targets == "''Resolve quotes''")
    assert (n(Sample(
        targets=" “Resolve spaces ” ")).targets == "''Resolve spaces ''")