def generate(self) -> Iterable[Sample]:
    """Return the reader's samples, reduced to the fields the current mode needs.

    In ``PipelineMode.Prediction`` each sample is rebuilt with only its
    ``inputs`` and ``meta``; in ``PipelineMode.Targets`` with only its
    ``targets`` and ``meta``. In every other mode the reader's own iterable
    is returned untouched.
    """

    def inputs_only(s):
        # Drop the targets so that later stages cannot depend on them.
        return Sample(inputs=s.inputs, meta=s.meta)

    def targets_only(s):
        # Drop the inputs so that later stages cannot depend on them.
        return Sample(targets=s.targets, meta=s.meta)

    mode = self.mode
    if mode == PipelineMode.Prediction:
        return map(inputs_only, reader.generate())
    if mode == PipelineMode.Targets:
        return map(targets_only, reader.generate())
    return reader.generate()
def apply(self, sample: Sample) -> Sample:
    """Decode ground truth labels and CTC network outputs into sentences.

    If the sample carries targets with a ``'gt'`` entry, the decoded string
    is stored in place under ``targets['sentence']``. If the sample carries
    outputs, ``outputs['softmax']`` is run through the CTC decoder and the
    returned sample has the decoder result (with ``labels`` as plain ints
    and a decoded ``sentence``) as its new outputs.
    """
    codec = self.params.codec

    targets = sample.targets
    if targets and 'gt' in targets:
        targets['sentence'] = "".join(codec.decode(targets['gt']))

    if not sample.outputs:
        # Nothing to decode; hand back the (possibly target-updated) sample.
        return sample

    probabilities = sample.outputs['softmax'].astype(float)
    decoded = self.ctc_decoder.decode(probabilities)
    # Convert every label to a built-in int before decoding it to text.
    decoded.labels = [int(label) for label in decoded.labels]
    decoded.sentence = "".join(codec.decode(decoded.labels))
    return sample.new_outputs(decoded)
def apply(self, sample: Sample) -> Sample:
    """Unwrap length-1 entries and trim the outputs to their real length.

    ``inputs['img_len']`` and ``inputs['meta']`` must have shape ``(1,)``
    and are replaced by their single element. For the main head (no suffix)
    and for every ensemble head ``_0 .. _{ensemble_ - 1}``, an ``out_len``
    entry of shape ``(1,)`` is unwrapped and every present ``logits`` /
    ``softmax`` / ``blank_last_logits`` / ``blank_last_softmax`` entry is
    cut to its first ``out_len`` items. Works on shallow copies of the
    input and output dicts; the returned sample carries the copies.
    """
    source_inputs = sample.inputs
    source_outputs = sample.outputs
    assert source_inputs['img_len'].shape == (1,)
    assert source_inputs['meta'].shape == (1,)

    inputs = source_inputs.copy()
    outputs = source_outputs.copy()
    for key in ('img_len', 'meta'):
        inputs[key] = inputs[key][0]

    sequence_names = ('logits', 'softmax', 'blank_last_logits', 'blank_last_softmax')
    # '' addresses the main head, '_<i>' the i-th ensemble head.
    suffixes = [''] + [f"_{i}" for i in range(self.params.ensemble_)]
    for suffix in suffixes:
        len_key = 'out_len' + suffix
        if len_key in outputs and outputs[len_key].shape == (1,):
            outputs[len_key] = outputs[len_key][0]

        for base in sequence_names:
            key = base + suffix
            if key in outputs:
                outputs[key] = outputs[key][:outputs[len_key]]

    return sample.new_inputs(inputs).new_outputs(outputs)
def vote(self, sample: Sample) -> Sample:
    """Wrap every fold's prediction in a ``PredictionResult`` and vote on them.

    ``sample.outputs`` is iterated in lockstep with ``sample.meta``,
    ``self.datas`` and ``self.post_proc``; each prediction gets the id
    ``"fold_<i>"`` and is wrapped together with the codec of its data
    object, its text post processor and a position transformer. The voter
    then combines all results into one prediction with the id ``"voted"``.

    Returns
    -------
    Sample
        ``inputs`` are passed through, ``outputs`` is the tuple
        ``(prediction_results, voted_prediction)`` and ``meta`` is the
        JSON-decoded content of ``inputs['meta']``.
    """
    inputs, outputs, meta = sample.inputs, sample.outputs, sample.meta
    input_meta = json.loads(inputs['meta'])

    def make_out_to_in(fold_prediction):
        # Bind the transformer to ONE specific prediction. A closure that
        # reads the loop variable directly would, when called after the
        # loop, see whatever that name was rebound to last (the final fold
        # or the voted result) and scale with the wrong logits length.
        def out_to_in(x: int) -> int:
            # The factor is computed on call (not on creation) so nothing
            # is divided unless a position actually gets transformed.
            return self.out_to_in_transformer.local_to_global(
                x,
                model_factor=inputs['img_len'] / fold_prediction.logits.shape[0],
                data_proc_params=input_meta)

        return out_to_in

    prediction_results = []
    # The per-fold meta entry is unused, but it stays in the zip because
    # zip stops at the shortest of its arguments.
    for i, (prediction, _fold_meta, data, post_) in enumerate(
            zip(outputs, meta, self.datas, self.post_proc)):
        prediction.id = "fold_{}".format(i)
        prediction_results.append(
            PredictionResult(
                prediction,
                codec=data.params().codec,
                text_postproc=post_,
                out_to_in_trans=make_out_to_in(prediction),
            ))

    # vote the results (if only one model is given, this will just return the sentences)
    voted = self.voter.vote_prediction_result(prediction_results)
    voted.id = "voted"
    return Sample(inputs=inputs, outputs=(prediction_results, voted), meta=input_meta)
def apply(self, sample: Sample) -> Sample:
    """Decode ground truth and the CTC outputs of all ensemble heads.

    If the sample carries targets with a ``'gt'`` entry, the decoded string
    is stored in place under ``targets['sentence']``. If the sample carries
    outputs, ``outputs['softmax']`` is decoded into the main result and
    ``outputs['softmax_<i>']`` for ``i`` in ``range(ensemble_)`` into its
    ``voter_predictions`` list; the returned sample has the main result as
    its new outputs.
    """
    codec = self.params.codec

    targets = sample.targets
    if targets and 'gt' in targets:
        targets['sentence'] = "".join(codec.decode(targets['gt']))

    if not sample.outputs:
        return sample

    def decode_head(key):
        # Decode one softmax entry into labels (plain ints) and a sentence.
        decoded = self.ctc_decoder.decode(sample.outputs[key].astype(float))
        decoded.labels = [int(label) for label in decoded.labels]
        decoded.sentence = "".join(codec.decode(decoded.labels))
        return decoded

    combined = decode_head('softmax')
    combined.voter_predictions = [
        decode_head(f"softmax_{i}") for i in range(self.params.ensemble_)
    ]
    return sample.new_outputs(combined)
def apply(self, sample: Sample) -> Sample:
    """Randomly replace the sample's inputs and targets by an augmented version.

    The sample is returned unchanged when augmentation is disabled
    (``data_aug_params.no_augs()``), when it has no inputs, when no data
    augmenter is set, or when the random draw exceeds
    ``data_aug_params.to_rel()``. Otherwise ``self.augment`` is called and
    its ``(line, text)`` result becomes the new inputs and targets.
    """
    aug_params = self.params.data_aug_params

    # The checks run in this order on purpose: a random number is only
    # drawn when every cheaper precondition already holds.
    if aug_params.no_augs():
        return sample
    if sample.inputs is None:
        return sample
    if not self.data_augmenter:
        return sample

    if np.random.rand() <= aug_params.to_rel():
        line, text = self.augment(sample.inputs, sample.targets, sample.meta)
        return sample.new_inputs(line).new_targets(text)
    return sample
def apply(self, sample: Sample) -> Sample:
    """Encode the text, validate the line and pack both into dict form.

    The targets are encoded with the codec (an empty ``int32`` array if
    there are none). A two-dimensional line gets a trailing channel axis.
    In training and evaluation mode a sample is marked invalid when the
    line fails ``is_valid_line`` for the downscaled length or when the
    encoded text is empty.

    Raises
    ------
    ValueError
        If the last axis of the line does not match
        ``params.input_channels``.
    """
    codec = self.params.codec

    # final preparation
    if sample.targets:
        encoded = codec.encode(sample.targets)
    else:
        encoded = np.zeros((0, ), dtype='int32')
    text = np.array(encoded)

    line = sample.inputs
    if len(line.shape) == 2:
        # gray or binary input, add missing axis
        line = np.expand_dims(line, axis=-1)

    if line.shape[-1] != self.params.input_channels:
        raise ValueError(
            f"Expected {self.params.input_channels} channels but got {line.shape[-1]}. Shape of input {line.shape}"
        )

    if self.mode in {PipelineMode.Training, PipelineMode.Evaluation}:
        downscaled_len = len(line) // self.params.downscale_factor_
        if not self.is_valid_line(text, downscaled_len):
            # skip longer outputs than inputs (also in evaluation due to loss computation)
            logger.warning(
                f"Skipping line with longer outputs than inputs (id={sample.meta['id']})"
            )
            return sample.new_invalid()

        if len(text) == 0:
            logger.warning(
                f"Skipping empty line with empty GT (id={sample.meta['id']})")
            return sample.new_invalid()

    # Scalars are wrapped in one-element lists.
    prepared_inputs = {
        'img': line.astype(np.uint8),
        'img_len': [len(line)],
        'meta': [json.dumps(sample.meta)],
    }
    prepared_targets = {
        'gt': text,
        'gt_len': [len(text)],
        'fold_id': [sample.meta.get('fold_id', -1)],
    }
    return sample.new_inputs(prepared_inputs).new_targets(prepared_targets)
def apply(self, sample: Sample) -> Sample:
    """Run ``_apply_single`` on whichever text the sample carries.

    Checked in this order; the first match wins and the same sample object
    is returned after an in-place update:

    1. outputs is a ``Prediction``: its ``sentence`` is processed.
    2. targets is a dict with ``'sentence'``: that entry is processed.
    3. outputs is a dict with ``'sentence'``: that entry is processed.

    Otherwise targets and outputs are treated as the text itself and every
    truthy one is replaced via ``new_targets`` / ``new_outputs``.
    """
    targets = sample.targets
    outputs = sample.outputs
    meta = sample.meta

    if isinstance(outputs, Prediction):
        outputs.sentence = self._apply_single(outputs.sentence, meta)
        return sample

    # Dict containers: targets take precedence over outputs.
    for container in (targets, outputs):
        if isinstance(container, dict) and 'sentence' in container:
            container['sentence'] = self._apply_single(container['sentence'], meta)
            return sample

    if targets:
        sample = sample.new_targets(self._apply_single(targets, meta))
    if outputs:
        sample = sample.new_outputs(self._apply_single(outputs, meta))
    return sample
def vote_prediction_result_tuple(self, predictions):
    """Vote the given predictions into a single new ``Prediction``.

    A fresh ``Prediction`` flagged as voted result is filled by
    ``_apply_vote``. Its sentence is then post processed, and
    ``avg_char_probability`` is set to the sum of the first character's
    probability over all positions that have characters, divided by the
    number of positions (or by 1 if there are none).
    """
    voted = Prediction()
    voted.is_voted_result = True
    self._apply_vote(predictions, voted)

    # postprocessing after voting
    # option 1: Use custom text postprocessor
    # option 2: (Not implemented) Use only the first text postprocessor
    # option 3: Apply all known postprocessors and apply a sequence voting if different results are received
    if self.text_postproc:
        processed = self.text_postproc.apply(Sample(inputs='', outputs=voted.sentence))
        voted.sentence = processed.outputs
    else:
        candidates = []
        for pred in predictions:
            processed = pred.text_postproc.apply(Sample(inputs='', outputs=voted.sentence))
            candidates.append(processed.outputs)

        if all(candidate == candidates[0] for candidate in candidates[1:]):
            # usually all postproc should yield the same results
            voted.sentence = candidates[0]
        else:
            # we need to vote again
            from calamari_ocr.ocr.voting import SequenceVoter
            sequence_voter = SequenceVoter()
            voted.sentence = "".join(
                [char for char, _ in sequence_voter.process_text(candidates)])

    probability_sum = sum(
        pos.chars[0].probability for pos in voted.positions if len(pos.chars) > 0)
    n_positions = len(voted.positions)
    voted.avg_char_probability = probability_sum / (n_positions if n_positions > 0 else 1)
    return voted
def multi_augment(self,
                  sample: Sample,
                  n_augmentations=1,
                  include_non_augmented=True):
    """Create ``n_augmentations`` augmented variants of ``sample``.

    Every variant is built by ``self.augment`` from the sample's inputs and
    targets together with its own deep copy of the sample's meta.

    Returns
    -------
    list
        The augmented samples, preceded by the original ``sample`` itself
        when ``include_non_augmented`` is true.
    """
    results = [sample] if include_non_augmented else []
    for _ in range(n_augmentations):
        # A separate meta per variant; the original meta object is not passed on.
        variant_meta = copy.deepcopy(sample.meta)
        line, text = self.augment(sample.inputs, sample.targets, variant_meta)
        results.append(Sample(inputs=line, targets=text, meta=variant_meta))
    return results
def apply(self, sample: Sample) -> Sample:
    """Unwrap length-1 entries and trim the outputs to their real length.

    ``inputs['img_len']`` and ``inputs['meta']`` must have shape ``(1,)``
    and are replaced by their single element. An ``out_len`` output of
    shape ``(1,)`` is unwrapped as well, and every present ``logits`` /
    ``softmax`` / ``blank_last_logits`` / ``blank_last_softmax`` output is
    cut to its first ``out_len`` items. Works on shallow copies of the
    input and output dicts; the returned sample carries the copies.
    """
    source_inputs = sample.inputs
    source_outputs = sample.outputs
    assert source_inputs['img_len'].shape == (1, )
    assert source_inputs['meta'].shape == (1, )

    inputs = source_inputs.copy()
    outputs = source_outputs.copy()
    for key in ('img_len', 'meta'):
        inputs[key] = inputs[key][0]

    if 'out_len' in outputs and outputs['out_len'].shape == (1, ):
        outputs['out_len'] = outputs['out_len'][0]

    for key in ('logits', 'softmax', 'blank_last_logits', 'blank_last_softmax'):
        if key in outputs:
            outputs[key] = outputs[key][:outputs['out_len']]

    return sample.new_inputs(inputs).new_outputs(outputs)
def __init__(self,
             prediction,
             codec,
             text_postproc,
             out_to_in_trans: Callable[[int], int],
             ground_truth=None):
    """Wrap a network prediction with everything needed to interpret it.

    Besides storing its arguments, the constructor writes back into
    ``prediction``: the post processed ``sentence``, the
    ``avg_char_probability``, and for every position the decoded ``char``
    of each character plus ``global_start`` / ``global_end``.

    Parameters
    ----------
    prediction : PredictionProto
        prediction of the DNN; its ``logits``, ``labels`` and ``positions``
        are read
    codec : Codec
        codec used to decode the labels of the `prediction`
    text_postproc : TextPostprocessor
        text processor applied to the decoded `prediction` to obtain the
        actual prediction sentence
    out_to_in_trans : Callable[[int], int]
        called with ``local_start`` / ``local_end`` of each position; the
        result is converted with ``int`` and stored as the global value
    ground_truth : optional
        stored unchanged as ``self.ground_truth``
    """
    self.prediction = prediction
    self.logits = prediction.logits
    self.codec = codec
    self.text_postproc = text_postproc

    self.chars = codec.decode(prediction.labels)
    raw_sentence = "".join(self.chars)
    self.sentence = self.text_postproc.apply(
        Sample(inputs='', outputs=raw_sentence)).outputs
    self.prediction.sentence = self.sentence

    self.out_to_in_trans = out_to_in_trans
    self.ground_truth = ground_truth

    target = self.prediction
    target.avg_char_probability = 0
    for position in target.positions:
        for character in position.chars:
            character.char = codec.code2char[character.label]

        position.global_start = int(self.out_to_in_trans(position.local_start))
        position.global_end = int(self.out_to_in_trans(position.local_end))

        if len(position.chars) == 0:
            continue
        # Only the first character of a position counts towards the average.
        target.avg_char_probability += position.chars[0].probability

    n_positions = len(target.positions)
    # Divide by 1 instead of 0 when there are no positions.
    target.avg_char_probability /= n_positions if n_positions > 0 else 1
def to_input_target_sample(self) -> Sample:
    """Build a ``Sample`` from this object's image, ground truth and meta.

    The image becomes the sample's inputs, ``gt`` its targets, and the meta
    is passed as the result of ``self.meta.to_dict()``.
    """
    image = self.image
    ground_truth = self.gt
    meta_dict = self.meta.to_dict()
    return Sample(inputs=image, targets=ground_truth, meta=meta_dict)
def apply(self, sample: Sample) -> Sample:
    """Process the sample's inputs with ``_apply_single``.

    ``_apply_single`` receives the inputs and the sample's meta; its result
    is handed to ``sample.new_inputs`` and that call's return value is
    returned.
    """
    processed_inputs = self._apply_single(sample.inputs, sample.meta)
    return sample.new_inputs(processed_inputs)