Ejemplo n.º 1
0
    def apply(self, sample: Sample) -> Sample:
        """Unwrap the length entries and cut the output sequences to size.

        ``inputs["img_len"]`` must have shape ``(1,)`` and is replaced by its
        single element. For the plain outputs and for every ensemble suffix
        ``_0`` .. ``_{ensemble - 1}``, an ``out_len`` entry of shape ``(1,)``
        is unwrapped the same way, and each of the logits/softmax entries that
        is present is truncated to its first ``out_len`` items.

        The edits are made on ``.copy()`` results of ``sample.inputs`` and
        ``sample.outputs``; the returned value is whatever
        ``sample.new_inputs(...).new_outputs(...)`` produces.
        """
        src_inputs = sample.inputs
        src_outputs = sample.outputs
        assert src_inputs["img_len"].shape == (1, )
        new_inputs = src_inputs.copy()
        trimmed = src_outputs.copy()
        new_inputs["img_len"] = new_inputs["img_len"][0]

        sequence_names = (
            "logits",
            "softmax",
            "blank_last_logits",
            "blank_last_softmax",
        )

        def trim_outputs(suffix):
            len_key = "out_len" + suffix
            # Unwrap the length only while it is still a one-element array.
            if len_key in trimmed and trimmed[len_key].shape == (1, ):
                trimmed[len_key] = trimmed[len_key][0]

            for base in sequence_names:
                key = base + suffix
                if key not in trimmed:
                    continue
                trimmed[key] = trimmed[key][:trimmed[len_key]]

        # Un-suffixed outputs first, then one pass per ensemble member.
        trim_outputs("")
        for idx in range(self.data_params.ensemble):
            trim_outputs("_" + str(idx))

        return sample.new_inputs(new_inputs).new_outputs(trimmed)
Ejemplo n.º 2
0
    def apply(self, sample: Sample) -> Sample:
        """Turn a line image and its text into the final input/target dicts.

        In TRAINING and EVALUATION mode the targets are encoded with the
        codec (an empty int32 array when the targets are falsy); in every
        other mode the text is ``None`` and no targets are set. A 2-D image
        gets a trailing axis. If ``is_valid_line`` rejects the line,
        ``sample.new_invalid()`` is returned.

        Otherwise the sample is returned with inputs ``img`` (cast to uint8)
        and ``img_len``, plus targets ``gt``, ``gt_len`` and ``fold_id`` when
        the text is available.
        """
        params = self.data_params
        assert params.downscale_factor > 0  # Not instantiated yet
        codec = params.codec

        # final preparation
        text = None
        if self.mode in {PipelineMode.TRAINING, PipelineMode.EVALUATION}:
            if sample.targets:
                encoded = codec.encode(sample.targets)
            else:
                encoded = np.zeros((0,), dtype="int32")
            text = np.array(encoded)

        line = sample.inputs
        if len(line.shape) == 2:
            # gray or binary input, add missing axis
            line = np.expand_dims(line, axis=-1)

        # Validate if the line is valid for training
        line_len = len(line)
        sample_id = sample.meta.get("id", "Unknown Sample ID")
        line_ok = self.is_valid_line(text, line_len // params.downscale_factor, line_len, sample_id)
        if not line_ok:
            return sample.new_invalid()

        if text is not None:
            targets = {
                "gt": np.asarray(text),
                "gt_len": np.asarray([len(text)]),
                "fold_id": np.asarray([sample.meta.get("fold_id", -1)]),
            }
            sample = sample.new_targets(targets)

        prepared_inputs = {
            "img": line.astype(np.uint8),
            "img_len": np.asarray([len(line)]),
        }
        return sample.new_inputs(prepared_inputs)
Ejemplo n.º 3
0
 def apply(self, sample: Sample) -> Sample:
     """Randomly replace the sample's inputs and targets by an augmented pair.

     The sample is returned untouched when ``data_aug_params.no_augs()`` is
     true, when it has no inputs, when no data augmenter is set, or when the
     random draw exceeds ``data_aug_params.to_rel()``. Otherwise
     ``self.augment`` is called and its ``(line, text)`` result becomes the
     new inputs and targets.
     """
     # The guards run in the same order as a short-circuiting ``and`` chain,
     # so the random number is only drawn when every earlier check passed.
     if self.data_aug_params.no_augs():
         return sample
     if sample.inputs is None:
         return sample
     if not self.data_augmenter:
         return sample
     if not (np.random.rand() <= self.data_aug_params.to_rel()):
         return sample

     # data augmentation
     augmented_line, augmented_text = self.augment(sample.inputs, sample.targets, sample.meta)
     return sample.new_inputs(augmented_line).new_targets(augmented_text)
Ejemplo n.º 4
0
 def apply(self, sample: Sample) -> Sample:
     """Run ``_apply_single`` on the sample inputs, skipping the line on error.

     On success the sample is returned with the processed inputs. Any
     ``Exception`` raised while processing or while building the new sample
     is logged with its stack trace, followed by a warning that includes the
     sample meta data, and ``sample.new_invalid()`` is returned instead.
     """
     try:
         processed = self._apply_single(sample.inputs, sample.meta)
         return sample.new_inputs(processed)
     except Exception as error:
         # Deliberate catch-all: one broken line must not abort the pipeline.
         logger.exception(error)
         message = (
             "There was an unknown error when processing a line image. The line is skipped.\n"
             f"The error was caused by the line with meta data: {sample.meta}.\n"
             "Please report this as an issue including the meta data, stack trace,  the respective "
             "image file and call.\n"
             "You can ignore this error if it occurs only very rarely, only this particular line will "
             "be skipped."
         )
         logger.warning(message)
         return sample.new_invalid()
Ejemplo n.º 5
0
    def apply(self, sample: Sample) -> Sample:
        """Strip the length-1 wrappers and trim the outputs to ``out_len``.

        Asserts that ``inputs['img_len']`` has shape ``(1,)`` and replaces it
        by its only element. The plain outputs and each ensemble suffix
        ``_0`` .. ``_{ensemble - 1}`` are then handled in turn: an ``out_len``
        entry of shape ``(1,)`` is unwrapped, and every logits/softmax entry
        that exists is sliced to its first ``out_len`` items.

        Only ``.copy()`` results of the sample's inputs and outputs are
        edited; the result comes from
        ``sample.new_inputs(...).new_outputs(...)``.
        """
        raw_inputs = sample.inputs
        raw_outputs = sample.outputs
        assert (raw_inputs['img_len'].shape == (1,))
        inputs = raw_inputs.copy()
        outputs = raw_outputs.copy()
        inputs['img_len'] = inputs['img_len'][0]

        def suffixes():
            # Lazy on purpose: the ensemble size is only read once the
            # un-suffixed outputs have been handled.
            yield ''
            for member in range(self.data_params.ensemble):
                yield f"_{member}"

        for suffix in suffixes():
            length_key = 'out_len' + suffix
            if length_key in outputs and outputs[length_key].shape == (1,):
                outputs[length_key] = outputs[length_key][0]

            for stem in ('logits', 'softmax', 'blank_last_logits', 'blank_last_softmax'):
                full_name = stem + suffix
                if full_name in outputs:
                    outputs[full_name] = outputs[full_name][:outputs[length_key]]

        return sample.new_inputs(inputs).new_outputs(outputs)
Ejemplo n.º 6
0
    def apply(self, sample: Sample) -> Sample:
        """Turn a line image and its text into the final input/target dicts.

        The targets are encoded with the codec (an empty int32 array when
        they are falsy) and a 2-D image gets a trailing axis. In TRAINING and
        EVALUATION mode the line is skipped, i.e. ``sample.new_invalid()`` is
        returned after a warning, when ``is_valid_line`` rejects it or when
        the encoded text is empty.

        Otherwise the sample is returned with inputs ``img`` (cast to uint8)
        and ``img_len`` and targets ``gt``, ``gt_len`` and ``fold_id``
        (``-1`` when the meta data has no fold id).
        """
        assert (self.data_params.downscale_factor > 0)  # Not instantiated yet
        codec = self.data_params.codec
        # final preparation
        text = np.array(
            codec.encode(sample.targets) if sample.targets else np.zeros(
                (0, ), dtype='int32'))
        line = sample.inputs

        # gray or binary input, add missing axis
        if len(line.shape) == 2:
            line = np.expand_dims(line, axis=-1)

        if self.mode in {PipelineMode.TRAINING, PipelineMode.EVALUATION}:
            # Look the id up with a fallback: a sample without an 'id' in its
            # meta data must still be skipped with a warning rather than make
            # the warning itself fail with a KeyError.
            sample_id = sample.meta.get('id', 'Unknown Sample ID')

            if not self.is_valid_line(
                    text, len(line) // self.data_params.downscale_factor):
                # skip longer outputs than inputs (also in evaluation due to loss computation)
                logger.warning(
                    f"Skipping line with longer outputs than inputs (id={sample_id})"
                )
                return sample.new_invalid()

            if len(text) == 0:
                logger.warning(
                    f"Skipping empty line with empty GT (id={sample_id})")
                return sample.new_invalid()

        return sample.new_inputs({
            'img': line.astype(np.uint8),
            'img_len': [len(line)]
        }).new_targets({
            'gt': text,
            'gt_len': [len(text)],
            'fold_id': [sample.meta.get('fold_id', -1)]
        })
Ejemplo n.º 7
0
 def apply(self, sample: Sample) -> Sample:
     """Return the sample with its inputs replaced by ``_apply_single``'s result.

     ``_apply_single`` receives the sample's inputs and meta data; anything
     it raises propagates to the caller.
     """
     processed = self._apply_single(sample.inputs, sample.meta)
     return sample.new_inputs(processed)