class GroundTruthDataset(Dataset):
    """
    Dataset for ground truth used during training.

    All data is cached in memory.
    """
    def __init__(self,
                 split: Callable[[str], str] = lambda x: os.path.splitext(x)[0],
                 suffix: str = '.gt.txt',
                 normalization: Optional[str] = None,
                 whitespace_normalization: bool = True,
                 reorder: bool = True,
                 im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
                 preload: bool = True) -> None:
        """
        Reads a list of image-text pairs and creates a ground truth set.

        Args:
            split (func): Function for generating the base name without
                          extensions from paths.
            suffix (str): Suffix to attach to the image base name for text
                          retrieval.
            normalization (str): Unicode normalization applied to the ground
                                 truth.
            whitespace_normalization (bool): Normalizes unicode whitespace and
                                             strips whitespace.
            reorder (bool): Whether to rearrange code points in "display"/LTR
                            order.
            im_transforms (func): Function taking a PIL.Image and returning a
                                  tensor suitable for forward passes.
            preload (bool): Enables preloading and preprocessing of image files.
        """
        self.suffix = suffix
        self.split = lambda x: split(x) + self.suffix
        self._images = []  # type: Union[List[Image], List[torch.Tensor]]
        self._gt = []  # type: List[str]
        self.alphabet = Counter()  # type: Counter
        self.text_transforms = []  # type: List[Callable[[str], str]]
        self.transforms = im_transforms
        self.preload = preload
        # build text transformations
        if normalization:
            self.text_transforms.append(lambda x: unicodedata.normalize(cast(str, normalization), x))
        if whitespace_normalization:
            self.text_transforms.append(lambda x: regex.sub(r'\s', ' ', x).strip())
        if reorder:
            self.text_transforms.append(bd.get_display)

    def add(self, image: str) -> None:
        """
        Adds a line-image-text pair to the dataset.

        Args:
            image (str): Input image path
        """
        with open(self.split(image), 'r', encoding='utf-8') as fp:
            gt = fp.read().strip('\n\r')
            for func in self.text_transforms:
                gt = func(gt)
            if not gt:
                raise KrakenInputException('Text line is empty ({})'.format(fp.name))
        if self.preload:
            im = Image.open(image)
            try:
                im = self.transforms(im)
            except ValueError:
                raise KrakenInputException('Image transforms failed on {}'.format(image))
            self._images.append(im)
        else:
            self._images.append(image)
        self._gt.append(gt)
        self.alphabet.update(gt)

    def add_loaded(self, image: Image.Image, gt: str) -> None:
        """
        Adds an already loaded line-image-text pair to the dataset.

        Args:
            image (PIL.Image.Image): Line image
            gt (str): Text contained in the line image
        """
        if self.preload:
            try:
                im = self.transforms(image)
            except ValueError:
                raise KrakenInputException('Image transforms failed on {}'.format(image))
            self._images.append(im)
        else:
            self._images.append(image)
        for func in self.text_transforms:
            gt = func(gt)
        self._gt.append(gt)
        self.alphabet.update(gt)

    def encode(self, codec: Optional[PytorchCodec] = None) -> None:
        """
        Adds a codec to the dataset and encodes all text lines. Has to be run
        before sampling from the dataset.
        """
        if codec:
            self.codec = codec
        else:
            self.codec = PytorchCodec(''.join(self.alphabet.keys()))
        self.training_set = []  # type: List[Tuple[Union[Image, torch.Tensor], torch.Tensor]]
        for im, gt in zip(self._images, self._gt):
            self.training_set.append((im, self.codec.encode(gt)))

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        if self.preload:
            return self.training_set[index]
        else:
            item = self.training_set[index]
            try:
                logger.debug('Attempting to load {}'.format(item[0]))
                im = item[0]
                if not isinstance(im, Image.Image):
                    im = Image.open(im)
                return (self.transforms(im), item[1])
            except Exception:
                idx = np.random.randint(0, len(self.training_set))
                logger.debug('Failed. Replacing with sample {}'.format(idx))
                return self[idx]

    def __len__(self) -> int:
        return len(self.training_set)
class GroundTruthDataset(Dataset):
    """
    Dataset for training a line recognition model.

    All data is cached in memory.
    """
    def __init__(self,
                 split: Callable[[str], str] = lambda x: path.splitext(x)[0],
                 suffix: str = '.gt.txt',
                 normalization: Optional[str] = None,
                 whitespace_normalization: bool = True,
                 reorder: bool = True,
                 im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
                 preload: bool = True,
                 augmentation: bool = False) -> None:
        """
        Reads a list of image-text pairs and creates a ground truth set.

        Args:
            split (func): Function for generating the base name without
                          extensions from paths.
            suffix (str): Suffix to attach to the image base name for text
                          retrieval.
            normalization (str): Unicode normalization applied to the ground
                                 truth.
            whitespace_normalization (bool): Normalizes unicode whitespace and
                                             strips whitespace.
            reorder (bool): Whether to rearrange code points in "display"/LTR
                            order.
            im_transforms (func): Function taking a PIL.Image and returning a
                                  tensor suitable for forward passes.
            preload (bool): Enables preloading and preprocessing of image files.
            augmentation (bool): Enables image augmentation of the line images.
        """
        self.suffix = suffix
        self.split = lambda x: split(x) + self.suffix
        self._images = []  # type: Union[List[Image], List[torch.Tensor]]
        self._gt = []  # type: List[str]
        self.alphabet = Counter()  # type: Counter
        self.text_transforms = []  # type: List[Callable[[str], str]]
        # split image transforms into two parts: one giving the final PIL
        # image before conversion to a tensor and the actual tensor
        # conversion part.
        self.head_transforms = transforms.Compose(im_transforms.transforms[:2])
        self.tail_transforms = transforms.Compose(im_transforms.transforms[2:])
        self.aug = None
        self.preload = preload
        self.seg_type = 'bbox'
        # build text transformations
        if normalization:
            self.text_transforms.append(lambda x: unicodedata.normalize(cast(str, normalization), x))
        if whitespace_normalization:
            self.text_transforms.append(lambda x: regex.sub(r'\s', ' ', x).strip())
        if reorder:
            self.text_transforms.append(bd.get_display)
        if augmentation:
            from albumentations import (Compose, ToFloat, FromFloat, Flip,
                                        OneOf, MotionBlur, MedianBlur, Blur,
                                        ShiftScaleRotate, OpticalDistortion,
                                        ElasticTransform,
                                        RandomBrightnessContrast)

            self.aug = Compose([ToFloat(),
                                OneOf([
                                    MotionBlur(p=0.2),
                                    MedianBlur(blur_limit=3, p=0.1),
                                    Blur(blur_limit=3, p=0.1),
                                ], p=0.2),
                                ShiftScaleRotate(shift_limit=0.0625,
                                                 scale_limit=0.2,
                                                 rotate_limit=45,
                                                 p=0.2),
                                OneOf([
                                    OpticalDistortion(p=0.3),
                                    ElasticTransform(p=0.1),
                                ], p=0.2),
                                ], p=0.5)

        self.im_mode = '1'

    def add(self, image: Union[str, Image.Image], *args, **kwargs) -> None:
        """
        Adds a line-image-text pair to the dataset.

        Args:
            image (str): Input image path
        """
        with open(self.split(image), 'r', encoding='utf-8') as fp:
            gt = fp.read().strip('\n\r')
            for func in self.text_transforms:
                gt = func(gt)
            if not gt:
                raise KrakenInputException(f'Text line is empty ({fp.name})')
        if self.preload:
            try:
                im = Image.open(image)
                im = self.head_transforms(im)
                if not is_bitonal(im):
                    self.im_mode = im.mode
                im = self.tail_transforms(im)
            except ValueError:
                raise KrakenInputException(f'Image transforms failed on {image}')
            self._images.append(im)
        else:
            self._images.append(image)
        self._gt.append(gt)
        self.alphabet.update(gt)

    def add_loaded(self, image: Image.Image, gt: str) -> None:
        """
        Adds an already loaded line-image-text pair to the dataset.

        Args:
            image (PIL.Image.Image): Line image
            gt (str): Text contained in the line image
        """
        if self.preload:
            try:
                im = self.head_transforms(image)
                if not is_bitonal(im):
                    self.im_mode = im.mode
                im = self.tail_transforms(im)
            except ValueError:
                raise KrakenInputException(f'Image transforms failed on {image}')
            self._images.append(im)
        else:
            self._images.append(image)
        for func in self.text_transforms:
            gt = func(gt)
        self._gt.append(gt)
        self.alphabet.update(gt)

    def encode(self, codec: Optional[PytorchCodec] = None) -> None:
        """
        Adds a codec to the dataset and encodes all text lines. Has to be run
        before sampling from the dataset.
        """
        if codec:
            self.codec = codec
        else:
            self.codec = PytorchCodec(''.join(self.alphabet.keys()))
        self.training_set = []  # type: List[Tuple[Union[Image, torch.Tensor], torch.Tensor]]
        for im, gt in zip(self._images, self._gt):
            self.training_set.append((im, self.codec.encode(gt)))

    def no_encode(self) -> None:
        """
        Creates an unencoded dataset.
        """
        self.training_set = []  # type: List[Tuple[Union[Image, torch.Tensor], str]]
        for im, gt in zip(self._images, self._gt):
            self.training_set.append((im, gt))

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        if self.preload:
            x, y = self.training_set[index]
            if self.aug:
                im = x.permute((1, 2, 0)).numpy()
                o = self.aug(image=im)
                im = torch.tensor(o['image'].transpose(2, 0, 1))
                return {'image': im, 'target': y}
            return {'image': x, 'target': y}
        else:
            item = self.training_set[index]
            try:
                logger.debug(f'Attempting to load {item[0]}')
                im = item[0]
                if not isinstance(im, Image.Image):
                    im = Image.open(im)
                im = self.head_transforms(im)
                if not is_bitonal(im):
                    self.im_mode = im.mode
                im = self.tail_transforms(im)
                if self.aug:
                    im = im.permute((1, 2, 0)).numpy()
                    o = self.aug(image=im)
                    im = torch.tensor(o['image'].transpose(2, 0, 1))
                return {'image': im, 'target': item[1]}
            except Exception:
                idx = np.random.randint(0, len(self.training_set))
                logger.debug(traceback.format_exc())
                logger.info(f'Failed. Replacing with sample {idx}')
                return self[idx]

    def __len__(self) -> int:
        return len(self.training_set)
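
# Usage sketch (illustration only, not part of the original module): the
# variant above returns dict samples and slices the transform pipeline after
# the second stage, so this example hands it a three-stage Compose -- two
# PIL-level stages for head_transforms and ToTensor for tail_transforms. The
# blank line image, the transform choices and augmentation=False (set to True
# only if albumentations is installed) are assumptions made for the example.
def _example_dict_dataset_usage():
    line_transforms = transforms.Compose([transforms.Grayscale(),
                                          transforms.Resize(48),
                                          transforms.ToTensor()])
    ds = GroundTruthDataset(im_transforms=line_transforms,
                            preload=True,
                            augmentation=False)
    ds.add_loaded(Image.new('L', (200, 48), 255), 'example transcription')
    ds.encode()
    sample = ds[0]  # {'image': line tensor, 'target': encoded label tensor}
    return sample['image'], sample['target']
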
class PolygonGTDataset(Dataset):
    """
    Dataset for training a line recognition model from polygonal/baseline data.
    """
    def __init__(self,
                 normalization: Optional[str] = None,
                 whitespace_normalization: bool = True,
                 reorder: bool = True,
                 im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
                 preload: bool = True,
                 augmentation: bool = False) -> None:
        self._images = []  # type: Union[List[Image], List[torch.Tensor]]
        self._gt = []  # type: List[str]
        self.alphabet = Counter()  # type: Counter
        self.text_transforms = []  # type: List[Callable[[str], str]]
        # split image transforms into two parts: one giving the final PIL
        # image before conversion to a tensor and the actual tensor
        # conversion part.
        self.head_transforms = transforms.Compose(im_transforms.transforms[:2])
        self.tail_transforms = transforms.Compose(im_transforms.transforms[2:])
        self.transforms = im_transforms
        self.preload = preload
        self.aug = None
        self.seg_type = 'baselines'
        # build text transformations
        if normalization:
            self.text_transforms.append(lambda x: unicodedata.normalize(cast(str, normalization), x))
        if whitespace_normalization:
            self.text_transforms.append(lambda x: regex.sub(r'\s', ' ', x).strip())
        if reorder:
            self.text_transforms.append(bd.get_display)
        if augmentation:
            from albumentations import (Compose, ToFloat, FromFloat, Flip,
                                        OneOf, MotionBlur, MedianBlur, Blur,
                                        ShiftScaleRotate, OpticalDistortion,
                                        ElasticTransform,
                                        RandomBrightnessContrast)

            self.aug = Compose([ToFloat(),
                                OneOf([
                                    MotionBlur(p=0.2),
                                    MedianBlur(blur_limit=3, p=0.1),
                                    Blur(blur_limit=3, p=0.1),
                                ], p=0.2),
                                ShiftScaleRotate(shift_limit=0.0625,
                                                 scale_limit=0.2,
                                                 rotate_limit=3,
                                                 p=0.2),
                                OneOf([
                                    OpticalDistortion(p=0.3),
                                    ElasticTransform(p=0.1),
                                ], p=0.2),
                                ], p=0.5)

        self.im_mode = '1'

    def add(self,
            image: Union[str, Image.Image],
            text: str,
            baseline: List[Tuple[int, int]],
            boundary: List[Tuple[int, int]],
            *args,
            **kwargs):
        """
        Adds a line to the dataset.

        Args:
            image (str or PIL.Image.Image): Path to the whole page image or
                                            the loaded page image itself.
            text (str): Transcription of the line.
            baseline (list): A list of coordinates [[x0, y0], ..., [xn, yn]].
            boundary (list): A polygon mask for the line.
        """
        for func in self.text_transforms:
            text = func(text)
        if not text:
            raise KrakenInputException('Text line is empty after transformations')
        if self.preload:
            if not isinstance(image, Image.Image):
                im = Image.open(image)
            else:
                im = image
            im, _ = next(extract_polygons(im, {'type': 'baselines',
                                               'lines': [{'baseline': baseline,
                                                          'boundary': boundary}]}))
            try:
                im = self.head_transforms(im)
                if not is_bitonal(im):
                    self.im_mode = im.mode
                im = self.tail_transforms(im)
            except ValueError:
                raise KrakenInputException('Image transforms failed on {}'.format(image))
            self._images.append(im)
        else:
            self._images.append((image, baseline, boundary))
        self._gt.append(text)
        self.alphabet.update(text)

    def encode(self, codec: Optional[PytorchCodec] = None) -> None:
        """
        Adds a codec to the dataset and encodes all text lines. Has to be run
        before sampling from the dataset.
        """
        if codec:
            self.codec = codec
        else:
            self.codec = PytorchCodec(''.join(self.alphabet.keys()))
        self.training_set = []  # type: List[Tuple[Union[Image, torch.Tensor], torch.Tensor]]
        for im, gt in zip(self._images, self._gt):
            self.training_set.append((im, self.codec.encode(gt)))

    def no_encode(self) -> None:
        """
        Creates an unencoded dataset.
        """
        self.training_set = []  # type: List[Tuple[Union[Image, torch.Tensor], str]]
        for im, gt in zip(self._images, self._gt):
            self.training_set.append((im, gt))

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        if self.preload:
            x, y = self.training_set[index]
            if self.aug:
                x = x.permute((1, 2, 0)).numpy()
                o = self.aug(image=x)
                x = torch.tensor(o['image'].transpose(2, 0, 1))
            return {'image': x, 'target': y}
        else:
            item = self.training_set[index]
            try:
                logger.debug('Attempting to load {}'.format(item[0]))
                im = item[0][0]
                if not isinstance(im, Image.Image):
                    im = Image.open(im)
                im, _ = next(extract_polygons(im, {'type': 'baselines',
                                                   'lines': [{'baseline': item[0][1],
                                                              'boundary': item[0][2]}]}))
                im = self.head_transforms(im)
                if not is_bitonal(im):
                    self.im_mode = im.mode
                im = self.tail_transforms(im)
                if self.aug:
                    im = im.permute((1, 2, 0)).numpy()
                    o = self.aug(image=im)
                    im = torch.tensor(o['image'].transpose(2, 0, 1))
                return {'image': im, 'target': item[1]}
            except Exception:
                idx = np.random.randint(0, len(self.training_set))
                logger.debug('Failed. Replacing with sample {}'.format(idx))
                return self[idx]

    def __len__(self) -> int:
        return len(self.training_set)
class GroundTruthDataset(Dataset):
    """
    Dataset for ground truth used during training.

    All data is cached in memory.
    """
    def __init__(self,
                 split: Callable[[str], str] = lambda x: os.path.splitext(x)[0],
                 suffix: str = '.gt.txt',
                 normalization: Optional[str] = None,
                 reorder: bool = True,
                 im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
                 preload: bool = True) -> None:
        """
        Reads a list of image-text pairs and creates a ground truth set.

        Args:
            split (func): Function for generating the base name without
                          extensions from paths.
            suffix (str): Suffix to attach to the image base name for text
                          retrieval.
            normalization (str): Unicode normalization applied to the ground
                                 truth.
            reorder (bool): Whether to rearrange code points in "display"/LTR
                            order.
            im_transforms (func): Function taking a PIL.Image and returning a
                                  tensor suitable for forward passes.
            preload (bool): Enables preloading and preprocessing of image files.
        """
        self.suffix = suffix
        self.split = lambda x: split(x) + self.suffix
        self._images = []  # type: Union[List[Image], List[torch.Tensor]]
        self._gt = []  # type: List[str]
        self.alphabet = Counter()  # type: Counter
        self.text_transforms = []  # type: List[Callable[[str], str]]
        self.transforms = im_transforms
        self.preload = preload
        # build text transformations
        if normalization:
            self.text_transforms.append(lambda x: unicodedata.normalize(cast(str, normalization), x))
        if reorder:
            self.text_transforms.append(bd.get_display)

    def add(self, image: str) -> None:
        """
        Adds a line-image-text pair to the dataset.

        Args:
            image (str): Input image path
        """
        with open(self.split(image), 'r', encoding='utf-8') as fp:
            gt = fp.read().strip('\n\r')
            for func in self.text_transforms:
                gt = func(gt)
            if not gt:
                raise KrakenInputException('Text line is empty ({})'.format(fp.name))
        if self.preload:
            im = Image.open(image)
            try:
                im = self.transforms(im)
            except ValueError:
                raise KrakenInputException('Image transforms failed on {}'.format(image))
            self._images.append(im)
        else:
            self._images.append(image)
        self._gt.append(gt)
        self.alphabet.update(gt)

    def encode(self, codec: Optional[PytorchCodec] = None) -> None:
        """
        Adds a codec to the dataset and encodes all text lines. Has to be run
        before sampling from the dataset.
        """
        if codec:
            self.codec = codec
        else:
            self.codec = PytorchCodec(''.join(self.alphabet.keys()))
        self.training_set = []  # type: List[Tuple[Union[Image, torch.Tensor], torch.Tensor]]
        for im, gt in zip(self._images, self._gt):
            self.training_set.append((im, self.codec.encode(gt)))

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        if self.preload:
            return self.training_set[index]
        else:
            item = self.training_set[index]
            try:
                logger.debug('Attempting to load {}'.format(item[0]))
                return (self.transforms(Image.open(item[0])), item[1])
            except Exception:
                idx = np.random.randint(0, len(self.training_set))
                logger.debug('Failed. Replacing with sample {}'.format(idx))
                return self[idx]

    def __len__(self) -> int:
        return len(self.training_set)