def add_to_argparse(parser):
    """Register the synthetic-dataset CLI flags on *parser* and return it."""
    BaseDataModule.add_to_argparse(parser)
    # All four options are plain integers; declare them data-driven.
    for flag, fallback in (
        ("--num_samples", _NUM_SAMPLES),
        ("--num_classes", _NUM_CLASSES),
        ("--image_height", _IMAGE_LEN),
        ("--image_width", _IMAGE_LEN),
    ):
        parser.add_argument(flag, type=int, default=fallback)
    return parser
Example no. 2
0
 def add_to_argparse(parser):
     """Add EMNISTLines generation options on top of the base module's; returns the parser."""
     BaseDataModule.add_to_argparse(parser)
     parser.add_argument("--max_length", type=int, default=MAX_LENGTH, help="Max line length in characters.")
     parser.add_argument("--min_overlap", type=float, default=MIN_OVERLAP, help="Min overlap between characters in a line, between 0 and 1.")
     parser.add_argument("--max_overlap", type=float, default=MAX_OVERLAP, help="Max overlap between characters in a line, between 0 and 1.")
     # store_true makes the flag presence-based; default kept explicit for clarity.
     parser.add_argument("--with_start_end_tokens", action="store_true", default=False)
     return parser
 def add_to_argparse(parser):
     """Attach data-generation and augmentation flags to *parser*; returns it."""
     BaseDataModule.add_to_argparse(parser)
     # NOTE: augmentation flag travels as the string "true"/"false", not a bool.
     parser.add_argument("--augment_data", type=str, default="true")
     parser.add_argument("--max_length", type=int, default=MAX_LENGTH, help="Max line length in characters.")
     overlap_flags = (
         ("--min_overlap", MIN_OVERLAP, "Min overlap between characters in a line, between 0 and 1."),
         ("--max_overlap", MAX_OVERLAP, "Max overlap between characters in a line, between 0 and 1."),
     )
     for flag, fallback, text in overlap_flags:
         parser.add_argument(flag, type=float, default=fallback, help=text)
     return parser
Example no. 4
0
import shutil
import zipfile

from torchvision import transforms
import h5py
import numpy as np
import toml

from text_recognizer.data.base_data_module import _download_raw_dataset, BaseDataModule, load_and_print_info
from text_recognizer.data.util import BaseDataset, split_dataset

# Constants for processing raw EMNIST into a single HDF5 file.
NUM_SPECIAL_TOKENS = 4
SAMPLE_TO_BALANCE = True  # If true, take at most the mean number of instances per class.
TRAIN_FRAC = 0.8

# Directory layout, all rooted at the data module's data directory.
RAW_DATA_DIRNAME = BaseDataModule.data_dirname() / "raw" / "emnist"
METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml"
DL_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "emnist"
PROCESSED_DATA_DIRNAME = BaseDataModule.data_dirname() / "processed" / "emnist"
PROCESSED_DATA_FILENAME = PROCESSED_DATA_DIRNAME / "byclass.h5"
# NOTE(review): Path is not imported in this excerpt — needs `from pathlib import Path`.
ESSENTIALS_FILENAME = Path(
    __file__).parents[0].resolve() / "emnist_essentials.json"


class EMNIST(BaseDataModule):
    """
    "The EMNIST dataset is a set of handwritten character digits derived from the NIST Special Database 19
    and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset."
    From https://www.nist.gov/itl/iad/image-group/emnist-dataset

    The data split we will use is
Example no. 5
0
from torchvision import transforms
import h5py
import numpy as np
import torch


import sys
# NOTE(review): machine-specific path hack. Raw string needed: the original non-raw
# literal contains invalid escape sequences ("\P", "\T", ...), which are deprecated
# and will become a SyntaxError in future Python. The value is unchanged.
sys.path.append(r'D:\Projects\All Projects\FSDL-21-Codes-and-Lab\Text Recognizer Pro\lab3')

from text_recognizer.data.util import BaseDataset
from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info
from text_recognizer.data.emnist import EMNIST


# Output location for the processed synthetic-lines dataset.
DATA_DIRNAME = BaseDataModule.data_dirname() / "processed" / "emnist_lines"
# NOTE(review): Path is not imported in this excerpt — needs `from pathlib import Path`.
ESSENTIALS_FILENAME = Path(__file__).parents[0].resolve() / "emnist_lines_essentials.json"

# Defaults for synthetic line generation and split sizes.
MAX_LENGTH = 32
MIN_OVERLAP = 0
MAX_OVERLAP = 0.33
NUM_TRAIN = 10000
NUM_VAL = 2000
NUM_TEST = 2000


class EMNISTLines(BaseDataModule):
    """EMNIST Lines dataset: synthetic handwriting lines dataset made from EMNIST characters."""

    def __init__(
        self,
Example no. 6
0
from text_recognizer.data.iam_paragraphs import (
    IAMParagraphs,
    get_dataset_properties,
    resize_image,
    get_transform,
    NEW_LINE_TOKEN,
    IMAGE_SCALE_FACTOR,
)
from text_recognizer.data.iam import IAM
from text_recognizer.data.iam_lines import line_crops_and_labels, save_images_and_labels, load_line_crops_and_labels
from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info
from text_recognizer.data.util import BaseDataset, convert_strings_to_labels


PROCESSED_DATA_DIRNAME = BaseDataModule.data_dirname() / "processed" / "iam_synthetic_paragraphs"


class IAMSyntheticParagraphs(IAMParagraphs):
    """
    IAM Handwriting database synthetic paragraphs.
    """

    def prepare_data(self, *args, **kwargs) -> None:
        """
        Prepare IAM lines such that they can be used to generate synthetic paragraphs dataset in setup().
        This method is IAMLines.prepare_data + resizing of line crops.
        """
        # Processed output doubles as a cache: if it already exists, skip the expensive preparation.
        if PROCESSED_DATA_DIRNAME.exists():
            return
        print("IAMSyntheticParagraphs.prepare_data: preparing IAM lines for synthetic IAM paragraph creation...")
        # NOTE(review): this excerpt appears truncated here — the crop extraction/resizing
        # step presumably follows; confirm against the full source file.
Example no. 7
0
"""MNIST DataModule"""
import argparse

from torch.utils.data import random_split
from torchvision.datasets import MNIST as TorchMNIST
from torchvision import transforms

import sys
# NOTE(review): machine-specific path hack. Raw string needed: the original non-raw
# literal contains invalid escape sequences ("\P", "\T", ...), which are deprecated
# and will become a SyntaxError in future Python. The value is unchanged.
sys.path.append(r'D:\Projects\All Projects\FSDL-21-Codes-and-Lab\Text Recognizer Pro\lab1')

from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info

DOWNLOADED_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded"

# NOTE: temp fix until https://github.com/pytorch/vision/issues/1938 is resolved
from six.moves import urllib  # pylint: disable=wrong-import-position, wrong-import-order

# Install a process-wide urllib opener that sends a browser-like User-agent header,
# working around the torchvision MNIST download issue linked above.
opener = urllib.request.build_opener()
opener.addheaders = [("User-agent", "Mozilla/5.0")]
urllib.request.install_opener(opener)


class MNIST(BaseDataModule):
    """
    MNIST DataModule.
    Learn more at https://pytorch-lightning.readthedocs.io/en/stable/extensions/datamodules.html
    """

    def __init__(self, args: argparse.Namespace) -> None:
        """Store the download directory; common argument handling lives in the base class."""
        super().__init__(args)
        self.data_dir = DOWNLOADED_DATA_DIRNAME
 def add_to_argparse(parser):
     """Extend *parser* with the data-augmentation flag on top of the base arguments."""
     BaseDataModule.add_to_argparse(parser)
     # Flag is carried as the string "true"/"false" rather than a bool.
     augment_kwargs = {"type": str, "default": "true"}
     parser.add_argument("--augment_data", **augment_kwargs)
     return parser
import json
import random

from PIL import Image, ImageFile, ImageOps
import numpy as np
import torch
from torchvision import transforms

from text_recognizer.data.util import BaseDataset, convert_strings_to_labels
from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info
from text_recognizer.data.emnist import EMNIST
from text_recognizer.data.iam import IAM

# Pillow setting: load truncated image files instead of raising mid-read.
ImageFile.LOAD_TRUNCATED_IMAGES = True

PROCESSED_DATA_DIRNAME = BaseDataModule.data_dirname() / "processed" / "iam_lines"
TRAIN_FRAC = 0.8  # fraction assigned to the train split; split usage is not visible in this excerpt
IMAGE_HEIGHT = 56
IMAGE_WIDTH = 1024  # Rounding up the actual empirical max to a power of 2

class IAMLines(BaseDataModule):
    """
    IAM Handwriting database lines.
    """

    def __init__(self, args: argparse.Namespace = None):
        """Set up character mappings, augmentation flag, and fixed input dims."""
        super().__init__(args)
        # CLI flag arrives as a string; anything other than exactly "true" disables augmentation.
        # NOTE(review): assumes BaseDataModule exposes parsed args as dict-like self.args — confirm.
        self.augment = self.args.get("augment_data", "true") == "true"
        # Character set is borrowed from EMNIST; this instantiates the EMNIST data module.
        self.mapping = EMNIST().mapping
        self.inverse_mapping = {v: k for k, v in enumerate(self.mapping)}
        self.dims = (1, IMAGE_HEIGHT, IMAGE_WIDTH)  # We assert that this is correct in setup()
"""SentenceGenerator class and supporting functions."""
import itertools
import re
import string
from typing import Optional

import nltk
import numpy as np

from text_recognizer.data.base_data_module import BaseDataModule

NLTK_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "nltk"


class SentenceGenerator:
    """Generate text sentences using the Brown corpus."""
    def __init__(self, max_length: Optional[int] = None):
        """Load the corpus text and record the index where every word starts."""
        self.text = brown_text()
        # Word starts = index 0 plus the position immediately after each space.
        starts = [0]
        for match in re.finditer(" ", self.text):
            starts.append(match.start(0) + 1)
        self.word_start_inds = starts
        self.max_length = max_length

    def generate(self, max_length: Optional[int] = None) -> str:
        """
        Sample a string from text of the Brown corpus of length at least one word and at most max_length.
        """
        if max_length is None:
            max_length = self.max_length
        if max_length is None:
            raise ValueError(
Example no. 11
0
"""Class for loading the IAM dataset, which encompasses both paragraphs and lines, with associated utilities."""
from pathlib import Path
from typing import Dict, List
import argparse
import os
import xml.etree.ElementTree as ElementTree
import zipfile

from boltons.cacheutils import cachedproperty
import toml

from text_recognizer.data.base_data_module import BaseDataModule, _download_raw_dataset, load_and_print_info

# Directory layout for the raw, downloaded, and extracted IAM dataset.
RAW_DATA_DIRNAME = BaseDataModule.data_dirname() / "raw" / "iam"
METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml"
DL_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "iam"
EXTRACTED_DATASET_DIRNAME = DL_DATA_DIRNAME / "iamdb"

DOWNSAMPLE_FACTOR = 2  # If images were downsampled, the regions must also be.
LINE_REGION_PADDING = 16  # add this many pixels around the exact coordinates


class IAM(BaseDataModule):
    """
    "The IAM Lines dataset, first published at the ICDAR 1999, contains forms of unconstrained handwritten text,
    which were scanned at a resolution of 300dpi and saved as PNG images with 256 gray levels.
    From http://www.fki.inf.unibe.ch/databases/iam-handwriting-database

    The data split we will use is
    IAM lines Large Writer Independent Text Line Recognition Task (lwitlrt): 9,862 text lines.
        The validation set has been merged into the train set.