def add_to_argparse(parser):
    """Register this data module's CLI options on *parser* and return it.

    Adds the base-module arguments first, then the integer options that
    control the synthetic image set (sample count, class count, image size).
    """
    BaseDataModule.add_to_argparse(parser)
    # All four options are plain ints; declare them table-driven so the
    # flag/default pairs read at a glance. Order matters for --help output.
    int_options = (
        ("--num_samples", _NUM_SAMPLES),
        ("--num_classes", _NUM_CLASSES),
        ("--image_height", _IMAGE_LEN),
        ("--image_width", _IMAGE_LEN),
    )
    for flag, fallback in int_options:
        parser.add_argument(flag, type=int, default=fallback)
    return parser
def add_to_argparse(parser):
    """Register EMNIST-lines synthesis options on *parser* and return it.

    Adds the base-module arguments, then line-length and character-overlap
    controls plus the start/end-token flag. Registration order is preserved
    so --help output is unchanged.
    """
    BaseDataModule.add_to_argparse(parser)
    option_specs = (
        ("--max_length", dict(type=int, default=MAX_LENGTH, help="Max line length in characters.")),
        (
            "--min_overlap",
            dict(type=float, default=MIN_OVERLAP, help="Min overlap between characters in a line, between 0 and 1."),
        ),
        (
            "--max_overlap",
            dict(type=float, default=MAX_OVERLAP, help="Max overlap between characters in a line, between 0 and 1."),
        ),
        # Boolean flag: absent -> False, present -> True.
        ("--with_start_end_tokens", dict(action="store_true", default=False)),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser
def add_to_argparse(parser):
    """Register augmentation and line-synthesis options on *parser*; return it.

    Adds the base-module arguments, then the augmentation toggle and the
    line-length/overlap controls. Registration order is preserved so
    --help output is unchanged.
    """
    BaseDataModule.add_to_argparse(parser)
    option_specs = (
        # String-valued on purpose: downstream code compares it against "true".
        ("--augment_data", dict(type=str, default="true")),
        ("--max_length", dict(type=int, default=MAX_LENGTH, help="Max line length in characters.")),
        (
            "--min_overlap",
            dict(type=float, default=MIN_OVERLAP, help="Min overlap between characters in a line, between 0 and 1."),
        ),
        (
            "--max_overlap",
            dict(type=float, default=MAX_OVERLAP, help="Max overlap between characters in a line, between 0 and 1."),
        ),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser
import shutil import zipfile from torchvision import transforms import h5py import numpy as np import toml from text_recognizer.data.base_data_module import _download_raw_dataset, BaseDataModule, load_and_print_info from text_recognizer.data.util import BaseDataset, split_dataset NUM_SPECIAL_TOKENS = 4 SAMPLE_TO_BALANCE = True # If true, take at most the mean number of instances per class. TRAIN_FRAC = 0.8 RAW_DATA_DIRNAME = BaseDataModule.data_dirname() / "raw" / "emnist" METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml" DL_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "emnist" PROCESSED_DATA_DIRNAME = BaseDataModule.data_dirname() / "processed" / "emnist" PROCESSED_DATA_FILENAME = PROCESSED_DATA_DIRNAME / "byclass.h5" ESSENTIALS_FILENAME = Path( __file__).parents[0].resolve() / "emnist_essentials.json" class EMNIST(BaseDataModule): """ "The EMNIST dataset is a set of handwritten character digits derived from the NIST Special Database 19 and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset." From https://www.nist.gov/itl/iad/image-group/emnist-dataset The data split we will use is
from torchvision import transforms import h5py import numpy as np import torch import sys sys.path.append('D:\Projects\All Projects\FSDL-21-Codes-and-Lab\Text Recognizer Pro\lab3') from text_recognizer.data.util import BaseDataset from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info from text_recognizer.data.emnist import EMNIST DATA_DIRNAME = BaseDataModule.data_dirname() / "processed" / "emnist_lines" ESSENTIALS_FILENAME = Path(__file__).parents[0].resolve() / "emnist_lines_essentials.json" MAX_LENGTH = 32 MIN_OVERLAP = 0 MAX_OVERLAP = 0.33 NUM_TRAIN = 10000 NUM_VAL = 2000 NUM_TEST = 2000 class EMNISTLines(BaseDataModule): """EMNIST Lines dataset: synthetic handwriting lines dataset made from EMNIST characters.""" def __init__( self,
from text_recognizer.data.iam_paragraphs import ( IAMParagraphs, get_dataset_properties, resize_image, get_transform, NEW_LINE_TOKEN, IMAGE_SCALE_FACTOR, ) from text_recognizer.data.iam import IAM from text_recognizer.data.iam_lines import line_crops_and_labels, save_images_and_labels, load_line_crops_and_labels from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info from text_recognizer.data.util import BaseDataset, convert_strings_to_labels PROCESSED_DATA_DIRNAME = BaseDataModule.data_dirname() / "processed" / "iam_synthetic_paragraphs" class IAMSyntheticParagraphs(IAMParagraphs): """ IAM Handwriting database synthetic paragraphs. """ def prepare_data(self, *args, **kwargs) -> None: """ Prepare IAM lines such that they can be used to generate synthetic paragraphs dataset in setup(). This method is IAMLines.prepare_data + resizing of line crops. """ if PROCESSED_DATA_DIRNAME.exists(): return print("IAMSyntheticParagraphs.prepare_data: preparing IAM lines for synthetic IAM paragraph creation...")
"""MNIST DataModule""" import argparse from torch.utils.data import random_split from torchvision.datasets import MNIST as TorchMNIST from torchvision import transforms import sys sys.path.append('D:\Projects\All Projects\FSDL-21-Codes-and-Lab\Text Recognizer Pro\lab1') from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info DOWNLOADED_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" # NOTE: temp fix until https://github.com/pytorch/vision/issues/1938 is resolved from six.moves import urllib # pylint: disable=wrong-import-position, wrong-import-order opener = urllib.request.build_opener() opener.addheaders = [("User-agent", "Mozilla/5.0")] urllib.request.install_opener(opener) class MNIST(BaseDataModule): """ MNIST DataModule. Learn more at https://pytorch-lightning.readthedocs.io/en/stable/extensions/datamodules.html """ def __init__(self, args: argparse.Namespace) -> None: super().__init__(args) self.data_dir = DOWNLOADED_DATA_DIRNAME
def add_to_argparse(parser):
    """Register this data module's CLI options on *parser* and return it.

    Adds the shared base-module arguments, then the augmentation toggle.
    """
    BaseDataModule.add_to_argparse(parser)
    # String-valued on purpose ("true"/"false"): downstream code compares
    # the parsed value against the literal string "true".
    parser.add_argument(
        "--augment_data",
        default="true",
        type=str,
    )
    return parser
import json import random from PIL import Image, ImageFile, ImageOps import numpy as np import torch from torchvision import transforms from text_recognizer.data.util import BaseDataset, convert_strings_to_labels from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info from text_recognizer.data.emnist import EMNIST from text_recognizer.data.iam import IAM ImageFile.LOAD_TRUNCATED_IMAGES = True PROCESSED_DATA_DIRNAME = BaseDataModule.data_dirname() / "processed" / "iam_lines" TRAIN_FRAC = 0.8 IMAGE_HEIGHT = 56 IMAGE_WIDTH = 1024 # Rounding up the actual empirical max to a power of 2 class IAMLines(BaseDataModule): """ IAM Handwriting database lines. """ def __init__(self, args: argparse.Namespace = None): super().__init__(args) self.augment = self.args.get("augment_data", "true") == "true" self.mapping = EMNIST().mapping self.inverse_mapping = {v: k for k, v in enumerate(self.mapping)} self.dims = (1, IMAGE_HEIGHT, IMAGE_WIDTH) # We assert that this is correct in setup()
"""SentenceGenerator class and supporting functions.""" import itertools import re import string from typing import Optional import nltk import numpy as np from text_recognizer.data.base_data_module import BaseDataModule NLTK_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "nltk" class SentenceGenerator: """Generate text sentences using the Brown corpus.""" def __init__(self, max_length: Optional[int] = None): self.text = brown_text() self.word_start_inds = [0] + [ _.start(0) + 1 for _ in re.finditer(" ", self.text) ] self.max_length = max_length def generate(self, max_length: Optional[int] = None) -> str: """ Sample a string from text of the Brown corpus of length at least one word and at most max_length. """ if max_length is None: max_length = self.max_length if max_length is None: raise ValueError(
"""Class for loading the IAM dataset, which encompasses both paragraphs and lines, with associated utilities.""" from pathlib import Path from typing import Dict, List import argparse import os import xml.etree.ElementTree as ElementTree import zipfile from boltons.cacheutils import cachedproperty import toml from text_recognizer.data.base_data_module import BaseDataModule, _download_raw_dataset, load_and_print_info RAW_DATA_DIRNAME = BaseDataModule.data_dirname() / "raw" / "iam" METADATA_FILENAME = RAW_DATA_DIRNAME / "metadata.toml" DL_DATA_DIRNAME = BaseDataModule.data_dirname() / "downloaded" / "iam" EXTRACTED_DATASET_DIRNAME = DL_DATA_DIRNAME / "iamdb" DOWNSAMPLE_FACTOR = 2 # If images were downsampled, the regions must also be. LINE_REGION_PADDING = 16 # add this many pixels around the exact coordinates class IAM(BaseDataModule): """ "The IAM Lines dataset, first published at the ICDAR 1999, contains forms of unconstrained handwritten text, which were scanned at a resolution of 300dpi and saved as PNG images with 256 gray levels. From http://www.fki.inf.unibe.ch/databases/iam-handwriting-database The data split we will use is IAM lines Large Writer Independent Text Line Recognition Task (lwitlrt): 9,862 text lines. The validation set has been merged into the train set.