class IamLinesDataset(Dataset):
    """
    "The IAM Lines dataset, first published at the ICDAR 1999, contains forms of unconstrained
    handwritten text, which were scanned at a resolution of 300dpi and saved as PNG images with
    256 gray levels. From http://www.fki.inf.unibe.ch/databases/iam-handwriting-database

    The data split we will use is IAM lines Large Writer Independent Text Line Recognition Task
    (lwitlrt): 9,862 text lines. The validation set has been merged into the train set. The train
    set has 7,101 lines from 326 writers. The test set has 1,861 lines from 128 writers. The text
    lines of all data sets are mutually exclusive, thus each writer has contributed to one set only.

    Note that we use cachedproperty because data takes time to load.
    """
    def __init__(self):
        # Reuse the EMNIST character mapping (int label -> character) so both
        # datasets share one vocabulary.
        self.mapping = EmnistDataset().mapping
        # character -> int label, for re-encoding cleaned-up strings.
        self.inverse_mapping = {v: k for k, v in self.mapping.items()}
        self.num_classes = len(self.mapping)
        # Fixed line-image size: 28 px tall, 952 px wide.
        self.input_shape = (28, 952)
        # Up to 97 characters per line, one class distribution per position.
        self.output_shape = (97, self.num_classes)

    def cleanup(self, labels):
        """Decode integer label sequences to strings, pad each to the fixed
        output length with '_', and re-encode as integer label arrays.

        Returns a 2-D np.array of shape (num_labels, output_shape[0]).
        """
        labels_clean = []
        for label in labels:
            # Unknown label ids map to '' and are dropped from the string.
            s = ''.join([self.mapping.get(i, '') for i in label])
            # NOTE(review): this replace looks like a no-op (identical character
            # on both sides) — possibly mojibake of a curly-quote normalization
            # such as replace('”', '"'); confirm against the original pipeline.
            t = s.replace('"', '"')
            # Right-pad with '_' up to the fixed sequence length.
            t = t + '_' * (self.output_shape[0] - len(t))
            labels_clean.append([self.inverse_mapping[c] for c in t])
        return np.array(labels_clean)

    def load_or_generate_data(self):
        """Download the preprocessed IAM lines HDF5 file if absent, then load
        train/test images and cleaned-up integer labels into memory."""
        if not PROCESSED_DATA_FILENAME.exists():
            PROCESSED_DATA_DIRNAME.mkdir(parents=True, exist_ok=True)
            print('Downloading IAM lines...')
            urlretrieve(PROCESSED_DATA_URL, PROCESSED_DATA_FILENAME)
        with h5py.File(PROCESSED_DATA_FILENAME, 'r') as f:
            # [:] materializes each HDF5 dataset as an in-memory numpy array.
            self.x_train = f['x_train'][:]
            self.y_train_int = self.cleanup(f['y_train'][:])
            self.x_test = f['x_test'][:]
            self.y_test_int = self.cleanup(f['y_test'][:])

    @cachedproperty
    def y_train(self):
        # One-hot encode lazily and cache: the expanded array is large.
        return to_categorical(self.y_train_int, self.num_classes)

    @cachedproperty
    def y_test(self):
        # One-hot encode lazily and cache (see y_train).
        return to_categorical(self.y_test_int, self.num_classes)

    def __repr__(self):
        # Note: shapes are only available after load_or_generate_data().
        return ('IAM Lines Dataset\n'
                f'Num classes: {self.num_classes}\n'
                f'Mapping: {self.mapping}\n'
                f'Train: {self.x_train.shape} {self.y_train.shape}\n'
                f'Test: {self.x_test.shape} {self.y_test.shape}\n')
def test_evaluate(self):
    """Evaluate a CharacterPredictor on EMNIST: require accuracy above 0.7
    and the full evaluation to finish in under 10 seconds."""
    dataset = EmnistDataset()
    dataset.load_or_generate_data()
    predictor = CharacterPredictor()
    started = time()
    metric = predictor.evaluate(dataset)
    time_taken = time() - started
    print(f'acc: {metric}, time_taken: {time_taken}')
    self.assertGreater(metric, 0.7)
    self.assertLess(time_taken, 10)
def create_emnist_support_files():
    """Regenerate the EMNIST support images: wipe the support directory,
    then write one labeled PNG for each of a few fixed test samples."""
    # Start from an empty directory so stale images cannot linger.
    shutil.rmtree(SUPPORT_DIRNAME, ignore_errors=True)
    SUPPORT_DIRNAME.mkdir()
    emnist = EmnistDataset()
    emnist.load_or_generate_data()
    for sample_index in (5, 7, 9):
        image = emnist.x_test[sample_index]
        # argmax over the one-hot vector recovers the integer class id.
        label = emnist.mapping[np.argmax(emnist.y_test[sample_index])]
        print(sample_index, label)
        util.write_image(image, str(SUPPORT_DIRNAME / f'{label}.png'))
def __init__(self, max_length: int = 34, max_overlap: float = 0.33,
             num_train: int = 10000, num_test: int = 1000):
    """Configure a synthetic-lines dataset built from EMNIST characters.

    Args:
        max_length: maximum number of characters per generated line.
        max_overlap: maximum horizontal overlap fraction between characters.
        num_train: number of training lines to generate.
        num_test: number of test lines to generate.
    """
    emnist = EmnistDataset()
    self.emnist = emnist
    self.mapping = emnist.mapping
    self.num_classes = len(emnist.mapping)
    self.max_length = max_length
    self.max_overlap = max_overlap
    self.num_train = num_train
    self.num_test = num_test
    # A line image is one character tall and up to max_length characters wide.
    char_height, char_width = emnist.input_shape[0], emnist.input_shape[1]
    self.input_shape = (char_height, char_width * max_length)
    self.output_shape = (max_length, self.num_classes)
def __init__(self):
    """Set up the shared EMNIST character mapping and the fixed
    input/output geometry for IAM line images."""
    emnist_mapping = EmnistDataset().mapping
    self.mapping = emnist_mapping
    # Reverse lookup: character -> integer label.
    self.inverse_mapping = {char: idx for idx, char in emnist_mapping.items()}
    self.num_classes = len(emnist_mapping)
    # 28 px tall, 952 px wide line crops; up to 97 characters per line.
    self.input_shape = (28, 952)
    self.output_shape = (97, self.num_classes)