Esempio n. 1
0
 def test_krakentrainer_rec_bl_load(self):
     """
     Tests recognition trainer constructor with XML training data and a
     model supplied through the `load` argument.
     """
     trainer_kwargs = {
         'training_data': [self.xml],
         'evaluation_data': [self.xml],
         'format_type': 'xml',
         'load': self.model,
     }
     trainer = KrakenTrainer.recognition_train_gen(**trainer_kwargs)

     # The resulting trainer must report a baseline model backed by a
     # polygon ground truth dataset.
     self.assertEqual(trainer.model.seg_type, 'baselines')
     expected_dataset_cls = kraken.lib.dataset.PolygonGTDataset
     self.assertIsInstance(trainer.train_set.dataset, expected_dataset_cls)
Esempio n. 2
0
 def test_krakentrainer_rec_box_load(self):
     """
     Tests recognition trainer constructor with legacy path training data
     and a model supplied through the `load` argument.
     """
     trainer = KrakenTrainer.recognition_train_gen(
         format_type='path',
         load=self.model,
         training_data=self.box_lines,
         evaluation_data=self.box_lines,
     )

     # The resulting trainer must report a bounding box model backed by
     # the plain ground truth dataset.
     self.assertEqual(trainer.model.seg_type, 'bbox')
     expected_dataset_cls = kraken.lib.dataset.GroundTruthDataset
     self.assertIsInstance(trainer.train_set.dataset, expected_dataset_cls)
Esempio n. 3
0
def learn(transcribe_path,
          validation_size=0.3,
          batch_size: int = 1,
          lag: int = 5,
          min_delta: float = 0,
          learning_rate: float = 0.001,
          threads: int = 1,
          augment: bool = False) -> None:
    """
    Creates models out of learning from the transcribe file.

    The transcription is extracted into ``output_directory`` by calling
    ``ketos.extract`` in a forked child process. The extracted ``.png``
    files are then shuffled, split into a training and an evaluation set
    and handed to ``KrakenTrainer.recognition_train_gen``. Progress and
    per-epoch evaluation results are printed to stdout.

    :param transcribe_path: The path of the data to learn from
    :param validation_size: Fraction of the extracted images that goes
                            into the evaluation set
    :param batch_size: Batch size, stored as hyper-parameter ``batch_size``
    :param lag: Number of iterations without any improvement that are
                allowed, stored as hyper-parameter ``lag``
    :param min_delta: Stored as hyper-parameter ``min_delta``
    :param learning_rate: Learning rate, stored as hyper-parameter ``lrate``
    :param threads: Number of threads to run on
    :param augment: Augment the data
    :return: None
    """
    output_dir = "output_directory"

    child_pid = os.fork()
    if child_pid == 0:
        # NOTE(review): presumably ketos.extract is a CLI entry point that
        # terminates the child process itself, so exec() never receives a
        # value; the fork keeps that exit from ending this process.
        # Confirm against the ketos implementation.
        exec(ketos.extract(["--output", output_dir, transcribe_path]))
    # Wait for the extraction child specifically; a bare os.wait() would
    # return for whichever child of this process happens to exit first.
    os.waitpid(child_pid, 0)

    # Match on the file extension: a substring test would also pick up any
    # other file whose name merely contains "png".
    pngs = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if name.endswith(".png")
    )
    random.shuffle(pngs)

    # Compute the split point once so both slices always agree.
    split_at = int(len(pngs) * (1 - validation_size))
    train = pngs[:split_at]
    test = pngs[split_at:]

    def _update_progress():
        print('.', end='')

    def _print_eval(epoch, accuracy, **kwargs):
        print(
            f"epoch: {epoch}, accuracy: {accuracy}, right: {kwargs['chars'] - kwargs['error']}, errors: {kwargs['error']}"
        )

    # Work on a copy: writing into the module-level defaults would leak
    # this call's settings into every later user of those defaults.
    hp = dict(kraken.lib.default_specs.RECOGNITION_HYPER_PARAMS)
    hp["batch_size"] = batch_size
    hp["lag"] = lag
    hp["min_delta"] = min_delta
    hp["lrate"] = learning_rate

    kt = KrakenTrainer.recognition_train_gen(hyper_params=hp,
                                             training_data=train,
                                             evaluation_data=test,
                                             format_type='path',
                                             threads=threads,
                                             augment=augment)
    kt.run(_print_eval, _update_progress)
Esempio n. 4
0
 def test_krakentrainer_rec_box_path(self):
     """
     Checks that the recognition trainer constructor accepts legacy path
     training data and produces a bounding box setup.
     """
     trainer_kwargs = {
         'training_data': self.box_lines,
         'evaluation_data': self.box_lines,
         'format_type': 'path',
     }
     trainer = KrakenTrainer.recognition_train_gen(**trainer_kwargs)

     self.assertEqual(trainer.model.seg_type, 'bbox')
     expected_dataset_cls = kraken.lib.dataset.GroundTruthDataset
     self.assertIsInstance(trainer.train_set.dataset, expected_dataset_cls)
Esempio n. 5
0
 def test_krakentrainer_rec_bl_dict(self):
     """
     Checks that the recognition trainer constructor accepts dictionary
     style training data and produces a baseline setup.
     """
     def _line_sample():
         # Build a fresh record on every call so the training and the
         # evaluation data never share objects.
         return {
             'image': path.join(resources, 'bw.png'),
             'text': 'foo',
             'baseline': [[10, 10], [300, 10]],
             'boundary': [[10, 5], [300, 5], [300, 15], [10, 15]],
         }

     training_data = [_line_sample()]
     evaluation_data = [_line_sample()]
     trainer = KrakenTrainer.recognition_train_gen(
         format_type=None,
         training_data=training_data,
         evaluation_data=evaluation_data,
     )

     self.assertEqual(trainer.model.seg_type, 'baselines')
     expected_dataset_cls = kraken.lib.dataset.PolygonGTDataset
     self.assertIsInstance(trainer.train_set.dataset, expected_dataset_cls)
Esempio n. 6
0
 def test_krakentrainer_rec_bl_append(self):
     """
     Tests recognition trainer constructor with XML training data when a
     loaded model is combined with the `append` and `spec` arguments.
     """
     trainer_kwargs = {
         'training_data': [self.xml],
         'evaluation_data': [self.xml],
         'format_type': 'xml',
         'load': self.model,
         'append': 1,
         'spec': '[Cr4,4,32]',
     }
     trainer = KrakenTrainer.recognition_train_gen(**trainer_kwargs)

     self.assertEqual(trainer.model.seg_type, 'baselines')
     expected_dataset_cls = kraken.lib.dataset.PolygonGTDataset
     self.assertIsInstance(trainer.train_set.dataset, expected_dataset_cls)
     # The resulting model spec has to begin with this exact layer prefix.
     expected_prefix = '[1,48,0,1 Cr{C_0}4,2,1,4,2 Cr{C_1}4,4,32 O{O_2}'
     self.assertTrue(trainer.model.spec.startswith(expected_prefix))
Esempio n. 7
0
 def test_krakentrainer_rec_box_append(self):
     """
     Tests recognition trainer constructor with legacy path training data
     when a loaded model is combined with the `append` and `spec`
     arguments.
     """
     trainer = KrakenTrainer.recognition_train_gen(
         format_type='path',
         load=self.model,
         append=1,
         spec='[Cr4,4,32]',
         training_data=self.box_lines,
         evaluation_data=self.box_lines,
     )

     self.assertEqual(trainer.model.seg_type, 'bbox')
     expected_dataset_cls = kraken.lib.dataset.GroundTruthDataset
     self.assertIsInstance(trainer.train_set.dataset, expected_dataset_cls)
     # The resulting model spec has to begin with this exact layer prefix.
     expected_prefix = '[1,48,0,1 Cr{C_0}4,2,1,4,2 Cr{C_1}4,4,32 O{O_2}'
     self.assertTrue(trainer.model.spec.startswith(expected_prefix))
Esempio n. 8
0
 def test_krakentrainer_rec_bl_xml(self):
     """
     Checks that the recognition trainer constructor accepts XML training
     data, produces a baseline setup and exposes the expected set sizes.
     """
     trainer_kwargs = {
         'training_data': [self.xml],
         'evaluation_data': [self.xml],
         'format_type': 'xml',
     }
     trainer = KrakenTrainer.recognition_train_gen(**trainer_kwargs)

     self.assertEqual(trainer.model.seg_type, 'baselines')
     expected_dataset_cls = kraken.lib.dataset.PolygonGTDataset
     self.assertIsInstance(trainer.train_set.dataset, expected_dataset_cls)
     # Training and validation sets are both expected to hold 44 items.
     train_size = len(trainer.train_set)
     self.assertEqual(train_size, 44)
     val_size = len(trainer.val_set)
     self.assertEqual(val_size, 44)