コード例 #1
0
    def run(self,
            n_train: int,
            val_amount: float,
            cross_folds: int,
            single_folds: List[int],
            train_books: List[str],
            test_books: List[str],
            train_books_extend: List[str]
            ):
        """Build per-fold dataset splits and run one experiment per fold.

        Only pages whose staff-line and layout locks are both set are
        considered ground truth.  After all folds have run, the aggregated
        results are printed via the experimenter of the configured
        algorithm type.
        """
        from omr.steps.step import Step
        global_args = self.global_args
        logger.info("Finding PcGts files with valid ground truth")

        # One entry per fold, each carrying train/validation/test PcGts files.
        folds = generate_dataset(
            lock_states=[LockState(Locks.STAFF_LINES, True), LockState(Locks.LAYOUT, True)],
            n_train=n_train,
            val_amount=val_amount,
            cross_folds=cross_folds,
            single_folds=single_folds,
            train_books=train_books,
            test_books=test_books,
            train_books_extend=train_books_extend,
        )

        # Wrap every fold in a SingleDataArgs, giving each fold its own
        # model output directory: <model_dir>/<algorithm>_<fold>.
        fold_args = []
        for fold in folds:
            model_dir = os.path.join(
                global_args.model_dir,
                '{}_{}'.format(global_args.algorithm_type.value, fold.fold))
            fold_args.append(SingleDataArgs(
                fold.fold,
                model_dir,
                fold.train_pcgts_files,
                fold.validation_pcgts_files,
                fold.test_pcgts_files,
                global_args))

        experimenter_class = Step.meta(global_args.algorithm_type).experimenter()
        results = []
        for args in fold_args:
            results.append(experimenter_class(args, logger).run_single())
        experimenter_class.print_results(global_args, results, logger)
コード例 #2
0
ファイル: trainer.py プロジェクト: hajicj/ommr4all-server
        self.pc_trainer.run(model_for_book, callback)
        print("Training Calamari")
        self.s2s_trainer.run(model_for_book, callback)
        print("Done")


if __name__ == '__main__':
    import random
    import numpy as np
    random.seed(1)
    np.random.seed(1)
    b = DatabaseBook('demo')
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8,
        [LockState(Locks.STAFF_LINES, True),
         LockState(Locks.LAYOUT, True)], True, [b])
    output = 'models_out/test_pcs2s'
    params = SymbolDetectionDatasetParams(
        gt_required=True,
        height=40,
        dewarp=True,
        cut_region=False,
        pad=(0, 10, 0, 20),
        pad_power_of_2=None,
        center=True,
        staff_lines_only=True,
    )
    train_params = SymbolDetectionTrainerParams(
        params,
        train_pcgts,
コード例 #3
0
                          )

        """
if __name__ == '__main__':
    import random
    import numpy as np
    random.seed(1)
    np.random.seed(1)

    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ommr4all.settings')
    import django
    django.setup()
    b = DatabaseBook('Pa_14819')

    train_pcgts, val_pcgts = dataset_by_locked_pages(0.9999, [LockState(Locks.LAYOUT, True)], True, [b])
    trainer_params = CalamariTrainer.default_params()
    trainer_params.l_rate = 1e-3
    trainer_params.load = '/home/ls6/wick/Documents/Projects/calamari_models/fraktur_historical_ligs/0.ckpt.json'

    params = DatasetParams(
        gt_required=True,
        height=48,
        cut_region=True,
        pad=[0, 10, 0, 20],
        #lyrics_normalization=LyricsNormalization.ONE_STRING,
    )
    train_params = AlgorithmTrainerSettings(
        params,
        train_pcgts,
        val_pcgts,
コード例 #4
0

if __name__ == '__main__':
    # Manual smoke-test entry point: load a pretrained Calamari OCR model
    # for a test book from internal storage.
    from omr.steps.step import Step, AlgorithmTypes
    from ommr4all.settings import BASE_DIR
    import random
    import cv2
    import matplotlib.pyplot as plt
    from shared.pcgtscanvas import PcGtsCanvas
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Seed both RNGs so any dataset split below is reproducible.
    random.seed(1)
    np.random.seed(1)
    if False:  # disabled: example of building a train/val split from locked pages
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            0.8,
            [LockState(Locks.SYMBOLS, True),
             LockState(Locks.LAYOUT, True)],
            True,
            [
                # DatabaseBook('Graduel_Part_1'),
                # DatabaseBook('Graduel_Part_2'),
                # DatabaseBook('Graduel_Part_3'),
            ])
    book = DatabaseBook('Gothic_Test')
    meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    # model = meta.newest_model_for_book(book)
    # Load a pretrained Fraktur model from internal storage instead of the
    # newest book-specific model (the commented-out line above).
    model = Model(
        MetaId.from_custom_path(
            BASE_DIR +
            '/internal_storage/pretrained_models/text_calamari/fraktur_historical',
            meta.type()))
コード例 #5
0
                     m.operation.page.image_to_page_scale(
                         dataset.local_to_global_pos(Point((pos.global_start + pos.global_end) / 2, 40), m.operation.params).x,
                         m.operation.scale_reference
                     ))
                    for pos in p.positions]
        return CalamariSequence.to_symbols(dataset.params.calamari_codec, sentence, m.operation.music_line.staff_lines)


if __name__ == '__main__':
    # Manual smoke test: run the OMR predictor on a single validation page
    # and render the predicted symbols on a canvas.
    import random
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    from shared.pcgtscanvas import PcGtsCanvas, PageScaleReference
    # Seed both RNGs so the train/validation split is reproducible.
    random.seed(1)
    np.random.seed(1)
    b = DatabaseBook('Graduel_Fully_Annotated')
    # 80/20 page split over three books, using only pages whose staff-line
    # and layout locks are set.
    train_pcgts, val_pcgts = dataset_by_locked_pages(0.8, [LockState(Locks.STAFF_LINES, True), LockState(Locks.LAYOUT, True)], True, [
        DatabaseBook('Graduel_Part_1'),
        DatabaseBook('Graduel_Part_2'),
        DatabaseBook('Graduel_Part_3'),
    ])
    # Predict with the best stored model for book `b`.
    pred = OMRPredictor(AlgorithmPredictorSettings(
        model=Meta.best_model_for_book(b),
    ))
    # Run prediction on exactly one validation page (index 7).
    ps = list(pred.predict([p.page.location for p in val_pcgts[7:8]]))
    for p in ps:
        p: PredictionResult = p
        # Draw the predicted symbols of every music line onto the page canvas.
        canvas = PcGtsCanvas(p.pcgts.page, PageScaleReference.NORMALIZED_X2)
        for sp in p.music_lines:
            canvas.draw(sp.symbols)

        canvas.show()
コード例 #6
0
ファイル: trainer.py プロジェクト: hajicj/ommr4all-server
                preload_validation=True,
                codec=Codec(self.settings.dataset_params.calamari_codec.codec.
                            values()),
            )
            trainer.train()


if __name__ == '__main__':
    import random
    import numpy as np
    random.seed(1)
    np.random.seed(1)
    b = DatabaseBook('Graduel_Fully_Annotated')
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8, [LockState(Locks.SYMBOLS, True),
              LockState(Locks.LAYOUT, True)], True, [b])
    dataset_params = DatasetParams(
        gt_required=True,
        height=40,
        dewarp=True,
        cut_region=False,
        pad=[0, 10, 0, 20],
        center=True,
        staff_lines_only=True,
        masks_as_input=False,
    )
    train_settings = AlgorithmTrainerSettings(
        dataset_params=dataset_params,
        train_data=train_pcgts,
        validation_data=val_pcgts,
コード例 #7
0
    def run(self, task: Task, com_queue: Queue) -> dict:
        """Train a model for the selected book, streaming progress to *com_queue*.

        A local TrainerCallback subclass translates trainer events
        (resolving files, loading data, per-iteration progress, new best
        model) into TaskCommunicationData/TaskStatus messages placed on
        *com_queue*, so the caller can display live progress.

        :param task: the task whose status updates are reported.
        :param com_queue: queue receiving TaskCommunicationData messages.
        :return: an empty dict (the trained model is persisted by the trainer).
        """
        class Callback(TrainerCallback):
            # Bridges trainer events to task status messages on com_queue.
            def __init__(self):
                super().__init__()
                # -1 marks "not yet reported" for every tracked metric.
                self.iter, self.loss, self.acc, self.best_iter, self.best_acc, self.best_iters = -1, -1, -1, -1, -1, -1

            def resolving_files(self):
                # Trainer is scanning for usable ground-truth files.
                com_queue.put(
                    TaskCommunicationData(
                        task,
                        TaskStatus(
                            TaskStatusCodes.RUNNING,
                            TaskProgressCodes.RESOLVING_DATA,
                        )))

            def loading(self, n: int, total: int):
                # Per-file loading progress (n of total loaded).
                com_queue.put(
                    TaskCommunicationData(
                        task,
                        TaskStatus(
                            TaskStatusCodes.RUNNING,
                            TaskProgressCodes.LOADING_DATA,
                            progress=n / total,
                            n_processed=n,
                            n_total=total,
                        )))

            def loading_started(self, total: int):
                # No message needed; the first loading() call reports progress.
                pass

            def loading_finished(self, total: int):
                # All data loaded; trainer is setting up the training run.
                com_queue.put(
                    TaskCommunicationData(
                        task,
                        TaskStatus(
                            TaskStatusCodes.RUNNING,
                            TaskProgressCodes.PREPARING_TRAINING,
                        )))

            def put(self):
                # Publish the current training state. Fields still at their
                # -1 sentinel (or a zero divisor) are reported as -1.
                com_queue.put(
                    TaskCommunicationData(
                        task,
                        TaskStatus(
                            TaskStatusCodes.RUNNING,
                            TaskProgressCodes.WORKING,
                            progress=self.iter / self.total_iters,
                            accuracy=self.best_acc
                            if self.best_acc >= 0 else -1,
                            early_stopping_progress=self.best_iters /
                            self.early_stopping_iters
                            if self.early_stopping_iters > 0 else -1,
                            loss=self.loss,
                        )))

            def next_iteration(self, iter: int, loss: float, acc: float):
                # Called every training iteration; forward the new metrics.
                self.iter, self.loss, self.acc = iter, loss, acc
                self.put()

            def next_best_model(self, best_iter: int, best_acc: float,
                                best_iters: int):
                # Called when validation finds a new best model checkpoint.
                self.best_iter, self.best_acc, self.best_iters = best_iter, best_acc, best_iters
                self.put()

            def early_stopping(self):
                # Early stopping is implicit in the progress already reported.
                pass

        callback = Callback()

        logger.info("Finding PcGts files with valid ground truth")
        callback.resolving_files()
        # NOTE(review): this split is only used for the size check and the
        # log lines below; the actual training data comes from
        # self.params.to_train_val() further down — confirm the duplication
        # (and the Symbols-vs-StaffLines lock mismatch) is intended.
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            self.params.nTrain, [LockState('Symbols', True)],
            datasets=[self.selection.book]
            if not self.params.includeAllTrainingData else [])
        if len(train_pcgts) + len(val_pcgts) < 50:
            # only very few files, use all for training and evaluate on training as-well
            train_pcgts = train_pcgts + val_pcgts
            val_pcgts = train_pcgts
            logger.info("Combining training and validation files because n<50")

        logger.info(
            "Starting training with {} training and {} validation files".
            format(len(train_pcgts), len(val_pcgts)))
        logger.debug("Training files: {}".format(
            [p.page.location.local_path() for p in train_pcgts]))
        logger.debug("Validation files: {}".format(
            [p.page.location.local_path() for p in val_pcgts]))

        meta = self.algorithm_meta()
        # Resolve the train/validation sets actually passed to the trainer.
        train, val = self.params.to_train_val(
            locks=[LockState('StaffLines', True)], books=[self.selection.book])

        settings = AlgorithmTrainerSettings(
            train_data=train,
            validation_data=val,
            dataset_params=DatasetParams(
                gt_required=True,
                pad=None,
                pad_power_of_2=3,
                height=80,
                dewarp=False,
                cut_region=False,
                center=True,
                staff_lines_only=True,
            ),
        )

        trainer = meta.create_trainer(settings)
        if self.params.pretrainedModel:
            # Warm-start from the selected pretrained model checkpoint.
            trainer.settings.params.load = self.params.pretrainedModel.id
        trainer.train(self.selection.book, callback=callback)
        logger.info("Training finished for book {}".format(
            self.selection.book.local_path()))
        return {}
コード例 #8
0
ファイル: trainer.py プロジェクト: hajicj/ommr4all-server
 def required_locks() -> List[LockState]:
     """Return the page locks required for training: Locks.TEXT must be set."""
     return [LockState(Locks.TEXT, True)]