Beispiel #1
0
    def run(self,
            n_train: int,
            val_amount: float,
            cross_folds: int,
            single_folds: List[int],
            train_books: List[str],
            test_books: List[str],
            train_books_extend: List[str]
            ):
        """Run one experiment per generated dataset fold and print the results.

        All parameters are forwarded unchanged, by keyword, to
        ``generate_dataset``; that call additionally requests pages whose
        ``STAFF_LINES`` and ``LAYOUT`` locks are set.

        For every dataset entry returned, a ``SingleDataArgs`` is built whose
        model directory is ``<global_args.model_dir>/<algorithm value>_<fold>``.
        The experimenter class registered for ``global_args.algorithm_type`` is
        then instantiated once per fold, ``run_single()`` is called on it, and
        the collected results are handed to ``print_results``.
        """
        from omr.steps.step import Step
        global_args = self.global_args
        logger.info("Finding PcGts files with valid ground truth")

        required_locks = [LockState(Locks.STAFF_LINES, True), LockState(Locks.LAYOUT, True)]
        fold_datasets = generate_dataset(
            lock_states=required_locks,
            n_train=n_train,
            val_amount=val_amount,
            cross_folds=cross_folds,
            single_folds=single_folds,
            train_books=train_books,
            test_books=test_books,
            train_books_extend=train_books_extend,
        )

        # One argument bundle per fold; each fold gets its own model directory.
        per_fold_args = []
        for fold_data in fold_datasets:
            fold_dir_name = '{}_{}'.format(global_args.algorithm_type.value, fold_data.fold)
            per_fold_args.append(
                SingleDataArgs(
                    fold_data.fold,
                    os.path.join(global_args.model_dir, fold_dir_name),
                    fold_data.train_pcgts_files,
                    fold_data.validation_pcgts_files,
                    fold_data.test_pcgts_files,
                    global_args,
                )
            )

        experimenter_class = Step.meta(self.global_args.algorithm_type).experimenter()

        # Folds are processed sequentially, in the order they were generated.
        results = []
        for single_args in per_fold_args:
            experimenter = experimenter_class(single_args, logger)
            results.append(experimenter.run_single())

        experimenter_class.print_results(self.global_args, results, logger)
Beispiel #2
0
 def put(self, request, group, style):
     """Copy the model named in the request body over the local default model.

     The request body is parsed as ``ModelMeta`` JSON and its ``id`` selects
     the source model. The destination is identified by the local default
     model id for ``style`` together with the model directory of the first
     algorithm type in ``group``. The copy is made with ``override=True``,
     and an empty ``Response`` is returned.
     """
     uploaded_meta = ModelMeta.from_json(request.body)
     algorithm_type = AlgorithmGroups(group).types()[0]
     source_model = Model.from_id_str(uploaded_meta.id)

     default_model_id = DatabaseAvailableModels.local_default_models(style, algorithm_type)
     default_model_dir = Step.meta(algorithm_type).model_dir()
     destination_model = Model(MetaId(default_model_id, default_model_dir))

     source_model.copy_to(destination_model, override=True)
     return Response()
Beispiel #3
0
    def __init__(self, settings: AlgorithmPredictorSettings):
        """Initialise the base predictor and create an internal OCR predictor.

        ``settings`` is passed to the parent constructor only. The OCR
        predictor stored in ``self.ocr_predictor`` is built from separate
        settings that point at the model under
        ``BASE_DIR/internal_storage/default_models/fraktur/text_calamari/``
        and use the ``CTC_DEFAULT`` decoder type.
        """
        super().__init__(settings)

        ocr_meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
        from ommr4all.settings import BASE_DIR

        default_model_path = BASE_DIR + '/internal_storage/default_models/fraktur/text_calamari/'
        ocr_model_id = MetaId.from_custom_path(default_model_path, ocr_meta.type())
        ocr_model = Model(ocr_model_id)

        # Independent settings object for the OCR step; the caller's settings
        # are not reused here.
        ocr_settings = AlgorithmPredictorSettings(model=ocr_model)
        ocr_settings.params.ctcDecoder.params.type = CTCDecoderParams.CTC_DEFAULT
        self.ocr_predictor = ocr_meta.create_predictor(ocr_settings)
Beispiel #4
0
    def __init__(self, settings: AlgorithmPredictorSettings):
        """Initialise the predictor, its document helpers and an OCR predictor.

        Reads ``documentId`` and ``documentText`` from ``settings.params``,
        creates a ``SimilarDocumentChecker`` and a word-level
        ``LyricsNormalizationProcessor``, and builds an internal OCR predictor
        (``self.ocr_predictor``) from the model under
        ``BASE_DIR/internal_storage/default_models/fraktur/text_calamari/``
        with the ``CTC_DEFAULT`` decoder type.
        """
        super().__init__(settings)
        params = settings.params
        self.document_id = params.documentId
        self.document_text = params.documentText

        self.document_similar_tester = SimilarDocumentChecker()
        normalization_params = LyricsNormalizationParams(LyricsNormalization.WORDS)
        self.text_normalizer = LyricsNormalizationProcessor(normalization_params)

        ocr_meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
        from ommr4all.settings import BASE_DIR

        default_model_path = BASE_DIR + '/internal_storage/default_models/fraktur/text_calamari/'
        ocr_model_id = MetaId.from_custom_path(default_model_path, ocr_meta.type())
        ocr_model = Model(ocr_model_id)

        # Independent settings object for the OCR step; the caller's settings
        # are not reused here.
        ocr_settings = AlgorithmPredictorSettings(model=ocr_model)
        ocr_settings.params.ctcDecoder.params.type = CTCDecoderParams.CTC_DEFAULT
        self.ocr_predictor = ocr_meta.create_predictor(ocr_settings)
Beispiel #5
0
    # Demo fragment: run the OCR_CALAMARI predictor on the first page slice of
    # the 'Gothic_Test' book and draw every predicted text line on a canvas.
    # NOTE(review): the enclosing header and the imports of random, np,
    # BASE_DIR and PcGtsCanvas are not visible in this fragment.
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Seed both the stdlib and the NumPy random generators with a fixed value.
    random.seed(1)
    np.random.seed(1)
    # This branch never executes (`if False`); the dataset call is kept only
    # as reference code, and its book list is empty (all entries commented out).
    if False:
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            0.8,
            [LockState(Locks.SYMBOLS, True),
             LockState(Locks.LAYOUT, True)],
            True,
            [
                # DatabaseBook('Graduel_Part_1'),
                # DatabaseBook('Graduel_Part_2'),
                # DatabaseBook('Graduel_Part_3'),
            ])
    book = DatabaseBook('Gothic_Test')
    meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    # The model is loaded from a fixed path below BASE_DIR rather than looked
    # up for the book (the lookup is left commented out).
    # model = meta.newest_model_for_book(book)
    model = Model(
        MetaId.from_custom_path(
            BASE_DIR +
            '/internal_storage/pretrained_models/text_calamari/fraktur_historical',
            meta.type()))
    settings = AlgorithmPredictorSettings(model=model, )
    pred = meta.create_predictor(settings)
    # Only the slice [0:1] of the book's pages is predicted; list() collects
    # all prediction results before drawing starts.
    ps: List[PredictionResult] = list(pred.predict(book.pages()[0:1]))
    for i, p in enumerate(ps):
        # The canvas scale reference is taken from the first text line.
        # NOTE(review): text_lines[0] is indexed without an emptiness check.
        canvas = PcGtsCanvas(p.pcgts.page,
                             p.text_lines[0].line.operation.scale_reference)
        # The indices i and j are not used in the visible lines.
        # NOTE(review): the canvas is drawn to but never shown in the visible
        # lines; the fragment may be truncated.
        for j, s in enumerate(p.text_lines):
            canvas.draw(s)
Beispiel #6
0
    # Demo fragment: run the SYLLABLES_FROM_TEXT predictor on the first page
    # slice of the 'Paper_New_York' book and display one predicted text line.
    # NOTE(review): the enclosing header and the imports of random and np are
    # not visible in this fragment.
    from shared.pcgtscanvas import PcGtsCanvas
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Seed both the stdlib and the NumPy random generators with a fixed value.
    random.seed(1)
    np.random.seed(1)
    # This branch never executes (`if False`); the dataset call is kept only
    # as reference code, and its book list is empty (all entries commented out).
    if False:
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            0.8,
            [LockState(Locks.SYMBOLS, True),
             LockState(Locks.LAYOUT, True)],
            True,
            [
                # DatabaseBook('Graduel_Part_1'),
                # DatabaseBook('Graduel_Part_2'),
                # DatabaseBook('Graduel_Part_3'),
            ])
    book = DatabaseBook('Paper_New_York')
    meta = Step.meta(AlgorithmTypes.SYLLABLES_FROM_TEXT)
    # The model is chosen via the step meta's best_model_for_book lookup.
    model = meta.best_model_for_book(book)
    settings = AlgorithmPredictorSettings(model=model, )
    pred = meta.create_predictor(settings)
    # Only the slice [:1] of the book's pages is predicted; list() collects
    # all prediction results before drawing starts.
    ps: List[PredictionResult] = list(pred.predict(book.pages()[:1]))
    for i, p in enumerate(ps):
        pmr = p.page_match_result
        canvas = PcGtsCanvas(pmr.pcgts.page, PageScaleReference.NORMALIZED_X2)
        # Draws only the text line at the hard-coded index 4.
        # NOTE(review): no check that at least five text lines exist.
        canvas.draw(pmr.text_prediction_result.text_lines[4],
                    color=(25, 150, 25),
                    background=True)
        # Alternative overlays, currently disabled:
        # canvas.draw(pmr.match_results)
        # canvas.draw(p.annotations)
        canvas.show()
Beispiel #7
0
    # Demo fragment: run the SYLLABLES_IN_ORDER predictor on the first page
    # slice of the 'Paper_New_York' book and display match results and
    # annotations on a canvas.
    # NOTE(review): the enclosing header and the imports of os, django and np
    # are not visible in this fragment.
    # Point Django at the project settings and initialise it; this happens
    # before the project imports that follow.
    os.environ['DJANGO_SETTINGS_MODULE'] = 'ommr4all.settings'
    django.setup()
    from ommr4all.settings import BASE_DIR
    import random
    import matplotlib.pyplot as plt
    from shared.pcgtscanvas import PcGtsCanvas
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    from database.file_formats.pcgts import PageScaleReference
    # Seed both the stdlib and the NumPy random generators with a fixed value.
    random.seed(1)
    np.random.seed(1)
    # This branch never executes (`if False`); the dataset call is kept only
    # as reference code, and its book list is empty (all entries commented out).
    if False:
        train_pcgts, val_pcgts = dataset_by_locked_pages(0.8, [LockState(Locks.SYMBOLS, True), LockState(Locks.LAYOUT, True)], True, [
            # DatabaseBook('Graduel_Part_1'),
            # DatabaseBook('Graduel_Part_2'),
            # DatabaseBook('Graduel_Part_3'),
        ])
    book = DatabaseBook('Paper_New_York')
    meta = Step.meta(AlgorithmTypes.SYLLABLES_IN_ORDER)
    # The model is chosen via the step meta's best_model_for_book lookup.
    model = meta.best_model_for_book(book)
    settings = AlgorithmPredictorSettings(
        model=model,
    )
    pred = meta.create_predictor(settings)
    # Only the slice [:1] of the book's pages is predicted; list() collects
    # all prediction results before drawing starts.
    ps: List[PredictionResult] = list(pred.predict(book.pages()[:1]))
    for i, p in enumerate(ps):
        pmr = p.page_match_result
        canvas = PcGtsCanvas(pmr.pcgts.page, PageScaleReference.NORMALIZED_X2)
        # Overlay the match results first, then the annotations, then display.
        canvas.draw(pmr.match_results)
        canvas.draw(p.annotations)
        canvas.show()
Beispiel #8
0
 def get(self, request, group, style):
     """Return the models available for ``style`` as a dict-backed response.

     The first algorithm type of ``group`` selects the step meta, whose
     ``list_available_models_for_style(style)`` result is converted with
     ``to_dict()`` and wrapped in a ``Response``.
     """
     algorithm_type = AlgorithmGroups(group).types()[0]
     step_meta = Step.meta(algorithm_type)
     available_models = step_meta.list_available_models_for_style(style)
     return Response(available_models.to_dict())