Example #1
0
    def run(self,
            n_train: int,
            val_amount: float,
            cross_folds: int,
            single_folds: List[int],
            train_books: List[str],
            test_books: List[str],
            train_books_extend: List[str]
            ):
        """Build the cross-fold datasets and run one experiment per fold.

        Collects PcGts files whose staff lines and layout are locked, wraps
        each generated fold into a SingleDataArgs with its own model output
        directory, runs the algorithm's experimenter on every fold, and
        finally prints the aggregated results.
        """
        from omr.steps.step import Step
        global_args = self.global_args

        logger.info("Finding PcGts files with valid ground truth")
        folds = generate_dataset(
            lock_states=[LockState(Locks.STAFF_LINES, True), LockState(Locks.LAYOUT, True)],
            n_train=n_train,
            val_amount=val_amount,
            cross_folds=cross_folds,
            single_folds=single_folds,
            train_books=train_books,
            test_books=test_books,
            train_books_extend=train_books_extend,
        )

        # One SingleDataArgs per fold, each writing into its own model dir.
        fold_args = []
        for gd in folds:
            model_dir = os.path.join(
                global_args.model_dir,
                '{}_{}'.format(global_args.algorithm_type.value, gd.fold))
            fold_args.append(SingleDataArgs(
                gd.fold,
                model_dir,
                gd.train_pcgts_files, gd.validation_pcgts_files,
                gd.test_pcgts_files,
                global_args))

        experimenter_class = Step.meta(global_args.algorithm_type).experimenter()
        results = []
        for single_args in fold_args:
            results.append(experimenter_class(single_args, logger).run_single())
        experimenter_class.print_results(global_args, results, logger)
Example #2
0
 def put(self, request, group, style):
     """Install the uploaded model as the default model for this group/style."""
     posted_meta = ModelMeta.from_json(request.body)
     algorithm_type = AlgorithmGroups(group).types()[0]
     source = Model.from_id_str(posted_meta.id)
     # Resolve the canonical default-model location for this style and type.
     target = Model(MetaId(
         DatabaseAvailableModels.local_default_models(style, algorithm_type),
         Step.meta(algorithm_type).model_dir()))
     source.copy_to(target, override=True)
     return Response()
Example #3
0
 def post(self, request, book, operation):
     """Resolve and describe the page selection for an algorithm run.

     The request body is a JSON-encoded PageSelectionParams; the response
     reports which pages of *book* the given operation's predictor would
     process, without actually running it.
     """
     # json.loads() accepts bytes and detects the encoding itself; the
     # `encoding` keyword was deprecated in 3.7 and removed in Python 3.9,
     # where passing it raises TypeError.
     body = json.loads(request.body)
     book = DatabaseBook(book)
     algorithm = Step.predictor(AlgorithmTypes(operation))
     page_selection = PageSelection.from_params(PageSelectionParams.from_dict(body), book)
     pages = page_selection.get_pages(algorithm.unprocessed)
     return Response({
         'pages': [p.page for p in pages],
         'pageCount': page_selection.page_count.value,
         'singlePage': page_selection.single_page,
         'book': book.book,
         'totalPages': len(book.pages()),
     })
Example #4
0
    def __init__(self, settings: AlgorithmPredictorSettings):
        """Set up this predictor with a bundled default Calamari OCR model."""
        super().__init__(settings)

        meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
        from ommr4all.settings import BASE_DIR
        # Always load the shipped fraktur model from internal storage,
        # independent of the model carried by the incoming settings.
        default_model = Model(
            MetaId.from_custom_path(
                BASE_DIR +
                '/internal_storage/default_models/fraktur/text_calamari/',
                meta.type()))
        ocr_settings = AlgorithmPredictorSettings(model=default_model, )
        ocr_settings.params.ctcDecoder.params.type = CTCDecoderParams.CTC_DEFAULT
        self.ocr_predictor = meta.create_predictor(ocr_settings)
Example #5
0
    def __init__(self, settings: AlgorithmPredictorSettings):
        """Initialize document-matching state and a default Calamari OCR predictor."""
        super().__init__(settings)
        self.document_id = settings.params.documentId
        self.document_text = settings.params.documentText

        self.document_similar_tester = SimilarDocumentChecker()
        self.text_normalizer = LyricsNormalizationProcessor(
            LyricsNormalizationParams(LyricsNormalization.WORDS))

        from ommr4all.settings import BASE_DIR
        ocr_meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
        # Always load the shipped fraktur model from internal storage,
        # independent of the model carried by the incoming settings.
        ocr_model = Model(
            MetaId.from_custom_path(
                BASE_DIR +
                '/internal_storage/default_models/fraktur/text_calamari/',
                ocr_meta.type()))
        ocr_settings = AlgorithmPredictorSettings(model=ocr_model, )
        ocr_settings.params.ctcDecoder.params.type = CTCDecoderParams.CTC_DEFAULT
        self.ocr_predictor = ocr_meta.create_predictor(ocr_settings)
Example #6
0
from ..dataset import SymbolDetectionDataset, Dataset
from omr.steps.step import Step


class Meta(AlgorithmMeta):
    """Registration metadata for the sequence-to-sequence symbol detector."""

    @staticmethod
    def type() -> AlgorithmTypes:
        """Algorithm identifier this meta is registered under."""
        return AlgorithmTypes.SYMBOLS_SEQUENCE_TO_SEQUENCE

    @staticmethod
    def dataset_class() -> Type[Dataset]:
        """Dataset implementation used for this algorithm."""
        return SymbolDetectionDataset

    @classmethod
    def predictor(cls) -> Type[AlgorithmPredictor]:
        """Return the predictor class (imported lazily to avoid cycles)."""
        from .predictor import OMRPredictor
        return OMRPredictor

    @classmethod
    def trainer(cls) -> Type[AlgorithmTrainer]:
        """Return the trainer class (imported lazily to avoid cycles)."""
        from .trainer import OMRTrainer
        return OMRTrainer

    @classmethod
    def experimenter(cls) -> Type[Experimenter]:
        """Return the experimenter class (imported lazily to avoid cycles)."""
        from ..experimenter import SymbolsExperimenter
        return SymbolsExperimenter


Step.register(Meta)
Example #7
0
    def run_single(self):
        """Execute one fold of an experiment: train, predict, then evaluate.

        The three phases are individually skippable via the global args:

        * train    -> writes a model checkpoint under ``args.model_dir``
        * predict  -> pickles extracted GT/prediction pairs to ``pred.json``
          (when skipped, the pairs are loaded back from that file instead)
        * evaluate -> scores the prediction pairs

        Returns:
            The evaluation result, or ``None`` if evaluation was skipped or
            no predictions are available.
        """
        args = self.args
        fold_log = self.fold_log
        from omr.steps.algorithm import AlgorithmPredictorSettings, AlgorithmTrainerSettings, AlgorithmTrainerParams
        from omr.steps.step import Step
        global_args = args.global_args


        def print_dataset_content(files: List[PcGts], label: str):
            # Debug aid: log exactly which pages landed in each split.
            fold_log.debug("Got {} {} files: {}".format(len(files), label, [f.page.location.local_path() for f in files]))

        print_dataset_content(args.train_pcgts_files, 'training')
        if args.validation_pcgts_files:
            print_dataset_content(args.validation_pcgts_files, 'validation')
        else:
            fold_log.debug("No validation data. Using training data instead")
        print_dataset_content(args.test_pcgts_files, 'testing')

        if not os.path.exists(args.model_dir):
            os.makedirs(args.model_dir)

        # NOTE(review): pred.json actually holds a pickle, not JSON.
        prediction_path = os.path.join(args.model_dir, 'pred.json')
        model_path = os.path.join(args.model_dir, 'best')

        if not global_args.skip_train:
            fold_log.info("Starting training")
            trainer = Step.create_trainer(
                global_args.algorithm_type,
                AlgorithmTrainerSettings(
                    dataset_params=args.global_args.dataset_params,
                    train_data=args.train_pcgts_files,
                    # Fall back to the training data when no validation split exists.
                    validation_data=args.validation_pcgts_files if args.validation_pcgts_files else args.train_pcgts_files,
                    model=Model(MetaId.from_custom_path(model_path, global_args.algorithm_type)),
                    params=global_args.trainer_params,
                    page_segmentation_params=global_args.page_segmentation_params,
                    calamari_params=global_args.calamari_params,
                )
            )
            trainer.train()

        test_pcgts_files = args.test_pcgts_files
        if not global_args.skip_predict:
            fold_log.info("Starting prediction")
            # Deep copy so per-fold parameter tweaks don't leak into other folds.
            pred_params = deepcopy(global_args.predictor_params)
            pred_params.modelId = MetaId.from_custom_path(model_path, global_args.algorithm_type)
            if global_args.calamari_dictionary_from_gt:
                # Build the CTC decoder dictionary from the ground-truth words
                # of the test pages (hyphenation dashes stripped first).
                words = set()
                for pcgts in test_pcgts_files:
                    words = words.union(sum([t.sentence.text().replace('-', '').split() for t in pcgts.page.all_text_lines()], []))
                # In-place slice assignment keeps the original list object alive.
                pred_params.ctcDecoder.params.dictionary[:] = words

            pred = Step.create_predictor(
                global_args.algorithm_type,
                AlgorithmPredictorSettings(
                    None,
                    pred_params,
                ))
            full_predictions = list(pred.predict([f.page.location for f in test_pcgts_files]))
            predictions = self.extract_gt_prediction(full_predictions)
            # Persist GT/prediction pairs so a later run can skip prediction.
            with open(prediction_path, 'wb') as f:
                pickle.dump(predictions, f)

            if global_args.output_book:
                # Optionally materialize the predictions into a database book.
                fold_log.info("Outputting data")
                pred_book = DatabaseBook(global_args.output_book)
                if not pred_book.exists():
                    pred_book_meta = DatabaseBookMeta(pred_book.book, pred_book.book)
                    pred_book.create(pred_book_meta)

                # Copy the test pages into the output book, then write the
                # predictions into those copies.
                output_pcgts = [PcGts.from_file(pcgts.page.location.copy_to(pred_book).file('pcgts'))
                                for pcgts in test_pcgts_files]

                self.output_prediction_to_book(pred_book, output_pcgts, full_predictions)

                for o_pcgts in output_pcgts:
                    o_pcgts.to_file(o_pcgts.page.location.file('pcgts').local_path())

        else:
            fold_log.info("Skipping prediction")

            # Reload previously pickled predictions from an earlier run.
            with open(prediction_path, 'rb') as f:
                predictions = pickle.load(f)

        predictions = tuple(predictions)
        if not global_args.skip_eval and len(predictions) > 0:
            fold_log.info("Starting evaluation")
            r = self.evaluate(predictions, global_args.evaluation_params)
        else:
            r = None

        # if not global_args.skip_cleanup:
        #    fold_log.info("Cleanup")
        #    shutil.rmtree(args.model_dir)

        return r
Example #8
0
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Fixed seeds keep the (currently disabled) dataset split reproducible.
    random.seed(1)
    np.random.seed(1)
    # NOTE(review): dead code — this split is disabled via `if False`;
    # remove it or re-enable it deliberately.
    if False:
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            0.8,
            [LockState(Locks.SYMBOLS, True),
             LockState(Locks.LAYOUT, True)],
            True,
            [
                # DatabaseBook('Graduel_Part_1'),
                # DatabaseBook('Graduel_Part_2'),
                # DatabaseBook('Graduel_Part_3'),
            ])
    book = DatabaseBook('Gothic_Test')
    meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    # model = meta.newest_model_for_book(book)
    # Use a pretrained fraktur model from internal storage instead of a
    # book-specific model.
    model = Model(
        MetaId.from_custom_path(
            BASE_DIR +
            '/internal_storage/pretrained_models/text_calamari/fraktur_historical',
            meta.type()))
    settings = AlgorithmPredictorSettings(model=model, )
    pred = meta.create_predictor(settings)
    # Predict only the first page and draw each recognized text line onto a
    # canvas scaled like the first line's operation.
    ps: List[PredictionResult] = list(pred.predict(book.pages()[0:1]))
    for i, p in enumerate(ps):
        canvas = PcGtsCanvas(p.pcgts.page,
                             p.text_lines[0].line.operation.scale_reference)
        for j, s in enumerate(p.text_lines):
            canvas.draw(s)
Example #9
0
if __name__ == "__main__":
    from database import DatabaseBook
    from PIL import Image
    import matplotlib.pyplot as plt
    import numpy as np
    from omr.steps.step import Step, AlgorithmTypes

    # Demo: run simple bounding-box layout detection on one page and paint
    # the detected block types into an RGB mask.
    b = DatabaseBook('demo')
    p = b.page('page00000001')
    img = np.array(Image.open(p.file('color_norm').local_path()))
    # np.float was removed in NumPy 1.24; the builtin float is the documented
    # replacement and yields the same float64 mask.
    mask = np.zeros(img.shape, float) + 255
    val_pcgts = [PcGts.from_file(p.file('pcgts'))]

    settings = PredictorSettings()
    pred = Step.create_predictor(AlgorithmTypes.LAYOUT_SIMPLE_BOUNDING_BOXES, settings)

    def s(c):
        # Convert page coordinates to image pixels at the predictor's scale.
        return val_pcgts[0].page.page_to_image_scale(c, settings.page_scale_reference)

    for p in pred.predict(val_pcgts):
        for i, mr_c in enumerate(p.blocks.get(BlockType.MUSIC, [])):
            s(mr_c.coords).draw(mask, (255, 0, 0), fill=True, thickness=0)

        for i, mr_c in enumerate(p.blocks.get(BlockType.LYRICS, [])):
            s(mr_c.coords).draw(mask, (0, 255, 0), fill=True, thickness=0)

        for i, mr_c in enumerate(p.blocks.get(BlockType.DROP_CAPITAL, [])):
            s(mr_c.coords).draw(mask, (0, 0, 255), fill=True, thickness=0)

    import json
Example #10
0
    from shared.pcgtscanvas import PcGtsCanvas
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Fixed seeds keep the (currently disabled) dataset split reproducible.
    random.seed(1)
    np.random.seed(1)
    # NOTE(review): dead code — this split is disabled via `if False`;
    # remove it or re-enable it deliberately.
    if False:
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            0.8,
            [LockState(Locks.SYMBOLS, True),
             LockState(Locks.LAYOUT, True)],
            True,
            [
                # DatabaseBook('Graduel_Part_1'),
                # DatabaseBook('Graduel_Part_2'),
                # DatabaseBook('Graduel_Part_3'),
            ])
    book = DatabaseBook('Paper_New_York')
    meta = Step.meta(AlgorithmTypes.SYLLABLES_FROM_TEXT)
    model = meta.best_model_for_book(book)
    settings = AlgorithmPredictorSettings(model=model, )
    pred = meta.create_predictor(settings)
    # Predict only the first page and render text line index 4 of the
    # text prediction result, highlighted with a background.
    ps: List[PredictionResult] = list(pred.predict(book.pages()[:1]))
    for i, p in enumerate(ps):
        pmr = p.page_match_result
        canvas = PcGtsCanvas(pmr.pcgts.page, PageScaleReference.NORMALIZED_X2)
        canvas.draw(pmr.text_prediction_result.text_lines[4],
                    color=(25, 150, 25),
                    background=True)
        # canvas.draw(pmr.match_results)
        # canvas.draw(p.annotations)
        canvas.show()
Example #11
0
 def algorithm_meta(self) -> Type[AlgorithmMeta]:
     """Resolve the meta class for this instance's algorithm type."""
     meta_cls = Step.create_meta(self.algorithm_type)
     return meta_cls
Example #12
0
    os.environ['DJANGO_SETTINGS_MODULE'] = 'ommr4all.settings'
    # Django must be configured before importing Django-backed modules below.
    django.setup()
    from ommr4all.settings import BASE_DIR
    import random
    import matplotlib.pyplot as plt
    from shared.pcgtscanvas import PcGtsCanvas
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    from database.file_formats.pcgts import PageScaleReference
    # Fixed seeds keep the (currently disabled) dataset split reproducible.
    random.seed(1)
    np.random.seed(1)
    # NOTE(review): dead code — this split is disabled via `if False`;
    # remove it or re-enable it deliberately.
    if False:
        train_pcgts, val_pcgts = dataset_by_locked_pages(0.8, [LockState(Locks.SYMBOLS, True), LockState(Locks.LAYOUT, True)], True, [
            # DatabaseBook('Graduel_Part_1'),
            # DatabaseBook('Graduel_Part_2'),
            # DatabaseBook('Graduel_Part_3'),
        ])
    book = DatabaseBook('Paper_New_York')
    meta = Step.meta(AlgorithmTypes.SYLLABLES_IN_ORDER)
    model = meta.best_model_for_book(book)
    settings = AlgorithmPredictorSettings(
        model=model,
    )
    pred = meta.create_predictor(settings)
    # Predict only the first page and render match results plus annotations.
    ps: List[PredictionResult] = list(pred.predict(book.pages()[:1]))
    for i, p in enumerate(ps):
        pmr = p.page_match_result
        canvas = PcGtsCanvas(pmr.pcgts.page, PageScaleReference.NORMALIZED_X2)
        canvas.draw(pmr.match_results)
        canvas.draw(p.annotations)
        canvas.show()
Example #13
0
 def get(self, request, group, style):
     """List the models available for *style* under the group's default algorithm type."""
     algorithm_type = AlgorithmGroups(group).types()[0]
     available = Step.meta(algorithm_type).list_available_models_for_style(style)
     return Response(available.to_dict())