def run(self, n_train: int, val_amount: float, cross_folds: int, single_folds: List[int],
        train_books: List[str], test_books: List[str], train_books_extend: List[str]):
    """Build cross-fold train/val/test splits and run one experiment per fold.

    Pages are only admitted when their staff lines and layout are locked
    (i.e. ground truth was verified). Results of all folds are summarized
    via the experimenter's ``print_results``.
    """
    from omr.steps.step import Step  # local import, presumably to avoid a circular dependency

    args = self.global_args
    logger.info("Finding PcGts files with valid ground truth")

    folds = generate_dataset(
        lock_states=[LockState(Locks.STAFF_LINES, True), LockState(Locks.LAYOUT, True)],
        n_train=n_train,
        val_amount=val_amount,
        cross_folds=cross_folds,
        single_folds=single_folds,
        train_books=train_books,
        test_books=test_books,
        train_books_extend=train_books_extend,
    )

    # One SingleDataArgs per fold; each fold's model is stored under
    # <model_dir>/<algorithm_type>_<fold>.
    single_args = []
    for fold_data in folds:
        model_path = os.path.join(args.model_dir,
                                  '{}_{}'.format(args.algorithm_type.value, fold_data.fold))
        single_args.append(SingleDataArgs(
            fold_data.fold,
            model_path,
            fold_data.train_pcgts_files,
            fold_data.validation_pcgts_files,
            fold_data.test_pcgts_files,
            args,
        ))

    experimenter_class = Step.meta(args.algorithm_type).experimenter()
    results = []
    for single in single_args:
        results.append(experimenter_class(single, logger).run_single())
    experimenter_class.print_results(args, results, logger)
# (fragment) Tail of a trainer method whose `def` precedes this chunk:
# it chains two trainers — a pixel-classifier stage, then a Calamari
# sequence-to-sequence stage — on the same model/book and callback.
self.pc_trainer.run(model_for_book, callback)
print("Training Calamari")
self.s2s_trainer.run(model_for_book, callback)
print("Done")


if __name__ == '__main__':
    # Manual training entry point for the two-stage symbol detector.
    import random
    import numpy as np
    # Fixed seeds so the random dataset split below is reproducible.
    random.seed(1)
    np.random.seed(1)
    b = DatabaseBook('demo')
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # 80/20 split over pages whose staff lines and layout are locked
    # (i.e. ground truth was verified).
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8, [LockState(Locks.STAFF_LINES, True), LockState(Locks.LAYOUT, True)], True, [b])
    output = 'models_out/test_pcs2s'
    params = SymbolDetectionDatasetParams(
        gt_required=True,
        height=40,          # line image height in px — presumably the network input height; verify
        dewarp=True,
        cut_region=False,
        pad=(0, 10, 0, 20),
        pad_power_of_2=None,
        center=True,
        staff_lines_only=True,
    )
    train_params = SymbolDetectionTrainerParams(
        params,
        train_pcgts,
        # NOTE(review): chunk is truncated here — the remaining constructor
        # arguments lie outside this view; do not treat this call as complete.
) """ if __name__ == '__main__': import random import numpy as np random.seed(1) np.random.seed(1) from omr.dataset.datafiles import dataset_by_locked_pages, LockState os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ommr4all.settings') import django django.setup() b = DatabaseBook('Pa_14819') train_pcgts, val_pcgts = dataset_by_locked_pages(0.9999, [LockState(Locks.LAYOUT, True)], True, [b]) trainer_params = CalamariTrainer.default_params() trainer_params.l_rate = 1e-3 trainer_params.load = '/home/ls6/wick/Documents/Projects/calamari_models/fraktur_historical_ligs/0.ckpt.json' params = DatasetParams( gt_required=True, height=48, cut_region=True, pad=[0, 10, 0, 20], #lyrics_normalization=LyricsNormalization.ONE_STRING, ) train_params = AlgorithmTrainerSettings( params, train_pcgts, val_pcgts,
if __name__ == '__main__':
    # Manual smoke test: load a pretrained Calamari OCR model from the
    # internal storage of pretrained models for a test book.
    from omr.steps.step import Step, AlgorithmTypes
    from ommr4all.settings import BASE_DIR
    import random
    import cv2
    import matplotlib.pyplot as plt
    from shared.pcgtscanvas import PcGtsCanvas
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Fixed seeds so any random dataset split below is reproducible.
    random.seed(1)
    np.random.seed(1)
    if False:
        # Debug toggle: flip to True to build a train/val split instead of
        # using the pretrained model path below. Book list is commented out.
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            0.8, [LockState(Locks.SYMBOLS, True), LockState(Locks.LAYOUT, True)], True, [
                # DatabaseBook('Graduel_Part_1'),
                # DatabaseBook('Graduel_Part_2'),
                # DatabaseBook('Graduel_Part_3'),
            ])
    book = DatabaseBook('Gothic_Test')
    meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    # model = meta.newest_model_for_book(book)
    model = Model(
        MetaId.from_custom_path(
            BASE_DIR + '/internal_storage/pretrained_models/text_calamari/fraktur_historical',
            meta.type()))
    # NOTE(review): the original script presumably continues past this point
    # (the loaded model is not used within this chunk).
# (fragment) Tail of a symbol-decoding helper whose `def` lies before this
# chunk: maps each Calamari character position to page coordinates and
# decodes the character sequence into music symbols.
# NOTE(review): the expression below starts mid-list-comprehension — it is
# incomplete in isolation; the comprehension head is outside this view.
m.operation.page.image_to_page_scale(
    dataset.local_to_global_pos(
        Point((pos.global_start + pos.global_end) / 2, 40),
        m.operation.params).x,
    m.operation.scale_reference
)) for pos in p.positions]
return CalamariSequence.to_symbols(dataset.params.calamari_codec, sentence, m.operation.music_line.staff_lines)


if __name__ == '__main__':
    # Manual end-to-end check: predict symbols for a single validation page
    # and render the result on a canvas.
    import random
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    from shared.pcgtscanvas import PcGtsCanvas, PageScaleReference
    # Fixed seeds so the random dataset split below is reproducible.
    random.seed(1)
    np.random.seed(1)
    b = DatabaseBook('Graduel_Fully_Annotated')
    # 80/20 split over pages whose staff lines and layout are locked
    # (i.e. ground truth was verified).
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8, [LockState(Locks.STAFF_LINES, True), LockState(Locks.LAYOUT, True)], True, [
            DatabaseBook('Graduel_Part_1'),
            DatabaseBook('Graduel_Part_2'),
            DatabaseBook('Graduel_Part_3'),
        ])
    pred = OMRPredictor(AlgorithmPredictorSettings(
        model=Meta.best_model_for_book(b),
    ))
    # Predict exactly one page (validation index 7) and draw each music
    # line's predicted symbols.
    ps = list(pred.predict([p.page.location for p in val_pcgts[7:8]]))
    for p in ps:
        p: PredictionResult = p
        canvas = PcGtsCanvas(p.pcgts.page, PageScaleReference.NORMALIZED_X2)
        for sp in p.music_lines:
            canvas.draw(sp.symbols)
        canvas.show()
# (fragment) Tail of a trainer-setup method whose `def` precedes this chunk:
# final keyword arguments of a trainer construction, then the training run.
# The codec is built from the dataset's Calamari codec table.
preload_validation=True,
codec=Codec(self.settings.dataset_params.calamari_codec.codec.values()),
)
trainer.train()


if __name__ == '__main__':
    # Manual training entry point for symbol detection on a fully
    # annotated book.
    import random
    import numpy as np
    # Fixed seeds so the random dataset split below is reproducible.
    random.seed(1)
    np.random.seed(1)
    b = DatabaseBook('Graduel_Fully_Annotated')
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # 80/20 split over pages whose symbols and layout are locked
    # (i.e. ground truth was verified).
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8, [LockState(Locks.SYMBOLS, True), LockState(Locks.LAYOUT, True)], True, [b])
    dataset_params = DatasetParams(
        gt_required=True,
        height=40,          # line image height in px — presumably the network input height; verify
        dewarp=True,
        cut_region=False,
        pad=[0, 10, 0, 20],
        center=True,
        staff_lines_only=True,
        masks_as_input=False,
    )
    train_settings = AlgorithmTrainerSettings(
        dataset_params=dataset_params,
        train_data=train_pcgts,
        validation_data=val_pcgts,
        # NOTE(review): chunk is truncated here — the remaining settings lie
        # outside this view; do not treat this call as complete.
def run(self, task: Task, com_queue: Queue) -> dict:
    """Run a training task, streaming progress to ``com_queue``.

    Defines a local ``TrainerCallback`` that translates trainer events
    (resolving files, loading data, per-iteration progress, new best model)
    into ``TaskCommunicationData`` messages, then resolves the dataset,
    builds trainer settings, and trains on the selected book.

    Returns an empty dict (no result payload; progress is reported only
    through the queue).
    """
    class Callback(TrainerCallback):
        def __init__(self):
            super().__init__()
            # -1 sentinels mean "not reported yet".
            # NOTE(review): ``total_iters`` and ``early_stopping_iters`` used
            # in put() are not set here — presumably initialized by the
            # TrainerCallback base class or the trainer itself; verify.
            self.iter, self.loss, self.acc, self.best_iter, self.best_acc, self.best_iters = -1, -1, -1, -1, -1, -1

        def resolving_files(self):
            # Trainer is scanning for usable ground-truth files.
            com_queue.put(TaskCommunicationData(task, TaskStatus(
                TaskStatusCodes.RUNNING,
                TaskProgressCodes.RESOLVING_DATA,
            )))

        def loading(self, n: int, total: int):
            # Data loading progress: n of total files loaded.
            com_queue.put(TaskCommunicationData(task, TaskStatus(
                TaskStatusCodes.RUNNING,
                TaskProgressCodes.LOADING_DATA,
                progress=n / total,
                n_processed=n,
                n_total=total,
            )))

        def loading_started(self, total: int):
            pass

        def loading_finished(self, total: int):
            com_queue.put(TaskCommunicationData(task, TaskStatus(
                TaskStatusCodes.RUNNING,
                TaskProgressCodes.PREPARING_TRAINING,
            )))

        def put(self):
            # Emit the current training state; -1 placeholders are kept when
            # no accuracy / early-stopping information is available yet.
            com_queue.put(TaskCommunicationData(task, TaskStatus(
                TaskStatusCodes.RUNNING,
                TaskProgressCodes.WORKING,
                progress=self.iter / self.total_iters,
                accuracy=self.best_acc if self.best_acc >= 0 else -1,
                early_stopping_progress=self.best_iters / self.early_stopping_iters if self.early_stopping_iters > 0 else -1,
                loss=self.loss,
            )))

        def next_iteration(self, iter: int, loss: float, acc: float):
            self.iter, self.loss, self.acc = iter, loss, acc
            self.put()

        def next_best_model(self, best_iter: int, best_acc: float, best_iters: int):
            self.best_iter, self.best_acc, self.best_iters = best_iter, best_acc, best_iters
            self.put()

        def early_stopping(self):
            pass

    callback = Callback()
    logger.info("Finding PcGts files with valid ground truth")
    callback.resolving_files()
    # Split pages with locked (verified) symbol ground truth; restrict to the
    # selected book unless all training data was explicitly requested.
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        self.params.nTrain, [LockState('Symbols', True)],
        datasets=[self.selection.book] if not self.params.includeAllTrainingData else [])
    if len(train_pcgts) + len(val_pcgts) < 50:
        # only very few files, use all for training and evaluate on training as-well
        train_pcgts = train_pcgts + val_pcgts
        val_pcgts = train_pcgts
        logger.info("Combining training and validation files because n<50")
    logger.info(
        "Starting training with {} training and {} validation files".
        format(len(train_pcgts), len(val_pcgts)))
    logger.debug("Training files: {}".format(
        [p.page.location.local_path() for p in train_pcgts]))
    logger.debug("Validation files: {}".format(
        [p.page.location.local_path() for p in val_pcgts]))
    meta = self.algorithm_meta()
    # NOTE(review): the train_pcgts/val_pcgts computed above (including the
    # n<50 merge) appear to be used only for logging — the actual training
    # data comes from params.to_train_val() below. Confirm this is intended.
    train, val = self.params.to_train_val(
        locks=[LockState('StaffLines', True)],
        books=[self.selection.book])
    settings = AlgorithmTrainerSettings(
        train_data=train,
        validation_data=val,
        dataset_params=DatasetParams(
            gt_required=True,
            pad=None,
            pad_power_of_2=3,
            height=80,
            dewarp=False,
            cut_region=False,
            center=True,
            staff_lines_only=True,
        ),
    )
    trainer = meta.create_trainer(settings)
    if self.params.pretrainedModel:
        # Warm-start from a previously trained model if one was selected.
        trainer.settings.params.load = self.params.pretrainedModel.id
    trainer.train(self.selection.book, callback=callback)
    logger.info("Training finished for book {}".format(
        self.selection.book.local_path()))
    return {}
def required_locks() -> List[LockState]:
    """Return the lock states a page must satisfy for this step: TEXT locked."""
    text_lock = LockState(Locks.TEXT, True)
    return [text_lock]