def read_all_page_stats(books: List[str], ignore_page: List[str] = None):
    """Collect page statistics for every page of the given books.

    Args:
        books: names of the database books to load.
        ignore_page: optional list of substrings; a page is skipped if its
            name contains any of them. Defaults to no filtering.

    Returns:
        A list of the page-statistics objects of all non-ignored pages.
    """
    # A literal [] default would be shared across calls (mutable default
    # argument pitfall); normalize None to a fresh empty list instead.
    ignore_page = ignore_page if ignore_page is not None else []
    all_stats = []
    for book_name in books:
        book = DatabaseBook(book_name)
        for page in book.pages():
            if any(s in page.page for s in ignore_page):
                continue  # page explicitly excluded by name fragment
            all_stats.append(page.page_statistics())
    return all_stats
def post(self, request, book, operation):
    """Return the set of pages a page-selection request would operate on.

    Args:
        request: HTTP request whose body is a JSON-encoded
            ``PageSelectionParams`` payload.
        book: name of the database book to resolve the selection against.
        operation: algorithm type name; its predictor decides which pages
            count as "unprocessed".

    Returns:
        Response with the selected page names, the selection mode, and the
        book's total page count.
    """
    # NOTE: json.loads lost its 'encoding' parameter in Python 3.9; it
    # decodes a bytes body as UTF-8 automatically, so pass the body as-is.
    body = json.loads(request.body)
    book = DatabaseBook(book)
    algorithm = Step.predictor(AlgorithmTypes(operation))
    page_selection = PageSelection.from_params(
        PageSelectionParams.from_dict(body), book)
    # Restrict the selection to pages the algorithm has not processed yet.
    pages = page_selection.get_pages(algorithm.unprocessed)
    return Response({
        'pages': [p.page for p in pages],
        'pageCount': page_selection.page_count.value,
        'singlePage': page_selection.single_page,
        'book': book.book,
        'totalPages': len(book.pages()),
    })
x.value: x for x in MonodiXlsxConfig.default_config } self.entries = MonodiXlsxConfig.default_config if __name__ == '__main__': from matplotlib import pyplot as plt from PIL import Image import numpy as np from database import DatabaseBook, DatabaseFile from database.file_formats.pcgts import PageScaleReference book = DatabaseBook('Annotation___Square_Notation') pages = book.pages()[0] pcgts = [ DatabaseFile(page, 'pcgts', create_if_not_existing=True).page.pcgts() for page in [pages] ] file = pages.file('color_norm_x2').local_path() orig = Image.open(file) orig = np.array(orig) lines = pcgts[0].page.all_music_lines() page = pcgts[0].page for p in lines: # page = p.line.operation.page def p2i(l): return page.page_to_image_scale(l,
connection = connections[0] if document is not None: line_id_start = document.start.line_id line_id_end = document.end.line_id line_ids = [line.id for line in connection.text_region.lines] if page.p_id == document.end.page_id: if line_id_end in line_ids: break if page.p_id == document.start.page_id or document_started: if line_id_start in line_ids or document_started: add_note(mb.lines) document_started = True else: add_note(mb.lines) else: continue break return {'notes': notes, 'totalTime': total_duration} if __name__ == "__main__": from database import DatabaseBook b = DatabaseBook('Pa_14819') pcgts = [p.pcgts() for p in b.pages()][0] sme = SimpleMidiExporter([pcgts]) sme.generate_midi("/tmp/test.mid")
def test_page_selection(self):
    """Selecting with PageCount.ALL must yield every page of the book."""
    book = DatabaseBook('demo')
    params = PageSelectionParams(count=PageCount.ALL, )
    selection = PageSelection.from_params(params, book)
    expected = [page.local_path() for page in book.pages()]
    actual = [page.local_path() for page in selection.get_pages()]
    self.assertListEqual(actual, expected)
def extract_from_book(book: DatabaseBook):
    """Run extract_from_pcgts over each page of *book* with a progress bar."""
    pages = tqdm(book.pages(), desc="Processing {}".format(book.book))
    for page in pages:
        extract_from_pcgts(page.pcgts())
if debug: import matplotlib.pyplot as plt canvas = np.stack(((canvas).astype(np.uint8), ) * 3, -1) cv2.polylines(canvas, [central_text_line.astype(np.int32)], False, [255, 0, 0], thickness=4) cv2.polylines(canvas, polys, True, [0, 255, 0]) cv2.polylines(canvas, contours, True, [0, 0, 255]) plt.imshow(canvas) plt.show() return [Coords(p) for p in polys] if __name__ == '__main__': from database import DatabaseBook import pickle book = DatabaseBook('demo') page = book.pages()[0] with open( page.file('connected_components_norm', create_if_not_existing=True).local_path(), 'rb') as f: cc = pickle.load(f) line = Coords(np.array([[100, 740], [900, 738]])) staff_lines = [] for mr in PcGts.from_file(page.file('pcgts')).page.music_regions: for ml in mr.staffs: staff_lines += ml.staff_lines extract_components(cc, line, staff_lines, debug=True)
continue line_symbols = symbols[current_symbol_index:neume_pos] # add symbols until position of connection add_line_symbols(line_symbols) current_symbol_index = neume_pos # add the syllable self.get_or_create_current_line_container().children.append( Syllable(sc.syllable.text, SpacedNotes([])) ) add_line_symbols(symbols[current_symbol_index:]) else: tr = element text = " ".join([tl.text() for tl in tr.lines]) if len(text) == 0: continue self.line_containers.append( ParatextContainer( text=text ) ) if __name__=="__main__": from database import DatabaseBook b = DatabaseBook('test2') pcgts = [ns_pcgts.PcGts.from_file(p.file('pcgts')) for p in b.pages()[4:5]] root = PcgtsToMonodiConverter(pcgts).root print(json.dumps(root.to_json(), indent=2))
# Debug driver: run the Calamari OCR predictor on the first page of a fixed
# book using a pretrained fraktur model, then draw the predicted text lines.
np.random.seed(1)  # deterministic seed for reproducible debugging runs

# Dead branch kept for reference: would build train/validation splits from
# pages whose symbol and layout stages are locked, instead of a fixed book.
if False:
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8, [LockState(Locks.SYMBOLS, True), LockState(Locks.LAYOUT, True)],
        True, [
            # DatabaseBook('Graduel_Part_1'),
            # DatabaseBook('Graduel_Part_2'),
            # DatabaseBook('Graduel_Part_3'),
        ])

book = DatabaseBook('Gothic_Test')
meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
# model = meta.newest_model_for_book(book)
# Load a pretrained model from internal storage rather than a book-trained one.
model = Model(
    MetaId.from_custom_path(
        BASE_DIR + '/internal_storage/pretrained_models/text_calamari/fraktur_historical',
        meta.type()))
settings = AlgorithmPredictorSettings(model=model, )
pred = meta.create_predictor(settings)
# Predict only the first page of the book.
ps: List[PredictionResult] = list(pred.predict(book.pages()[0:1]))
for i, p in enumerate(ps):
    # NOTE(review): assumes every result has at least one text line —
    # p.text_lines[0] raises IndexError otherwise; confirm upstream.
    canvas = PcGtsCanvas(p.pcgts.page,
                         p.text_lines[0].line.operation.scale_reference)
    for j, s in enumerate(p.text_lines):
        canvas.draw(s)
    canvas.show()
callback: Optional[PredictionCallback] = None ) -> AlgorithmPredictionResultGenerator: book = pages[0].book self.dict_corrector.load_dict(book=book) for page in pages: pcgts = page.pcgts() text_lines: List[Line] = pcgts.page.all_text_lines() single_line_pred_result: List[PredictionResultSingleLine] = [] for t_line in text_lines: text = t_line.text() text = text.replace("-", "") sentence = self.dict_corrector.segmentate_correct_and_hyphenate_text( text) single_line_pred_result.append( PredictionResultSingleLine(t_line, hyphenated=sentence)) yield PredictionResult(pcgts, page, single_line_pred_result) if __name__ == '__main__': from omr.dataset import DatasetParams, RegionLineMaskData from omr.dataset.datafiles import dataset_by_locked_pages, LockState b = DatabaseBook('demo') val_pcgts = [PcGts.from_file(p.file('pcgts')) for p in b.pages()[0:1]] pred = Predictor(AlgorithmPredictorSettings(Meta.best_model_for_book(b))) ps = list(pred.predict([p.page.location for p in val_pcgts])) import matplotlib.pyplot as plt
break if page.p_id == document.start.page_id or document_started: if line_id_start in line_ids or document_started: add_block(symbols) document_started = True else: add_block(symbols) else: tr = element text = " ".join([tl.text() for tl in tr.lines]) if len(text) == 0: continue self.line_containers.append(ParatextContainer(text=text)) else: continue break if __name__ == "__main__": from database import DatabaseBook b = DatabaseBook('demo2') pcgts = [ns_pcgts.PcGts.from_file(x.file('pcgts')) for x in b.pages()] root = PcgtsToMonodiConverter(pcgts, document=True).root print(json.dumps(root.to_json(), indent=2)) with open('data.json', 'w', encoding='utf-8') as f: json.dump(root.to_json(), f, ensure_ascii=False, indent=4)
Size = 7 global_counts = np.zeros(Counts.Size, dtype=np.uint32) table = PrettyTable(["Dataset"] + [str(Counts(i))[7:] for i in range(Counts.Size)]) for book_name in args.datasets: book = DatabaseBook(book_name) counts = np.zeros(Counts.Size, dtype=np.uint32) if not book.exists(): raise ValueError("Dataset '{}' does not exist at '{}'".format( book.book, book.local_path())) for page in book.pages(): pcgts = PcGts.from_file(page.file('pcgts')) counts[Counts.Pages] += 1 for mr in pcgts.page.music_regions: for ml in mr.staffs: counts[Counts.Staves] += 1 counts[Counts.StaffLines] += len(ml.staff_lines) for s in ml.symbols: if isinstance(s, Neume): n: Neume = s counts[Counts.Symbols] += len(n.notes) counts[Counts.NoteComponents] += len(n.notes) else: counts[Counts.Symbols] += 1
self.local_path()], stdout=subprocess.PIPE) result, err = proc.communicate() # error code in the java script is to be ignored for now exit_code = proc.wait() else: raise Exception("Cannot create file for {}".format( self.definition.id)) if __name__ == "__main__": from database import DatabaseBook import database.file_formats.pcgts as ns_pcgts b = DatabaseBook('demo') page = b.pages()[0] path = DatabaseFile(page, 'monodiplus').local_path() from ommr4all.settings import BASE_DIR script_path = os.path.join(BASE_DIR, 'internal_storage', 'resources', 'monodi_svg_render', 'bin', 'one-shot') import subprocess proc = subprocess.Popen([script_path, path, "-w", "500"], stdout=subprocess.PIPE) result, err = proc.communicate() str_result = str(result) reg = re.match(r".*(<svg.*</svg>).*", str_result).group(1) #start_prefix = "<?xml" #print(str_result[str_result.find(start_prefix):]) #print(result)
# normalize errors total_diffs = total_diffs / total_diffs[-1] # transfer total / errors => acc = 1 - errors / total total_diffs[-2] = 1 - 1 / total_diffs[-2] cm.plot_confusion_matrix() cm.plot_confusion_matrix(normalize=True) return f_metrics.mean(axis=0), counts.sum( axis=0), acc_counts, acc_acc, total_diffs if __name__ == '__main__': from omr.symboldetection.predictor import SymbolDetectionPredictor, create_predictor, PredictorTypes, SymbolDetectionPredictorParameters from prettytable import PrettyTable from database import DatabaseBook b = DatabaseBook('Graduel') eval_pcgts = [PcGts.from_file(p.file('pcgts')) for p in b.pages()[12:13]] print([e.page.location.local_path() for e in eval_pcgts]) predictor = create_predictor( PredictorTypes.PIXEL_CLASSIFIER, SymbolDetectionPredictorParameters( [b.local_path(os.path.join('pc_symbol_detection', 'model'))])) gt_symbols, pred_symbols = [], [] for p in predictor.predict(eval_pcgts): pred_symbols.append(p.symbols) gt_symbols.append(p.line.operation.music_line.symbols) evaluator = SymbolDetectionEvaluator() metrics, counts, acc_counts, acc_acc, diffs = evaluator.evaluate( gt_symbols, pred_symbols) at = PrettyTable()