def get(self, request, book, page):
    page = DatabasePage(DatabaseBook(book), page)
    file = DatabaseFile(page, 'pcgts')

    if not file.exists():
        file.create()

    try:
        pcgts = PcGts.from_file(file)
        return Response(pcgts.to_json())
    except JSONDecodeError as e:
        # the stored pcgts file is corrupt: log, recreate it, and serve the fresh default
        logger.error(e)
        file.delete()
        file.create()
        return Response(PcGts.from_file(file).to_json())
def dataset_by_locked_pages(n_train,
                            locks: List[LockState],
                            shuffle: bool = True,
                            datasets: List[DatabaseBook] = None
                            ) -> Tuple[List[PcGts], List[PcGts]]:
    logger.info("Finding PcGts files with valid ground truth")
    pcgts = []
    for dataset in (datasets if datasets else DatabaseBook.list_available()):
        logger.debug("Listing files of dataset '{}'".format(dataset.book))
        if not dataset.exists():
            raise ValueError("Dataset '{}' does not exist at '{}'".format(dataset.book, dataset.local_path()))

        for page in dataset.pages_with_lock(locks):
            pcgts.append(PcGts.from_file(page.file('pcgts')))

    if len(pcgts) == 0:
        raise EmptyDataSetException()

    if shuffle:
        random.shuffle(pcgts)

    train_pcgts = pcgts[:int(len(pcgts) * n_train)]
    val_pcgts = pcgts[len(train_pcgts):]

    if 0 < n_train < 1 and (len(train_pcgts) == 0 or len(val_pcgts) == 0):
        raise EmptyDataSetException()

    return train_pcgts, val_pcgts
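# Usage sketch (not from the source): take 80% of the ground-truth pages of a
# single book for training and the rest for validation. The lock label
# 'StaffLines' and the LockState constructor arguments are assumptions based
# on the parameter types above; use the lock states your installation defines.
if __name__ == '__main__':
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        n_train=0.8,
        locks=[LockState('StaffLines', True)],  # hypothetical lock
        shuffle=True,
        datasets=[DatabaseBook('demo')],
    )
    print('train: {}, val: {}'.format(len(train_pcgts), len(val_pcgts)))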
def test_upgrade(self):
    with open(os.path.join(raw_storage, 'page_test_upgrade_001', 'pcgts.json')) as f:
        json0 = json.load(f)

    json1 = deepcopy(json0)
    self.assertTrue(update_pcgts(json1))
    self.maxDiff = None
    self.assertEqual(json1, PcGts.from_json(json1, None).to_json())
def put(self, request, book, page):
    book = DatabaseBook(book)
    page = DatabasePage(book, page)
    # json.loads accepts bytes directly; its encoding kwarg was ignored and
    # has been removed in Python 3.9
    obj = json.loads(request.body)
    pcgts = PcGts.from_json(obj, page)
    pcgts.to_file(page.file('pcgts').local_path())

    # add to backup archive
    with zipfile.ZipFile(page.file('pcgts_backup').local_path(), 'a',
                         compression=zipfile.ZIP_DEFLATED) as zf:
        zf.writestr('pcgts_{}.json'.format(datetime.datetime.now()),
                    json.dumps(pcgts.to_json(), indent=2))

    logger.debug('Successfully saved pcgts file to {}'.format(page.file('pcgts').local_path()))
    return Response()
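# Companion sketch (an assumption, not part of the source): read back the
# newest revision from the backup archive written by put() above. The entry
# names 'pcgts_<timestamp>.json' use str(datetime.now()), which sorts
# chronologically, so the lexicographically largest name is the latest backup.
def latest_pcgts_backup(page: 'DatabasePage') -> dict:
    import json
    import zipfile
    with zipfile.ZipFile(page.file('pcgts_backup').local_path(), 'r') as zf:
        newest = max(zf.namelist())
        return json.loads(zf.read(newest))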
def create(self):
    with mutex_dict.get(self.local_path(), Lock()):
        if self.exists():  # check if exists
            return

        # check if requirement files exist
        for file in self.definition.requires:
            DatabaseFile(self.page, file).create()

        # check again if exists since the requirements might have created that file!
        if self.exists():
            return

        from omr.steps.preprocessing.preprocessing import Preprocessing

        # create local file
        logger.info('Creating local file {}'.format(self.local_path()))
        if self.definition.id == 'statistics' \
                or self.definition.id == 'page_progress':
            import json
            with open(self.local_path(), 'w') as f:
                json.dump({}, f)
        elif self.definition.id == 'page_progress_backup' \
                or self.definition.id == 'statistics_backup':
            import zipfile
            zf = zipfile.ZipFile(self.local_path(), mode='w', compression=zipfile.ZIP_DEFLATED)
            zf.close()
        elif self.definition.id == 'annotation':
            import json
            with open(self.local_path(), 'w') as f:
                json.dump({}, f)
        elif self.definition.id == 'pcgts':
            from database.file_formats.pcgts import PcGts, Page, Meta
            img = Image.open(DatabaseFile(self.page, 'color_original').local_path())
            pcgts = PcGts(
                meta=Meta(),
                page=Page(location=self.page),
            )
            pcgts.page.image_width, pcgts.page.image_height = img.size
            pcgts.to_file(self.local_path())
        elif self.definition.id == 'pcgts_backup':
            import zipfile
            zf = zipfile.ZipFile(self.local_path(), mode='w', compression=zipfile.ZIP_DEFLATED)
            zf.close()
        elif self.definition.id == 'color_original':
            # create preview
            img = Image.open(self.local_path())
            img.thumbnail(thumbnail_size)
            img.save(self.local_thumbnail_path())
        elif self.definition.id == 'color_highres_preproc':
            meta = self.page.meta()
            preproc = Preprocessing()
            img = Image.open(self.page.local_file_path('color_original.jpg'))
            w, h = img.size
            out_w = min(high_res_max_width, w)
            out_h = (out_w * h) // w
            c_hr = img.resize((out_w, out_h), Image.BILINEAR)
            c_hr, g_hr, b_hr = preproc.preprocess(c_hr)
            meta.preprocessing.deskewing_degrees = preproc.deskewed_angle
            meta.save(self.page)
            self._save_and_thumbnail(c_hr, 0)
            self._save_and_thumbnail(g_hr, 1)
            self._save_and_thumbnail(b_hr, 2)
        elif self.definition.id == 'color_lowres_preproc':
            c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))
            b_hr = Image.open(self.page.local_file_path('binary_highres_preproc.png'))
            g_hr = Image.open(self.page.local_file_path('gray_highres_preproc.jpg'))
            w, h = c_hr.size
            out_w = min(low_res_max_width, w)
            out_h = (out_w * h) // w
            size = (out_w, out_h)
            c_hr = c_hr.resize(size, Image.BILINEAR)
            g_hr = g_hr.resize(size, Image.BILINEAR)
            b_hr = b_hr.resize(size, Image.NEAREST)
            self._save_and_thumbnail(c_hr, 0)
            self._save_and_thumbnail(g_hr, 1)
            self._save_and_thumbnail(b_hr, 2)
        elif self.definition.id == 'color_norm':
            meta = self.page.meta()
            c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))
            if meta.preprocessing.auto_line_distance:
                from omr.steps.preprocessing.scale.scale import LineDistanceComputer
                ldc = LineDistanceComputer()
                low_binary = Image.open(self.page.local_file_path('binary_highres_preproc.png'))
                line_distance = ldc.get_line_distance(np.array(low_binary) / 255).line_distance
                meta.preprocessing.average_line_distance = line_distance
                meta.save(self.page)
            else:
                # average_line_distance is expected to be computed on the original image
                c_orig = Image.open(self.page.local_file_path('color_original.jpg'))
                line_distance = int(np.round(meta.preprocessing.average_line_distance * c_hr.size[0] / c_orig.size[0]))

            assert line_distance > 0

            # rescale original image
            scaling = line_distance / target_staff_line_distance
            size = (int(c_hr.size[0] / scaling), int(c_hr.size[1] / scaling))
            c_hr = c_hr.resize(size, Image.BILINEAR)

            # compute gray and binary based on normalized color image
            preproc = Preprocessing()
            g_hr = preproc.im2gray(c_hr)
            b_hr = preproc.binarize(c_hr)

            # save output
            self._save_and_thumbnail(c_hr, 0)
            self._save_and_thumbnail(g_hr, 1)
            self._save_and_thumbnail(b_hr, 2)
        elif self.definition.id == 'color_norm_x2':
            meta = self.page.meta()
            line_distance = meta.preprocessing.average_line_distance
            if line_distance <= 0:
                nf = self.page.file('color_norm')
                nf.delete()
                nf.create()
                meta = self.page.meta()
                line_distance = meta.preprocessing.average_line_distance

            assert line_distance > 0
            c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))

            # rescale original image
            scaling = line_distance / (target_staff_line_distance * 2)
            size = (int(c_hr.size[0] / scaling), int(c_hr.size[1] / scaling))
            c_hr = c_hr.resize(size, Image.BILINEAR)

            # compute gray and binary based on normalized color image
            preproc = Preprocessing()
            g_hr = preproc.im2gray(c_hr)
            b_hr = preproc.binarize(c_hr)

            # save output
            self._save_and_thumbnail(c_hr, 0)
            self._save_and_thumbnail(g_hr, 1)
            self._save_and_thumbnail(b_hr, 2)
        elif self.definition.id == 'connected_components_norm':
            import pickle
            from omr.steps.preprocessing.util.connected_compontents import connected_compontents_with_stats
            binary = np.array(Image.open(DatabaseFile(self.page, 'binary_norm').local_path()))
            with open(self.local_path(), 'wb') as f:
                pickle.dump(connected_compontents_with_stats(binary), f)
        else:
            raise Exception("Cannot create file for {}".format(self.definition.id))
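# Usage sketch (hypothetical book name, assumed import path): creation is
# dependency driven via definition.requires, so requesting a derived file also
# creates everything it needs; e.g. 'color_norm' first triggers the
# 'color_highres_preproc' step it depends on.
if __name__ == '__main__':
    from database import DatabaseBook
    page = DatabaseBook('demo').pages()[0]
    DatabaseFile(page, 'color_norm').create()  # recursively creates its requirements
    print(page.file('color_norm').local_path())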
            return target_cell_origin + rel * target_cell_extend
        return p

    def transform_points(self, ps):
        return np.array([self.transform_point(p) for p in ps])


if __name__ == '__main__':
    from database import DatabaseBook
    from database.file_formats.pcgts import PageScaleReference
    import matplotlib.pyplot as plt

    page = DatabaseBook('Gothic_Test').pages()[0]
    binary = Image.open(page.file('binary_highres_preproc', create_if_not_existing=True).local_path())
    gray = Image.open(page.file('gray_highres_preproc').local_path())
    pcgts = PcGts.from_file(page.file('pcgts', create_if_not_existing=True))
    overlay = np.array(gray)

    points_to_transform = np.array([(100, 50), (200, 50), (300, 50), (400, 50), (600, 50), (800, 50),
                                    (100, 100), (200, 150), (300, 200)], dtype=int)
    # staffs.draw(overlay)
    images = [binary, gray, Image.fromarray(overlay)]
    f, ax = plt.subplots(2, len(images), True, True)
    for a, l in enumerate(images):
        l = np.array(l)
        for p in points_to_transform:
            # mark each point with a small white square
            l[p[1] - 5:p[1] + 5, p[0] - 5:p[0] + 5] = 255
        ax[0, a].imshow(l)

    dewarper = Dewarper(images[0].size, pcgts.page.all_staves_staff_line_coords(scale=PageScaleReference.HIGHRES))
    images = dewarper.dewarp(images)
    pad=[0, 10, 0, 40],
    dewarp=True,
    center=True,
    staff_lines_only=True,
    cut_region=False,
    height=120,
)
print(params.to_json())

at = AlgorithmTypes.SYMBOLS_PC

if at == AlgorithmTypes.SYMBOLS_SEQUENCE_TO_SEQUENCE:
    f, ax = plt.subplots(9, max(2, len(pages)), sharex='all', sharey='all')
    for i, p in enumerate(pages):
        pcgts = PcGts.from_file(p.file('pcgts'))
        dataset = SymbolDetectionDataset([pcgts], params)
        calamari_dataset = dataset.to_calamari_dataset(train=True)
        for a, (sample, out) in enumerate(zip(calamari_dataset.samples(), dataset.load())):
            img, region, mask = out.line_image, out.region, out.mask
            img = sample['image'].transpose()
            ax[a, i].imshow(img)
elif at == AlgorithmTypes.SYMBOLS_PC:
    page = pages[0]
    pcgts = PcGts.from_file(page.file('pcgts'))
    dataset = SymbolDetectionDataset([pcgts], params)
    ps_dataset = dataset.to_page_segmentation_dataset()
    canvas_ol = PcGtsCanvas(pcgts.page, scale_reference=PageScaleReference.NORMALIZED_X2)
def pcgts_from_dict(self, d: dict) -> 'PcGts':
    from database.file_formats.pcgts import PcGts
    self._pcgts = PcGts.from_json(d, self)
    return self._pcgts
def pcgts(self, create_if_not_existing=True) -> 'PcGts':
    if not self._pcgts:
        from database.file_formats.pcgts import PcGts
        self._pcgts = PcGts.from_file(self.file('pcgts', create_if_not_existing))
    return self._pcgts
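# Usage sketch: both accessors above cache the parsed PcGts instance on the
# page object, so pcgts.json is read and parsed at most once. The book and
# page names here are assumptions.
if __name__ == '__main__':
    from database import DatabaseBook
    page = DatabaseBook('demo').page('page00000001')
    pcgts = page.pcgts()           # loads pcgts.json, creating it if missing
    assert pcgts is page.pcgts()   # second call returns the cached instance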
            p_to_np(p.get('text', []), page),
            BlockType.MUSIC: p_to_np(p.get('system', []), page),
        },
    )


if __name__ == "__main__":
    from database import DatabaseBook
    from PIL import Image
    import matplotlib.pyplot as plt

    b = DatabaseBook('demo')
    p = b.page('page00000001')
    img = np.array(Image.open(p.file('color_norm').local_path()))
    mask = np.zeros(img.shape, float) + 255  # np.float was deprecated and removed in recent NumPy
    val_pcgts = [PcGts.from_file(p.file('pcgts'))]
    params = AlgorithmPredictorSettings(
        model=Meta.best_model_for_book(b),
    )
    pred = Predictor(params)

    def s(c):
        return val_pcgts[0].page.page_to_image_scale(c, pred.dataset_params.page_scale_reference)

    for p in pred.predict(val_pcgts):
        for i, mr_c in enumerate(p.blocks.get(BlockType.MUSIC, [])):
            s(mr_c.coords).draw(mask, (255, 0, 0), fill=True, thickness=0)
        for i, mr_c in enumerate(p.blocks.get(BlockType.LYRICS, [])):
            s(mr_c.coords).draw(mask, (0, 255, 0), fill=True, thickness=0)
def create(self):
    with mutex_dict.get(self.local_path(), Lock()):
        if self.exists() and not self.definition.recalculate:  # check if exists
            return

        # check if requirement files exist
        for file in self.definition.requires:
            DatabaseFile(self.page, file).create()

        # check again if exists since the requirements might have created that file!
        if self.exists() and not self.definition.recalculate:
            return

        from omr.steps.preprocessing.preprocessing import Preprocessing

        # create local file
        logger.info('Creating local file {}'.format(self.local_path()))
        if self.definition.id == 'statistics' \
                or self.definition.id == 'page_progress':
            import json
            with open(self.local_path(), 'w') as f:
                json.dump({}, f)
        elif self.definition.id == 'page_progress_backup' \
                or self.definition.id == 'statistics_backup':
            import zipfile
            zf = zipfile.ZipFile(self.local_path(), mode='w', compression=zipfile.ZIP_DEFLATED)
            zf.close()
        elif self.definition.id == 'annotation':
            import json
            with open(self.local_path(), 'w') as f:
                json.dump({}, f)
        elif self.definition.id == 'pcgts':
            from database.file_formats.pcgts import PcGts, Page, Meta
            img = Image.open(DatabaseFile(self.page, 'color_original').local_path())
            pcgts = PcGts(
                meta=Meta(),
                page=Page(location=self.page),
            )
            pcgts.page.image_width, pcgts.page.image_height = img.size
            pcgts.to_file(self.local_path())
        elif self.definition.id == 'pcgts_backup':
            import zipfile
            zf = zipfile.ZipFile(self.local_path(), mode='w', compression=zipfile.ZIP_DEFLATED)
            zf.close()
        elif self.definition.id == 'color_original':
            # create preview
            img = Image.open(self.local_path())
            img.thumbnail(thumbnail_size)
            img.save(self.local_thumbnail_path())
        elif self.definition.id == 'color_highres_preproc':
            meta = self.page.meta()
            preproc = Preprocessing()
            img = Image.open(self.page.local_file_path('color_original.jpg'))
            w, h = img.size
            out_w = min(high_res_max_width, w)
            out_h = (out_w * h) // w
            c_hr = img.resize((out_w, out_h), Image.BILINEAR)
            c_hr, g_hr, b_hr = preproc.preprocess(c_hr)
            meta.preprocessing.deskewing_degrees = preproc.deskewed_angle
            meta.save(self.page)
            self._save_and_thumbnail(c_hr, 0)
            self._save_and_thumbnail(g_hr, 1)
            self._save_and_thumbnail(b_hr, 2)
        elif self.definition.id == 'color_lowres_preproc':
            c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))
            b_hr = Image.open(self.page.local_file_path('binary_highres_preproc.png'))
            g_hr = Image.open(self.page.local_file_path('gray_highres_preproc.jpg'))
            w, h = c_hr.size
            out_w = min(low_res_max_width, w)
            out_h = (out_w * h) // w
            size = (out_w, out_h)
            c_hr = c_hr.resize(size, Image.BILINEAR)
            g_hr = g_hr.resize(size, Image.BILINEAR)
            b_hr = b_hr.resize(size, Image.NEAREST)
            self._save_and_thumbnail(c_hr, 0)
            self._save_and_thumbnail(g_hr, 1)
            self._save_and_thumbnail(b_hr, 2)
        elif self.definition.id == 'color_norm':
            meta = self.page.meta()
            c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))
            if meta.preprocessing.auto_line_distance:
                from omr.steps.preprocessing.scale.scale import LineDistanceComputer
                ldc = LineDistanceComputer()
                low_binary = Image.open(self.page.local_file_path('binary_highres_preproc.png'))
                line_distance = ldc.get_line_distance(np.array(low_binary) / 255).line_distance
                meta.preprocessing.average_line_distance = line_distance
                meta.save(self.page)
            else:
                # average_line_distance is expected to be computed on the original image
                c_orig = Image.open(self.page.local_file_path('color_original.jpg'))
                line_distance = int(np.round(meta.preprocessing.average_line_distance * c_hr.size[0] / c_orig.size[0]))

            assert line_distance > 0

            # rescale original image
            scaling = line_distance / target_staff_line_distance
            size = (int(c_hr.size[0] / scaling), int(c_hr.size[1] / scaling))
            c_hr = c_hr.resize(size, Image.BILINEAR)

            # compute gray and binary based on normalized color image
            preproc = Preprocessing()
            g_hr = preproc.im2gray(c_hr)
            b_hr = preproc.binarize(c_hr)

            # save output
            self._save_and_thumbnail(c_hr, 0)
            self._save_and_thumbnail(g_hr, 1)
            self._save_and_thumbnail(b_hr, 2)
        elif self.definition.id == 'color_norm_x2':
            meta = self.page.meta()
            line_distance = meta.preprocessing.average_line_distance
            if line_distance <= 0:
                nf = self.page.file('color_norm')
                nf.delete()
                nf.create()
                meta = self.page.meta()
                line_distance = meta.preprocessing.average_line_distance

            assert line_distance > 0
            c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))

            # rescale original image
            scaling = line_distance / (target_staff_line_distance * 2)
            size = (int(c_hr.size[0] / scaling), int(c_hr.size[1] / scaling))
            c_hr = c_hr.resize(size, Image.BILINEAR)

            # compute gray and binary based on normalized color image
            preproc = Preprocessing()
            g_hr = preproc.im2gray(c_hr)
            b_hr = preproc.binarize(c_hr)

            # save output
            self._save_and_thumbnail(c_hr, 0)
            self._save_and_thumbnail(g_hr, 1)
            self._save_and_thumbnail(b_hr, 2)
        elif self.definition.id == 'connected_components_norm':
            import pickle
            from omr.steps.preprocessing.util.connected_compontents import connected_compontents_with_stats
            binary = np.array(Image.open(DatabaseFile(self.page, 'binary_norm').local_path()))
            with open(self.local_path(), 'wb') as f:
                pickle.dump(connected_compontents_with_stats(binary), f)
        elif self.definition.id == 'monodiplus':
            import json
            import database.file_formats.pcgts as ns_pcgts
            # with open(DatabaseFile(self.page, 'pcgts').local_path()) as json_file:
            #     pcgts = json.load(json_file)
            pcgts = ns_pcgts.PcGts.from_file(DatabaseFile(self.page, 'pcgts'))
            root = PcgtsToMonodiConverter([pcgts]).root
            # import hashlib
            with open(self.local_path(), 'w', encoding='utf-8') as f:
                json.dump(root.to_json(), f, ensure_ascii=False, indent=4)
        elif self.definition.id == 'monodiplus_svg':
            path = DatabaseFile(self.page, 'monodiplus').local_path()
            from ommr4all.settings import BASE_DIR
            script_path = os.path.join(BASE_DIR, 'internal_storage', 'resources', 'monodi_svg_render', 'bin', 'one-shot')
            import subprocess
            proc = subprocess.Popen([script_path, path, "-o", self.local_path()], stdout=subprocess.PIPE)
            result, err = proc.communicate()
            # the error code of the render script is ignored for now
            exit_code = proc.wait()
        else:
            raise Exception("Cannot create file for {}".format(self.definition.id))
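# Usage sketch (hypothetical book name, assumed import path): trigger the
# Monodi+ export of a single page. Assuming 'monodiplus' lists 'pcgts' in its
# requires, create() builds the pcgts file first if it does not exist yet.
if __name__ == '__main__':
    from database import DatabaseBook
    page = DatabaseBook('demo').pages()[0]
    DatabaseFile(page, 'monodiplus').create()
    print(page.file('monodiplus').local_path())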