def __init__(self, glyph_classifier_fn=None):
    """Builds the TF graph and session that the OMR engine runs on.

    Args:
      glyph_classifier_fn: Optional callable that loads the glyph classifier
        into the graph. It receives the `Structure` as its only argument and
        returns an instance of `BaseGlyphClassifier`. Typically it loads a TF
        saved model or other external data and wraps the classification in a
        concrete glyph classifier subclass. A classifier that uses a
        `StafflineExtractor` must set the `staffline_extractor` attribute of
        the `Structure`; otherwise glyph x coordinates will not be scaled
        back to image coordinates. When omitted (or falsy), the loader
        returned by `saved_classifier_fn.build_classifier_fn()` is used.
    """
    if not glyph_classifier_fn:
        glyph_classifier_fn = saved_classifier_fn.build_classifier_fn()

    self.graph = tf.Graph()
    self.session = tf.Session(graph=self.graph)
    with self.graph.as_default(), self.session.as_default():
        with tf.name_scope('OMREngine'):
            # Scalar string placeholder holding the path of the page PNG.
            self.png_path = tf.placeholder(
                tf.string, shape=(), name='png_path')
            png_contents = tf.read_file(self.png_path, name='page_image')
            self.image = image.decode_music_score_png(png_contents)
            self.structure = structure_module.create_structure(self.image)
        # The classifier is loaded only after leaving the name scope, because
        # scopes can rename tensors from the saved model and leave dangling
        # references.
        # TODO(ringw): TF should be able to load models gracefully within a
        # name scope.
        self.glyph_classifier = glyph_classifier_fn(self.structure)
def testGetPage_x_scale(self):
    """Checks glyph x coordinates after `StaffProcessor` has been applied.

    The first staff is expected to match `GLYPHS_PAGE` as-is; every glyph of
    the second staff is expected with its x coordinate multiplied by 4.
    """
    # Random staffline images matching the dimensions of PREDICTIONS.
    # NOTE(review): this array is not referenced again in this test. The call
    # is kept so the sequence of draws from NumPy's global RNG is unchanged.
    dummy_stafflines = np.random.random((2, 3, 5, 6))
    classifier = glyphs_testing.DummyGlyphClassifier(
        glyphs_testing.PREDICTIONS)

    fake_image = np.random.randint(0, 255, (30, 20), dtype=np.uint8)
    staff_coords = np.asarray(
        [[[0, 10], [19, 10]], [[0, 20], [19, 20]]], np.int32)
    staves = staves_testing.FakeStaves(
        image_t=fake_image,
        staves_t=staff_coords,
        staffline_distance_t=np.asarray([5, 20], np.int32),
        staffline_thickness_t=np.asarray(1, np.int32))
    structure = structure_module.create_structure(
        fake_image, lambda unused_image: staves)

    class DummyStafflineExtractor(object):
        """Stand-in for StafflineExtractor.

        Carries only the constants needed to scale the x coordinates.
        """
        staffline_distance_multiple = 2
        target_height = 10

    omr = engine.OMREngine(lambda _: classifier)
    # Feed in a dummy image. It doesn't matter because FakeStaves has
    # hard-coded staff values.
    dummy_page_image = np.random.randint(0, 255, (100, 100))
    page = omr.process_image(dummy_page_image, process_structure=False)
    processor = staff_processor.StaffProcessor(
        structure, DummyStafflineExtractor())
    page = processor.apply(page)

    actual_staves = page.system[0].staff
    expected_staves = glyphs_testing.GLYPHS_PAGE.system[0].staff
    self.assertEqual(len(actual_staves), 2)

    # The first staff has a staffline distance of 5. Its extracted staffline
    # slices have an original height of
    # staffline_distance * staffline_distance_multiple (10), which equals
    # target_height here, so no scaling takes place.
    self.assertEqual(
        musicscore_pb2.Staff(glyph=actual_staves[0].glyph),
        expected_staves[0])

    # Glyphs in the second staff have a scaled x coordinate.
    self.assertEqual(
        len(actual_staves[1].glyph), len(expected_staves[1].glyph))
    for unscaled_glyph in expected_staves[1].glyph:
        # The second staff has a staffline distance of 20. The extracted
        # staffline slice would be 4 times the size of the scaled staffline,
        # so x coordinates are scaled by 4. The glyphs may also come back in
        # a different order, hence the membership check.
        expected_glyph = copy.deepcopy(unscaled_glyph)
        expected_glyph.x *= 4
        self.assertIn(expected_glyph, actual_staves[1].glyph)
def testCompute(self):
    """Runs `compute()` on a corpus page and checks result dtypes and shapes."""
    data_root = tf.resource_loader.get_data_files_path()
    png_path = os.path.join(data_root, '../testdata/IMSLP00747-000.png')
    page_image = image_module.decode_music_score_png(tf.read_file(png_path))
    page_structure = structure.create_structure(page_image)

    with self.test_session():
        # compute() is invoked while the test session is the default session.
        computed = page_structure.compute()

        staves = computed.staff_detector.staves
        self.assertEqual(np.int32, staves.dtype)
        # Expected number of staves for the corpus image.
        self.assertEqual((12, 2, 2), staves.shape)

        lines = computed.verticals.lines
        self.assertEqual(np.int32, lines.dtype)
        self.assertEqual(3, lines.ndim)
        self.assertEqual((2, 2), lines.shape[1:])
def testSaveAndLoadDummyClassifier(self):
    """Exports a constant-output saved model and reloads it as a classifier.

    The exported graph maps any batch of patches to class id 1, so every
    prediction read back through `SavedConvolutional1DClassifier` is expected
    to equal `musicscore_pb2.Glyph.NONE`.
    """
    serving_key = (
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY)

    with tempfile.TemporaryDirectory() as base_dir:
        export_dir = os.path.join(base_dir, 'export')

        # Build and export the dummy model.
        with self.test_session() as sess:
            patches = tf.placeholder(tf.float32, shape=(None, 18, 15))
            num_patches = tf.shape(patches)[0]
            # Glyph.NONE is number 1.
            class_ids = tf.ones([num_patches], tf.int32)

            input_info = tf.saved_model.utils.build_tensor_info(patches)
            output_info = tf.saved_model.utils.build_tensor_info(class_ids)
            signature = tf.saved_model.signature_def_utils.build_signature_def(
                # pyformat: disable
                {'input': input_info},
                {'class_ids': output_info},
                'serve')

            builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
            builder.add_meta_graph_and_variables(
                sess, ['serve'], signature_def_map={serving_key: signature})
            builder.save()

        tf.reset_default_graph()

        # Load the saved model.
        with self.test_session():
            png_path = os.path.join(
                tf.resource_loader.get_data_files_path(),
                '../testdata/IMSLP00747-000.png')
            page = image.decode_music_score_png(tf.read_file(png_path))
            clazz = saved_classifier.SavedConvolutional1DClassifier(
                structure.create_structure(page), export_dir)

            # Run min length should be the default.
            self.assertEqual(
                clazz.run_min_length, convolutional.DEFAULT_RUN_MIN_LENGTH)

            predictions = clazz.staffline_predictions.eval()
            self.assertEqual(predictions.ndim, 3)  # Staff, staff position, x
            self.assertGreater(predictions.size, 0)

            # Predictions are all musicscore_pb2.Glyph.NONE.
            all_none = np.full(
                predictions.shape, musicscore_pb2.Glyph.NONE, np.int32)
            self.assertAllEqual(predictions, all_none)