def testEmpty(self):
  height = 15
  width = 20
  image_t = tf.zeros((1, 2, 0, 3, 4, height, width))
  patch_width = 10
  patches_t = patches.patches_1d(image_t, patch_width)
  with self.test_session():
    self.assertEqual(patches_t.eval().shape,
                     (1, 2, 0, 3, 4, 0, height, patch_width))
def _extract_patches(stafflines, patch_width, min_num_dark_pixels=10):
  patches = util_patches.patches_1d(stafflines, patch_width)
  # Limit to patches that have min_num_dark_pixels.
  num_dark_pixels = tf.reduce_sum(
      tf.where(
          tf.less(patches, 0.5),
          tf.ones_like(patches, dtype=tf.int32),
          tf.zeros_like(patches, dtype=tf.int32)),
      axis=[-2, -1])
  return tf.boolean_mask(
      patches, tf.greater_equal(num_dark_pixels, min_num_dark_pixels))
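# Illustrative usage sketch (not part of the original module): a toy staffline
# tensor whose left half is dark (0.0) and right half is white (1.0). Only the
# patches overlapping at least `min_num_dark_pixels` dark pixels survive the
# filter; fully white patches are dropped. The example function and its shapes
# are assumptions for demonstration only.
def _example_extract_dark_patches():
  stafflines = tf.concat(
      [tf.zeros((1, 1, 16, 8)), tf.ones((1, 1, 16, 8))], axis=-1)
  # Result shape: (num_kept_patches, 16, 8). tf.boolean_mask flattens the
  # (staves, sections, patch index) dimensions covered by the mask.
  return _extract_patches(stafflines, patch_width=8, min_num_dark_pixels=10)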
def test2D(self):
  image_t = tf.random_uniform((100, 200))
  image_t.set_shape((100, 200))
  patch_width = 10
  patches_t = patches.patches_1d(image_t, patch_width)
  with self.test_session() as sess:
    image_arr, patches_arr = sess.run((image_t, patches_t))
    self.assertEqual(patches_arr.shape,
                     (200 - patch_width + 1, 100, patch_width))
    for i in moves.xrange(patches_arr.shape[0]):
      self.assertAllEqual(patches_arr[i], image_arr[:, i:i + patch_width])
def test4D(self):
  height = 15
  width = 20
  image_t = tf.random_uniform((4, 8, height, width))
  image_t.set_shape((None, None, height, width))
  patch_width = 10
  patches_t = patches.patches_1d(image_t, patch_width)
  with self.test_session() as sess:
    image_arr, patches_arr = sess.run((image_t, patches_t))
    self.assertEqual(
        patches_arr.shape,
        (4, 8, width - patch_width + 1, height, patch_width))
    for i in moves.xrange(patches_arr.shape[0]):
      for j in moves.xrange(patches_arr.shape[1]):
        for k in moves.xrange(patches_arr.shape[2]):
          self.assertAllEqual(patches_arr[i, j, k],
                              image_arr[i, j, :, k:k + patch_width])
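# Reference sketch of the shape contract asserted by test2D and test4D above,
# assuming the input has a statically known rank and shape [..., height,
# width]. This is not the implementation in `patches.py`; it only illustrates
# that the output has shape [..., width - patch_width + 1, height,
# patch_width], where patch i equals image[..., :, i:i + patch_width]. It does
# not reproduce the zero-patch result that testEmpty expects for empty inputs.
def _patches_1d_reference(image, patch_width):
  rank = image.shape.ndims  # Static rank is assumed to be known.
  width = tf.shape(image)[-1]
  num_patches = tf.maximum(width - patch_width + 1, 0)
  # Column indices for every patch: shape (num_patches, patch_width).
  indices = (tf.range(num_patches)[:, tf.newaxis] +
             tf.range(patch_width)[tf.newaxis, :])
  # Gather the columns of each patch: [..., height, num_patches, patch_width].
  gathered = tf.gather(image, indices, axis=rank - 1)
  # Swap the height and patch-index axes to match patches_1d's layout.
  perm = list(range(rank - 2)) + [rank - 1, rank - 2, rank]
  return tf.transpose(gathered, perm)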
def __init__(self,
             structure,
             saved_model_dir,
             num_sections=19,
             *args,
             **kwargs):
  """Loads a saved classifier model for the OMR engine.

  Args:
    structure: A `structure.Structure`.
    saved_model_dir: Path to the TF saved_model directory to load.
    num_sections: Number of vertical positions of patches to extract,
      centered on the middle staff line.
    *args: Passed through to `Convolutional1DGlyphClassifier`.
    **kwargs: Passed through to `Convolutional1DGlyphClassifier`.

  Raises:
    ValueError: If the saved model input could not be interpreted as a 3D
      array with the patch size.
  """
  super(SavedConvolutional1DClassifier, self).__init__(*args, **kwargs)
  sess = tf.get_default_session()
  graph_def = tf.saved_model.loader.load(
      sess, [tf.saved_model.tag_constants.SERVING], saved_model_dir)
  signature = None
  for key in _SIGNATURE_KEYS:
    if key in graph_def.signature_def:
      signature = graph_def.signature_def[key]
      break
  else:
    # for/else is only executed if the loop completes without breaking.
    raise ValueError(
        'One of the following signatures must be present: %s' %
        _SIGNATURE_KEYS)

  input_info = signature.inputs['input']
  if not (len(input_info.tensor_shape.dim) == 3 and
          input_info.tensor_shape.dim[1].size > 0 and
          input_info.tensor_shape.dim[2].size > 0):
    raise ValueError('Invalid patches input: ' + str(input_info))
  patch_height = input_info.tensor_shape.dim[1].size
  patch_width = input_info.tensor_shape.dim[2].size

  with tf.name_scope('saved_classifier'):
    self.staffline_extractor = staffline_extractor.StafflineExtractor(
        structure.staff_remover.remove_staves,
        structure.staff_detector,
        num_sections=num_sections,
        target_height=patch_height)
    stafflines = self.staffline_extractor.extract_staves()
    num_staves = tf.shape(stafflines)[0]
    num_sections = tf.shape(stafflines)[1]
    staffline_patches = patches.patches_1d(stafflines, patch_width)
    staffline_patches_shape = tf.shape(staffline_patches)
    patches_per_position = staffline_patches_shape[2]
    flat_patches = tf.reshape(staffline_patches, [
        num_staves * num_sections * patches_per_position, patch_height,
        patch_width
    ])

    # Feed in the flat extracted patches as the classifier input.
    predictions_name = signature.outputs[
        prediction_keys.PredictionKeys.CLASS_IDS].name
    predictions = tf.contrib.graph_editor.graph_replace(
        sess.graph.get_tensor_by_name(predictions_name), {
            sess.graph.get_tensor_by_name(signature.inputs['input'].name):
                flat_patches
        })
    # Reshape to the original patches shape.
    predictions = tf.reshape(predictions, staffline_patches_shape[:3])

    # Pad the output. We take only the valid patches, but we want to shift all
    # of the predictions so that a patch at index i on the x-axis is centered
    # on column i. This determines the x coordinates of the glyphs.
    width = tf.shape(stafflines)[-1]
    predictions_width = tf.shape(predictions)[-1]
    pad_before = (width - predictions_width) // 2
    pad_shape_before = tf.concat(
        [staffline_patches_shape[:2], [pad_before]], axis=0)
    pad_shape_after = tf.concat(
        [staffline_patches_shape[:2],
         [width - predictions_width - pad_before]],
        axis=0)
    self.output = tf.concat(
        [
            # NONE has value 1.
            tf.ones(pad_shape_before, tf.int64),
            tf.to_int64(predictions),
            tf.ones(pad_shape_after, tf.int64),
        ],
        axis=-1)

  # run_min_length can be set on the saved model to tweak its behavior, but
  # should be overridden by the keyword argument.
  if 'run_min_length' not in kwargs:
    try:
      # Try to read the run min length from the saved model. This is tweaked
      # on a per-model basis.
      run_min_length_t = sess.graph.get_tensor_by_name(
          _RUN_MIN_LENGTH_CONSTANT_NAME)
      run_min_length = tf.contrib.util.constant_value(run_min_length_t)
      # Implicit comparison is invalid on a NumPy array.
      # pylint: disable=g-explicit-bool-comparison
      if run_min_length is None or run_min_length.shape != ():
        raise ValueError('Bad run_min_length: {}'.format(run_min_length))
      # Overwrite the property after the Convolutional1DGlyphClassifier
      # constructor completes.
      self.run_min_length = int(run_min_length)
    except KeyError:
      pass  # No run_min_length tensor in the saved model.
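# Hypothetical usage sketch. `page_structure` (a `structure.Structure`) and the
# saved_model path are placeholders assumed to be built elsewhere; this only
# shows that the constructor must run under a default session and that
# `self.output` holds glyph predictions of shape (num_staves, num_sections,
# width), padded with NONE (value 1) at the borders.
def _example_run_saved_classifier(page_structure, saved_model_dir):
  with tf.Session() as sess:  # Installed as the default session.
    classifier = SavedConvolutional1DClassifier(
        page_structure, saved_model_dir, num_sections=19)
    return sess.run(classifier.output)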
def __init__(self,
             structure,
             saved_model_dir,
             num_sections=19,
             *args,
             **kwargs):
  """Loads a saved classifier model for the OMR engine.

  Args:
    structure: A `structure.Structure`.
    saved_model_dir: Path to the TF saved_model directory to load.
    num_sections: Number of vertical positions of patches to extract,
      centered on the middle staff line.
    *args: Passed through to `Convolutional1DGlyphClassifier`.
    **kwargs: Passed through to `Convolutional1DGlyphClassifier`.

  Raises:
    ValueError: If the saved model input could not be interpreted as a 3D
      array with the patch size.
  """
  super(SavedConvolutional1DClassifier, self).__init__(*args, **kwargs)
  sess = tf.get_default_session()
  graph_def = tf.saved_model.loader.load(
      sess, [tf.saved_model.tag_constants.SERVING], saved_model_dir)
  signature = graph_def.signature_def[
      tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
  input_info = signature.inputs['input']
  if not (len(input_info.tensor_shape.dim) == 3 and
          input_info.tensor_shape.dim[1].size > 0 and
          input_info.tensor_shape.dim[2].size > 0):
    raise ValueError('Invalid patches input: ' + str(input_info))
  patch_height = input_info.tensor_shape.dim[1].size
  patch_width = input_info.tensor_shape.dim[2].size

  with tf.name_scope('saved_classifier'):
    self.staffline_extractor = staffline_extractor.StafflineExtractor(
        structure.staff_remover.remove_staves,
        structure.staff_detector,
        num_sections=num_sections,
        target_height=patch_height)
    stafflines = self.staffline_extractor.extract_staves()
    num_staves = tf.shape(stafflines)[0]
    num_sections = tf.shape(stafflines)[1]
    staffline_patches = patches.patches_1d(stafflines, patch_width)
    staffline_patches_shape = tf.shape(staffline_patches)
    patches_per_position = staffline_patches_shape[2]
    flat_patches = tf.reshape(staffline_patches, [
        num_staves * num_sections * patches_per_position, patch_height,
        patch_width
    ])

    # Feed in the flat extracted patches as the classifier input.
    predictions_name = signature.outputs[
        prediction_keys.PredictionKeys.CLASS_IDS].name
    predictions = tf.contrib.graph_editor.graph_replace(
        sess.graph.get_tensor_by_name(predictions_name), {
            sess.graph.get_tensor_by_name(signature.inputs['input'].name):
                flat_patches
        })
    # Reshape to the original patches shape.
    predictions = tf.reshape(predictions, staffline_patches_shape[:3])

    # Pad the output. We take only the valid patches, but we want to shift all
    # of the predictions so that a patch at index i on the x-axis is centered
    # on column i. This determines the x coordinates of the glyphs.
    width = tf.shape(stafflines)[-1]
    predictions_width = tf.shape(predictions)[-1]
    pad_before = (width - predictions_width) // 2
    pad_shape_before = tf.concat(
        [staffline_patches_shape[:2], [pad_before]], axis=0)
    pad_shape_after = tf.concat(
        [staffline_patches_shape[:2],
         [width - predictions_width - pad_before]],
        axis=0)
    self.output = tf.concat(
        [
            # NONE has value 1.
            tf.ones(pad_shape_before, tf.int64),
            tf.to_int64(predictions),
            tf.ones(pad_shape_after, tf.int64),
        ],
        axis=-1)
def __init__(self, corpus_file, staffline_extractor, **kwargs):
  """Builds a 1-nearest-neighbor classifier with labeled patches.

  Args:
    corpus_file: Path to the TFRecords of Examples with patch (cluster)
      values in the "patch" feature, and the glyph label in the "label"
      feature.
    staffline_extractor: The staffline extractor.
    **kwargs: Passed through to `Convolutional1DGlyphClassifier`.
  """
  super(NearestNeighborGlyphClassifier, self).__init__(**kwargs)
  patch_height, patch_width = corpus.get_patch_shape(corpus_file)
  centroids, labels = corpus.parse_corpus(corpus_file, patch_height,
                                          patch_width)
  centroids_shape = tf.shape(centroids)
  flattened_centroids = tf.reshape(
      centroids,
      [centroids_shape[0], centroids_shape[1] * centroids_shape[2]])
  self.staffline_extractor = staffline_extractor
  stafflines = staffline_extractor.extract_staves()
  # Collapse the stafflines per stave.
  width = tf.shape(stafflines)[-1]
  # Shape (num_staves, num_stafflines, num_patches, height, patch_width).
  staffline_patches = patches.patches_1d(stafflines, patch_width)
  staffline_patches_shape = tf.shape(staffline_patches)
  flattened_patches = tf.reshape(staffline_patches, [
      staffline_patches_shape[0] * staffline_patches_shape[1] *
      staffline_patches_shape[2],
      staffline_patches_shape[3] * staffline_patches_shape[4]
  ])
  distance_matrix = _squared_euclidean_distance_matrix(flattened_patches,
                                                       flattened_centroids)
  # Take the k centroids with the lowest distance to each patch. Wrap the k
  # constant in a tf.identity, which tests can use to feed in another value.
  k_value = tf.identity(tf.constant(K_NEAREST_VALUE), name='k_nearest_value')
  nearest_centroid_inds = tf.nn.top_k(-distance_matrix, k=k_value)[1]
  # Get the label corresponding to each nearby centroid, and reshape the
  # labels back to the original shape.
  nearest_labels = tf.reshape(
      tf.gather(labels, tf.reshape(nearest_centroid_inds, [-1])),
      tf.shape(nearest_centroid_inds))
  # Make a histogram of counts for each glyph type in the nearest centroids,
  # for each row (patch).
  bins = tf.map_fn(
      lambda row: tf.bincount(row, minlength=NUM_GLYPHS),
      tf.to_int32(nearest_labels))
  # Take the argmax of the histogram to get the top prediction. Discard glyph
  # type 1 (NONE) for now.
  mode_out_of_k = tf.argmax(
      bins[:, musicscore_pb2.Glyph.NONE + 1:], axis=1) + 2
  # Force predictions to NONE only if all k nearby centroids were NONE.
  # Otherwise, the non-NONE nearby centroids will contribute to the
  # prediction.
  mode_out_of_k = tf.where(
      tf.equal(bins[:, musicscore_pb2.Glyph.NONE], k_value),
      tf.fill(
          tf.shape(mode_out_of_k), tf.to_int64(musicscore_pb2.Glyph.NONE)),
      mode_out_of_k)
  predictions = tf.reshape(mode_out_of_k, staffline_patches_shape[:3])
  # Pad the output.
  predictions_width = tf.shape(predictions)[-1]
  pad_before = (width - predictions_width) // 2
  pad_shape_before = tf.concat([staffline_patches_shape[:2], [pad_before]],
                               axis=0)
  pad_shape_after = tf.concat(
      [staffline_patches_shape[:2],
       [width - predictions_width - pad_before]],
      axis=0)
  self.output = tf.concat(
      [
          # NONE has value 1.
          tf.ones(pad_shape_before, tf.int64),
          predictions,
          tf.ones(pad_shape_after, tf.int64),
      ],
      axis=-1)
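# Sketch of the `_squared_euclidean_distance_matrix` helper referenced above,
# based on the expansion ||a - b||^2 = ||a||^2 - 2*a.b + ||b||^2. The project's
# actual implementation may differ; this version assumes 2-D float inputs of
# shape (num_patches, patch_size) and (num_centroids, patch_size).
def _squared_euclidean_distance_matrix(a, b):
  a_norms = tf.expand_dims(tf.reduce_sum(tf.square(a), axis=1), 1)  # (A, 1)
  b_norms = tf.expand_dims(tf.reduce_sum(tf.square(b), axis=1), 0)  # (1, B)
  # Broadcasts to an (A, B) matrix of pairwise squared distances.
  return a_norms - 2.0 * tf.matmul(a, b, transpose_b=True) + b_norms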