Beispiel #1
0
 def testEmpty(self):
     """Checks the shape patches_1d returns for an input with no elements."""
     patch_width = 10
     height, width = 15, 20
     # The third dimension has size 0, so the input tensor is empty.
     empty_image_t = tf.zeros((1, 2, 0, 3, 4, height, width))
     result_t = patches.patches_1d(empty_image_t, patch_width)
     expected_shape = (1, 2, 0, 3, 4, 0, height, patch_width)
     with self.test_session():
         self.assertEqual(result_t.eval().shape, expected_shape)
Beispiel #2
0
def _extract_patches(stafflines, patch_width, min_num_dark_pixels=10):
    """Extracts 1D patches and keeps those with enough dark pixels.

    A pixel is counted as dark when its value is below 0.5.

    Args:
      stafflines: Tensor handed to `util_patches.patches_1d`.
      patch_width: Width of each extracted patch.
      min_num_dark_pixels: Minimum number of dark pixels a patch must contain
        to be kept.

    Returns:
      The extracted patches filtered with `tf.boolean_mask`, keeping only
      the ones that reach `min_num_dark_pixels`.
    """
    candidates = util_patches.patches_1d(stafflines, patch_width)
    is_dark = tf.less(candidates, 0.5)
    # 1 for every dark pixel, 0 otherwise, so the sum below is a count.
    dark_indicator = tf.where(
        is_dark,
        tf.ones_like(candidates, dtype=tf.int32),
        tf.zeros_like(candidates, dtype=tf.int32))
    # Reduce over the last two axes only, leaving one count per patch.
    dark_count = tf.reduce_sum(dark_indicator, axis=[-2, -1])
    keep = tf.greater_equal(dark_count, min_num_dark_pixels)
    return tf.boolean_mask(candidates, keep)
Beispiel #3
0
 def test2D(self):
     """Checks patches_1d on a 2D image against direct column slices."""
     patch_width = 10
     num_rows, num_cols = 100, 200
     image_t = tf.random_uniform((num_rows, num_cols))
     image_t.set_shape((num_rows, num_cols))
     patches_t = patches.patches_1d(image_t, patch_width)
     with self.test_session() as sess:
         # Both tensors are fetched by the same run call, so the patches
         # are compared against the image values from that same run.
         image_arr, patches_arr = sess.run((image_t, patches_t))
         expected_shape = (num_cols - patch_width + 1, num_rows, patch_width)
         self.assertEqual(patches_arr.shape, expected_shape)
         # Patch number `start` must equal the image columns
         # [start, start + patch_width).
         for start, patch in enumerate(patches_arr):
             expected = image_arr[:, start:start + patch_width]
             self.assertAllEqual(patch, expected)
Beispiel #4
0
 def test4D(self):
     """Checks patches_1d on a 4D input against direct column slices."""
     patch_width = 10
     height, width = 15, 20
     image_t = tf.random_uniform((4, 8, height, width))
     # set_shape is given None for the two leading dimensions.
     image_t.set_shape((None, None, height, width))
     patches_t = patches.patches_1d(image_t, patch_width)
     with self.test_session() as sess:
         image_arr, patches_arr = sess.run((image_t, patches_t))
         num_patches = width - patch_width + 1
         self.assertEqual(patches_arr.shape,
                          (4, 8, num_patches, height, patch_width))
         outer, inner, positions = patches_arr.shape[:3]
         for a in moves.xrange(outer):
             for b in moves.xrange(inner):
                 for start in moves.xrange(positions):
                     # The patch at `start` covers image columns
                     # [start, start + patch_width) of image [a, b].
                     expected = image_arr[a, b, :,
                                          start:start + patch_width]
                     self.assertAllEqual(patches_arr[a, b, start],
                                         expected)
Beispiel #5
0
    def __init__(self,
                 structure,
                 saved_model_dir,
                 num_sections=19,
                 *args,
                 **kwargs):
        """Loads a saved classifier model for the OMR engine.

        The saved model is loaded into the session returned by
        `tf.get_default_session()`; no fallback session is created here.

        Args:
          structure: A `structure.Structure`.
          saved_model_dir: Path to the TF saved_model directory to load.
          num_sections: Number of vertical positions of patches to extract,
            centered on the middle staff line.
          *args: Passed through to the superclass constructor.
          **kwargs: Passed through to the superclass constructor. When
            `run_min_length` is not among them, the value stored in the
            saved model (if any) is used instead.

        Raises:
          ValueError: If none of `_SIGNATURE_KEYS` is present in the saved
            model, if the saved model input could not be interpreted as a 3D
            array with the patch size, or if the run_min_length tensor in the
            saved model is not a statically known scalar.
        """
        super(SavedConvolutional1DClassifier, self).__init__(*args, **kwargs)
        sess = tf.get_default_session()
        meta_graph = tf.saved_model.loader.load(
            sess, [tf.saved_model.tag_constants.SERVING], saved_model_dir)

        # Use the first known signature key that the saved model defines.
        for key in _SIGNATURE_KEYS:
            if key in meta_graph.signature_def:
                signature = meta_graph.signature_def[key]
                break
        else:
            # for/else is only executed if the loop completes without breaking.
            # The keys are wrapped in a 1-tuple so that "%" formats the whole
            # collection even when _SIGNATURE_KEYS is itself a tuple.
            raise ValueError(
                'One of the following signatures must be present: %s' %
                (_SIGNATURE_KEYS,))

        input_info = signature.inputs['input']
        if not (len(input_info.tensor_shape.dim) == 3
                and input_info.tensor_shape.dim[1].size > 0
                and input_info.tensor_shape.dim[2].size > 0):
            raise ValueError('Invalid patches input: ' + str(input_info))
        patch_height = input_info.tensor_shape.dim[1].size
        patch_width = input_info.tensor_shape.dim[2].size

        with tf.name_scope('saved_classifier'):
            self.staffline_extractor = staffline_extractor.StafflineExtractor(
                structure.staff_remover.remove_staves,
                structure.staff_detector,
                num_sections=num_sections,
                target_height=patch_height)
            stafflines = self.staffline_extractor.extract_staves()
            num_staves = tf.shape(stafflines)[0]
            # Dynamic section count; named separately so that the
            # `num_sections` argument is not shadowed.
            num_sections_t = tf.shape(stafflines)[1]
            staffline_patches = patches.patches_1d(stafflines, patch_width)
            staffline_patches_shape = tf.shape(staffline_patches)
            patches_per_position = staffline_patches_shape[2]
            # Collapse the three leading dimensions so the patches match the
            # 3D input that was validated above.
            flat_patches = tf.reshape(staffline_patches, [
                num_staves * num_sections_t * patches_per_position,
                patch_height, patch_width
            ])

            # Feed in the flat extracted patches as the classifier input.
            predictions_name = signature.outputs[
                prediction_keys.PredictionKeys.CLASS_IDS].name
            predictions = tf.contrib.graph_editor.graph_replace(
                sess.graph.get_tensor_by_name(predictions_name), {
                    sess.graph.get_tensor_by_name(input_info.name):
                    flat_patches
                })
            # Reshape to the original patches shape.
            predictions = tf.reshape(predictions, staffline_patches_shape[:3])

            # Pad the output. We take only the valid patches, but we want to
            # shift all of the predictions so that a patch at index i on the
            # x-axis is centered on column i. This determines the x
            # coordinates of the glyphs.
            width = tf.shape(stafflines)[-1]
            predictions_width = tf.shape(predictions)[-1]
            pad_before = (width - predictions_width) // 2
            pad_shape_before = tf.concat(
                [staffline_patches_shape[:2], [pad_before]], axis=0)
            pad_shape_after = tf.concat([
                staffline_patches_shape[:2],
                [width - predictions_width - pad_before]
            ],
                                        axis=0)
            self.output = tf.concat(
                [
                    # NONE has value 1.
                    tf.ones(pad_shape_before, tf.int64),
                    tf.to_int64(predictions),
                    tf.ones(pad_shape_after, tf.int64),
                ],
                axis=-1)

        # run_min_length can be set on the saved model to tweak its behavior,
        # but should be overridden by the keyword argument.
        if 'run_min_length' not in kwargs:
            try:
                # Try to read the run min length from the saved model. This is
                # tweaked on a per-model basis. Only this lookup is guarded, so
                # a KeyError raised anywhere else is not mistaken for a
                # missing tensor.
                run_min_length_t = sess.graph.get_tensor_by_name(
                    _RUN_MIN_LENGTH_CONSTANT_NAME)
            except KeyError:
                pass  # No run_min_length tensor in the saved model.
            else:
                run_min_length = tf.contrib.util.constant_value(
                    run_min_length_t)
                # Implicit comparison is invalid on a NumPy array.
                # pylint: disable=g-explicit-bool-comparison
                if run_min_length is None or run_min_length.shape != ():
                    raise ValueError(
                        'Bad run_min_length: {}'.format(run_min_length))
                # Overwrite the property after the
                # Convolutional1DGlyphClassifier constructor completes.
                self.run_min_length = int(run_min_length)
Beispiel #6
0
    def __init__(self,
                 structure,
                 saved_model_dir,
                 num_sections=19,
                 *args,
                 **kwargs):
        """Wires a TF saved model into the OMR graph as a patch classifier.

        The model is loaded into the session returned by
        `tf.get_default_session()` and its default serving signature is
        used. Staffline patches are substituted for the model's 'input'
        tensor, and the class id predictions are padded with 1s along the
        last axis up to the staffline width.

        Args:
          structure: A `structure.Structure`.
          saved_model_dir: Path to the TF saved_model directory to load.
          num_sections: Number of vertical positions of patches to extract,
            centered on the middle staff line.
          *args: Passed through to the superclass constructor.
          **kwargs: Passed through to the superclass constructor.

        Raises:
          ValueError: If the saved model input could not be interpreted as a
            3D array with the patch size.
        """
        super(SavedConvolutional1DClassifier, self).__init__(*args, **kwargs)
        session = tf.get_default_session()
        meta_graph = tf.saved_model.loader.load(
            session, [tf.saved_model.tag_constants.SERVING], saved_model_dir)
        serving_key = (
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
        signature = meta_graph.signature_def[serving_key]

        # The model input must have three dimensions, with positive sizes
        # for the second and third ones (used as patch height and width).
        model_input = signature.inputs['input']
        input_dims = model_input.tensor_shape.dim
        if (len(input_dims) != 3 or input_dims[1].size <= 0
                or input_dims[2].size <= 0):
            raise ValueError('Invalid patches input: ' + str(model_input))
        patch_height = input_dims[1].size
        patch_width = input_dims[2].size

        with tf.name_scope('saved_classifier'):
            self.staffline_extractor = staffline_extractor.StafflineExtractor(
                structure.staff_remover.remove_staves,
                structure.staff_detector,
                num_sections=num_sections,
                target_height=patch_height)
            stafflines = self.staffline_extractor.extract_staves()
            staff_count = tf.shape(stafflines)[0]
            section_count = tf.shape(stafflines)[1]
            windows = patches.patches_1d(stafflines, patch_width)
            windows_shape = tf.shape(windows)
            windows_per_section = windows_shape[2]
            total_windows = staff_count * section_count * windows_per_section
            # Flatten the three leading dimensions into a single one.
            flat_patches = tf.reshape(
                windows, [total_windows, patch_height, patch_width])

            # Substitute the flat patches for the classifier's input tensor.
            output_name = signature.outputs[
                prediction_keys.PredictionKeys.CLASS_IDS].name
            graph = session.graph
            output_tensor = graph.get_tensor_by_name(output_name)
            input_tensor = graph.get_tensor_by_name(model_input.name)
            predictions = tf.contrib.graph_editor.graph_replace(
                output_tensor, {input_tensor: flat_patches})
            # Restore the leading dimensions of the patches tensor.
            predictions = tf.reshape(predictions, windows_shape[:3])

            # Only valid patches were classified, so the predictions are
            # narrower than the stafflines. Pad both sides so that the
            # prediction for the patch at index i on the x-axis is centered
            # on column i; this fixes the x coordinates of the glyphs.
            full_width = tf.shape(stafflines)[-1]
            valid_width = tf.shape(predictions)[-1]
            missing = full_width - valid_width
            left_pad = missing // 2
            right_pad = missing - left_pad
            leading_dims = windows_shape[:2]
            left_shape = tf.concat([leading_dims, [left_pad]], axis=0)
            right_shape = tf.concat([leading_dims, [right_pad]], axis=0)
            # The padding is filled with 1, which is the value of NONE.
            padded = [
                tf.ones(left_shape, tf.int64),
                tf.to_int64(predictions),
                tf.ones(right_shape, tf.int64),
            ]
            self.output = tf.concat(padded, axis=-1)
Beispiel #7
0
  def __init__(self, corpus_file, staffline_extractor, **kwargs):
    """Builds a nearest-neighbor glyph classifier from labeled patches.

    Each staffline patch is compared with every corpus centroid, and the
    labels of the `K_NEAREST_VALUE` closest centroids are used to vote on
    the prediction for that patch.

    Args:
      corpus_file: Path to the TFRecords of Examples with patch (cluster)
        values in the "patch" feature, and the glyph label in the "label"
        feature.
      staffline_extractor: The staffline extractor.
      **kwargs: Passed through to `Convolutional1DGlyphClassifier`.
    """
    super(NearestNeighborGlyphClassifier, self).__init__(**kwargs)

    patch_height, patch_width = corpus.get_patch_shape(corpus_file)
    centroids, labels = corpus.parse_corpus(corpus_file, patch_height,
                                            patch_width)
    # Flatten each centroid into a single row.
    centroids_shape = tf.shape(centroids)
    num_centroids = centroids_shape[0]
    centroid_size = centroids_shape[1] * centroids_shape[2]
    flat_centroids = tf.reshape(centroids, [num_centroids, centroid_size])

    self.staffline_extractor = staffline_extractor
    stafflines = staffline_extractor.extract_staves()
    full_width = tf.shape(stafflines)[-1]
    # Shape (num_staves, num_stafflines, num_patches, height, patch_width).
    windows = patches.patches_1d(stafflines, patch_width)
    windows_shape = tf.shape(windows)
    # Flatten to one row per patch, matching the flattened centroids.
    num_windows = windows_shape[0] * windows_shape[1] * windows_shape[2]
    window_size = windows_shape[3] * windows_shape[4]
    flat_windows = tf.reshape(windows, [num_windows, window_size])
    distances = _squared_euclidean_distance_matrix(flat_windows,
                                                   flat_centroids)

    # The k constant is wrapped in a named tf.identity, which tests can use
    # to feed in another value.
    k_value = tf.identity(tf.constant(K_NEAREST_VALUE), name='k_nearest_value')
    # top_k picks the largest values, so negate the distances to select the
    # k centroids closest to each patch.
    _, nearest_inds = tf.nn.top_k(-distances, k=k_value)
    # Look up the label of every selected centroid, keeping the shape of the
    # index tensor.
    flat_inds = tf.reshape(nearest_inds, [-1])
    nearest_labels = tf.reshape(
        tf.gather(labels, flat_inds), tf.shape(nearest_inds))

    def _count_glyphs(row):
      # Histogram of glyph types among the nearest centroids of one patch.
      return tf.bincount(row, minlength=NUM_GLYPHS)

    histogram = tf.map_fn(_count_glyphs, tf.to_int32(nearest_labels))

    none_id = musicscore_pb2.Glyph.NONE
    # Vote among the glyph types after NONE only. The argmax index is
    # relative to the slice; adding 2 maps it back to a glyph type (NONE has
    # value 1, so the slice starts at 2).
    votes_after_none = histogram[:, none_id + 1:]
    mode_out_of_k = tf.argmax(votes_after_none, axis=1) + 2
    # Predict NONE only when all k nearby centroids were NONE. Otherwise the
    # non-NONE nearby centroids decide the prediction.
    all_none = tf.equal(histogram[:, none_id], k_value)
    none_fill = tf.fill(tf.shape(mode_out_of_k), tf.to_int64(none_id))
    mode_out_of_k = tf.where(all_none, none_fill, mode_out_of_k)
    predictions = tf.reshape(mode_out_of_k, windows_shape[:3])

    # Pad the predictions along the last axis up to the staffline width,
    # putting half of the padding (rounded down) in front.
    valid_width = tf.shape(predictions)[-1]
    missing = full_width - valid_width
    left_pad = missing // 2
    right_pad = missing - left_pad
    leading_dims = windows_shape[:2]
    left_shape = tf.concat([leading_dims, [left_pad]], axis=0)
    right_shape = tf.concat([leading_dims, [right_pad]], axis=0)
    # The padding is filled with 1, which is the value of NONE.
    padded = [
        tf.ones(left_shape, tf.int64),
        predictions,
        tf.ones(right_shape, tf.int64),
    ]
    self.output = tf.concat(padded, axis=-1)