def testSample_distribution(self):
        """Checks that a seeded sample spreads roughly evenly over ten bins.

        Draws 9999 items from ``range(0, 100000)`` using a fixed-seed
        ``random.Random``, histograms the drawn values into ten equal-width
        bins, and requires every bin count to be within 10% of the ideal
        per-bin count.
        """
        population_size = 100000
        sample_size = 9999
        num_bins = 10
        bin_width = population_size // num_bins

        seeded_rng = random.Random(12345)
        drawn = more_iter_tools.iter_sample(
            moves.range(0, population_size), sample_size, rand=seeded_rng)
        self.assertEqual(sample_size, len(drawn))

        # Map every drawn value to its bin index, then count per bin.
        bin_indices = [value // bin_width for value in drawn]
        histogram = np.bincount(bin_indices)
        self.assertEqual(num_bins, len(histogram))

        # A uniform sample puts about sample_size / num_bins items per bin.
        ideal_count = sample_size // num_bins
        for observed_count in histogram:
            is_close = np.allclose(observed_count, ideal_count, rtol=0.1)
            failure_msg = '{} within 10% of {}'.format(
                observed_count, ideal_count)
            self.assertTrue(is_close, failure_msg)
    def process(self, png_path):
        """Yields one `tf.train.Example` per patch extracted from a page.

        Every call bumps `total_pages_counter`. If obtaining the patch
        iterator raises, the error is logged, `failed_pages_counter` is
        bumped and nothing is yielded. Otherwise the patches are filtered
        with `_filter_patch`, optionally sub-sampled down to
        `max_patches_per_page`, and serialized.

        Args:
          png_path: Identifier of the page, passed unchanged to
            `self.extractor.page_patch_iterator` and used in the log
            message on failure.

        Yields:
          `tf.train.Example` protos with the features 'name' (UTF-8 encoded
          patch name), 'features' (the flattened patch values), 'height'
          (`patch.shape[0]`) and 'width' (`patch.shape[1]`).
        """
        self.total_pages_counter.inc()
        try:
            # NOTE(review): only the call that creates the iterator runs
            # inside the session context and the try block. If the returned
            # iterator is lazy, errors raised while consuming it below are
            # not caught here -- confirm against the extractor.
            with self.session.as_default():
                raw_patches = self.extractor.page_patch_iterator(png_path)
        # pylint: disable=broad-except
        except Exception:
            logging.exception('Skipping failed music score (%s)', png_path)
            self.failed_pages_counter.inc()
            return

        candidates = filter(_filter_patch, raw_patches)

        patch_limit = self.max_patches_per_page
        if patch_limit > 0:
            # A positive limit means: keep a sample of at most that many.
            selected = more_iter_tools.iter_sample(candidates, patch_limit)
        else:
            # A non-positive limit disables sub-sampling; keep everything.
            selected = list(candidates)

        # `selected` is not modified below, so its length can be cached.
        num_selected = len(selected)
        if not selected:
            self.empty_pages_counter.inc()
        self.total_patches_counter.inc(num_selected)

        # Turn every (name, patch) pair into a serializable Example.
        for name, pixels in selected:
            record = tf.train.Example()
            feature_map = record.features.feature
            feature_map['name'].bytes_list.value.append(name.encode('utf-8'))
            feature_map['features'].float_list.value.extend(pixels.ravel())
            feature_map['height'].int64_list.value.append(pixels.shape[0])
            feature_map['width'].int64_list.value.append(pixels.shape[1])
            yield record

        # These counters are only reached once the caller has consumed every
        # yielded Example. The patches are already sub-sampled at this point.
        self.successful_pages_counter.inc()
        self.emitted_patches_counter.inc(num_selected)
 def testSample_iter_empty(self):
     """Checks that sampling 10 items from an empty range gives `[]`."""
     empty_source = moves.range(0)
     drawn = more_iter_tools.iter_sample(empty_source, 10)
     self.assertEqual([], drawn)
 def testSample_count_0(self):
     """Checks that requesting zero items from a 100-item range gives `[]`."""
     source = moves.range(100)
     drawn = more_iter_tools.iter_sample(source, 0)
     self.assertEqual([], drawn)