def testSample_distribution(self):
    """Checks that a seeded sample is spread roughly evenly over its range.

    Draws 9999 elements from ``range(0, 100000)`` with a fixed-seed RNG,
    buckets them into 10 equal-width bins, and requires every bin count to
    be within 10% of the expected count.
    """
    num_samples = 9999
    num_bins = 10
    rng = random.Random(12345)

    sample = more_iter_tools.iter_sample(
        moves.range(0, 100000), num_samples, rand=rng)
    self.assertEqual(num_samples, len(sample))

    # Each bin covers 10000 consecutive values, giving a 10-bin histogram.
    histogram = np.bincount([value // 10000 for value in sample])
    self.assertEqual(num_bins, len(histogram))

    # A uniform sample puts about a tenth of the elements in each bin.
    expected_bin_count = num_samples // num_bins
    for observed in histogram:
        self.assertTrue(
            np.allclose(observed, expected_bin_count, rtol=0.1),
            '{} within 10% of {}'.format(observed, expected_bin_count))
def process(self, png_path):
    """Yields one `tf.train.Example` per patch extracted from a PNG page.

    Patches come from `self.extractor.page_patch_iterator`, are filtered
    with `_filter_patch`, and are optionally sub-sampled down to
    `self.max_patches_per_page` entries. Page- and patch-level counters are
    updated along the way.

    Args:
      png_path: Path of the PNG page, passed to the extractor and used in
        the failure log message.

    Yields:
      A `tf.train.Example` with `name`, `features`, `height` and `width`
      features for each retained patch.
    """
    self.total_pages_counter.inc()

    # NOTE(review): if page_patch_iterator returns a lazy iterator, the
    # extraction itself would run later, outside this try/session scope --
    # confirm against the extractor implementation.
    try:
        with self.session.as_default():
            candidates = self.extractor.page_patch_iterator(png_path)
    # pylint: disable=broad-except
    except Exception:
        # Log, count, and skip a failing page; emit nothing for it.
        logging.exception('Skipping failed music score (%s)', png_path)
        self.failed_pages_counter.inc()
        return

    candidates = filter(_filter_patch, candidates)

    # A non-positive limit means "keep every patch that passed the filter".
    should_subsample = self.max_patches_per_page > 0
    if should_subsample:
        selected = more_iter_tools.iter_sample(
            candidates, self.max_patches_per_page)
    else:
        selected = list(candidates)

    num_selected = len(selected)
    if not selected:
        self.empty_pages_counter.inc()
    # NOTE(review): this count is taken after any sub-sampling, so it equals
    # the emitted-patches count recorded below -- confirm that is intended.
    self.total_patches_counter.inc(num_selected)

    # Serialize each (name, patch) pair as an Example.
    for name, patch_data in selected:
        example = tf.train.Example()
        feature_map = example.features.feature
        feature_map['name'].bytes_list.value.append(name.encode('utf-8'))
        feature_map['features'].float_list.value.extend(patch_data.ravel())
        feature_map['height'].int64_list.value.append(patch_data.shape[0])
        feature_map['width'].int64_list.value.append(patch_data.shape[1])
        yield example

    # Only reached once the consumer has drained every Example for the page.
    self.successful_pages_counter.inc()
    self.emitted_patches_counter.inc(num_selected)
def testSample_iter_empty(self):
    """Sampling 10 elements from an empty range must give an empty list."""
    empty_source = moves.range(0)
    sampled = more_iter_tools.iter_sample(empty_source, 10)
    self.assertEqual([], sampled)
def testSample_count_0(self):
    """Requesting zero samples from a non-empty range must give an empty list."""
    source = moves.range(100)
    sampled = more_iter_tools.iter_sample(source, 0)
    self.assertEqual([], sampled)