def _testDistribution(self, initial_known):
    """Check that rejection resampling reproduces the target distribution.

    Draws 20000 labels uniformly from five classes, pairs every label with
    its string form, resamples the pipeline towards `target_dist` and drains
    it through an initializable iterator until `OutOfRangeError`.

    Args:
      initial_known: If truthy, the uniform initial distribution is handed
        to `rejection_resample`; otherwise `initial_dist=None` is passed.
    """
    num_classes = 5
    num_inputs = 20000
    target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
    classes = np.random.randint(num_classes, size=(num_inputs,))  # Uniform.
    if initial_known:
        initial_dist = [0.2] * num_classes
    else:
        initial_dist = None

    # Each input element is a (label, label-as-string) pair.
    labelled = dataset_ops.Dataset.from_tensor_slices(classes)
    labelled = labelled.shuffle(200, seed=21)
    labelled = labelled.map(lambda c: (c, string_ops.as_string(c)))
    resampler = resampling.rejection_resample(
        target_dist=target_dist,
        initial_dist=initial_dist,
        class_func=lambda c, _: c,
        seed=27)
    iterator = labelled.apply(resampler).make_initializable_iterator()
    initializer = iterator.initializer
    next_element = iterator.get_next()

    samples = []
    with self.test_session() as sess:
        sess.run(initializer)
        # Drain the dataset; the loop only ends when the iterator is exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            while True:
                samples.append(sess.run(next_element))

    # Every sample is (label, (label, string)); the string must match the label.
    labels, label_data_pairs = zip(*samples)
    _, string_payloads = zip(*label_data_pairs)
    expected_payloads = [compat.as_bytes(str(label)) for label in labels]
    self.assertAllEqual(expected_payloads, string_payloads)

    # Subsampling rejects a large percentage of the initial data in
    # this case.
    num_returned = len(labels)
    self.assertGreater(num_returned, num_inputs * 0.2)

    counts = np.array([
        sum(1 for label in labels if label == class_id)
        for class_id in range(num_classes)
    ])
    observed_dist = counts / num_returned
    self.assertAllClose(target_dist, observed_dist, atol=1e-2)
def _testDistribution(self, initial_known):
    """Check that rejection resampling reproduces the target distribution.

    Draws 20000 labels uniformly from five classes, pairs every label with
    its string form, resamples the pipeline towards `target_dist` and drains
    it through a one-shot iterator until `OutOfRangeError`.

    Args:
      initial_known: If truthy, the uniform initial distribution is handed
        to `rejection_resample`; otherwise `initial_dist=None` is passed.
    """
    num_classes = 5
    num_inputs = 20000
    target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
    classes = np.random.randint(num_classes, size=(num_inputs,))  # Uniform.
    if initial_known:
        initial_dist = [0.2] * num_classes
    else:
        initial_dist = None

    # Each input element is a (label, label-as-string) pair.
    labelled = dataset_ops.Dataset.from_tensor_slices(classes)
    labelled = labelled.shuffle(200, seed=21)
    labelled = labelled.map(lambda c: (c, string_ops.as_string(c)))
    resampler = resampling.rejection_resample(
        target_dist=target_dist,
        initial_dist=initial_dist,
        class_func=lambda c, _: c,
        seed=27)
    next_element = labelled.apply(resampler).make_one_shot_iterator().get_next()

    samples = []
    with self.test_session() as sess:
        # Drain the dataset; the loop only ends when the iterator is exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            while True:
                samples.append(sess.run(next_element))

    # Every sample is (label, (label, string)); the string must match the label.
    labels, label_data_pairs = zip(*samples)
    _, string_payloads = zip(*label_data_pairs)
    expected_payloads = [compat.as_bytes(str(label)) for label in labels]
    self.assertAllEqual(expected_payloads, string_payloads)

    # Subsampling rejects a large percentage of the initial data in
    # this case.
    num_returned = len(labels)
    self.assertGreater(num_returned, num_inputs * 0.2)

    counts = np.array([
        sum(1 for label in labels if label == class_id)
        for class_id in range(num_classes)
    ])
    observed_dist = counts / num_returned
    self.assertAllClose(target_dist, observed_dist, atol=1e-2)
def testDistribution(self, initial_known):
    """Check that rejection resampling reproduces the target distribution.

    Builds an endlessly repeating dataset of (label, label-as-string) pairs
    from 20000 uniformly drawn labels, resamples it towards `target_dist`,
    pulls 4000 elements and compares the empirical class frequencies with
    the target.

    Args:
      initial_known: If truthy, the uniform initial distribution is handed
        to `rejection_resample`; otherwise `initial_dist=None` is passed.
    """
    num_classes = 5
    num_to_draw = 4000
    target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
    if initial_known:
        initial_dist = [0.2] * num_classes
    else:
        initial_dist = None

    classes = np.random.randint(num_classes, size=(20000,))  # Uniform.
    classes = math_ops.to_int64(classes)  # needed for Windows build.

    source = dataset_ops.Dataset.from_tensor_slices(classes)
    source = source.shuffle(200, seed=21)
    source = source.map(lambda c: (c, string_ops.as_string(c)))
    # The input repeats forever, so a fixed number of elements is drawn below.
    source = source.repeat()
    resampler = resampling.rejection_resample(
        target_dist=target_dist,
        initial_dist=initial_dist,
        class_func=lambda c, _: c,
        seed=27)
    next_element = source.apply(resampler).make_one_shot_iterator().get_next()

    with self.cached_session() as sess:
        samples = [sess.run(next_element) for _ in range(num_to_draw)]

    # Every sample is (label, (label, string)); the string must match the label.
    labels, label_data_pairs = zip(*samples)
    _, string_payloads = zip(*label_data_pairs)
    expected_payloads = [compat.as_bytes(str(label)) for label in labels]
    self.assertAllEqual(expected_payloads, string_payloads)

    num_returned = len(labels)
    counts = np.array([
        sum(1 for label in labels if label == class_id)
        for class_id in range(num_classes)
    ])
    observed_dist = counts / num_returned
    self.assertAllClose(target_dist, observed_dist, atol=1e-2)
def testDistribution(self, initial_known):
    """Verify the class frequencies produced by rejection resampling.

    20000 uniformly drawn labels are turned into a repeating dataset of
    (label, label-as-string) pairs and resampled towards `target_dist`.
    The first 4000 elements are collected and their empirical distribution
    must match the target within an absolute tolerance of 1e-2.

    Args:
      initial_known: If truthy, `[0.2] * 5` is passed as `initial_dist`;
        otherwise `None` is passed.
    """
    class_count = 5
    draws_wanted = 4000
    target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
    initial_dist = None
    if initial_known:
        initial_dist = [0.2] * class_count

    raw_labels = np.random.randint(class_count, size=(20000,))  # Uniform.
    classes = math_ops.to_int64(raw_labels)  # needed for Windows build.

    pairs = dataset_ops.Dataset.from_tensor_slices(classes)
    pairs = pairs.shuffle(200, seed=21)
    pairs = pairs.map(lambda c: (c, string_ops.as_string(c)))
    # Repeat indefinitely; the loop below decides how many elements to take.
    dataset = pairs.repeat()

    resampled = dataset.apply(
        resampling.rejection_resample(
            target_dist=target_dist,
            initial_dist=initial_dist,
            class_func=lambda c, _: c,
            seed=27))
    fetch = resampled.make_one_shot_iterator().get_next()

    drawn = []
    with self.cached_session() as sess:
        for _ in range(draws_wanted):
            drawn.append(sess.run(fetch))

    # Each drawn element has the form (label, (label, string)).
    drawn_labels, drawn_pairs = zip(*drawn)
    _, drawn_strings = zip(*drawn_pairs)
    self.assertAllEqual(
        [compat.as_bytes(str(label)) for label in drawn_labels],
        drawn_strings)

    per_class = [
        sum(1 for label in drawn_labels if label == class_id)
        for class_id in range(class_count)
    ]
    empirical_dist = np.array(per_class) / len(drawn_labels)
    self.assertAllClose(target_dist, empirical_dist, atol=1e-2)
def testVariableDevicePlacement(self):
    """Check the device of the local variable created by resampling.

    Builds a rejection-resampled dataset (with `initial_dist=None`) under a
    replica device setter, then asserts that exactly one local variable
    exists and that its device string is empty.
    """
    target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
    classes = np.random.randint(5, size=(20000,))  # Uniformly sampled
    device_fn = device_setter.replica_device_setter(
        ps_tasks=1, ps_device="/cpu:0")

    with ops.device(device_fn):
        pairs = dataset_ops.Dataset.from_tensor_slices(classes)
        pairs = pairs.shuffle(200, seed=21)
        pairs = pairs.map(lambda c: (c, string_ops.as_string(c)))
        resampler = resampling.rejection_resample(
            target_dist=target_dist,
            initial_dist=None,
            class_func=lambda c, _: c,
            seed=27)
        # Only graph construction matters here; the dataset is never iterated.
        pairs.apply(resampler)

        self.assertEqual(1, len(variables.local_variables()))
        first_local = variables.local_variables()[0]
        self.assertEqual(b"", compat.as_bytes(first_local.device))
def testVariableDevicePlacement(self):
    """Assert that resampling's local variable carries no device string.

    The resampled dataset is constructed inside an `ops.device` scope driven
    by a replica device setter; afterwards there must be exactly one local
    variable and its `device` attribute must be the empty string.
    """
    classes = np.random.randint(5, size=(20000,))  # Uniformly sampled
    target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
    setter = device_setter.replica_device_setter(
        ps_tasks=1, ps_device="/cpu:0")

    with ops.device(setter):
        base = dataset_ops.Dataset.from_tensor_slices(classes)
        shuffled = base.shuffle(200, seed=21)
        labelled = shuffled.map(lambda c: (c, string_ops.as_string(c)))
        # Built for its graph side effects only; the result is discarded.
        labelled.apply(
            resampling.rejection_resample(
                target_dist=target_dist,
                initial_dist=None,
                class_func=lambda c, _: c,
                seed=27))

        self.assertEqual(1, len(variables.local_variables()))
        device_name = variables.local_variables()[0].device
        self.assertEqual(b"", compat.as_bytes(device_name))
def _time_resampling(test_obj, data_np, target_dist, init_dist,
                     num_to_sample):
    """Time how long it takes to draw elements from a resampled dataset.

    Args:
      test_obj: Object providing `test_session()` (presumably the calling
        test case -- confirm against callers).
      data_np: Data passed to `Dataset.from_tensor_slices`; every element is
        used directly as its own class via the identity `class_func`.
      target_dist: Target class distribution for `rejection_resample`.
      init_dist: Initial class distribution for `rejection_resample`.
      num_to_sample: Number of `sess.run` calls included in the measurement.

    Returns:
      The difference between two `time.time()` readings taken immediately
      before and after the sampling loop.
    """
    repeated = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()

    # Reshape distribution via rejection sampling.
    resampler = resampling.rejection_resample(
        class_func=lambda x: x,
        target_dist=target_dist,
        initial_dist=init_dist,
        seed=142)
    next_element = repeated.apply(resampler).make_one_shot_iterator().get_next()

    with test_obj.test_session() as sess:
        # Only the sess.run calls are timed; graph construction is excluded.
        started = time.time()
        for _ in xrange(num_to_sample):
            sess.run(next_element)
        finished = time.time()

    return finished - started
def _time_resampling(
        test_obj, data_np, target_dist, init_dist, num_to_sample):
    """Measure the wall-clock duration of sampling from a resampled dataset.

    Args:
      test_obj: Object exposing `test_session()` (presumably a test case --
        confirm against callers).
      data_np: Input for `Dataset.from_tensor_slices`; the identity
        `class_func` means each element doubles as its class.
      target_dist: Target class distribution handed to `rejection_resample`.
      init_dist: Initial class distribution handed to `rejection_resample`.
      num_to_sample: How many elements to fetch while the clock is running.

    Returns:
      Elapsed time as the difference of two `time.time()` readings.
    """
    source = dataset_ops.Dataset.from_tensor_slices(data_np)
    source = source.repeat()

    # Reshape distribution via rejection sampling.
    resampled = source.apply(
        resampling.rejection_resample(
            class_func=lambda x: x,
            target_dist=target_dist,
            initial_dist=init_dist,
            seed=142))
    fetch = resampled.make_one_shot_iterator().get_next()

    with test_obj.test_session() as sess:
        # The timed region covers the fetch loop only.
        tick = time.time()
        for _ in xrange(num_to_sample):
            sess.run(fetch)
        tock = time.time()

    elapsed = tock - tick
    return elapsed
def testEdgeCasesSampleFromInitialDataset(self, only_initial_dist):
    """Drain a resampled dataset for two edge-case target distributions.

    The only assertion is that iterating the resampled dataset terminates
    with `OutOfRangeError`.

    Args:
      only_initial_dist: If truthy, the target distribution equals the
        initial distribution `[0.5, 0.5]`; otherwise the target is
        `[0.0, 1.0]`.
    """
    init_dist = [0.5, 0.5]
    if only_initial_dist:
        target_dist = [0.5, 0.5]
    else:
        target_dist = [0.0, 1.0]
    num_classes = len(init_dist)

    # We don't need many samples to test that this works.
    num_samples = 100
    data_np = np.random.choice(num_classes, num_samples, p=init_dist)
    source = dataset_ops.Dataset.from_tensor_slices(data_np)

    # Reshape distribution.
    resampler = resampling.rejection_resample(
        class_func=lambda x: x,
        target_dist=target_dist,
        initial_dist=init_dist)
    next_element = source.apply(resampler).make_one_shot_iterator().get_next()

    with self.cached_session() as sess:
        drained = []
        with self.assertRaises(errors.OutOfRangeError):
            while True:
                drained.append(sess.run(next_element))
def testRandomClasses(self):
    """Resample randomly re-labelled data towards a single-class target.

    Every input element is replaced by a freshly drawn random class before
    resampling, and the returned classes must all fall in the last class
    (within an absolute tolerance of 1e-2).
    """
    init_dist = [0.25, 0.25, 0.25, 0.25]
    target_dist = [0.0, 0.0, 0.0, 1.0]
    num_classes = len(init_dist)
    # We don't need many samples to test a dirac-delta target distribution
    num_samples = 100
    data_np = np.random.choice(num_classes, num_samples, p=init_dist)
    source = dataset_ops.Dataset.from_tensor_slices(data_np)

    # Apply a random mapping that preserves the data distribution.
    def _random_class(_):
        scaled = random_ops.random_uniform([1]) * num_classes
        return math_ops.cast(scaled, dtypes.int32)[0]

    source = source.map(_random_class)

    # Reshape distribution.
    resampler = resampling.rejection_resample(
        class_func=lambda x: x,
        target_dist=target_dist,
        initial_dist=init_dist)
    next_element = source.apply(resampler).make_one_shot_iterator().get_next()

    samples = []
    with self.test_session() as sess:
        # Drain the dataset; the loop only ends when the iterator is exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            while True:
                samples.append(sess.run(next_element))

    # Samples are pairs; only the first component (the class) is inspected.
    labels, _ = zip(*samples)
    counts = np.bincount(np.array(labels), minlength=num_classes)
    observed_dist = counts.astype(np.float32) / len(labels)
    self.assertAllClose(target_dist, observed_dist, atol=1e-2)
def testRandomClasses(self):
    """Check resampling towards one class when labels are drawn at random.

    The dataset elements are overwritten by a random class in a `map` step,
    the result is resampled so that only the last class should survive, and
    the empirical class frequencies are compared with the target.
    """
    init_dist = [0.25, 0.25, 0.25, 0.25]
    target_dist = [0.0, 0.0, 0.0, 1.0]
    num_classes = len(init_dist)
    # We don't need many samples to test a dirac-delta target distribution.
    num_samples = 100
    data_np = np.random.choice(num_classes, num_samples, p=init_dist)
    dataset = dataset_ops.Dataset.from_tensor_slices(data_np)

    # Apply a random mapping that preserves the data distribution.
    def _draw_class(_):
        uniform = random_ops.random_uniform([1])
        as_int = math_ops.cast(uniform * num_classes, dtypes.int32)
        return as_int[0]

    remapped = dataset.map(_draw_class)

    # Reshape distribution.
    resampled = remapped.apply(
        resampling.rejection_resample(
            class_func=lambda x: x,
            target_dist=target_dist,
            initial_dist=init_dist))
    fetch = resampled.make_one_shot_iterator().get_next()

    drawn = []
    with self.cached_session() as sess:
        # Keep fetching until the iterator signals exhaustion.
        with self.assertRaises(errors.OutOfRangeError):
            while True:
                drawn.append(sess.run(fetch))

    # Drawn elements are pairs; only the class component is counted.
    drawn_classes, _ = zip(*drawn)
    histogram = np.bincount(np.array(drawn_classes), minlength=num_classes)
    frequencies = histogram.astype(np.float32) / len(drawn_classes)
    self.assertAllClose(target_dist, frequencies, atol=1e-2)
def testEdgeCasesSampleFromInitialDataset(self, only_initial_dist):
    """Exercise resampling when the target is trivial or degenerate.

    Nothing about the sampled values is asserted; the test passes when the
    resampled dataset can be iterated until `OutOfRangeError` is raised.

    Args:
      only_initial_dist: Truthy selects a target identical to the initial
        distribution `[0.5, 0.5]`; falsy selects the target `[0.0, 1.0]`.
    """
    init_dist = [0.5, 0.5]
    target_dist = [0.0, 1.0]
    if only_initial_dist:
        target_dist = [0.5, 0.5]
    num_classes = len(init_dist)

    # We don't need many samples to test that this works.
    num_samples = 100
    data_np = np.random.choice(num_classes, num_samples, p=init_dist)
    dataset = dataset_ops.Dataset.from_tensor_slices(data_np)

    # Reshape distribution.
    resampled = dataset.apply(
        resampling.rejection_resample(
            class_func=lambda x: x,
            target_dist=target_dist,
            initial_dist=init_dist))
    fetch = resampled.make_one_shot_iterator().get_next()

    with self.cached_session() as sess:
        collected = []
        with self.assertRaises(errors.OutOfRangeError):
            while True:
                element = sess.run(fetch)
                collected.append(element)