def test_outcomes(self):
    # Three samples with outcomes 60, 70 and 80 are masked with [1, 0, 1];
    # the test expects only the first and third outcomes to be revealed.
    all_samples = ObliviousDataset.create(
        Sample([s(0), s(1), s(2)], outcome=s(60)),
        Sample([s(10), s(11), s(12)], outcome=s(70)),
        Sample([s(20), s(21), s(22)], outcome=s(80)))
    mask = [s(1), s(0), s(1)]
    dataset = all_samples.select(mask)

    revealed_outcomes = reveal(dataset.outcomes)
    self.assertEqual(revealed_outcomes, [60, 80])
def test_select_best_attribute_with_gini_denominator_zero(self):
    # Attributes 0, 1 and 3 are zero in every sample; only attribute 2
    # varies, and it equals the outcome in each row.
    # NOTE(review): presumably the constant columns are what produce the
    # zero denominator named in the test title - confirm against the
    # gini implementation.
    rows = [
        Sample([s(0), s(0), s(1), s(0)], s(1)),
        Sample([s(0), s(0), s(1), s(0)], s(1)),
        Sample([s(0), s(0), s(0), s(0)], s(0)),
    ]
    samples = ObliviousDataset.create(*rows)

    best_attribute, _ = select_best_attribute(samples)
    self.assertEqual(reveal(best_attribute), 2)
def test_random_sample(self):
    # Draws from a two-sample dataset 100 times and expects each of the two
    # samples to appear at least once among the revealed draws.
    dataset = ObliviousDataset.create(
        Sample([s(1), s(2), s(3)], s(4)),
        Sample([s(11), s(12), s(13)], s(14)))

    drawn = []
    for _ in range(100):
        drawn.append(reveal(dataset.choice()))

    first_sample = Sample([1, 2, 3], 4)
    self.assertIn(first_sample, drawn)
    second_sample = Sample([11, 12, 13], 14)
    self.assertIn(second_sample, drawn)
def test_continuous_attributes(self):
    # The dataset is created with continuous=[False, True, False]; the
    # test expects is_continuous() to report exactly those flags when
    # queried with plain integer indices.
    first = Sample([s(0), s(1), s(1)], s(0))
    second = Sample([s(1), s(2), s(1)], s(1))
    dataset = ObliviousDataset.create(
        first, second, continuous=[False, True, False])

    self.assertFalse(dataset.is_continuous(0))
    self.assertTrue(dataset.is_continuous(1))
    self.assertFalse(dataset.is_continuous(2))
def test_continuous_attribute_check_with_secret_index(self):
    # The attribute index is supplied as an s(...) value and the answer is
    # passed through reveal() before asserting; the expected flags are the
    # ones given in continuous=[False, True, False].
    first = Sample([s(0), s(1), s(1)], s(0))
    second = Sample([s(1), s(2), s(1)], s(1))
    dataset = ObliviousDataset.create(
        first, second, continuous=[False, True, False])

    flag_for_0 = reveal(dataset.is_continuous(s(0)))
    self.assertFalse(flag_for_0)
    flag_for_1 = reveal(dataset.is_continuous(s(1)))
    self.assertTrue(flag_for_1)
    flag_for_2 = reveal(dataset.is_continuous(s(2)))
    self.assertFalse(flag_for_2)
def test_select_best_attribute(self):
    # Attribute 2 has the values 1, 1, 0, identical to the outcomes, so the
    # test expects it to be selected, with threshold 0.
    rows = [
        Sample([s(0), s(1), s(1), s(0)], s(1)),
        Sample([s(1), s(0), s(1), s(1)], s(1)),
        Sample([s(0), s(0), s(0), s(1)], s(0)),
    ]
    samples = ObliviousDataset.create(*rows)

    best_attribute, threshold = select_best_attribute(samples)
    self.assertEqual(reveal(best_attribute), 2)
    self.assertEqual(reveal(threshold), 0)
def test_partition_on_binary_attribute(self):
    # Partitions on attribute 2 (given as a secret index). The sample whose
    # attribute 2 is 0 is expected on the left, the two samples whose
    # attribute 2 is 1 on the right.
    data = ObliviousDataset.create(
        sample(s(0), s(1), s(1), s(0)),
        sample(s(1), s(0), s(1), s(1)),
        sample(s(0), s(0), s(0), s(1)))

    partitions = partition_binary(data, attribute_index=s(2))
    left, right = partitions

    self.assertEqual(reveal(left), [Sample([0, 0, 0, 1], 0)])
    self.assertEqual(
        reveal(right),
        [Sample([0, 1, 1, 0], 0), Sample([1, 0, 1, 1], 0)])
def test_select_best_attribute_using_subset(self):
    # The selection [0, 1, 0, 1] flags the second and fourth samples; the
    # sample filled with 42..46 is among those flagged with 0.
    # NOTE(review): presumably a 1 keeps the sample and a 0 hides it -
    # confirm against ObliviousDataset.select.
    full_dataset = ObliviousDataset.create(
        Sample([s(0), s(1), s(1), s(0)], s(1)),
        Sample([s(1), s(0), s(1), s(1)], s(1)),
        Sample([s(42), s(43), s(44), s(45)], s(46)),
        Sample([s(0), s(0), s(0), s(1)], s(0)),
    )
    samples = full_dataset.select([s(0), s(1), s(0), s(1)])

    best_attribute, _ = select_best_attribute(samples)
    self.assertEqual(reveal(best_attribute), 2)
def test_select_best_attribute_from_continuous_and_binary(self):
    # Attribute 0 is binary, attribute 1 is flagged continuous. Along
    # attribute 1 (values 1..5) the outcome switches from 0 to 1 after
    # value 3, so the test expects attribute 1 with threshold 3.
    rows = [
        Sample([s(0), s(1)], s(0)),
        Sample([s(0), s(2)], s(0)),
        Sample([s(1), s(3)], s(0)),
        Sample([s(1), s(4)], s(1)),
        Sample([s(1), s(5)], s(1)),
    ]
    samples = ObliviousDataset.create(*rows, continuous=[False, True])

    best_attribute, threshold = select_best_attribute(samples)
    self.assertEqual(reveal(best_attribute), 1)
    self.assertEqual(reveal(threshold), 3)
def test_calculate_gains_for_thresholds_ignores_duplicates(self):
    # All three samples carry the same value (0) in the single continuous
    # attribute; the test expects exactly one gain entry to come back.
    rows = [
        Sample([s(0)], s(0)),
        Sample([s(0)], s(0)),
        Sample([s(0)], s(0)),
    ]
    samples = ObliviousDataset.create(*rows, continuous=[True])

    column = samples.column(0)
    outcomes = samples.outcomes
    plain_gains = reveal(calculate_gains_for_thresholds(column, outcomes))

    self.assertEqual(len(plain_gains), 1)
def test_partition_on_continuous_attribute(self):
    # Partitions on attribute 2 with threshold 2 (both given as secret
    # values). The sample whose attribute 2 is 1 is expected on the left,
    # the two samples whose attribute 2 is 3 on the right.
    data = ObliviousDataset.create(
        sample(s(1), s(3), s(3), s(1)),
        sample(s(3), s(1), s(3), s(3)),
        sample(s(1), s(1), s(1), s(3)))

    partitions = partition_continuous(
        data, attribute_index=s(2), threshold=s(2))
    left, right = partitions

    self.assertEqual(reveal(left), [Sample([1, 1, 1, 3], 0)])
    self.assertEqual(
        reveal(right),
        [Sample([1, 3, 3, 1], 0), Sample([3, 1, 3, 3], 0)])
def test_sorts_column_and_outcomes_of_array(self):
    # The column holds 2, 1, 3, 4 with outcomes 5, 6, 7, 8. After sorting,
    # the column is expected in ascending order and the outcomes are
    # expected to have moved along with their column values.
    dataset = ObliviousDataset.create(
        Sample([s(2)], s(5)),
        Sample([s(1)], s(6)),
        Sample([s(3)], s(7)),
        Sample([s(4)], s(8)),
    )
    column = dataset.column(s(0))
    outcomes = dataset.outcomes

    sorted_pair = sort(column, outcomes)
    sorted_column, sorted_outcomes = sorted_pair

    self.assertEqual(reveal(sorted_column), [1, 2, 3, 4])
    self.assertEqual(reveal(sorted_outcomes), [6, 5, 7, 8])
async def test_continuous_attribute_with_some_depth(self):
    # Trains a depth-2 tree on one continuous attribute (values 1..5 with
    # outcomes 0, 0, 1, 1, 0). The root is expected to split attribute 0
    # at threshold 2, and its right child to split again at threshold 4.
    samples = ObliviousDataset.create(
        Sample([s(1)], s(0)),
        Sample([s(2)], s(0)),
        Sample([s(3)], s(1)),
        Sample([s(4)], s(1)),
        Sample([s(5)], s(0)),
        continuous=[True])

    tree = reveal(await train(samples, depth=2))

    self.assertEqual(tree.attribute, 0)
    self.assertEqual(tree.threshold, 2)
    # assertIsInstance reports the offending object and expected type on
    # failure, where assertTrue(isinstance(...)) only says
    # "False is not true".
    self.assertIsInstance(tree.left, Branch)
    self.assertIsInstance(tree.right, Branch)
    self.assertEqual(tree.right.attribute, 0)
    self.assertEqual(tree.right.threshold, 4)
async def test_multiple_samples_with_some_depth(self):
    # Trains a depth-2 tree on four two-attribute samples and compares the
    # revealed result against the complete expected tree.
    samples = ObliviousDataset.create(
        Sample([s(0), s(1)], s(0)),
        Sample([s(1), s(0)], s(0)),
        Sample([s(1), s(0)], s(0)),
        Sample([s(1), s(1)], s(1)))
    trained = reveal(await train(samples, depth=2))

    expected_left = Branch(
        1,  # random, could have been zero as well
        threshold=0,
        left=leaf(0),
        right=pruned())
    expected_right = Branch(0, threshold=0, left=leaf(0), right=leaf(1))
    expected_tree = Branch(
        1, threshold=0, left=expected_left, right=expected_right)

    self.assertEqual(trained, expected_tree)
async def test_single_sample_with_some_depth(self):
    # Trains a depth-2 tree on a single sample (attribute 1, outcome 1).
    # In the expected tree every position is pruned except the right-most
    # one, which is a leaf with outcome 1.
    samples = ObliviousDataset.create(Sample([s(1)], s(1)))
    trained = reveal(await train(samples, depth=2))

    expected_left = Branch(0, threshold=0, left=pruned(), right=pruned())
    expected_right = Branch(0, threshold=0, left=pruned(), right=leaf(1))
    expected_tree = Branch(
        0, threshold=0, left=expected_left, right=expected_right)

    self.assertEqual(trained, expected_tree)
async def random_attributes(samples, amount):
    """Build a dataset from the columns chosen by ``random_columns``.

    Awaits ``random_columns(samples, amount)``, which yields the columns
    together with their ``continuous`` flags and ``labels``. For each row
    index of ``samples`` a new ``Sample`` is assembled from that row's
    entry in every returned column, paired with the row's original
    outcome.

    Returns an ``ObliviousDataset`` created from those samples, with the
    ``continuous`` and ``labels`` values passed through unchanged.
    """
    columns, continuous, labels = await random_columns(samples, amount)
    outcomes = samples.outcomes

    def build_row(row):
        # Index access (rather than iteration) is kept on purpose: only
        # len() and [] are known to work on these containers.
        outcome = outcomes[row]
        inputs = [columns[col][row] for col in range(len(columns))]
        return Sample(inputs, outcome)

    rows = [build_row(row) for row in range(len(samples))]
    return ObliviousDataset.create(
        *rows, continuous=continuous, labels=labels)
async def test_single_sample_depth_zero_outcome_0(self):
    # With depth 0 and a single sample whose outcome is 0, training is
    # expected to yield just a leaf carrying outcome 0.
    only_sample = Sample([s(1)], s(0))
    samples = ObliviousDataset.create(only_sample)

    trained = reveal(await train(samples, depth=0))
    self.assertEqual(trained, leaf(0))
def sample(ins, out):
    """Create a ``Sample`` from plain values, wrapping each one with ``s``.

    Every element of ``ins`` is passed through ``s`` to form the input
    list, and ``out`` is passed through ``s`` to form the outcome.
    """
    secret_inputs = list(map(s, ins))
    secret_outcome = s(out)
    return Sample(secret_inputs, secret_outcome)
def test_add_samples(self):
    # Adding two samples is expected to add inputs position by position
    # (1+5, 2+6, 3+7) and to add the outcomes (4+8).
    left_operand = Sample([s(1), s(2), s(3)], s(4))
    right_operand = Sample([s(5), s(6), s(7)], s(8))

    total = reveal(left_operand + right_operand)
    self.assertEqual(total, Sample([6, 8, 10], 12))
def test_determine_class_multiple_samples(self):
    # Outcome 1 occurs twice and outcome 0 once; the expected class is 1.
    rows = [
        Sample([s(0)], s(0)),
        Sample([s(0)], s(1)),
        Sample([s(0)], s(1)),
    ]
    dataset = ObliviousDataset.create(*rows)

    determined = reveal(dataset.determine_class())
    self.assertEqual(determined, 1)
def sample(*inputs):
    """Create a ``Sample`` from the given inputs with outcome ``s(0)``.

    The positional arguments arrive as a tuple; they are copied into a
    list before being handed to ``Sample`` so that the inputs have the
    same container type as every other ``Sample([...], ...)`` built in
    this file.
    """
    return Sample(list(inputs), s(0))
async def test_continuous_attributes(self):
    # Attribute 0 is 1 in both samples; attribute 1 is flagged continuous
    # and differs (2 vs 3) along with the outcome (0 vs 1). A depth-1 tree
    # is expected to split attribute 1 at threshold 2.
    samples = ObliviousDataset.create(
        Sample([s(1), s(2)], s(0)),
        Sample([s(1), s(3)], s(1)),
        continuous=[False, True])

    trained = reveal(await train(samples, depth=1))
    expected_tree = Branch(1, threshold=2, left=leaf(0), right=leaf(1))
    self.assertEqual(trained, expected_tree)
def test_random_sample_with_one_sample(self):
    # With a single sample in the dataset, choice() is expected to return
    # exactly that sample.
    only_sample = Sample([s(1), s(2), s(3)], s(4))
    dataset = ObliviousDataset.create(only_sample)

    chosen = reveal(dataset.choice())
    self.assertEqual(chosen, Sample([1, 2, 3], 4))
def test_multiply_samples(self):
    # Multiplying a sample by s(2) is expected to double every input and
    # the outcome.
    sample = Sample([s(1), s(2), s(3)], s(4))
    factor = s(2)

    product = reveal(sample * factor)
    self.assertEqual(product, Sample([2, 4, 6], 8))
async def test_two_samples_two_attributes(self):
    # Attribute 0 is 1 in both samples, while attribute 1 equals the
    # outcome (0 and 1). A depth-1 tree is expected to split on
    # attribute 1 with leaves 0 and 1.
    samples = ObliviousDataset.create(
        Sample([s(1), s(0)], s(0)),
        Sample([s(1), s(1)], s(1)))

    trained = reveal(await train(samples, depth=1))
    expected_tree = Branch(1, threshold=0, left=leaf(0), right=leaf(1))
    self.assertEqual(trained, expected_tree)
def sample(*inputs):
    """Create a ``Sample`` from the given inputs with outcome ``s(0)``.

    The positional arguments are copied into a fresh list before being
    handed to ``Sample``.
    """
    input_list = [*inputs]
    outcome = s(0)
    return Sample(input_list, outcome)