Ejemplo n.º 1
0
 def test_rename_double_metadata(self):
     """Check that renaming a column also renames it in both metadata lists.

     The single column 'empty' is registered as sensitive data and as a
     quasi-identifier; after the rename the columns and both metadata
     lists are expected to report 'void'.
     """
     original = SensitiveFrame(columns=['empty'],
                               sensitive_data=['empty'],
                               quasi_identifiers=['empty'])
     renamed = original.rename(columns={'empty': 'void'})
     new_names = ['void']
     assert renamed.columns.tolist() == new_names
     assert renamed.quasi_identifiers == new_names
     assert renamed.sensitive_data == new_names
Ejemplo n.º 2
0
 def test_get_and_set_double_metadata(self, sample_sf_two):
     """Check that a column copied under a new name keeps both metadata tags.

     The 'born' column of the fixture is assigned into an empty frame as
     'birth_year'; the new frame must hold the data and list 'birth_year'
     as both quasi-identifier and sensitive data.
     """
     target = SensitiveFrame()
     target['birth_year'] = sample_sf_two['born']
     new_column = ['birth_year']
     assert_frame_equal(target, DataFrame([2335, 2340], columns=['birth_year']))
     assert target.quasi_identifiers == new_column
     assert target.sensitive_data == new_column
Ejemplo n.º 3
0
    def test_simple_partition(self, middle_df):
        """Check that a single quasi-identifier column is split into two partitions.

        A partitioner built with the argument 2 is applied to the fixture;
        the result must be exactly two frames that keep the quasi-identifier
        metadata and match the expected left/right rows.
        """
        quasi_identifiers = ['middle_values']
        partitioner = MondrianPartitioner(2)
        source = SensitiveFrame(middle_df, quasi_identifiers=quasi_identifiers)
        partitions = partitioner.partition_sf(source)

        # Expected halves, keyed by the original row index.
        left_rows = {0: 1, 1: 1}
        right_rows = {2: 3, 3: 5, 4: 5}
        expected_left = DataFrame.from_dict(left_rows, orient='index')
        expected_right = DataFrame.from_dict(right_rows, orient='index')
        expected_left.columns = ['middle_values']
        expected_right.columns = ['middle_values']

        assert len(partitions) == 2
        left_part = partitions[0]
        right_part = partitions[1]
        assert left_part.quasi_identifiers == quasi_identifiers
        assert right_part.quasi_identifiers == quasi_identifiers
        assert_frame_equal(left_part, expected_left)
        assert_frame_equal(right_part, expected_right)
Ejemplo n.º 4
0
    def test_partition_along_widest_quasi_identifiers(self, sample_df):
        """Check the two-way split of a frame with two quasi-identifier columns.

        The expected partitions separate the rows whose 'wide_values' entry
        is 1 from those where it is 1000; both partitions must still list
        the same quasi-identifiers (compared order-insensitively via sort).
        """
        quasi_identifiers = ['slim_values', 'wide_values']
        partitioner = MondrianPartitioner(2)
        source = SensitiveFrame(sample_df, quasi_identifiers=quasi_identifiers)
        partitions = partitioner.partition_sf(source)

        # Expected halves, keyed by the original row index.
        left_rows = {0: [1, 1], 1: [1, 1]}
        right_rows = {
            2: [1, 1000],
            3: [2, 1000],
            4: [2, 1000],
        }
        expected_left = DataFrame.from_dict(left_rows, orient='index')
        expected_right = DataFrame.from_dict(right_rows, orient='index')
        expected_left.columns = quasi_identifiers
        expected_right.columns = quasi_identifiers

        assert len(partitions) == 2
        left_part = partitions[0]
        right_part = partitions[1]
        assert sorted(left_part.quasi_identifiers) == quasi_identifiers
        assert sorted(right_part.quasi_identifiers) == quasi_identifiers
        assert_frame_equal(left_part, expected_left)
        assert_frame_equal(right_part, expected_right)
Ejemplo n.º 5
0
 def test_oversized_partition(self, middle_df):
     """Check that a partitioner built with 10 returns the input frame as the only partition."""
     partitioner = MondrianPartitioner(10)
     source = SensitiveFrame(middle_df, quasi_identifiers=['middle_values'])
     partitions = partitioner.partition_sf(source)
     assert partitions == [source]
Ejemplo n.º 6
0
 def test_init_cleans_sf_metadata(self, sample_sf):
     """Check that a frame built from the fixture without metadata arguments has empty metadata lists."""
     rebuilt = SensitiveFrame(sample_sf)
     no_metadata = []
     assert rebuilt.quasi_identifiers == no_metadata
     assert rebuilt.sensitive_data == no_metadata
Ejemplo n.º 7
0
 def test_init_with_df_keeps_data(self, sample_df):
     """Check that constructing a SensitiveFrame from the fixture leaves its contents equal to the input."""
     wrapped = SensitiveFrame(sample_df)
     assert_frame_equal(wrapped, sample_df)
Ejemplo n.º 8
0
 def test_init_selects_metadata(self):
     """Check that quasi-identifiers naming a column absent from the frame are dropped at construction."""
     requested = ['a', 'b']  # 'b' is not among the frame's columns
     frame = SensitiveFrame(columns=['a'], quasi_identifiers=requested)
     assert frame.quasi_identifiers == ['a']
Ejemplo n.º 9
0
 def test_init_sets_multiple_metadata(self):
     """Check that sensitive data and quasi-identifiers can be set on different columns in one constructor call."""
     frame = SensitiveFrame(
         columns=['a', 'b'],
         sensitive_data=['a'],
         quasi_identifiers=['b'],
     )
     assert frame.sensitive_data == ['a']
     assert frame.quasi_identifiers == ['b']
Ejemplo n.º 10
0
 def test_init_sets_sensitive_data(self):
     """Check that every column passed as sensitive data at construction is reported back."""
     frame = SensitiveFrame(columns=['c', 'd'], sensitive_data=['c', 'd'])
     expected_sensitive = ['c', 'd']
     assert frame.sensitive_data == expected_sensitive
Ejemplo n.º 11
0
def sample_sf_three():
    """Build a two-row SensitiveFrame with 'uniform' as quasi-identifier and 'given_name' as sensitive data."""
    rows = [
        ['william', 'red'],
        ['laren', 'red'],
    ]
    return SensitiveFrame(
        rows,
        columns=['given_name', 'uniform'],
        quasi_identifiers=['uniform'],
        sensitive_data=['given_name'],
    )
Ejemplo n.º 12
0
def sample_sf_two():
    """Build a two-row SensitiveFrame whose 'born' column is both quasi-identifier and sensitive data."""
    rows = [
        ['starfleet', 2335],
        ['starfleet', 2340],
    ]
    return SensitiveFrame(
        rows,
        columns=['organization', 'born'],
        quasi_identifiers=['born'],
        sensitive_data=['born'],
    )
Ejemplo n.º 13
0
 def test_get_and_set_from_df(self, sample_df):
     """Check that a column copied from the `sample_df` fixture brings its data but no quasi-identifier tag.

     NOTE(review): `sample_df` is presumably a plain DataFrame whose 'rank'
     column holds 'commander' and 'lieutenant'; its definition is not
     visible here.
     """
     target = SensitiveFrame()
     target['grade'] = sample_df['rank']
     assert_frame_equal(target, DataFrame(['commander', 'lieutenant'], columns=['grade']))
     assert target.quasi_identifiers == []
Ejemplo n.º 14
0
 def test_get_and_set_non_metadata(self, sample_sf):
     """Check that copying the fixture's 'name' column under a new name adds no sensitive-data entry."""
     target = SensitiveFrame()
     target['person'] = sample_sf['name']
     assert_frame_equal(target, DataFrame(['riker', 'ro'], columns=['person']))
     assert target.sensitive_data == []
Ejemplo n.º 15
0
def sample_sf():
    """Build a two-row SensitiveFrame with an untagged 'name' column.

    'rank' and 'gender' are quasi-identifiers and 'species' is sensitive data.
    """
    rows = [
        ['riker', 'commander', 'man', 'human'],
        ['ro', 'ensign', 'woman', 'bejoran'],
    ]
    return SensitiveFrame(
        rows,
        columns=['name', 'rank', 'gender', 'species'],
        quasi_identifiers=['rank', 'gender'],
        sensitive_data=['species'],
    )
Ejemplo n.º 16
0
def sample_right_sf():
    """Build a two-row SensitiveFrame with 'name' and 'born' as quasi-identifiers and 'born' also as sensitive data."""
    rows = [
        ['riker', 'starfleet', 2335],
        ['ro', 'starfleet', 2340],
    ]
    column_names = ['name', 'organization', 'born']
    return SensitiveFrame(rows,
                          columns=column_names,
                          quasi_identifiers=['name', 'born'],
                          sensitive_data=['born'])
Ejemplo n.º 17
0
 def test_join_non_metadata(self):
     """Check that joining two frames that carry no metadata yields a frame with empty metadata lists."""
     other = SensitiveFrame(columns=['empty'])
     base = SensitiveFrame(columns=['void'])
     joined = base.join(other)
     assert joined.quasi_identifiers == []
     assert joined.sensitive_data == []