def test_sample_without_replacement():
    """Check the sample size returned by ``SampleWithoutReplacement``.

    A 100-element array of ones is stored in a ``DataNode``. The array is then
    queried through ``SampleWithoutReplacement`` wrapped around each of five
    mechanisms in turn, and every result must have ``sample_size`` entries
    along its first axis.
    """
    array = np.ones(100)
    sample_size = 50

    node = DataNode()
    node.set_private_data(name="array", data=array)

    def u(x, r):
        # For every candidate r[i]: r[i] times the number of entries of x
        # that are greater than or equal to r[i].
        scores = np.zeros(len(r))
        for idx, candidate in enumerate(r):
            scores[idx] = candidate * sum(np.greater_equal(x, candidate))
        return scores

    r = np.arange(0, 3.5, 0.001)
    delta_u = r.max()
    epsilon = 5
    exponential_mechanism = ExponentialMechanism(u, r, delta_u, epsilon, size=sample_size)

    wrapped_mechanisms = [
        LaplaceMechanism(1, 1),
        GaussianMechanism(1, (0.5, 0.5)),
        RandomizedResponseBinary(0.5, 0.5, 1),
        RandomizedResponseCoins(),
        exponential_mechanism,
    ]

    for mechanism in wrapped_mechanisms:
        sampler = SampleWithoutReplacement(mechanism, sample_size, array.shape)
        node.configure_data_access("array", sampler)
        sampled = node.query("array")
        assert sampled.shape[0] == sample_size
def test_exponential_mechanism_pricing():
    """Check the exponential mechanism on a small pricing example.

    Four bids are stored in a ``DataNode`` and queried ``size`` times through
    an ``ExponentialMechanism`` whose utility is ``price * (number of bids >=
    price)``. A density histogram of the answers is then inspected: the most
    frequent price must lie close to 1.00 and the least frequent price(s) must
    lie either close to 0.00 or above the highest bid (3.01).

    The previous assertions selected bins with ``y_bin == max_price`` /
    ``y_bin == min_price``, i.e. compared densities against prices. That
    selection is (almost surely) empty, and ``.all()`` of an empty array is
    True, so nothing was actually being checked.
    """
    def u(x, r):
        # Utility of each candidate price: price times the number of bids
        # that are greater than or equal to it.
        output = np.zeros(len(r))
        for i, price in enumerate(r):
            output[i] = price * sum(np.greater_equal(x, price))
        return output

    x = [1.00, 1.00, 1.00, 3.01]  # Input dataset: the true bids
    r = np.arange(0, 3.5, 0.001)  # Set the interval of possible outputs r
    delta_u = r.max()  # In this specific case, Delta u = max(r)
    epsilon = 5  # Set a value for epsilon
    size = 10000  # We want to repeat the query this many times

    node = DataNode()
    node.set_private_data(name="bids", data=np.array(x))
    data_access_definition = ExponentialMechanism(u, r, delta_u, epsilon, size)
    node.configure_data_access("bids", data_access_definition)
    result = node.query("bids")

    y_bin, x_bin = np.histogram(a=result,
                                bins=int(round(np.sqrt(len(result)))),
                                density=True)
    bin_size = x_bin[1] - x_bin[0]

    # x_bin holds len(y_bin) + 1 edges; drop the last one so that the boolean
    # masks built from y_bin line up with the left edge of each bin.
    left_edges = x_bin[:-1]
    max_price = left_edges[y_bin == y_bin.max()]
    min_price = left_edges[y_bin == y_bin.min()]

    # The histogram comes from random draws, so allow a few bins of slack.
    # NOTE(review): 5 bins is a heuristic margin - tune if the test is flaky.
    tolerance = 5 * bin_size

    # Check the best price is close to 1.00
    assert (np.abs(1.00 - max_price) < tolerance).all()

    # Check every no-revenue price is either greater than 3.01 or close to 0.00
    above_highest_bid = min_price > (3.01 - bin_size)
    close_to_zero = min_price < tolerance
    assert (above_highest_bid | close_to_zero).all()
def test_configure_data_access():
    """Expect a ``ValueError`` when using adaptive DP without a mechanism.

    An ``AdaptiveDifferentialPrivacy`` definition built only from
    ``epsilon_delta=(1, 1)`` is configured on a node and the data is queried
    with no mechanism supplied. The error may come from either call, so both
    sit inside the ``pytest.raises`` block.
    """
    adaptive_access = AdaptiveDifferentialPrivacy(epsilon_delta=(1, 1))

    node = DataNode()
    node.set_private_data("test", np.array(range(10)))

    with pytest.raises(ValueError):
        node.configure_data_access("test", adaptive_access)
        node.query("test")
def test_exception_budget_2():
    """Expect ``ExceededPrivacyBudgetError`` when querying repeatedly.

    The node is configured with ``AdaptiveDifferentialPrivacy`` using
    ``epsilon_delta=(1, 0.001)`` and then queried up to 999 times, each time
    with a fresh ``GaussianMechanism(1, epsilon_delta=(0.1, 1))``. The error
    must be raised somewhere within that loop.
    """
    adaptive_access = AdaptiveDifferentialPrivacy(epsilon_delta=(1, 0.001))

    node = DataNode()
    values = np.array(range(10))
    node.set_private_data("test", values)
    node.configure_data_access("test", adaptive_access)

    with pytest.raises(ExceededPrivacyBudgetError):
        for _ in range(1, 1000):
            node.query(
                "test",
                differentially_private_mechanism=GaussianMechanism(1, epsilon_delta=(0.1, 1)),
            )
def test_randomize_binary_mechanism_no_binary_scalar():
    """Expect a ``ValueError`` when querying the scalar 0.1.

    The value is stored in a node accessed through
    ``RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)``.
    """
    node = DataNode()
    node.set_private_data(name="scalar", data=0.1)

    binary_response = RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)
    node.configure_data_access("scalar", binary_response)

    with pytest.raises(ValueError):
        node.query("scalar")
def test_randomize_binary_mechanism_scalar_coins():
    """Query the scalar 1 through a default ``RandomizedResponseCoins``.

    The answer must be a scalar equal to 0 or 1.
    """
    node = DataNode()
    node.set_private_data(name="scalar", data=1)
    node.configure_data_access("scalar", RandomizedResponseCoins())

    answer = node.query(private_property="scalar")

    assert np.isscalar(answer)
    assert answer in (0, 1)
def test_laplace_scalar_mechanism():
    """Query the scalar 175 through ``LaplaceMechanism(1, 1)``.

    The answer must differ from the stored value, but by less than 100.
    """
    true_value = 175

    node = DataNode()
    node.set_private_data("scalar", true_value)
    node.configure_data_access("scalar", LaplaceMechanism(1, 1))

    noisy_value = node.query("scalar")

    assert true_value != noisy_value
    deviation = np.abs(true_value - noisy_value)
    assert deviation < 100
def test_data_access():
    """Query an adaptive-DP node with an explicit Gaussian mechanism.

    The node is configured with ``AdaptiveDifferentialPrivacy`` using
    ``epsilon_delta=(1, 1)``. A single query passing
    ``GaussianMechanism(1, epsilon_delta=(0.1, 1))`` must return something
    other than ``None``.
    """
    adaptive_access = AdaptiveDifferentialPrivacy(epsilon_delta=(1, 1))

    node = DataNode()
    values = np.array(range(10))
    node.set_private_data("test", values)
    node.configure_data_access("test", adaptive_access)

    gaussian = GaussianMechanism(1, epsilon_delta=(0.1, 1))
    answer = node.query("test", differentially_private_mechanism=gaussian)

    assert answer is not None
def test_gaussian_scalar_mechanism():
    """Query the scalar 175 through a Gaussian mechanism.

    The mechanism is ``GaussianMechanism(1, epsilon_delta=(0.1, 1))``. The
    answer must differ from the stored value, but by less than 100.
    """
    true_value = 175

    node = DataNode()
    node.set_private_data("scalar", true_value)
    gaussian = GaussianMechanism(1, epsilon_delta=(0.1, 1))
    node.configure_data_access("scalar", gaussian)

    noisy_value = node.query("scalar")

    assert true_value != noisy_value
    deviation = np.abs(true_value - noisy_value)
    assert deviation < 100
def test_exception_exceeded_privacy_budget_error():
    """Expect ``ExceededPrivacyBudgetError`` on the very first query.

    The node uses ``AdaptiveDifferentialPrivacy`` with ``epsilon_delta=(1, 0)``
    and a default ``GaussianMechanism(1, epsilon_delta=(0.1, 1))``.
    """
    gaussian = GaussianMechanism(1, epsilon_delta=(0.1, 1))
    adaptive_access = AdaptiveDifferentialPrivacy(
        epsilon_delta=(1, 0),
        differentially_private_mechanism=gaussian,
    )

    node = DataNode()
    node.set_private_data("scalar", 175)
    node.configure_data_access("scalar", adaptive_access)

    with pytest.raises(ExceededPrivacyBudgetError):
        node.query("scalar")
def test_sample_without_replacement_multidimensional():
    """Check the sample size for a two-dimensional input.

    A ``(100, 2)`` array of ones is queried through
    ``SampleWithoutReplacement`` wrapping ``LaplaceMechanism(1, 1)``. The
    result must have ``sample_size`` entries along its first axis.
    """
    data = np.ones((100, 2))
    sample_size = 50

    node = DataNode()
    node.set_private_data(name="array", data=data)

    epsilon = 1
    laplace = LaplaceMechanism(1, epsilon)
    sampler = SampleWithoutReplacement(laplace, sample_size, data.shape)
    node.configure_data_access("array", sampler)

    sampled = node.query("array")

    assert sampled.shape[0] == sample_size
def test_randomize_binary_mechanism_array_almost_always_true_values_coins():
    """Check that the mean of the answers stays within 0.05 below 1.

    1000 ones are queried through
    ``RandomizedResponseCoins(prob_head_first=0.01, prob_head_second=0.9)``.
    """
    ones = np.ones(1000)

    node = DataNode()
    node.set_private_data(name="array", data=ones)

    # Very low heads probability in the first attempt, mean should be near true value
    coins = RandomizedResponseCoins(prob_head_first=0.01, prob_head_second=0.9)
    node.configure_data_access("array", coins)

    answers = node.query("array")

    shortfall = 1 - np.mean(answers)
    assert shortfall < 0.05
def test_randomize_binary_random_scalar_0():
    """Query the scalar 0 through a binary randomized response.

    The mechanism is ``RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)``.
    The answer must be a scalar equal to 0 or 1.
    """
    node = DataNode()
    node.set_private_data(name="scalar", data=0)

    binary_response = RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)
    node.configure_data_access("scalar", binary_response)

    answer = node.query(private_property="scalar")

    assert np.isscalar(answer)
    assert answer in (0, 1)
def test_laplace_dictionary_mechanism():
    """Query a dictionary of arrays through ``LaplaceMechanism(1, 1)``.

    The answer must expose the same keys as the stored dictionary, and the
    mean of the entry under key 0 must stay within 5 of the original mean.

    The mean check uses the absolute difference. The earlier one-sided form
    (``original - noisy < 5``) also accepted any arbitrarily larger noisy mean.
    """
    dictionary = {
        0: np.array([[2, 4, 5], [2, 3, 5]]),
        1: np.array([[1, 3, 1], [1, 4, 6]]),
    }

    node = DataNode()
    node.set_private_data("dictionary", dictionary)
    node.configure_data_access("dictionary", LaplaceMechanism(1, 1))

    result = node.query("dictionary")

    assert dictionary.keys() == result.keys()
    assert np.abs(np.mean(dictionary[0]) - np.mean(result[0])) < 5
def test_randomize_binary_mechanism_array_almost_always_false_values_zeros():
    """Check that the mean of the answers lies within 0.05 of 1.

    1000 zeros are queried through
    ``RandomizedResponseBinary(f0=0.01, f1=0.5, epsilon=1)``.

    NOTE(review): the original inline comment said the mean "should be near 0",
    but the assertion requires it to be near 1 - confirm the meaning of ``f0``
    against the mechanism's documentation.
    """
    zeros = np.zeros(1000)

    node = DataNode()
    node.set_private_data(name="array", data=zeros)

    binary_response = RandomizedResponseBinary(f0=0.01, f1=0.5, epsilon=1)
    node.configure_data_access("array", binary_response)

    answers = node.query("array")

    distance_from_one = np.abs(1 - np.mean(answers))
    assert distance_from_one < 0.05
def test_randomize_binary_mechanism_array_almost_always_true_values_ones():
    """Check that the mean of the answers stays within 0.05 below 1.

    1000 ones are queried through
    ``RandomizedResponseBinary(f0=0.5, f1=0.99, epsilon=5)``.
    """
    ones = np.ones(1000)

    node = DataNode()
    node.set_private_data(name="array", data=ones)

    # Prob of one given 1 very high, mean should be near 1
    binary_response = RandomizedResponseBinary(f0=0.5, f1=0.99, epsilon=5)
    node.configure_data_access("array", binary_response)

    answers = node.query("array")

    shortfall = 1 - np.mean(answers)
    assert shortfall < 0.05
def test_laplace_dictionary_mechanism_wrong_shapes():
    """Expect a ``ValueError`` when sensitivity shapes do not match the data.

    Both the private data and the sensitivity are dictionaries keyed by 0
    and 1, but the array shapes under each key differ: (3,) vs (2, 3) for
    key 0, and (2, 3) vs (5,) for key 1.
    """
    private_data = {
        0: np.array([2, 3, 5]),
        1: np.array([[1, 3, 1], [1, 4, 6]]),
    }
    mismatched_sensitivity = {
        0: np.array([[1, 1, 2], [2, 1, 1]]),
        1: np.array([3, 1, 11, 1, 2]),
    }

    node = DataNode()
    node.set_private_data("dictionary", private_data)
    laplace = LaplaceMechanism(mismatched_sensitivity, 1)
    node.configure_data_access("dictionary", laplace)

    with pytest.raises(ValueError):
        node.query("dictionary")
def test_randomize_binary_mechanism_array_coins():
    """Query 100 ones through a default ``RandomizedResponseCoins``.

    The answer must not be a scalar. It must differ from the input in at
    least one but not all positions, and its mean must be below 1.
    """
    ones = np.ones(100)

    node = DataNode()
    node.set_private_data(name="array", data=ones)
    node.configure_data_access("array", RandomizedResponseCoins())

    answers = node.query("array")

    # Count the positions whose answer differs from the stored value.
    flipped = sum(1 for pos in range(100) if answers[pos] != ones[pos])

    assert not np.isscalar(answers)
    assert 0 < flipped < 100
    assert np.mean(answers) < 1
def test_randomize_binary_random():
    """Query 100 ones through a binary randomized response.

    The mechanism is ``RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)``.
    The answer must differ from the input in at least one but not all
    positions, and its mean must be below 1.
    """
    data_size = 100
    ones = np.ones(data_size)

    node = DataNode()
    node.set_private_data(name="A", data=ones)
    binary_response = RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)
    node.configure_data_access("A", binary_response)

    answers = node.query(private_property="A")

    # Count the positions whose answer differs from the stored value.
    flipped = sum(1 for pos in range(data_size) if answers[pos] != ones[pos])

    assert 0 < flipped < data_size
    assert np.mean(answers) < 1
def test_exponential_mechanism_obtain_laplace():
    """Run the exponential mechanism with utility ``-|x - r|``.

    A single value (3.5) is queried ``size`` times over the candidate outputs
    ``r``. Every answer must lie strictly inside the range of ``r``, and the
    mean answer must be within ``delta_u / epsilon`` of the true value.
    """
    def u_laplacian(x, r):
        return -np.absolute(x - r)

    r = np.arange(-20, 20, 0.001)  # Candidate outputs
    x = 3.5  # True value held by the node
    delta_u = 1  # Simply set to one
    epsilon = 1
    size = 100000  # Number of repetitions of the query

    node = DataNode()
    node.set_private_data(name="identity", data=np.array(x))
    exponential = ExponentialMechanism(u_laplacian, r, delta_u, epsilon, size)
    node.configure_data_access("identity", exponential)

    answers = node.query("identity")

    # Check all outputs are within range
    lower, upper = r.min(), r.max()
    assert (answers > lower).all() and (answers < upper).all()

    # Check the mean output is close to true value
    mean_error = np.absolute(np.mean(answers) - x)
    assert mean_error < (delta_u / epsilon)
def test_sensitivity_wrong_input():
    """Check that ill-formed sensitivities and unsupported data are rejected.

    Cases covered, in order:

    * a negative sensitivity (``ValueError`` while building the mechanism or
      configuring the access),
    * sensitivities whose shape cannot be matched with the query result, for
      scalar, 1D and ND data (``ValueError`` on query),
    * list data with an array sensitivity (``ValueError``) or with a
      sensitivity list of the wrong length (``IndexError``),
    * tuple data (``NotImplementedError``).

    This function used to be defined twice under the same name. The first
    definition was shadowed by the second and never ran; it contained only a
    subset of these cases, so a single definition is kept.
    """
    epsilon_delta = (0.1, 1)

    def configured_node(name, data, mechanism):
        # Fresh node holding `data` under `name`, accessed through `mechanism`.
        node = DataNode()
        node.set_private_data(name, data)
        node.configure_data_access(name, mechanism)
        return node

    # Negative sensitivity:
    node = DataNode()
    node.set_private_data("scalar", 175)
    with pytest.raises(ValueError):
        node.configure_data_access(
            "scalar",
            GaussianMechanism(sensitivity=-0.1, epsilon_delta=epsilon_delta))

    # Scalar query result, too many sensitivity values provided:
    node = configured_node(
        "scalar", 175,
        GaussianMechanism(sensitivity=[0.1, 0.5], epsilon_delta=epsilon_delta))
    with pytest.raises(ValueError):
        node.query("scalar")

    # Both query result and sensitivity are 1D-arrays, but non-broadcastable:
    node = configured_node(
        "data_array", [10, 10, 10, 10],
        GaussianMechanism(sensitivity=[0.1, 10, 100, 1000, 1000],
                          epsilon_delta=epsilon_delta))
    with pytest.raises(ValueError):
        node.query("data_array")

    # ND-array query result and 1D-array sensitivity, but non-broadcastable:
    data_ndarray = [[10, 10, 10, 10], [10, 10, 10, 10], [10, 10, 10, 10]]
    node = configured_node(
        "data_ndarray", data_ndarray,
        GaussianMechanism(sensitivity=[0.1, 10, 100],
                          epsilon_delta=epsilon_delta))
    with pytest.raises(ValueError):
        node.query("data_ndarray")

    # Both query result and sensitivity are ND-arrays, but non-broadcastable
    # (they should have the same shape in this case):
    sensitivity = [[0.1, 10, 100, 1000, 10000],
                   [0.1, 10, 100, 1000, 10000],
                   [0.1, 10, 100, 1000, 10000]]
    node = configured_node(
        "data_ndarray", data_ndarray,
        GaussianMechanism(sensitivity=sensitivity, epsilon_delta=epsilon_delta))
    with pytest.raises(ValueError):
        node.query("data_ndarray")

    # Query result is a list of arrays: sensitivity must be either a scalar,
    # or a list of the same length as query
    data_list = [
        np.random.rand(30, 20),
        np.random.rand(20, 30),
        np.random.rand(50, 40),
    ]

    # Array instead of scalar
    node = configured_node(
        "data_list", data_list,
        LaplaceMechanism(sensitivity=np.array([1, 1]), epsilon=1))
    with pytest.raises(ValueError):
        node.query("data_list")

    # List of wrong length
    # NOTE(review): this case stores `data_ndarray` (a list of three lists)
    # under the "data_list" key rather than `data_list` itself; kept as in the
    # original - confirm whether `data_list` was intended.
    node = configured_node(
        "data_list", data_ndarray,
        LaplaceMechanism(sensitivity=[1, 1], epsilon=1))
    with pytest.raises(IndexError):
        node.query("data_list")

    # Query result is wrong data structure: so far, tuples are not allowed
    node = configured_node(
        "data_tuple", (1, 2, 3, 4, 5),
        LaplaceMechanism(sensitivity=2, epsilon=1))
    with pytest.raises(NotImplementedError):
        node.query("data_tuple")