Ejemplo n.º 1
0
 def test_re_identification_risk_to_dataframe_shape(self):
     """Check that the dataframe's first row carries the response's measure.

     Compares the ``records_affected_by_highest_prosecutor_risk`` value in
     the raw response fixture with the first entry of the column of the same
     name in the re-identification risk dataframe.
     """
     measure = "records_affected_by_highest_prosecutor_risk"
     profile = RiskProfile(self.risk_profile_response)
     frame = profile.re_identification_risk_dataframe()
     measures = self.risk_profile_response["reIdentificationRisk"]["measures"]
     self.assertEqual(measures[measure], frame[measure][0])
Ejemplo n.º 2
0
 def test_equality(self):
     """Profiles built from the same response compare equal until one changes."""
     untouched = RiskProfile(self.risk_profile_response)
     modified = RiskProfile(self.risk_profile_response)
     self.assertEqual(untouched, modified)

     # Overwrite one measure through the second instance, then re-compare.
     risk_measures = modified._re_identification_of_risk
     risk_measures["estimated_prosecutor_risk"] = 50.0
     self.assertNotEqual(untouched, modified)
Ejemplo n.º 3
0
 def test_attacker_success_rate_property(self):
     """attacker_success_rate should map each listed rate name to 1.0."""
     rate_names = (
         'Prosecutor_attacker_success_rate',
         'Marketer_attacker_success_rate',
         'Journalist_attacker_success_rate',
     )
     expected_rates = {rate_name: 1.0 for rate_name in rate_names}
     profile = RiskProfile(self.risk_profile_response)
     self.assertEqual(expected_rates, profile.attacker_success_rate)
Ejemplo n.º 4
0
    def risk_profile(self, dataset: Dataset) -> RiskProfile:
        """
        Build a risk profile for the given Dataset.

        The request payload comes from ``_risk_profile_payload``, is submitted
        through ``_risk_profile``, and the response text is decoded as JSON
        before being wrapped in a RiskProfile.

        RiskProfile contains:
         - re-identification risks
         - distributed risk

        :param dataset: Dataset to create a risk profile for
        :return: RiskProfile built from the decoded response
        """
        payload = self._risk_profile_payload(dataset)
        raw_response = self._risk_profile(payload)
        return RiskProfile(json.loads(raw_response.text))
Ejemplo n.º 5
0
    def _anonymize_result(self, response):
        """
        Assemble the result object handed back to the caller.

        :param response: object whose ``text`` attribute holds the JSON body
        :return: the value produced by ``AnonymizeResult._from_response``
        """
        payload = json.loads(response.text)
        attributes = self._attributes(payload)

        # Everything except the risk profile lives under "anonymizeResult".
        anonymized = payload["anonymizeResult"]
        dataset = Dataset(anonymized["data"], attributes)
        risk_profile = RiskProfile(payload["riskProfile"])
        status = anonymized["anonymizationStatus"]
        metrics = anonymized["metrics"]

        return AnonymizeResult._from_response(
            dataset, risk_profile, metrics, status)
Ejemplo n.º 6
0
    def setUp(self):
        """Prepare the dataset, risk-profile response and result fixtures.

        Builds a small two-record Dataset, a dict used as the raw input to
        RiskProfile, and an AnonymizeResult assembled from them.
        """
        # First row holds the column names that the type mapping below keys on.
        self.test_data = [['id', 'name'], ['0', 'Viktor'], ['1', 'Jerry']]
        self.test_attribute_type_mapping = {
            'id': AttributeType.IDENTIFYING,
            'name': AttributeType.QUASIIDENTIFYING
        }
        self.test_dataset = Dataset(self.test_data,
                                    self.test_attribute_type_mapping)
        # Raw dict handed to RiskProfile below. Every measure except
        # "quasi_identifiers" is given as a string.
        self.risk_profile_response = {
            "reIdentificationRisk": {
                "measures": {
                    "measure_value":
                    "[%]",
                    "Prosecutor_attacker_success_rate":
                    "98.72",
                    "records_affected_by_highest_prosecutor_risk":
                    "97.46000000000001",
                    "sample_uniques":
                    "97.46000000000001",
                    "estimated_prosecutor_risk":
                    "100.0",
                    "population_model":
                    "PITMAN",
                    "highest_journalist_risk":
                    "100.0",
                    "records_affected_by_lowest_risk":
                    "0.06",
                    "estimated_marketer_risk":
                    "98.72000000000001",
                    "Journalist_attacker_success_rate":
                    "98.72",
                    "highest_prosecutor_risk":
                    "100.0",
                    "estimated_journalist_risk":
                    "100.0",
                    "lowest_risk":
                    "33.33333333333333",
                    "Marketer_attacker_success_rate":
                    "98.72",
                    "average_prosecutor_risk":
                    "98.72000000000001",
                    "records_affected_by_highest_journalist_risk":
                    "97.46000000000001",
                    "population_uniques":
                    "39.64593493418713",
                    "quasi_identifiers": [
                        "Innvandrerbakgrunn", "Ytelse", "Innsatsgruppe",
                        "Ledighetsstatus"
                    ]
                }
            },
            # One entry per risk interval. The key spellings ("Inteval",
            # "Maxmal") are presumably those of the real payload -- verify
            # before "correcting" them.
            "distributionOfRisk": {
                "riskIntervalList": [{
                    "interval": "]50,100]",
                    "recordsWithRiskWithinInteval": 0.9746,
                    "recordsWithMaxmalRiskWithinInterval": 1.0
                }, {
                    "interval":
                    "]33.4,50]",
                    "recordsWithRiskWithinInteval":
                    0.0248,
                    "recordsWithMaxmalRiskWithinInterval":
                    0.0254
                }, {
                    "interval":
                    "]25,33.4]",
                    "recordsWithRiskWithinInteval":
                    0.0006,
                    "recordsWithMaxmalRiskWithinInterval":
                    0.0006
                }, {
                    "interval": "]20,25]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]16.7,20]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]14.3,16.7]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]12.5,14.3]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]10,12.5]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]9,10]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]8,9]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]7,8]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]6,7]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]5,6]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]4,5]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]3,4]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]2,3]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]1,2]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]0.1,1]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]0.01,0.1]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]0.001,0.01]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]0.0001,0.001]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]1e-5,0.0001]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]1e-6,1e-5]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }, {
                    "interval": "]0,1e-6]",
                    "recordsWithRiskWithinInteval": 0.0,
                    "recordsWithMaxmalRiskWithinInterval": 0.0
                }]
            }
        }

        self.test_riskprofile = RiskProfile(self.risk_profile_response)

        # NOTE(review): self.test_metrics is not assigned anywhere in this
        # method -- presumably it is provided elsewhere on the class; confirm.
        # NOTE(review): the raw self.test_data list is passed here rather than
        # self.test_dataset -- confirm this is what AnonymizeResult expects.
        self.test_anonymize_result = AnonymizeResult(self.test_data,
                                                     self.test_riskprofile,
                                                     self.test_metrics)
Ejemplo n.º 7
0
def risk_profile() -> RiskProfile:
    """Return a RiskProfile wrapping the value of ``analyze_response()``."""
    return RiskProfile(analyze_response())
Ejemplo n.º 8
0
 def test_population_model_property(self):
     """population_model is expected to be "ZAYATZ" for the fixture response."""
     profile = RiskProfile(self.risk_profile_response)
     self.assertEqual("ZAYATZ", profile.population_model)
Ejemplo n.º 9
0
 def test_quasi_indentifers_property(self):
     """quasi_identifiers is expected to be ['zipcode'] for the fixture response."""
     profile = RiskProfile(self.risk_profile_response)
     self.assertEqual(['zipcode'], profile.quasi_identifiers)
Ejemplo n.º 10
0
 def test_re_identification_risk_to_dataframe__column_types(self):
     """Every column of the re-identification risk dataframe is float64."""
     profile = RiskProfile(self.risk_profile_response)
     column_types = profile.re_identification_risk_dataframe().dtypes.tolist()
     for column_type in column_types:
         self.assertEqual(column_type, dtype("float64"))
Ejemplo n.º 11
0
 def test_distribution_of_risk_to_dataframe_shape(self):
     """Check that the dataframe's first interval matches the response's.

     Compares the ``interval`` of the first ``riskIntervalList`` entry in the
     raw response fixture with the first entry of the dataframe's
     ``interval`` column.
     """
     profile = RiskProfile(self.risk_profile_response)
     frame = profile.distribution_of_risk_dataframe()
     distribution = self.risk_profile_response["distributionOfRisk"]
     first_interval = distribution["riskIntervalList"][0]["interval"]
     self.assertEqual(first_interval, frame["interval"][0])
Ejemplo n.º 12
0
 def test_to_dataframe(self):
     """re_identification_risk_dataframe() returns a DataFrame instance."""
     profile = RiskProfile(self.risk_profile_response)
     self.assertIsInstance(profile.re_identification_risk_dataframe(),
                           DataFrame)
Ejemplo n.º 13
0
 def test_hash(self):
     """Two profiles built from the same response collapse to one set member."""
     first = RiskProfile(self.risk_profile_response)
     second = RiskProfile(self.risk_profile_response)
     distinct_profiles = set((first, second))
     self.assertEqual(1, len(distinct_profiles))
Ejemplo n.º 14
0
 def test_init(self):
     """Constructing a RiskProfile from the fixture response must not raise."""
     _ = RiskProfile(self.risk_profile_response)