Exemple #1
0
 def test_empty_dataset_and_empty_attribute_set(self):
     # Both inputs are empty: the dataframe comes from a DummyEmptyDataset and
     # the attribute set holds no attribute. The entropy check is expected to
     # raise a ValueError instead of producing a value.
     dataset = DummyEmptyDataset()
     self._dataset = dataset
     self._df_one_fp_per_browser = dataset.get_df_w_one_fp_per_browser()
     self._attribute_set = AttributeSet()
     self.assertRaises(ValueError, self.check_entropy_result, WONT_COMPUTE)
class TestBestConditionalEntropic(unittest.TestCase):
    """Tests of the _best_conditional_entropic_attribute function.

    The fixtures are the dataframe returned by
    DummyCleanDataset.get_df_w_one_fp_per_browser() and the AttributeSet built
    from ATTRIBUTES. The tests only inspect the first element of the value
    returned by the function under test.
    """

    def setUp(self):
        self._attribute_set = AttributeSet(ATTRIBUTES)
        self._dataset = DummyCleanDataset()
        self._df_w_one_fp_per_browser = (
            self._dataset.get_df_w_one_fp_per_browser())

    def test_best_conditional_entropic_attribute(self):
        # Starting from no selected attribute, ATTRIBUTES[1] is expected to be
        # chosen: it has unique values, which makes it sufficient on its own.
        best_cond_ent_attr = _best_conditional_entropic_attribute(
            self._df_w_one_fp_per_browser, current_attributes=AttributeSet(),
            candidate_attributes=self._attribute_set)
        self.assertEqual(best_cond_ent_attr[0], ATTRIBUTES[1])

    def test_best_conditional_entropic_attribute_all_taken(self):
        # The candidates are exactly the current attributes, so no attribute
        # is expected to be returned.
        best_cond_ent_attr = _best_conditional_entropic_attribute(
            self._df_w_one_fp_per_browser,
            current_attributes=self._attribute_set,
            candidate_attributes=self._attribute_set)
        self.assertIsNone(best_cond_ent_attr[0])

    def test_best_conditional_entropic_attribute_empty_attribute_set(self):
        # An empty candidate set is expected to give no attribute.
        best_cond_ent_attr = _best_conditional_entropic_attribute(
            self._df_w_one_fp_per_browser,
            current_attributes=AttributeSet({ATTRIBUTES[0], ATTRIBUTES[1]}),
            candidate_attributes=AttributeSet())
        self.assertIsNone(best_cond_ent_attr[0])

    def test_best_conditional_entropic_attribute_empty_dataset(self):
        # With the dataframe of an empty dataset and non-empty candidates,
        # the call is expected to raise a ValueError.
        self._dataset = DummyEmptyDataset()
        self._df_w_one_fp_per_browser = (
            self._dataset.get_df_w_one_fp_per_browser())
        with self.assertRaises(ValueError):
            # The return value is irrelevant here, so it is not bound.
            _best_conditional_entropic_attribute(
                self._df_w_one_fp_per_browser,
                current_attributes=AttributeSet({ATTRIBUTES[0],
                                                 ATTRIBUTES[1]}),
                candidate_attributes=self._attribute_set)

    def test_best_conditional_entropic_attribute_empty_parameters(self):
        # Empty dataset and empty candidate set together: no error is
        # expected, and no attribute is returned.
        self._dataset = DummyEmptyDataset()
        self._df_w_one_fp_per_browser = (
            self._dataset.get_df_w_one_fp_per_browser())
        best_cond_ent_attr = _best_conditional_entropic_attribute(
            self._df_w_one_fp_per_browser,
            current_attributes=AttributeSet({ATTRIBUTES[0], ATTRIBUTES[1]}),
            candidate_attributes=AttributeSet())
        self.assertIsNone(best_cond_ent_attr[0])

    def test_best_conditional_entropic_attribute_unexistent_attribute(self):
        # UNEXISTENT_ATTRIBUTE is added to the candidates; the call is
        # expected to raise a KeyError.
        self._attribute_set.add(UNEXISTENT_ATTRIBUTE)
        with self.assertRaises(KeyError):
            # The return value is irrelevant here, so it is not bound.
            _best_conditional_entropic_attribute(
                self._df_w_one_fp_per_browser,
                current_attributes=AttributeSet({ATTRIBUTES[0],
                                                 ATTRIBUTES[1]}),
                candidate_attributes=self._attribute_set)
 def test_best_conditional_entropic_attribute_empty_parameters(self):
     # Empty dataset combined with an empty candidate set: the first element
     # of the returned value is expected to be None.
     dataset = DummyEmptyDataset()
     self._dataset = dataset
     self._df_w_one_fp_per_browser = dataset.get_df_w_one_fp_per_browser()
     result = _best_conditional_entropic_attribute(
         self._df_w_one_fp_per_browser,
         current_attributes=AttributeSet({ATTRIBUTES[0], ATTRIBUTES[1]}),
         candidate_attributes=AttributeSet(),
     )
     self.assertIsNone(result[0])
 def test_best_conditional_entropic_attribute_empty_dataset(self):
     # With the dataframe of an empty dataset and the candidates held in
     # self._attribute_set, the call is expected to raise a ValueError.
     self._dataset = DummyEmptyDataset()
     self._df_w_one_fp_per_browser = (
         self._dataset.get_df_w_one_fp_per_browser())
     with self.assertRaises(ValueError):
         # The return value is irrelevant here, so it is not bound.
         _best_conditional_entropic_attribute(
             self._df_w_one_fp_per_browser,
             current_attributes=AttributeSet({ATTRIBUTES[0],
                                              ATTRIBUTES[1]}),
             candidate_attributes=self._attribute_set)
Exemple #5
0
class TestAttributeSetEntropyFunction(unittest.TestCase):
    """Tests of attribute_set_entropy on valid, empty and invalid inputs."""

    def setUp(self):
        # Default fixtures; each test overrides the ones it needs to change.
        dataset = DummyCleanDataset()
        self._dataset = dataset
        self._df_one_fp_per_browser = dataset.get_df_w_one_fp_per_browser()
        self._attribute_set = AttributeSet(ATTRIBUTES)

    def check_entropy_result(self, expected_entropy: float):
        """Assert that the computed entropy almost equals the expected one.

        Args:
            expected_entropy: The value attribute_set_entropy should return
                for the current dataframe and attribute set.
        """
        actual_entropy = attribute_set_entropy(
            self._df_one_fp_per_browser, self._attribute_set)
        self.assertAlmostEqual(expected_entropy, actual_entropy)

    def test_empty_dataset_and_empty_attribute_set(self):
        # Empty dataframe and empty attribute set: a ValueError is expected.
        dataset = DummyEmptyDataset()
        self._dataset = dataset
        self._df_one_fp_per_browser = dataset.get_df_w_one_fp_per_browser()
        self._attribute_set = AttributeSet()
        self.assertRaises(ValueError, self.check_entropy_result, WONT_COMPUTE)

    def test_empty_dataset(self):
        # Empty dataframe with the default attribute set: a ValueError is
        # expected.
        dataset = DummyEmptyDataset()
        self._dataset = dataset
        self._df_one_fp_per_browser = dataset.get_df_w_one_fp_per_browser()
        self.assertRaises(ValueError, self.check_entropy_result, WONT_COMPUTE)

    def test_empty_attribute_set(self):
        # Default dataframe with an empty attribute set: a ValueError is
        # expected.
        self._attribute_set = AttributeSet()
        self.assertRaises(ValueError, self.check_entropy_result, WONT_COMPUTE)

    def test_unexistent_attribute(self):
        # UNEXISTENT_ATTRIBUTE is added to the attribute set: a KeyError is
        # expected.
        self._attribute_set.add(UNEXISTENT_ATTRIBUTE)
        self.assertRaises(KeyError, self.check_entropy_result, WONT_COMPUTE)

    def test_always_the_same_value(self):
        # ATTRIBUTES[2] alone is expected to have a null entropy.
        self._attribute_set = AttributeSet([ATTRIBUTES[2]])
        self.check_entropy_result(0.0)

    def test_in_between_entropy(self):
        # ATTRIBUTES[0] alone is expected to have the entropy of a
        # distribution with the probabilities 1/5, 2/5 and 2/5.
        self._attribute_set = AttributeSet([ATTRIBUTES[0]])
        p_low = 1/5
        p_high = 2/5
        weighted_logs = (p_low*log2(p_low) + p_high*log2(p_high)
                         + p_high*log2(p_high))
        self.check_entropy_result(-1 * weighted_logs)

    def test_unique_values(self):
        # ATTRIBUTES[1] alone is expected to reach log2 of the number of rows
        # of the dataset dataframe.
        self._attribute_set = AttributeSet([ATTRIBUTES[1]])
        self.check_entropy_result(log2(len(self._dataset.dataframe)))
 def test_get_best_entropic_attribute_empty_candidates_and_dataset(self):
     # With an empty dataset and an empty candidate set, the function is
     # expected to return None.
     dataset = DummyEmptyDataset()
     already_selected = AttributeSet({ATTRIBUTES[0], ATTRIBUTES[1]})
     no_candidates = AttributeSet()
     best_attribute = _get_best_conditional_entropic_attribute(
         dataset,
         current_attributes=already_selected,
         candidate_attributes=no_candidates,
     )
     self.assertIsNone(best_attribute)
Exemple #7
0
 def test_empty_dataset_and_attributes(self):
     # An empty dataset together with an empty attribute set is expected to
     # give an empty instability dictionary.
     # NOTE(review): `{}` is an empty dict literal, not a set literal;
     # confirm that AttributeSet accepts it as intended.
     self._dataset = DummyEmptyDataset()
     self._attributes = AttributeSet({})
     instability = _compute_attributes_instability(
         self._get_grouped_by_browser(), self._attributes)
     self.assertDictEqual({}, instability)
 def test_get_best_entropic_attribute_empty_dataset(self):
     # An empty dataset with the candidates held in self._attribute_set is
     # expected to make the function raise a ValueError.
     dataset_without_rows = DummyEmptyDataset()
     with self.assertRaises(ValueError):
         _get_best_conditional_entropic_attribute(
             dataset_without_rows,
             current_attributes=AttributeSet(
                 {ATTRIBUTES[0], ATTRIBUTES[1]}),
             candidate_attributes=self._attribute_set,
         )
Exemple #9
0
 def test_empty_dataset_and_attributes(self):
     # Runs _compute_attribute_avg_size on an empty dataset with an empty
     # attribute set and compares the outcome with the dictionary given by
     # self._get_expected_result().
     dataset = DummyEmptyDataset()
     self._dataset = dataset
     self._dataframe = dataset.dataframe
     self._attributes = AttributeSet({})
     computed = _compute_attribute_avg_size(self._dataset, self._attributes)
     self.assertDictEqual(self._get_expected_result(), computed)
Exemple #10
0
 def test_empty_dataset(self):
     # On an empty dataset, each of the first three ATTRIBUTES is expected to
     # be mapped to an instability of 0.0.
     self._dataset = DummyEmptyDataset()
     instability = _compute_attributes_instability(
         self._get_grouped_by_browser(), self._attributes)
     expected = {ATTRIBUTES[index]: 0.0 for index in range(3)}
     self.assertDictEqual(expected, instability)
Exemple #11
0
 def setUp(self):
     # Build one instance of each dummy dataset and keep them on the test
     # case for the tests to use.
     self._dummy_fp_dataset = DummyFingerprintDataset()
     self._empty_dataset = DummyEmptyDataset()
     self._clean_dataset = DummyCleanDataset()
Exemple #12
0
 def setUp(self):
     # Fixtures: a DummyCleanDataset, the dataframe returned by its
     # get_df_w_one_fp_per_browser() method, and the AttributeSet built from
     # ATTRIBUTES.
     dataset = DummyCleanDataset()
     self._dataset = dataset
     self._df_one_fp_per_browser = dataset.get_df_w_one_fp_per_browser()
     self._attribute_set = AttributeSet(ATTRIBUTES)
Exemple #13
0
 def test_empty_dataset(self):
     # After swapping in an empty dataset, the entropy check is expected to
     # raise a ValueError.
     self._dataset = DummyEmptyDataset()
     self.assertRaises(ValueError, self.check_entropy_result, WONT_COMPUTE)
Exemple #14
0
 def test_empty_dataset(self):
     # Swap in an empty dataset, then run the checks done by
     # self._check_result().
     self._dataset = DummyEmptyDataset()
     self._check_result()
Exemple #15
0
 def test_empty_dataset(self):
     # Computing the result on the dataframe of an empty dataset is expected
     # to raise a KeyError.
     self._dataset = DummyEmptyDataset()
     # Attribute is not in the dataset
     with self.assertRaises(KeyError):
         _compute_attribute_avg_size(
             self._dataset.dataframe, self._attributes)
Exemple #16
0
 def test_empty_dataset_and_empty_attribute_set(self):
     # With an empty dataset and an empty attribute set, the unicity check is
     # expected to raise a ValueError.
     self._dataset = DummyEmptyDataset()
     self._attribute_set = AttributeSet()
     self.assertRaises(ValueError, self.check_unicity_result, WONT_COMPUTE)
Exemple #17
0
 def test_empty_dataset(self):
     # Executing ProportionOfChanges on an empty dataset is expected to leave
     # an empty dictionary as the analysis result.
     analysis = ProportionOfChanges(DummyEmptyDataset())
     analysis.execute()
     self.assertDictEqual({}, analysis.result)