Ejemplo n.º 1
0
 def test_synthetic_EQF_5(self):
     """Five bins over the two values -75 and 25.

     The expected cutpoints walk from the minimum (-75) towards the
     maximum (25) in steps of 20: -55, -35, -15, 5.
     """
     discretizer = EqualFrequency(5, 0)
     timestamps_by_property = {
         1: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)],
     }
     wanted = {1: [-55, -35, -15, 5]}

     discretizer.discretize_property_without_abstracting(
         {}, {}, timestamps_by_property, 1)

     ok, message = assert_almost_equality(wanted, discretizer.bins_cutpoints)
     self.assertTrue(ok, message)
Ejemplo n.º 2
0
 def set_bin_ranges(
         self,
         property_to_entities: Dict[int, Set[Entity]],
         class_to_entities: Dict[int, Set[Entity]],
         property_to_timestamps: Dict[int, List[TimeStamp]]):
     """Select the cutpoints of every property in ``property_to_timestamps``.

     An ``EqualFrequency`` discretizer built with ``self.ACCURACY_MEASURE``
     is run over the inputs first. Its ``bins_cutpoints`` are stored on
     ``self.candidate_cutpoints``, and the three values it returns are
     forwarded, per property id, to ``self.set_bin_ranges_for_property``.

     Returns:
         A dict mapping each property id to the value returned by
         ``set_bin_ranges_for_property`` for that id.
     """
     candidate_source = EqualFrequency(self.ACCURACY_MEASURE, -1)
     discretized = candidate_source.discretize(
         property_to_entities, class_to_entities, property_to_timestamps)
     # The three results feed the matching positional parameters of
     # set_bin_ranges_for_property.
     entities_arg, classes_arg, timestamps_arg = discretized
     self.candidate_cutpoints = candidate_source.bins_cutpoints
     return {
         property_id: self.set_bin_ranges_for_property(
             entities_arg, classes_arg, timestamps_arg, property_id)
         for property_id in property_to_timestamps
     }
Ejemplo n.º 3
0
 def test_synthetic_EQF_Stress_Big_Request_4(self):
     """Stress test: four bins over the values 0 .. STRESS_VALUE_COUNT - 1.

     The expected cutpoints are the 1/4, 2/4 and 3/4 marks of the largest
     value in the series.
     """
     series = {
         1: [TimeStamp(value, 1, 1, 0) for value in range(STRESS_VALUE_COUNT)],
     }
     largest = STRESS_VALUE_COUNT - 1
     bins = 4

     discretizer = EqualFrequency(bins, 0)
     discretizer.discretize_property_without_abstracting({}, {}, series, 1)

     wanted = {1: [step * largest / bins for step in range(1, bins)]}
     observed = discretizer.bins_cutpoints
     ok, message = assert_almost_equality(wanted, observed)
     self.assertTrue(ok, message)
Ejemplo n.º 4
0
    def test_real_EQF_FAAgeGroup_F3_Property_44(self):
        """Check the five-bin cutpoints computed for property 44.

        The discretizer is given empty mappings and ``PARTITIONS_PATH`` as
        its ``property_folder`` -- presumably the property data is loaded
        from that folder; confirm against ``EqualFrequency``.
        """
        property_id = 44
        wanted = {property_id: [0.71, 0.84, 0.95, 1.1]}

        discretizer = EqualFrequency(5, 0)
        discretizer.property_folder = PARTITIONS_PATH
        discretizer.discretize_property_without_abstracting(
            {}, {}, {}, property_id)

        ok, message = assert_almost_equality(
            wanted, discretizer.bins_cutpoints)
        self.assertTrue(ok, message)
Ejemplo n.º 5
0
 def test_synthetic_EQF_Stress_Many_Requests(self):
     """Discretize the two values 0 and 1 for every bin count in 2..9999.

     For each bin count the cutpoints of property 1 are expected to sum to
     ``(bin_count - 1) / 2``. Failures are accumulated so that a single
     assertion at the end reports every mismatch.
     """
     all_ok = True
     messages = []
     series = {1: [TimeStamp(0, 1, 1, 0), TimeStamp(1, 1, 1, 0)]}

     for bins in range(2, 10000):
         discretizer = EqualFrequency(bins, 0)
         discretizer.discretize_property_without_abstracting(
             {}, {}, series, 1)
         observed = {1: [sum(discretizer.bins_cutpoints[1])]}
         wanted = {1: [(bins - 1) / 2]}
         step_ok, step_message = assert_almost_equality(wanted, observed)
         all_ok &= step_ok
         messages.append(step_message)

     self.assertTrue(all_ok, "".join(messages))
Ejemplo n.º 6
0
    def set_bin_ranges_for_property(self,
                                    property_to_entities: Dict[int,
                                                               Set[Entity]],
                                    class_to_entities: Dict[int, Set[Entity]],
                                    property_to_timestamps: Dict[
                                        int, List[TimeStamp]],
                                    property_id: int) -> List[float]:
        """Greedily pick up to ``self.bin_count - 1`` cutoffs for one property.

        Candidate cutoffs come from ``EqualFrequency.load_candidate_cuts``
        (requested with 100 as the fifth argument) and are scanned in
        ascending order. In each round every not-yet-chosen candidate is
        scored by collapsing the state matrix with that candidate's index
        added (``self.collapse_matrix``) and passing the result to
        ``self.distance_measure``; the candidate with the strictly largest
        score is kept, so ties go to the smallest cutoff.

        Side effects:
            * ``self.candidate_cutpoints`` is replaced by the loaded
              candidates.
            * ``self.cutoffs_according_to_order[property_id]`` receives the
              cutoffs in the order they were chosen.
            * ``self.chosen_scores[property_id]`` receives the winning score
              of each round.

        Args:
            property_to_entities: forwarded to ``load_candidate_cuts``.
            class_to_entities: forwarded to ``load_candidate_cuts``.
            property_to_timestamps: forwarded to ``load_candidate_cuts``.
            property_id: id of the property whose cutoffs are selected.

        Returns:
            The chosen cutoffs in ascending order. The list is shorter than
            ``self.bin_count - 1`` when a round cannot select any candidate.
        """
        property_to_entities, candidates = EqualFrequency.load_candidate_cuts(
            property_to_entities, class_to_entities, property_to_timestamps,
            property_id, 100, self.property_folder)
        property_to_entities = self.property_to_timestamps_to_property_to_entity(
            property_to_entities)
        self.candidate_cutpoints = candidates
        candidate_cutoffs: List[float] = sorted(
            self.candidate_cutpoints[property_id])
        debug_print("%s: %s" % (property_id, candidate_cutoffs))

        # One state per interval delimited by the candidate cutoffs.
        state_count = len(candidate_cutoffs) + 1
        A = np.zeros(shape=(state_count, state_count))
        state_vector = [0] * state_count
        chosen_cutoffs = SortedList()
        chosen_cutoffs_indices = SortedList()

        # A and state_vector are handed over freshly zeroed; presumably they
        # are filled in place here -- confirm against load_state_information.
        self.load_state_information(A, property_id, property_to_entities,
                                    state_vector)

        cutoffs_according_to_order = []
        chosen_scores = []

        for _ in range(self.bin_count - 1):
            max_distance = float('-inf')
            best_cutoff = float('-inf')
            best_index = None
            for j, cutoff in enumerate(candidate_cutoffs):
                if j in chosen_cutoffs_indices:
                    continue
                temp_cutoff_indices = chosen_cutoffs_indices.copy()
                temp_cutoff_indices.add(j)
                new_A = self.collapse_matrix(A, temp_cutoff_indices)
                distance_of_series = self.distance_measure(new_A, state_vector)
                if distance_of_series > max_distance:
                    max_distance = distance_of_series
                    best_cutoff = cutoff
                    best_index = j

            if best_index is None:
                # No candidate could be selected (all were already chosen, or
                # none scored above -inf). Stop instead of recording the -inf
                # placeholders as a cutoff, an index and a score.
                break

            chosen_cutoffs.add(best_cutoff)
            chosen_cutoffs_indices.add(best_index)
            cutoffs_according_to_order.append(best_cutoff)
            chosen_scores.append(max_distance)

        self.cutoffs_according_to_order.update(
            {property_id: cutoffs_according_to_order})
        self.chosen_scores.update({property_id: chosen_scores})
        return list(chosen_cutoffs)
Ejemplo n.º 7
0
    def set_bin_ranges_for_property(self, property_to_entities: Dict[int, Set[Entity]],
                                    class_to_entities: Dict[int, Set[Entity]],
                                    property_to_timestamps: Dict[int, List[TimeStamp]],
                                    property_id: int) -> List[float]:
        """Greedily pick up to ``self.bin_count - 1`` cutoffs for one property.

        Candidate cutoffs come from ``EqualFrequency.load_candidate_cuts``
        (requested with ``self.ACCURACY_MEASURE``) and are scanned in
        ascending order. In each round every not-yet-chosen candidate is
        scored by building a probability vector from the per-class state
        vectors with that candidate's index added
        (``self.calculate_probability_vector``) and passing it to
        ``self.distance_measure``; the candidate with the strictly largest
        score is kept, so ties go to the smallest cutoff.

        Side effects:
            * ``self.candidate_cutpoints`` is replaced by the loaded
              candidates.
            * ``self.cutoffs_according_to_order[property_id]`` receives the
              cutoffs in the order they were chosen.
            * ``self.chosen_scores[property_id]`` receives the winning score
              of each round.

        Args:
            property_to_entities: forwarded to ``load_candidate_cuts`` and
                ``TD4C.populate_state_vector``.
            class_to_entities: forwarded to ``load_candidate_cuts``.
            property_to_timestamps: forwarded to ``load_candidate_cuts`` and
                ``TD4C.populate_state_vector``.
            property_id: id of the property whose cutoffs are selected.

        Returns:
            The chosen cutoffs in ascending order. The list is shorter than
            ``self.bin_count - 1`` when a round cannot select any candidate.
        """
        class_to_entities, candidates = EqualFrequency.load_candidate_cuts(
            property_to_entities, class_to_entities, property_to_timestamps,
            property_id, self.ACCURACY_MEASURE, self.property_folder)
        class_to_entities = self.property_to_timestamps_to_class_to_entities(class_to_entities)
        self.candidate_cutpoints = candidates
        candidate_cutoffs: List[float] = sorted(self.candidate_cutpoints[property_id])
        chosen_cutoffs = SortedList()
        chosen_cutoffs_indices = SortedList()
        cutoffs_according_to_order = []

        class_to_state_vector = TD4C.populate_state_vector(
            property_to_entities, class_to_entities, property_to_timestamps,
            len(candidate_cutoffs), property_id)

        chosen_scores = []
        debug_print("\n---------------------%s----------------------" % property_id)
        for _ in range(self.bin_count - 1):
            # Kept only for the per-round debug output below.
            scores_and_cutoffs = []
            max_distance = float('-inf')
            best_cutoff = float('-inf')
            best_index = None
            for j, cutoff in enumerate(candidate_cutoffs):
                if j in chosen_cutoffs_indices:
                    continue
                temp_cutoff_indices = chosen_cutoffs_indices.copy()
                temp_cutoff_indices.add(j)
                probability_vector = self.calculate_probability_vector(
                    class_to_state_vector, temp_cutoff_indices)
                distance_of_series = self.distance_measure(probability_vector)
                scores_and_cutoffs.append((cutoff, distance_of_series))
                if distance_of_series > max_distance:
                    max_distance = distance_of_series
                    best_cutoff = cutoff
                    best_index = j
            debug_print("%s: %s" % (best_cutoff, scores_and_cutoffs))

            if best_index is None:
                # No candidate could be selected (all were already chosen, or
                # none scored above -inf). Stop instead of recording the -inf
                # placeholders as a cutoff, an index and a score.
                break

            chosen_cutoffs.add(best_cutoff)
            chosen_cutoffs_indices.add(best_index)
            cutoffs_according_to_order.append(best_cutoff)
            chosen_scores.append(max_distance)

        self.cutoffs_according_to_order.update({property_id: cutoffs_according_to_order})
        self.chosen_scores.update({property_id: chosen_scores})
        return list(chosen_cutoffs)
Ejemplo n.º 8
0
 def test_synthetic_EQF_array_partial_index_2(self):
     """An index just below 2 on [-1, 0, 1] should give approximately 1."""
     values = [-1, 0, 1]
     cutpoint = EqualFrequency.get_cutpoint(2 - EPSILON, values)
     self.assertAlmostEqual(1, cutpoint)
Ejemplo n.º 9
0
 def test_synthetic_EQF_array_partial_index_1_half(self):
     """The fractional index 3/2 on [-1, 0, 1] should give 1/2."""
     values = [-1, 0, 1]
     cutpoint = EqualFrequency.get_cutpoint(3 / 2, values)
     self.assertAlmostEqual(1 / 2, cutpoint)