Beispiel #1
0
 def test_synthetic_EQF_4(self):
     """Check EqualFrequency(4, 0) cutpoints for a two-value property.

     Property 1 holds the values -75 and 25; the cutpoints stored in
     ``bins_cutpoints`` are expected to be -50, -25 and 0.
     """
     discretizer = EqualFrequency(4, 0)
     property_to_timestamps = {
         1: [TimeStamp(-75, 1, 1, 0),
             TimeStamp(25, 1, 1, 0)],  # min = -75, max = 25
     }
     wanted = {1: [-50, -25, 0]}
     discretizer.discretize_property_without_abstracting(
         {}, {}, property_to_timestamps, 1)
     ok, details = assert_almost_equality(wanted,
                                          discretizer.bins_cutpoints)
     self.assertTrue(ok, details)
Beispiel #2
0
 def test_synthetic_EQW_3(self):
     """Check EqualWidth(3, 0) cutpoints for a two-value property.

     With the values -75 and 25 the range is 100 wide, so the two expected
     cutpoints lie one third and two thirds of the way up from -75.
     """
     discretizer = EqualWidth(3, 0)
     property_to_timestamps = {
         1: [TimeStamp(-75, 1, 1, 0),
             TimeStamp(25, 1, 1, 0)],  # min = -75, max = 25
     }
     wanted = {1: [-75 + 100 * step / 3 for step in (1, 2)]}
     discretizer.discretize_property_without_abstracting(
         {}, {}, property_to_timestamps, 1)
     ok, details = assert_almost_equality(wanted,
                                          discretizer.bins_cutpoints)
     self.assertTrue(ok, details)
Beispiel #3
0
 def test_synthetic_stress_EQW_2(self):
     """Check EqualWidth(2, 0) on a large input dominated by one value.

     The extremes -75 and 25 are buried between three runs of
     ``STRESS_VALUE_COUNT`` timestamps with value 3; the single expected
     cutpoint is -25.
     """
     def filler_run():
         # One TimeStamp object referenced STRESS_VALUE_COUNT times.
         return [TimeStamp(3, 1, 1, 0)] * STRESS_VALUE_COUNT

     discretizer = EqualWidth(2, 0)
     series = filler_run()
     series.append(TimeStamp(-75, 1, 1, 0))
     series.extend(filler_run())
     series.append(TimeStamp(25, 1, 1, 0))
     series.extend(filler_run())  # min = -75, max = 25
     property_to_timestamps = {1: series}
     wanted = {1: [-25]}
     discretizer.discretize_property_without_abstracting(
         {}, {}, property_to_timestamps, 1)
     ok, details = assert_almost_equality(wanted,
                                          discretizer.bins_cutpoints)
     self.assertTrue(ok, details)
Beispiel #4
0
 def test_synthetic_EQF_Stress_Many_Requests(self):
     """Run EqualFrequency for every bin count from 2 to 9999 on values 0 and 1.

     For each bin count the sum of the cutpoints produced for property 1 is
     compared with ``(bin_count - 1) / 2``.  All comparison messages are
     concatenated and reported through one final assertion.
     """
     property_to_timestamps = {
         1: [TimeStamp(0, 1, 1, 0), TimeStamp(1, 1, 1, 0)],
     }
     all_ok = True
     reports = []
     for bin_count in range(2, 10000):
         discretizer = EqualFrequency(bin_count, 0)
         discretizer.discretize_property_without_abstracting(
             {}, {}, property_to_timestamps, 1)
         actual_sum = sum(discretizer.bins_cutpoints[1])
         wanted_sum = (bin_count - 1) / 2
         ok, report = assert_almost_equality({1: [wanted_sum]},
                                             {1: [actual_sum]})
         all_ok &= ok
         reports.append(report)
     self.assertTrue(all_ok, "".join(reports))
Beispiel #5
0
 def test_Abstraction_No_Impact_p2t(self):
     """Check that discretizing does not rewrite the caller's p2t map.

     Four timestamps of property 1 (values 0, 0, 1, 1) are run through an
     Expert discretizer with the single cutpoint 1.5.  The test passes when
     ``compare_time_stamps`` reports a non-empty difference between the
     input map and the returned one.
     """
     # (entity id, label): the label is used both as value and as class.
     rows = ((0, 0), (1, 0), (2, 1), (3, 1))
     original = {
         1: [TimeStamp(label, 1, 1, entity_id, label)
             for entity_id, label in rows]
     }
     discretizer: Discretization = Expert({1: [1.5]}, 0)
     _, _, discretized = discretizer.discretize_property({}, {}, original, 1)
     differences = compare_time_stamps(original, discretized)
     self.assertTrue(differences != "",
                     "Original data was changed during abstraction")
Beispiel #6
0
 def test_syntetic_cutpoint_generation(self):
     """Check EqualWidth cutpoints for every bin count from 2 to 999.

     The property always holds the values -75 and 25, so for ``c`` bins the
     expected cutpoints are ``-75 + (100 / c) * i`` for ``i`` in 1..c-1.
     All comparison messages are reported through one final assertion.
     """
     all_ok = True
     reports = []
     for bin_count in range(2, 1000):
         discretizer = EqualWidth(bin_count, 0)
         property_to_timestamps = {
             1: [TimeStamp(-75, 1, 1, 0),
                 TimeStamp(25, 1, 1, 0)],  # min = -75, max = 25
         }
         width = 100 / bin_count
         wanted = {1: [-75 + width * k for k in range(1, bin_count)]}
         discretizer.discretize_property_without_abstracting(
             {}, {}, property_to_timestamps, 1)
         ok, report = assert_almost_equality(wanted,
                                             discretizer.bins_cutpoints)
         all_ok &= ok
         reports.append(report)
     self.assertTrue(all_ok, "".join(reports))
Beispiel #7
0
 def test_Abstraction_No_Impact_c2e(self):
     """Check that discretizing does not rewrite the caller's c2e map.

     Four entities (two per class) each carry one timestamp of property 1.
     After running an Expert discretizer with the single cutpoint 1.5, the
     test passes when ``compare_time_stamps_c2e`` reports a non-empty
     difference between the input map and the returned one.
     """
     # (entity id, class id): the class id also serves as the timestamp
     # value and timestamp class.
     rows = ((0, 0), (1, 0), (2, 1), (3, 1))
     entities = [Entity(entity_id, class_id, -1)
                 for entity_id, class_id in rows]
     for entity, (entity_id, class_id) in zip(entities, rows):
         entity.properties = {
             1: [TimeStamp(class_id, 1, 1, entity_id, class_id)]
         }
     original = {
         0: {entities[0], entities[1]},
         1: {entities[2], entities[3]},
     }
     discretizer: Discretization = Expert({1: [1.5]}, 0)
     _, discretized, _ = discretizer.discretize_property({}, original, {}, 1)
     differences = compare_time_stamps_c2e(original, discretized)
     self.assertTrue(differences != "",
                     "Original data was changed during abstraction")
Beispiel #8
0
 def test_Abstraction_Most_Bins(self):
     """Check Expert discretization with one cutpoint per data point.

     Timestamp ``i`` (1..1000) has value ``i``, and the cutpoints are
     ``i + 0.5``, so timestamp ``i`` is expected to receive the value
     ``i - 1`` while keeping its start, end and entity id.
     """
     TOTAL_POINTS = 1000
     indices = range(1, TOTAL_POINTS + 1)
     p2e = {}
     c2e = {}
     p2t = {1: [TimeStamp(i, i, i, i) for i in indices]}
     expected = {1: [TimeStamp(i - 1, i, i, i) for i in indices]}
     cutpoints = [i + 0.5 for i in indices]
     discretizer: Discretization = Expert({1: cutpoints}, -1)
     _, _, d_p2t = discretizer.discretize_property(p2e, c2e, p2t, 1)
     differences = compare_time_stamps(expected, d_p2t)
     self.assertTrue(differences == "", differences)
Beispiel #9
0
 def test_Abstraction_Ignore_Properties(self):
     """Check that discretizing property 2 leaves property 1 as it was.

     Property 1 holds 1000 timestamps and has no cutpoints; property 2
     holds one timestamp with value 0 and the single cutpoint 5.  The
     returned map is compared against ``expected``: property 1 unchanged
     and property 2 still holding a timestamp with value 0.
     """
     TOTAL_POINTS = 1000
     p2e = {}
     c2e = {}
     p2t = {
         1: [TimeStamp(i, i, i, i) for i in range(1, TOTAL_POINTS + 1)],
         2: [TimeStamp(0, 0, 0, 0)],
     }
     cutpoints = [5]
     expected = {1: p2t[1], 2: [TimeStamp(0, 0, 0, 0)]}
     d: Discretization = Expert({2: cutpoints}, -1)
     d_p2e, d_c2e, d_p2t = d.discretize_property(p2e, c2e, p2t, 2)
     # Compare against the explicitly built expectation; the original built
     # `expected` but then compared against the input map instead.
     message = compare_time_stamps(expected, d_p2t)
     self.assertTrue(message == "", message)
Beispiel #10
0
 def test_Abstraction_MaxGap_0_Bins_2(self):
     """Check Expert discretization of one entity around a single cutpoint.

     Entity 0 has the values 1..1000 at times 1..1000 and the cutpoint is
     500.  The expected result is two timestamps: value 0 spanning 1-499
     and value 1 spanning 500-1000.
     """
     TOTAL_POINTS = 1000
     p2e = {}
     c2e = {}
     p2t = {
         1: [TimeStamp(i, i, i, 0) for i in range(1, TOTAL_POINTS + 1)]
     }
     discretizer: Discretization = Expert({1: [500]}, 0)
     _, _, d_p2t = discretizer.discretize_property(p2e, c2e, p2t, 1)
     expected = {
         1: [TimeStamp(0, 1, 499, 0),
             TimeStamp(1, 500, 1000, 0)]
     }
     differences = compare_time_stamps(expected, d_p2t)
     self.assertTrue(differences == "", differences)
Beispiel #11
0
 def test_synthetic_EQF_Stress_Big_Request_4(self):
     """Check EqualFrequency with 4 bins on ``STRESS_VALUE_COUNT`` values.

     The values are 0..STRESS_VALUE_COUNT-1, and the expected cutpoints are
     ``i * max_index / 4`` for ``i`` in 1..3, where ``max_index`` is the
     largest value.
     """
     BIN_COUNT = 4
     max_index = STRESS_VALUE_COUNT - 1
     property_to_timestamps = {
         1: [TimeStamp(value, 1, 1, 0) for value in range(STRESS_VALUE_COUNT)]
     }
     discretizer = EqualFrequency(BIN_COUNT, 0)
     discretizer.discretize_property_without_abstracting(
         {}, {}, property_to_timestamps, 1)
     wanted = {
         1: [i * max_index / BIN_COUNT for i in range(1, BIN_COUNT)]
     }
     ok, details = assert_almost_equality(wanted,
                                          discretizer.bins_cutpoints)
     self.assertTrue(ok, details)
Beispiel #12
0
 def get_data_from_row(line: str) -> 'DataRow':
     """Parse one comma-separated input line into a DataRow.

     The first four comma-separated fields are read as: entity id (int),
     ``tid`` (int), time (int) and value (float).  The time is used as both
     start and end point of the created TimeStamp.

     :param line: A single line of the input file; trailing whitespace is
         ignored.
     :return: A DataRow built from the entity id, ``tid`` and the TimeStamp.
     :raises FileFormatNotCorrect: If the line has fewer than four fields,
         a field cannot be converted, or ``line`` is not a string.
     """
     try:
         fields = line.rstrip().split(',')
         eid = int(fields[0])
         tid = int(fields[1])
         time = int(fields[2])
         val = float(fields[3])
     except (AttributeError, IndexError, TypeError, ValueError) as err:
         # Only translate parsing problems; a bare `except:` would also
         # swallow KeyboardInterrupt/SystemExit.  Keep the cause chained.
         raise FileFormatNotCorrect() from err
     time_stamp = TimeStamp(val, time, time, eid)
     return DataRow(eid, tid, time_stamp)
Beispiel #13
0
    def test_PAA_Discretization_Difference(self):
        """Check that a window size of 2 changes the EqualWidth cutpoints.

        The same four timestamps are discretized twice with EqualWidth(2, 0):
        once with ``window_size=1`` (expected cutpoint -25) and once with
        ``window_size=2`` (expected cutpoint ``-50 + 63 / 2``).  The test also
        fails when both runs produce the same cutpoints.
        """
        def make_series():
            # Fresh timestamp objects for every run.
            return {
                1: [
                    TimeStamp(-75, 1, 1, 0),
                    TimeStamp(-25, 2, 2, 0),
                    TimeStamp(1, 3, 3, 0),
                    TimeStamp(25, 4, 4, 0),
                ]
            }

        failures = ""
        passed = True

        # Window of 1: min = -75, max = 25
        plain = EqualWidth(2, 0, window_size=1)
        plain.discretize_property({}, {}, make_series(), 1)
        no_paa_cutpoints = plain.bins_cutpoints
        ok, report = assert_almost_equality({1: [-25]}, no_paa_cutpoints)
        failures += report
        passed &= ok

        # Window of 2: min = -50 max = 13
        averaged = EqualWidth(2, 0, window_size=2)
        averaged.discretize_property({}, {}, make_series(), 1)
        paa_cutpoints = averaged.bins_cutpoints
        ok, report = assert_almost_equality({1: [-50 + 63 / 2]},
                                            paa_cutpoints)
        if report != "":
            report = "\n" + report
        failures += report
        passed &= ok

        # The two runs must not agree with each other.
        same, _ = assert_almost_equality({1: no_paa_cutpoints},
                                         {1: paa_cutpoints})
        if same:
            failures += "\nExpected different cutpoints with PAA! Got %s" % no_paa_cutpoints
            passed = False

        self.assertTrue(passed, failures)
Beispiel #14
0
 def test_Abstraction_MaxGap_0_Bins_2_Different_Entities(self):
     """Check Expert discretization when points alternate between entities.

     Points 1..1000 are assigned to entity 0 when even and entity 1 when
     odd.  With the single cutpoint 500, every point is expected to keep
     its own start/end and entity and get value 0 below 500, 1 otherwise.
     """
     TOTAL_POINTS = 1000
     cutpoints = [500]
     p2e = {}
     c2e = {}
     p2t = {1: []}
     expected = {1: []}
     for point in range(1, TOTAL_POINTS + 1):
         entity_id = point % 2  # even points -> entity 0, odd -> entity 1
         bin_index = 1 if point >= 500 else 0
         p2t[1].append(TimeStamp(point, point, point, entity_id))
         expected[1].append(TimeStamp(bin_index, point, point, entity_id))
     discretizer: Discretization = Expert({1: cutpoints}, 0)
     _, _, d_p2t = discretizer.discretize_property(p2e, c2e, p2t, 1)
     differences = compare_time_stamps(expected, d_p2t)
     self.assertTrue(differences == "", differences)
Beispiel #15
0
    def test_PAA_Window_1(self):
        """Check that ``paa_p2t`` with a window size of 1 keeps the data as is.

        Property 1 holds the values 1..1000 for entity 0; the map returned by
        ``paa_p2t`` is expected to compare equal to the input map.
        """
        cutpoints = []
        TOTAL_POINTS = 1000
        p2t = {1: []}
        WINDOW_SIZE = 1

        # range(start, stop, step): the original call
        # range(TOTAL_POINTS + 1, WINDOW_SIZE) was empty, so the test
        # compared two empty lists and could never fail.
        for i in range(1, TOTAL_POINTS + 1, WINDOW_SIZE):
            p2t[1].append(TimeStamp(i, i, i, 0))

        d: Discretization = Expert({1: cutpoints}, -1, window_size=WINDOW_SIZE)
        d_p2t = d.paa_p2t(p2t)
        message = compare_time_stamps(p2t, d_p2t)
        self.assertTrue(message == "", message)
    def get_copy_of_maps(
        old_property_to_entities: Dict[int, Set[Entity]],
        old_class_to_entities: Dict[int, Set[Entity]],
        old_property_to_timestamps: Dict[int, List[TimeStamp]]
    ) -> Tuple[Dict[int, Set['Entity']], Dict[int, Set['Entity']], Dict[
            int, List[TimeStamp]]]:
        """
        Builds copies of the three maps that share one set of copied timestamps.

        Each timestamp found in ``old_property_to_timestamps`` is copied with
        ``TimeStamp.deep_copy``, and that copy is used both in the returned
        property-to-timestamps map and in the ``properties`` of the newly
        created entities.  ``old_property_to_entities`` is not read: the
        returned property-to-entities map is rebuilt from the new entities.
        :param old_property_to_entities: A dictionary mapping property ids to the set of entities containing the property
        :param old_class_to_entities: A dictionary mapping class ids to the set of entities under this class
        :param old_property_to_timestamps: A dictionary mapping property ids to the list of timestamps belonging to that property.
        :return: The new property-to-entities, class-to-entities and property-to-timestamps dictionaries, in that order.
        """
        # Old timestamp -> its copy, so every reference to the same old
        # timestamp ends up pointing at the same new object.
        clone_of: Dict[TimeStamp, TimeStamp] = {}
        for stamp_list in old_property_to_timestamps.values():
            for stamp in stamp_list:
                clone_of[stamp] = TimeStamp.deep_copy(stamp)

        property_to_timestamps: Dict[int, List[TimeStamp]] = {}
        for property_id, stamp_list in old_property_to_timestamps.items():
            property_to_timestamps[property_id] = [
                clone_of[stamp] for stamp in stamp_list
            ]

        property_to_entities: Dict[int, Set['Entity']] = {}
        class_to_entities: Dict[int, Set['Entity']] = {}
        for class_id, members in old_class_to_entities.items():
            copies: Set['Entity'] = set()
            class_to_entities[class_id] = copies
            for entity in members:
                clone = Entity(entity.entity_id, class_id,
                               entity.class_separator)
                cloned_properties = {
                    p_id: [clone_of[stamp] for stamp in stamps]
                    for p_id, stamps in entity.properties.items()
                }

                # Create the entity sets for property ids seen for the
                # first time, then register the clone under each of them.
                unseen = set(cloned_properties.keys()).difference(
                    property_to_entities.keys())
                property_to_entities.update({p_id: set() for p_id in unseen})
                for p_id in cloned_properties:
                    property_to_entities[p_id].add(clone)

                clone.properties = cloned_properties
                copies.add(clone)

        return property_to_entities, class_to_entities, property_to_timestamps
    def paa_timestamps(self, timestamps: List[TimeStamp]):
        """
        Averages timestamp values over consecutive windows of ``self.window_size`` (PAA).

        With a window size of 1 every timestamp is returned as a fresh copy,
        in the original order.  Otherwise the timestamps are sorted by start
        point and the time axis, beginning at the earliest start point, is
        cut into windows ``[t, t + window_size)``.  Every non-empty window
        yields one TimeStamp that holds the mean value, spans the window, and
        carries the entity id and class of the last timestamp in the window.

        :param timestamps: The timestamps to aggregate.
        :return: A new list of TimeStamp objects; the input list is not modified.
        :raises ValueError: If timestamps are given and the window size is not positive.
        """
        if self.window_size == 1:
            return [
                TimeStamp(ts.value, ts.start_point, ts.end_point, ts.entity_id,
                          ts.ts_class) for ts in timestamps
            ]
        if not timestamps:
            # Nothing to aggregate; the original indexed timestamps[0] here.
            return []
        window = self.window_size
        if window <= 0:
            # A non-positive window never advances and would loop forever.
            raise ValueError("window_size must be positive, got %r" % (window,))

        ordered = sorted(timestamps, key=lambda ts: ts.start_point)
        last_start = ordered[-1].start_point
        window_start = ordered[0].start_point
        total = len(ordered)
        i = 0
        new_values = []
        # `<=` rather than `<`: a final timestamp that starts exactly on a
        # window boundary (or a lone timestamp) must still get its own
        # window instead of being silently dropped.
        while window_start <= last_start:
            window_end = window_start + window
            first = i
            while i < total and ordered[i].start_point < window_end:
                i += 1
            if i > first:
                members = ordered[first:i]
                mean = sum(ts.value for ts in members) / len(members)
                new_values.append(
                    TimeStamp(mean, window_start, window_end,
                              members[-1].entity_id, members[-1].ts_class))
            window_start = window_end

        return new_values
Beispiel #18
0
        return "BINARY_%s" % self.bin_count

    def __init__(self, bin_count, max_gap, window_size=1):
        """Initialise the base class, then store the bin count as an int.

        :param bin_count: Number of bins; converted with ``int()``.
        :param max_gap: Forwarded unchanged to the base class initialiser.
        :param window_size: Forwarded unchanged to the base class initialiser.
        """
        super().__init__(max_gap, window_size)
        self.bin_count = int(bin_count)


#
if __name__ == '__main__':
    # Ad-hoc manual run: builds three entities in two classes and prints the
    # result of Binary.set_bin_ranges_for_property for property 0.
    print(math.log(0.5, 10) * 0.5)
    print([1, 2, 3][:1])

    # NOTE(review): Entity(...) and TimeStamp(...) are called here with a
    # different argument shape than elsewhere in the project - confirm
    # these calls still match the current constructors.
    e1 = Entity(1, 1)
    e2 = Entity(2, 1)
    e3 = Entity(3, 2)
    e1.properties[0] = [TimeStamp(10, TimeInterval(0, 1))]
    e2.properties[0] = [TimeStamp(15, TimeInterval(2, 3))]
    e3.properties[0] = [TimeStamp(7, TimeInterval(4, 5))]

    # Binary.__init__ requires max_gap; the previous call Binary(2) raised
    # TypeError.  NOTE(review): 0 is a placeholder - confirm the value.
    binary = Binary(2, 0)

    p_to_ent = {0: {e1, e2, e3}}
    c_to_ent = {1: {e1, e2}, 2: {e3}}

    print(binary.set_bin_ranges_for_property(p_to_ent, c_to_ent, {}, 0))
Beispiel #19
0
 def discretize(self, time_stamp: TimeStamp) -> bool:
     """Replace the timestamp's value with this bin's symbol if it fits.

     The value fits when ``min_val <= value < max_val``.

     :param time_stamp: The timestamp to test; its ``value`` is overwritten
         with ``bin_symbol`` on a match and left untouched otherwise.
     :return: True if the value was replaced, False otherwise.
     """
     fits = self.min_val <= time_stamp.value < self.max_val
     if not fits:
         return False
     time_stamp.value = self.bin_symbol
     return True