Esempio n. 1
0
 def test_synthetic_EQW_5(self):
     """Check the cutpoints EqualWidth(5, 0) produces for a two-sample property.

     Property 1 holds two samples whose values are -75 and 25 (a range of
     100); the cutpoints expected for it are -55, -35, -15 and 5.
     """
     discretizer = EqualWidth(5, 0)
     # Sample values: min = -75, max = 25.
     samples_by_property = {
         1: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)],
     }
     discretizer.discretize_property_without_abstracting(
         {}, {}, samples_by_property, 1)
     # The helper returns a (passed, message) pair.
     passed, detail = assert_almost_equality(
         {1: [-55, -35, -15, 5]}, discretizer.bins_cutpoints)
     self.assertTrue(passed, detail)
Esempio n. 2
0
 def test_synthetic_stress_EQW_2(self):
     """Check EqualWidth(2, 0) cutpoints on a large, padded sample list.

     The extreme values -75 and 25 are embedded between three runs of
     STRESS_VALUE_COUNT samples of value 3; the single expected cutpoint
     for property 1 is -25.
     """

     def padding():
         # Each call builds a new list that repeats one TimeStamp object
         # STRESS_VALUE_COUNT times, as each inline run originally did.
         return [TimeStamp(3, 1, 1, 0)] * STRESS_VALUE_COUNT

     discretizer = EqualWidth(2, 0)
     # Sample values: min = -75, max = 25.
     stress_samples = (
         padding()
         + [TimeStamp(-75, 1, 1, 0)]
         + padding()
         + [TimeStamp(25, 1, 1, 0)]
         + padding()
     )
     samples_by_property = {1: stress_samples}
     discretizer.discretize_property_without_abstracting(
         {}, {}, samples_by_property, 1)
     passed, detail = assert_almost_equality(
         {1: [-25]}, discretizer.bins_cutpoints)
     self.assertTrue(passed, detail)
Esempio n. 3
0
 def test_write_output_c2e_no_information_loss(self):
     """Write each property's class-to-entity map and verify the output.

     For every entry of ``properties`` the map is loaded from
     PARTITIONS_PATH through an EqualWidth(3, 0) instance, written into
     "test_files_folder" with write_partition_float, and then passed to
     check_file_similiarity. The test passes when the accumulated message
     is still empty at the end.
     """
     target_dir = "test_files_folder"
     # Create the output folder on first use.
     if not exists(target_dir):
         mkdir(target_dir)
     report = ""
     for prop in properties:
         discretizer = EqualWidth(3, 0)
         discretizer.property_folder = PARTITIONS_PATH
         class_to_entity = {}
         discretizer.load_class_to_entity(class_to_entity, prop)
         write_partition_float({}, class_to_entity, {}, target_dir, prop)
         # The checker receives a fresh empty list and the running message,
         # and returns the updated message.
         report = check_file_similiarity([], report, target_dir, prop)
     self.assertTrue(report == "", report)
Esempio n. 4
0
 def test_syntetic_cutpoint_generation(self):
     """Check EqualWidth cutpoints for every first argument from 2 to 999.

     Each run discretizes property 1, whose two samples have values -75
     and 25, and expects cutpoints spaced 100 / c apart starting above
     -75. Failures are accumulated and asserted once at the end.
     """
     all_passed = True
     report = ""
     for bin_count in range(2, 1000):
         discretizer = EqualWidth(bin_count, 0)
         # Sample values: min = -75, max = 25.
         samples_by_property = {
             1: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)],
         }
         step = 100 / bin_count
         expected = {1: [-75 + step * k for k in range(1, bin_count)]}
         discretizer.discretize_property_without_abstracting(
             {}, {}, samples_by_property, 1)
         passed, detail = assert_almost_equality(
             expected, discretizer.bins_cutpoints)
         all_passed = all_passed & passed
         report = report + detail
     self.assertTrue(all_passed, report)
Esempio n. 5
0
def test_cutpoints(MAX, MIN, PROPERTY_ID):
    """Compare EqualWidth cutpoints with generate_cutpoints for 2..999.

    For each first argument ``c`` an EqualWidth(c, 0) instance reading from
    PARTITIONS_PATH discretizes ``PROPERTY_ID``, and its ``bins_cutpoints``
    are compared with ``generate_cutpoints(MIN, MAX, c)``.

    Returns:
        A ``(msg, res)`` pair: the concatenated comparison messages and the
        combined pass flag of all comparisons.
    """
    # The same three dictionaries are handed to every discretizer in the
    # loop, so whatever one run stores in them is visible to the next.
    first_map, second_map, third_map = {}, {}, {}
    all_passed = True
    report = ""
    for bin_count in range(2, 1000):
        discretizer = EqualWidth(bin_count, 0)
        discretizer.property_folder = PARTITIONS_PATH
        expected = {PROPERTY_ID: generate_cutpoints(MIN, MAX, bin_count)}
        discretizer.discretize_property_without_abstracting(
            first_map, second_map, third_map, PROPERTY_ID)
        passed, detail = assert_almost_equality(
            expected, discretizer.bins_cutpoints)
        all_passed = all_passed & passed
        report = report + detail
    return report, all_passed
Esempio n. 6
0
    def test_PAA_Discretization_Difference(self):
        """Check that window_size changes the cutpoints EqualWidth produces.

        The same four samples (values -75, -25, 1, 25) are discretized
        twice with EqualWidth(2, 0): once with window_size=1, expecting the
        cutpoint -25, and once with window_size=2, expecting -50 + 63 / 2.
        The two results are then compared with each other and the test
        fails if they are reported as equal.
        """

        def four_samples():
            # Build fresh sample objects for each discretizer run.
            return [
                TimeStamp(-75, 1, 1, 0),
                TimeStamp(-25, 2, 2, 0),
                TimeStamp(1, 3, 3, 0),
                TimeStamp(25, 4, 4, 0),
            ]

        report = ""
        passed = True

        # First run, window_size=1: min = -75, max = 25.
        discretizer = EqualWidth(2, 0, window_size=1)
        discretizer.discretize_property({}, {}, {1: four_samples()}, 1)
        no_paa_cutpoints = discretizer.bins_cutpoints
        ok, detail = assert_almost_equality({1: [-25]}, no_paa_cutpoints)
        report += detail
        passed &= ok

        # Second run, window_size=2: min = -50 max = 13.
        discretizer = EqualWidth(2, 0, window_size=2)
        discretizer.discretize_property({}, {}, {1: four_samples()}, 1)
        paa_cutpoints = discretizer.bins_cutpoints
        ok, detail = assert_almost_equality({1: [-50 + 63 / 2]},
                                            paa_cutpoints)
        # Separate this message from the first run's with a newline.
        if detail != "":
            detail = "\n" + detail
        report += detail
        passed &= ok

        # NOTE(review): both results were compared against {1: [...]} dicts
        # above, so wrapping them in {1: ...} again nests them one level
        # deeper - confirm assert_almost_equality handles that as intended.
        same, _ = assert_almost_equality({1: no_paa_cutpoints},
                                         {1: paa_cutpoints})
        if same:
            report += "\nExpected different cutpoints with PAA! Got %s" % no_paa_cutpoints
            passed = False

        self.assertTrue(passed, report)
                    _entity_element = entity_elements[i]
                    f.write(
                        str(_entity_element[0]) + ',' +
                        str(_entity_element[1]) + ',' +
                        str(_entity_element[2]) + ',' +
                        str(_entity_element[3]))
                    karma_output.write(
                        str(_entity_element[0]) + ',' +
                        str(_entity_element[1]) + ',' +
                        str(property_to_base[_entity_element[3]] +
                            _entity_element[2]) + ',' +
                        str(_entity_element[3]))
                    if i + 1 != len(entity_elements):
                        f.write(';')
                        karma_output.write(';')
                f.write('\n')
                karma_output.write('\n')


if __name__ == '__main__':
    # Manual run: load the SAGender dataset, discretize it with EqualWidth(4)
    # and hand the result to convert_cutpoints_to_output.

    # NOTE(review): test_path is assigned but not used anywhere in this block.
    test_path = r'D:\test_stuff.txt'
    # NOTE(review): this relative path mixes '\' and '/' separators and the
    # paths below are hard-coded to a D: drive - presumably Windows-only.
    dataset_path = r'..\..\datasets\SAGender/SAGender.csv'

    # The meaning of the second argument (55) is not visible here - TODO confirm.
    m1, m2, m3 = get_maps_from_file(dataset_path, 55)
    d = EqualWidth(4)
    # _m1, _m2, _m3 = d.get_copy_of_maps(m1, m2, m3)
    _m1, _m2, _m3 = d.discretize(m1, m2, m3)
    # Only the second returned map is passed on, together with the output
    # location "D:\\", the dataset name and the discretization name.
    convert_cutpoints_to_output(_m2, "D:\\", 'SAGender',
                                d.get_discretization_name())