Exemple #1
0
    def test_PAA_Window_Stress(self):
        """Run ``paa_p2t`` for every window size in 2..9999 and compare results.

        For each window size, property 1 is filled with runs of WINDOW_SIZE
        consecutive single-point time stamps, and one expected time stamp is
        built per run: first argument ``i + (WINDOW_SIZE - 1) / 2``, spanning
        ``i`` to ``i + WINDOW_SIZE - 1``. Mismatches reported by
        ``compare_time_stamps`` are collected per window size and the test
        fails if any were found.
        """
        cutpoints = []
        TOTAL_POINTS = STRESS_VALUE_COUNT

        failures = []
        for WINDOW_SIZE in range(2, 10000):
            p2t = {1: []}
            expected = {1: []}
            # Advance one whole window per iteration. With only two arguments
            # range() would treat WINDOW_SIZE as the *stop* value, leaving the
            # loop empty whenever TOTAL_POINTS + 1 >= WINDOW_SIZE and building
            # overlapping windows otherwise.
            # NOTE(review): the start value 1 mirrors the sibling abstraction
            # tests; confirm paa_p2t does not anchor its windows at time 0.
            for i in range(1, TOTAL_POINTS + 1, WINDOW_SIZE):
                p2t[1].extend(
                    TimeStamp(i + x, i + x, i + x, 0)
                    for x in range(WINDOW_SIZE))
                expected[1].append(
                    TimeStamp(i + (WINDOW_SIZE - 1) / 2, i,
                              i + WINDOW_SIZE - 1, 0))

            d: Discretization = Expert({1: cutpoints},
                                       -1,
                                       window_size=WINDOW_SIZE)
            d_p2t = d.paa_p2t(p2t)
            t_msg = compare_time_stamps(expected, d_p2t)
            if t_msg != "":
                failures.append("WINDOW SIZE: %s\n%s\n" %
                                (WINDOW_SIZE, t_msg))
        # Join once at the end instead of growing a string inside the loop.
        message = "".join(failures)
        self.assertTrue(message == "", message)
Exemple #2
0
    def test_PAA_Window_1(self):
        """Check that ``paa_p2t`` with a window size of 1 matches its input.

        Property 1 is filled with TOTAL_POINTS single-point time stamps; the
        result of ``paa_p2t`` is compared against that same map and the test
        fails with the comparison message if they differ.
        """
        cutpoints = []
        TOTAL_POINTS = 1000
        WINDOW_SIZE = 1

        # WINDOW_SIZE is the step, not the stop: range(TOTAL_POINTS + 1,
        # WINDOW_SIZE) is range(1001, 1), which is empty and would make this
        # test compare two empty maps.
        p2t = {
            1: [
                TimeStamp(i, i, i, 0)
                for i in range(1, TOTAL_POINTS + 1, WINDOW_SIZE)
            ]
        }

        d: Discretization = Expert({1: cutpoints}, -1, window_size=WINDOW_SIZE)
        d_p2t = d.paa_p2t(p2t)
        message = compare_time_stamps(p2t, d_p2t)
        self.assertTrue(message == "", message)
Exemple #3
0
 def test_Abstraction_No_Impact_p2t(self):
     """Check that discretizing property 1 yields output unlike the input map.

     Four time stamps are discretized with a single cutpoint at 1.5 and a
     max gap of 0. The test fails when ``compare_time_stamps`` finds no
     difference between the input map and the returned map, which it
     reports as the original data having been changed.
     """
     stamp_args = [
         (0, 1, 1, 0, 0),
         (0, 1, 1, 1, 0),
         (1, 1, 1, 2, 1),
         (1, 1, 1, 3, 1),
     ]
     p2t = {1: [TimeStamp(*fields) for fields in stamp_args]}

     d: Discretization = Expert({1: [1.5]}, 0)
     _, _, d_p2t = d.discretize_property({}, {}, p2t, 1)

     difference = compare_time_stamps(p2t, d_p2t)
     # An empty comparison message means input and output are identical.
     self.assertTrue(difference != "",
                     "Original data was changed during abstraction")
Exemple #4
0
 def test_Abstraction_Most_Bins(self):
     """Discretize 1000 points with a cutpoint placed after every value.

     Point ``i`` is built as ``TimeStamp(i, i, i, i)`` and the cutpoints
     are ``i + 0.5``; the expected output replaces the first argument of
     each time stamp with ``i - 1`` and leaves the rest untouched.
     """
     TOTAL_POINTS = 1000
     indices = range(1, TOTAL_POINTS + 1)

     p2t = {1: [TimeStamp(n, n, n, n) for n in indices]}
     expected = {1: [TimeStamp(n - 1, n, n, n) for n in indices]}
     cutpoints = [n + 0.5 for n in indices]

     d: Discretization = Expert({1: cutpoints}, -1)
     _, _, d_p2t = d.discretize_property({}, {}, p2t, 1)

     message = compare_time_stamps(expected, d_p2t)
     self.assertTrue(message == "", message)
Exemple #5
0
 def test_Abstraction_No_Impact_c2e(self):
     """Check that discretizing via the c2e map yields output unlike the input.

     Four entities, each holding one time stamp for property 1, are grouped
     under keys 0 and 1 and discretized with a single cutpoint at 1.5 and a
     max gap of 0. The test fails when ``compare_time_stamps_c2e`` finds no
     difference between the input map and the returned map.
     """
     # (c2e key, Entity arguments, TimeStamp arguments)
     fixtures = [
         (0, (0, 0, -1), (0, 1, 1, 0, 0)),
         (0, (1, 0, -1), (0, 1, 1, 1, 0)),
         (1, (2, 1, -1), (1, 1, 1, 2, 1)),
         (1, (3, 1, -1), (1, 1, 1, 3, 1)),
     ]
     c2e = {0: set(), 1: set()}
     for group, entity_args, stamp_args in fixtures:
         entity: Entity = Entity(*entity_args)
         entity.properties = {1: [TimeStamp(*stamp_args)]}
         c2e[group].add(entity)

     d: Discretization = Expert({1: [1.5]}, 0)
     _, d_c2e, _ = d.discretize_property({}, c2e, {}, 1)

     difference = compare_time_stamps_c2e(c2e, d_c2e)
     # An empty comparison message means input and output are identical.
     self.assertTrue(difference != "",
                     "Original data was changed during abstraction")
Exemple #6
0
 def test_Abstraction_Ignore_Properties(self):
     """Discretize property 2 only and compare the result to ``expected``.

     Property 1 holds 1000 time stamps and has no cutpoints configured;
     property 2 holds a single ``TimeStamp(0, 0, 0, 0)`` with one cutpoint
     at 5. ``expected`` reuses the property 1 list and carries a freshly
     built time stamp for property 2.
     """
     TOTAL_POINTS = 1000
     cutpoints = [5]
     p2t = {
         1: [TimeStamp(i, i, i, i) for i in range(1, TOTAL_POINTS + 1)],
         2: [TimeStamp(0, 0, 0, 0)],
     }
     expected = {1: p2t[1], 2: [TimeStamp(0, 0, 0, 0)]}

     d: Discretization = Expert({2: cutpoints}, -1)
     d_p2e, d_c2e, d_p2t = d.discretize_property({}, {}, p2t, 2)

     # Compare against `expected`, not the input map: `expected` was built
     # for this purpose but never used, and checking the output against the
     # very object handed to the code under test cannot detect in-place
     # changes to property 2.
     message = compare_time_stamps(expected, d_p2t)
     self.assertTrue(message == "", message)
Exemple #7
0
 def test_Abstraction_MaxGap_0_Bins_2(self):
     """Discretize 1000 points with one cutpoint at 500 and a max gap of 0.

     Every point is ``TimeStamp(i, i, i, 0)``. The expected output is two
     time stamps: ``TimeStamp(0, 1, 499, 0)`` and
     ``TimeStamp(1, 500, 1000, 0)``.
     """
     TOTAL_POINTS = 1000
     p2t = {1: [TimeStamp(t, t, t, 0) for t in range(1, TOTAL_POINTS + 1)]}

     d: Discretization = Expert({1: [500]}, 0)
     _, _, d_p2t = d.discretize_property({}, {}, p2t, 1)

     expected = {
         1: [
             TimeStamp(0, 1, 499, 0),
             TimeStamp(1, 500, 1000, 0),
         ]
     }
     message = compare_time_stamps(expected, d_p2t)
     self.assertTrue(message == "", message)
Exemple #8
0
    def test_Abstraction_MaxGap_1_Bins_2_Different_Entities(self):
        """Discretize interleaved points with a cutpoint at 500 and max gap 1.

        Points 1..1000 are built as ``TimeStamp(i, i, i, k)`` where ``k`` is
        0 for even ``i`` and 1 for odd ``i``. The expected output holds four
        time stamps, two for each value of the last argument.
        """
        TOTAL_POINTS = 1000
        # i % 2 yields 0 for even and 1 for odd indices.
        p2t = {
            1: [
                TimeStamp(t, t, t, t % 2)
                for t in range(1, TOTAL_POINTS + 1)
            ]
        }
        expected = {
            1: [
                TimeStamp(0, 1, 499, 1),
                TimeStamp(0, 2, 498, 0),
                TimeStamp(1, 500, 1000, 0),
                TimeStamp(1, 501, 999, 1),
            ]
        }

        d: Discretization = Expert({1: [500]}, 1)
        _, _, d_p2t = d.discretize_property({}, {}, p2t, 1)

        message = compare_time_stamps(expected, d_p2t)
        self.assertTrue(message == "", message)
def _report_discretization_error(banner, input_path, output_path_folder,
                                 method_name, args, error):
    """Print a failure report and write it to the run and global error logs.

    The report overwrites ``error.log`` inside *output_path_folder* and is
    appended to the file at ``SAD_LOG_PATH``.
    """
    print(banner)
    exception_text = "--------------------\nDate: %s\nInput file: %s\nOutput path: %s\nMethod: %s\nArgs: %s\nError: %s\n" % (
        datetime.datetime.now(), input_path, output_path_folder, method_name,
        args, error)
    print(exception_text)
    print("***********************************************\n")
    with open(output_path_folder + "\\error.log", 'w') as f:
        f.write(exception_text)
    with open(SAD_LOG_PATH, 'a') as f:
        f.write(exception_text)


def first_method(running_configurations, root_folder, file_id):
    """Discretize every property of a dataset with every configured method.

    Reads the property ids, the class-to-entity counts and the entity count
    from files under ``<root_folder>\\<file_id>``, builds one discretization
    object per running configuration, runs each of them on each property and
    finally merges the per-property partitions of every method.

    Args:
        running_configurations: sequence whose items hold the method name at
            index 0 and an underscore-separated argument string at index 1.
        root_folder: folder containing one sub-folder per dataset.
        file_id: name of the dataset sub-folder and of its ``.csv`` file.

    Raises:
        Exception: any error raised while discretizing or merging is logged
            to ``error.log`` and ``SAD_LOG_PATH`` and then re-raised.
    """
    dataset_folder = "%s\\%s" % (root_folder, file_id)
    input_path = "%s\\%s.csv" % (dataset_folder, file_id)
    partitions_path = "%s\\%s" % (dataset_folder, "partitions")
    entities_path = "%s\\%s" % (dataset_folder, "entities.csv")
    vmap_path = "%s\\%s" % (dataset_folder, "vmap.csv")

    property_ids = []
    class_to_entity_count = {}

    print("Checking partitions...")
    discretizable = True
    try:
        get_maps_from_file(input_path, entities_path, CLASS_SEPARATOR)
    except FileFormatNotCorrect:
        # Non-KARMALEGO methods are skipped for inputs in the wrong format.
        discretizable = False
    with open(partitions_path + "\\properties.csv") as f:
        # Every row replaces the previous one, so the last row wins.
        for line in csv.reader(f):
            property_ids = [int(x) for x in line]
    with open(partitions_path + "\\class_to_entity_count.csv") as f:
        for line in csv.reader(f):
            class_to_entity_count[int(line[0])] = int(line[1])
    with open(entities_path) as f:
        # Raw line count minus one line.
        # NOTE(review): presumably the subtracted line is a header; confirm.
        entity_count = sum(1 for _ in f) - 1

    print("Partitions done")
    discretization_methods: List[Tuple[str, str, Discretization]] = []
    for running_configuration in running_configurations:
        method_name = running_configuration[0]
        args = running_configuration[1]
        if method_name == "EXPERT":
            split_args = args.split("_")
            md5 = split_args[0]
            max_gap = split_args[1]
            discretization_methods.append(
                (method_name, args,
                 Expert(
                     "%s\\%s\\%s\\%s" %
                     (root_folder, file_id, method_name, md5), max_gap)))
        else:
            discretization_methods.append(
                (method_name, args,
                 methods_names_to_functions[method_name](*args.split("_"))))
    # Sorting by map groups configurations that share a map, so the maps are
    # only reset when the map in use changes.
    discretization_methods = sorted(discretization_methods,
                                    key=lambda x: x[2].get_map_used())
    total_properties = len(property_ids)
    discretization_count = 0
    total_configurations = len(discretization_methods)
    for pid in property_ids:
        last_map_used = ""
        p2e = {}
        c2e = {}
        p2t = {}
        # Iterate over a snapshot: the KARMALEGO branch removes its entry
        # from discretization_methods, and removing from a list while
        # iterating it makes the loop skip the following configuration.
        for running_configuration in list(discretization_methods):
            discretization_count += 1
            method_name = running_configuration[0]
            args = running_configuration[1]
            print(
                "                     Discretizing property id %s in method %s, total: %s/%s"
                % (pid, method_name, discretization_count,
                   total_configurations * total_properties))
            print(
                "                     ------------------------------------------------------"
            )
            output_path_folder = "%s\\%s\\%s\\%s" % (root_folder, file_id,
                                                     method_name, args)
            try:
                if not exists(output_path_folder):
                    makedirs(output_path_folder)
                if method_name == "KARMALEGO":
                    # KarmaLego runs once for the whole dataset, so drop it
                    # before the remaining properties are processed.
                    discretization_methods.remove(running_configuration)
                    use_karma_lego(input_path, output_path_folder, "TIRPS.csv",
                                   args)
                    # NOTE(review): args is a string here, so *args unpacks
                    # it character by character; other call sites use
                    # *args.split("_") -- confirm which one run_KL expects.
                    run_KL(input_path, output_path_folder, *args)
                    continue
                elif discretizable:
                    d: Discretization = running_configuration[-1]
                    d.property_folder = partitions_path
                    if not DEBUG_MODE and (
                            exists(output_path_folder + "\\states.csv")
                            or exists(output_path_folder +
                                      "\\property%s_cutpoints.temp" % pid)):
                        discretization_count += total_properties - 1
                        print(
                            "Output files found! Canceling discretization method for this dataset... %s/%s is the new count."
                            % (discretization_count,
                               total_configurations * total_properties))
                        continue
                    if d.get_map_used() != last_map_used:
                        print("***CLEANING MAPS FROM MEMORY***")
                        last_map_used = d.get_map_used()
                        # Rebinding drops the old maps so they can be freed.
                        p2e = {}
                        c2e = {}
                        p2t = {}
                    d1, d2, d3 = d.discretize_property(p2e, c2e, p2t, pid)
                    write_partition(d1, d2, d3, d.bins_cutpoints[pid],
                                    output_path_folder, pid)
            except Exception as e:
                _report_discretization_error(
                    "\n*************EXCPETION THROWN!!!!*************",
                    input_path, output_path_folder, method_name, args, e)
                raise
    print("Writing output...")
    configuration_count = 0
    for running_configuration in discretization_methods:
        configuration_count += 1
        method_name = running_configuration[0]
        if method_name == "KARMALEGO":
            continue
        args = running_configuration[1]
        print("Outputting method %s, total: %s/%s" %
              (method_name, configuration_count, total_configurations))
        output_path_folder = "%s\\%s\\%s\\%s" % (root_folder, file_id,
                                                 method_name, args)
        try:
            merge_partitions(output_path_folder, vmap_path, method_name,
                             property_ids, list(class_to_entity_count.keys()),
                             class_to_entity_count, entity_count)
        except Exception as e:
            _report_discretization_error(
                "\n*************EXCPETION THROWN WHILE OUTPUTTING!!!!*************",
                input_path, output_path_folder, method_name, args, e)
            raise
def discretize():
    """Discretize the module-level maps with fixed cutpoints and print them.

    Reads the globals ``m1``, ``m2`` and ``m3``, runs them through an
    ``Expert`` configured with hard-coded cutpoints for properties 44, 3, 4
    and 5 and a max gap of 1, and passes the three results to
    ``print_maps``.
    """
    cutpoints_by_property = {
        44: [0.74],
        3: [26],
        4: [21],
        5: [70],
    }
    expert = Expert(cutpoints_by_property, max_gap=1)
    # m1, m2 and m3 are only read, so no `global` declaration is required.
    out1, out2, out3 = expert.discretize(m1, m2, m3)
    print_maps(out1, out2, out3)