def run_method(input_path, output_path_folder, method_name, args):
    """Run one discretization method on an input file and write its outputs.

    :param input_path: path of the input csv file
    :param output_path_folder: folder in which the output files are saved
    :param method_name: key into ``methods_names_to_functions``
    :param args: list of arguments forwarded to the method's constructor
    :return: void
    """
    try:
        # Append a "happy" audit record before doing any work.
        log_entry = (
            "--------------------\nDate: %s\nInput file: %s\nOutput path: %s\nMethod: %s\nArgs: %s\n"
            % (datetime.datetime.now(), input_path, output_path_folder,
               method_name, args))
        with open(r"C:\Users\rejabek\Server\python_happy_log.txt", 'a') as log_file:
            log_file.write(log_entry)

        print(args[0])
        # Dataset name = file name without its 4-char extension (".csv").
        dataset_name = input_path.split('\\')[-1][:-4]
        discretizer = methods_names_to_functions[method_name](*args)
        m1, m2, m3 = get_maps_from_file(input_path, CLASS_SEPARATOR)
        d1, d2, d3 = discretizer.discretize(m1, m2, m3)
        cutpoints_text = str(discretizer.bins_cutpoints)
        convert_cutpoints_to_output(d2, output_path_folder, dataset_name,
                                    discretizer.get_discretization_name())
        discretizer.write_auxiliary_information(d1, d2, d3, output_path_folder)
        with open(output_path_folder + "\\" + "cut_points.txt", 'w') as out_file:
            out_file.write(cutpoints_text)
    except Exception as e:
        # Mirror the audit record into the error log, with the exception text.
        error_entry = (
            "--------------------\nDate: %s\nInput file: %s\nOutput path: %s\nMethod: %s\nArgs: %s\nError: %s\n"
            % (datetime.datetime.now(), input_path, output_path_folder,
               method_name, args, e))
        with open(r"C:\Users\rejabek\Server\python_error_log.txt", 'a') as log_file:
            log_file.write(error_entry)
# Beispiel #2  -- snippet-separator artifact left by the code scraper;
# 0             commented out so the file can be parsed.
from Implementation.ClassicMethods.SAX import SaxConstrainsException, __SAX__, SAX
from Implementation.InputHandler import get_maps_from_file
from Implementation.TimeInterval import TimeInterval
from Implementation.TD4C.TD4C import TD4C
from Implementation.TimeStamp import TimeStamp
from Tests.Discretization_Result_Generator import get_test_result, assert_almost_equality, compare_time_stamps
from Tests.Constants import DATASETS_PATH, STRESS_VALUE_COUNT

# Dataset under test: folder name and csv file stem under DATASETS_PATH.
DATASET_NAME = "FAAgeGroup_F3"
# Class-separator value forwarded to get_maps_from_file.  The "SEPERATOR"
# spelling is kept as-is because other code references this exact name.
CLASS_SEPERATOR = -1

DATASET_PATH = DATASETS_PATH + "\\" + DATASET_NAME + "\\" + DATASET_NAME + ".csv"
# Prefix (note the trailing underscore) for expected-output files.
EXPECTED_OUTPUT_PATH = DATASETS_PATH + "\\" + DATASET_NAME + "\\" + DATASET_NAME + "_"
PARTITIONS_PATH = DATASETS_PATH + "\\" + DATASET_NAME + "\\partitions"
ENTITIES_PATH = "%s\\%s\\%s" % (DATASETS_PATH, DATASET_NAME, "entities.csv")
# Module-level side effect executed at import time; return value discarded.
# NOTE(review): presumably primes/validates the dataset's partition files --
# confirm against get_maps_from_file's contract.
get_maps_from_file(DATASET_PATH, ENTITIES_PATH, CLASS_SEPERATOR)


def test_SAX(MEAN, STD, PROPERTY_ID):
    """Exercise SAX discretization of one property over bin counts 2-5 and
    window sizes 2-5.

    NOTE(review): this definition is truncated in this chunk -- the body below
    ends mid-computation and unrelated code follows at the splice point;
    confirm the full body against the original source.
    """
    msg = ""
    for BIN_COUNT in range(2, 6):
        for WINDOW_SIZE in range(2, 6):
            # NOTE(review): the meaning of the -1 second argument to SAX is
            # not visible here -- confirm against the SAX constructor.
            d = SAX(BIN_COUNT, -1, WINDOW_SIZE)
            d.property_folder = PARTITIONS_PATH
            limits = __SAX__.build_limits(BIN_COUNT)
            p2t = {}
            d.load_property_to_timestamps(p2t, PROPERTY_ID)
            _, _, d_p2t = d.discretize_property({}, {}, p2t, PROPERTY_ID)
            # Standardize the raw timestamp values with the supplied
            # population statistics before windowing.
            values = [(ts.value - MEAN) / STD for ts in p2t[PROPERTY_ID]]
            length = len(values) // WINDOW_SIZE
            remainder = len(values) % WINDOW_SIZE
                    # NOTE(review): orphaned fragment -- the enclosing def and
                    # loops start outside this chunk; ``entity_elements``,
                    # ``f``, ``karma_output``, ``property_to_base`` and ``i``
                    # are all defined in the missing part.
                    _entity_element = entity_elements[i]
                    # Plain output: the element's four fields comma-joined.
                    f.write(
                        str(_entity_element[0]) + ',' +
                        str(_entity_element[1]) + ',' +
                        str(_entity_element[2]) + ',' +
                        str(_entity_element[3]))
                    # KarmaLego output: field 2 is offset by the per-property
                    # base value -- presumably to make state ids globally
                    # unique; confirm against property_to_base's producer.
                    karma_output.write(
                        str(_entity_element[0]) + ',' +
                        str(_entity_element[1]) + ',' +
                        str(property_to_base[_entity_element[3]] +
                            _entity_element[2]) + ',' +
                        str(_entity_element[3]))
                    # ';' separates elements of one entity; '\n' ends the entity.
                    if i + 1 != len(entity_elements):
                        f.write(';')
                        karma_output.write(';')
                f.write('\n')
                karma_output.write('\n')


if __name__ == '__main__':

    test_path = r'D:\test_stuff.txt'
    dataset_path = r'..\..\datasets\SAGender/SAGender.csv'

    # Load the three raw maps (55 is passed as the class-separator argument),
    # discretize them with a 4-bin EqualWidth method, and write the cutpoint
    # output for the second map.
    raw_map1, raw_map2, raw_map3 = get_maps_from_file(dataset_path, 55)
    discretizer = EqualWidth(4)
    disc_map1, disc_map2, disc_map3 = discretizer.discretize(
        raw_map1, raw_map2, raw_map3)
    convert_cutpoints_to_output(disc_map2, "D:\\", 'SAGender',
                                discretizer.get_discretization_name())
def first_method(running_configurations, root_folder, file_id):
    """Discretize every property of dataset ``file_id`` with each configured
    method, then merge the per-property partitions into the final outputs.

    :param running_configurations: iterable of ``(method_name, args)`` pairs,
        where ``args`` is an underscore-separated argument string.
    :param root_folder: root directory holding one sub-folder per dataset.
    :param file_id: dataset identifier (name of the sub-folder and csv file).
    :return: None.  Results are written under
        ``root_folder\\file_id\\method_name\\args``; any exception is written
        to ``error.log`` and the sad log, then re-raised.
    """
    input_path = "%s\\%s\\%s.csv" % (root_folder, file_id, file_id)
    partitions_path = "%s\\%s\\%s" % (root_folder, file_id, "partitions")
    entities_path = "%s\\%s\\%s" % (root_folder, file_id, "entities.csv")
    discretizable = True
    p2e = {}
    c2e = {}
    p2t = {}
    property_ids = []
    class_to_entity_count = {}
    entity_count = 0
    print("Checking partitions...")
    try:
        # Called for its side effect of building/validating partition files.
        get_maps_from_file(input_path, entities_path, CLASS_SEPARATOR)
    except FileFormatNotCorrect:
        discretizable = False
    with open(partitions_path + "\\properties.csv") as f:
        in_f = csv.reader(f)
        for line in in_f:
            # Expected to hold a single row of property ids (last row wins).
            property_ids = [int(x) for x in list(line)]
    with open(partitions_path + "\\class_to_entity_count.csv") as f:
        in_f = csv.reader(f)
        for line in in_f:
            class_to_entity_count[int(line[0])] = int(line[1])
    with open(entities_path) as f:
        # Count data rows: one per line, minus the header line.
        for _ in f:
            entity_count += 1
        entity_count -= 1

    print("Partitions done")
    # BUG FIX: the original annotation ``List[Tuple(str, str, Discretization)]``
    # *called* typing.Tuple at runtime, which raises TypeError; subscripting is
    # the correct annotation syntax.
    discretization_methods: List[Tuple[str, str, Discretization]] = []
    for running_configuration in running_configurations:
        method_name = running_configuration[0]
        args = running_configuration[1]
        if method_name == "EXPERT":
            # Expert args encode "<md5>_<max_gap>".
            md5 = args.split("_")[0]
            max_gap = args.split("_")[1]
            discretization_methods.append(
                (method_name, args,
                 Expert(
                     "%s\\%s\\%s\\%s" %
                     (root_folder, file_id, method_name, md5), max_gap)))
        else:
            discretization_methods.append(
                (method_name, args,
                 methods_names_to_functions[method_name](*args.split("_"))))
    # Sort so methods sharing an input map run consecutively and the maps are
    # only reloaded when the map kind changes (see the cleanup below).
    discretization_methods = sorted(discretization_methods,
                                    key=lambda x: x[2].get_map_used())
    total_properties = len(property_ids)
    discretization_count = 0
    total_configurations = len(discretization_methods)
    for pid in property_ids:
        last_map_used = ""
        p2e = {}
        c2e = {}
        p2t = {}
        # BUG FIX: iterate over a snapshot -- KARMALEGO entries are removed
        # from ``discretization_methods`` inside this loop, and removing from
        # the list being iterated silently skips the element that follows.
        for running_configuration in list(discretization_methods):
            discretization_count += 1
            method_name = running_configuration[0]
            args = running_configuration[1]
            print(
                "                     Discretizing property id %s in method %s, total: %s/%s"
                % (pid, method_name, discretization_count,
                   total_configurations * total_properties))
            print(
                "                     ------------------------------------------------------"
            )
            output_path_folder = "%s\\%s\\%s\\%s" % (root_folder, file_id,
                                                     method_name, args)
            try:
                if not exists(output_path_folder):
                    makedirs(output_path_folder)
                if method_name == "KARMALEGO":
                    # KarmaLego runs once on the whole dataset rather than per
                    # property, so drop it from the per-property schedule.
                    discretization_methods.remove(running_configuration)
                    use_karma_lego(input_path, output_path_folder, "TIRPS.csv",
                                   args)
                    run_KL(input_path, output_path_folder, *args)
                    continue
                elif discretizable:
                    d: Discretization = running_configuration[-1]
                    d.property_folder = "%s\\%s\\%s" % (root_folder, file_id,
                                                        "partitions")
                    if not DEBUG_MODE and (
                            exists(output_path_folder + "\\states.csv")
                            or exists(output_path_folder +
                                      "\\property%s_cutpoints.temp" % pid)):
                        # A previous run already produced output; advance the
                        # progress counter past this method's other properties.
                        discretization_count += total_properties - 1
                        print(
                            "Output files found! Canceling discretization method for this dataset... %s/%s is the new count."
                            % (discretization_count,
                               total_configurations * total_properties))
                        continue
                    if d.get_map_used() != last_map_used:
                        # The next method reads a different map kind -- drop
                        # the cached maps so they are lazily rebuilt.
                        print("***CLEANING MAPS FROM MEMORY***")
                        last_map_used = d.get_map_used()
                        del p2e
                        del c2e
                        del p2t
                        p2e = {}
                        c2e = {}
                        p2t = {}
                    d1, d2, d3 = d.discretize_property(p2e, c2e, p2t, pid)
                    write_partition(d1, d2, d3, d.bins_cutpoints[pid],
                                    output_path_folder, pid)
            except Exception as e:
                print("\n*************EXCPETION THROWN!!!!*************")
                exception_text = "--------------------\nDate: %s\nInput file: %s\nOutput path: %s\nMethod: %s\nArgs: %s\nError: %s\n" % (
                    datetime.datetime.now(), input_path, output_path_folder,
                    method_name, args, e)
                print(exception_text)
                print("***********************************************\n")
                with open(output_path_folder + "\\error.log", 'w') as f:
                    f.write(exception_text)
                with open(SAD_LOG_PATH, 'a') as f:
                    f.write(exception_text)
                raise
    print("Writing output...")
    configuration_count = 0
    for running_configuration in discretization_methods:
        method_name = running_configuration[0]
        args = running_configuration[1]
        configuration_count += 1
        if method_name == "KARMALEGO":
            # KARMALEGO entries are normally removed above; guard in case a
            # configuration was never reached during discretization.
            continue
        print("Outputting method %s, total: %s/%s" %
              (method_name, configuration_count, total_configurations))
        output_path_folder = "%s\\%s\\%s\\%s" % (root_folder, file_id,
                                                 method_name, args)
        vmap_path = "%s\\%s\\%s" % (root_folder, file_id, "vmap.csv")
        try:
            merge_partitions(output_path_folder, vmap_path, method_name,
                             property_ids, list(class_to_entity_count.keys()),
                             class_to_entity_count, entity_count)
        except Exception as e:
            print(
                "\n*************EXCPETION THROWN WHILE OUTPUTTING!!!!*************"
            )
            exception_text = "--------------------\nDate: %s\nInput file: %s\nOutput path: %s\nMethod: %s\nArgs: %s\nError: %s\n" % (
                datetime.datetime.now(), input_path, output_path_folder,
                method_name, args, e)
            print(exception_text)
            print("***********************************************\n")
            with open(output_path_folder + "\\error.log", 'w') as f:
                f.write(exception_text)
            with open(SAD_LOG_PATH, 'a') as f:
                f.write(exception_text)
            raise
# Beispiel #5  -- snippet-separator artifact left by the code scraper;
# 0             commented out so the file can be parsed.
def read_file():
    """Load the dataset maps and publish them as the module-level globals
    ``m1``, ``m2`` and ``m3``."""
    global m1, m2, m3
    print("Reading file...")
    loaded_maps = get_maps_from_file(DATASET_PATH, CLASS_SEPERATOR, True)
    m1, m2, m3 = loaded_maps