Example #1
def filterModule(dict_context, controller):

    i = 1
    print("Context Features:")
    text = "Context Features:"
    controller.getAMController().addToConsoleAll(text + "\n")
    for key, value in dict_context.items():
        text = "CF" + str(i) + " - " + str(value)
        print(text)
        i = i + 1
        controller.getAMController().addToConsoleAll(text + "\n")
        controller.getAMController().addToConsoleInput(text + "\n")

    start_time = time.time()

    # Takes the dictionary and converts it to the correct format for Crossing (e.g. ["b5:a", "b5:b"])
    extracted_cross_filters, frequency_count = FILS.extractCrossFilters(
        dict_context, controller)

    # NOTE: CROSS is the collection of SSFs
    CROSS, frequency_count = FILS.processLVLs(
        extracted_cross_filters)  # Returns the filter list for each level

    # Record run time
    module_time = (time.time() - start_time)

    # Update singleton frequency
    AMVS.getSingleton().updateFrequency_FilterModule(frequency_count,
                                                     module_time)

    return CROSS
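
A rough, self-contained sketch of the CF listing loop in filterModule above; the feature names and codes below are made up for illustration, and enumerate replaces the manual counter:

# Hypothetical context dictionary; keys and codes are placeholders
dict_context = {"blood_type": "b5:a", "pulse_rate": "b5:b"}

for i, (key, value) in enumerate(dict_context.items(), start=1):
    print("CF" + str(i) + " - " + str(value))  # prints "CF1 - b5:a", then "CF2 - b5:b"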
Example #2
def crossProcessModule(df_dataset, np_CROSS, depth, controller):
    start_time = time.time()

    dict_significant_results, frequency_count, highest_process_frequency = CMPS.crossProcessOptimized(
        df_dataset, np_CROSS, depth, controller)

    # Record run time
    module_time = (time.time() - start_time)

    # Update singleton frequency
    AMVS.getSingleton().updateFrequency_CrossProcessModule(
        frequency_count, highest_process_frequency, module_time)

    return dict_significant_results
Example #3
def loaderModule():
    start_time = time.time()

    df_raw_dataset, df_dataset, ftr_names, pd_raw_dataset, frequency_count = LS.loadInput(
    )  # Can add parameters

    # Record run time
    module_time = (time.time() - start_time)

    # Update singleton frequency
    AMVS.getSingleton().updateFrequency_LoaderModule(frequency_count,
                                                     module_time)

    return df_raw_dataset, df_dataset, ftr_names, pd_raw_dataset
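
Examples 1 to 3 share the same wrapper shape: time the module call, then report the frequency count and elapsed time to the singleton. A minimal sketch of that pattern, with placeholder callables standing in for the real module call and the AMVS singleton's updateFrequency_* method:

import time

def timed_module(run_module, report):
    # run_module and report are stand-ins, not project functions
    start_time = time.time()
    result, frequency_count = run_module()
    module_time = time.time() - start_time
    report(frequency_count, module_time)
    return result

# Usage sketch with dummy callables
print(timed_module(lambda: ("data", 42), lambda freq, t: print(freq, round(t, 3))))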
Example #4
def runAutomatedMining(controller):

    text = "RUNNING Automated Mining\n"  # Show start message in console
    controller.getAMController().addToConsoleAll(text + "\n")

    text = "MAX CROSS: " + str(
        UICS.MAX_CROSS)  # Show MAX CROSS in console and input
    controller.getAMController().addToConsoleAll(text + "\n")
    controller.getAMController().addToConsoleInput(text + "\n")

    text = "MAX LEVEL: " + str(
        UICS.MAX_LEVEL) + "\n"  # Show MAX LEVEL in console and input
    controller.getAMController().addToConsoleAll(text + "\n")
    controller.getAMController().addToConsoleInput(text + "\n")

    df_raw_dataset, df_dataset, ftr_names, pd_raw_dataset = loaderModule()

    # Run STATIC depth mining (Loops based on MAX DEPTH)
    # dict_significant_results = runStaticDepthMining(df_raw_dataset, df_dataset, ftr_names, controller)

    # Depth mining that continues until the p-value stops updating
    dict_significant_results = runMobileDepthMining(df_raw_dataset, df_dataset,
                                                    ftr_names, pd_raw_dataset,
                                                    controller)

    controller.isAMFinished(
    )  # Enables the Check button (called on completion of the last iteration)
    print("Automated Mining Finished...")

    str_depths = str(AMVS.getSingleton().getDepths())
    controller.getAMController().addToConsoleAll("\nTotal Depth: " +
                                                 str_depths)
    print("Total Depth " + str_depths)

    str_run_time = str(AMVS.getSingleton().getTime())
    controller.getAMController().addToConsoleAll("\nAM Run time:\n" +
                                                 str_run_time + " seconds\n")
    print("Mining Run Time: " + str_run_time + " seconds")

    AMVS.getSingleton().resetSingleton()
    return dict_significant_results
Example #5
def isConstantSSFs(list_currSSFs):
    singleton = AMVS.getSingleton()
    llist_prevSSFs = singleton.getLlSSFs(
    )  # Get the list of all parsed SSFs (from all depths) via the Singleton class
    state = False

    for SSFs in llist_prevSSFs:
        # Check if all items in the current SSFs list are contained
        # in any previously parsed SSFs list
        state = isListsMatch(SSFs, list_currSSFs)
        if state:  # If there's a match, stop looping and return 'state'
            break

    return state
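
isListsMatch is not shown in these snippets. A plausible implementation, assuming it only checks whether every SSF in the current list already appears in a previously parsed list (the name and semantics here are assumptions, not the project's code):

def isListsMatch(prev_ssfs, curr_ssfs):
    # Assumed behavior: True when all current SSFs were already parsed previously
    return all(ssf in prev_ssfs for ssf in curr_ssfs)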
Example #6
def crossProcessOptimized(df_dataset, np_CROSS, depth, controller):
    key = UICS.KEY_PRE_CROSS_MODULE  # Key for progress bar

    controller.updateModuleProgress(key, UICS.MODULE_INDICATOR + "Starting CROSS PROCESS MODULE")  # 1
    # time.sleep(0.01)  # Sleep

    # Generate datasets as dictated by filters
    # NOTE:
    #   np_dataset_pairs[type]                      - A list of cross types
    #   np_dataset_pairs[type][level]               - A list of levels within the list of cross types
    #   np_dataset_pairs[type][level][0]            - A list of dataset pairs (list) within the list of levels
    #   np_dataset_pairs[0][0][0][0]                - The contents of the list containing the dataset pairs
    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR + "Extracting Datasets by Filter")  # 2
    # time.sleep(0.01)  # Sleep
    np_cross_datasets, np_cross_filters = extractDatasets(df_dataset, np_CROSS)  # TODO (Future) Try to optimize

    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR + "Successfully Extracted Datasets")  # 3
    # time.sleep(0.01)  # Sleep

    len_cross_datasets = int(UICS.MAX_CROSS)  # len(np_cross_datasets)
    len_cross_types = int(UICS.MAX_LEVEL)  # UICS.MAX_CROSS  # len(cross_type)
    # len_cross_level = UICS.MAX_LEVEL  # len(cross_level)

    list_cross_ssfs = []
    dict_result_table_sig = collections.OrderedDict()

    print("Processing - Please Wait...")

    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR + "Starting Cross Process : This might take some time...")  # 4
    # time.sleep(0.01)  # Sleep


    # Prepare to update progress bar with the second half of the CROSS PROCESS MODULE
    key = UICS.KEY_CROSS_MODULE  # Key for progress bar

    # Compute the total process count for this section from the cross type and level counts
    # Count one pass per level (see the line commented with "LVL Pass 1")
    UICS.CROSS_MAX_PROCESS_COUNT = computeMaxCrossLevelCount(np_cross_datasets, len_cross_datasets, len_cross_types)
    # The pass count could be doubled to record each pass (1) plus the table-export update (1),
    # but the doubling is currently disabled
    data_filter_process_count = computeMaxProcessCount(np_cross_datasets, len_cross_datasets, len_cross_types)
    UICS.CROSS_MAX_PROCESS_COUNT = UICS.CROSS_MAX_PROCESS_COUNT + data_filter_process_count
    list_level_ssfs = None

    start_time = time.time()
    # Apply Chi-square on all dataset pairs in the list np_dataset_pairs
    for i_cross_type in range(len_cross_datasets):  # TODO (Future) Find the best way to partition this
        cross_type = np_cross_datasets[i_cross_type]  # Iterate through each CROSS TYPE

        for i_cross_level in range(len_cross_types):
            # The variable cross_level is the list of dataframes
            cross_level = cross_type[i_cross_level]  # Iterate through each LEVEL
            len_cross_level = len(cross_level)

            list_level_ssfs = []
            list_all_ssfs = []
            list_ssfs = []

            str_current_cross = "[" + str(i_cross_type) + "][" + str(i_cross_level + 1) + "]"
            # Title for the current cross process
            str_title = UICS.SUB_MODULE_INDICATOR + "Processing CROSS" + str_current_cross  # LVL Pass 1
            # Update the progress bar about the current CROSS[type][level]
            controller.updateModuleProgress(key, str_title)  # Pass 1
            # time.sleep(0.01)  # Sleep

            i_process_count = 0  # Process count for current CROSS[type][level]
            # np_level_ssfs = np.array(list_level_ssfs)
            for i_dataset_pairs in range(len_cross_level):
                dataset_pairs = cross_level[i_dataset_pairs]
                len_dataset_pairs = len(dataset_pairs)

                str_cross_level_length = str(len_cross_level)
                #  Description for the current cross process
                str_description = "         " + str_current_cross + " - " + str(i_dataset_pairs + 1) + " of " + str_cross_level_length
                controller.updateModuleProgress(key, str_description)  # INNER PASS 1

                for i_dataset_pair in range(len_dataset_pairs):

                    dataset_pair = dataset_pairs[i_dataset_pair]

                    dict_chi_square = CHIS.chiSquare(dataset_pair)
                    # if dict_chi_square is None:
                    #     print("dict_chi_square is NONE")
                    # controller.updateModuleProgress(key, "Applying Chi-square")
                    # time.sleep(0.01)

                    df_processed_output, list_ssf, list_sig_output = CHIS.processChiSquareTable(dict_chi_square)

                    # if df_processed_output is None:
                        # print("df_processed_output is NONE")
                    if df_processed_output is not None:
                        dataset_pair_filter = np_cross_filters[i_cross_type][i_cross_level][i_dataset_pairs]

                        if len(list_ssfs) == 0:
                            list_ssfs = list_ssf
                        else:
                            list_ssfs = mergeAndFilter(list_ssfs, list_ssf)


                        np_dataset_pair_filter = np.array(dataset_pair_filter)
                        # list_chi_square_output.append([df_output, np_dataset_pair_filter])
                        list_index = [i_cross_type, i_cross_level]

                        # controller.updateModuleProgress(key, "Exporting Chi-square Table")
                        # time.sleep(0.01)

                        df_output, str_pair_name = LS.exportChiSquareTable(df_processed_output,
                                                                           np_dataset_pair_filter,
                                                                           list_index)

                        dict_result_table_sig = addToDictionaryResult(dict_result_table_sig, str_pair_name, list_sig_output)
                    # else:
                        # controller.updateModuleProgress(key, str_description)  # Pass 2
                        # Add 1 to make up for the missed processes
                        # print("DF OUTPUT IS NULL: Skipping Item")


                list_all_ssfs = mergeAndFilter(list_all_ssfs, list_ssfs)
                ssfs_filename = "SSFs - CROSS[" + str(i_cross_type) + "][" + str(i_cross_level + 1) + "].csv"
                LS.exportSSFs(list_ssfs, ssfs_filename, depth)


            # list_level_ssfs.append(list_all_ssfs)  # Store SSF list  # TODO: Commented out, check if still needed
        # list_cross_ssfs.append(list_level_ssfs)  # TODO: Commented out, check if still needed

    run_time = (time.time() - start_time)
    AMVS.getSingleton().updateTime(run_time)  # Update Singleton's run time
    print("--- %s seconds ---" % run_time)
    str_runtime = "\nCross Process Time:\n" + str(run_time) + " seconds"
    controller.getAMController().addToConsoleAll(str_runtime + "\n")

    print("Processing Complete")
    LS.exportOutputModuleResults(dict_result_table_sig, len_cross_datasets,
                                 len_cross_types, controller)

    return dict_result_table_sig
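
The nested loops above walk the structure described in the NOTE comment, roughly np_cross_datasets[cross_type][level][pair_group][pair]. A toy traversal with placeholder strings instead of the real pandas DataFrames (shape and names are illustrative only):

np_cross_datasets = [                    # list of cross types
    [                                    # one cross type: list of levels
        [                                # one level: list of dataset-pair groups
            [                            # one group: list of dataset pairs
                ("df_left", "df_right"), # one dataset pair (two DataFrames in the real code)
            ],
        ],
    ],
]

for cross_type in np_cross_datasets:
    for cross_level in cross_type:
        for dataset_pairs in cross_level:
            for dataset_pair in dataset_pairs:
                print(dataset_pair)      # the item handed to the chi-square step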
Example #7
def crossProcessOptimized(df_dataset, np_CROSS, depth, controller):
    key = UICS.KEY_PRE_CROSS_MODULE  # Key for progress bar

    controller.updateModuleProgress(key, UICS.MODULE_INDICATOR +
                                    "Starting CROSS PROCESS MODULE")  # 1
    # time.sleep(0.01)  # Sleep

    # Generate datasets as dictated by filters
    # NOTE:
    #   np_dataset_pairs[type]                      - A list of cross types
    #   np_dataset_pairs[type][level]               - A list of levels within the list of cross types
    #   np_dataset_pairs[type][level][0]            - A list of dataset pairs (list) within the list of levels
    #   np_dataset_pairs[0][0][0][0]                - The contents of the list containing the dataset pairs
    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR +
                                    "Extracting Datasets by Filter")  # 2
    # time.sleep(0.01)  # Sleep
    np_cross_datasets, np_cross_filters = extractDatasets(
        df_dataset, np_CROSS)  # TODO (Future) Try to optimize

    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR +
                                    "Successfully Extracted Datasets")  # 3
    # time.sleep(0.01)  # Sleep

    len_cross_datasets = int(UICS.MAX_CROSS)  # len(np_cross_datasets)
    len_cross_types = int(UICS.MAX_LEVEL)  # UICS.MAX_CROSS  # len(cross_type)
    # len_cross_level = UICS.MAX_LEVEL  # len(cross_level)

    list_cross_ssfs = []
    dict_result_table_sig = collections.OrderedDict()

    print(
        "Processing - Please Wait... (Average Runtime for ALL Features - 8 minutes)"
    )

    controller.updateModuleProgress(
        key, UICS.SUB_MODULE_INDICATOR +
        "Starting Cross Process : This might take some time...")  # 4
    # time.sleep(0.01)  # Sleep

    # Prepare to update progress bar with the second half of the CROSS PROCESS MODULE
    key = UICS.KEY_CROSS_MODULE  # Key for progress bar

    # Compute the total process count for this section from the cross type and level counts
    # Count one pass per level (see the line commented with "LVL Pass 1")
    UICS.CROSS_MAX_PROCESS_COUNT = computeMaxCrossLevelCount(
        np_cross_datasets, len_cross_datasets, len_cross_types)
    # The pass count could be doubled to record each pass (1) plus the table-export update (1),
    # but the doubling is currently disabled
    data_filter_process_count = computeMaxProcessCount(np_cross_datasets,
                                                       len_cross_datasets,
                                                       len_cross_types)
    UICS.CROSS_MAX_PROCESS_COUNT = UICS.CROSS_MAX_PROCESS_COUNT + data_filter_process_count

    start_time = time.time()

    pool_size = len_cross_datasets * len_cross_types
    process_params = []  # Iterable that will contain tuples of parameters
    print("Pool Size: " + str(pool_size))
    pool = Pool(processes=pool_size)
    manager = multiprocessing.Manager()  # Instantiate a Manager
    queue_flag = manager.Queue()
    queue_return = manager.Queue()
    queue_frequency = manager.Queue()
    # queue_time = manager.Queue()
    queue_console = manager.Queue()

    # Apply Chi-square on all dataset pairs in the list np_dataset_pairs
    for i_cross_type in range(
            len_cross_datasets
    ):  # TODO (Future) Find the best way to partition this

        for i_cross_level in range(len_cross_types):
            queue_flag.put("Done")  # Initialize the Flag Queue, queue_flag
            params = (
                i_cross_type, i_cross_level
            )  # Instantiate a process tuple (iterable) parameter for every (i) cross type and level
            process_params.append(params)

    process_func = partial(
        CPMPP.process, queue_flag, queue_return, queue_frequency, depth,
        np_cross_filters, np_cross_datasets, queue_console
    )  # Declare the target function and the parameters, minus the iterable

    pool.map(process_func, process_params
             )  # Launch the partial function and iterable asynchronously

    pool.close()
    pool.join()
    # print((queue_console.qsize()))

    run_time = (time.time() - start_time)
    AMVS.getSingleton().updateTime(run_time)  # Update Singleton's run time
    print("--- %s seconds ---" % run_time)
    str_runtime = "\nCross Process Time:\n" + str(run_time) + " seconds"
    controller.getAMController().addToConsoleAll(str_runtime + "\n")

    frequency_count = 0
    highest_frequency = 0
    # longest_run_time = 0
    while not queue_return.empty():
        dict_result_table_sig = queue_return.get()
        frequency_item = queue_frequency.get()
        frequency_count = frequency_count + frequency_item

        if frequency_item > highest_frequency:
            highest_frequency = frequency_item

        # time_item = queue_time.get()
        # if time_item > longest_run_time:
        #     longest_run_time = time_item

        LS.exportOutputModuleResults(dict_result_table_sig, len_cross_datasets,
                                     len_cross_types, depth, controller)

    # module_time = longest_run_time
    controller.updateModuleProgress(100, UICS.SUB_MODULE_INDICATOR +
                                    "Finished Depth " + str(depth) + "")  # 1
    print("Processing Complete")

    return dict_result_table_sig, frequency_count, highest_frequency
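
A minimal, self-contained sketch of the Pool/partial pattern used above: the fixed arguments are bound with functools.partial and the per-task (cross type, level) tuples are passed as the iterable to pool.map. The worker below is a placeholder, not the project's CPMPP.process:

import multiprocessing
from functools import partial
from multiprocessing import Pool

def worker(queue_return, params):
    # Placeholder worker: unpack the (type, level) tuple and report it back
    i_cross_type, i_cross_level = params
    queue_return.put((i_cross_type, i_cross_level))

if __name__ == "__main__":
    manager = multiprocessing.Manager()  # Manager queues can be shared with pool workers
    queue_return = manager.Queue()

    process_params = [(t, lvl) for t in range(2) for lvl in range(3)]
    process_func = partial(worker, queue_return)  # bind the fixed argument

    with Pool(processes=len(process_params)) as pool:
        pool.map(process_func, process_params)

    while not queue_return.empty():
        print(queue_return.get())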
Example #8
def runMobileDepthMining(df_raw_dataset, df_dataset, ftr_names, pd_raw_dataset,
                         controller):
    singleton = AMVS.getSingleton()  # A Singleton class is used
    dict_significant_results = None
    isUpdating = True
    hasPrevSSFs = True
    i_depth = 0

    while isUpdating:  # Keep looping until the stop criteria are met
        curr_depth = i_depth + 1
        singleton.resetCtrAccepted()

        print("Starting DEPTH: " + str(curr_depth))
        # Select SSFs: on the first iteration use the seed SSFs (the RFE call is commented out), otherwise load the SSFs generated at the previous depth
        if i_depth == 0:
            print("Loading SEED SSFs...")
            # dict_ranked_features = rfeModule(df_raw_dataset, ftr_names, controller)
            dict_ranked_features = UICS.SEED_SSFS
            AMVS.getSingleton().updateDictSSFs(dict_ranked_features,
                                               curr_depth)
            print("-- Successfully Loaded SEED SSFs --")

            print("Extracting RFE Features")
            # rfe_features = rfeModule(df_raw_dataset, ftr_names, pd_raw_dataset, controller)
            # print("-- Successfully Determined RFE Features --")
            # print(rfe_features)
            print("")

        else:
            print("Extracting SSFs from Previous Depth [" + str(i_depth) +
                  "]...")
            # Load the previous SSFs and consolidate. The current depth
            # indicates the PREVIOUS SSF folder.
            df_SSFs = DS.loadPreviousSSFs(i_depth)
            print("df_SSFs")
            print(df_SSFs)

            if df_SSFs is None:  # If there were no previously loaded SSFs, stop updating TODO: check if this can be determined earlier
                hasPrevSSFs = False
                isUpdating = False
                dict_ranked_features = None
                print("-- Failed to Locate Previous SSFs --")
            else:
                # Partition the extracted SSFs to 3 Ranks
                dict_new_ranked_features = DS.rankSSFs(df_SSFs)
                # Merge the new SSFs with the old SSFs
                AMVS.getSingleton().updateDictSSFs(dict_new_ranked_features,
                                                   curr_depth)
                print("RANK")
                dict_ranked_features = AMVS.getSingleton().getDictSSFs()
                print(dict_ranked_features)
                print("-- Successfully Extracted Previous SSFs --")

        if hasPrevSSFs:
            print("Starting Filtering...")
            np_cross = filterModule(dict_ranked_features, controller)
            print("-- Filtering Finished --")
            print("")

            print("Starting Cross Process...")
            dict_significant_results = crossProcessModule(
                df_dataset, np_cross, curr_depth, controller)
            print("-- Cross Process Finished --")

            list_SSFs = getSSFsList(dict_ranked_features)
            print(list_SSFs)
            # if isConstantSSFs(list_SSFs):  # Stop mining if the current list of SSFs has been parsed before
            if singleton.isConstantSSFs(
                    list_SSFs
            ):  # Stop mining if the current list of SSFs has been parsed before
                isUpdating = False
            elif singleton.getCtrAccepted(
            ) == 0:  # Mark mining as finished when there are no more accepted values
                isUpdating = False

            print(singleton.getCtrAccepted())

            i_depth = i_depth + 1

            singleton.updateFrequencyCountsText(curr_depth)

    singleton.setDepths(i_depth - 1)  # Log total number of depths
    singleton.printAllTextData()

    return dict_significant_results
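
Schematically, the loop above stops when either the current SSF list has already been parsed at an earlier depth (isConstantSSFs) or no new feature pairs were accepted during the depth. A simplified sketch of that decision; the set comparison is an assumption about how SSF lists are matched:

def should_stop_mining(curr_ssfs, previous_ssfs_lists, accepted_count):
    # Simplified stand-in for singleton.isConstantSSFs() plus getCtrAccepted() == 0
    seen_before = any(set(prev) == set(curr_ssfs) for prev in previous_ssfs_lists)
    return seen_before or accepted_count == 0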
Example #9
def crossFilters(filters, level):
    singleton = AMVS.getSingleton()

    # Get possible combinations of options (in filters parameter)
    combination = list(itertools.combinations(filters, level))
    set_combination = set(combination)
    list_combination = []

    for item in set_combination:
        list_item = np.asarray(item)
        list_combination.append(list_item)

    # list_combination = [val for sublist in list_combination for val in sublist]
    list_combination = np.array(list_combination)
    len_list_combination = len(list_combination)
    cross_filters = []

    end_index = len_list_combination - 1

    # TODO [PRINT: Amount of reduced values for paper]
    ctr_Raw = 0
    ctr_Valid = 0
    ctr_Purged = 0
    ctr_Filtered = 0

    for i in range(end_index):
        item_1 = list_combination[i]
        for j in range(end_index):
            counter = i + (j + 1)
            if counter <= end_index:
                item_2 = list_combination[counter]
                cross = [
                    item_1, item_2
                ]  # Sample content: [array(['b1:a', 'p11:b'], dtype='<U5'), array(['b1:a', 'p11:a'], dtype='<U5')]

                if validComparison(
                        cross
                ):  # Only proceed if the cross is valid; for more information see the notes above validComparison()
                    if updateChecklist(cross, level):
                        if not purgedCross(cross):  # Remove repeating pairs
                            # cross_filters.append(cross)
                            # print(cross)
                            ctr_Purged = ctr_Purged + 1
                            if not singleton.isFeaturePairParsed(
                                    cross
                            ):  # Don't include previously parsed pairs (from previous depths)
                                cross_filters.append(
                                    cross)  # Append a filter to cross_filters
                                singleton.updateFeaturePairs(cross)
                                singleton.addCtrAccepted()
                                # print("Added:")
                                # print(cross)
                                # print("Singleton contents:")
                                # print(singleton.getFeaturePairs())
                                ctr_Filtered = ctr_Filtered + 1

                    ctr_Valid = ctr_Valid + 1
                ctr_Raw = ctr_Raw + 1

    # Convert the inner arrays to plain lists so the output doesn't carry dtype metadata (e.g. dtype='<U5')
    list_cross_filters = []
    for item in cross_filters:
        item = [list(i) for i in item]
        list_cross_filters.append(item)
    np_list_cross_filters = np.array(list_cross_filters)

    # print(np_list_cross_filters)
    print("")
    print("RAW " + str(ctr_Raw))
    print("VALID " + str(ctr_Valid))
    print("PURGED " + str(ctr_Purged))
    print("ACCEPTED " + str(ctr_Filtered))
    print("")

    frequency_count = end_index * end_index
    return np_list_cross_filters, frequency_count
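
Stripped of the validity, checklist, purge, and already-parsed checks, the pairing logic in crossFilters amounts to taking level-sized combinations of the filters and pairing every combination with every later one. A self-contained sketch (the filter strings are made up):

import itertools

filters = ["b1:a", "b1:b", "p11:a"]
level = 2

combinations = list(itertools.combinations(filters, level))

cross_pairs = []
for i in range(len(combinations) - 1):
    for j in range(i + 1, len(combinations)):
        # Same pairing as the i / counter index arithmetic above, written directly
        cross_pairs.append([combinations[i], combinations[j]])

for pair in cross_pairs:
    print(pair)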