Example #1
0
def execute_script():
    """Run a transient CTMC analysis on the running-example log."""
    # load the example XES log shipped with the pm4py tests
    event_log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    # mine a performance-annotated directly-follows graph
    perf_dfg = dfg_miner.apply(event_log, variant=dfg_miner.DFG_PERFORMANCE)
    reach_graph, tang_reach_graph, stochastic_map, q_matrix = ctmc.get_tangible_reachability_and_q_matrix_from_dfg_performance(
        perf_dfg)
    # locate the source state of the tangible reachability graph
    source_state = [s for s in tang_reach_graph.states if s.name == "source1"][0]
    # distribution over the system states, starting from the source,
    # after 86400.0 seconds (one day)
    transient_result = ctmc.transient_analysis_from_tangible_q_matrix_and_single_state(
        tang_reach_graph, q_matrix, source_state, 86400.0)
    print(transient_result)
 def test_alpha_miner_log(self):
     """Smoke-test both Alpha Miner variants, on a log and on a DFG."""
     log = xes_importer.apply(
         os.path.join("input_data", "running-example.xes"))
     from pm4py.algo.discovery.alpha import factory as alpha_miner
     # classic Alpha on the log
     classic_net, classic_im, classic_fm = alpha_miner.apply(
         log, variant=alpha_miner.ALPHA_VERSION_CLASSIC)
     # Alpha+ variant on the same log
     plus_net, plus_im, plus_fm = alpha_miner.apply(
         log, variant=alpha_miner.ALPHA_VERSION_PLUS)
     from pm4py.algo.discovery.dfg import factory as dfg_discovery
     # classic Alpha applied directly to a mined DFG
     dfg = dfg_discovery.apply(log)
     dfg_net, dfg_im, dfg_fm = alpha_miner.apply_dfg(
         dfg, variant=alpha_miner.ALPHA_VERSION_CLASSIC)
Example #3
0
def apply(log, activity, parameters=None):
    """
    Gets the time passed to each succeeding activity

    Parameters
    -------------
    log
        Log
    activity
        Activity that we are considering
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    dictio
        Dictionary containing a 'post' key with the
        list of aggregates times from the given activity to each succeeding activity
    """
    if parameters is None:
        parameters = {}

    dfg_frequency = dfg_factory.apply(log, variant="frequency", parameters=parameters)
    dfg_performance = dfg_factory.apply(log, variant="performance", parameters=parameters)

    post = []
    total_weighted_perf = 0.0
    total_count = 0.0

    # each DFG key is a (source, target) pair of activities
    for edge, perf in dfg_performance.items():
        if edge[0] != activity:
            continue
        count = dfg_frequency[edge]
        post.append([edge[1], float(perf), int(count)])
        # weight each edge's performance by how often it occurs
        total_weighted_perf += float(perf) * float(count)
        total_count += float(count)

    # weighted average; 0.0 when the activity has no successors
    avg_perf_post = total_weighted_perf / total_count if total_count > 0 else 0.0

    return {"post": post, "post_avg_perf": avg_perf_post}
Example #4
0
 def highLevelDFG(self):
     """
     Build and save the high level DFG image of the entire process.

     Returns False when the DFG cannot be created; None on success.
     """
     try:
         # aggregate the raw dataframe into a high-level event log
         df, log, parameters = modules.eventAbstraction.aggregateData(
             self.dataframe, remove_duplicates=False)
         frequency_dfg = dfg_factory.apply(log, variant="frequency",
                                           parameters=parameters)
         image_params = self._createImageParameters(log=log, high_level=True)
         gviz = dfg_vis_factory.apply(frequency_dfg, log=log,
                                      variant="frequency",
                                      parameters=image_params)
         self._create_image(gviz, "DFG_model")
     except Exception as e:
         print(f"[PROCESS MINING] Could not create DFG: {e}")
         return False
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    # gather the log statistics the heuristics algorithm needs
    start_activities = log_sa_filter.get_start_activities(log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(log, activity_key, parameters=parameters)
    activities = list(activities_occurrences)

    # directly-follows graphs: plain, with window 2, and frequency triples
    dfg = dfg_factory.apply(log, parameters=parameters)
    window_2_parameters = deepcopy(parameters)
    window_2_parameters["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=window_2_parameters)
    freq_triples = dfg_factory.apply(log, parameters=parameters, variant="freq_triples")

    return apply_heu_dfg(dfg, activities=activities, activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities, dfg_window_2=dfg_window_2, freq_triples=freq_triples,
                         parameters=parameters)
Example #6
0
def Hueristics(file):
    """
    Mine a frequency DFG from an XES log and save its image.

    :param file: path of the XES log to import
    :return: path of the saved DFG picture
    """
    from pm4py.objects.log.importer.xes import factory as xes_importer
    event_log = xes_importer.import_log(file)

    from pm4py.algo.discovery.dfg import factory as dfg_factory
    mined_dfg = dfg_factory.apply(event_log)

    from pm4py.visualization.dfg import factory as dfg_vis_factory
    gviz = dfg_vis_factory.apply(mined_dfg, log=event_log, variant="frequency")
    # NOTE(review): hard-coded output location — confirm it exists on the host
    output_path = "/mnt/c/Users/harim/Downloads/dfg.png"
    dfg_vis_factory.save(gviz, output_path)
    return output_path
Example #7
0
def visualize_dfg(log, filename):
    """
    Visualizes an event log as a DFG and saves it as SVG.

    :param log: event log that will be visualized
    :param filename: filename for the created DFG
    """
    mined_dfg = dfg_factory.apply(log)
    gviz = dfg_vis_factory.apply(mined_dfg,
                                 log=log,
                                 variant='frequency',
                                 parameters={"format": "svg"})
    dfg_vis_factory.save(gviz, filename)
    def test_dfdoc1(self):
        """Smoke-test the low-level graphviz DFG visualization helper."""
        # to avoid static method warnings in tests,
        # that by construction of the unittest package have to be expressed in such way
        self.dummy_variable = "dummy_value"
        from pm4py.objects.log.importer.xes import factory as xes_importer
        event_log = xes_importer.import_log(
            os.path.join("input_data", "running-example.xes"))
        from pm4py.algo.discovery.dfg import factory as dfg_factory
        mined_dfg = dfg_factory.apply(event_log)
        from pm4py.algo.filtering.log.attributes import attributes_filter
        activities_count = attributes_filter.get_attribute_values(
            event_log, "concept:name")

        from pm4py.visualization.dfg.versions import simple_visualize as dfg_visualize
        gviz = dfg_visualize.graphviz_visualization(activities_count, mined_dfg)
        del gviz
Example #9
0
def generate_process_model(log):
    '''
    Description: to generate graphical process model in
                .svg format using pm4py library function
    Used: generate process model under provided log
    Input: log file
    Output: Display process model
    '''

    # mine the directly-follows graph from the log
    mined_dfg = dfg_factory.apply(log)
    # decorate the DFG with the frequency of activities, then show it
    gviz = dfg_vis_factory.apply(mined_dfg, log=log, variant="frequency")
    dfg_vis_factory.view(gviz)
    return mined_dfg
Example #10
0
 def highLevelPetriNet(self):
     """
     Build a high level Petri net of the entire process and export it as PNML.

     Returns False when the net cannot be created; None on success.
     """
     try:
         # aggregate the raw dataframe into a high-level event log
         df, log, parameters = modules.eventAbstraction.aggregateData(
             self.dataframe, remove_duplicates=False)
         frequency_dfg = dfg_factory.apply(log, variant="frequency",
                                           parameters=parameters)
         image_params = self._createImageParameters(log=log, high_level=True)
         # convert the DFG into a Petri net and persist it as PNML
         net, im, fm = dfg_conv_factory.apply(frequency_dfg, parameters=image_params)
         pnml_factory.apply(net, im,
                            os.path.join(self.discovery_path,
                                         f'{self.filename}_petri_net.pnml'),
                            final_marking=fm)
     except Exception as e:
         print(f"[PROCESS MINING] Could not create Petri Net: {e}")
         return False
Example #11
0
    def _createDFG(self, log=None, parameters=None, high_level=False):
        """
        Build a frequency DFG from a log.

        :param log: low-level event log (defaults to self._log)
        :param parameters: optional parameters to generate image
        :param high_level: when True, aggregate self.dataframe into a
            high-level log first and ignore ``log``/``parameters``
        :return: tuple (dfg, log) — the mined DFG and the log it came from
        """
        if high_level:
            df, log, parameters = modules.eventAbstraction.aggregateData(
                self.dataframe, remove_duplicates=False)
        else:
            parameters = parameters if parameters is not None else {}
            log = log if log is not None else self._log
        frequency_dfg = dfg_factory.apply(log, variant="frequency",
                                          parameters=parameters)
        return frequency_dfg, log
def execute_script():
    """End-to-end demo: CSV import, then DFG / Heuristics Net / Petri net views."""
    # import the CSV and map its columns onto the XES standard names
    dataframe = csv_import_adapter.import_dataframe_from_path(
        datasourceMockdata(), sep=";")
    dataframe = dataframe.rename(columns={
        'coID': 'case:concept:name',
        'Activity': 'concept:name'
    })
    log = conversion_factory.apply(dataframe)

    # option 1: Directly-Follows Graph, represent frequency or performance
    parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    dfg_gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant,
                                     parameters=parameters)
    dfg_vis_factory.view(dfg_gviz)

    # option 2: Heuristics Miner, acts on the Directly-Follows Graph,
    # find common structures, output: Heuristic Net (.svg)
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.00
        })
    heu_gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(heu_gviz)

    # option 3: Petri Net based on Heuristic Miner (.png)
    net, im, fm = heuristics_miner.apply(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.00
        })
    petri_gviz = petri_vis.apply(
        net, im, fm,
        parameters={
            petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png"
        })
    petri_vis.view(petri_gviz)
Example #13
0
    def get_paths(self, attribute_key, parameters=None):
        """
        Gets the paths (directly-follows relations) from the log

        Parameters
        -------------
        attribute_key
            Attribute key used as the activity classifier
        parameters
            Possible parameters (currently ignored by this method)

        Returns
        -------------
        paths
            Directly-follows graph of paths
        """
        # NOTE(review): ``parameters`` is accepted but never forwarded to the
        # DFG factory — confirm whether that is intentional.
        return dfg_factory.apply(
            self.log,
            parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key})
Example #14
0
def read_xes(data_dir, dataset, aggregate_type, mode="pruning"):
    """
    Read an event log (CSV or XES) and build its directly-follows graph.

    Parameters
    ------------
    data_dir
        Directory containing the dataset file
    dataset
        Dataset name without extension; "BPIC14" and "Unrineweginfectie"
        are CSV files with known column mappings, anything else is XES
    aggregate_type
        AggregateType.FREQ for a frequency DFG, otherwise a time-based DFG
    mode
        Unused in the visible code path (kept for interface compatibility)

    Returns
    ------------
    dfg
        The directly-follows graph
    """
    # NOTE(review): pruning constants are currently unused here — confirm
    # whether the pruning step was meant to be applied before returning
    prune_parameter_freq = 350
    prune_parameter_time = -1  # keep all

    # read the file; CSV datasets need their columns mapped to XES names.
    # Bug fix: the original used `dataset in "BPIC14"`, which is a substring
    # test (e.g. "PIC" or "C14" would match) — an exact comparison is meant.
    if dataset == "BPIC14":
        data = csv_import_adapter.import_dataframe_from_path(os.path.join(
            data_dir, dataset + ".csv"),
                                                             sep=";")
        data['case:concept:name'] = data['Incident ID']
        data['time:timestamp'] = data['DateStamp']
        data['concept:name'] = data['IncidentActivity_Type']
        log = conversion_factory.apply(data)
    elif dataset == "Unrineweginfectie":
        data = csv_import_adapter.import_dataframe_from_path(os.path.join(
            data_dir, dataset + ".csv"),
                                                             sep=",")
        data['case:concept:name'] = data['Patientnummer']
        data['time:timestamp'] = data['Starttijd']
        data['concept:name'] = data['Aciviteit']
        log = conversion_factory.apply(data)
    else:
        log = xes_import_factory.apply(os.path.join(data_dir,
                                                    dataset + ".xes"))
        data = get_dataframe_from_event_stream(log)

    # build the DFG according to the requested aggregation
    if aggregate_type == AggregateType.FREQ:
        dfg = dfg_factory.apply(log, variant="frequency")
    else:
        dfg = get_dfg_time(data, aggregate_type, dataset)

    # start/end activities are computed but not returned
    # NOTE(review): log_start / log_end are unused — confirm whether they
    # should be part of the return value
    log_start = start_activities_filter.get_start_activities(log)
    log_end = end_activities_filter.get_end_activities(log)
    return dfg
def execute_script(variant="frequency"):
    """
    Decorate an Inductive-Miner Petri net with DFG-based statistics.

    :param variant: decoration type ("frequency" or "performance")
    """
    # read the log using the nonstandard importer (faster)
    log_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = xes_importer.import_log(log_path, variant="nonstandard")
    # applies Inductive Miner on the log
    net, initial_marking, final_marking = inductive_miner.apply(log)
    # shortest paths in the net are needed to project DFG statistics onto it
    spaths = get_shortest_paths(net)

    # aggregation measure (sum, min, max, mean, median, stdev):
    # sum for frequency decoration, mean otherwise
    aggregation_measure = "sum" if variant == "frequency" else "mean"

    # mine the DFG and count activity occurrences in the trace log
    dfg = dfg_factory.apply(log, variant=variant)
    activities_count = attributes_filter.get_attribute_values(
        log, "concept:name")

    # project the statistics onto the Petri net via the greedy algorithm
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net,
        dfg,
        spaths,
        activities_count,
        variant=variant,
        aggregation_measure=aggregation_measure)

    # render the decorated net and show it on screen
    gviz = pn_vis_factory.apply(net,
                                initial_marking,
                                final_marking,
                                variant=variant,
                                aggregated_statistics=aggregated_statistics,
                                parameters={"format": "svg"})
    pn_vis_factory.view(gviz)
Example #16
0
def execute_script():
    """Montecarlo-simulate a Petri net converted from a frequency DFG."""
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    # mine a frequency DFG and convert it into a Petri net
    frequency_dfg = dfg_miner.apply(log, variant="frequency")
    net, im, fm = dfg_conv_factory.apply(frequency_dfg)

    # simulation 1: arrival rate inferred from the log (the simulation lasts 5 secs)
    log, res = montecarlo_simulation.apply(
        log, net, im, fm,
        parameters={
            "token_replay_variant": Variants.BACKWARDS,
            "enable_diagnostics": False,
            "max_thread_exec_time": 5
        })
    print(
        "\n(Montecarlo - Petri net) case arrival ratio inferred from the log")
    print(res["median_cases_ex_time"])
    print(res["total_cases_time"])

    # simulation 2: arrival rate specified by the user (the simulation lasts 5 secs)
    log, res = montecarlo_simulation.apply(
        log, net, im, fm,
        parameters={
            "token_replay_variant": Variants.BACKWARDS,
            "enable_diagnostics": False,
            "max_thread_exec_time": 5,
            "case_arrival_ratio": 60
        })
    print(
        "\n(Montecarlo - Petri net) case arrival ratio specified by the user")
    print(res["median_cases_ex_time"])
    print(res["total_cases_time"])
Example #17
0
    tel_tree_num = 0
    log_tree_num = 0
    sum_tree_num = 0
    print(tree)
    tree_avg_tel = 0
    tree_avg_log = 0
    tree_avg_sum = 0
    for sam in range(1, 11):
        print(sam)

        path = os.path.join("input_data", "df_complete_logs",
                            "%d_1000_%d.xes" % (tree, sam))
        log = xes_importer.apply(path)
        tel = xes_importer.apply(path)
        xes_utils.set_enabled(tel)
        dfg_100 = dfg_factory.apply(log)
        start_act = set(get_start_activities(log).keys())
        end_act = set(get_end_activities(log).keys())

        result_norm = []
        result_tel = []

        num = len(dfg_100.keys())
        score_tel = 0
        score_log = 0
        score_sum = 0
        su_tel = 0
        su_log = 0
        su_sum = 0
        for k in range(10):
            found_tel = False
Example #18
0
 def visualize_dfg(self,
                   log,
                   save_file=False,
                   file_name="dfg",
                   variant="relevance"):
     """
     Visualises the event log as direct follower graph (DFG).

     :param log: event log as a list of traces [list].
     :param save_file: boolean flag indicating to save the DFG or not [bool].
     :param file_name: name of the file [str].
     :param variant: dfg version to be produced: "frequency", "time",
         "relevance" or "all" [str]
     :return: file_names of the saved images [list]. Bug fix: the list used
         to be returned only for variant "time"/"all" (other variants
         returned None despite this docstring); it is now always returned.
     """
     parameters = {"format": "svg"}
     file_names = []
     relevance_scores = self.aggregate_relevance_scores(log)

     def _add_title(gviz, label, traces):
         # decorate the graphviz source with a large title banner
         if len(traces) == 1:
             title = "Prediction: " + str(
                 label) + ", Case ID: " + traces[0]
         else:
             title = "No of Service Orders: " + str(len(
                 log)) + ", Filter: Repair not on time (Label = " + str(
                     label) + ")"
         gviz.body.append('\t// title')
         gviz.body.append('\tfontsize = 50;')
         gviz.body.append('\tlabelloc = "t";')
         gviz.body.append('\tlabel = "' + title + '";')

     def _save(gviz, tag, label):
         # save the rendered image and remember its file name
         filen = file_name + "_" + tag + "_" + str(label) + ".svg"
         dfg_vis_factory.save(gviz, filen)
         print("Saved DFG image to: " + filen)
         file_names.append(filen)

     if variant == "relevance" or variant == "all":
         # one DFG per label, annotated with the relevance scores
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             gviz = dfg_vis_factory.apply(dfg,
                                          activities_count=items['scores'],
                                          parameters=parameters)
             _add_title(gviz, label, items['traces'])
             print("rel_sc: ", items['scores'])
             if save_file:
                 _save(gviz, "rel", label)

     if variant == "frequency" or variant == "all":
         # one DFG per label, annotated with activity frequencies
         # NOTE(review): counts are taken from the full log, not the
         # label-filtered `data` — confirm this is intended
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             activities_cnt = attributes_filter.get_attribute_values(
                 log, attribute_key="concept:name")
             gviz = dfg_vis_factory.apply(dfg,
                                          activities_count=activities_cnt,
                                          parameters=parameters)
             _add_title(gviz, label, items['traces'])
             if save_file:
                 _save(gviz, "freq", label)

     if variant == "time" or variant == "all":
         # one DFG per label, rendered with the performance visualizer
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             parameters = {"format": "svg", "AGGREGATION_MEASURE": "mean"}
             gviz = dfg_vis_factory.apply(dfg,
                                          variant="performance",
                                          parameters=parameters)
             _add_title(gviz, label, items['traces'])
             if save_file:
                 _save(gviz, "time", label)

     # bug fix: previously nested inside the "time" branch, so the other
     # variants returned None instead of the promised list
     return file_names
    log)
# discover a Petri net with the Inductive Miner (the alpha model used below
# is presumably mined just above this chunk — its call is cut off here)
inductive_petri, inductive_initial_marking, inductive_final_marking = inductive_miner.apply(
    log)

# precision of each discovered model with respect to the log
precision_alpha = precision_factory.apply(log, alpha_petri,
                                          alpha_initial_marking,
                                          alpha_final_marking)
precision_inductive = precision_factory.apply(log, inductive_petri,
                                              inductive_initial_marking,
                                              inductive_final_marking)

print("precision_alpha=", precision_alpha)
print("precision_inductive=", precision_inductive)

# mine the directly-follows graph of the same log
from pm4py.algo.discovery.dfg import factory as dfg_factory
dfg = dfg_factory.apply(log)
#-----------------------------
from collections import Counter
dfg1 = Counter({
    ('scroll', 'blur'): 1,
    ('selection', 'blur'): 1,
    ('click-0', 'scroll'): 1,
    ('focus', 'selection'): 1,
    ('click-0', 'blur'): 1,
    ('blur', 'focus'): 1,
    ('scroll', 'click-0'): 1,
    ('focus', 'blur'): 1,
    ('scroll', 'selection'): 1,
    ('focus', 'scroll'): 1,
    ('load', 'click-0'): 1,
    ('load', 'scroll'): 1,
# accumulators for the experiment below
# NOTE(review): this fragment looks cut from a larger script — several of
# the variables below (score_tel, su_tel, start_act, end_act, num) are
# assigned but not consumed within the visible code
alpha_avg = []
alpha_num = []

# iterate over 10 process trees, each with 10 sampled logs
for tree in range(1, 11):
    tel_tree_num = 0
    log_tree_num = 0
    print(tree)
    tree_avg_tel = 0
    tree_avg_log = 0
    for sam in range(1, 11):
        print(sam)

        # load the complete log for this (tree, sample) pair and mine its DFG
        path = os.path.join("input_data", "df_complete_logs",
                            "%d_1000_%d.xes" % (tree, sam))
        log = xes_importer.apply(path)
        dfg_org = dfg_factory.apply(log)
        start_act = set(get_start_activities(log).keys())
        end_act = set(get_end_activities(log).keys())

        # corresponding log prepared for the alpha experiment
        alpha_path = os.path.join("input_data", "df_complete_logs",
                                  "df_complete_alpha",
                                  "%d_alpha_%d.xes" % (tree, sam))
        alpha_log = xes_importer.apply(alpha_path)

        num = len(dfg_org.keys())
        score_tel = 0
        su_tel = 0
        # repeatedly sample sub-logs of growing size and mine their DFGs
        for k in range(10):
            for n in range(1, 1000):
                sampled_log = sampling.sample_log(alpha_log, no_traces=n)
                dfg_log = dfg_factory.apply(sampled_log)
def create_graphs(without_error, log, approach):
    """
    creates visualization: Directly-Follows-Graph and Heuristic Net

    :param without_error: only affects the output file naming
        ("no_error" vs "with_error")
    :param log: event log to visualize
    :param approach: approach label used in the output file names
    """
    path = "common_path"
    naming_error = "no_error" if without_error else "with_error"
    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "format": "svg"
    }

    # create dfg frequency
    vis_type = "dfg_frequency"
    variant = 'frequency'
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{file}"
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant,
                                 parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG frequency has been stored in '%s' in file '%s'", path,
                  file)

    # create dfg performance
    vis_type = "dfg_performance"
    variant = 'performance'
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{file}"
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant,
                                 parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG performance has been stored in '%s' in file '%s'", path,
                  file)

    # create heuristic net
    vis_type = "heuristicnet"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{file}"
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.60
        })
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz)
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net has been stored in '%s' in file '%s'", path,
                  file)

    # save heuristic net in plain-ext format
    file = f"{vis_type}_{approach}_{naming_error}.plain-ext"
    filename = f"{path}/{file}"
    gviz = hn_vis.apply(
        heu_net,
        parameters={
            hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "plain-ext"
        })
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .plain-ext has been stored in '%s' "
        "in file '%s'", path, file)

    # save heuristic net in dot format
    file = f"{vis_type}_{approach}_{naming_error}.dot"
    filename = f"{path}/{file}"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "dot"})
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .dot has been stored in '%s' "
        "in file '%s'", path, file)

    # save heuristic net in xdot format
    file = f"{vis_type}_{approach}_{naming_error}.xdot"
    filename = f"{path}/{file}"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "xdot"})
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .xdot has been stored in '%s' "
        "in file '%s'", path, file)
Example #22
0
def run_dfg_miner(log, variant="frequency"):
    """
    Discover a directly-follows graph from an event log.

    :param log: event log to mine
    :param variant: DFG variant to compute ("frequency" by default)
    :return: the mined DFG
    """
    # bug fix: ``variant`` was accepted but never forwarded, so callers
    # asking for another variant silently got the factory default
    dfg = dfg_factory.apply(log, variant=variant)
    return dfg
Example #23
0
def directly_follows_graphs_perf(log_file):
    """
    Mine a performance DFG from the log and save its visualization.

    :param log_file: event log
    :return: the literal string "success!"
    """
    # bug fix: mine the *performance* DFG so edges carry timing values;
    # previously the frequency DFG was handed to the performance visualizer
    dfg = dfg_factory.apply(log_file, variant="performance")
    gviz = dfg_vis_factory.apply(dfg, log=log_file, variant="performance")
    # consistency fix: save through the DFG visualization factory instead of
    # the Petri-net one (the previous call only worked because both factories
    # expose an equivalent generic ``save``)
    dfg_vis_factory.save(gviz, "static/dag_performance.png")
    return "success!"
Example #24
0
def show(model, tel, file_name, parameters):
    '''
    Show model and its quality measures
    :param model: model type ('ts' transition system, 'sbr' state based region,
        DFG miner, 'alpha' alpha miner)
    :param tel: input log
    :param file_name: img file name to show model
    :param parameters: parameter for transition system (afreq, sfreq)
    :return: tuple (output_file_path, result, max_thresh)
    '''

    # tel_flag is set when the first event of the first trace is a tel_event
    tel_flag = False
    if isinstance(tel[0][0], tel_event):
        tel_flag = True

    if model in ['ts', 'sbr']:
        # output path encodes the model type plus the relevant parameters
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images", file_name[:file_name.find('.')] + '_' +
                model + '_' + str(parameters['afreq_thresh']) + '_' +
                str(parameters['sfreq_thresh']) + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + "_" + file_name[:file_name.find('.')] + '_' + model +
                '_' + str(parameters[PARAM_KEY_DIRECTION]) + '_' +
                str(parameters[PARAM_KEY_WINDOW]) + "_" +
                str(parameters[PARAM_KEY_VIEW]) + ".png")
        auto = utils.discover_annotated_automaton(tel, parameters=parameters)

        # track the maximum afreq/sfreq observed over the automaton
        max_thresh = {}
        max_afreq = 0
        max_sfreq = 0

        if tel_flag:
            for trans in auto.transitions:
                max_afreq = max(max_afreq, trans.afreq)
            for state in auto.states:
                max_sfreq = max(max_sfreq, state.sfreq)
        max_thresh['afreq'] = max_afreq
        max_thresh['sfreq'] = max_sfreq

        if model == 'ts':
            # transition system: visualize it and report its size
            result = {}
            gviz = vis_factory.apply(auto)
            vis_factory.save(gviz, output_file_path)
            result['num of transitions'] = len(auto.transitions)
            result['num of states'] = len(auto.states)

        else:
            # state-based regions: synthesize a Petri net and evaluate it
            net, im, fm = sb.petri_net_synthesis(auto)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)

    else:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images",
                file_name[:file_name.find('.')] + '_' + model + '_' + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images", "2" + file_name[:file_name.find('.')] +
                '_' + model + '_' + ".png")

        if model == 'alpha':
            # use trans_alpha for plain Event objects,
            # otherwise the standard alpha miner
            if isinstance(tel[0][0], Event):
                net, im, fm = trans_alpha(tel)
            else:
                net, im, fm = alpha_miner.apply(tel)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)

        else:
            # DFG miner; for tel logs the DFG is replaced by the
            # transition-based one
            dfg = dfg_factory.apply(tel)
            if tel_flag:
                dfg_tel = inductive_revise.get_dfg_graph_trans(tel)
                #dfg = dfg_tel + dfg
                dfg = dfg_tel

            gviz = dfg_vis_factory.apply(dfg, log=tel)
            dfg_vis_factory.save(gviz, output_file_path)
            # result: DFG entries sorted by descending value
            result = dict(
                sorted(dfg.items(), key=operator.itemgetter(1), reverse=True))

        # no frequency thresholds for non-automaton models
        max_thresh = None

    return output_file_path, result, max_thresh
Example #25
0
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive)
            desidered_output -> Desidered output of the algorithm (default: Petri)
            include_filtered_log -> Include the filtered log in the output
            include_dfg_frequency -> Include the DFG of frequencies in the output
            include_dfg_performance -> Include the DFG of performance in the output
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary

    Returns
    -------------
    returned_dictionary
        Dictionary with the requested artifacts (or, with classic_output=True
        and a Petri output, the tuple net, initial_marking, final_marking)
    """
    if parameters is None:
        parameters = {}

    returned_dictionary = {}

    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    # read the options through dict.get instead of the repeated
    # "parameters[k] if k in parameters else default" pattern
    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alpha")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_log = parameters.get("include_filtered_log", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", False)
    include_filtered_dfg_frequency = parameters.get("include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get("include_filtered_dfg_performance", False)

    # resolve the activity key: use the one supplied by the caller, otherwise
    # try to detect a classifier attribute in the log, finally fall back to the default
    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
    # NOTE: parameters is mutated in place so downstream calls see the same key
    parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key

    # keep only the top maximum_number_activities most frequent activities
    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_count_list = sorted(activities_count_dictio.items(), key=lambda x: x[1], reverse=True)
    activities_count_list = activities_count_list[:maximum_number_activities]
    activities_keep_list = [x[0] for x in activities_count_list]

    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    filtered_log = None

    if "alpha" in discovery_algorithm:
        # parameters_sa = deepcopy(parameters)
        # parameters_sa["decreasingFactor"] = 1.0
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)

    # the frequency DFG is also needed (regardless of the include flag) when mining from the DFG
    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    # guard: an unrecognized discovery_algorithm leaves filtered_log as None,
    # which previously crashed dfg_factory.apply
    if filtered_log is not None:
        if include_filtered_dfg_frequency:
            filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
        if include_filtered_dfg_performance:
            filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters, variant="performance")

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)

        parameters_conv = {"start_activities": start_activities, "end_activities": end_activities}

        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)

    # assemble the dictionary, including only the artifacts that were
    # both requested and actually produced
    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log
    if net is not None and desidered_output == "petri":
        returned_dictionary["net"] = net
    if initial_marking is not None and desidered_output == "petri":
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and desidered_output == "petri":
        returned_dictionary["final_marking"] = final_marking
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking

    return returned_dictionary
Example #26
0
def get_decorated_net(net,
                      initial_marking,
                      final_marking,
                      log,
                      parameters=None,
                      variant="frequency"):
    """
    Get a decorated net according to the specified variant (decorate Petri net based on DFG)

    Parameters
    ------------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    log
        Log to use to decorate the Petri net
    parameters
        Algorithm parameters
    variant
        Specify if the decoration should take into account the frequency or the performance

    Returns
    ------------
    gviz
        GraphViz object
    """
    if parameters is None:
        parameters = {}

    # frequency decorations are summed, performance decorations are averaged;
    # the original code pre-assigned "mean" and then re-assigned it in a
    # redundant elif branch
    aggregation_measure = "sum" if "frequency" in variant else "mean"
    # an explicit parameter overrides the variant-derived default
    aggregation_measure = parameters.get("aggregationMeasure", aggregation_measure)

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    # we find the DFG
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    # we find shortest paths
    spaths = get_shortest_paths(net)
    # we find the number of activities occurrences in the trace log
    activities_count = attributes_filter.get_attribute_values(
        log, activity_key, parameters=parameters)
    # project DFG statistics onto the Petri net arcs/places
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net,
        dfg,
        spaths,
        activities_count,
        variant=variant,
        aggregation_measure=aggregation_measure)

    return visualize.apply(net,
                           initial_marking,
                           final_marking,
                           parameters=parameters,
                           decorations=aggregated_statistics)
Example #27
0
def directly_follows_graphs_freq(log_file):
    """
    Discover the frequency-annotated directly-follows graph of the given log
    and save its visualization to static/dag_frequency.png.
    """
    frequency_dfg = dfg_factory.apply(log_file)
    visualization = dfg_vis_factory.apply(frequency_dfg,
                                          log=log_file,
                                          variant="frequency")
    # NOTE(review): the Petri-net visualization factory is used to save a DFG
    # gviz object — presumably both factories share the same save helper; verify
    pn_vis_factory.save(visualization, "static/dag_frequency.png")
    return "success!"
Example #28
0
def apply(log, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    # dict.get replaces the "parameters[k] if k in parameters else default" pattern
    decreasingFactor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)

    # reduce the depth of the search done by token-based replay
    # NOTE: these are module-level globals — the setting leaks to later callers
    token_replay.MAX_REC_DEPTH = 1
    token_replay.MAX_IT_FINAL1 = 1
    token_replay.MAX_IT_FINAL2 = 1
    token_replay.MAX_REC_DEPTH_HIDTRANSENABL = 1

    # cap the number of distinct activities, then auto-filter the noise
    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    # discover the DFG, denoise it, then mine a Petri net from it
    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters=parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)

    # NOTE: mutates the caller-supplied parameters dict
    parameters["format"] = "svg"
    gviz = pn_vis_factory.apply(net,
                                im,
                                fm,
                                log=filtered_log,
                                variant="frequency",
                                parameters=parameters)

    svg = get_base64_from_gviz(gviz)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return svg, export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "inductive", "freq", None, "", activity_key
Example #29
0
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    # dict.get replaces the "parameters[k] if k in parameters else default" pattern
    decreasingFactor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)

    # cap the number of distinct activities, then auto-filter the noise
    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    # discover the DFG and denoise it according to the decreasing factor
    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    # NOTE: mutates the caller-supplied parameters dict
    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    # also convert the DFG to a Petri net so it can be exported as PNML
    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key
Example #30
0
def apply(log, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    # dict.get replaces the "parameters[k] if k in parameters else default" pattern
    decreasingFactor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)

    # cap the number of distinct activities, then auto-filter the noise
    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    # discover the DFG, denoise it, then mine a process tree from it
    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    tree = inductive_miner.apply_tree_dfg(dfg,
                                          parameters=parameters,
                                          activities=activities,
                                          start_activities=start_activities,
                                          end_activities=end_activities)
    # NOTE: mutates the caller-supplied parameters dict
    parameters["format"] = "svg"
    gviz = pt_vis_factory.apply(tree, parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    return get_base64_from_gviz(gviz), None, "", "xes", activities, start_activities, end_activities, gviz_base64, [], "tree", "freq", None, "", activity_key