예제 #1
0
def execute_script():
    """
    Demo script for interval event logs: imports a log with explicit
    start/complete timestamps, prints interval statistics (sojourn times,
    concurrent activities, eventually-follows graph), renders frequency and
    performance DFGs, and finally converts the frequency DFG into a Petri
    net and visualises it.
    """
    log_path = os.path.join("..", "tests", "input_data", "interval_event_log.xes")
    #log_path = os.path.join("..", "tests", "input_data", "reviewing.xes")
    log = xes_importer.apply(log_path)
    # Shared parameters: the log is an interval log, so both the start and
    # the completion timestamp keys are supplied explicitly.
    parameters = {}
    parameters[constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp"
    parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp"
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = "concept:name"
    parameters["strict"] = False
    parameters["format"] = "svg"
    # Start/end activities are added to the parameters so that the
    # downstream statistics/visualisations can use them.
    start_activities = sa_get.get_start_activities(log, parameters=parameters)
    end_activities = ea_get.get_end_activities(log, parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    # Sojourn time statistic
    soj_time = soj_time_get.apply(log, parameters=parameters)
    print("soj_time")
    print(soj_time)
    # Concurrent-activities statistic
    conc_act = conc_act_get.apply(log, parameters=parameters)
    print("conc_act")
    print(conc_act)
    # Eventually-follows graph
    efg = efg_get.apply(log, parameters=parameters)
    print("efg")
    print(efg)
    # Frequency and performance DFGs, rendered with the DFG visualizer
    dfg_freq = dfg_algorithm.apply(log, parameters=parameters, variant=dfg_algorithm.Variants.FREQUENCY)
    dfg_perf = dfg_algorithm.apply(log, parameters=parameters, variant=dfg_algorithm.Variants.PERFORMANCE)
    dfg_gv_freq = dfg_vis_fact.apply(dfg_freq, log=log, variant=dfg_vis_fact.Variants.FREQUENCY,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_freq)
    dfg_gv_perf = dfg_vis_fact.apply(dfg_perf, log=log, variant=dfg_vis_fact.Variants.PERFORMANCE,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_perf)
    # Convert the frequency DFG into a Petri net and show it
    net, im, fm = dfg_conv.apply(dfg_freq)
    gviz = pn_vis.apply(net, im, fm, parameters=parameters)
    pn_vis.view(gviz)
예제 #2
0
def dfg_dist_calc_minkowski(log1, log2, alpha):
    """
    Combined Minkowski (p=2) distance between two event logs, mixing an
    activity-frequency distance and a DFG-frequency distance weighted as
    ``alpha * dist_act + (1 - alpha) * dist_dfg``.
    """
    def _normalized_minkowski(left, right):
        # Merge the two frequency tables on 'var', fill the gaps with 0,
        # normalise each row to a distribution and take the p=2 Minkowski
        # distance between the two rows.
        merged = pd.merge(left, right, how='outer', on='var').fillna(0)
        fx = merged['freq_x'].values
        fy = merged['freq_y'].values
        return pdist(np.array([fx / np.sum(fx), fy / np.sum(fy)]),
                     'minkowski', p=2.)[0]

    act1 = attributes_filter.get_attribute_values(log1, "concept:name")
    act2 = attributes_filter.get_attribute_values(log2, "concept:name")
    dfg1 = dfg_algorithm.apply(log1)
    dfg2 = dfg_algorithm.apply(log2)

    dist_act = _normalized_minkowski(act_dist_calc.occu_var_act(act1),
                                     act_dist_calc.occu_var_act(act2))
    dist_dfg = _normalized_minkowski(act_dist_calc.occu_var_act(dfg1),
                                     act_dist_calc.occu_var_act(dfg2))
    return dist_act * alpha + dist_dfg * (1 - alpha)
예제 #3
0
def dfg_dist_calc_suc(log1, log2):
    """
    Cosine distance between the DFG (directly-follows) frequency profiles
    of two event logs.
    """
    freq_a = act_dist_calc.occu_var_act(dfg_algorithm.apply(log1))
    freq_b = act_dist_calc.occu_var_act(dfg_algorithm.apply(log2))
    # Align both frequency tables on 'var', treating missing edges as 0
    merged = pd.merge(freq_a, freq_b, how='outer', on='var').fillna(0)
    vectors = np.array([merged['freq_x'].values, merged['freq_y'].values])
    return pdist(vectors, 'cosine')[0]
예제 #4
0
def extract_performance_of_direct_follows_relationships(logs):
    """
    Extracts, for every log, the performance directly-follows graph and
    flattens it into ``(str(edge), value)`` pairs.

    Parameters
    ----------
    logs
        Iterable of event logs

    Returns
    -------
    list
        One list per input log, each containing (edge-as-string, value)
        tuples taken from the performance DFG.
    """
    results = []
    for log in logs:
        # Bug fix: the frequency DFG used to be discovered first and its
        # result was immediately overwritten by the performance DFG; the
        # redundant discovery call has been removed.
        graph = dfg_discovery.apply(log, variant="performance")
        log_results = [(str(element), graph[element]) for element in set(graph)]
        results.append(log_results)
    return results
예제 #5
0
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY
            - Parameters.DEPENDENCY_THRESH
            - Parameters.AND_MEASURE_THRESH
            - Parameters.MIN_ACT_COUNT
            - Parameters.MIN_DFG_OCCURRENCES
            - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
            - Parameters.LOOP_LENGTH_TWO_THRESH

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)

    # Gather the log abstractions the Heuristics Miner works on:
    # start/end activities, the activity occurrence counts, and the DFGs.
    start_activities = log_sa_filter.get_start_activities(
        log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log,
                                                      parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_alg.apply(log, parameters=parameters)
    # A second DFG with window=2 (follows-at-distance-two); presumably used
    # for the length-two-loop detection (cf. LOOP_LENGTH_TWO_THRESH) — the
    # consumer is apply_heu_dfg, outside this view.
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_alg.apply(log, parameters=parameters_w2)
    # Frequency triples variant of the DFG discovery
    freq_triples = dfg_alg.apply(log,
                                 parameters=parameters,
                                 variant=dfg_alg.Variants.FREQ_TRIPLES)

    # Delegate the actual net construction to the DFG-based entry point
    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
예제 #6
0
def execute_script():
    """
    XES certification helper: imports every XES log found in the
    ``xesinput`` directory, exports it again, handles/derives a classifier
    attribute, renders a frequency DFG per log, and finally re-imports the
    exported file as a round-trip check.
    """
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # Bug fix: paths were assembled with hard-coded "\\" separators,
        # which only works on Windows; os.path.join is portable.
        log_path = os.path.join(log_input_directory, logName)
        log = xes_importer.apply(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + logName)
        print("exporting log", exp_log_name)
        xes_exporter.apply(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            # Bug fix: a bare "except:" also swallowed SystemExit and
            # KeyboardInterrupt; narrowed to Exception.
            except Exception:
                print("exception in handling classifier")

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }

            dfg = dfg_algorithm.apply(log, parameters=parameters)
            gviz = dfg_vis.apply(dfg,
                                 log=log,
                                 variant="frequency",
                                 parameters=parameters)
            # dfg_vis.view(gviz)

            dfg_vis.save(gviz,
                         os.path.join("xescert_images",
                                      logName.replace("xes", "png")))

        print("Reimporting log file just exported - ", exp_log_name)

        log = xes_importer.apply(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
예제 #7
0
def apply_tree(
    event_log: Union[pd.DataFrame, EventLog, EventStream],
    parameters: Optional[Dict[Union[Parameters, str],
                              Any]] = None) -> ProcessTree:
    """
    Discovers a process tree using the inductive miner.

    Parameters
    ----------
    event_log
        Event log / event stream / dataframe (converted to EventLog first)
    parameters
        Parameters of the algorithm, including Parameters.ACTIVITY_KEY,
        Parameters.NOISE_THRESHOLD and Parameters.USE_MSD_PARALLEL_CUT

    Returns
    -------
    ProcessTree
        The discovered process tree (folded and sorted)
    """
    if parameters is None:
        parameters = {}
    event_log = log_converter.apply(
        event_log,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=parameters)
    # NOTE(review): sibling code passes the enum member itself to
    # get_param_value, while here the raw ".value" string is used — confirm
    # this difference is intentional.
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY.value,
                                         parameters,
                                         xes_constants.DEFAULT_NAME_KEY)

    threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD,
                                           parameters, 0.0)

    if threshold == 0.0:
        # keep one trace per variant; more performant
        event_log = filtering_utils.keep_one_trace_per_variant(
            event_log, parameters=parameters)

    tree = __inductive_miner(
        event_log, discover_dfg.apply(event_log, parameters=parameters),
        threshold, None, act_key,
        exec_utils.get_param_value(Parameters.USE_MSD_PARALLEL_CUT, parameters,
                                   True))

    # Normalise the tree structure before returning it
    tree_consistency.fix_parent_pointers(tree)
    tree = generic.fold(tree)
    generic.tree_sort(tree)

    return tree
예제 #8
0
def execute_script():
    """
    Runs two Montecarlo simulations (Petri net semaphore/FIFO variant) on
    the running example: once with the case arrival ratio inferred from the
    log, once with a user-specified ratio of 60 seconds.
    """
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    frequency_dfg = dfg_miner.apply(log, variant=dfg_miner.Variants.FREQUENCY)
    net, im, fm = dfg_conv.apply(frequency_dfg)
    # Alias for the simulation-variant parameter enum to keep the dict short
    sim_params = montecarlo_simulation.Variants.PETRI_SEMAPH_FIFO.value.Parameters
    # perform the Montecarlo simulation with the arrival rate inferred by the log (the simulation lasts 5 secs)
    parameters = {
        sim_params.TOKEN_REPLAY_VARIANT: Variants.BACKWARDS,
        sim_params.PARAM_ENABLE_DIAGNOSTICS: False,
        sim_params.PARAM_MAX_THREAD_EXECUTION_TIME: 5,
    }
    log, res = montecarlo_simulation.apply(log,
                                           net,
                                           im,
                                           fm,
                                           parameters=parameters)
    print(
        "\n(Montecarlo - Petri net) case arrival ratio inferred from the log")
    print(res["median_cases_ex_time"])
    print(res["total_cases_time"])
    # perform the Montecarlo simulation with the arrival rate specified (the simulation lasts 5 secs)
    parameters[sim_params.PARAM_CASE_ARRIVAL_RATIO] = 60
    log, res = montecarlo_simulation.apply(log,
                                           net,
                                           im,
                                           fm,
                                           parameters=parameters)
    print(
        "\n(Montecarlo - Petri net) case arrival ratio specified by the user")
    print(res["median_cases_ex_time"])
    print(res["total_cases_time"])
예제 #9
0
def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Discovers a DFG from a log

    Parameters
    --------------
    log
        Event log

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if check_is_dataframe(log):
        # Dataframe route: validate the columns, then use the pandas-based
        # retrieval and statistics modules.
        check_dataframe_columns(log)
        from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
        from pm4py.statistics.start_activities.pandas import get as sa_stats
        from pm4py.statistics.end_activities.pandas import get as ea_stats
        dfg = get_dfg_graph(log)
    else:
        # Event-log route: use the log-based discovery/statistics modules.
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.statistics.start_activities.log import get as sa_stats
        from pm4py.statistics.end_activities.log import get as ea_stats
        dfg = dfg_discovery.apply(log)
    start_activities = sa_stats.get_start_activities(log)
    end_activities = ea_stats.get_end_activities(log)
    return dfg, start_activities, end_activities
예제 #10
0
def detect(log: EventLog, alphabet: Dict[str, int], act_key: str, use_msd: bool) -> Optional[str]:
    """
    Looks for an activity that occurs in every trace and whose removal
    makes the projected log admit one of the known cuts
    (sequence, xor, concurrent, loop).

    Parameters
    ----------
    log
        Event log
    alphabet
        Activities of the log (mapped to counts)
    act_key
        Attribute key identifying the activity of an event
    use_msd
        Whether minimum-self-distance witnesses are fed to the concurrent cut

    Returns
    -------
    Optional[str]
        An activity whose projection admits a cut, or None
    """
    # Candidates: activities present in every trace of the log
    candidates = set(alphabet.keys())
    for t in log:
        candidates = candidates.intersection(set(map(lambda e: e[act_key], t)))
        if len(candidates) == 0:
            return None
    for a in candidates:
        # Project the log by dropping activity `a` from every trace
        proj = EventLog()
        for t in log:
            proj.append(pm4py.filter_trace(lambda e: e[act_key] != a, t))
        # Only continue if no trace became empty after the projection
        if len(list(filter(lambda t: len(t) == 0, proj))) == 0:
            dfg_proj = discover_dfg.apply(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            alphabet_proj = pm4py.get_attribute_values(proj, act_key)
            start_act_proj = get_starters.get_start_activities(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            # Bug fix: the end activities were computed on the full log
            # instead of the projection, unlike start_act_proj above.
            end_act_proj = get_ends.get_end_activities(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            pre_proj, post_proj = dfg_utils.get_transitive_relations(dfg_proj, alphabet_proj)
            cut = sequence_cut.detect(alphabet_proj, pre_proj, post_proj)
            if cut is not None:
                return a
            cut = xor_cut.detect(dfg_proj, alphabet_proj)
            if cut is not None:
                return a
            # NOTE(review): the MSD itself is computed on the original log
            # while the witnesses are derived on the projection — confirm
            # this asymmetry is intended.
            cut = concurrent_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj,
                                        msd=msdw_algo.derive_msd_witnesses(proj, msd_algo.apply(log, parameters={
                                            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}), parameters={
                                            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) if use_msd else None)
            if cut is not None:
                return a
            cut = loop_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj)
            if cut is not None:
                return a
    return None
예제 #11
0
def discover_dfg(log):
    """
    Discovers the directly-follows graph of a log_skeleton, together with
    its start and end activities.

    Parameters
    --------------
    log
        Event log_skeleton

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    from pm4py.statistics.start_activities.log import get as sa_stats
    from pm4py.statistics.end_activities.log import get as ea_stats
    dfg = dfg_discovery.apply(log)
    return (dfg,
            sa_stats.get_start_activities(log),
            ea_stats.get_end_activities(log))
예제 #12
0
def case_filter_dfg(request):
    """
    Django view: filters the session's current event log down to a single
    case (taken from the POST data) and returns the DFG of the filtered log
    as a JSON (g6) structure.

    Bug fixes: the original raised UnboundLocalError when no log was in the
    session or on non-POST requests (``log_format``/``this_data`` unbound),
    and called ``re.escape(temp_file)`` discarding the result (a no-op).
    """
    event_logs_path = os.path.join(settings.MEDIA_ROOT, "event_logs")
    log_information = None
    # TODO Load the Log Information, else throw/redirect to Log Selection
    if "current_log" in request.session and request.session["current_log"] is not None:
        log_information = request.session["current_log"]
        print(log_information)

    this_data = None

    if log_information is not None:
        event_log = os.path.join(event_logs_path, log_information["log_name"])
        log_format = log_import.get_log_format(log_information["log_name"])

        # Import the Log considering the given Format
        log, activities = log_import.log_import(event_log, log_format, log_information)

        if request.method == "POST":
            selected_case = request.POST["selected_case"]

            if log_format == "xes":
                filtered_log = pm4py.filter_trace_attribute_values(
                    log, log_information["case_id"], [selected_case], retain=True)
            else:
                filtered_log = log[log["case:concept:name"].isin([selected_case])]

            dfg = dfg_discovery.apply(filtered_log)
            this_data, temp_file = plotting.dfg_to_g6(dfg)

    message = {
        "success": this_data is not None,
        "data": json.dumps(this_data),
        "responseText": "Inactivated successfully!",
    }
    return JsonResponse(message)
예제 #13
0
 def test_alpha_miner_log(self):
     """Smoke test: alpha miner (classic and plus) on the running example, plus the DFG-based classic variant."""
     log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
     from pm4py.algo.discovery.alpha import algorithm as alpha_miner
     # Classic and alpha+ variants discovered directly from the log
     net1, im1, fm1 = alpha_miner.apply(log, variant=alpha_miner.Variants.ALPHA_VERSION_CLASSIC)
     net2, im2, fm2 = alpha_miner.apply(log, variant=alpha_miner.Variants.ALPHA_VERSION_PLUS)
     from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
     dfg = dfg_discovery.apply(log)
     # Classic alpha applied to a pre-computed DFG
     net3, im3, fm3 = alpha_miner.apply_dfg(dfg, variant=alpha_miner.Variants.ALPHA_VERSION_CLASSIC)
예제 #14
0
    def __init__(self,
                 log,
                 parameters=None,
                 variant=dfg_discovery.Variants.FREQUENCY):
        """
        Discovers and stores the directly-follows graph of the given log.

        Parameters
        ----------
        log
            Event log
        parameters
            Optional parameters forwarded to the DFG discovery
        variant
            DFG discovery variant (frequency by default)
        """
        # The discovered DFG is kept on the instance
        self.dfg = dfg_discovery.apply(log,
                                       parameters=parameters,
                                       variant=variant)
예제 #15
0
def extract_direct_follows_relationships(logs):
    """
    Extracts, for every log, the directly-follows graph flattened into
    ``(str(edge), value)`` pairs.
    """
    results = []
    for event_log in logs:
        graph = dfg_discovery.apply(event_log)
        # One (edge-as-string, value) tuple per distinct element of the graph
        results.append([(str(edge), graph[edge])
                        for edge in set(graph.elements())])
    return results
예제 #16
0
    def test_46(self):
        """Round-trip: import a XES log, discover its DFG, convert the DFG to a Petri net."""
        from pm4py.objects.log.importer.xes import importer as xes_importer
        log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))

        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        dfg = dfg_discovery.apply(log)

        # Convert the discovered DFG into an accepting Petri net
        from pm4py.objects.conversion.dfg import converter as dfg_mining
        net, im, fm = dfg_mining.apply(dfg)
예제 #17
0
 def test_exporting_dfg(self):
     """Exports a discovered DFG to disk, re-imports it, and removes the file."""
     log = xes_importer.apply(
         os.path.join("input_data", "running-example.xes"))
     dfg = dfg_discovery.apply(log)
     dfg_exporter.apply(
         dfg, os.path.join("test_output_data", "running-example.dfg"))
     # The importer returns the DFG plus start/end activities
     dfg, sa, ea = dfg_importer.apply(
         os.path.join("test_output_data", "running-example.dfg"))
     os.remove(os.path.join("test_output_data", "running-example.dfg"))
예제 #18
0
def apply(log, parameters=None):
    """
    Discovers a footprint object from an event log
    (the footprints are returned case-by-case)

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        List of footprints for the cases of the log
    """
    parameters = {} if parameters is None else parameters

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log,
                          variant=converter.TO_EVENT_LOG,
                          parameters=parameters)

    footprints = []

    for trace in log:
        # DFG of the single trace
        dfg = dfg_discovery.apply(EventLog([trace]), parameters=parameters)
        # Edges present in both directions are parallel; the rest are sequence
        parallel = {pair for pair in dfg if (pair[1], pair[0]) in dfg}
        sequence = set(dfg) - parallel
        acts = tuple(event[activity_key] for event in trace)
        if len(acts) > 0:
            start_activities = {acts[0]}
            end_activities = {acts[-1]}
        else:
            start_activities = set()
            end_activities = set()

        footprints.append({
            Outputs.DFG.value: dfg,
            Outputs.SEQUENCE.value: sequence,
            Outputs.PARALLEL.value: parallel,
            Outputs.ACTIVITIES.value: set(acts),
            Outputs.START_ACTIVITIES.value: start_activities,
            Outputs.END_ACTIVITIES.value: end_activities,
            Outputs.MIN_TRACE_LENGTH.value: len(acts),
            Outputs.TRACE.value: acts
        })

    return footprints
예제 #19
0
def apply(log, parameters=None):
    """
    Discovers a footprint object from an event log
    (the footprints of the event log are returned)

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        Footprints object
    """
    parameters = {} if parameters is None else parameters

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log,
                          variant=converter.TO_EVENT_LOG,
                          parameters=parameters)

    dfg = dfg_discovery.apply(log, parameters=parameters)
    # Edges that occur in both directions form the parallel relation;
    # the sequence relation comes from the alpha causal relation on the DFG
    parallel = {pair for pair in dfg if (pair[1], pair[0]) in dfg}
    sequence = set(
        causal_discovery.apply(dfg, causal_discovery.Variants.CAUSAL_ALPHA))

    start_activities = set(
        get_start_activities.get_start_activities(log, parameters=parameters))
    end_activities = set(
        get_end_activities.get_end_activities(log, parameters=parameters))
    activities = {event[activity_key] for trace in log for event in trace}
    min_trace_length = min(len(trace) for trace in log) if len(log) > 0 else 0

    return {
        Outputs.DFG.value: dfg,
        Outputs.SEQUENCE.value: sequence,
        Outputs.PARALLEL.value: parallel,
        Outputs.START_ACTIVITIES.value: start_activities,
        Outputs.END_ACTIVITIES.value: end_activities,
        Outputs.ACTIVITIES.value: activities,
        Outputs.MIN_TRACE_LENGTH.value: min_trace_length
    }
예제 #20
0
 def test_exporting_dfg_with_sa_ea(self):
     """Exports a DFG together with its start/end activities, re-imports it, and removes the file."""
     log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
     dfg = dfg_discovery.apply(log)
     sa = start_activities.get_start_activities(log)
     ea = end_activities.get_end_activities(log)
     # The start/end activities are passed through the exporter's parameters
     dfg_exporter.apply(dfg, os.path.join("test_output_data", "running-example.dfg"),
                        parameters={dfg_exporter.Variants.CLASSIC.value.Parameters.START_ACTIVITIES: sa,
                                    dfg_exporter.Variants.CLASSIC.value.Parameters.END_ACTIVITIES: ea})
     dfg, sa, ea = dfg_importer.apply(os.path.join("test_output_data", "running-example.dfg"))
     os.remove(os.path.join("test_output_data", "running-example.dfg"))
예제 #21
0
def discover_dfg_miner(log):
    """
    Discovers an accepting Petri net from the frequency DFG of the log,
    using the log's start/end activities for the conversion.
    """
    dfg = dfg_discovery.apply(log)
    conversion_parameters = {
        "start_activities": sa_get.get_start_activities(log),
        "end_activities": ea_get.get_end_activities(log),
    }
    net, im, fm = dfg_converter.apply(dfg, parameters=conversion_parameters)
    return net, im, fm
예제 #22
0
    def test_44(self):
        """Discovers a performance DFG and builds (without rendering) its Graphviz visualisation."""
        import os
        from pm4py.objects.log.importer.xes import importer as xes_importer
        log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))

        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.visualization.dfg import visualizer as dfg_visualization

        # Performance variant of both the discovery and the visualisation
        dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
        gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE)
예제 #23
0
def discover_abstraction_log(
    log: EventLog,
    parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Discovers an abstraction from a log that is useful for the Heuristics Miner ++ algorithm

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    # Basic log abstractions: start/end activities and activity counts
    start_activities = log_sa.get_start_activities(log, parameters=parameters)
    end_activities = log_ea.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    # NOTE(review): the "dfg" is obtained from the eventually-follows getter
    # with KEEP_FIRST_FOLLOWING=True rather than the plain DFG algorithm —
    # presumably deliberate for interval logs (HM++); confirm.
    efg_parameters = copy(parameters)
    efg_parameters[efg_get.Parameters.KEEP_FIRST_FOLLOWING] = True
    dfg = efg_get.apply(log, parameters=efg_parameters)
    performance_dfg = dfg_alg.apply(log,
                                    variant=dfg_alg.Variants.PERFORMANCE,
                                    parameters=parameters)
    sojourn_time = soj_get.apply(log, parameters=parameters)
    concurrent_activities = conc_act_get.apply(log, parameters=parameters)
    return (start_activities, end_activities, activities_occurrences, dfg,
            performance_dfg, sojourn_time, concurrent_activities)
예제 #24
0
def dfg_dist_calc(log1, log2):
    """
    Computes two cosine distances between a pair of event logs: one over the
    activity-frequency profiles and one over the DFG-frequency profiles.

    Parameters
    ----------
    log1, log2
        The event logs to compare

    Returns
    -------
    tuple
        (dist_act, dist_dfg); dist_dfg falls back to 1 when the cosine
        distance is undefined (NaN, e.g. a zero DFG vector).
    """
    act1 = attributes_filter.get_attribute_values(log1, "concept:name")
    act2 = attributes_filter.get_attribute_values(log2, "concept:name")
    dfg1 = dfg_algorithm.apply(log1)
    dfg2 = dfg_algorithm.apply(log2)
    df1_act = act_dist_calc.occu_var_act(act1)
    df2_act = act_dist_calc.occu_var_act(act2)
    df1_dfg = act_dist_calc.occu_var_act(dfg1)
    df2_dfg = act_dist_calc.occu_var_act(dfg2)
    df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
    df_dfg = pd.merge(df1_dfg, df2_dfg, how='outer', on='var').fillna(0)
    dist_act = pdist(
        np.array([df_act['freq_x'].values, df_act['freq_y'].values]),
        'cosine')[0]
    dist_dfg = pdist(
        np.array([df_dfg['freq_x'].values, df_dfg['freq_y'].values]),
        'cosine')[0]
    # Idiom fix: compare NaN-ness directly instead of "== True".
    # NOTE(review): dist_act is not NaN-guarded here (original behavior
    # preserved) — confirm whether it needs the same fallback.
    if np.isnan(dist_dfg):
        dist_dfg = 1
    return dist_act, dist_dfg
예제 #25
0
def discover_performance_dfg(log: Union[EventLog, pd.DataFrame], business_hours: bool = False, worktiming: Union[List[int], None] = None, weekends: Union[List[int], None] = None, workcalendar=constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR) -> Tuple[dict, dict, dict]:
    """
    Discovers a performance directly-follows graph from an event log

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekends days (default: Saturday (6), Sunday (7))
    workcalendar
        (If the business hours are enabled) The work calendar to consider

    Returns
    ---------------
    performance_dfg
        Performance DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]: raise Exception("the method can be applied only to a traditional event log!")

    # Bug fix: the mutable list defaults ([7, 17] / [6, 7]) are replaced by
    # None sentinels resolved here; omitting the arguments keeps the old
    # behavior, so the change is backward-compatible.
    if worktiming is None:
        worktiming = [7, 17]
    if weekends is None:
        weekends = [6, 7]

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_glue=case_id_key, measure="performance", perf_aggregation_key="all",
                            business_hours=business_hours, worktiming=worktiming, weekends=weekends, workcalendar=workcalendar)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    else:
        from pm4py.algo.discovery.dfg.variants import performance as dfg_discovery
        properties = get_properties(log)
        properties[dfg_discovery.Parameters.AGGREGATION_MEASURE] = "all"
        properties[dfg_discovery.Parameters.BUSINESS_HOURS] = business_hours
        properties[dfg_discovery.Parameters.WORKTIMING] = worktiming
        properties[dfg_discovery.Parameters.WEEKENDS] = weekends
        # NOTE(review): workcalendar is forwarded only in the dataframe
        # branch above — confirm whether the log-based variant should also
        # receive it.
        dfg = dfg_discovery.apply(log, parameters=properties)
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    return dfg, start_activities, end_activities
예제 #26
0
def execute_script():
    """
    Transient analysis of the running example: builds the performance DFG,
    derives the tangible reachability graph and Q-matrix, and prints the
    state distribution one day (86400 s) after starting from the source.
    """
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    performance_dfg = dfg_miner.apply(log,
                                      variant=dfg_miner.Variants.PERFORMANCE)
    (reach_graph, tang_reach_graph, stochastic_map,
     q_matrix) = ctmc.get_tangible_reachability_and_q_matrix_from_dfg_performance(
         performance_dfg)
    # pick the source state
    source_state = [s for s in tang_reach_graph.states if s.name == "source1"][0]
    # analyse the distribution over the states of the system starting from the source after 86400.0 seconds (1 day)
    transient_result = ctmc.transient_analysis_from_tangible_q_matrix_and_single_state(
        tang_reach_graph, q_matrix, source_state, 86400.0)
    print(transient_result)
예제 #27
0
def gerar_previsoes_modelo_from_log_eventos(eventLog):
    """Predict case-completion probabilities over growing time horizons.

    Discovers a performance DFG from *eventLog*, builds the tangible
    reachability graph and Q-matrix of the corresponding CTMC, and runs a
    transient analysis from the source state for a series of horizons
    (30/60/90/180 days, then 1 through 10 years).

    Returns a list of dicts, one per horizon, with keys:
        "intervaloEmDias": horizon length in days
        "probabilidadeDeTermino": probability that the sink state
            ("sink1") has been reached within that horizon
    """
    dfg_perf = dfg_discovery.apply(eventLog,
                                   variant=dfg_discovery.Variants.PERFORMANCE)

    sa = start_activities.get_start_activities(eventLog)
    ea = end_activities.get_end_activities(eventLog)

    reach_graph, tang_reach_graph, stochastic_map, q_matrix = ctmc.get_tangible_reachability_and_q_matrix_from_dfg_performance(
        dfg_perf, parameters={
            "start_activities": sa,
            "end_activities": ea
        })

    intervalo_um_dia_em_segundos = 60 * 60 * 24
    # horizons: 30, 60, 90, 180 days, then 1..10 years
    dias_por_horizonte = [30, 60, 90, 180] + [365 * ano for ano in range(1, 11)]
    intervalos = [intervalo_um_dia_em_segundos * dias
                  for dias in dias_por_horizonte]

    previsoes_por_intervalo = []

    # pick the source state of the tangible reachability graph
    initial_state = [
        x for x in tang_reach_graph.states if x.name == "source1"
    ][0]

    for intervalo in intervalos:
        # distribution over the states of the system starting from the
        # source after `intervalo` seconds
        transient_result = ctmc.transient_analysis_from_tangible_q_matrix_and_single_state(
            tang_reach_graph, q_matrix, initial_state, intervalo)

        # probability mass sitting in the sink state = completion probability
        for key, value in filter(lambda elem: elem[0].name == "sink1",
                                 transient_result.items()):
            previsoes_por_intervalo.append({
                "intervaloEmDias":
                intervalo / intervalo_um_dia_em_segundos,
                "probabilidadeDeTermino":
                float(value)
            })

    return previsoes_por_intervalo
예제 #28
0
def __add_operator_recursive_logs(operator, threshold, act_key, logs, use_msd):
    """Attach children mined from *logs* to *operator* and return it.

    For every operator except LOOP, each sub-log becomes one child.
    For LOOP, the first sub-log is the "do" child; a single remaining
    sub-log becomes the "redo" child directly, while multiple remaining
    sub-logs are grouped under an intermediate XOR child.
    """

    def _mine_child(sublog):
        # Discover the DFG of `sublog` and recurse into the inductive
        # miner, producing a child subtree parented at `operator`.
        dfg = discover_dfg.apply(
            sublog,
            parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key
            })
        return __inductive_miner(sublog, dfg, threshold, operator, act_key,
                                 use_msd)

    if operator.operator != pt.Operator.LOOP:
        for log in logs:
            operator.children.append(_mine_child(log))
    else:
        # "do" part of the loop
        operator.children.append(_mine_child(logs[0]))
        logs = logs[1:]
        if len(logs) == 1:
            # single "redo" part
            operator.children.append(_mine_child(logs[0]))
        else:
            # multiple "redo" parts: group them under an XOR child
            operator.children.append(
                __add_operator_recursive_logs(
                    pt.ProcessTree(operator=pt.Operator.XOR, parent=operator),
                    threshold, act_key, logs, use_msd))
    return operator
예제 #29
0
    def test_45(self):
        """Render the running example's performance DFG as SVG and save it."""
        import os
        from pm4py.objects.log.importer.xes import importer as xes_importer
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.visualization.dfg import visualizer as dfg_visualization

        event_log = xes_importer.apply(
            os.path.join("input_data", "running-example.xes"))

        perf_dfg = dfg_discovery.apply(event_log,
                                       variant=dfg_discovery.Variants.PERFORMANCE)
        viz_params = {
            dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"
        }
        gviz = dfg_visualization.apply(perf_dfg, log=event_log,
                                       variant=dfg_visualization.Variants.PERFORMANCE,
                                       parameters=viz_params)

        out_path = os.path.join("test_output_data", "dfg.svg")
        dfg_visualization.save(gviz, out_path)
        os.remove(out_path)
예제 #30
0
def save_full_dfg(log):
    """Discover the full frequency DFG of *log*, preview it, and save it.

    Shows the DFG on screen in the default output format, then re-renders
    it as SVG and writes it to "dfg_full.svg".

    Returns the Graphviz object of the saved SVG-format DFG.
    """
    dfg = dfg_discovery.apply(log)

    # on-screen preview with the default output format
    gviz = dfg_visualization.apply(
        dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz)

    # re-render as SVG for saving; key FORMAT on the FREQUENCY variant
    # actually used (the original keyed it on PERFORMANCE, which only
    # worked because both variants share the same "format" key)
    parameters = {
        dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    }
    gviz = dfg_visualization.apply(
        dfg,
        log=log,
        variant=dfg_visualization.Variants.FREQUENCY,
        parameters=parameters)
    dfg_visualization.save(gviz, "dfg_full.svg")
    print('Full DFG saved as "dfg_full.svg"')
    return gviz