Ejemplo n.º 1
0
def detect(log: EventLog, alphabet: Dict[str, int], act_key: str, use_msd: bool) -> Optional[str]:
    candidates = set(alphabet.keys())
    for t in log:
        candidates = candidates.intersection(set(map(lambda e: e[act_key], t)))
        if len(candidates) == 0:
            return None
    for a in candidates:
        proj = EventLog()
        for t in log:
            proj.append(pm4py.filter_trace(lambda e: e[act_key] != a, t))
        if len(list(filter(lambda t: len(t) == 0, proj))) == 0:
            dfg_proj = discover_dfg.apply(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            alphabet_proj = pm4py.get_attribute_values(proj, act_key)
            start_act_proj = get_starters.get_start_activities(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            end_act_proj = get_ends.get_end_activities(log, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            pre_proj, post_proj = dfg_utils.get_transitive_relations(dfg_proj, alphabet_proj)
            cut = sequence_cut.detect(alphabet_proj, pre_proj, post_proj)
            if cut is not None:
                return a
            cut = xor_cut.detect(dfg_proj, alphabet_proj)
            if cut is not None:
                return a
            cut = concurrent_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj,
                                        msd= msdw_algo.derive_msd_witnesses(proj, msd_algo.apply(log, parameters={
                                        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}), parameters={
                                        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) if use_msd else None)
            if cut is not None:
                return a
            cut = loop_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj)
            if cut is not None:
                return a
    return None
Ejemplo n.º 2
0
def execute_script():
    log_path = os.path.join("..", "tests", "input_data", "interval_event_log.xes")
    #log_path = os.path.join("..", "tests", "input_data", "reviewing.xes")
    log = xes_importer.apply(log_path)
    parameters = {}
    parameters[constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp"
    parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp"
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = "concept:name"
    parameters["strict"] = False
    parameters["format"] = "svg"
    start_activities = sa_get.get_start_activities(log, parameters=parameters)
    end_activities = ea_get.get_end_activities(log, parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    soj_time = soj_time_get.apply(log, parameters=parameters)
    print("soj_time")
    print(soj_time)
    conc_act = conc_act_get.apply(log, parameters=parameters)
    print("conc_act")
    print(conc_act)
    efg = efg_get.apply(log, parameters=parameters)
    print("efg")
    print(efg)
    dfg_freq = dfg_algorithm.apply(log, parameters=parameters, variant=dfg_algorithm.Variants.FREQUENCY)
    dfg_perf = dfg_algorithm.apply(log, parameters=parameters, variant=dfg_algorithm.Variants.PERFORMANCE)
    dfg_gv_freq = dfg_vis_fact.apply(dfg_freq, log=log, variant=dfg_vis_fact.Variants.FREQUENCY,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_freq)
    dfg_gv_perf = dfg_vis_fact.apply(dfg_perf, log=log, variant=dfg_vis_fact.Variants.PERFORMANCE,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_perf)
    net, im, fm = dfg_conv.apply(dfg_freq)
    gviz = pn_vis.apply(net, im, fm, parameters=parameters)
    pn_vis.view(gviz)
Ejemplo n.º 3
0
def discover_dfg(log):
    """
    Discovers a DFG from a log_skeleton

    Parameters
    --------------
    log
        Event log_skeleton

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    dfg = dfg_discovery.apply(log)
    from pm4py.statistics.start_activities.log import get as start_activities_module
    from pm4py.statistics.end_activities.log import get as end_activities_module
    start_activities = start_activities_module.get_start_activities(log)
    end_activities = end_activities_module.get_end_activities(log)
    return dfg, start_activities, end_activities
Ejemplo n.º 4
0
def apply(log, parameters=None):
    """
    Discovers a footprint object from an event log
    (the footprints of the event log are returned)

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        Footprints object
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log,
                          variant=converter.TO_EVENT_LOG,
                          parameters=parameters)

    dfg = dfg_discovery.apply(log, parameters=parameters)
    parallel = {(x, y) for (x, y) in dfg if (y, x) in dfg}
    sequence = set(
        causal_discovery.apply(dfg, causal_discovery.Variants.CAUSAL_ALPHA))

    start_activities = set(
        get_start_activities.get_start_activities(log, parameters=parameters))
    end_activities = set(
        get_end_activities.get_end_activities(log, parameters=parameters))
    activities = set(y[activity_key] for x in log for y in x)

    return {
        Outputs.DFG.value:
        dfg,
        Outputs.SEQUENCE.value:
        sequence,
        Outputs.PARALLEL.value:
        parallel,
        Outputs.START_ACTIVITIES.value:
        start_activities,
        Outputs.END_ACTIVITIES.value:
        end_activities,
        Outputs.ACTIVITIES.value:
        activities,
        Outputs.MIN_TRACE_LENGTH.value:
        min(len(x) for x in log) if len(log) > 0 else 0
    }
Ejemplo n.º 5
0
def discover_dfg_miner(log):
    dfg = dfg_discovery.apply(log)
    sa = sa_get.get_start_activities(log)
    ea = ea_get.get_end_activities(log)
    net, im, fm = dfg_converter.apply(dfg,
                                      parameters={
                                          "start_activities": sa,
                                          "end_activities": ea
                                      })
    return net, im, fm
Ejemplo n.º 6
0
 def test_exporting_dfg_with_sa_ea(self):
     log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
     dfg = dfg_discovery.apply(log)
     sa = start_activities.get_start_activities(log)
     ea = end_activities.get_end_activities(log)
     dfg_exporter.apply(dfg, os.path.join("test_output_data", "running-example.dfg"),
                        parameters={dfg_exporter.Variants.CLASSIC.value.Parameters.START_ACTIVITIES: sa,
                                    dfg_exporter.Variants.CLASSIC.value.Parameters.END_ACTIVITIES: ea})
     dfg, sa, ea = dfg_importer.apply(os.path.join("test_output_data", "running-example.dfg"))
     os.remove(os.path.join("test_output_data", "running-example.dfg"))
Ejemplo n.º 7
0
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY
            - Parameters.DEPENDENCY_THRESH
            - Parameters.AND_MEASURE_THRESH
            - Parameters.MIN_ACT_COUNT
            - Parameters.MIN_DFG_OCCURRENCES
            - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
            - Parameters.LOOP_LENGTH_TWO_THRESH

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)

    start_activities = log_sa_filter.get_start_activities(
        log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log,
                                                      parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_alg.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_alg.apply(log, parameters=parameters_w2)
    freq_triples = dfg_alg.apply(log,
                                 parameters=parameters,
                                 variant=dfg_alg.Variants.FREQ_TRIPLES)

    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
Ejemplo n.º 8
0
def discover_abstraction_log(
    log: EventLog,
    parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Discovers an abstraction from a log that is useful for the Heuristics Miner ++ algorithm

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    start_activities = log_sa.get_start_activities(log, parameters=parameters)
    end_activities = log_ea.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    efg_parameters = copy(parameters)
    efg_parameters[efg_get.Parameters.KEEP_FIRST_FOLLOWING] = True
    dfg = efg_get.apply(log, parameters=efg_parameters)
    performance_dfg = dfg_alg.apply(log,
                                    variant=dfg_alg.Variants.PERFORMANCE,
                                    parameters=parameters)
    sojourn_time = soj_get.apply(log, parameters=parameters)
    concurrent_activities = conc_act_get.apply(log, parameters=parameters)
    return (start_activities, end_activities, activities_occurrences, dfg,
            performance_dfg, sojourn_time, concurrent_activities)
def apply_tree(log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(
        log,
        parameters={
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
        }).items() if v > 0]

    # gets the start activities from the log
    start_activities = log_start_act_stats.get_start_activities(
        log, parameters=parameters)
    # gets the end activities from the log
    end_activities = log_end_act_stats.get_end_activities(
        log, parameters=parameters)

    # get the activities in the log
    activities = log_attributes_stats.get_attribute_values(log, activity_key)

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min([len(trace) for trace in log]) == 0

    return apply_tree_dfg(dfg,
                          parameters=parameters,
                          activities=activities,
                          contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities,
                          end_activities=end_activities)
Ejemplo n.º 10
0
def apply_tree(log, parameters=None):
    """
    Apply the IMDF algorithm to a log_skeleton obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log_skeleton to use as activity name
            (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    # get the DFG
    dfg = [(k, v)
           for k, v in dfg_inst.apply(log, parameters=parameters).items()
           if v > 0]

    # gets the start activities from the log_skeleton
    start_activities = log_start_act_stats.get_start_activities(
        log, parameters=parameters)
    # gets the end activities from the log_skeleton
    end_activities = log_end_act_stats.get_end_activities(
        log, parameters=parameters)

    # get the activities in the log_skeleton
    activities = log_attributes_stats.get_attribute_values(log, activity_key)

    # check if the log_skeleton contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min([len(trace) for trace in log]) == 0

    return apply_tree_dfg(dfg,
                          parameters=parameters,
                          activities=activities,
                          contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities,
                          end_activities=end_activities)
Ejemplo n.º 11
0
def gerar_previsoes_modelo_from_log_eventos(eventLog):

    dfg_perf = dfg_discovery.apply(eventLog,
                                   variant=dfg_discovery.Variants.PERFORMANCE)

    sa = start_activities.get_start_activities(eventLog)
    ea = end_activities.get_end_activities(eventLog)

    reach_graph, tang_reach_graph, stochastic_map, q_matrix = ctmc.get_tangible_reachability_and_q_matrix_from_dfg_performance(
        dfg_perf, parameters={
            "start_activities": sa,
            "end_activities": ea
        })

    intervalo_um_dia_em_segundos = 60 * 60 * 24
    intervalos = [
        intervalo_um_dia_em_segundos * 30, intervalo_um_dia_em_segundos * 60,
        intervalo_um_dia_em_segundos * 90, intervalo_um_dia_em_segundos * 180,
        intervalo_um_dia_em_segundos * 365, intervalo_um_dia_em_segundos *
        365 * 2, intervalo_um_dia_em_segundos * 365 * 3,
        intervalo_um_dia_em_segundos * 365 * 4, intervalo_um_dia_em_segundos *
        365 * 5, intervalo_um_dia_em_segundos * 365 * 6,
        intervalo_um_dia_em_segundos * 365 * 7,
        intervalo_um_dia_em_segundos * 365 * 8,
        intervalo_um_dia_em_segundos * 365 * 9,
        intervalo_um_dia_em_segundos * 365 * 10
    ]

    previsoes_por_intervalo = []

    # pick the source state
    initial_state = [
        x for x in tang_reach_graph.states if x.name == "source1"
    ][0]
    final_state = [x for x in tang_reach_graph.states if x.name == "sink1"][0]

    for intervalo in intervalos:
        # analyse the distribution over the states of the system starting from the source after 172800.0 seconds (2 days)
        transient_result = ctmc.transient_analysis_from_tangible_q_matrix_and_single_state(
            tang_reach_graph, q_matrix, initial_state, intervalo)

        for key, value in filter(lambda elem: elem[0].name == "sink1",
                                 transient_result.items()):
            previsoes_por_intervalo.append({
                "intervaloEmDias":
                intervalo / intervalo_um_dia_em_segundos,
                "probabilidadeDeTermino":
                float(value)
            })

    return previsoes_por_intervalo
Ejemplo n.º 12
0
def apply_auto_filter(log, variants=None, parameters=None):
    """
    Apply an end attributes filter detecting automatically a percentage
    
    Parameters
    ----------
    log
        Log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value
    parameters
        Parameters of the algorithm, including:
            Parameters.DECREASING_FACTOR -> Decreasing factor (stops the algorithm when the next activity by occurrence is below
            this factor in comparison to previous)
            Parameters.ACTIVITY_KEY -> Attribute key (must be specified if different from concept:name)
    
    Returns
    ---------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                               parameters, DEFAULT_NAME_KEY)
    decreasing_factor = exec_utils.get_param_value(
        Parameters.DECREASING_FACTOR, parameters,
        filtering_constants.DECREASING_FACTOR)

    if len(log) > 0:
        parameters_variants = {PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key}
        if variants is None:
            variants = variants_filter.get_variants(
                log, parameters=parameters_variants)
        vc = variants_filter.get_variants_sorted_by_count(variants)
        end_activities = get_end_activities(log,
                                            parameters=parameters_variants)
        ealist = end_activities_common.get_sorted_end_activities_list(
            end_activities)
        eathreshold = end_activities_common.get_end_activities_threshold(
            ealist, decreasing_factor)
        filtered_log = filter_log_by_end_activities(end_activities, variants,
                                                    vc, eathreshold,
                                                    attribute_key)

        return filtered_log

    return log
Ejemplo n.º 13
0
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters[
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    start_activities = log_sa_filter.get_start_activities(log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_factory.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=parameters_w2)
    freq_triples = dfg_factory.apply(log, parameters=parameters, variant="freq_triples")

    return apply_heu_dfg(dfg, activities=activities, activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities, dfg_window_2=dfg_window_2, freq_triples=freq_triples,
                         parameters=parameters)
Ejemplo n.º 14
0
    def detect_loop(self):
        # p0 is part of return value, it contains the partition of activities
        # write all start and end activities in p1
        if self.contains_empty_trace():
            return [False, []]
        start_activities = list(
            start_activities_get.get_start_activities(self.log, parameters=self.parameters).keys())
        end_activities = list(end_activities_get.get_end_activities(self.log, parameters=self.parameters).keys())
        p1 = []
        for act in start_activities:
            if act not in p1:
                p1.append(act)
        for act in end_activities:
            if act not in p1:
                p1.append(act)

        # create new dfg without the transitions to start and end activities
        new_dfg = copy(self.dfg)
        copy_dfg = copy(new_dfg)
        for ele in copy_dfg:
            if ele[0][0] in p1 or ele[0][1] in p1:
                new_dfg.remove(ele)
        # get connected components of this new dfg
        new_ingoing = get_ingoing_edges(new_dfg)
        new_outgoing = get_outgoing_edges(new_dfg)
        # it was a pain in the *** to get a working directory of the current_activities, as we can't iterate ove the dfg
        current_activities = {}
        for element in self.activities:
            if element not in p1:
                current_activities.update({element: 1})
        p0 = detection_utils.get_connected_components(new_ingoing, new_outgoing, current_activities)
        p0.insert(0, p1)

        iterable_dfg = []
        for i in range(0, len(self.dfg)):
            iterable_dfg.append(self.dfg[i][0])
        # p0 is like P1,P2,...,Pn in line 3 on page 190 of the IM Thesis
        # check for subsets in p0 that have connections to and end or from a start activity
        p0_copy = []
        for int_el in p0:
            p0_copy.append(int_el)
        for element in p0_copy:  # for every set in p0
            removed = False
            if element in p0 and element != p0[0]:
                for act in element:  # for every activity in this set
                    for e in end_activities:  # for every end activity
                        if e not in start_activities:
                            if (act, e) in iterable_dfg:  # check if connected
                                # is there an element in dfg pointing from any act in a subset of p0 to an end activity
                                for activ in element:
                                    if activ not in p0[0]:
                                        p0[0].append(activ)
                                if element in p0:
                                    p0.remove(element)  # remove subsets that are connected to an end activity
                                removed = True
                                break
                    if removed:
                        break
                    for s in start_activities:
                        if s not in end_activities:
                            if not removed:
                                if (s, act) in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(element)  # remove subsets that are connected to an end activity
                                    removed = True
                                    break
                            else:
                                break
                    if removed:
                        break

        iterable_dfg = []
        for i in range(0, len(self.dfg)):
            iterable_dfg.append(self.dfg[i][0])

        p0_copy = []
        for int_el in p0:
            p0_copy.append(int_el)
        for element in p0_copy:
            if element in p0 and element != p0[0]:
                for act in element:
                    for e in self.end_activities:
                        if (e, act) in iterable_dfg:  # get those act, that are connected from an end activity
                            for e2 in self.end_activities:  # check, if the act is connected from all end activities
                                if (e2, act) not in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(element)  # remove subsets that are connected to an end activity
                                    break
                    for s in self.start_activities:
                        if (act, s) in iterable_dfg:  # same as above (in this case for activities connected to
                            # a start activity)
                            for s2 in self.start_activities:
                                if (act, s2) not in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(element)  # remove subsets that are connected to an end activity
                                    break

        if len(p0) > 1:
            return [True, p0]
        else:
            return [False, []]
Ejemplo n.º 15
0
def apply_tree(log, parameters):
    """
    Apply the IM_FF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    if pkgutil.find_loader("pandas"):
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get

        if type(log) is pd.DataFrame:
            vars = variants_get.get_variants_count(log, parameters=parameters)
            return apply_tree_variants(vars, parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    noise_threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD, parameters,
                                                 shared_constants.NOISE_THRESHOLD_IMF)

    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters=parameters).items() if v > 0]
    c = Counts()
    activities = attributes_get.get_attribute_values(log, activity_key)
    start_activities = list(start_activities_get.get_start_activities(log, parameters=parameters).keys())
    end_activities = list(end_activities_get.get_end_activities(log, parameters=parameters).keys())
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min([len(trace) for trace in log]) == 0

    # set the threshold parameter based on f and the max value in the dfg:
    max_value = 0
    for key, value in dfg:
        if value > max_value:
            max_value = value
    threshold = noise_threshold * max_value

    recursion_depth = 0
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth, noise_threshold, threshold,
                            start_activities, end_activities,
                            start_activities, end_activities, parameters=parameters)

    process_tree = get_tree_repr_implain.get_repr(sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = util.fold(process_tree)

    return process_tree
Ejemplo n.º 16
0
 def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
     if type_of_cut == 'concurrent':
         self.detected_cut = 'concurrent'
         new_logs = split.split_xor(cut[1], self.log, activity_key)
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=self.parameters).items() if v > 0]
             activities = attributes_get.get_attribute_values(l, activity_key)
             start_activities = list(
                 start_activities_get.get_start_activities(l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_get.get_end_activities(l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(l, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts,
                                   self.rec_depth + 1, self.f,
                                   noise_threshold=self.noise_threshold, start_activities=start_activities,
                                   end_activities=end_activities,
                                   initial_start_activities=self.initial_start_activities,
                                   initial_end_activities=self.initial_end_activities, parameters=self.parameters))
     elif type_of_cut == 'sequential':
         new_logs = split.split_sequence(cut[1], self.log, activity_key)
         self.detected_cut = "sequential"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=self.parameters).items() if v > 0]
             activities = attributes_get.get_attribute_values(l, activity_key)
             start_activities = list(
                 start_activities_get.get_start_activities(l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_get.get_end_activities(l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(l, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts,
                                   self.rec_depth + 1, self.f,
                                   noise_threshold=self.noise_threshold, start_activities=start_activities,
                                   end_activities=end_activities,
                                   initial_start_activities=self.initial_start_activities,
                                   initial_end_activities=self.initial_end_activities, parameters=self.parameters))
     elif type_of_cut == 'parallel':
         new_logs = split.split_parallel(cut[1], self.log, activity_key)
         self.detected_cut = "parallel"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=self.parameters).items() if v > 0]
             activities = attributes_get.get_attribute_values(l, activity_key)
             start_activities = list(
                 start_activities_get.get_start_activities(l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_get.get_end_activities(l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(l, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts,
                                   self.rec_depth + 1, self.f,
                                   noise_threshold=self.noise_threshold, start_activities=start_activities,
                                   end_activities=end_activities,
                                   initial_start_activities=self.initial_start_activities,
                                   initial_end_activities=self.initial_end_activities, parameters=self.parameters))
     elif type_of_cut == 'loopCut':
         new_logs = split.split_loop(cut[1], self.log, activity_key)
         self.detected_cut = "loopCut"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=self.parameters).items() if v > 0]
             activities = attributes_get.get_attribute_values(l, activity_key)
             start_activities = list(
                 start_activities_get.get_start_activities(l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_get.get_end_activities(l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(l, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts,
                                   self.rec_depth + 1, self.f,
                                   noise_threshold=self.noise_threshold,
                                   start_activities=start_activities,
                                   end_activities=end_activities,
                                   initial_start_activities=self.initial_start_activities,
                                   initial_end_activities=self.initial_end_activities, parameters=self.parameters))
Ejemplo n.º 17
0
    def detect_cut_if(self, second_iteration=False, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                                  pmutil.xes_constants.DEFAULT_NAME_KEY)
        # check base cases:
        empty_log = base_case.empty_log(self.log)
        single_activity = base_case.single_activity(self.log, activity_key)
        if empty_log:
            self.detected_cut = 'empty_log'
        elif single_activity:
            self.detected_cut = 'single_activity'
        # if no base cases are found, search for a cut:
        # use the cutting and splitting functions of im_plain:
        else:
            found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()

            if found_plain_cut:
                self.apply_cut_im_plain(type_of_cut, cut, activity_key)
            # if im_plain does not find a cut, we filter on our threshold and then again apply the im_cut detection
            # but this time, we have to use different splitting functions:
            else:
                self.filter_dfg_on_threshold()
                found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
                if found_plain_cut:
                    if type_of_cut == 'concurrent':
                        logging.debug("concurrent_cut_if")
                        self.detected_cut = 'concurrent'
                        new_logs = splitting_infrequent.split_xor_infrequent(cut[1], self.log, activity_key)
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=parameters).items() if v > 0]
                            activities = attributes_get.get_attribute_values(l, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(l, new_dfg, self.master_dfg, self.initial_dfg, activities,
                                                  self.counts,
                                                  self.rec_depth + 1, self.f,
                                                  noise_threshold=self.noise_threshold,
                                                  start_activities=start_activities,
                                                  end_activities=end_activities,
                                                  initial_start_activities=self.initial_start_activities,
                                                  initial_end_activities=self.initial_end_activities,
                                                  parameters=parameters))
                    elif type_of_cut == 'sequential':
                        logging.debug("sequential_if")
                        new_logs = splitting_infrequent.split_sequence_infrequent(cut[1], self.log, activity_key)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=parameters).items() if v > 0]
                            activities = attributes_get.get_attribute_values(l, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(l, new_dfg, self.master_dfg, self.initial_dfg, activities,
                                                  self.counts,
                                                  self.rec_depth + 1, self.f,
                                                  noise_threshold=self.noise_threshold,
                                                  start_activities=start_activities,
                                                  end_activities=end_activities,
                                                  initial_start_activities=self.initial_start_activities,
                                                  initial_end_activities=self.initial_end_activities,
                                                  parameters=parameters))
                    elif type_of_cut == 'parallel':
                        logging.debug("parallel_if")
                        new_logs = split.split_parallel(cut[1], self.log, activity_key)
                        self.detected_cut = "parallel"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=parameters).items() if v > 0]
                            activities = attributes_get.get_attribute_values(l, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(l, new_dfg, self.master_dfg, self.initial_dfg, activities,
                                                  self.counts,
                                                  self.rec_depth + 1, self.f,
                                                  noise_threshold=self.noise_threshold,
                                                  start_activities=start_activities,
                                                  end_activities=end_activities,
                                                  initial_start_activities=self.initial_start_activities,
                                                  initial_end_activities=self.initial_end_activities,
                                                  parameters=parameters))
                    elif type_of_cut == 'loopCut':
                        logging.debug("loopCut_if")
                        new_logs = splitting_infrequent.split_loop_infrequent(cut[1], self.log, activity_key)
                        self.detected_cut = "loopCut"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=parameters).items() if v > 0]
                            activities = attributes_get.get_attribute_values(l, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(l, new_dfg, self.master_dfg, self.initial_dfg, activities,
                                                  self.counts,
                                                  self.rec_depth + 1, self.f,
                                                  noise_threshold=self.noise_threshold,
                                                  start_activities=start_activities,
                                                  end_activities=end_activities,
                                                  initial_start_activities=self.initial_start_activities,
                                                  initial_end_activities=self.initial_end_activities,
                                                  parameters=parameters))

                else:
                    self.apply_fall_through_infrequent(parameters)
Ejemplo n.º 18
0
    def apply_fall_through_infrequent(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, self.parameters,
                                                  pmutil.xes_constants.DEFAULT_NAME_KEY)
    
        # set flags for fall_throughs, base case is True (enabled)
        use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters) or parameters[
            Parameters.EMPTY_TRACE_KEY]
        use_act_once_per_trace = (Parameters.ONCE_PER_TRACE_KEY not in parameters) or parameters[
            Parameters.ONCE_PER_TRACE_KEY]
        use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters) or parameters[
            Parameters.CONCURRENT_KEY]
        use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters) or parameters[
            Parameters.STRICT_TAU_LOOP_KEY]
        use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters) or parameters[Parameters.TAU_LOOP_KEY]

        if use_empty_trace:
            empty_traces_present, enough_traces, new_log = fall_through_infrequent.empty_trace_filtering(self.log, self.f)
            self.log = new_log
        else:
            empty_traces_present = False
            enough_traces = False
        # if an empty trace is found, the empty trace fallthrough applies
        if empty_traces_present and enough_traces:
            logging.debug("empty_trace_if")
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=self.parameters).items() if v > 0]
            activities = attributes_get.get_attribute_values(new_log, activity_key)
            start_activities = list(
                                start_activities_get.get_start_activities(new_log, parameters=parameters).keys())
            end_activities = list(
                                end_activities_get.get_end_activities(new_log, parameters=parameters).keys())
            self.children.append(
                SubtreeInfrequent(new_log, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts,
                                  self.rec_depth + 1, self.f,
                                  noise_threshold=self.noise_threshold,
                                  start_activities=start_activities,
                                  end_activities=end_activities,
                                  initial_start_activities=self.initial_start_activities,
                                  initial_end_activities=self.initial_end_activities, parameters=parameters))
        elif empty_traces_present and not enough_traces:
            # no node is added to the PT, instead we just use recursion on the log without the empty traces
            self.detect_cut_if(parameters=parameters)
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(self.log, self.activities, activity_key)
            else:
                activity_once = False
            if activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs as we need them to append to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if
                           v > 0]
                activities = attributes_get.get_attribute_values(new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(small_log, parameters=parameters).items() if
                             v > 0]
                small_activities = attributes_get.get_attribute_values(small_log, activity_key)
                start_activities = list(
                                start_activities_get.get_start_activities(new_log, parameters=parameters).keys())
                end_activities = list(
                                end_activities_get.get_end_activities(new_log, parameters=parameters).keys())
                # append the chosen activity as leaf:
                self.children.append(
                    SubtreeInfrequent(small_log, small_dfg, self.master_dfg, self.initial_dfg, small_activities,
                                      self.counts,
                                      self.rec_depth + 1, self.f,
                                      noise_threshold=self.noise_threshold,
                                      initial_start_activities=self.initial_start_activities,
                                      initial_end_activities=self.initial_end_activities, parameters=parameters))
                # continue with the recursion on the new log
                self.children.append(
                    SubtreeInfrequent(new_log, new_dfg, self.master_dfg, self.initial_dfg, activities,
                                      self.counts,
                                      self.rec_depth + 1, self.f,
                                      noise_threshold=self.noise_threshold,
                                      start_activities=start_activities,
                                      end_activities=end_activities,
                                      initial_start_activities=self.initial_start_activities,
                                      initial_end_activities=self.initial_end_activities, parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, key = fall_through.activity_concurrent(self, self.log,
                                                                                                self.activities,
                                                                                                activity_key,
                                                                                                parameters=parameters)
                else:
                    activity_concurrent = False
                if activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs on to append later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if
                               v > 0]
                    activities = attributes_get.get_attribute_values(new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(small_log, parameters=parameters).items() if
                                 v > 0]
                    small_activities = attributes_get.get_attribute_values(small_log, activity_key)
                    start_activities = list(
                                start_activities_get.get_start_activities(new_log, parameters=parameters).keys())
                    end_activities = list(
                                end_activities_get.get_end_activities(new_log, parameters=parameters).keys())
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreeInfrequent(small_log, small_dfg, self.master_dfg, self.initial_dfg,
                                          small_activities,
                                          self.counts,
                                          self.rec_depth + 1, self.f,
                                          noise_threshold=self.noise_threshold,
                                          initial_start_activities=self.initial_start_activities,
                                          initial_end_activities=self.initial_end_activities, parameters=parameters))
                    # continue with the recursion on the new log:
                    self.children.append(
                        SubtreeInfrequent(new_log, new_dfg, self.master_dfg, self.initial_dfg,
                                          activities,
                                          self.counts,
                                          self.rec_depth + 1, self.f,
                                          noise_threshold=self.noise_threshold,
                                          start_activities=start_activities,
                                          end_activities=end_activities,
                                          initial_start_activities=self.initial_start_activities,
                                          initial_end_activities=self.initial_end_activities, parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(self.log, self.start_activities,
                                                                            self.end_activities, activity_key)
                    else:
                        strict_tau_loop = False
                    if strict_tau_loop:
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if
                                   v > 0]
                        activities = attributes_get.get_attribute_values(new_log, activity_key)
                        start_activities = list(
                                start_activities_get.get_start_activities(new_log, parameters=parameters).keys())
                        end_activities = list(
                                end_activities_get.get_end_activities(new_log, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(new_log, new_dfg, self.master_dfg, self.initial_dfg,
                                              activities,
                                              self.counts,
                                              self.rec_depth + 1, self.f,
                                              noise_threshold=self.noise_threshold,
                                              start_activities=start_activities,
                                              end_activities=end_activities,
                                              initial_start_activities=self.initial_start_activities,
                                              initial_end_activities=self.initial_end_activities,
                                              parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(self.log, self.start_activities, activity_key)
                        else:
                            tau_loop = False
                        if tau_loop:
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if
                                       v > 0]
                            activities = attributes_get.get_attribute_values(new_log, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(new_log, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(new_log, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(new_log, new_dfg, self.master_dfg, self.initial_dfg,
                                                  activities,
                                                  self.counts,
                                                  self.rec_depth + 1, self.f,
                                                  noise_threshold=self.noise_threshold,
                                                  start_activities=start_activities,
                                                  end_activities=end_activities,
                                                  initial_start_activities=self.initial_start_activities,
                                                  initial_end_activities=self.initial_end_activities,
                                                  parameters=parameters))
                        else:
                            logging.debug("flower_if")
                            self.detected_cut = 'flower'
Ejemplo n.º 19
0
    def apply_fall_through(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall_throughs, base case is True (enabled)
        use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                           ) or parameters[Parameters.EMPTY_TRACE_KEY]
        use_act_once_per_trace = (
            Parameters.ONCE_PER_TRACE_KEY
            not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
        use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                              ) or parameters[Parameters.CONCURRENT_KEY]
        use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                               ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
        use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                        ) or parameters[Parameters.TAU_LOOP_KEY]

        if use_empty_trace:
            empty_trace, new_log = fall_through.empty_trace(self.log)
            # if an empty trace is found, the empty trace fallthrough applies
            #
        else:
            empty_trace = False
        if empty_trace:
            logging.debug("empty_trace")
            activites_left = []
            for trace in new_log:
                for act in trace:
                    if act[activity_key] not in activites_left:
                        activites_left.append(act[activity_key])
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_get.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_get.get_start_activities(
                    new_log, parameters=self.parameters).keys())
            end_activities = list(
                end_activities_get.get_end_activities(
                    new_log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
                small_log = filtering_utils.keep_one_trace_per_variant(
                    small_log, parameters=parameters)
            else:
                activity_once = False
            if use_act_once_per_trace and activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs as we need them to append to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_get.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_get.get_attribute_values(
                    small_log, activity_key)
                self.children.append(
                    SubtreePlain(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log
                start_activities = list(
                    start_activities_get.get_start_activities(
                        new_log, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_get.get_end_activities(
                        new_log, parameters=self.parameters).keys())
                self.children.append(
                    SubtreePlain(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                    small_log = filtering_utils.keep_one_trace_per_variant(
                        small_log, parameters=parameters)
                else:
                    activity_concurrent = False
                if use_act_concurrent and activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs on to append later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_get.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_get.get_attribute_values(
                        small_log, activity_key)
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreePlain(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log:
                    start_activities = list(
                        start_activities_get.get_start_activities(
                            new_log, parameters=self.parameters).keys())
                    end_activities = list(
                        end_activities_get.get_end_activities(
                            new_log, parameters=self.parameters).keys())
                    self.children.append(
                        SubtreePlain(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                        new_log = filtering_utils.keep_one_trace_per_variant(
                            new_log, parameters=parameters)
                    else:
                        strict_tau_loop = False
                    if use_strict_tau_loop and strict_tau_loop:
                        activites_left = []
                        for trace in new_log:
                            for act in trace:
                                if act[activity_key] not in activites_left:
                                    activites_left.append(act[activity_key])
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_get.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_get.get_start_activities(
                                new_log, parameters=self.parameters).keys())
                        end_activities = list(
                            end_activities_get.get_end_activities(
                                new_log, parameters=self.parameters).keys())
                        self.children.append(
                            SubtreePlain(new_log,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                            new_log = filtering_utils.keep_one_trace_per_variant(
                                new_log, parameters=parameters)
                        else:
                            tau_loop = False
                        if use_tau_loop and tau_loop:
                            activites_left = []
                            for trace in new_log:
                                for act in trace:
                                    if act[activity_key] not in activites_left:
                                        activites_left.append(
                                            act[activity_key])
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_get.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower model")
                            activites_left = []
                            for trace in self.log:
                                for act in trace:
                                    if act[activity_key] not in activites_left:
                                        activites_left.append(
                                            act[activity_key])
                            self.detected_cut = 'flower'
Ejemplo n.º 20
0
    def detect_cut(self, second_iteration=False, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if parameters is None:
                parameters = {}
            activity_key = exec_utils.get_param_value(
                Parameters.ACTIVITY_KEY, parameters,
                pmutil.xes_constants.DEFAULT_NAME_KEY)

            # check base cases:
            empty_log = base_case.empty_log(self.log)
            single_activity = base_case.single_activity(self.log, activity_key)
            if empty_log:
                self.detected_cut = 'empty_log'
            elif single_activity:
                self.detected_cut = 'single_activity'
            # if no base cases are found, search for a cut:
            else:
                conn_components = detection_utils.get_connected_components(
                    self.ingoing, self.outgoing, self.activities)
                this_nx_graph = transform_dfg_to_directed_nx_graph(
                    self.dfg, activities=self.activities)
                strongly_connected_components = [
                    list(x)
                    for x in nx.strongly_connected_components(this_nx_graph)
                ]
                xor_cut = self.detect_xor(conn_components)
                # the following part searches for a cut in the current log
                # if a cut is found, the log is split according to the cut, the resulting logs are saved in new_logs
                # recursion is used on all the logs in new_logs
                if xor_cut[0]:
                    logging.debug("xor_cut")
                    self.detected_cut = 'concurrent'
                    new_logs = split.split_xor(xor_cut[1], self.log,
                                               activity_key)
                    for i in range(len(new_logs)):
                        new_logs[
                            i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_get.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_get.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_get.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(l,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                else:
                    sequence_cut = cut_detection.detect_sequential_cut(
                        self, self.dfg, strongly_connected_components)
                    if sequence_cut[0]:
                        logging.debug("sequence_cut")
                        new_logs = split.split_sequence(
                            sequence_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[
                                i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_get.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    else:
                        parallel_cut = self.detect_concurrent()
                        if parallel_cut[0]:
                            logging.debug("parallel_cut")
                            new_logs = split.split_parallel(
                                parallel_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[
                                    i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                            self.detected_cut = "parallel"
                            for l in new_logs:
                                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                    l, parameters=parameters).items() if v > 0]
                                activities = attributes_get.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_get.get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_get.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l,
                                        new_dfg,
                                        self.master_dfg,
                                        self.initial_dfg,
                                        activities,
                                        self.counts,
                                        self.rec_depth + 1,
                                        noise_threshold=self.noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.
                                        initial_start_activities,
                                        initial_end_activities=self.
                                        initial_end_activities,
                                        parameters=parameters))
                        else:
                            loop_cut = self.detect_loop()
                            if loop_cut[0]:
                                logging.debug("loop_cut")
                                new_logs = split.split_loop(
                                    loop_cut[1], self.log, activity_key)
                                for i in range(len(new_logs)):
                                    new_logs[
                                        i] = filtering_utils.keep_one_trace_per_variant(
                                            new_logs[i], parameters=parameters)
                                self.detected_cut = "loopCut"
                                for l in new_logs:
                                    new_dfg = [
                                        (k, v) for k, v in dfg_inst.apply(
                                            l, parameters=parameters).items()
                                        if v > 0
                                    ]
                                    activities = attributes_get.get_attribute_values(
                                        l, activity_key)
                                    start_activities = list(
                                        start_activities_get.
                                        get_start_activities(
                                            l, parameters=parameters).keys())
                                    end_activities = list(
                                        end_activities_get.get_end_activities(
                                            l, parameters=parameters).keys())
                                    self.children.append(
                                        SubtreePlain(
                                            l,
                                            new_dfg,
                                            self.master_dfg,
                                            self.initial_dfg,
                                            activities,
                                            self.counts,
                                            self.rec_depth + 1,
                                            noise_threshold=self.
                                            noise_threshold,
                                            start_activities=start_activities,
                                            end_activities=end_activities,
                                            initial_start_activities=self.
                                            initial_start_activities,
                                            initial_end_activities=self.
                                            initial_end_activities,
                                            parameters=parameters))

                            # if the code gets to this point, there is no base_case and no cut found in the log
                            # therefore, we now apply fall through:
                            else:
                                self.apply_fall_through(parameters)
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
Ejemplo n.º 21
0
def inductive_miner(log, dfg, threshold, root, act_key, use_msd):
    alphabet = pm4py.get_attribute_values(log, act_key)
    start_activities = get_starters.get_start_activities(
        log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    end_activities = get_ends.get_end_activities(
        log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    empty_traces = pm4py.filter_log(lambda trace: len(trace) == 0, log)
    if len(empty_traces) == 0:
        if _is_base_case_act(log, act_key) or _is_base_case_silent(log):
            return _apply_base_case(log, root, act_key)
        pre, post = dfg_utils.get_transitive_relations(dfg, alphabet)
        cut = sequence_cut.detect(alphabet, pre, post)
        if cut is not None:
            return _add_operator_recursive(
                pt.ProcessTree(pt.Operator.SEQUENCE, root), threshold, act_key,
                sequence_cut.project(log, cut, act_key), use_msd)
        cut = xor_cut.detect(dfg, alphabet)
        if cut is not None:
            return _add_operator_recursive(
                pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key,
                xor_cut.project(log, cut, act_key), use_msd)
        cut = concurrent_cut.detect(
            dfg,
            alphabet,
            start_activities,
            end_activities,
            msd=msdw_algo.derive_msd_witnesses(
                log,
                msd_algo.apply(log,
                               parameters={
                                   constants.PARAMETER_CONSTANT_ACTIVITY_KEY:
                                   act_key
                               }),
                parameters={
                    constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key
                }) if use_msd else None)
        if cut is not None:
            return _add_operator_recursive(
                pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key,
                concurrent_cut.project(log, cut, act_key), use_msd)
        cut = loop_cut.detect(dfg, alphabet, start_activities, end_activities)
        if cut is not None:
            return _add_operator_recursive(
                pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key,
                loop_cut.project(log, cut, act_key), use_msd)
    if len(empty_traces) > 0:
        nempty = pm4py.filter_log(lambda t: len(t) > 0, log)
        return _add_operator_recursive(pt.ProcessTree(pt.Operator.XOR,
                                                      root), threshold,
                                       act_key, [EventLog(), nempty], use_msd)
    aopt = activity_once_per_trace.detect(log, alphabet, act_key)
    if aopt is not None:
        operator = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root)
        operator.children.append(
            pt.ProcessTree(operator=None, parent=operator, label=aopt))
        return _add_operator_recursive(
            operator, threshold, act_key,
            activity_once_per_trace.project(log, aopt, act_key), use_msd)
    act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd)
    if act_conc is not None:
        return _add_operator_recursive(
            pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key,
            activity_concurrent.project(log, act_conc, act_key), use_msd)
    stl = strict_tau_loop.detect(log, start_activities, end_activities,
                                 act_key)
    if stl is not None:
        return _add_operator_recursive(pt.ProcessTree(pt.Operator.LOOP,
                                                      root), threshold,
                                       act_key, [stl, EventLog()], use_msd)
    tl = tau_loop.detect(log, start_activities, act_key)
    if tl is not None:
        return _add_operator_recursive(pt.ProcessTree(pt.Operator.LOOP,
                                                      root), threshold,
                                       act_key, [tl, EventLog()], use_msd)
    return _flower(alphabet, root)
Ejemplo n.º 22
0
def __inductive_miner_internal(log,
                               dfg,
                               threshold,
                               root,
                               act_key,
                               use_msd,
                               remove_noise=False):
    alphabet = pm4py.get_event_attribute_values(log, act_key)
    if threshold > 0 and remove_noise:
        end_activities = get_ends.get_end_activities(
            log,
            parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})

        dfg = __filter_dfg_on_threshold(dfg, end_activities, threshold)

    original_length = len(log)
    log = pm4py.filter_log(lambda t: len(t) > 0, log)

    # revised EMPTYSTRACES
    if original_length - len(log) > original_length * threshold:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key,
            [EventLog(), log], use_msd)

    start_activities = get_starters.get_start_activities(
        log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    end_activities = get_ends.get_end_activities(
        log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})

    if __is_base_case_act(log, act_key) or __is_base_case_silent(log):
        return __apply_base_case(log, root, act_key)
    pre, post = dfg_utils.get_transitive_relations(dfg, alphabet)
    cut = sequence_cut.detect(alphabet, pre, post)
    if cut is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.SEQUENCE, root), threshold, act_key,
            sequence_cut.project(log, cut, act_key), use_msd)
    cut = xor_cut.detect(dfg, alphabet)
    if cut is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key,
            xor_cut.project(log, cut, act_key), use_msd)
    cut = concurrent_cut.detect(
        dfg,
        alphabet,
        start_activities,
        end_activities,
        msd=msdw_algo.derive_msd_witnesses(
            log,
            msd_algo.apply(log,
                           parameters={
                               constants.PARAMETER_CONSTANT_ACTIVITY_KEY:
                               act_key
                           }),
            parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        if use_msd else None)
    if cut is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key,
            concurrent_cut.project(log, cut, act_key), use_msd)
    cut = loop_cut.detect(dfg, alphabet, start_activities, end_activities)
    if cut is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key,
            loop_cut.project(log, cut, act_key), use_msd)

    aopt = activity_once_per_trace.detect(log, alphabet, act_key)
    if aopt is not None:
        operator = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root)
        operator.children.append(
            pt.ProcessTree(operator=None, parent=operator, label=aopt))
        return __add_operator_recursive_logs(
            operator, threshold, act_key,
            activity_once_per_trace.project(log, aopt, act_key), use_msd)
    act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd)
    if act_conc is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key,
            activity_concurrent.project(log, act_conc, act_key), use_msd)
    stl = strict_tau_loop.detect(log, start_activities, end_activities,
                                 act_key)
    if stl is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key,
            [stl, EventLog()], use_msd)
    tl = tau_loop.detect(log, start_activities, act_key)
    if tl is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key,
            [tl, EventLog()], use_msd)

    if threshold > 0 and not remove_noise:
        return __inductive_miner(log,
                                 dfg,
                                 threshold,
                                 root,
                                 act_key,
                                 use_msd,
                                 remove_noise=True)

    return __flower(alphabet, root)
def create_process_models(output_case_traces_cluster, path_data_sources,
                          dir_runtime_files, dir_dfg_cluster_files,
                          filename_dfg_cluster, rel_proportion_dfg_threshold,
                          logging_level):
    """
    Creates directly follows graphs out of a event log.
    :param output_case_traces_cluster: traces that are visualised
    :param path_data_sources: path of sources and outputs
    :param dir_runtime_files: folder containing files read and written during runtime
    :param dir_dfg_cluster_files: folder containing dfg png files
    :param filename_dfg_cluster: filename of dfg file (per cluster)
    :param rel_proportion_dfg_threshold: threshold for filtering out sensors in dfg relative to max occurrences of a sensor
    :param logging_level: level of logging
    :return:
    """

    # keep only needed columns
    output_case_traces_cluster = output_case_traces_cluster.reindex(
        columns={'Case', 'LC_Activity', 'Timestamp', 'Cluster'})
    output_case_traces_cluster = output_case_traces_cluster.rename(
        columns={
            'Case': 'case:concept:name',
            'LC_Activity': 'concept:name',
            'Timestamp': 'time:timestamp'
        })

    # create directory for dfg pngs
    os.mkdir(path_data_sources + dir_runtime_files + dir_dfg_cluster_files)
    # create dfg for each cluster
    clusters = output_case_traces_cluster.Cluster.unique()
    for cluster in clusters:
        log = output_case_traces_cluster.loc[output_case_traces_cluster.Cluster
                                             == cluster]
        log = log.astype(str)

        # convert pandas data frame to pm4py event log for further processing
        log = log_converter.apply(log)

        # keep only activities with more than certain number of occurrences
        activities = attributes_get.get_attribute_values(log, 'concept:name')
        # determine that number relative to the max number of occurrences of a sensor in a cluster. (the result is
        # the threshold at which an activity/activity strand is kept)
        min_number_of_occurrences = round(
            (max(activities.values()) * rel_proportion_dfg_threshold), 0)
        activities = {
            x: y
            for x, y in activities.items() if y >= min_number_of_occurrences
        }
        log = attributes_filter.apply(log, activities)

        # create dfg out of event log
        dfg = dfg_discovery.apply(log)

        # define start and
        start_activities = sa_get.get_start_activities(log)
        end_activities = ea_get.get_end_activities(log)

        # create png of dfg (if the graph does not show a graph, it is possible that the sensors did not trigger often)
        gviz = dfg_visualization.apply(
            dfg=dfg,
            log=log,
            variant=dfg_visualization.Variants.FREQUENCY,
            parameters={
                'start_activities': start_activities,
                'end_activities': end_activities
            })
        dfg_visualization.save(
            gviz,
            path_data_sources + dir_runtime_files + dir_dfg_cluster_files +
            (filename_dfg_cluster.format(cluster=str(cluster))))

    # logger
    logger = logging.getLogger(inspect.stack()[0][3])
    logger.setLevel(logging_level)
    logger.info("Saved directly follows graphs into '../%s'.",
                path_data_sources + dir_runtime_files + dir_dfg_cluster_files)