Example #1
def slice_dist_act(log_1, log_2, unit, parameters=None):
    (log1_list, freq1_list) = filter_subsets.logslice_percent(log_1, unit)
    (log2_list, freq2_list) = filter_subsets.logslice_percent(log_2, unit)

    if len(freq1_list) >= len(freq2_list):
        max_len = len(freq1_list)
        min_len = len(freq2_list)
        max_log = log1_list
        min_log = log2_list
        var_count_max = freq1_list
        var_count_min = freq2_list

    else:
        max_len = len(freq2_list)
        min_len = len(freq1_list)
        max_log = log2_list
        min_log = log1_list
        var_count_max = freq2_list
        var_count_min = freq1_list

    dist_matrix = np.zeros((max_len, min_len))
    max_per_var = np.zeros(max_len)
    max_freq = np.zeros(max_len)
    min_freq = np.zeros(min_len)
    min_per_var = np.zeros(min_len)
    index_rec = set(range(min_len))

    if log1_list == log2_list:
        print("Please give different variant lists!")
        dist = 0
    else:
        for i in range(max_len):
            dist_vec = np.zeros(min_len)
            act1 = attributes_filter.get_attribute_values(max_log[i], "concept:name")
            df1_act = act_dist_calc.occu_var_act(act1)
            for j in range(min_len):
                act2 = attributes_filter.get_attribute_values(min_log[j], "concept:name")
                df2_act = act_dist_calc.occu_var_act(act2)
                df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
                dist_vec[j] = pdist(np.array([df_act['freq_x'].values, df_act['freq_y'].values]), 'cosine')[0]
                dist_matrix[i][j] = dist_vec[j]
                if j == (min_len - 1):
                    max_loc_col = np.argmin(dist_vec)
                    if abs(dist_vec[max_loc_col]) <= 1e-8:
                        index_rec.discard(max_loc_col)
                        max_freq[i] = var_count_max[i] * var_count_min[max_loc_col] * 2
                        max_per_var[i] = dist_vec[max_loc_col] * max_freq[i] * 2
                    else:
                        max_freq[i] = var_count_max[i] * var_count_min[max_loc_col]
                        max_per_var[i] = dist_vec[max_loc_col] * max_freq[i]

        if len(index_rec) != 0:
            for i in list(index_rec):
                min_loc_row = np.argmin(dist_matrix[:, i])
                min_freq[i] = var_count_max[min_loc_row] * var_count_min[i]
                min_per_var[i] = dist_matrix[min_loc_row, i] * min_freq[i]

        dist = (np.sum(max_per_var) + np.sum(min_per_var)) / (np.sum(max_freq) + np.sum(min_freq))

    return dist
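The heart of the per-variant comparison above is the merge-and-cosine step: the two activity-frequency tables are outer-joined on the activity name, missing activities are filled with 0, and the cosine distance of the aligned vectors is computed. A minimal, self-contained sketch of just that step, with made-up frequency tables in the shape occu_var_act is assumed to return:

import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist

# made-up activity-frequency tables (same shape as occu_var_act output)
df1_act = pd.DataFrame({'var': ['A', 'B'], 'freq': [3, 1]})
df2_act = pd.DataFrame({'var': ['B', 'C'], 'freq': [2, 2]})

# the outer join aligns the two vocabularies; absent activities get frequency 0
df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
dist = pdist(np.array([df_act['freq_x'].values, df_act['freq_y'].values]), 'cosine')[0]
print(dist)  # cosine distance between the aligned frequency vectors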
Example #2
def dfg_dist_calc_minkowski(log1, log2, alpha):
    act1 = attributes_filter.get_attribute_values(log1, "concept:name")
    act2 = attributes_filter.get_attribute_values(log2, "concept:name")
    dfg1 = dfg_algorithm.apply(log1)
    dfg2 = dfg_algorithm.apply(log2)
    df1_act = act_dist_calc.occu_var_act(act1)
    df2_act = act_dist_calc.occu_var_act(act2)
    df1_dfg = act_dist_calc.occu_var_act(dfg1)
    df2_dfg = act_dist_calc.occu_var_act(dfg2)
    df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
    df_dfg = pd.merge(df1_dfg, df2_dfg, how='outer', on='var').fillna(0)
    dist_act = pdist(np.array([
        df_act['freq_x'].values / np.sum(df_act['freq_x'].values),
        df_act['freq_y'].values / np.sum(df_act['freq_y'].values)
    ]),
                     'minkowski',
                     p=2.)[0]
    dist_dfg = pdist(np.array([
        df_dfg['freq_x'].values / np.sum(df_dfg['freq_x'].values),
        df_dfg['freq_y'].values / np.sum(df_dfg['freq_y'].values)
    ]),
                     'minkowski',
                     p=2.)[0]
    dist = dist_act * alpha + dist_dfg * (1 - alpha)
    return dist
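This variant normalizes each frequency vector to sum to 1 before taking the Minkowski distance with p=2 (the Euclidean distance between the two distributions), and then mixes the activity-level and DFG-level distances with the weight alpha. A small sketch of the normalization step on toy vectors:

import numpy as np
from scipy.spatial.distance import pdist

# toy frequency vectors over an already-aligned vocabulary
f1 = np.array([4.0, 1.0, 0.0])
f2 = np.array([2.0, 2.0, 1.0])
# normalizing turns raw counts into distributions before measuring the distance
dist = pdist(np.array([f1 / np.sum(f1), f2 / np.sum(f2)]), 'minkowski', p=2.)[0]
print(dist)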
Example #3
def dfg_dist_calc_act(log1, log2):
    act1 = attributes_filter.get_attribute_values(log1, "concept:name")
    act2 = attributes_filter.get_attribute_values(log2, "concept:name")
    df1_act = act_dist_calc.occu_var_act(act1)
    df2_act = act_dist_calc.occu_var_act(act2)
    df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
    dist_act = pdist(
        np.array([df_act['freq_x'].values, df_act['freq_y'].values]),
        'cosine')[0]
    return dist_act
Example #4
def apply(dfg, log=None, parameters=None, activities_count=None, soj_time=None):
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    max_no_of_edges_in_diagram = exec_utils.get_param_value(Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 100000)
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters, [])
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters, [])
    font_size = exec_utils.get_param_value(Parameters.FONT_SIZE, parameters, 12)
    font_size = str(font_size)
    activities = dfg_utils.get_activities_from_dfg(dfg)

    if activities_count is None:
        if log is not None:
            activities_count = attr_get.get_attribute_values(log, activity_key, parameters=parameters)
        else:
            activities_count = {key: 1 for key in activities}

    if soj_time is None:
        if log is not None:
            soj_time = soj_time_get.apply(log, parameters=parameters)
        else:
            soj_time = {key: 0 for key in activities}

    return graphviz_visualization(activities_count, dfg, image_format=image_format, measure="performance",
                                  max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
                                  start_activities=start_activities, end_activities=end_activities, soj_time=soj_time,
                                  font_size=font_size)
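A hedged usage sketch for the function above (it appears to be the performance DFG visualizer of pm4py): it can be fed a plain DFG dictionary, and string parameter keys such as "format" are resolved by exec_utils against the Parameters enum in recent pm4py versions. The edge counts below are invented; without a log, each activity gets count 1 and sojourn time 0.

# hypothetical DFG with invented counts
dfg = {("register request", "examine"): 6, ("examine", "decide"): 6}
gviz = apply(dfg, parameters={"format": "svg", "font_size": 10})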
Example #5
def form_encoding_dictio_from_log(log, parameters=None):
    """
    Forms the encoding dictionary from the current log

    Parameters
    -------------
    log
        Event log
    parameters
        Parameters of the algorithm

    Returns
    -------------
    encoding_dictio
        Encoding dictionary
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    shared_obj = SharedObj()

    activities = attributes_get.get_attribute_values(log,
                                                     activity_key,
                                                     parameters=parameters)

    mapping = {}

    for act in activities:
        get_new_char(act, shared_obj)
        mapping[act] = shared_obj.mapping_dictio[act]

    return mapping
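The returned mapping assigns one character per activity, which makes whole traces representable as plain strings (handy for string-based distances such as edit distance). A tiny sketch of how such a dictionary could be used, with an invented mapping:

# invented mapping of the kind form_encoding_dictio_from_log returns
mapping = {"register": "a", "examine": "b", "decide": "c"}
trace = ["register", "examine", "examine", "decide"]
encoded = "".join(mapping[act] for act in trace)
print(encoded)  # abbc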
Example #6
def apply(dfg,
          log=None,
          parameters=None,
          activities_count=None,
          measure="frequency"):
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters,
                                              "png")
    max_no_of_edges_in_diagram = exec_utils.get_param_value(
        Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 75)
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES,
                                                  parameters, [])
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES,
                                                parameters, [])

    if activities_count is None:
        if log is not None:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)
        else:
            activities = dfg_utils.get_activities_from_dfg(dfg)
            activities_count = {key: 1 for key in activities}

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure=measure,
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities)
Example #7
def filter_log_on_max_no_activities(log,
                                    max_no_activities=25,
                                    parameters=None):
    """
    Filter a log on a maximum number of activities

    Parameters
    -------------
    log
        Log
    max_no_activities
        Maximum number of activities
    parameters
        Parameters of the algorithm

    Returns
    -------------
    filtered_log
        Filtered version of the event log
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, DEFAULT_NAME_KEY)
    parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key
    all_activities = sorted(
        [(x, y) for x, y in get_attribute_values(log, activity_key).items()],
        key=lambda x: x[1],
        reverse=True)
    activities = all_activities[:min(len(all_activities), max_no_activities)]
    activities = [x[0] for x in activities]

    if len(activities) < len(all_activities):
        log = apply_events(log, activities, parameters=parameters)
    return log
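The selection logic reduces to sorting the activities by occurrence count and keeping the top max_no_activities. A toy illustration of just that step:

# invented occurrence counts; keep the 3 most frequent activities
counts = {"A": 50, "B": 30, "C": 5, "D": 2}
max_no_activities = 3
all_activities = sorted(counts.items(), key=lambda x: x[1], reverse=True)
activities = [x[0] for x in all_activities[:min(len(all_activities), max_no_activities)]]
print(activities)  # ['A', 'B', 'C']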
Example #8
def get_activities_list(log, parameters=None):
    """
    Gets the activities list from a log object, sorted by activity name

    Parameters
    --------------
    log
        Log
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    activities_list
        List of activities sorted by activity name
    """
    from pm4py.statistics.attributes.pandas import get as pd_attributes_filter
    from pm4py.statistics.attributes.log import get as log_attributes_filter

    if parameters is None:
        parameters = {}
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    if type(log) is pd.DataFrame:
        activities = pd_attributes_filter.get_attribute_values(
            log, activity_key)
    else:
        activities = log_attributes_filter.get_attribute_values(
            log, activity_key)
    return sorted(list(activities.keys()))
Example #9
def form_encoding_dictio_from_two_logs(log1: EventLog, log2: EventLog, parameters: Optional[Dict[str, Any]] = None) -> \
Dict[str, str]:
    """
    Forms the encoding dictionary from a couple of logs

    Parameters
    ----------------
    log1
        First log
    log2
        Second log
    parameters
        Parameters of the algorithm

    Returns
    ----------------
    encoding_dictio
        Encoding dictionary
    """
    from pm4py.statistics.attributes.log import get as attributes_get

    if parameters is None:
        parameters = {}

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    shared_obj = SharedObj()

    activities_log_1 = attributes_get.get_attribute_values(
        log1, activity_key, parameters=parameters)
    activities_log_2 = attributes_get.get_attribute_values(
        log2, activity_key, parameters=parameters)

    mapping = {}

    for act in activities_log_1:
        if act not in mapping:
            get_new_char(act, shared_obj)
            mapping[act] = shared_obj.mapping_dictio[act]

    for act in activities_log_2:
        if act not in mapping:
            get_new_char(act, shared_obj)
            mapping[act] = shared_obj.mapping_dictio[act]

    return mapping
Example #10
def filter_log_relative_occurrence_event_attribute(
        log: EventLog,
        min_relative_stake: float,
        parameters: Optional[Dict[Any, Any]] = None) -> EventLog:
    """
    Filters the event log keeping only the events having an attribute value which occurs:
    - in at least the specified (min_relative_stake) percentage of events, when Parameters.KEEP_ONCE_PER_CASE = False
    - in at least the specified (min_relative_stake) percentage of cases, when Parameters.KEEP_ONCE_PER_CASE = True

    Parameters
    -------------------
    log
        Event log
    min_relative_stake
        Minimum percentage of cases (expressed as a number between 0 and 1) in which the attribute should occur.
    parameters
        Parameters of the algorithm, including:
        - Parameters.ATTRIBUTE_KEY => the attribute to use (default: concept:name)
        - Parameters.KEEP_ONCE_PER_CASE => decides the level of the filter to apply
        (if the filter should be applied on the cases, set it to True).

    Returns
    ------------------
    filtered_log
        Filtered event log
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY,
                                               parameters,
                                               xes.DEFAULT_NAME_KEY)
    keep_once_per_case = exec_utils.get_param_value(
        Parameters.KEEP_ONCE_PER_CASE, parameters, True)

    parameters_cp = copy(parameters)

    activities_occurrences = get_attribute_values(log,
                                                  attribute_key,
                                                  parameters=parameters_cp)

    if keep_once_per_case:
        # filter on cases
        filtered_attributes = set(x for x, y in activities_occurrences.items()
                                  if y >= min_relative_stake * len(log))
    else:
        # filter on events
        filtered_attributes = set(
            x for x, y in activities_occurrences.items()
            if y >= min_relative_stake * sum(len(x) for x in log))

    return apply_events(log, filtered_attributes, parameters=parameters)
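The threshold arithmetic itself is simple: an attribute value survives when its occurrence count reaches min_relative_stake times the number of cases (or of events, in the per-event mode). A toy check with invented counts:

# invented per-case occurrence counts for an attribute
occurrences = {"A": 90, "B": 40, "C": 5}
num_cases = 100
min_relative_stake = 0.3
kept = {a for a, n in occurrences.items() if n >= min_relative_stake * num_cases}
print(kept)  # {'A', 'B'}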
Example #11
def apply_heu(log, parameters=None):
    """
    Discovers a Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY
            - Parameters.DEPENDENCY_THRESH
            - Parameters.AND_MEASURE_THRESH
            - Parameters.MIN_ACT_COUNT
            - Parameters.MIN_DFG_OCCURRENCES
            - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
            - Parameters.LOOP_LENGTH_TWO_THRESH

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)

    start_activities = log_sa_filter.get_start_activities(
        log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log,
                                                      parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_alg.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_alg.apply(log, parameters=parameters_w2)
    freq_triples = dfg_alg.apply(log,
                                 parameters=parameters,
                                 variant=dfg_alg.Variants.FREQ_TRIPLES)

    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
Example #12
def apply_tree(log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(
        log,
        parameters={
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
        }).items() if v > 0]

    # gets the start activities from the log
    start_activities = log_start_act_stats.get_start_activities(
        log, parameters=parameters)
    # gets the end activities from the log
    end_activities = log_end_act_stats.get_end_activities(
        log, parameters=parameters)

    # get the activities in the log
    activities = log_attributes_stats.get_attribute_values(log, activity_key)

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    return apply_tree_dfg(dfg,
                          parameters=parameters,
                          activities=activities,
                          contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities,
                          end_activities=end_activities)
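Throughout these examples the DFG is passed around as a list of ((source, target), count) pairs with zero-count edges dropped, as in the comprehension above. A minimal illustration:

# invented DFG counts; only edges that actually occur are kept
dfg_counts = {("a", "b"): 3, ("b", "c"): 0, ("a", "c"): 1}
dfg = [(k, v) for k, v in dfg_counts.items() if v > 0]
print(dfg)  # [(('a', 'b'), 3), (('a', 'c'), 1)]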
Example #13
def dfg_dist_calc(log1, log2):
    act1 = attributes_filter.get_attribute_values(log1, "concept:name")
    act2 = attributes_filter.get_attribute_values(log2, "concept:name")
    dfg1 = dfg_algorithm.apply(log1)
    dfg2 = dfg_algorithm.apply(log2)
    df1_act = act_dist_calc.occu_var_act(act1)
    df2_act = act_dist_calc.occu_var_act(act2)
    df1_dfg = act_dist_calc.occu_var_act(dfg1)
    df2_dfg = act_dist_calc.occu_var_act(dfg2)
    df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
    df_dfg = pd.merge(df1_dfg, df2_dfg, how='outer', on='var').fillna(0)
    dist_act = pdist(
        np.array([df_act['freq_x'].values, df_act['freq_y'].values]),
        'cosine')[0]
    dist_dfg = pdist(
        np.array([df_dfg['freq_x'].values, df_dfg['freq_y'].values]),
        'cosine')[0]
    if np.isnan(dist_dfg):
        dist_dfg = 1
    return dist_act, dist_dfg
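The final check guards the degenerate case where one of the DFG frequency vectors is all zeros: the cosine distance is then undefined (zero norm in the denominator), scipy returns NaN, and the function pins the distance to the maximum value 1. A quick demonstration:

import numpy as np
from scipy.spatial.distance import pdist

# cosine distance against an all-zero vector is NaN (scipy also emits a RuntimeWarning)
vectors = np.array([[0.0, 0.0], [1.0, 2.0]])
dist = pdist(vectors, 'cosine')[0]
print(np.isnan(dist))  # True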
Example #14
def discover_abstraction_log(
    log: EventLog,
    parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Discovers an abstraction from a log that is useful for the Heuristics Miner ++ algorithm

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    start_activities = log_sa.get_start_activities(log, parameters=parameters)
    end_activities = log_ea.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    efg_parameters = copy(parameters)
    efg_parameters[efg_get.Parameters.KEEP_FIRST_FOLLOWING] = True
    dfg = efg_get.apply(log, parameters=efg_parameters)
    performance_dfg = dfg_alg.apply(log,
                                    variant=dfg_alg.Variants.PERFORMANCE,
                                    parameters=parameters)
    sojourn_time = soj_get.apply(log, parameters=parameters)
    concurrent_activities = conc_act_get.apply(log, parameters=parameters)
    return (start_activities, end_activities, activities_occurrences, dfg,
            performance_dfg, sojourn_time, concurrent_activities)
Example #15
def apply_tree(log, parameters=None):
    """
    Apply the IMDF algorithm to a log, obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    # get the DFG
    dfg = [(k, v)
           for k, v in dfg_inst.apply(log, parameters=parameters).items()
           if v > 0]

    # gets the start activities from the log
    start_activities = log_start_act_stats.get_start_activities(
        log, parameters=parameters)
    # gets the end activities from the log
    end_activities = log_end_act_stats.get_end_activities(
        log, parameters=parameters)

    # get the activities in the log
    activities = log_attributes_stats.get_attribute_values(log, activity_key)

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    return apply_tree_dfg(dfg,
                          parameters=parameters,
                          activities=activities,
                          contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities,
                          end_activities=end_activities)
Example #16
def get_decorated_net(net, initial_marking, final_marking, log, parameters=None, variant="frequency"):
    """
    Get a decorated net according to the specified variant (decorate Petri net based on DFG)

    Parameters
    ------------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    log
        Log to use to decorate the Petri net
    parameters
        Algorithm parameters
    variant
        Specify if the decoration should take into account the frequency or the performance

    Returns
    ------------
    gviz
        GraphViz object
    """
    if parameters is None:
        parameters = {}

    aggregation_measure = exec_utils.get_param_value(Parameters.AGGREGATION_MEASURE, parameters,
                                                     "sum" if "frequency" in variant else "mean")

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    stat_locale = exec_utils.get_param_value(Parameters.STAT_LOCALE, parameters, {})

    # we find the DFG
    if variant == "performance":
        dfg = performance.performance(log, parameters=parameters)
    else:
        dfg = native.native(log, parameters=parameters)
    # we find shortest paths
    spaths = get_shortest_paths(net)
    # we find the number of activities occurrences in the log
    activities_count = attr_get.get_attribute_values(log, activity_key, parameters=parameters)
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(net, dfg, spaths,
                                                                      activities_count,
                                                                      variant=variant,
                                                                      aggregation_measure=aggregation_measure,
                                                                      stat_locale=stat_locale)

    return visualize.apply(net, initial_marking, final_marking, parameters=parameters,
                           decorations=aggregated_statistics)
Example #17
def apply_auto_filter(log, variants=None, parameters=None):
    """
    Apply an attributes filter, automatically detecting a percentage

    Parameters
    ----------
    log
        Log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value
    parameters
        Parameters of the algorithm, including:
            Parameters.DECREASING_FACTOR -> Decreasing factor (stops the algorithm when the next activity by occurrence is
            below this factor in comparison to previous)
            Parameters.ATTRIBUTE_KEY -> Attribute key (must be specified if different from concept:name)

    Returns
    ---------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY,
                                               parameters, DEFAULT_NAME_KEY)
    decreasing_factor = exec_utils.get_param_value(
        Parameters.DECREASING_FACTOR, parameters,
        filtering_constants.DECREASING_FACTOR)

    parameters_variants = {
        PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key,
        PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key
    }
    if len(log) > 0:
        if variants is None:
            variants = variants_filter.get_variants(
                log, parameters=parameters_variants)
        vc = variants_filter.get_variants_sorted_by_count(variants)
        attributes_values = get_attribute_values(
            log, attribute_key, parameters=parameters_variants)
        alist = attributes_common.get_sorted_attributes_list(attributes_values)
        thresh = attributes_common.get_attributes_threshold(
            alist, decreasing_factor)
        filtered_log = filter_log_by_attributes_threshold(
            log, attributes_values, variants, vc, thresh, attribute_key)
        return filtered_log
    return log
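The decreasing factor determines where the automatic cut-off lands: scanning the attribute counts in descending order, the threshold advances as long as each next count has not dropped below factor times the previous one. A toy sketch of the idea (not pm4py's exact implementation):

# invented occurrence counts, sorted highest first
counts = [100, 90, 40, 35, 2]
decreasing_factor = 0.6
thresh = counts[0]
for prev, nxt in zip(counts, counts[1:]):
    if nxt < decreasing_factor * prev:
        break  # the drop from 90 to 40 is too steep, so the scan stops
    thresh = nxt
print(thresh)  # 90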
Example #18
    def test_dfdoc1(self):
        # to avoid static-method warnings in tests, which by construction
        # of the unittest package have to be expressed this way
        self.dummy_variable = "dummy_value"
        from pm4py.objects.log.importer.xes import factory as xes_importer
        log = xes_importer.import_log(
            os.path.join("input_data", "running-example.xes"))
        from pm4py.algo.discovery.dfg import factory as dfg_factory
        dfg = dfg_factory.apply(log)
        from pm4py.statistics.attributes.log import get as attributes_filter
        activities_count = attributes_filter.get_attribute_values(
            log, "concept:name")

        from pm4py.visualization.dfg.versions import simple_visualize as dfg_visualize
        gviz = dfg_visualize.graphviz_visualization(activities_count, dfg)
        del gviz
Example #19
def execute_script():
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    print("number of cases", len(log))
    print("number of events", sum(len(x) for x in log))
    print("number of variants", len(pm4py.get_variants(log)))
    ac = get.get_attribute_values(log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(log)
    perc = 0.5
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, ac, perc)
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, ac, perc)
    aa = time.time()
    aligned_traces = dfg_alignment.apply(log, dfg, sa, ea)
    bb = time.time()
    net, im, fm = pm4py.convert_to_petri_net(dfg, sa, ea)
    for trace in aligned_traces:
        if trace["cost"] != trace["internal_cost"]:
            print(trace)
    print(bb - aa)
    print(sum(x["visited_states"] for x in aligned_traces))
    print(
        sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
            for x in aligned_traces))
    gviz = visualizer.apply(dfg,
                            activities_count=ac,
                            parameters={
                                "start_activities": sa,
                                "end_activities": ea,
                                "format": "svg"
                            })
    visualizer.view(gviz)
    cc = time.time()
    aligned_traces2 = petri_alignments.apply(
        log,
        net,
        im,
        fm,
        variant=petri_alignments.Variants.VERSION_DIJKSTRA_LESS_MEMORY)
    dd = time.time()
    print(dd - cc)
    print(sum(x["visited_states"] for x in aligned_traces2))
    print(
        sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
            for x in aligned_traces2))
Example #20
def filter_log_on_max_no_activities(
    log: EventLog,
    max_no_activities: int = 25,
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> EventLog:
    """
    Filter a log on a maximum number of activities

    Parameters
    -------------
    log
        Log
    max_no_activities
        Maximum number of activities
    parameters
        Parameters of the algorithm

    Returns
    -------------
    filtered_log
        Filtered version of the event log
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, DEFAULT_NAME_KEY)
    parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key
    all_activities = sorted(
        [(x, y) for x, y in get_attribute_values(log, activity_key).items()],
        key=lambda x: x[1],
        reverse=True)
    activities = all_activities[:min(len(all_activities), max_no_activities)]
    activities = [x[0] for x in activities]

    if len(activities) < len(all_activities):
        log = apply_events(log, activities, parameters=parameters)
    return log
Example #21
def apply(dfg,
          log=None,
          parameters=None,
          activities_count=None,
          measure="frequency"):
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    image_format = "png"
    max_no_of_edges_in_diagram = 75

    if "format" in parameters:
        image_format = parameters["format"]
    if "maxNoOfEdgesInDiagram" in parameters:
        max_no_of_edges_in_diagram = parameters["maxNoOfEdgesInDiagram"]

    start_activities = parameters[
        "start_activities"] if "start_activities" in parameters else []
    end_activities = parameters[
        "end_activities"] if "end_activities" in parameters else []

    if activities_count is None:
        if log is not None:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)
        else:
            activities = dfg_utils.get_activities_from_dfg(dfg)
            activities_count = {key: 1 for key in activities}

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure=measure,
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities)
Example #22
def apply_heu(log, parameters=None):
    """
    Discovers a Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    start_activities = log_sa_filter.get_start_activities(log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_factory.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=parameters_w2)
    freq_triples = dfg_factory.apply(log, parameters=parameters, variant="freq_triples")

    return apply_heu_dfg(dfg, activities=activities, activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities, dfg_window_2=dfg_window_2, freq_triples=freq_triples,
                         parameters=parameters)
Example #23
def apply_tree(log, parameters):
    """
    Apply the IM_FF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    if pkgutil.find_loader("pandas"):
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get

        if type(log) is pd.DataFrame:
            variants = variants_get.get_variants_count(log, parameters=parameters)
            return apply_tree_variants(variants, parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    noise_threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD, parameters,
                                                 shared_constants.NOISE_THRESHOLD_IMF)

    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters=parameters).items() if v > 0]
    c = Counts()
    activities = attributes_get.get_attribute_values(log, activity_key)
    start_activities = list(start_activities_get.get_start_activities(log, parameters=parameters).keys())
    end_activities = list(end_activities_get.get_end_activities(log, parameters=parameters).keys())
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    # set the threshold relative to the noise threshold and the max edge value in the dfg:
    max_value = 0
    for key, value in dfg:
        if value > max_value:
            max_value = value
    threshold = noise_threshold * max_value

    recursion_depth = 0
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth, noise_threshold, threshold,
                            start_activities, end_activities,
                            start_activities, end_activities, parameters=parameters)

    process_tree = get_tree_repr_implain.get_repr(sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = util.fold(process_tree)

    return process_tree
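Note that the noise threshold of IM_FF is relative rather than absolute: it is scaled by the heaviest edge in the DFG, so the same noise_threshold value adapts to logs of different sizes. A minimal illustration of that computation:

# invented DFG as (edge, count) pairs
dfg = [(("a", "b"), 50), (("b", "c"), 10)]
noise_threshold = 0.2
max_value = max((v for _, v in dfg), default=0)
threshold = noise_threshold * max_value
print(threshold)  # 10.0 -- edges well below this weight are treated as noise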
Example #24
 print("")
 print(log_path)
 if "xes" in log_name:
     from pm4py.statistics.attributes.log import get as attributes_get_log
     log = pm4py.read_xes(log_path)
     for trace in log:
         for event in trace:
             if True and "lifecycle:transition" in event:
                 event["@@classifier"] = event[
                     "concept:name"] + "+" + event[
                         "lifecycle:transition"]
                 # event["concept:name"] = event["concept:name"] + "+" + event["lifecycle:transition"]
             else:
                 event["@@classifier"] = event["concept:name"]
     activities = set(
         attributes_get_log.get_attribute_values(log,
                                                 CLASSIFIER).keys())
     variants = variants_get.get_variants(
         log, parameters={"pm4py:param:activity_key": CLASSIFIER})
     fp_log = pm4py.algo.discovery.footprints.log.variants.entire_event_log.apply(
         log, parameters={"pm4py:param:activity_key": CLASSIFIER})
 elif "parquet" in log_name:
     from pm4py.statistics.attributes.pandas import get as attributes_get_pandas
     dataframe = pd.read_parquet(log_path)
     activities = set(
         attributes_get_pandas.get_attribute_values(
             dataframe, CLASSIFIER).keys())
     variants = pm4py.get_variants(dataframe)
     fp_log = pm4py.algo.discovery.footprints.log.variants.entire_dataframe.apply(
         dataframe)
 print("start tree_im_clean")
 tree_im_clean = im_clean.apply_tree(
Example #25
def apply(dfg: Dict[Tuple[str, str], int],
          log: EventLog = None,
          parameters: Optional[Dict[Any, Any]] = None,
          activities_count: Dict[str, int] = None,
          soj_time: Dict[str, float] = None) -> Digraph:
    """
    Visualize a frequency directly-follows graph

    Parameters
    -----------------
    dfg
        Frequency Directly-follows graph
    log
        (if provided) Event log for the calculation of statistics
    activities_count
        (if provided) Dictionary mapping each activity to its number of occurrences in the log
    soj_time
        (if provided) Dictionary mapping each activity to its average sojourn time
    parameters
        Variant-specific parameters

    Returns
    -----------------
    gviz
        Graphviz digraph
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters,
                                              "png")
    max_no_of_edges_in_diagram = exec_utils.get_param_value(
        Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 100000)
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES,
                                                  parameters, {})
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES,
                                                parameters, {})
    font_size = exec_utils.get_param_value(Parameters.FONT_SIZE, parameters,
                                           12)
    font_size = str(font_size)
    activities = dfg_utils.get_activities_from_dfg(dfg)
    bgcolor = exec_utils.get_param_value(Parameters.BGCOLOR, parameters,
                                         "transparent")
    stat_locale = exec_utils.get_param_value(Parameters.STAT_LOCALE,
                                             parameters, None)
    if stat_locale is None:
        stat_locale = {}

    if activities_count is None:
        if log is not None:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)
        else:
            # the frequency of an activity in the log is at least the number of occurrences of
            # incoming arcs in the DFG.
            # if the frequency of the start activities nodes is also provided, use also that.
            activities_count = Counter({key: 0 for key in activities})
            for el in dfg:
                activities_count[el[1]] += dfg[el]
            if isinstance(start_activities, dict):
                for act in start_activities:
                    activities_count[act] += start_activities[act]

    if soj_time is None:
        if log is not None:
            soj_time = soj_time_get.apply(log, parameters=parameters)
        else:
            soj_time = {key: 0 for key in activities}

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure="frequency",
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities,
        soj_time=soj_time,
        font_size=font_size,
        bgcolor=bgcolor,
        stat_locale=stat_locale)
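The log-free fallback in this variant is worth noting: when neither a log nor explicit counts are available, each activity's frequency is estimated from below as the sum of the counts of its incoming DFG arcs, plus its start-activity count when that is known. A small sketch of the estimation with invented numbers:

from collections import Counter

# invented DFG and start-activity counts
dfg = {("a", "b"): 4, ("b", "c"): 4}
start_activities = {"a": 4}
activities_count = Counter({act: 0 for edge in dfg for act in edge})
for (_, target), count in dfg.items():
    activities_count[target] += count  # incoming arcs bound the frequency from below
for act, count in start_activities.items():
    activities_count[act] += count
print(activities_count)  # Counter({'a': 4, 'b': 4, 'c': 4})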
Example #26
def select_attributes_from_log_for_tree(
        log: EventLog,
        max_cases_for_attr_selection=DEFAULT_MAX_CASES_FOR_ATTR_SELECTION,
        max_diff_occ=DEFAULT_MAX_CASES_FOR_ATTR_SELECTION / 4):
    """
    Select attributes from log for tree

    Parameters
    ------------
    log
        Log
    max_cases_for_attr_selection
        Maximum number of cases to consider for attribute selection
    max_diff_occ
        Maximum number of different occurrences

    Returns
    ------------
    string_trace_attributes_to_consider
        String trace attributes to consider
    string_event_attributes_to_consider
        String event attributes to consider
    numeric_trace_attributes_to_consider
        Numeric trace attributes to consider
    numeric_event_attributes_to_consider
        Numeric event attributes to consider
    """
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG)

    if len(log) > max_cases_for_attr_selection:
        filtered_log = sampling.sample(log, max_cases_for_attr_selection)
    else:
        filtered_log = log
    event_attributes = get_all_event_attributes_from_log(filtered_log)
    trace_attributes = get_all_trace_attributes_from_log(filtered_log)
    event_attributes_values = {}
    trace_attributes_values = {}
    for attr in event_attributes:
        event_attributes_values[attr] = set(
            get_attribute_values(log, attr).keys())
    for attr in trace_attributes:
        trace_attributes_values[attr] = set(
            get_trace_attribute_values(log, attr).keys())

    numeric_event_attributes_to_consider = list()
    string_event_attributes_to_consider = list()
    numeric_trace_attributes_to_consider = list()
    string_trace_attributes_to_consider = list()

    for attr in event_attributes_values:
        if type(list(event_attributes_values[attr])[0]) is int or type(
                list(event_attributes_values[attr])[0]) is float:
            numeric_event_attributes_to_consider.append(attr)
        elif type(list(event_attributes_values[attr])[0]) is str and len(
                event_attributes_values[attr]) < max_diff_occ:
            string_event_attributes_to_consider.append(attr)

    for attr in trace_attributes_values:
        if type(list(trace_attributes_values[attr])[0]) is int or type(
                list(trace_attributes_values[attr])[0]) is float:
            numeric_trace_attributes_to_consider.append(attr)
        elif type(list(trace_attributes_values[attr])[0]) is str and len(
                trace_attributes_values[attr]) < max_diff_occ:
            string_trace_attributes_to_consider.append(attr)

    numeric_event_attributes_to_consider = check_event_attributes_presence(
        log, numeric_event_attributes_to_consider)
    string_event_attributes_to_consider = check_event_attributes_presence(
        log, string_event_attributes_to_consider)
    numeric_trace_attributes_to_consider = check_trace_attributes_presence(
        log, numeric_trace_attributes_to_consider)
    string_trace_attributes_to_consider = check_trace_attributes_presence(
        log, string_trace_attributes_to_consider)

    return string_trace_attributes_to_consider, string_event_attributes_to_consider, numeric_trace_attributes_to_consider, numeric_event_attributes_to_consider
Example #27
import numpy.random as random

random.seed(0)

import pm4py.statistics.variants.log.get as getvariants
# the following imports are assumed (paths as in pm4py >= 2.1; older releases
# expose Trace/Event under pm4py.objects.log.log instead of pm4py.objects.log.obj)
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.log.exporter.xes import exporter as xes_exporter
from pm4py.objects.log.obj import Trace, Event
from pm4py.objects.log.util.log import project_traces
from pm4py.statistics.attributes.log.get import get_attribute_values

'''
Author : Boltenhagen Mathilde
Date : June 2020

randomSequences.py : this file creates 1000 mock traces
'''

log = xes_importer.apply("<original log>")
variants = getvariants.get_variants(log)

# get the activities and the maximum trace length in the log
activities = list(get_attribute_values(log, "concept:name").keys())
max_len = len(max(project_traces(log), key=len))

log._list = []
for t in range(1000):
    new_sequence = Trace()
    # random length of the fake sequence
    size_of_sequence = random.randint(1, max_len - 1)
    # random activities (numpy's randint upper bound is exclusive,
    # so indices 0 .. len(activities) - 1 are all reachable)
    for e in range(size_of_sequence):
        event = Event()
        event["concept:name"] = activities[random.randint(0, len(activities))]
        new_sequence.append(event)
    log._list.append(new_sequence)

xes_exporter.apply(log, "<1000 mock traces>")
Example #28
    def detect_cut(self, second_iteration=False, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if parameters is None:
                parameters = {}
            activity_key = exec_utils.get_param_value(
                Parameters.ACTIVITY_KEY, parameters,
                pmutil.xes_constants.DEFAULT_NAME_KEY)

            # check base cases:
            empty_log = base_case.empty_log(self.log)
            single_activity = base_case.single_activity(self.log, activity_key)
            if empty_log:
                self.detected_cut = 'empty_log'
            elif single_activity:
                self.detected_cut = 'single_activity'
            # if no base cases are found, search for a cut:
            else:
                conn_components = detection_utils.get_connected_components(
                    self.ingoing, self.outgoing, self.activities)
                this_nx_graph = transform_dfg_to_directed_nx_graph(
                    self.dfg, activities=self.activities)
                strongly_connected_components = [
                    list(x)
                    for x in nx.strongly_connected_components(this_nx_graph)
                ]
                xor_cut = self.detect_xor(conn_components)
                # the following part searches for a cut in the current log
                # if a cut is found, the log is split according to the cut, the resulting logs are saved in new_logs
                # recursion is used on all the logs in new_logs
                if xor_cut[0]:
                    logging.debug("xor_cut")
                    self.detected_cut = 'concurrent'
                    new_logs = split.split_xor(xor_cut[1], self.log,
                                               activity_key)
                    for i in range(len(new_logs)):
                        new_logs[
                            i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_get.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_get.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_get.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(l,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                else:
                    sequence_cut = cut_detection.detect_sequential_cut(
                        self, self.dfg, strongly_connected_components)
                    if sequence_cut[0]:
                        logging.debug("sequence_cut")
                        new_logs = split.split_sequence(
                            sequence_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[
                                i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_get.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    else:
                        parallel_cut = self.detect_concurrent()
                        if parallel_cut[0]:
                            logging.debug("parallel_cut")
                            new_logs = split.split_parallel(
                                parallel_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[
                                    i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                            self.detected_cut = "parallel"
                            for l in new_logs:
                                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                    l, parameters=parameters).items() if v > 0]
                                activities = attributes_get.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_get.get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_get.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l,
                                        new_dfg,
                                        self.master_dfg,
                                        self.initial_dfg,
                                        activities,
                                        self.counts,
                                        self.rec_depth + 1,
                                        noise_threshold=self.noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.
                                        initial_start_activities,
                                        initial_end_activities=self.
                                        initial_end_activities,
                                        parameters=parameters))
                        else:
                            loop_cut = self.detect_loop()
                            if loop_cut[0]:
                                logging.debug("loop_cut")
                                new_logs = split.split_loop(
                                    loop_cut[1], self.log, activity_key)
                                for i in range(len(new_logs)):
                                    new_logs[
                                        i] = filtering_utils.keep_one_trace_per_variant(
                                            new_logs[i], parameters=parameters)
                                self.detected_cut = "loopCut"
                                for l in new_logs:
                                    new_dfg = [
                                        (k, v) for k, v in dfg_inst.apply(
                                            l, parameters=parameters).items()
                                        if v > 0
                                    ]
                                    activities = attributes_get.get_attribute_values(
                                        l, activity_key)
                                    start_activities = list(
                                        start_activities_get.
                                        get_start_activities(
                                            l, parameters=parameters).keys())
                                    end_activities = list(
                                        end_activities_get.get_end_activities(
                                            l, parameters=parameters).keys())
                                    self.children.append(
                                        SubtreePlain(
                                            l,
                                            new_dfg,
                                            self.master_dfg,
                                            self.initial_dfg,
                                            activities,
                                            self.counts,
                                            self.rec_depth + 1,
                                            noise_threshold=self.noise_threshold,
                                            start_activities=start_activities,
                                            end_activities=end_activities,
                                            initial_start_activities=self.initial_start_activities,
                                            initial_end_activities=self.initial_end_activities,
                                            parameters=parameters))

                            # if the code gets to this point, no base case and no cut
                            # were found in the log; therefore, we apply a fall-through:
                            else:
                                self.apply_fall_through(parameters)
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
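
The recursion above is normally not called directly: pm4py's inductive-miner entry point builds the initial DFG and the root SubtreePlain and drives the cut detection. A minimal usage sketch follows, assuming a pm4py release that ships the simplified top-level API (entry-point names vary across versions, and "running-example.xes" is a placeholder file name):

import pm4py

# load an event log and discover a process tree with the inductive miner;
# a noise_threshold > 0 selects the noise-filtering (IMf) variant
log = pm4py.read_xes("running-example.xes")
tree = pm4py.discover_process_tree_inductive(log, noise_threshold=0.2)
print(tree)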
Ejemplo n.º 29
0
    def apply_fall_through(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # flags enabling each fall-through; every flag defaults to True (enabled)
        use_empty_trace = parameters.get(Parameters.EMPTY_TRACE_KEY, True)
        use_act_once_per_trace = parameters.get(Parameters.ONCE_PER_TRACE_KEY, True)
        use_act_concurrent = parameters.get(Parameters.CONCURRENT_KEY, True)
        use_strict_tau_loop = parameters.get(Parameters.STRICT_TAU_LOOP_KEY, True)
        use_tau_loop = parameters.get(Parameters.TAU_LOOP_KEY, True)

        if use_empty_trace:
            empty_trace, new_log = fall_through.empty_trace(self.log)
            # if an empty trace is found, the empty-trace fall-through applies
        else:
            empty_trace = False
        if empty_trace:
            logging.debug("empty_trace")
            # collect the activities remaining in the log (currently unused)
            activities_left = []
            for trace in new_log:
                for act in trace:
                    if act[activity_key] not in activities_left:
                        activities_left.append(act[activity_key])
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_get.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_get.get_start_activities(
                    new_log, parameters=self.parameters).keys())
            end_activities = list(
                end_activities_get.get_end_activities(
                    new_log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
                small_log = filtering_utils.keep_one_trace_per_variant(
                    small_log, parameters=parameters)
            else:
                activity_once = False
            if use_act_once_per_trace and activity_once:
                self.detected_cut = 'parallel'
                # create two new DFGs, since both sublogs are appended to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_get.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_get.get_attribute_values(
                    small_log, activity_key)
                self.children.append(
                    SubtreePlain(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log
                start_activities = list(
                    start_activities_get.get_start_activities(
                        new_log, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_get.get_end_activities(
                        new_log, parameters=self.parameters).keys())
                self.children.append(
                    SubtreePlain(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                    small_log = filtering_utils.keep_one_trace_per_variant(
                        small_log, parameters=parameters)
                else:
                    activity_concurrent = False
                if use_act_concurrent and activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new DFGs to append later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_get.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_get.get_attribute_values(
                        small_log, activity_key)
                    # append the concurrent activity as a leaf:
                    self.children.append(
                        SubtreePlain(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log:
                    start_activities = list(
                        start_activities_get.get_start_activities(
                            new_log, parameters=self.parameters).keys())
                    end_activities = list(
                        end_activities_get.get_end_activities(
                            new_log, parameters=self.parameters).keys())
                    self.children.append(
                        SubtreePlain(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                        new_log = filtering_utils.keep_one_trace_per_variant(
                            new_log, parameters=parameters)
                    else:
                        strict_tau_loop = False
                    if use_strict_tau_loop and strict_tau_loop:
                        activities_left = []
                        for trace in new_log:
                            for act in trace:
                                if act[activity_key] not in activities_left:
                                    activities_left.append(act[activity_key])
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_get.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_get.get_start_activities(
                                new_log, parameters=self.parameters).keys())
                        end_activities = list(
                            end_activities_get.get_end_activities(
                                new_log, parameters=self.parameters).keys())
                        self.children.append(
                            SubtreePlain(new_log,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.initial_start_activities,
                                         initial_end_activities=self.initial_end_activities,
                                         parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                            new_log = filtering_utils.keep_one_trace_per_variant(
                                new_log, parameters=parameters)
                        else:
                            tau_loop = False
                        if use_tau_loop and tau_loop:
                            activities_left = []
                            for trace in new_log:
                                for act in trace:
                                    if act[activity_key] not in activities_left:
                                        activities_left.append(act[activity_key])
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items() if v > 0]
                            activities = attributes_get.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.initial_start_activities,
                                    initial_end_activities=self.initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower model")
                            activities_left = []
                            for trace in self.log:
                                for act in trace:
                                    if act[activity_key] not in activities_left:
                                        activities_left.append(act[activity_key])
                            self.detected_cut = 'flower'
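
Since every fall-through flag above defaults to True, a caller can disable individual fall-throughs via the parameters dictionary. A hedged sketch, assuming Parameters is the same enum from which this method reads its flags:

# disable the first two fall-throughs; the remaining checks still run in order
parameters = {
    Parameters.EMPTY_TRACE_KEY: False,
    Parameters.ONCE_PER_TRACE_KEY: False,
}
# with all five flags set to False, a log with no base case and no
# detectable cut drops straight through to the flower model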
Ejemplo n.º 30
0
    def test_get_attributes(self):
        from pm4py.statistics.attributes.log import get
        log = self.get_log()
        # distribution of activity names over all events
        get.get_attribute_values(log, "concept:name")
        # kernel density estimates for a date attribute and a numeric attribute
        get.get_kde_date_attribute(log, "time:timestamp")
        get.get_kde_numeric_attribute(log, "amount")