Ejemplo n.º 1
0
def apply_dfg(
    dfg: Dict[Tuple[str, str], int],
    activities=None,
    activities_occurrences=None,
    start_activities=None,
    end_activities=None,
    parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[PetriNet, Marking, Marking]:
    """
    Build a Petri net from a Directly-Follows Graph with the Heuristics Miner.

    The DFG (plus any optional statistics supplied by the caller) is first
    turned into a heuristics net by ``apply_heu_dfg``; that net is then
    converted by ``hn_conv_alg.apply``. The same ``parameters`` dictionary
    is handed to both steps.

    Parameters
    ------------
    dfg
        Directly-Follows Graph
    activities
        (If provided) list of activities of the log
    activities_occurrences
        (If provided) dictionary of activities occurrences
    start_activities
        (If provided) dictionary of start activities occurrences
    end_activities
        (If provided) dictionary of end activities occurrences
    parameters
        Possible parameters of the algorithm (``None`` is treated as an
        empty dictionary), including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY
            - Parameters.DEPENDENCY_THRESH
            - Parameters.AND_MEASURE_THRESH
            - Parameters.MIN_ACT_COUNT
            - Parameters.MIN_DFG_OCCURRENCES
            - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
            - Parameters.LOOP_LENGTH_TWO_THRESH

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    parameters = {} if parameters is None else parameters

    # Step 1: DFG -> heuristics net.
    heuristics_net = apply_heu_dfg(
        dfg,
        activities=activities,
        activities_occurrences=activities_occurrences,
        start_activities=start_activities,
        end_activities=end_activities,
        parameters=parameters,
    )

    # Step 2: heuristics net -> (Petri net, initial marking, final marking).
    petri_net, initial_marking, final_marking = hn_conv_alg.apply(
        heuristics_net, parameters=parameters
    )
    return petri_net, initial_marking, final_marking
Ejemplo n.º 2
0
def apply_pandas(
    df: pd.DataFrame,
    parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[PetriNet, Marking, Marking]:
    """
    Build a Petri net from a dataframe with the Heuristics Miner ++ algorithm.

    Implements the approach described in
    Burattin, Andrea, and Alessandro Sperduti. "Heuristics Miner for Time Intervals." ESANN. 2010.

    https://andrea.burattin.net/public-files/publications/2010-esann-slides.pdf

    The dataframe is mined into a heuristics net by ``apply_heu_pandas`` and
    that net is converted by ``hn_conv_alg.apply``. ``parameters`` is
    forwarded unchanged (including ``None``) to both calls.

    Parameters
    --------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY
        - Parameters.DEPENDENCY_THRESH
        - Parameters.AND_MEASURE_THRESH
        - Parameters.MIN_ACT_COUNT
        - Parameters.MIN_DFG_OCCURRENCES
        - Parameters.HEU_NET_DECORATION

    Returns
    --------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    # Step 1: dataframe -> heuristics net.
    heuristics_net = apply_heu_pandas(df, parameters=parameters)

    # Step 2: heuristics net -> (Petri net, initial marking, final marking).
    petri_net, initial_marking, final_marking = hn_conv_alg.apply(
        heuristics_net, parameters=parameters
    )
    return petri_net, initial_marking, final_marking
Ejemplo n.º 3
0
def apply(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Tuple[PetriNet, Marking, Marking]:
    """
    Build a Petri net from an event log with the Heuristics Miner.

    The log is mined into a heuristics net by ``apply_heu`` and that net is
    converted by ``hn_conv_alg.apply``. The same ``parameters`` dictionary
    is handed to both steps.

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm (``None`` is treated as an
        empty dictionary), including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY
            - Parameters.DEPENDENCY_THRESH
            - Parameters.AND_MEASURE_THRESH
            - Parameters.MIN_ACT_COUNT
            - Parameters.MIN_DFG_OCCURRENCES
            - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
            - Parameters.LOOP_LENGTH_TWO_THRESH

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    parameters = {} if parameters is None else parameters

    # Step 1: event log -> heuristics net.
    heuristics_net = apply_heu(log, parameters=parameters)

    # Step 2: heuristics net -> (Petri net, initial marking, final marking).
    petri_net, initial_marking, final_marking = hn_conv_alg.apply(
        heuristics_net, parameters=parameters
    )
    return petri_net, initial_marking, final_marking
Ejemplo n.º 4
0
def apply_pandas(df, parameters=None):
    """
    Discovers a Petri net from a Pandas dataframe using Heuristics Miner

    The dataframe is summarised into a directly-follows graph, a window-2
    directly-follows graph, frequency triples and (when the heuristics net
    decoration is "performance") a performance DFG. These are passed to
    ``apply_heu_dfg`` and the resulting heuristics net is converted by
    ``hn_conv_alg.apply``.

    Parameters
    ------------
    df
        Pandas dataframe
    parameters
        Possible parameters of the algorithm (``None`` is treated as an
        empty dictionary),
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking

    Notes
    ------------
    When the ``pandas`` package cannot be located, nothing is computed and
    ``None`` is returned instead of the ``(net, im, fm)`` triple.
    """
    if parameters is None:
        parameters = {}

    # pkgutil.find_loader is deprecated since Python 3.12 and removed in
    # 3.14; importlib.util.find_spec is the documented replacement.
    import importlib.util

    pandas_spec = importlib.util.find_spec("pandas")
    # Same availability test the old pkgutil.find_loader call performed:
    # the module must be discoverable and have a loader.
    if pandas_spec is None or pandas_spec.loader is None:
        return None

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                                     None)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)

    # Imported lazily so that they are only loaded once pandas has been
    # confirmed to be available.
    from pm4py.algo.discovery.dfg.adapters.pandas import df_statistics, freq_triples as get_freq_triples
    from pm4py.statistics.attributes.pandas import get as pd_attributes
    from pm4py.statistics.start_activities.pandas import get as pd_sa_filter
    from pm4py.statistics.end_activities.pandas import get as pd_ea_filter

    start_activities = pd_sa_filter.get_start_activities(df, parameters=parameters)
    end_activities = pd_ea_filter.get_end_activities(df, parameters=parameters)
    activities_occurrences = pd_attributes.get_attribute_values(df, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    heu_net_decoration = exec_utils.get_param_value(Parameters.HEU_NET_DECORATION, parameters, "frequency")

    if timestamp_key in df:
        # Timestamp column present: pass it (and the optional start
        # timestamp) to the DFG computations.
        dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                          activity_key=activity_key, timestamp_key=timestamp_key,
                                          start_timestamp_key=start_timestamp_key)
        dfg_window_2 = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                                   activity_key=activity_key, timestamp_key=timestamp_key, window=2,
                                                   start_timestamp_key=start_timestamp_key)
        frequency_triples = get_freq_triples.get_freq_triples(df, case_id_glue=case_id_glue,
                                                              activity_key=activity_key,
                                                              timestamp_key=timestamp_key)
    else:
        # No timestamp column: ask the helpers not to sort by timestamp
        # within each case.
        dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                          activity_key=activity_key, sort_timestamp_along_case_id=False)
        dfg_window_2 = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                                   activity_key=activity_key, sort_timestamp_along_case_id=False,
                                                   window=2)
        frequency_triples = get_freq_triples.get_freq_triples(df, case_id_glue=case_id_glue,
                                                              activity_key=activity_key,
                                                              timestamp_key=timestamp_key,
                                                              sort_timestamp_along_case_id=False)

    # The performance DFG is only computed when the heuristics net is to be
    # decorated with performance information.
    performance_dfg = None
    if heu_net_decoration == "performance":
        performance_dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                                      activity_key=activity_key, timestamp_key=timestamp_key,
                                                      start_timestamp_key=start_timestamp_key,
                                                      measure="performance")

    heu_net = apply_heu_dfg(dfg, activities=activities, activities_occurrences=activities_occurrences,
                            start_activities=start_activities, end_activities=end_activities,
                            dfg_window_2=dfg_window_2,
                            freq_triples=frequency_triples, performance_dfg=performance_dfg, parameters=parameters)
    net, im, fm = hn_conv_alg.apply(heu_net, parameters=parameters)

    return net, im, fm
Ejemplo n.º 5
0
def apply_pandas(df, parameters=None):
    """
    Build a Petri net from a Pandas dataframe with the Heuristics Miner.

    The dataframe is summarised into a directly-follows graph, a window-2
    directly-follows graph and frequency triples; these are passed to
    ``apply_heu_dfg`` and the resulting heuristics net is converted by
    ``hn_conv_alg.apply``.

    Parameters
    ------------
    df
        Pandas dataframe
    parameters
        Possible parameters of the algorithm (``None`` is treated as an
        empty dictionary),
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    parameters = {} if parameters is None else parameters

    # Column names, falling back to the defaults when not configured.
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    case_id_glue = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)

    # Activity-level statistics of the dataframe.
    start_activities = pd_sa_filter.get_start_activities(
        df, parameters=parameters)
    end_activities = pd_ea_filter.get_end_activities(
        df, parameters=parameters)
    activities_occurrences = pd_attributes.get_attribute_values(
        df, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())

    # Keyword arguments common to every DFG / triples computation.
    shared_kwargs = dict(case_id_glue=case_id_glue, activity_key=activity_key)
    if timestamp_key in df:
        # Timestamp column present: pass it to all the helpers.
        dfg_kwargs = dict(shared_kwargs, timestamp_key=timestamp_key)
        triples_kwargs = dfg_kwargs
    else:
        # No timestamp column: ask the helpers not to sort by timestamp
        # within each case.
        dfg_kwargs = dict(shared_kwargs, sort_timestamp_along_case_id=False)
        triples_kwargs = dict(shared_kwargs,
                              timestamp_key=timestamp_key,
                              sort_timestamp_along_case_id=False)

    dfg = df_statistics.get_dfg_graph(df, **dfg_kwargs)
    dfg_window_2 = df_statistics.get_dfg_graph(df, window=2, **dfg_kwargs)
    frequency_triples = get_freq_triples.get_freq_triples(df, **triples_kwargs)

    heuristics_net = apply_heu_dfg(
        dfg,
        activities=activities,
        activities_occurrences=activities_occurrences,
        start_activities=start_activities,
        end_activities=end_activities,
        dfg_window_2=dfg_window_2,
        freq_triples=frequency_triples,
        parameters=parameters,
    )
    petri_net, initial_marking, final_marking = hn_conv_alg.apply(
        heuristics_net, parameters=parameters
    )
    return petri_net, initial_marking, final_marking