Exemple #1
0
def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an initial and final marking

    A Pandas dataframe is handled through its directly-follows graph
    (delegating to apply_dfg); any other input is first converted to an
    event log, a process tree is discovered from it and the tree is then
    translated into a Petri net.

    Parameters
    -----------
    log
        Log (Pandas dataframe, or any object accepted by log_conversion.apply)
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default xes_util.DEFAULT_NAME_KEY)
            pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the log to use as timestamp
            (default xes_util.DEFAULT_TIMESTAMP_KEY)
            pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY -> attribute to use as case identifier
            (default pmutil.constants.CASE_ATTRIBUTE_GLUE)
        The dictionary passed by the caller is never modified: the defaults
        are filled in on a shallow copy.

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    For a dataframe input, the value returned by apply_dfg is passed through
    unchanged.
    """
    # Work on a shallow copy so that the defaults injected below do not leak
    # into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    activity_param = pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    timestamp_param = pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
    caseid_param = pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY

    # Only fill in the keys that the caller did not provide.
    parameters.setdefault(activity_param, xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(timestamp_param, xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(caseid_param, pmutil.constants.CASE_ATTRIBUTE_GLUE)

    if isinstance(log, pandas.core.frame.DataFrame):
        # Dataframe path: compute the DFG and the activity statistics directly
        # on the dataframe, without converting it to an event log.
        dfg = df_statistics.get_dfg_graph(
            log,
            case_id_glue=parameters[caseid_param],
            activity_key=parameters[activity_param],
            timestamp_key=parameters[timestamp_param])
        start_activities = pd_start_act_stats.get_start_activities(
            log, parameters=parameters)
        end_activities = pd_end_act_stats.get_end_activities(
            log, parameters=parameters)
        activities = pd_attributes_stats.get_attribute_values(
            log, parameters[activity_param], parameters=parameters)
        return apply_dfg(dfg,
                         activities=activities,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         parameters=parameters)

    # Generic path: convert to an event log, discover the tree, then
    # translate the tree into a Petri net with its markings.
    log = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)
    tree = apply_tree(log, parameters=parameters)
    net, initial_marking, final_marking = tree_to_petri.apply(tree)
    return net, initial_marking, final_marking
 def test_get_attributes(self):
     """
     Smoke test for the Pandas attribute statistics: the helpers are called
     on the test dataframe and their results are discarded, so the test only
     fails if one of the calls raises.
     """
     from pm4py.statistics.attributes.pandas import get as attributes_get

     dataframe = self.get_dataframe()
     # Values of the activity attribute
     attributes_get.get_attribute_values(dataframe, "concept:name")
     # KDE over a date attribute
     attributes_get.get_kde_date_attribute(dataframe, "time:timestamp")
     # KDE over a numeric attribute
     attributes_get.get_kde_numeric_attribute(dataframe, "amount")
def discover_abstraction_dataframe(
    df: pd.DataFrame,
    parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Computes, from a dataframe, the abstraction consumed by the Heuristics Miner ++ algorithm

    Parameters
    --------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY (default xes.DEFAULT_NAME_KEY)
        - Parameters.START_TIMESTAMP_KEY (default xes.DEFAULT_START_TIMESTAMP_KEY)
        - Parameters.TIMESTAMP_KEY (default xes.DEFAULT_TIMESTAMP_KEY)
        - Parameters.CASE_ID_KEY (default constants.CASE_CONCEPT_NAME)

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph (obtained from pd_efg.apply with
        KEEP_FIRST_FOLLOWING enabled)
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}

    act_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    start_ts_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters, None)
    if start_ts_key is None:
        # No start timestamp configured: fall back to the default key and
        # record it on a copy, so the caller's dictionary is left untouched
        # while the helpers below still see the chosen key.
        start_ts_key = xes.DEFAULT_START_TIMESTAMP_KEY
        parameters = copy(parameters)
        parameters[Parameters.START_TIMESTAMP_KEY] = start_ts_key

    ts_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
    case_key = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

    # Start/end activities and activity occurrence counts
    starts = pd_sa.get_start_activities(df, parameters=parameters)
    ends = pd_ea.get_end_activities(df, parameters=parameters)
    occurrences = pd_attributes.get_attribute_values(
        df, act_key, parameters=parameters)

    # The frequency graph comes from the eventually-follows computation,
    # run on its own parameter copy with KEEP_FIRST_FOLLOWING switched on.
    efg_params = copy(parameters)
    efg_params[pd_efg.Parameters.KEEP_FIRST_FOLLOWING] = True
    frequency_graph = pd_efg.apply(df, parameters=efg_params)

    # Performance graph, using both the start and the completion timestamps
    performance_graph = df_statistics.get_dfg_graph(
        df,
        case_id_glue=case_key,
        activity_key=act_key,
        timestamp_key=ts_key,
        start_timestamp_key=start_ts_key,
        measure="performance")

    sojourn = pd_soj_time.apply(df, parameters=parameters)
    concurrent = pd_conc_act.apply(df, parameters=parameters)

    return (starts, ends, occurrences, frequency_graph, performance_graph,
            sojourn, concurrent)