def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an initial and final marking

    Parameters
    -----------
    log
        Log. A pandas dataframe is processed through its directly-follows
        graph (apply_dfg); any other object is first converted to an event
        log and processed through the process tree (apply_tree).
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default xes_util.DEFAULT_NAME_KEY)
            pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the log to use as timestamp
            (default xes_util.DEFAULT_TIMESTAMP_KEY)
            pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY -> column to use as case identifier
            (default pmutil.constants.CASE_ATTRIBUTE_GLUE)
        The dictionary provided by the caller is never modified.

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    For a dataframe the value returned by apply_dfg is handed back as-is.
    """
    # Work on a private (shallow) copy: the defaults filled in below must not
    # leak into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    constants = pmutil.constants
    parameters.setdefault(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(constants.PARAMETER_CONSTANT_CASEID_KEY,
                          constants.CASE_ATTRIBUTE_GLUE)

    if isinstance(log, pandas.core.frame.DataFrame):
        activity_key = parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

        # Dataframes are not converted to an event log: the directly-follows
        # graph and the activity statistics are read from the dataframe and
        # the discovery is delegated to the DFG-based variant.
        dfg = df_statistics.get_dfg_graph(
            log,
            case_id_glue=parameters[constants.PARAMETER_CONSTANT_CASEID_KEY],
            activity_key=activity_key,
            timestamp_key=parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY])
        start_activities = pd_start_act_stats.get_start_activities(
            log, parameters=parameters)
        end_activities = pd_end_act_stats.get_end_activities(
            log, parameters=parameters)
        activities = pd_attributes_stats.get_attribute_values(
            log, activity_key, parameters=parameters)
        return apply_dfg(dfg,
                         activities=activities,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         parameters=parameters)

    # Any other input: convert to an event log, discover the process tree and
    # translate the tree into a Petri net.
    log = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)
    tree = apply_tree(log, parameters=parameters)
    net, initial_marking, final_marking = tree_to_petri.apply(tree)
    return net, initial_marking, final_marking
def test_get_attributes(self):
    """
    Smoke test for the pandas attribute statistics: the three functions
    below are called on the test dataframe and their results are discarded,
    so the test only fails if one of the calls raises.
    """
    from pm4py.statistics.attributes.pandas import get as attributes_get

    dataframe = self.get_dataframe()

    attributes_get.get_attribute_values(dataframe, "concept:name")
    attributes_get.get_kde_date_attribute(dataframe, "time:timestamp")
    attributes_get.get_kde_numeric_attribute(dataframe, "amount")
def discover_abstraction_dataframe(
    df: pd.DataFrame, parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Builds, starting from a dataframe, the abstraction consumed by the
    Heuristics Miner ++ algorithm

    Parameters
    --------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY (default: xes.DEFAULT_NAME_KEY)
        - Parameters.START_TIMESTAMP_KEY (default: xes.DEFAULT_START_TIMESTAMP_KEY)
        - Parameters.TIMESTAMP_KEY (default: xes.DEFAULT_TIMESTAMP_KEY)
        - Parameters.CASE_ID_KEY (default: constants.CASE_CONCEPT_NAME)

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph (obtained from pd_efg.apply with the
        KEEP_FIRST_FOLLOWING option enabled)
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}

    act_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    start_ts_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters, None)
    if start_ts_key is None:
        # No start timestamp configured: fall back to the default key and
        # record it in a copy of the parameters, so that the functions called
        # below receive it while the caller's dictionary stays untouched.
        start_ts_key = xes.DEFAULT_START_TIMESTAMP_KEY
        parameters = copy(parameters)
        parameters[Parameters.START_TIMESTAMP_KEY] = start_ts_key

    ts_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
    case_key = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

    sa = pd_sa.get_start_activities(df, parameters=parameters)
    ea = pd_ea.get_end_activities(df, parameters=parameters)
    act_occurrences = pd_attributes.get_attribute_values(
        df, act_key, parameters=parameters)

    # The frequency graph is computed with a dedicated copy of the
    # parameters in which KEEP_FIRST_FOLLOWING is switched on.
    following_parameters = copy(parameters)
    following_parameters[pd_efg.Parameters.KEEP_FIRST_FOLLOWING] = True
    frequency_graph = pd_efg.apply(df, parameters=following_parameters)

    performance_kwargs = {
        "case_id_glue": case_key,
        "activity_key": act_key,
        "timestamp_key": ts_key,
        "start_timestamp_key": start_ts_key,
        "measure": "performance",
    }
    performance_graph = df_statistics.get_dfg_graph(df, **performance_kwargs)

    soj_time = pd_soj_time.apply(df, parameters=parameters)
    concurrent = pd_conc_act.apply(df, parameters=parameters)

    return (
        sa,
        ea,
        act_occurrences,
        frequency_graph,
        performance_graph,
        soj_time,
        concurrent,
    )