def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Discovers a directly-follows graph (DFG) from a log.

    Parameters
    --------------
    log
        Event log (EventLog or pandas DataFrame)

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if check_is_dataframe(log):
        # Dataframe route: dedicated pandas-based retrieval
        check_dataframe_columns(log)
        from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
        from pm4py.statistics.start_activities.pandas import get as sa_module
        from pm4py.statistics.end_activities.pandas import get as ea_module
        dfg = get_dfg_graph(log)
        start_activities = sa_module.get_start_activities(log)
        end_activities = ea_module.get_end_activities(log)
        return dfg, start_activities, end_activities
    # Event-log route: generic DFG discovery algorithm
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    from pm4py.statistics.start_activities.log import get as sa_module
    from pm4py.statistics.end_activities.log import get as ea_module
    return (dfg_discovery.apply(log),
            sa_module.get_start_activities(log),
            ea_module.get_end_activities(log))
def discover_performance_dfg(log: Union[EventLog, pd.DataFrame], business_hours: bool = False,
                             worktiming: List[int] = None, weekends: List[int] = None,
                             workcalendar=constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR) -> Tuple[dict, dict, dict]:
    """
    Discovers a performance directly-follows graph from an event log.

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log
        are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekend days (default: Saturday (6), Sunday (7))
    workcalendar
        Work calendar used for the business-hours computation

    Returns
    ---------------
    performance_dfg
        Performance DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    # BUG FIX: the previous signature used mutable list defaults ([7, 17] and [6, 7]),
    # which are shared across all calls of the function. None sentinels give the same
    # effective defaults without the shared-state hazard.
    if worktiming is None:
        worktiming = [7, 17]
    if weekends is None:
        weekends = [6, 7]
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        # Resolve column names from the properties, falling back to the XES defaults
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key, timestamp_key=timestamp_key,
                            case_id_glue=case_id_key, measure="performance", perf_aggregation_key="all",
                            business_hours=business_hours, worktiming=worktiming, weekends=weekends,
                            workcalendar=workcalendar)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    else:
        from pm4py.algo.discovery.dfg.variants import performance as dfg_discovery
        properties = get_properties(log)
        properties[dfg_discovery.Parameters.AGGREGATION_MEASURE] = "all"
        properties[dfg_discovery.Parameters.BUSINESS_HOURS] = business_hours
        properties[dfg_discovery.Parameters.WORKTIMING] = worktiming
        properties[dfg_discovery.Parameters.WEEKENDS] = weekends
        dfg = dfg_discovery.apply(log, parameters=properties)
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    return dfg, start_activities, end_activities
def execute_script():
    """Run the correlation miner on the interval event log and view both DFGs."""
    df = pd.read_csv("../tests/input_data/interval_event_log.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    activity_count = dict(df["concept:name"].value_counts())
    parameters = {
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        "format": "svg",
    }
    # Start/end activities are added to the parameters so the visualizer can decorate them
    parameters["start_activities"] = sa_get.get_start_activities(df, parameters=parameters)
    parameters["end_activities"] = ea_get.get_end_activities(df, parameters=parameters)
    soj_time = soj_time_get.apply(df, parameters=parameters)
    dfg, performance_dfg = correlation_miner.apply(df, variant=correlation_miner.Variants.CLASSIC,
                                                   parameters=parameters)
    # Render the frequency DFG first, then the performance DFG
    for graph, variant in ((dfg, dfg_vis.Variants.FREQUENCY),
                           (performance_dfg, dfg_vis.Variants.PERFORMANCE)):
        gviz = dfg_vis.apply(graph, activities_count=activity_count, soj_time=soj_time,
                             variant=variant, parameters=parameters)
        dfg_vis.view(gviz)
def apply_auto_filter(df, parameters=None):
    """
    Automatically filter the dataframe on its start activities, keeping only cases
    whose start activity is above an automatically derived frequency threshold.

    Parameters
    -----------
    df
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
            Parameters.CASE_ID_KEY -> Case ID column in the dataframe
            Parameters.ACTIVITY_KEY -> Column that represents the activity
            Parameters.DECREASING_FACTOR -> Decreasing factor that should be passed to the algorithm

    Returns
    -----------
    df
        Filtered dataframe
    """
    if parameters is None:
        parameters = {}
    case_col = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
    act_col = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)
    grouped = exec_utils.get_param_value(Parameters.GROUP_DATAFRAME, parameters, None)
    dec_factor = exec_utils.get_param_value(Parameters.DECREASING_FACTOR, parameters,
                                            filtering_constants.DECREASING_FACTOR)
    # Derive the admission threshold from the sorted start-activity frequencies
    sa_counts = get_start_activities(df, parameters=parameters)
    sorted_sa = start_activities_common.get_sorted_start_activities_list(sa_counts)
    threshold = start_activities_common.get_start_activities_threshold(sorted_sa, dec_factor)
    return filter_df_on_start_activities_nocc(df, threshold, sa_count0=sa_counts,
                                              case_id_glue=case_col, activity_key=act_col,
                                              grouped_df=grouped)
def discover_abstraction_dataframe(df: pd.DataFrame, parameters: Optional[Dict[Any, Any]] = None) -> Tuple[
        Any, Any, Any, Any, Any, Any, Any]:
    """
    Discovers from a dataframe the abstraction used by the Heuristics Miner ++ algorithm.

    Parameters
    --------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
            - Parameters.ACTIVITY_KEY
            - Parameters.START_TIMESTAMP_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    start_ts_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters, None)
    if start_ts_key is None:
        # Default the start timestamp and make it visible to the downstream calls
        # (on a copy, so the caller's dict is untouched)
        start_ts_key = xes.DEFAULT_START_TIMESTAMP_KEY
        parameters = copy(parameters)
        parameters[Parameters.START_TIMESTAMP_KEY] = start_ts_key
    ts_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
    case_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    sa = pd_sa.get_start_activities(df, parameters=parameters)
    ea = pd_ea.get_end_activities(df, parameters=parameters)
    act_occurrences = pd_attributes.get_attribute_values(df, act_key, parameters=parameters)
    # The "DFG" of HM++ is an eventually-follows graph restricted to the first follower
    efg_parameters = copy(parameters)
    efg_parameters[pd_efg.Parameters.KEEP_FIRST_FOLLOWING] = True
    dfg = pd_efg.apply(df, parameters=efg_parameters)
    performance_dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_glue, activity_key=act_key,
                                                  timestamp_key=ts_key, start_timestamp_key=start_ts_key,
                                                  measure="performance")
    sojourn_time = pd_soj_time.apply(df, parameters=parameters)
    concurrent_activities = pd_conc_act.apply(df, parameters=parameters)
    return (sa, ea, act_occurrences, dfg, performance_dfg, sojourn_time, concurrent_activities)
def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Discovers a directly-follows graph (DFG) from a log.

    Parameters
    --------------
    log
        Event log (EventLog, EventStream or pandas DataFrame)

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities

    Raises
    --------------
    Exception
        If the input is not a traditional event log object
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        # Resolve column names from the properties, falling back to the XES defaults
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key, timestamp_key=timestamp_key,
                            case_id_glue=case_id_key)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    else:
        # FIX: get_properties(log) was previously recomputed three times in this branch;
        # hoist it once, consistently with the dataframe branch above.
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        dfg = dfg_discovery.apply(log, parameters=properties)
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        start_activities = start_activities_module.get_start_activities(log, parameters=properties)
        end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    return dfg, start_activities, end_activities
def get_start_activities(log):
    """
    Returns the start activities of a log object.

    Parameters
    ---------------
    log
        Log object (EventLog or pandas DataFrame)

    Returns
    ---------------
    start_activities
        Dictionary of start activities along with their count
    """
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.statistics.start_activities.pandas import get as sa_get
    else:
        from pm4py.statistics.start_activities.log import get as sa_get
    return sa_get.get_start_activities(log)
def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log, obtaining a Petri net with an initial
    and a final marking.

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    case_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters,
                                           pmutil.constants.CASE_CONCEPT_NAME)
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                         pmutil.xes_constants.DEFAULT_NAME_KEY)
    ts_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                        pmutil.xes_constants.DEFAULT_TIMESTAMP_KEY)
    if not isinstance(log, pandas.core.frame.DataFrame):
        # Generic route: convert to an event log, discover a process tree, convert it
        event_log = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)
        process_tree = apply_tree(event_log, parameters=parameters)
        net, initial_marking, final_marking = tree_to_petri.apply(process_tree)
        return net, initial_marking, final_marking
    # Dataframe route: compute the DFG abstraction directly on the dataframe
    dfg = df_statistics.get_dfg_graph(log, case_id_glue=case_glue, activity_key=act_key,
                                      timestamp_key=ts_key)
    sa = pd_start_act_stats.get_start_activities(log, parameters=parameters)
    ea = pd_end_act_stats.get_end_activities(log, parameters=parameters)
    acts = pd_attributes_stats.get_attribute_values(log, act_key, parameters=parameters)
    return apply_dfg(dfg, activities=acts, start_activities=sa, end_activities=ea,
                     parameters=parameters)
def get_start_activities(log: Union[EventLog, pd.DataFrame]) -> Dict[str, int]:
    """
    Returns the start activities of a log object.

    Parameters
    ---------------
    log
        Log object (EventLog, EventStream or pandas DataFrame)

    Returns
    ---------------
    start_activities
        Dictionary of start activities along with their count

    Raises
    ---------------
    Exception
        If the input is not a traditional event log object
    """
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.statistics.start_activities.pandas import get as sa_module
    else:
        from pm4py.statistics.start_activities.log import get as sa_module
    return sa_module.get_start_activities(log, parameters=get_properties(log))
def filter_df_on_start_activities_nocc(df, nocc, sa_count0=None, case_id_glue=CASE_CONCEPT_NAME,
                                       activity_key=DEFAULT_NAME_KEY, grouped_df=None):
    """
    Keep only the cases whose start activity occurs at least ``nocc`` times
    as a start activity in the dataframe.

    Parameters
    -----------
    df
        Dataframe
    nocc
        Minimum number of occurrences of the start activity
    sa_count0
        (if provided) Dictionary that associates each start activity with its count
    case_id_glue
        Column that contains the Case ID
    activity_key
        Column that contains the activity
    grouped_df
        Grouped dataframe

    Returns
    ------------
    df
        Filtered dataframe
    """
    if grouped_df is None:
        grouped_df = df.groupby(case_id_glue)
    first_events = grouped_df.first()
    if sa_count0 is None:
        sa_count0 = get_start_activities(df, parameters={
            Parameters.CASE_ID_KEY: case_id_glue,
            Parameters.ACTIVITY_KEY: activity_key,
            Parameters.GROUP_DATAFRAME: grouped_df,
        })
    admitted = [act for act, count in sa_count0.items() if count >= nocc]
    if len(admitted) < len(sa_count0):
        # Restrict the first-event table to admitted start activities, then keep
        # only the rows of cases that survive the restriction
        first_events = first_events[first_events[activity_key].isin(admitted)]
        case_index = df.set_index(case_id_glue).index
        filtered = df[case_index.isin(first_events.index)]
    else:
        # Nothing is removed: return the original dataframe
        filtered = df
    # Preserve the dataframe-level attrs (when supported by the pandas version)
    filtered.attrs = copy(df.attrs) if hasattr(df, 'attrs') else {}
    return filtered
def execute_script():
    """Discover and view frequency/performance DFGs (and a Petri net) from the reviewing log."""
    # FIX: a previous version first loaded interval_event_log.csv into ``dataframe``,
    # but that value was immediately overwritten below — the dead read is removed.
    log_path = os.path.join("..", "tests", "input_data", "reviewing.xes")
    log = pm4py.read_xes(log_path)
    dataframe = pm4py.convert_to_dataframe(log)
    parameters = {}
    # parameters[constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp"
    parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp"
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = "concept:name"
    parameters[constants.PARAMETER_CONSTANT_CASEID_KEY] = "case:concept:name"
    parameters["strict"] = True
    parameters["format"] = "svg"
    start_activities = sa_get.get_start_activities(dataframe, parameters=parameters)
    end_activities = ea_get.get_end_activities(dataframe, parameters=parameters)
    att_count = att_get.get_attribute_values(dataframe, "concept:name", parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    soj_time = soj_time_get.apply(dataframe, parameters=parameters)
    print("soj_time")
    print(soj_time)
    conc_act = conc_act_get.apply(dataframe, parameters=parameters)
    print("conc_act")
    print(conc_act)
    efg = efg_get.apply(dataframe, parameters=parameters)
    print("efg")
    print(efg)
    dfg_freq, dfg_perf = df_statistics.get_dfg_graph(dataframe, measure="both",
                                                     start_timestamp_key="start_timestamp")
    dfg_gv_freq = dfg_vis_fact.apply(dfg_freq, activities_count=att_count,
                                     variant=dfg_vis_fact.Variants.FREQUENCY,
                                     soj_time=soj_time, parameters=parameters)
    dfg_vis_fact.view(dfg_gv_freq)
    dfg_gv_perf = dfg_vis_fact.apply(dfg_perf, activities_count=att_count,
                                     variant=dfg_vis_fact.Variants.PERFORMANCE,
                                     soj_time=soj_time, parameters=parameters)
    dfg_vis_fact.view(dfg_gv_perf)
    net, im, fm = dfg_conv.apply(dfg_freq)
    gviz = pn_vis.apply(net, im, fm, parameters=parameters)
    pn_vis.view(gviz)
def get_process_svg():
    """
    HTTP endpoint handler: builds a process-model visualization for the event log
    referenced by the request parameters and returns it serialized as an SVG string.
    """
    # Read and decode the request's parameters, then load the referenced log
    parameters = request.args.get("parameters")
    parameters = __process_parameters(parameters)
    log = __prepare_event_log(parameters)
    ext_type = parameters[
        "ext_type"] if "ext_type" in parameters else "document_flow_log"
    log_type = __get_log_type_from_ext_type(ext_type)
    if log_type == 0:
        # Multi-perspective (MVP) model discovery via pm4pymdl
        log.type = "succint"
        from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
        from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
        model = discovery.apply(log, model_type_variant="model3", node_freq_variant="type31",
                                edge_freq_variant="type11")
        gviz = vis_factory.apply(model, parameters={"format": "svg"})
    elif log_type == 1 or log_type == 2:
        # Traditional log: discover a DFG (pandas-specific retrieval when possible)
        import pandas as pd
        if type(log) is pd.DataFrame:
            from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
            dfg = get_dfg_graph(log)
            from pm4py.statistics.start_activities.pandas import get as pd_sa_get
            from pm4py.statistics.end_activities.pandas import get as pd_ea_get
            sa = pd_sa_get.get_start_activities(log)
            ea = pd_ea_get.get_end_activities(log)
        else:
            dfg, sa, ea = pm4py.discover_dfg(log)
        act_count = pm4py.get_attribute_values(log, "concept:name")
        # Keep the 20% most frequent paths while retaining all activities
        dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_paths_percentage(
            dfg, sa, ea, act_count, 0.2, keep_all_activities=True)
        gviz = pm4py.visualization.dfg.visualizer.apply(
            dfg, activities_count=act_count, parameters={
                "format": "svg",
                "start_activities": sa,
                "end_activities": ea
            })
    # NOTE(review): if log_type is outside {0, 1, 2}, ``gviz`` is unbound here and
    # this raises NameError — presumably unreachable; confirm against __get_log_type_from_ext_type
    ser = pm4py.visualization.dfg.visualizer.serialize(gviz).decode("utf-8")
    return ser
def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log, obtaining a Petri net with an initial
    and a final marking.

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use
            as activity name (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    # BUG FIX: defaults were previously written into the caller-supplied ``parameters``
    # dict, mutating it as a side effect; fill them in on a shallow copy instead.
    parameters = dict(parameters)
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = xes_util.DEFAULT_TIMESTAMP_KEY
    if pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY] = pmutil.constants.CASE_ATTRIBUTE_GLUE
    if isinstance(log, pandas.core.frame.DataFrame):
        # Dataframe route: compute the DFG abstraction directly on the dataframe
        dfg = df_statistics.get_dfg_graph(log,
                                          case_id_glue=parameters[pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY],
                                          activity_key=parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY],
                                          timestamp_key=parameters[pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY])
        start_activities = pd_start_act_stats.get_start_activities(log, parameters=parameters)
        end_activities = pd_end_act_stats.get_end_activities(log, parameters=parameters)
        activities = pd_attributes_stats.get_attribute_values(
            log, parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY], parameters=parameters)
        return apply_dfg(dfg, activities=activities, start_activities=start_activities,
                         end_activities=end_activities, parameters=parameters)
    # Generic route: convert to an event log, discover a process tree, convert it
    log = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)
    tree = apply_tree(log, parameters=parameters)
    net, initial_marking, final_marking = tree_to_petri.apply(tree)
    return net, initial_marking, final_marking
def apply_auto_filter(df, parameters=None):
    """
    Automatically filter the dataframe on its start activities, keeping only cases
    whose start activity is above an automatically derived frequency threshold.

    Parameters
    -----------
    df
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
            case_id_glue -> Case ID column in the dataframe
            activity_key -> Column that represents the activity
            decreasingFactor -> Decreasing factor that should be passed to the algorithm

    Returns
    -----------
    df
        Filtered dataframe
    """
    if parameters is None:
        parameters = {}
    case_id_glue = parameters.get(PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)
    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, DEFAULT_NAME_KEY)
    decreasing_factor = parameters.get("decreasingFactor", filtering_constants.DECREASING_FACTOR)
    grouped_df = parameters.get(GROUPED_DATAFRAME, None)
    # Derive the admission threshold from the sorted start-activity frequencies
    start_activities = get_start_activities(df, parameters=parameters)
    salist = start_activities_common.get_sorted_start_activities_list(start_activities)
    sathreshold = start_activities_common.get_start_activities_threshold(salist, decreasing_factor)
    return filter_df_on_start_activities_nocc(df, sathreshold, sa_count0=start_activities,
                                              case_id_glue=case_id_glue, activity_key=activity_key,
                                              grouped_df=grouped_df)
def apply_pandas(df, parameters=None):
    """
    Discovers a Petri net from a dataframe using the Heuristics Miner.

    Parameters
    ------------
    df
        Pandas dataframe
    parameters
        Possible parameters of the algorithm, including:
            activity_key, case_id_glue, timestamp_key, dependency_thresh,
            and_measure_thresh, min_act_count, min_dfg_occurrences,
            dfg_pre_cleaning_noise_thresh, loops_length_two_thresh

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    if parameters is None:
        parameters = {}
    # NOTE(review): if pandas is not importable this falls through and implicitly
    # returns None — presumably unreachable since ``df`` is already a dataframe
    if pkgutil.find_loader("pandas"):
        activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
        case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
        start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters, None)
        timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
        from pm4py.algo.discovery.dfg.adapters.pandas import df_statistics, freq_triples as get_freq_triples
        from pm4py.statistics.attributes.pandas import get as pd_attributes
        from pm4py.statistics.start_activities.pandas import get as pd_sa_filter
        from pm4py.statistics.end_activities.pandas import get as pd_ea_filter
        start_activities = pd_sa_filter.get_start_activities(df, parameters=parameters)
        end_activities = pd_ea_filter.get_end_activities(df, parameters=parameters)
        activities_occurrences = pd_attributes.get_attribute_values(df, activity_key, parameters=parameters)
        activities = list(activities_occurrences.keys())
        heu_net_decoration = exec_utils.get_param_value(Parameters.HEU_NET_DECORATION, parameters, "frequency")
        if timestamp_key in df:
            # Timestamp column available: compute the DFG, the window-2 DFG (for
            # length-two loops) and the frequency triples with timestamp sorting
            dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue, activity_key=activity_key,
                                              timestamp_key=timestamp_key,
                                              start_timestamp_key=start_timestamp_key)
            dfg_window_2 = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                                       activity_key=activity_key,
                                                       timestamp_key=timestamp_key, window=2,
                                                       start_timestamp_key=start_timestamp_key)
            frequency_triples = get_freq_triples.get_freq_triples(df, case_id_glue=case_id_glue,
                                                                  activity_key=activity_key,
                                                                  timestamp_key=timestamp_key)
        else:
            # No timestamp column: rely on the existing row order
            dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue, activity_key=activity_key,
                                              sort_timestamp_along_case_id=False)
            dfg_window_2 = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                                       activity_key=activity_key,
                                                       sort_timestamp_along_case_id=False, window=2)
            frequency_triples = get_freq_triples.get_freq_triples(df, case_id_glue=case_id_glue,
                                                                  activity_key=activity_key,
                                                                  timestamp_key=timestamp_key,
                                                                  sort_timestamp_along_case_id=False)
        performance_dfg = None
        if heu_net_decoration == "performance":
            # Only computed when the heuristics net should be decorated with performance
            performance_dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                                          activity_key=activity_key,
                                                          timestamp_key=timestamp_key,
                                                          start_timestamp_key=start_timestamp_key,
                                                          measure="performance")
        heu_net = apply_heu_dfg(dfg, activities=activities, activities_occurrences=activities_occurrences,
                                start_activities=start_activities, end_activities=end_activities,
                                dfg_window_2=dfg_window_2, freq_triples=frequency_triples,
                                performance_dfg=performance_dfg, parameters=parameters)
        net, im, fm = hn_conv_alg.apply(heu_net, parameters=parameters)
        return net, im, fm
import pandas as pd

from pm4py.algo.discovery.dfg.adapters.pandas import df_statistics
from pm4py.statistics.attributes.pandas import get as attributes_get
from pm4py.statistics.start_activities.pandas import get as sa_get
from pm4py.statistics.end_activities.pandas import get as ea_get
from pm4py.visualization.dfg import visualizer

# PowerBI hands over the dataframe with string-typed timestamps; parse them first
dataset["time:timestamp"] = pd.to_datetime(dataset["time:timestamp"])

dfg = df_statistics.get_dfg_graph(dataset, measure="frequency")
activities_count = attributes_get.get_attribute_values(dataset, "concept:name")
start_activities = sa_get.get_start_activities(dataset)
end_activities = ea_get.get_end_activities(dataset)

# Render the frequency-decorated DFG through matplotlib
gviz = visualizer.apply(dfg, activities_count=activities_count,
                        variant=visualizer.Variants.FREQUENCY,
                        parameters={"start_activities": start_activities,
                                    "end_activities": end_activities})
visualizer.matplotlib_view(gviz)
def apply_pandas(df, parameters=None):
    """
    Discovers a Petri net from a dataframe using the Heuristics Miner.

    Parameters
    ------------
    df
        Pandas dataframe
    parameters
        Possible parameters of the algorithm, including:
            activity_key, case_id_glue, timestamp_key, dependency_thresh,
            and_measure_thresh, min_act_count, min_dfg_occurrences,
            dfg_pre_cleaning_noise_thresh, loops_length_two_thresh

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    if parameters is None:
        parameters = {}
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    case_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    ts_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
    start_acts = pd_sa_filter.get_start_activities(df, parameters=parameters)
    end_acts = pd_ea_filter.get_end_activities(df, parameters=parameters)
    act_occurrences = pd_attributes.get_attribute_values(df, act_key, parameters=parameters)
    act_names = list(act_occurrences.keys())
    if ts_key in df:
        # Timestamp column available: compute the DFG, the window-2 DFG (for
        # length-two loops) and the frequency triples with timestamp sorting
        dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_glue, activity_key=act_key,
                                          timestamp_key=ts_key)
        dfg_window_2 = df_statistics.get_dfg_graph(df, case_id_glue=case_glue, activity_key=act_key,
                                                   timestamp_key=ts_key, window=2)
        freq_triples = get_freq_triples.get_freq_triples(df, case_id_glue=case_glue,
                                                         activity_key=act_key, timestamp_key=ts_key)
    else:
        # No timestamp column: rely on the existing row order
        dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_glue, activity_key=act_key,
                                          sort_timestamp_along_case_id=False)
        dfg_window_2 = df_statistics.get_dfg_graph(df, case_id_glue=case_glue, activity_key=act_key,
                                                   sort_timestamp_along_case_id=False, window=2)
        freq_triples = get_freq_triples.get_freq_triples(df, case_id_glue=case_glue,
                                                         activity_key=act_key, timestamp_key=ts_key,
                                                         sort_timestamp_along_case_id=False)
    heu_net = apply_heu_dfg(dfg, activities=act_names, activities_occurrences=act_occurrences,
                            start_activities=start_acts, end_activities=end_acts,
                            dfg_window_2=dfg_window_2, freq_triples=freq_triples,
                            parameters=parameters)
    net, im, fm = hn_conv_alg.apply(heu_net, parameters=parameters)
    return net, im, fm