def execute_script(): df = pd.read_csv("../tests/input_data/interval_event_log.csv") df = dataframe_utils.convert_timestamp_columns_in_df(df) act_count = dict(df["concept:name"].value_counts()) parameters = {} parameters[ constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp" parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp" parameters["format"] = "svg" start_activities = sa_get.get_start_activities(df, parameters=parameters) end_activities = ea_get.get_end_activities(df, parameters=parameters) parameters["start_activities"] = start_activities parameters["end_activities"] = end_activities soj_time = soj_time_get.apply(df, parameters=parameters) dfg, performance_dfg = correlation_miner.apply( df, variant=correlation_miner.Variants.CLASSIC, parameters=parameters) gviz_freq = dfg_vis.apply(dfg, activities_count=act_count, soj_time=soj_time, variant=dfg_vis.Variants.FREQUENCY, parameters=parameters) dfg_vis.view(gviz_freq) gviz_perf = dfg_vis.apply(performance_dfg, activities_count=act_count, soj_time=soj_time, variant=dfg_vis.Variants.PERFORMANCE, parameters=parameters) dfg_vis.view(gviz_perf)
def discover_abstraction_dataframe(df: pd.DataFrame, parameters: Optional[Dict[Any, Any]] = None) -> Tuple[ Any, Any, Any, Any, Any, Any, Any]: """ Discovers an abstraction from a dataframe that is useful for the Heuristics Miner ++ algorithm Parameters -------------- df Dataframe parameters Parameters of the algorithm, including: - Parameters.ACTIVITY_KEY - Parameters.START_TIMESTAMP_KEY - Parameters.TIMESTAMP_KEY - Parameters.CASE_ID_KEY Returns -------------- start_activities Start activities end_activities End activities activities_occurrences Activities along with their number of occurrences dfg Directly-follows graph performance_dfg (Performance) Directly-follows graph sojourn_time Sojourn time for each activity concurrent_activities Concurrent activities """ if parameters is None: parameters = {} activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY) start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters, None) if start_timestamp_key is None: start_timestamp_key = xes.DEFAULT_START_TIMESTAMP_KEY parameters = copy(parameters) parameters[Parameters.START_TIMESTAMP_KEY] = start_timestamp_key timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY) case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME) start_activities = pd_sa.get_start_activities(df, parameters=parameters) end_activities = pd_ea.get_end_activities(df, parameters=parameters) activities_occurrences = pd_attributes.get_attribute_values(df, activity_key, parameters=parameters) efg_parameters = copy(parameters) efg_parameters[pd_efg.Parameters.KEEP_FIRST_FOLLOWING] = True dfg = pd_efg.apply(df, parameters=efg_parameters) performance_dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue, activity_key=activity_key, timestamp_key=timestamp_key, start_timestamp_key=start_timestamp_key, measure="performance") sojourn_time = pd_soj_time.apply(df, parameters=parameters) concurrent_activities = pd_conc_act.apply(df, parameters=parameters) return ( start_activities, end_activities, activities_occurrences, dfg, performance_dfg, sojourn_time, concurrent_activities)
def execute_script(): log_path = os.path.join("..", "tests", "input_data", "interval_event_log.csv") dataframe = pm4py.read_csv(log_path) log_path = os.path.join("..", "tests", "input_data", "reviewing.xes") log = pm4py.read_xes(log_path) dataframe = pm4py.convert_to_dataframe(log) parameters = {} #parameters[constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp" parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp" parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = "concept:name" parameters[constants.PARAMETER_CONSTANT_CASEID_KEY] = "case:concept:name" parameters["strict"] = True parameters["format"] = "svg" start_activities = sa_get.get_start_activities(dataframe, parameters=parameters) end_activities = ea_get.get_end_activities(dataframe, parameters=parameters) att_count = att_get.get_attribute_values(dataframe, "concept:name", parameters=parameters) parameters["start_activities"] = start_activities parameters["end_activities"] = end_activities soj_time = soj_time_get.apply(dataframe, parameters=parameters) print("soj_time") print(soj_time) conc_act = conc_act_get.apply(dataframe, parameters=parameters) print("conc_act") print(conc_act) efg = efg_get.apply(dataframe, parameters=parameters) print("efg") print(efg) dfg_freq, dfg_perf = df_statistics.get_dfg_graph( dataframe, measure="both", start_timestamp_key="start_timestamp") dfg_gv_freq = dfg_vis_fact.apply(dfg_freq, activities_count=att_count, variant=dfg_vis_fact.Variants.FREQUENCY, soj_time=soj_time, parameters=parameters) dfg_vis_fact.view(dfg_gv_freq) dfg_gv_perf = dfg_vis_fact.apply(dfg_perf, activities_count=att_count, variant=dfg_vis_fact.Variants.PERFORMANCE, soj_time=soj_time, parameters=parameters) dfg_vis_fact.view(dfg_gv_perf) net, im, fm = dfg_conv.apply(dfg_freq) gviz = pn_vis.apply(net, im, fm, parameters=parameters) pn_vis.view(gviz)