def test_concurrent_activities_xes(self):
    """
    Smoke test for the concurrent-activities statistic on an XES log.

    The result is not inspected: the test succeeds as long as importing
    the log and running the computation do not raise.
    """
    log_path = os.path.join("input_data", "interval_event_log.xes")
    event_log = xes_importer.apply(log_path)

    from pm4py.statistics.concurrent_activities.log import get as conc_act_get

    # "start_timestamp" is passed as the start-timestamp attribute key
    stat_parameters = {
        conc_act_get.Parameters.START_TIMESTAMP_KEY: "start_timestamp",
    }
    conc_act_get.apply(event_log, parameters=stat_parameters)
def test_efg_xes(self):
    """
    Smoke test for the eventually-follows statistic on an XES log.

    The result is not inspected: the test succeeds as long as importing
    the log and running the computation do not raise.
    """
    log_path = os.path.join("input_data", "interval_event_log.xes")
    event_log = xes_importer.apply(log_path)

    from pm4py.statistics.eventually_follows.log import get as efg_get

    # "start_timestamp" is passed as the start-timestamp attribute key
    stat_parameters = {
        efg_get.Parameters.START_TIMESTAMP_KEY: "start_timestamp",
    }
    efg_get.apply(event_log, parameters=stat_parameters)
def test_sojourn_time_xes(self):
    """
    Smoke test for the sojourn-time statistic on an XES log.

    The result is not inspected: the test succeeds as long as importing
    the log and running the computation do not raise.
    """
    log_path = os.path.join("input_data", "interval_event_log.xes")
    event_log = xes_importer.apply(log_path)

    from pm4py.statistics.sojourn_time.log import get as soj_time_get

    # "start_timestamp" is passed as the start-timestamp attribute key
    stat_parameters = {
        soj_time_get.Parameters.START_TIMESTAMP_KEY: "start_timestamp",
    }
    soj_time_get.apply(event_log, parameters=stat_parameters)
def apply(dfg, log=None, parameters=None, activities_count=None, soj_time=None):
    """
    Visualize a frequency directly-follows graph

    Parameters
    -----------------
    dfg
        Directly-follows graph
    log
        (if provided) Event log used to compute the statistics that were
        not passed explicitly
    parameters
        Variant-specific parameters; the ones read here are:
        - Parameters.ACTIVITY_KEY (default: xes.DEFAULT_NAME_KEY)
        - Parameters.FORMAT (default: "png")
        - Parameters.MAX_NO_EDGES_IN_DIAGRAM (default: 75)
        - Parameters.START_ACTIVITIES (default: empty list)
        - Parameters.END_ACTIVITIES (default: empty list)
    activities_count
        (if provided) Number of occurrences for each activity. When missing,
        it is computed from the log, or set to 1 for every activity of the
        DFG if no log is available.
    soj_time
        (if provided) Sojourn time for each activity. When missing, it is
        computed from the log, or set to 0 for every activity of the DFG if
        no log is available.

    Returns
    -----------------
    gviz
        The object returned by graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    image_format = exec_utils.get_param_value(
        Parameters.FORMAT, parameters, "png")
    max_no_of_edges_in_diagram = exec_utils.get_param_value(
        Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 75)
    start_activities = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters, [])
    end_activities = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters, [])

    dfg_activities = dfg_utils.get_activities_from_dfg(dfg)

    # fill in the statistics the caller did not supply: prefer the log,
    # otherwise fall back to a neutral constant per activity
    if activities_count is None:
        activities_count = (
            attr_get.get_attribute_values(log, activity_key, parameters=parameters)
            if log is not None
            else dict.fromkeys(dfg_activities, 1)
        )

    if soj_time is None:
        soj_time = (
            soj_time_get.apply(log, parameters=parameters)
            if log is not None
            else dict.fromkeys(dfg_activities, 0)
        )

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure="frequency",
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities,
        soj_time=soj_time,
    )
def execute_script():
    """
    Example script working on an event log with start and end timestamps.

    It imports the log, prints the sojourn time, concurrent activities and
    eventually-follows statistics, then discovers and views the frequency
    and performance DFGs, and finally views the Petri net converted from
    the frequency DFG.
    """
    # alternative input: os.path.join("..", "tests", "input_data", "reviewing.xes")
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "interval_event_log.xes"))

    parameters = {
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "strict": False,
        "format": "svg",
    }
    # both values are computed before either one is stored in the parameters
    parameters.update({
        "start_activities": sa_get.get_start_activities(log, parameters=parameters),
        "end_activities": ea_get.get_end_activities(log, parameters=parameters),
    })

    # compute and print each statistic, label first and value second
    statistics = (
        ("soj_time", soj_time_get),
        ("conc_act", conc_act_get),
        ("efg", efg_get),
    )
    for label, stat_module in statistics:
        value = stat_module.apply(log, parameters=parameters)
        print(label)
        print(value)

    dfg_freq = dfg_algorithm.apply(
        log, parameters=parameters, variant=dfg_algorithm.Variants.FREQUENCY)
    dfg_perf = dfg_algorithm.apply(
        log, parameters=parameters, variant=dfg_algorithm.Variants.PERFORMANCE)

    dfg_vis_fact.view(dfg_vis_fact.apply(
        dfg_freq, log=log, variant=dfg_vis_fact.Variants.FREQUENCY,
        parameters=parameters))
    dfg_vis_fact.view(dfg_vis_fact.apply(
        dfg_perf, log=log, variant=dfg_vis_fact.Variants.PERFORMANCE,
        parameters=parameters))

    petri_net, initial_marking, final_marking = dfg_conv.apply(dfg_freq)
    pn_vis.view(pn_vis.apply(
        petri_net, initial_marking, final_marking, parameters=parameters))
def test_efg_pandas(self):
    """
    Smoke test for the eventually-follows statistic on a Pandas dataframe.

    The CSV is read, its timestamp columns are converted, and the statistic
    is computed. The result is not inspected: the test succeeds as long as
    nothing raises.
    """
    import pm4py  # not referenced directly below
    import pandas as pd

    csv_path = os.path.join("input_data", "interval_event_log.csv")
    frame = pd.read_csv(csv_path)

    from pm4py.objects.log.util import dataframe_utils

    frame = dataframe_utils.convert_timestamp_columns_in_df(frame)

    from pm4py.statistics.eventually_follows.pandas import get as efg_get

    # "start_timestamp" is passed as the start-timestamp column key
    stat_parameters = {
        efg_get.Parameters.START_TIMESTAMP_KEY: "start_timestamp",
    }
    efg_get.apply(frame, parameters=stat_parameters)
def discover_abstraction_log(
    log: EventLog, parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Computes, from an event log, the abstraction used by the
    Heuristics Miner ++ algorithm

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm. Parameters.ACTIVITY_KEY is read here
        (default: xes.DEFAULT_NAME_KEY); the whole dictionary is also
        forwarded to the underlying statistics, which may read:
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph (obtained from the eventually-follows
        computation with KEEP_FIRST_FOLLOWING enabled)
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    first_acts = log_sa.get_start_activities(log, parameters=parameters)
    last_acts = log_ea.get_end_activities(log, parameters=parameters)
    occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)

    # work on a copy so the caller's parameters are not modified
    params_keep_first = copy(parameters)
    params_keep_first[efg_get.Parameters.KEEP_FIRST_FOLLOWING] = True
    follows_graph = efg_get.apply(log, parameters=params_keep_first)

    perf_graph = dfg_alg.apply(
        log, variant=dfg_alg.Variants.PERFORMANCE, parameters=parameters)
    sojourn = soj_get.apply(log, parameters=parameters)
    concurrent = conc_act_get.apply(log, parameters=parameters)

    return (
        first_acts,
        last_acts,
        occurrences,
        follows_graph,
        perf_graph,
        sojourn,
        concurrent,
    )
def apply(dfg: Dict[Tuple[str, str], int], log: EventLog = None, parameters: Optional[Dict[Any, Any]] = None, activities_count: Dict[str, int] = None, soj_time: Dict[str, float] = None) -> Digraph:
    """
    Visualize a frequency directly-follows graph

    Parameters
    -----------------
    dfg
        Frequency Directly-follows graph
    log
        (if provided) Event log for the calculation of statistics
    parameters
        Variant-specific parameters; the ones read here are:
        - Parameters.ACTIVITY_KEY (default: xes.DEFAULT_NAME_KEY)
        - Parameters.FORMAT (default: "png")
        - Parameters.MAX_NO_EDGES_IN_DIAGRAM (default: 100000)
        - Parameters.START_ACTIVITIES (default: empty dictionary)
        - Parameters.END_ACTIVITIES (default: empty dictionary)
        - Parameters.FONT_SIZE (default: 12, converted to string)
        - Parameters.BGCOLOR (default: "transparent")
        - Parameters.STAT_LOCALE (default: empty dictionary)
    activities_count
        (if provided) Dictionary associating to each activity the number
        of occurrences in the log. When missing, it is computed from the
        log, or estimated from the DFG if no log is available.
    soj_time
        (if provided) Dictionary associating to each activity the average
        sojourn time. When missing, it is computed from the log, or set to
        0 for every activity of the DFG if no log is available.

    Returns
    -----------------
    gviz
        Graphviz digraph
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    image_format = exec_utils.get_param_value(
        Parameters.FORMAT, parameters, "png")
    max_no_of_edges_in_diagram = exec_utils.get_param_value(
        Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 100000)
    start_activities = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters, {})
    end_activities = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters, {})
    font_size = str(
        exec_utils.get_param_value(Parameters.FONT_SIZE, parameters, 12))

    dfg_activities = dfg_utils.get_activities_from_dfg(dfg)

    bgcolor = exec_utils.get_param_value(
        Parameters.BGCOLOR, parameters, "transparent")
    stat_locale = exec_utils.get_param_value(
        Parameters.STAT_LOCALE, parameters, None)
    if stat_locale is None:
        stat_locale = {}

    if activities_count is None and log is not None:
        activities_count = attr_get.get_attribute_values(
            log, activity_key, parameters=parameters)
    elif activities_count is None:
        # without a log, an activity occurs at least as many times as the
        # total frequency of the arcs entering it in the DFG;
        # when the start activities come with their frequencies, those
        # are added on top.
        activities_count = Counter(dict.fromkeys(dfg_activities, 0))
        for arc, frequency in dfg.items():
            activities_count[arc[1]] += frequency
        if isinstance(start_activities, dict):
            for activity, frequency in start_activities.items():
                activities_count[activity] += frequency

    if soj_time is None:
        soj_time = (
            soj_time_get.apply(log, parameters=parameters)
            if log is not None
            else dict.fromkeys(dfg_activities, 0)
        )

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure="frequency",
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities,
        soj_time=soj_time,
        font_size=font_size,
        bgcolor=bgcolor,
        stat_locale=stat_locale,
    )