def test_statistics_log(self):
    """Smoke-test the pm4py statistics getters on an XES event log."""
    event_log = pm4py.read_xes("input_data/running-example.xes")
    # log-level getters that take only the log
    for getter in (pm4py.get_start_activities,
                   pm4py.get_end_activities,
                   pm4py.get_event_attributes,
                   pm4py.get_trace_attributes):
        getter(event_log)
    # getters with additional arguments
    pm4py.get_event_attribute_values(event_log, "org:resource")
    pm4py.get_variants_as_tuples(event_log)
def test_statistics_df(self):
    """Smoke-test the pm4py statistics getters on a formatted dataframe."""
    dataframe = pd.read_csv("input_data/running-example.csv")
    # attach the case/activity/timestamp columns pm4py expects
    dataframe = pm4py.format_dataframe(
        dataframe,
        case_id="case:concept:name",
        activity_key="concept:name",
        timestamp_key="time:timestamp")
    for getter in (pm4py.get_start_activities,
                   pm4py.get_end_activities,
                   pm4py.get_event_attributes):
        getter(dataframe)
    pm4py.get_event_attribute_values(dataframe, "org:resource")
    pm4py.get_variants_as_tuples(dataframe)
def detect(log: EventLog, alphabet: Dict[str, int], act_key: str,
           use_msd: bool) -> Optional[str]:
    """
    Look for a single activity that can be put in parallel with the rest of
    the process.

    Every activity occurring in all traces is a candidate. For each
    candidate ``a`` the log is projected by removing every occurrence of
    ``a``; if the projected log (with no trace emptied by the projection)
    admits one of the standard cuts (sequence, xor, concurrent, loop), then
    ``a`` is concurrent with the remainder and is returned.

    Parameters
    ----------
    log
        Event log.
    alphabet
        Mapping of each activity of the log onto its frequency.
    act_key
        Event attribute carrying the activity name.
    use_msd
        Whether minimum-self-distance witnesses are passed to the
        concurrent-cut detection.

    Returns
    -------
    The detected concurrent activity, or None if no candidate works.
    """
    # candidates = activities occurring in every trace of the log
    candidates = set(alphabet.keys())
    for t in log:
        candidates = candidates.intersection(set(map(lambda e: e[act_key], t)))
    if len(candidates) == 0:
        return None
    for a in candidates:
        # project the log by dropping every occurrence of the candidate
        proj = EventLog()
        for t in log:
            proj.append(pm4py.filter_trace(lambda e: e[act_key] != a, t))
        # only proceed when no trace became empty through the projection
        if len(list(filter(lambda t: len(t) == 0, proj))) == 0:
            dfg_proj = discover_dfg.apply(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            alphabet_proj = pm4py.get_event_attribute_values(proj, act_key)
            start_act_proj = get_starters.get_start_activities(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            # BUG FIX: end activities were previously computed on the
            # original log while the start activities were computed on the
            # projection; the cut detectors below must see the start/end
            # activities of the same (projected) log.
            end_act_proj = get_ends.get_end_activities(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            pre_proj, post_proj = dfg_utils.get_transitive_relations(
                dfg_proj, alphabet_proj)
            cut = sequence_cut.detect(alphabet_proj, pre_proj, post_proj)
            if cut is not None:
                return a
            cut = xor_cut.detect(dfg_proj, alphabet_proj)
            if cut is not None:
                return a
            # NOTE(review): MSD values are computed on the *original* log
            # while witnesses are derived on the projection — confirm this
            # asymmetry is intended before changing it.
            cut = concurrent_cut.detect(
                dfg_proj, alphabet_proj, start_act_proj, end_act_proj,
                msd=msdw_algo.derive_msd_witnesses(
                    proj,
                    msd_algo.apply(log, parameters={
                        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}),
                    parameters={
                        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
                if use_msd else None)
            if cut is not None:
                return a
            cut = loop_cut.detect(dfg_proj, alphabet_proj, start_act_proj,
                                  end_act_proj)
            if cut is not None:
                return a
    return None
def test_filter_act_percentage(self):
    """Smoke-test filtering a discovered DFG on an activities percentage."""
    from pm4py.algo.filtering.dfg import dfg_filtering
    event_log = pm4py.read_xes("input_data/running-example.xes")
    dfg, start_acts, end_acts = pm4py.discover_dfg(event_log)
    activity_count = pm4py.get_event_attribute_values(event_log,
                                                      "concept:name")
    # result intentionally discarded: only checks the call succeeds
    dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, start_acts, end_acts, activity_count, 0.1)
def execute_script():
    """Demonstrate DFG filtering, playout simulation and trace probabilities."""
    input_log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    act_count = pm4py.get_event_attribute_values(input_log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(input_log)
    # filters the DFG to make a simpler one
    threshold = 0.5
    dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, act_count, threshold)
    dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, act_count, threshold)
    # creates the simulated log
    simulated_log = dfg_playout.apply(dfg, sa, ea)
    print(simulated_log)
    print(len(simulated_log))
    print(sum(t.attributes["probability"] for t in simulated_log))
    # shows the two DFGs to show that they are identical
    pm4py.view_dfg(dfg, sa, ea, log=input_log, format="svg")
    new_dfg, new_sa, new_ea = pm4py.discover_dfg(simulated_log)
    pm4py.view_dfg(new_dfg, new_sa, new_ea, log=simulated_log, format="svg")
    # inspect only the first simulated trace
    for simulated_trace in simulated_log:
        print([ev["concept:name"] for ev in simulated_trace])
        print(simulated_trace.attributes["probability"],
              dfg_playout.get_trace_probability(simulated_trace, dfg, sa, ea))
        break
    dfg, sa, ea = pm4py.discover_dfg(input_log)
    variants = pm4py.get_variants_as_tuples(input_log)
    sum_prob_log_variants = sum(
        (dfg_playout.get_trace_probability(variants[var][0], dfg, sa, ea)
         for var in variants), 0.0)
    print(
        "percentage of behavior allowed from DFG that is in the log (from 0.0 to 1.0): ",
        sum_prob_log_variants)
def test_dfg_align(self):
    """Align a log against a DFG simplified by activity/path filtering."""
    import pm4py
    from pm4py.algo.filtering.dfg import dfg_filtering
    from pm4py.algo.conformance.alignments.dfg import algorithm as dfg_alignment
    event_log = pm4py.read_xes(
        os.path.join("input_data", "running-example.xes"))
    dfg, start_acts, end_acts = pm4py.discover_dfg(event_log)
    activity_count = pm4py.get_event_attribute_values(event_log,
                                                      "concept:name")
    # simplify the DFG: first by activities, then by paths (order matters)
    for filter_func in (dfg_filtering.filter_dfg_on_activities_percentage,
                        dfg_filtering.filter_dfg_on_paths_percentage):
        dfg, start_acts, end_acts, activity_count = filter_func(
            dfg, start_acts, end_acts, activity_count, 0.5)
    dfg_alignment.apply(event_log, dfg, start_acts, end_acts)
def apply(
        log: Union[DataFrame, EventLog, EventStream],
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[str, int]:
    '''
    This algorithm computes the minimum self-distance for each activity
    observed in an event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in
    <a,b,a> is 1, etc. The minimum self distance is the minimal observed
    self distance value in the event log. The activity key needs to be
    specified in the parameters input object (if None, default value
    'concept:name' is used).

    Parameters
    ----------
    log
        event log (either EventLog or EventStream)
    parameters
        parameters object;

    Returns
    -------
    dict mapping an activity to its self-distance, if it exists, otherwise
    it is not part of the dict.
    '''
    log = converter.apply(log, variant=converter.Variants.TO_EVENT_LOG,
                          parameters=parameters)
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                         xes_constants.DEFAULT_NAME_KEY)
    # Single pass over all events (O(total events)) instead of re-scanning
    # the whole log once per alphabet symbol: for each trace remember the
    # last position of each activity and fold the gap to the next
    # occurrence into the running minimum.
    min_self_distances = dict()
    for trace in log:
        # position of the most recent occurrence of each activity
        last_index = dict()
        for i, event in enumerate(trace):
            activity = event[act_key]
            if activity in last_index:
                # number of events strictly between the two occurrences
                distance = i - last_index[activity] - 1
                if activity not in min_self_distances or \
                        distance < min_self_distances[activity]:
                    min_self_distances[activity] = distance
            last_index[activity] = i
    return min_self_distances
def derive_msd_witnesses(
        log: EventLog,
        msd: Optional[Dict[Any, int]] = None,
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[str, Set[str]]:
    '''
    Derive the minimum-self-distance witnesses of each activity.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in
    <a,b,a> is 1, etc. The minimum self distance is the minimal observed
    self distance value in the event log. A 'witness' is an activity that
    witnesses the minimum self distance: e.g., if the minimum self distance
    of activity a in some log L is 2, then, if trace <a,b,c,a> is in log L,
    b and c are witnesses of a.

    Parameters
    ----------
    log
        Event Log to use
    msd
        Optional minimum self distance dictionary
    parameters
        Optional parameters dictionary

    Returns
    -------
    Dictionary mapping each activity to a set of witnesses.
    '''
    log = converter.apply(log, variant=converter.Variants.TO_EVENT_LOG,
                          parameters=parameters)
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                         xes_constants.DEFAULT_NAME_KEY)
    alphabet = pm4py.get_event_attribute_values(log, act_key)
    if msd is None:
        msd = msd_algo.apply(log, parameters)
    # reduce each trace to its plain sequence of activity names
    traces = [[event[act_key] for event in trace] for trace in log]
    witnesses = dict()
    for activity in alphabet:
        # only activities with a known, strictly positive msd get an entry
        if activity not in msd or msd[activity] <= 0:
            continue
        found = set()
        for trace in traces:
            positions = [i for i, act in enumerate(trace) if act == activity]
            # examine each pair of consecutive occurrences of the activity
            for left, right in zip(positions, positions[1:]):
                if right - left - 1 == msd[activity]:
                    # everything strictly between the pair is a witness
                    found.update(trace[left + 1:right])
        witnesses[activity] = found
    return witnesses
def execute_script():
    """Filter a discovered DFG by activities and paths, then view it as SVG."""
    event_log = pm4py.read_xes("../tests/input_data/receipt.xes")
    dfg, start_acts, end_acts = pm4py.discover_dfg(event_log)
    activity_count = pm4py.get_event_attribute_values(event_log,
                                                      "concept:name")
    # keep the specified amount of activities
    dfg, start_acts, end_acts, activity_count = \
        dfg_filtering.filter_dfg_on_activities_percentage(
            dfg, start_acts, end_acts, activity_count, 0.3)
    # keep the specified amount of paths
    dfg, start_acts, end_acts, activity_count = \
        dfg_filtering.filter_dfg_on_paths_percentage(
            dfg, start_acts, end_acts, activity_count, 0.3)
    # view the DFG
    freq_params = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    gviz = dfg_visualizer.apply(
        dfg,
        activities_count=activity_count,
        parameters={
            freq_params.START_ACTIVITIES: start_acts,
            freq_params.END_ACTIVITIES: end_acts,
            freq_params.FORMAT: "svg",
        })
    dfg_visualizer.view(gviz)
def __inductive_miner_internal(log, dfg, threshold, root, act_key, use_msd,
                               remove_noise=False):
    # One recursion step of the inductive miner: try base cases, then each
    # cut type in a fixed order (sequence, xor, concurrent, loop), then the
    # fall-through detections, and finally either retry with noise removal
    # or give up with a flower model. The order of attempts is significant.
    alphabet = pm4py.get_event_attribute_values(log, act_key)
    if threshold > 0 and remove_noise:
        # noise-filtering pass: prune infrequent DFG edges before cutting
        end_activities = get_ends.get_end_activities(
            log,
            parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        dfg = __filter_dfg_on_threshold(dfg, end_activities, threshold)

    original_length = len(log)
    log = pm4py.filter_log(lambda t: len(t) > 0, log)
    # revised EMPTYSTRACES
    # if too large a fraction of traces was empty, emit XOR(tau, subtree)
    if original_length - len(log) > original_length * threshold:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key,
            [EventLog(), log], use_msd)

    start_activities = get_starters.get_start_activities(
        log,
        parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    end_activities = get_ends.get_end_activities(
        log,
        parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})

    # base cases: single activity or only empty traces
    if __is_base_case_act(log, act_key) or __is_base_case_silent(log):
        return __apply_base_case(log, root, act_key)

    # standard cut detection, in the canonical inductive-miner order
    pre, post = dfg_utils.get_transitive_relations(dfg, alphabet)
    cut = sequence_cut.detect(alphabet, pre, post)
    if cut is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.SEQUENCE, root), threshold, act_key,
            sequence_cut.project(log, cut, act_key), use_msd)
    cut = xor_cut.detect(dfg, alphabet)
    if cut is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key,
            xor_cut.project(log, cut, act_key), use_msd)
    # concurrent cut may use minimum-self-distance witnesses when enabled
    cut = concurrent_cut.detect(
        dfg, alphabet, start_activities, end_activities,
        msd=msdw_algo.derive_msd_witnesses(
            log,
            msd_algo.apply(log, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key
            }),
            parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        if use_msd else None)
    if cut is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key,
            concurrent_cut.project(log, cut, act_key), use_msd)
    cut = loop_cut.detect(dfg, alphabet,
                          start_activities, end_activities)
    if cut is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key,
            loop_cut.project(log, cut, act_key), use_msd)

    # fall-through: activity occurring exactly once per trace -> PARALLEL
    aopt = activity_once_per_trace.detect(log, alphabet, act_key)
    if aopt is not None:
        operator = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root)
        operator.children.append(
            pt.ProcessTree(operator=None, parent=operator, label=aopt))
        return __add_operator_recursive_logs(
            operator, threshold, act_key,
            activity_once_per_trace.project(log, aopt, act_key), use_msd)
    # fall-through: single activity concurrent with the rest -> PARALLEL
    act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd)
    if act_conc is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key,
            activity_concurrent.project(log, act_conc, act_key), use_msd)
    # fall-through: tau loops (strict variant first)
    stl = strict_tau_loop.detect(log, start_activities, end_activities,
                                 act_key)
    if stl is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key,
            [stl, EventLog()], use_msd)
    tl = tau_loop.detect(log, start_activities, act_key)
    if tl is not None:
        return __add_operator_recursive_logs(
            pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key,
            [tl, EventLog()], use_msd)

    # nothing found: retry once with noise filtering enabled, ...
    if threshold > 0 and not remove_noise:
        return __inductive_miner(log, dfg, threshold, root, act_key, use_msd,
                                 remove_noise=True)
    # ... otherwise give up and return a flower model over the alphabet
    return __flower(alphabet, root)
def execute_script():
    # End-to-end exercise of the pm4py simplified interface: I/O,
    # conversions, discovery, conformance, statistics, filtering and
    # social network analysis, with cleanup of all files it creates.
    ENABLE_VISUALIZATION = True

    # reads a XES into an event log
    log1 = pm4py.read_xes("../tests/input_data/running-example.xes")

    # reads a CSV into a dataframe
    df = pd.read_csv("../tests/input_data/running-example.csv")
    # formats the dataframe with the mandatory columns for process mining
    # purposes
    df = pm4py.format_dataframe(df, case_id="case:concept:name",
                                activity_key="concept:name",
                                timestamp_key="time:timestamp")
    # converts the dataframe to an event log
    log2 = pm4py.convert_to_event_log(df)

    # converts the log read from XES into a stream and dataframe respectively
    stream1 = pm4py.convert_to_event_stream(log1)
    df2 = pm4py.convert_to_dataframe(log1)

    # writes the log1 to a XES file
    pm4py.write_xes(log1, "ru1.xes")

    # discovery: DFG, three Petri nets, process tree, heuristics net
    dfg, dfg_sa, dfg_ea = pm4py.discover_dfg(log1)
    petri_alpha, im_alpha, fm_alpha = pm4py.discover_petri_net_alpha(log1)
    petri_inductive, im_inductive, fm_inductive = pm4py.discover_petri_net_inductive(log1)
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.discover_petri_net_heuristics(log1)
    tree_inductive = pm4py.discover_process_tree_inductive(log1)
    heu_net = pm4py.discover_heuristics_net(log1)

    # serialize the discovered models to disk ...
    pm4py.write_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.dfg")
    pm4py.write_pnml(petri_alpha, im_alpha, fm_alpha, "ru_alpha.pnml")
    pm4py.write_pnml(petri_inductive, im_inductive, fm_inductive, "ru_inductive.pnml")
    pm4py.write_pnml(petri_heuristics, im_heuristics, fm_heuristics, "ru_heuristics.pnml")
    pm4py.write_ptml(tree_inductive, "ru_inductive.ptml")

    # ... and read them back (round-trip through the serialization formats)
    dfg, dfg_sa, dfg_ea = pm4py.read_dfg("ru_dfg.dfg")
    petri_alpha, im_alpha, fm_alpha = pm4py.read_pnml("ru_alpha.pnml")
    petri_inductive, im_inductive, fm_inductive = pm4py.read_pnml("ru_inductive.pnml")
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.read_pnml("ru_heuristics.pnml")
    tree_inductive = pm4py.read_ptml("ru_inductive.ptml")

    # save static visualizations of all the models
    pm4py.save_vis_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.png")
    pm4py.save_vis_petri_net(petri_inductive, im_inductive, fm_inductive, "ru_inductive.png")
    pm4py.save_vis_petri_net(petri_heuristics, im_heuristics, fm_heuristics, "ru_heuristics.png")
    pm4py.save_vis_process_tree(tree_inductive, "ru_inductive_tree.png")
    pm4py.save_vis_heuristics_net(heu_net, "ru_heunet.png")
    pm4py.save_vis_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.png")
    pm4py.save_vis_events_per_time_graph(log1, "ev_time.png")
    pm4py.save_vis_case_duration_graph(log1, "cd.png")
    pm4py.save_vis_dotted_chart(log1, "dotted_chart.png")
    pm4py.save_vis_performance_spectrum(log1, ["register request", "decide"], "ps.png")

    if ENABLE_VISUALIZATION:
        # interactive on-screen rendering of the same models
        pm4py.view_petri_net(petri_alpha, im_alpha, fm_alpha, format="svg")
        pm4py.view_petri_net(petri_inductive, im_inductive, fm_inductive, format="svg")
        pm4py.view_petri_net(petri_heuristics, im_heuristics, fm_heuristics, format="svg")
        pm4py.view_process_tree(tree_inductive, format="svg")
        pm4py.view_heuristics_net(heu_net, format="svg")
        pm4py.view_dfg(dfg, dfg_sa, dfg_ea, format="svg")

    # conformance checking of log1 against the inductive model
    aligned_traces = pm4py.conformance_diagnostics_alignments(log1, petri_inductive, im_inductive, fm_inductive)
    replayed_traces = pm4py.conformance_diagnostics_token_based_replay(log1, petri_inductive, im_inductive, fm_inductive)
    fitness_tbr = pm4py.fitness_token_based_replay(log1, petri_inductive, im_inductive, fm_inductive)
    print("fitness_tbr", fitness_tbr)
    fitness_align = pm4py.fitness_alignments(log1, petri_inductive, im_inductive, fm_inductive)
    print("fitness_align", fitness_align)
    precision_tbr = pm4py.precision_token_based_replay(log1, petri_inductive, im_inductive, fm_inductive)
    print("precision_tbr", precision_tbr)
    precision_align = pm4py.precision_alignments(log1, petri_inductive, im_inductive, fm_inductive)
    print("precision_align", precision_align)

    # statistics, computed both on the event log and on the dataframe
    print("log start activities = ", pm4py.get_start_activities(log2))
    print("df start activities = ", pm4py.get_start_activities(df2))
    print("log end activities = ", pm4py.get_end_activities(log2))
    print("df end activities = ", pm4py.get_end_activities(df2))
    print("log attributes = ", pm4py.get_event_attributes(log2))
    print("df attributes = ", pm4py.get_event_attributes(df2))
    print("log org:resource values = ", pm4py.get_event_attribute_values(log2, "org:resource"))
    print("df org:resource values = ", pm4py.get_event_attribute_values(df2, "org:resource"))

    # filtering on start/end activities and on attribute values
    print("start_activities len(filt_log) = ", len(pm4py.filter_start_activities(log2, ["register request"])))
    print("start_activities len(filt_df) = ", len(pm4py.filter_start_activities(df2, ["register request"])))
    print("end_activities len(filt_log) = ", len(pm4py.filter_end_activities(log2, ["pay compensation"])))
    print("end_activities len(filt_df) = ", len(pm4py.filter_end_activities(df2, ["pay compensation"])))
    print("attributes org:resource len(filt_log) (cases) cases = ",
          len(pm4py.filter_event_attribute_values(log2, "org:resource", ["Ellen"], level="case")))
    print("attributes org:resource len(filt_log) (cases) events = ",
          len(pm4py.filter_event_attribute_values(log2, "org:resource", ["Ellen"], level="event")))
    print("attributes org:resource len(filt_df) (events) cases = ",
          len(pm4py.filter_event_attribute_values(df2, "org:resource", ["Ellen"], level="case")))
    print("attributes org:resource len(filt_df) (events) events = ",
          len(pm4py.filter_event_attribute_values(df2, "org:resource", ["Ellen"], level="event")))
    print("attributes org:resource len(filt_df) (events) events notpositive = ",
          len(pm4py.filter_event_attribute_values(df2, "org:resource", ["Ellen"], level="event", retain=False)))

    # rework / case overlap / cycle time statistics
    print("rework df = ", pm4py.get_rework_cases_per_activity(df2))
    print("rework log = ", pm4py.get_rework_cases_per_activity(log2))
    print("cases overlap df = ", pm4py.get_case_overlap(df2))
    print("cases overlap log = ", pm4py.get_case_overlap(log2))
    print("cycle time df = ", pm4py.get_cycle_time(df2))
    print("cycle time log = ", pm4py.get_cycle_time(log2))
    pm4py.view_events_distribution_graph(df2, format="svg")
    pm4py.view_events_distribution_graph(log2, format="svg")

    # variant, path and time-range filtering
    print("variants log = ", pm4py.get_variants_as_tuples(log2))
    print("variants df = ", pm4py.get_variants_as_tuples(df2))
    print("variants filter log = ",
          len(pm4py.filter_variants(log2, [
              ("register request", "examine thoroughly", "check ticket", "decide", "reject request")])))
    print("variants filter df = ",
          len(pm4py.filter_variants(df2, [
              ("register request", "examine thoroughly", "check ticket", "decide", "reject request")])))
    print("paths filter log len = ",
          len(pm4py.filter_directly_follows_relation(log2, [("register request", "examine casually")])))
    print("paths filter dataframe len = ",
          len(pm4py.filter_directly_follows_relation(df2, [("register request", "examine casually")])))
    print("timeframe filter log events len = ",
          len(pm4py.filter_time_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events")))
    print("timeframe filter log traces_contained len = ",
          len(pm4py.filter_time_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained")))
    print("timeframe filter log traces_intersecting len = ",
          len(pm4py.filter_time_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting")))
    print("timeframe filter df events len = ",
          len(pm4py.filter_time_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events")))
    print("timeframe filter df traces_contained len = ",
          len(pm4py.filter_time_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained")))
    print("timeframe filter df traces_intersecting len = ",
          len(pm4py.filter_time_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting")))

    # remove the temporary files
    os.remove("ru1.xes")
    os.remove("ru_dfg.dfg")
    os.remove("ru_alpha.pnml")
    os.remove("ru_inductive.pnml")
    os.remove("ru_heuristics.pnml")
    os.remove("ru_inductive.ptml")
    os.remove("ru_alpha.png")
    os.remove("ru_inductive.png")
    os.remove("ru_heuristics.png")
    os.remove("ru_inductive_tree.png")
    os.remove("ru_heunet.png")
    os.remove("ru_dfg.png")
    os.remove("ev_time.png")
    os.remove("cd.png")
    os.remove("dotted_chart.png")
    os.remove("ps.png")

    # organizational mining / social network analysis
    wt_log = pm4py.discover_working_together_network(log2)
    wt_df = pm4py.discover_working_together_network(df2)
    print("log working together", wt_log)
    print("df working together", wt_df)
    print("log subcontracting", pm4py.discover_subcontracting_network(log2))
    print("df subcontracting", pm4py.discover_subcontracting_network(df2))
    print("log working together", pm4py.discover_working_together_network(log2))
    print("df working together", pm4py.discover_working_together_network(df2))
    print("log similar activities", pm4py.discover_activity_based_resource_similarity(log2))
    print("df similar activities", pm4py.discover_activity_based_resource_similarity(df2))
    print("log org roles", pm4py.discover_organizational_roles(log2))
    print("df org roles", pm4py.discover_organizational_roles(df2))
    pm4py.view_sna(wt_log)
    pm4py.save_vis_sna(wt_df, "ru_wt_df.png")
    os.remove("ru_wt_df.png")