def execute_script():
    """Stream the receipt log through a footprints-based conformance checker.

    A process tree is discovered from the log, its footprints are computed,
    and every event of the log is then pushed through a live event stream
    into a streaming conformance object built from those footprints. The
    resulting diagnostics are printed twice: in full, and restricted to the
    rows whose ``is_fit`` column equals ``False``.
    """
    # Load the XES log and flatten it into a sequence of events
    # (events are no longer grouped by case).
    xes_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = pm4py.read_xes(xes_path)
    event_stream = pm4py.convert_to_event_stream(log)

    # Reference model: process tree discovered with noise threshold 0.5,
    # from which the footprints are derived.
    tree = pm4py.discover_tree_inductive(log, noise_threshold=0.5)
    footprints = fp_discovery.apply(tree)

    # The live stream distributes incoming events to its registered
    # listeners; here the only listener is the conformance object.
    live_stream = LiveEventStream()
    conf_obj = streaming_fp_conf.apply(footprints)
    live_stream.register(conf_obj)

    # Replay the whole log through the stream.
    live_stream.start()
    for evt in event_stream:
        live_stream.append(evt)
    live_stream.stop()

    # Collect the diagnostics first, then signal termination of all cases.
    diagnostics = conf_obj.get()
    conf_obj.terminate_all()

    print(diagnostics)
    unfit_rows = diagnostics["is_fit"] == False
    print(diagnostics[unfit_rows])
def execute_script():
    """Align a log against an extensive playout of its own discovered tree.

    The teleclaims log is read, a process tree is discovered, and the tree is
    played out extensively (traces up to the minimum trace length plus two).
    The original log is then aligned, and afterwards anti-aligned, against
    the simulated log; timings and evaluation results are printed.
    """
    # Input log (an alternative input is "../tests/input_data/receipt.xes").
    log = pm4py.read_xes("../tests/compressed_input_data/02_teleclaims.xes.gz")
    original_variants = pm4py.get_variants(log)
    print("number of variants of the original log ->", len(original_variants))

    # Model discovery.
    tree = pm4py.discover_tree_inductive(log)

    # Simulate a second log out of the model, so that there is another log
    # similar to the original one.
    playout_started = time.time()
    shortest_trace = bottomup_discovery.get_min_trace_length(tree)
    playout_parameters = {"max_trace_length": shortest_trace + 2}
    simulated_log = tree_playout.apply(
        tree,
        variant=tree_playout.Variants.EXTENSIVE,
        parameters=playout_parameters,
    )
    print("number of variants of the simulated log -> ", len(simulated_log))

    # Alignments between the original log and the simulated one.
    alignments_started = time.time()
    aligned_traces = logs_alignment.apply(log, simulated_log)
    alignments_finished = time.time()
    print(aligned_traces[0])
    print("playout time", alignments_started - playout_started)
    print("alignments time", alignments_finished - alignments_started)
    print("TOTAL", alignments_finished - playout_started)
    print(alignment_based.evaluate(aligned_traces))

    # Same call again, this time asking for anti-alignments.
    anti_started = time.time()
    edit_distance_parameters = logs_alignment.Variants.EDIT_DISTANCE.value.Parameters
    anti_aligned_traces = logs_alignment.apply(
        log,
        simulated_log,
        parameters={edit_distance_parameters.PERFORM_ANTI_ALIGNMENT: True},
    )
    anti_finished = time.time()
    print(anti_aligned_traces[0])
    print("anti alignments time", anti_finished - anti_started)
    print(alignment_based.evaluate(anti_aligned_traces))
def execute_script():
    """Compare Petri nets from three discovery techniques on the running example.

    Petri nets are obtained with the alpha miner, the heuristics miner and by
    converting the inductive miner's process tree. For each net the script
    prints the result of the ``is_wf_net`` check and of ``woflan``, then
    tries to convert the net to a process tree. A failing conversion is
    reported via ``traceback.print_exc()`` and does not stop the script.
    The tree converted from the inductive net is also displayed as SVG.
    """
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "running-example.xes"))

    alpha_petri_net, alpha_im, alpha_fm = pm4py.discover_petri_net_alpha(log)
    heuristics_petri_net, heuristics_im, heuristics_fm = \
        pm4py.discover_petri_net_heuristics(log)
    tree = pm4py.discover_tree_inductive(log)
    print("tree discovered by inductive miner=")
    print(tree)
    inductive_petri_net, inductive_im, inductive_fm = pt_converter.apply(tree)

    # (label, net, initial marking, final marking) for every discovered model,
    # in the order in which results are reported.
    models = (
        ("alpha", alpha_petri_net, alpha_im, alpha_fm),
        ("heuristics", heuristics_petri_net, heuristics_im, heuristics_fm),
        ("inductive", inductive_petri_net, inductive_im, inductive_fm),
    )

    for label, net, _im, _fm in models:
        print("is_wf_net " + label, is_wf_net.apply(net))

    for label, net, im, fm in models:
        # A fresh dict per call, so the calls cannot influence one another
        # through a shared parameters object.
        woflan_parameters = {
            woflan.Parameters.RETURN_ASAP_WHEN_NOT_SOUND: True,
            woflan.Parameters.PRINT_DIAGNOSTICS: False,
        }
        print("woflan " + label,
              woflan.apply(net, im, fm, parameters=woflan_parameters))

    for label, net, im, fm in models:
        try:
            converted_tree = wf_net_converter.apply(net, im, fm)
            print(converted_tree)
            if label == "inductive":
                pm4py.view_process_tree(converted_tree, format="svg")
        except Exception:
            # Best effort: report the failure and move on to the next model.
            # `Exception` (not a bare `except`) lets KeyboardInterrupt and
            # SystemExit propagate, so the script can still be interrupted.
            traceback.print_exc()
def test_footprints_extensive(self):
    """Run trace-extensive footprints conformance on the running example.

    Smoke test: it only checks that the calls complete without raising.
    """
    event_log = pm4py.read_xes("input_data/running-example.xes")

    # Footprints of the log, computed trace by trace.
    per_trace_variant = footprints_discovery.Variants.TRACE_BY_TRACE
    log_footprints = footprints_discovery.apply(event_log, variant=per_trace_variant)

    # Footprints of a process tree discovered with noise threshold 0.2.
    discovered_tree = pm4py.discover_tree_inductive(event_log, noise_threshold=0.2)
    model_footprints = footprints_discovery.apply(discovered_tree)

    conformance = trace_extensive.apply(log_footprints, model_footprints)
    diagnostics = trace_extensive.get_diagnostics_dataframe(event_log, conformance)
def execute_script():
    """Show a discovered process tree and its reduction for the first trace.

    The tree discovered from the receipt log is displayed, then reduced with
    the trace-based reducer variant using the first trace of the log, and the
    reduced tree is displayed as well.
    """
    receipt_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = pm4py.read_xes(receipt_path)

    # The tree discovered by the inductive miner is huge and can replay the
    # behavior of the log.
    full_tree = pm4py.discover_tree_inductive(log)
    pm4py.view_process_tree(full_tree, "svg")

    # For a more effective replay, drop the elements not used while replaying
    # the trace (the skippable ones with empty intersection with the trace).
    first_trace = log[0]
    reduced_tree = reducer.apply(
        full_tree,
        first_trace,
        variant=reducer.Variants.TREE_TR_BASED,
    )
    pm4py.view_process_tree(reduced_tree, "svg")
def execute_script():
    """Print the size of the serialized form of several visualizations.

    Seven objects are derived from the running-example log (DFG, process
    tree, heuristics net, alpha-miner Petri net, BPMN, transition system and
    a case-duration KDE graph). Each is rendered with its visualizer, and the
    length of every serialization is printed next to the number of the
    visualization it belongs to.
    """

    def report(label, payload):
        # One output line per serialization: label followed by its length.
        print(label, len(payload))

    log = pm4py.read_xes("../tests/input_data/running-example.xes")

    # Objects to visualize.
    dfg, sa, ea = pm4py.discover_dfg(log)
    tree = pm4py.discover_tree_inductive(log)
    heu_net = pm4py.discover_heuristics_net(log)
    net, im, fm = pm4py.discover_petri_net_alpha(log)
    bpmn = pm4py.convert_to_bpmn(tree)
    ts = ts_discovery.apply(log)
    x_cases, y_cases = case_statistics.get_kde_caseduration(log)

    # Visualizations.
    dfg_gviz = dfg_visualizer.apply(dfg)
    tree_gviz = tree_visualizer.apply(tree)
    heu_gviz = hn_visualizer.apply(heu_net)
    petri_gviz = pn_visualizer.apply(net, im, fm)
    bpmn_gviz = bpmn_visualizer.apply(bpmn)
    ts_gviz = ts_visualizer.apply(ts)
    cases_variant = graphs_visualizer.Variants.CASES
    graph_gviz = graphs_visualizer.apply(
        x_cases,
        y_cases,
        variant=cases_variant,
        parameters={cases_variant.value.Parameters.FORMAT: "svg"},
    )

    # Serializations.
    report("1", dfg_visualizer.serialize_dot(dfg_gviz))
    report("1", dfg_visualizer.serialize(dfg_gviz))
    report("2", tree_visualizer.serialize_dot(tree_gviz))
    report("2", tree_visualizer.serialize(tree_gviz))
    report("3", hn_visualizer.serialize(heu_gviz))
    report("4", pn_visualizer.serialize_dot(petri_gviz))
    report("4", pn_visualizer.serialize(petri_gviz))
    report("5", bpmn_visualizer.serialize_dot(bpmn_gviz))
    report("5", bpmn_visualizer.serialize(bpmn_gviz))
    report("6", ts_visualizer.serialize_dot(ts_gviz))
    report("6", ts_visualizer.serialize(ts_gviz))
    report("7", graphs_visualizer.serialize(graph_gviz))
"""
# NOTE(review): the triple quote above appears to close a docstring that
# starts before this excerpt -- confirm against the full file before editing.
import pandas as pd
import pm4py

# Disabled helper kept as a bare string literal; it is never executed.
# NOTE(review): it calls `pandas.read_csv`, but only the alias `pd` is
# imported here, so it would need adjusting before being re-enabled.
''' def import_csv(file_path): event_log = pandas.read_csv(file_path, sep=';') event_log = pm4py.format_dataframe(event_log, case_id='case_id', activity_key='activity', timestamp_key='timestamp') start_activities = pm4py.get_start_activities(event_log) end_activities = pm4py.get_end_activities(event_log) print("Start activities: {}\nEnd activities: {}".format(start_activities, end_activities)) '''

# Placeholder input path -- presumably meant to be replaced with a real CSV file.
file_path = r'file'
# Read the ';'-separated CSV and map its 'case_id', 'activity' and 'timestamp'
# columns to the case id, activity and timestamp keys via pm4py.format_dataframe.
log = pm4py.format_dataframe(pd.read_csv(file_path, sep=';'), case_id='case_id', activity_key='activity', timestamp_key='timestamp')
# Disabled filter that would keep only the rows whose '@@index' is below 40:
#log = log[log['@@index']< 40]
# Discover a process tree, convert it to BPMN and display the BPMN model.
process_tree = pm4py.discover_tree_inductive(log)
bpmn_model = pm4py.convert_to_bpmn(process_tree)
pm4py.view_bpmn(bpmn_model)
# Second discovery through the inductive miner module, displayed as a process tree.
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.visualization.process_tree import visualizer as pt_visualizer
tree = inductive_miner.apply_tree(log)
gviz = pt_visualizer.apply(tree)
pt_visualizer.view(gviz)
def test_inductive_miner_tree(self):
    """Discover a process tree with default settings and with a noise threshold.

    Smoke test: it only checks that both discovery calls complete.
    """
    event_log = pm4py.read_xes("input_data/running-example.xes")
    # First the default settings, then a noise threshold of 0.2.
    for discovery_options in ({}, {"noise_threshold": 0.2}):
        tree = pm4py.discover_tree_inductive(event_log, **discovery_options)
def execute_script():
    """Exercise the simplified pm4py interface on the running-example data.

    The script reads the running example from XES and from CSV, converts
    between event log, event stream and dataframe, discovers several models,
    round-trips them through files, renders them, and runs conformance and
    evaluation calls. It then prints the results of statistics and filtering
    calls on both the event log and the dataframe.

    All files are written to the current working directory and removed in a
    ``finally`` clause, so a failure part-way through leaves none behind.
    """
    ENABLE_VISUALIZATION = True

    # Files created below, in the order in which they are removed at the end.
    temporary_files = [
        "ru1.xes",
        "ru_dfg.dfg",
        "ru_alpha.pnml",
        "ru_inductive.pnml",
        "ru_heuristics.pnml",
        "ru_inductive.ptml",
        "ru_alpha.png",
        "ru_inductive.png",
        "ru_heuristics.png",
        "ru_inductive_tree.png",
        "ru_heunet.png",
        "ru_dfg.png",
    ]

    # Time window shared by all the timeframe filters.
    window_start = "2011-01-01 00:00:00"
    window_end = "2011-02-01 00:00:00"

    try:
        # reads a XES into an event log
        log1 = pm4py.read_xes("../tests/input_data/running-example.xes")
        # reads a CSV into a dataframe
        df = pd.read_csv("../tests/input_data/running-example.csv")
        # formats the dataframe with the mandatory columns for process mining purposes
        df = pm4py.format_dataframe(df,
                                    case_id="case:concept:name",
                                    activity_key="concept:name",
                                    timestamp_key="time:timestamp")
        # converts the dataframe to an event log
        log2 = pm4py.convert_to_event_log(df)

        # converts the log read from XES into a stream and dataframe respectively
        # (the stream is not used afterwards; the call is kept to exercise it)
        pm4py.convert_to_event_stream(log1)
        df2 = pm4py.convert_to_dataframe(log1)

        # writes the log1 to a XES file
        pm4py.write_xes(log1, "ru1.xes")

        # model discovery
        dfg, dfg_sa, dfg_ea = pm4py.discover_dfg(log1)
        petri_alpha, im_alpha, fm_alpha = pm4py.discover_petri_net_alpha(log1)
        petri_inductive, im_inductive, fm_inductive = \
            pm4py.discover_petri_net_inductive(log1)
        petri_heuristics, im_heuristics, fm_heuristics = \
            pm4py.discover_petri_net_heuristics(log1)
        tree_inductive = pm4py.discover_tree_inductive(log1)
        heu_net = pm4py.discover_heuristics_net(log1)

        # writes the models to files ...
        pm4py.write_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.dfg")
        pm4py.write_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.pnml")
        pm4py.write_petri_net(petri_inductive, im_inductive, fm_inductive,
                              "ru_inductive.pnml")
        pm4py.write_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                              "ru_heuristics.pnml")
        pm4py.write_process_tree(tree_inductive, "ru_inductive.ptml")

        # ... and reads them back; the re-read models are used from here on
        dfg, dfg_sa, dfg_ea = pm4py.read_dfg("ru_dfg.dfg")
        petri_alpha, im_alpha, fm_alpha = pm4py.read_petri_net("ru_alpha.pnml")
        petri_inductive, im_inductive, fm_inductive = pm4py.read_petri_net(
            "ru_inductive.pnml")
        petri_heuristics, im_heuristics, fm_heuristics = pm4py.read_petri_net(
            "ru_heuristics.pnml")
        tree_inductive = pm4py.read_process_tree("ru_inductive.ptml")

        # saves the visualizations to files
        pm4py.save_vis_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.png")
        pm4py.save_vis_petri_net(petri_inductive, im_inductive, fm_inductive,
                                 "ru_inductive.png")
        pm4py.save_vis_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                                 "ru_heuristics.png")
        pm4py.save_vis_process_tree(tree_inductive, "ru_inductive_tree.png")
        pm4py.save_vis_heuristics_net(heu_net, "ru_heunet.png")
        pm4py.save_vis_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.png")

        if ENABLE_VISUALIZATION:
            pm4py.view_petri_net(petri_alpha, im_alpha, fm_alpha, format="svg")
            pm4py.view_petri_net(petri_inductive, im_inductive, fm_inductive,
                                 format="svg")
            pm4py.view_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                                 format="svg")
            pm4py.view_process_tree(tree_inductive, format="svg")
            pm4py.view_heuristics_net(heu_net, format="svg")
            pm4py.view_dfg(dfg, dfg_sa, dfg_ea, format="svg")

        # conformance checking (results are not used; the calls are kept to
        # exercise them)
        pm4py.conformance_alignments(log1, petri_inductive, im_inductive,
                                     fm_inductive)
        pm4py.conformance_tbr(log1, petri_inductive, im_inductive, fm_inductive)

        # evaluation
        fitness_tbr = pm4py.evaluate_fitness_tbr(log1, petri_inductive,
                                                 im_inductive, fm_inductive)
        print("fitness_tbr", fitness_tbr)
        fitness_align = pm4py.evaluate_fitness_alignments(
            log1, petri_inductive, im_inductive, fm_inductive)
        print("fitness_align", fitness_align)
        precision_tbr = pm4py.evaluate_precision_tbr(log1, petri_inductive,
                                                     im_inductive, fm_inductive)
        print("precision_tbr", precision_tbr)
        precision_align = pm4py.evaluate_precision_alignments(
            log1, petri_inductive, im_inductive, fm_inductive)
        print("precision_align", precision_align)

        # statistics on the event log and on the dataframe
        print("log start activities = ", pm4py.get_start_activities(log2))
        print("df start activities = ", pm4py.get_start_activities(df2))
        print("log end activities = ", pm4py.get_end_activities(log2))
        print("df end activities = ", pm4py.get_end_activities(df2))
        print("log attributes = ", pm4py.get_attributes(log2))
        print("df attributes = ", pm4py.get_attributes(df2))
        print("log org:resource values = ",
              pm4py.get_attribute_values(log2, "org:resource"))
        print("df org:resource values = ",
              pm4py.get_attribute_values(df2, "org:resource"))

        # start / end activities filters
        print("start_activities len(filt_log) = ",
              len(pm4py.filter_start_activities(log2, ["register request"])))
        print("start_activities len(filt_df) = ",
              len(pm4py.filter_start_activities(df2, ["register request"])))
        print("end_activities len(filt_log) = ",
              len(pm4py.filter_end_activities(log2, ["pay compensation"])))
        print("end_activities len(filt_df) = ",
              len(pm4py.filter_end_activities(df2, ["pay compensation"])))

        # attribute values filters
        print(
            "attributes org:resource len(filt_log) (cases) cases = ",
            len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"],
                                              level="case")))
        print(
            "attributes org:resource len(filt_log) (cases) events = ",
            len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"],
                                              level="event")))
        print(
            "attributes org:resource len(filt_df) (events) cases = ",
            len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"],
                                              level="case")))
        print(
            "attributes org:resource len(filt_df) (events) events = ",
            len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"],
                                              level="event")))
        print(
            "attributes org:resource len(filt_df) (events) events notpositive = ",
            len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"],
                                              level="event", retain=False)))

        # variants
        print("variants log = ", pm4py.get_variants(log2))
        print("variants df = ", pm4py.get_variants(df2))
        print(
            "variants filter log = ",
            len(pm4py.filter_variants(log2, [[
                "register request", "examine thoroughly", "check ticket",
                "decide", "reject request"
            ]])))
        print(
            "variants filter df = ",
            len(pm4py.filter_variants(df2, [[
                "register request", "examine thoroughly", "check ticket",
                "decide", "reject request"
            ]])))
        print("variants filter percentage = ",
              len(pm4py.filter_variants_percentage(log2, threshold=0.8)))

        # directly-follows (paths) filters
        print(
            "paths filter log len = ",
            len(pm4py.filter_directly_follows_relation(
                log2, [("register request", "examine casually")])))
        print(
            "paths filter dataframe len = ",
            len(pm4py.filter_directly_follows_relation(
                df2, [("register request", "examine casually")])))

        # timeframe filters
        print(
            "timeframe filter log events len = ",
            len(pm4py.filter_time_range(log2, window_start, window_end,
                                        mode="events")))
        print(
            "timeframe filter log traces_contained len = ",
            len(pm4py.filter_time_range(log2, window_start, window_end,
                                        mode="traces_contained")))
        print(
            "timeframe filter log traces_intersecting len = ",
            len(pm4py.filter_time_range(log2, window_start, window_end,
                                        mode="traces_intersecting")))
        print(
            "timeframe filter df events len = ",
            len(pm4py.filter_time_range(df2, window_start, window_end,
                                        mode="events")))
        print(
            "timeframe filter df traces_contained len = ",
            len(pm4py.filter_time_range(df2, window_start, window_end,
                                        mode="traces_contained")))
        print(
            "timeframe filter df traces_intersecting len = ",
            len(pm4py.filter_time_range(df2, window_start, window_end,
                                        mode="traces_intersecting")))
    finally:
        # remove the temporary files; files that were never created (because
        # the script failed earlier) are skipped, so the cleanup itself does
        # not hide the original error behind a FileNotFoundError
        for temporary_file in temporary_files:
            if os.path.exists(temporary_file):
                os.remove(temporary_file)
def test_bpmn_layouting(self):
    """Convert a discovered process tree to BPMN and run the layouter on it.

    Smoke test: it only checks that the calls complete without raising.
    """
    xes_path = os.path.join("input_data", "running-example.xes")
    event_log = pm4py.read_xes(xes_path)
    discovered_tree = pm4py.discover_tree_inductive(event_log)

    to_bpmn = tree_converter.Variants.TO_BPMN
    bpmn_graph = tree_converter.apply(discovered_tree, variant=to_bpmn)
    # The laid-out graph replaces the converted one.
    bpmn_graph = bpmn_layouter.apply(bpmn_graph)