def view_dfg(dfg: dict, start_activities: dict, end_activities: dict, format: str = "png", log: Optional[EventLog] = None):
    """
    Views a (composite) DFG

    Parameters
    -------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    format
        Format of the output picture (default: png)
    log
        (Optional) event log from which visualization properties are derived
    """
    from pm4py.visualization.dfg import visualizer as dfg_visualizer

    variant = dfg_visualizer.Variants.FREQUENCY
    param_keys = variant.value.Parameters
    # start from the log-derived properties, then layer the rendering options on top
    viz_params = get_properties(log)
    viz_params[param_keys.FORMAT] = format
    viz_params[param_keys.START_ACTIVITIES] = start_activities
    viz_params[param_keys.END_ACTIVITIES] = end_activities
    gviz = dfg_visualizer.apply(dfg, log=log, variant=variant, parameters=viz_params)
    dfg_visualizer.view(gviz)
def view_performance_dfg(dfg: dict, start_activities: dict, end_activities: dict, format: str = "png", aggregation_measure="mean"):
    """
    Views a performance DFG

    Parameters
    ----------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    format
        Format of the output picture (default: png)
    aggregation_measure
        Aggregation measure (default: mean): mean, median, min, max, sum, stdev
    """
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    from pm4py.visualization.dfg.variants import performance as dfg_perf_visualizer

    param_keys = dfg_perf_visualizer.Parameters
    viz_params = {
        param_keys.FORMAT: format,
        param_keys.START_ACTIVITIES: start_activities,
        param_keys.END_ACTIVITIES: end_activities,
        param_keys.AGGREGATION_MEASURE: aggregation_measure,
    }
    gviz = dfg_perf_visualizer.apply(dfg, parameters=viz_params)
    dfg_visualizer.view(gviz)
def execute_script():
    """Interval-log demo: statistics, frequency/performance DFGs, and a Petri-net conversion."""
    log_path = os.path.join("..", "tests", "input_data", "interval_event_log.xes")
    # log_path = os.path.join("..", "tests", "input_data", "reviewing.xes")
    log = xes_importer.apply(log_path)

    parameters = {
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "strict": False,
        "format": "svg",
    }
    # compute start/end activities first, then store them in the same parameters dict
    start_activities = sa_get.get_start_activities(log, parameters=parameters)
    end_activities = ea_get.get_end_activities(log, parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    # print the sojourn-time, concurrent-activities and eventually-follows statistics
    for label, stat_module in (("soj_time", soj_time_get), ("conc_act", conc_act_get), ("efg", efg_get)):
        result = stat_module.apply(log, parameters=parameters)
        print(label)
        print(result)

    dfg_freq = dfg_algorithm.apply(log, parameters=parameters, variant=dfg_algorithm.Variants.FREQUENCY)
    dfg_perf = dfg_algorithm.apply(log, parameters=parameters, variant=dfg_algorithm.Variants.PERFORMANCE)
    for graph, variant in ((dfg_freq, dfg_vis_fact.Variants.FREQUENCY), (dfg_perf, dfg_vis_fact.Variants.PERFORMANCE)):
        dfg_vis_fact.view(dfg_vis_fact.apply(graph, log=log, variant=variant, parameters=parameters))

    # convert the frequency DFG to a Petri net and show it
    net, im, fm = dfg_conv.apply(dfg_freq)
    pn_vis.view(pn_vis.apply(net, im, fm, parameters=parameters))
def execute_script():
    """Streaming demo: replay a XES log event-by-event and discover a DFG on the stream."""
    # imports a XES event log
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    # flatten the log into a stream of single events (not grouped in cases anymore)
    event_stream = pm4py.convert_to_event_stream(log)

    # live event stream: distributes incoming events to registered algorithms
    live_stream = LiveEventStream()
    # streaming DFG discovery object, registered as a consumer of the stream
    stream_dfg_disc = dfg_discovery.apply()
    live_stream.register(stream_dfg_disc)
    live_stream.start()

    # replay every event of the original log through the live stream
    for event in event_stream:
        live_stream.append(event)
    live_stream.stop()

    # retrieve the DFG along with the activity / start / end counters
    dfg, activities, start_activities, end_activities = stream_dfg_disc.get()
    viz_params = {
        "format": "svg",
        "start_activities": start_activities,
        "end_activities": end_activities,
    }
    gviz = dfg_visualizer.apply(dfg, variant=dfg_visualizer.Variants.FREQUENCY,
                                activities_count=activities, parameters=viz_params)
    dfg_visualizer.view(gviz)
def view_dfg(dfg, start_activities, end_activities, format="png", log=None):
    """
    Views a (composite) DFG

    Parameters
    -------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    format
        Format of the output picture (default: png)
    log
        (Optional) event log passed through to the visualizer
    """
    from pm4py.visualization.dfg import visualizer as dfg_visualizer

    param_keys = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    viz_params = {
        param_keys.FORMAT: format,
        param_keys.START_ACTIVITIES: start_activities,
        param_keys.END_ACTIVITIES: end_activities,
    }
    gviz = dfg_visualizer.apply(dfg, log=log, variant=dfg_visualizer.Variants.FREQUENCY, parameters=viz_params)
    dfg_visualizer.view(gviz)
def execute_script():
    """Correlation-miner demo on the interval event log (frequency and performance DFGs)."""
    df = pd.read_csv("../tests/input_data/interval_event_log.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    act_count = dict(df["concept:name"].value_counts())

    parameters = {
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        "format": "svg",
    }
    # compute start/end activities first, then feed them back into the parameters
    start_activities = sa_get.get_start_activities(df, parameters=parameters)
    end_activities = ea_get.get_end_activities(df, parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    soj_time = soj_time_get.apply(df, parameters=parameters)

    dfg, performance_dfg = correlation_miner.apply(df, variant=correlation_miner.Variants.CLASSIC,
                                                   parameters=parameters)
    # render the frequency DFG, then the performance DFG
    for graph, variant in ((dfg, dfg_vis.Variants.FREQUENCY), (performance_dfg, dfg_vis.Variants.PERFORMANCE)):
        gviz = dfg_vis.apply(graph, activities_count=act_count, soj_time=soj_time,
                             variant=variant, parameters=parameters)
        dfg_vis.view(gviz)
def execute_script():
    """Correlation-miner demo on the receipt CSV, rendered as SVG."""
    df = pd.read_csv("../tests/input_data/receipt.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    act_count = dict(df["concept:name"].value_counts())

    dfg, performance_dfg = correlation_miner.apply(df, variant=correlation_miner.Variants.CLASSIC)
    # render the frequency DFG, then the performance DFG
    for graph, variant in ((dfg, dfg_vis.Variants.FREQUENCY), (performance_dfg, dfg_vis.Variants.PERFORMANCE)):
        gviz = dfg_vis.apply(graph, activities_count=act_count, variant=variant,
                             parameters={"format": "svg"})
        dfg_vis.view(gviz)
def execute_script():
    """Dataframe demo: statistics and DFGs on the reviewing log, plus a Petri-net conversion."""
    log_path = os.path.join("..", "tests", "input_data", "interval_event_log.csv")
    dataframe = pm4py.read_csv(log_path)
    log_path = os.path.join("..", "tests", "input_data", "reviewing.xes")
    log = pm4py.read_xes(log_path)
    # NOTE(review): this overwrites the CSV dataframe read above — only the XES log is used below
    dataframe = pm4py.convert_to_dataframe(log)

    parameters = {
        # constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        constants.PARAMETER_CONSTANT_CASEID_KEY: "case:concept:name",
        "strict": True,
        "format": "svg",
    }
    start_activities = sa_get.get_start_activities(dataframe, parameters=parameters)
    end_activities = ea_get.get_end_activities(dataframe, parameters=parameters)
    att_count = att_get.get_attribute_values(dataframe, "concept:name", parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    soj_time = soj_time_get.apply(dataframe, parameters=parameters)
    print("soj_time")
    print(soj_time)
    conc_act = conc_act_get.apply(dataframe, parameters=parameters)
    print("conc_act")
    print(conc_act)
    efg = efg_get.apply(dataframe, parameters=parameters)
    print("efg")
    print(efg)

    dfg_freq, dfg_perf = df_statistics.get_dfg_graph(dataframe, measure="both",
                                                     start_timestamp_key="start_timestamp")
    # render the frequency DFG, then the performance DFG (both annotated with sojourn times)
    for graph, variant in ((dfg_freq, dfg_vis_fact.Variants.FREQUENCY), (dfg_perf, dfg_vis_fact.Variants.PERFORMANCE)):
        gviz = dfg_vis_fact.apply(graph, activities_count=att_count, variant=variant,
                                  soj_time=soj_time, parameters=parameters)
        dfg_vis_fact.view(gviz)

    # convert the frequency DFG to a Petri net and show it
    net, im, fm = dfg_conv.apply(dfg_freq)
    pn_vis.view(pn_vis.apply(net, im, fm, parameters=parameters))
def execute_script():
    """Correlation-miner demo reading the receipt CSV through the legacy import adapter."""
    df = csv_import_adapter.import_dataframe_from_path("../tests/input_data/receipt.csv")
    act_count = dict(df["concept:name"].value_counts())

    dfg, performance_dfg = correlation_miner.apply(df, variant=correlation_miner.Variants.CLASSIC)
    # render the frequency DFG, then the performance DFG
    for graph, variant in ((dfg, dfg_vis.Variants.FREQUENCY), (performance_dfg, dfg_vis.Variants.PERFORMANCE)):
        gviz = dfg_vis.apply(graph, activities_count=act_count, variant=variant,
                             parameters={"format": "svg"})
        dfg_vis.view(gviz)
def execute_script():
    """DFG-alignment demo: filter the DFG, align the log against it, then compare with Petri-net alignments."""
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    print("number of cases", len(log))
    print("number of events", sum(len(trace) for trace in log))
    print("number of variants", len(pm4py.get_variants(log)))

    ac = get.get_attribute_values(log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(log)
    perc = 0.5
    # keep only the given percentage of activities, then of paths
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_activities_percentage(dfg, sa, ea, ac, perc)
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_paths_percentage(dfg, sa, ea, ac, perc)

    # time the DFG-based alignments
    dfg_align_start = time.time()
    aligned_traces = dfg_alignment.apply(log, dfg, sa, ea)
    dfg_align_end = time.time()
    net, im, fm = pm4py.convert_to_petri_net(dfg, sa, ea)
    # report traces where the externally-visible cost disagrees with the internal one
    for trace in aligned_traces:
        if trace["cost"] != trace["internal_cost"]:
            print(trace)
    print(dfg_align_end - dfg_align_start)
    print(sum(trace["visited_states"] for trace in aligned_traces))
    print(sum(trace["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST for trace in aligned_traces))

    gviz = visualizer.apply(dfg, activities_count=ac,
                            parameters={"start_activities": sa, "end_activities": ea, "format": "svg"})
    visualizer.view(gviz)

    # time the Petri-net alignments on the converted model for comparison
    pn_align_start = time.time()
    aligned_traces2 = petri_alignments.apply(log, net, im, fm,
                                             variant=petri_alignments.Variants.VERSION_DIJKSTRA_LESS_MEMORY)
    pn_align_end = time.time()
    print(pn_align_end - pn_align_start)
    print(sum(trace["visited_states"] for trace in aligned_traces2))
    print(sum(trace["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST for trace in aligned_traces2))
def save_full_dfg(log):
    """
    Discovers the frequency DFG of the whole log, shows it on screen, and
    saves an SVG rendering to "dfg_full.svg".

    Parameters
    ------------
    log
        Event log

    Returns
    ------------
    gviz
        Graphviz object of the SVG rendering that was saved
    """
    dfg = dfg_discovery.apply(log)
    # on-screen preview using the visualizer's default format
    gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz)
    # BUGFIX: the FORMAT key was previously taken from the PERFORMANCE variant's
    # Parameters enum although the FREQUENCY variant is rendered; use the matching enum
    parameters = {
        dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    }
    gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY,
                                   parameters=parameters)
    dfg_visualization.save(gviz, "dfg_full.svg")
    # fixed typo in the status message ("saves" -> "saved")
    print('Full DFG saved as "dfg_full.svg"')
    return gviz
def execute_script():
    """Running-example demo: discover a DFG, view it, and convert it to a Petri net."""
    log = xes_importer.apply(os.path.join("..", "tests", "input_data", "running-example.xes"))
    dfg = dfg_algorithm.apply(log)

    # SVG rendering of the (frequency) DFG
    dfg_gv = dfg_vis_fact.apply(dfg, log, parameters={
        dfg_vis_fact.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    })
    dfg_vis_fact.view(dfg_gv)

    # convert to a Petri net and show it without decorations
    net, im, fm = dfg_conv.apply(dfg)
    gviz = pn_vis.apply(net, im, fm, parameters={
        pn_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "svg"
    })
    pn_vis.view(gviz)
def execute_script():
    """Receipt demo: discover a DFG, keep 30% of activities and paths, and view the result."""
    log = pm4py.read_xes("../tests/input_data/receipt.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
    act_count = pm4py.get_attribute_values(log, "concept:name")

    filtering = pm4py.objects.dfg.filtering.dfg_filtering
    # keep the specified amount of activities
    dfg, sa, ea, act_count = filtering.filter_dfg_on_activities_percentage(dfg, sa, ea, act_count, 0.3)
    # keep the specified amount of paths
    dfg, sa, ea, act_count = filtering.filter_dfg_on_paths_percentage(dfg, sa, ea, act_count, 0.3)

    # view the DFG
    param_keys = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    gviz = dfg_visualizer.apply(dfg, activities_count=act_count, parameters={
        param_keys.START_ACTIVITIES: sa,
        param_keys.END_ACTIVITIES: ea,
        param_keys.FORMAT: "svg",
    })
    dfg_visualizer.view(gviz)
partition.keys(), node_size=40, cmap=cmap, node_color=list(partition.values())) nx.draw_networkx_edges(G, pos, alpha=0.5) plt.show() community_louvain.modularity(partition, G) girvannewman = graphLearning.community_dection_graph(graph, mst=False) from pm4py.visualization.dfg import visualizer as dfg_visualization gviz = dfg_visualization.apply(dfg, log=ex1_personal_log_1_converted, variant=dfg_visualization.Variants.FREQUENCY) dfg_visualization.view(gviz) def fixDfg(dfg, activityList=[ 'Read_Labsheet', 'Read_Lecture_Note', 'Excercise', 'Check_solution' ]): result = {} transitionList = [] for i in activityList: for j in activityList: transitionList.append((i, j)) for t in transitionList: if t in dfg: result.update({t: dfg[t]})
def filter_for_periods(detect_result, event_counts):
    """
    Slices the (module-level) log into the two chosen change-point periods,
    discovers a frequency DFG for each, shows them, and saves them as
    "dfg1.svg" / "dfg2.svg".

    NOTE(review): relies on the module-level names CHOSEN_PERIOD1, CHOSEN_PERIOD2
    and log — confirm they are defined before this is called.

    Returns
    ------------
    (days_count1, days_count2, period_1_log, period_2_log, gviz1, gviz2)
    """
    def bounds(chosen):
        # change-point index range [start, end) for the chosen period;
        # period 1 starts at index 0, later periods start at the previous change point
        start = 0 if chosen == 1 else detect_result[chosen - 2]
        return start, detect_result[chosen - 1]

    start_element1, end_element1 = bounds(CHOSEN_PERIOD1)
    start_element2, end_element2 = bounds(CHOSEN_PERIOD2)

    days = list(event_counts.keys())
    # print(days[start_element1])
    start_day1, end_day1 = days[start_element1], days[end_element1 - 1]
    days_count1 = end_element1 - start_element1
    start_day2, end_day2 = days[start_element2], days[end_element2 - 1]
    days_count2 = end_element2 - start_element2

    # Traces that are FULLY CONTAINED in the given timeframe
    period_1_log = timestamp_filter.filter_traces_contained(
        log, start_day1 + " 00:00:00", end_day1 + " 23:59:59")
    period_2_log = timestamp_filter.filter_traces_contained(
        log, start_day2 + " 00:00:00", end_day2 + " 23:59:59")
    # Traces that INTERSECT with the given timeframe
    # period_1_log = timestamp_filter.filter_traces_intersecting(log, start_day+" 00:00:00", end_day+" 23:59:59")

    dfg1 = dfg_discovery.apply(period_1_log)
    dfg2 = dfg_discovery.apply(period_2_log)

    # preview of period 1, then an SVG rendering for saving
    gviz1 = dfg_visualization.apply(dfg1, log=period_1_log,
                                    variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz1)
    # Saving the DFG
    parameters = {
        dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"
    }
    gviz1 = dfg_visualization.apply(dfg1, log=period_1_log,
                                    variant=dfg_visualization.Variants.FREQUENCY,
                                    parameters=parameters)
    dfg_visualization.save(gviz1, "dfg1.svg")
    nodes_period1, edges_period1 = dot_to_df(gviz1)

    # preview of period 2, then an SVG rendering for saving
    gviz2 = dfg_visualization.apply(dfg2, log=period_2_log,
                                    variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz2)
    # Saving the DFG (same SVG parameters as above)
    gviz2 = dfg_visualization.apply(dfg2, log=period_2_log,
                                    variant=dfg_visualization.Variants.FREQUENCY,
                                    parameters=parameters)
    dfg_visualization.save(gviz2, "dfg2.svg")

    return days_count1, days_count2, period_1_log, period_2_log, gviz1, gviz2
#--------------------------------------------------------------------------------- # print(os.getcwd()) log = import_log_file(INPUT_XES_FILE) event_counts, detect_result = get_change_points(log) gviz = save_full_dfg(log) nodes_full, edges_full = dot_to_df(gviz) days_count1, days_count2, period_1_log, period_2_log, gviz1, gviz2 = filter_for_periods( detect_result, event_counts) nodes_period1, edges_period1 = dot_to_df(gviz1) nodes_period2, edges_period2 = dot_to_df(gviz2) get_statistics(period_1_log, period_2_log) add_slope_to_period_df(edges_period1, period_1_log) add_slope_to_period_df(edges_period2, period_2_log) print('Slopes are added to the period dataframes') edges_merged = merge_graphs(edges_period1, edges_period2, days_count1, days_count2, edges_full) gviz_merge = df_to_dot(nodes_full, edges_merged, OUTPUT_GRAPH_FILE) print(gviz_merge) dfg_visualization.view(gviz_merge)
# Remote correlation-miner demo: connects to a pm4pydistr service and renders
# the DFG it returns.
from pm4pydistr.remote_wrapper import factory as wrapper_factory
from pm4py.visualization.dfg import visualizer as dfg_visualizer

# wrapper for the distributed service at 127.0.0.1:5001
# (presumably "hello" is the API key and "receipt" the log name — confirm against pm4pydistr docs)
wrapper = wrapper_factory.apply("127.0.0.1", "5001", "hello", "receipt")
# run the correlation miner remotely; "min_act_freq" presumably filters out
# activities occurring fewer than 100 times — verify against the service
dfg, performance_dfg, activities_counter = wrapper.correlation_miner(parameters={"min_act_freq": 100})
# render the frequency DFG as SVG
gviz = dfg_visualizer.apply(dfg, activities_count=activities_counter, parameters={"format": "svg"})
dfg_visualizer.view(gviz)
def dfg_visualizer(dfg, log, variant=dfg_visualization.Variants.FREQUENCY):
    """Render the given DFG with the chosen visualization variant and show it on screen."""
    rendering = dfg_visualization.apply(dfg, log=log, variant=variant)
    dfg_visualization.view(rendering)