def save_dfg(self, name="DFG", high_level=False):
    """Render the DFG of the current process and write it to an image file.

    :param name: optional name of dfg file
    :param high_level: generate high level dfg
    """
    dfg, log = self._createDFG()
    parameters = self._createImageParameters(log=log, high_level=high_level)
    # The high-level graph is decorated with the abstracted log returned by
    # _createDFG(); the normal graph uses the raw log kept on the instance.
    decoration_log = log if high_level else self._log
    gviz = dfg_vis_factory.apply(dfg, log=decoration_log,
                                 variant="frequency", parameters=parameters)
    self._create_image(gviz, name)
def execute_script():
    """Import every XES log found in ``xesinput``, export it, render its DFG
    and re-import the exported copy as a round-trip sanity check."""
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # BUGFIX: portable path handling instead of hard-coded "\\" separators.
        log_path = os.path.join(log_input_directory, logName)
        log = xes_importer.import_log(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)

        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + logName)
        print("exporting log", exp_log_name)
        xes_exporter.export_log(log, exp_log_name)
        print("exported log", exp_log_name)

        # Try to find (or insert) an activity classifier attribute.
        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)
        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = \
                    insert_classifier.insert_activity_classifier_attribute(
                        log, classifiers[0])
                print(classifier_attr_key)
            except Exception:
                # BUGFIX: narrowed from a bare "except:" so that
                # KeyboardInterrupt/SystemExit still propagate.
                print("exception in handling classifier")
        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        # Render the frequency DFG only when the classifier key is present.
        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }
            dfg = dfg_factory.apply(log, parameters=parameters)
            gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency",
                                         parameters=parameters)
            dfg_vis_factory.save(
                gviz,
                os.path.join("xescert_images", logName.replace("xes", "png")))

        print("Reimporting log file just exported - ", exp_log_name)
        log = xes_importer.import_log(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
def bot_dfg(chat_id):
    """Render the frequency DFG of the chat's current log into a temp file.

    :param chat_id: identifier of the chat whose log should be visualized
    :return: path of the generated image file
    """
    import os

    log = get_current_log(chat_id)
    dfg = dfg_factory.apply(log)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    # BUGFIX: mkstemp returns an *open* OS-level file descriptor which the
    # original leaked on every call; close it before handing the path to
    # graphviz. Also use ".png" so the file gets a real extension (the
    # original suffix "png" produced names like "tmpabc123png").
    fd, filename = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    dfg_vis_factory.save(gviz, filename)
    return filename
def execute_script():
    """Discover a DFG from the running example, show it as SVG, then convert
    it to a Petri net and show that as well."""
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    event_log = xes_importer.apply(log_path)

    discovered_dfg = dfg_factory.apply(event_log)
    dfg_view = dfg_vis_fact.apply(discovered_dfg, event_log,
                                  parameters={"format": "svg"})
    dfg_vis_fact.view(dfg_view)

    # Convert the DFG into a Petri net with initial/final markings.
    net, initial_marking, final_marking = dfg_conv_factory.apply(discovered_dfg)
    petri_view = pn_vis_factory.apply(net, initial_marking, final_marking,
                                      parameters={"format": "svg"})
    pn_vis_factory.view(petri_view)
def draw_DFG(dfg):
    """Render *dfg* as a performance-decorated graph and save it to dfg.svg.

    :param dfg: directly-follows graph (mapping of edge -> measure)
    """
    render_options = {"format": "svg"}
    rendered = dfg_vis_factory.apply(dfg, variant="performance",
                                     parameters=render_options)
    dfg_vis_factory.save(rendered, "dfg.svg")
def uselog(loginput): log = xes_import_factory.apply(loginput) log = sorting.sort_timestamp(log) # print(log) dfg = dfg_factory.apply(log) dfg_gv = dfg_vis_fact.apply(dfg, log, parameters={"format": "svg"}) this_data = dfg_to_g6.dfg_to_g6(dfg) # dfg_vis_fact.view(dfg_gv) return this_data '''grouplist = get_groups(log)
def visualize_dfg(log, filename):
    """
    Visualizes an event log as a DFG

    :param log: event log that will be visualized
    :param filename: filename for the created DFG
    """
    discovered_dfg = dfg_factory.apply(log)
    rendered = dfg_vis_factory.apply(discovered_dfg,
                                     log=log,
                                     variant='frequency',
                                     parameters={"format": "svg"})
    dfg_vis_factory.save(rendered, filename)
def Hueristics(file, location="/mnt/c/Users/harim/Downloads/dfg.png"):
    """Discover a frequency DFG from an XES log and save it as an image.

    :param file: path of the XES log to import
    :param location: output path of the rendered image; defaults to the
        previously hard-coded location for backward compatibility
    :return: the path the image was saved to
    """
    from pm4py.objects.log.importer.xes import factory as xes_importer
    from pm4py.algo.discovery.dfg import factory as dfg_factory
    from pm4py.visualization.dfg import factory as dfg_vis_factory

    log = xes_importer.import_log(file)
    dfg = dfg_factory.apply(log)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    dfg_vis_factory.save(gviz, location)
    return location
def highLevelDFG(self):
    """Create high level DFG of entire process.

    Writes the rendered image as a side effect; returns ``False`` when the
    model could not be created.
    """
    try:
        # Aggregate the raw dataframe into an abstracted, high-level log.
        _, abstracted_log, discovery_params = \
            modules.eventAbstraction.aggregateData(self.dataframe,
                                                   remove_duplicates=False)
        graph = dfg_factory.apply(abstracted_log, variant="frequency",
                                  parameters=discovery_params)
        image_params = self._createImageParameters(log=abstracted_log,
                                                   high_level=True)
        rendered = dfg_vis_factory.apply(graph, log=abstracted_log,
                                         variant="frequency",
                                         parameters=image_params)
        self._create_image(rendered, "DFG_model")
    except Exception as exc:
        print(f"[PROCESS MINING] Could not create DFG: {exc}")
        return False
def generate_process_model(log):
    """Discover a DFG from *log*, display it decorated with activity
    frequencies, and return the discovered graph.

    :param log: imported event log
    :return: the directly-follows graph
    """
    discovered = dfg_factory.apply(log)
    frequency_view = dfg_vis_factory.apply(discovered, log=log,
                                           variant="frequency")
    dfg_vis_factory.view(frequency_view)
    return discovered
def export_process_model(dfgModel, log, filename):
    """Render *dfgModel* as a frequency-decorated SVG and write it to disk.

    :param dfgModel: directly-follows graph to export
    :param log: event log used to decorate the graph
    :param filename: destination file name
    """
    svg_options = {"format": "svg"}
    rendered = dfg_vis_factory.apply(dfgModel, log=log, variant="frequency",
                                     parameters=svg_options)
    dfg_vis_factory.save(rendered, filename)
def execute_script():
    """Load the mock CSV, convert it to an event log, then show three views
    of the process: a DFG, a heuristic net and a Petri net."""
    # import csv & create log
    dataframe = csv_import_adapter.import_dataframe_from_path(
        datasourceMockdata(), sep=";")
    dataframe = dataframe.rename(columns={'coID': 'case:concept:name',
                                          'Activity': 'concept:name'})
    log = conversion_factory.apply(dataframe)

    # option 1: Directly-Follows Graph, represent frequency or performance
    parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz1 = dfg_vis_factory.apply(dfg, log=log, variant=variant,
                                  parameters=parameters)
    dfg_vis_factory.view(gviz1)

    # option 2: Heuristics Miner on the DFG -> Heuristic Net (.svg)
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.00
        })
    gviz2 = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz2)

    # option 3: Petri Net based on Heuristic Miner (.png)
    net, im, fm = heuristics_miner.apply(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.00
        })
    gviz3 = petri_vis.apply(
        net, im, fm,
        parameters={
            petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png"
        })
    petri_vis.view(gviz3)
def view_model(dfg, dir=""):
    """Save *dfg* as an SVG and convert it to a PDF at the given path prefix.

    :param dfg: directly-follows graph (edge -> numeric value); values are
        rounded in place for a cleaner plot
    :param dir: path prefix (without extension) for the generated files
    """
    # Round edge values so the rendered labels stay readable.
    for edge in dfg.keys():
        dfg[edge] = round(dfg[edge])
    rendered = dfg_vis_factory.apply(dfg, variant="frequency",
                                     parameters={"format": "svg"})
    # use variant="performance" instead to visualize timing information
    dfg_vis_factory.save(rendered, dir + ".svg")
    drawing = svg2rlg(dir + ".svg")
    renderPDF.drawToFile(drawing, dir + ".pdf")
    return

# Example usage:
# log = xes_import_factory.apply(r"...\Sepsis Cases - Event Log.xes")
# dfg_freq = dfg_factory.apply(log, variant="frequency")
# view_model(dfg_freq, r"...\experiment_figures\temp")
def apply(dataframe, parameters=None):
    """
    Gets the performance DFG

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    # Cleaning/decoration knobs, falling back to library defaults.
    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR
    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    case_id_glue = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_CASEID_KEY] if pm4_constants.PARAMETER_CONSTANT_CASEID_KEY in parameters else CASE_CONCEPT_NAME

    # Ask the auto-filter to also return the end-activities count dict.
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True

    # Keep only the most frequent activities, then auto-filter the dataframe.
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())

    # Compute frequency and performance DFGs in a single pass.
    [dfg, dfg_perf
     ] = df_statistics.get_dfg_graph(dataframe,
                                     activity_key=activity_key,
                                     timestamp_key=timestamp_key,
                                     case_id_glue=case_id_glue,
                                     sort_caseid_required=False,
                                     sort_timestamp_along_case_id=False,
                                     measure="both")
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())

    # Drop noisy edges from the frequency DFG, then restrict the performance
    # DFG to the edges that survived.
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    dfg_perf = {x: y for x, y in dfg_perf.items() if x in dfg}
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())

    # Render the performance DFG as an SVG, decorated with activity counts.
    gviz = dfg_vis_factory.apply(dfg_perf,
                                 activities_count=activities_count,
                                 variant="performance",
                                 parameters={
                                     "format": "svg",
                                     "start_activities": start_activities,
                                     "end_activities": end_activities
                                 })
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))
    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)
    # Convert the cleaned DFG into a Petri net for the exported model.
    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })
    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "parquet", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "perf", None, "", activity_key
# NOTE(review): fragment of a timing/benchmark script -- time1/time2, the
# dataframe and the imports are defined earlier in the file (not visible here).
time3 = time.time()
print("time3 - time2: "+str(time3-time2))
#dataframe = df_filtering.filter_df_on_ncases(dataframe, case_id_glue="case:concept:name", max_no_cases=1000)
time4 = time.time()
print("time4 - time3: "+str(time4-time3))
#dataframe = df_filtering.filter_df_on_case_length(dataframe, case_id_glue="case:concept:name", min_trace_length=3, max_trace_length=50)
print(dataframe)
time5 = time.time()
print("time5 - time4: "+str(time5-time4))
# Parse timestamp columns so DFG performance measures can be computed.
dataframe = csv_importer.convert_timestamp_columns_in_df(dataframe)
time6 = time.time()
print("time6 - time5: "+str(time6-time5))
#dataframe = dataframe.sort_values('time:timestamp')
time7 = time.time()
print("time7 - time6: "+str(time7-time6))
# show the filtered dataframe on the screen
activities_count = df_statistics.get_activities_count(dataframe)
[dfg_frequency, dfg_performance] = df_statistics.get_dfg_graph(dataframe, measure="both")
#activities_count = df_statistics.get_activities_count(dataframe, activity_key="event")
#[dfg_frequency, dfg_performance] = df_statistics.get_dfg_graph(dataframe, measure="both", case_id_glue="case", activity_key="event", timestamp_key="startTime")
time8 = time.time()
print("time8 - time7: "+str(time8-time7))
# Render the frequency DFG, then mine a Petri net from it and render that too.
gviz = dfg_vis_factory.apply(dfg_frequency, activities_count=activities_count)
gviz.view()
net, initial_marking, final_marking = inductive_factory.apply_dfg(dfg_frequency)
gviz = pn_viz.graphviz_visualization(net, initial_marking=initial_marking, final_marking=final_marking, debug=True)
gviz.view()
time9 = time.time()
print("time9 - time8: "+str(time9-time8))
print("time9 - time1: "+str(time9-time1))
##################################################################################################################### # Datentransformation ##################################################################################################################### # datetime64[Y]=year, datetime64[M]=month, datetime64[D]=day, datetime64[h]=hour, datetime64[m]=minute, datetime64[s]=second dataframe['Time'] = dataframe['Time'].astype('datetime64[D]') # Attribute benennen dataframe = dataframe.rename(columns={'CaseID':'case:concept:name', 'Time':'time:timestamp', 'Activity':'concept:name', 'Resource':'org:resource'}) #print(dataframe) # Dataframe als log log = conversion_factory.apply(dataframe) #print(log) ##################################################################################################################### # Directly-Follows Graphs (DFG) erstellen ##################################################################################################################### # AKCTIVITIES: ACTIVITY_KEY = "concept:name" # RESOURCE: ACTIVITY_KEY = "org:resource" parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"} # frequency = Haeufigkeiten der Activity / Resource # performance = durchschnittliche Dauer der Activity / Resource variant='frequency' dfg = dfg_factory.apply(log, variant=variant, parameters=parameters) ##################################################################################################################### # Datenvisualisierung: Directly-Follows Graphs (DFG) mit GRAPHVIZ visualisieren ##################################################################################################################### gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters) dfg_vis_factory.view(gviz)
def visualize_dfg(self, log, save_file=False, file_name="dfg",
                  variant="relevance"):
    """
    Visualises the event log as direct follower graph (DFG).

    :param log: event log as a list of traces [list].
    :param save_file: boolean flog indicating to save the DFG or not [bool].
    :param file_name: name of the file [str].
    :param variant: dfg version to be produced: "frequency", "time",
        "relevance" or "all" [str]
    :return: file_names [list].
    """
    parameters = {"format": "svg"}
    file_names = []
    # One group of traces (with per-activity scores) per predicted label.
    relevance_scores = self.aggregate_relevance_scores(log)

    if variant == "relevance" or variant == "all":
        for label, items in relevance_scores.items():
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            # Nodes are decorated with relevance scores instead of counts.
            gviz = dfg_vis_factory.apply(dfg,
                                         activities_count=items['scores'],
                                         parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(
                    label) + ", Case ID: " + items['traces'][0]
            else:
                title = "No of Service Orders: " + str(len(
                    log)) + ", Filter: Repair not on time (Label = " + str(
                        label) + ")"
            # A title is injected by appending raw dot statements.
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            print("rel_sc: ", items['scores'])
            if save_file:
                filen = file_name + "_rel_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)

    if variant == "frequency" or variant == "all":
        for label, items in relevance_scores.items():
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            # NOTE(review): counts are taken from the *full* log while the
            # DFG is discovered on the filtered traces -- confirm this is
            # intended (per-label counts would use `data` instead of `log`).
            activities_cnt = attributes_filter.get_attribute_values(
                log, attribute_key="concept:name")
            gviz = dfg_vis_factory.apply(dfg,
                                         activities_count=activities_cnt,
                                         parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(
                    label) + ", Case ID: " + items['traces'][0]
            else:
                title = "No of Service Orders: " + str(len(
                    log)) + ", Filter: Repair not on time (Label = " + str(
                        label) + ")"
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            if save_file:
                filen = file_name + "_freq_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)

    if variant == "time" or variant == "all":
        for label, items in relevance_scores.items():
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            # Performance variant: edges show mean durations.
            parameters = {"format": "svg", "AGGREGATION_MEASURE": "mean"}
            gviz = dfg_vis_factory.apply(dfg, variant="performance",
                                         parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(
                    label) + ", Case ID: " + items['traces'][0]
            else:
                title = "No of Service Orders: " + str(len(
                    log)) + ", Filter: Repair not on time (Label = " + str(
                        label) + ")"
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            if save_file:
                filen = file_name + "_time_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)

    return file_names
2 load-scroll 0.034766 5 load-selection 0.033676 20 blur-scroll 0.033074 26 selection-scroll 0.032246 31 click-0-load 0.029780 ''' #----------------- from pm4py.objects.log.importer.csv import factory as csv_importer excellentLog1A = csv_importer.import_event_stream('Excellent1A_fixed.csv') from pm4py.objects.conversion.log import factory as conversion_factory log1 = conversion_factory.apply(excellentLog1A) from pm4py.visualization.dfg import factory as dfg_vis_factory gviz = dfg_vis_factory.apply(dfg1, log=log1, variant="frequency") dfg_vis_factory.view(gviz) from pm4py.objects.conversion.dfg import factory as dfg_mining_factory net, im, fm = dfg_mining_factory.apply(dfg1) from pm4py.visualization.petrinet import factory as pn_vis_factory gviz = pn_vis_factory.apply(net, im, fm) pn_vis_factory.view(gviz) from pm4py.evaluation.replay_fitness import factory as replay_factory fitness_alpha = replay_factory.apply(log1, net, im, fm) from pm4py.algo.conformance.alignments import factory as align_factory
import os, sys, inspect

# Make the parent directory importable so the bundled pm4py copy is found.
currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

from pm4py.algo.dfg import factory as dfg_factory, replacement as dfg_replacement
from pm4py.log.importer import xes as xes_importer
from pm4py.filtering.tracelog.auto_filter import auto_filter
from pm4py.filtering.tracelog.attributes import attributes_filter
from pm4py.visualization.dfg import factory as dfg_vis_factory

# measure could be frequency or performance
measure = "frequency"

log = xes_importer.import_from_file_xes(
    '..\\tests\\inputData\\running-example.xes')
# Auto-filter the log, then keep only the surviving activities in the
# original log (the "intermediate" log).
filtered_log = auto_filter.apply_auto_filter(log)
filtered_log_activities_count = attributes_filter.get_activities_from_log(
    filtered_log)
intermediate_log = attributes_filter.filter_log_by_specified_attributes(
    log, list(filtered_log_activities_count.keys()))
# Discover DFGs for both logs, then replace the filtered-log edge values
# with the values measured on the intermediate log.
dfg_filtered_log = dfg_factory.apply(filtered_log, variant=measure)
dfg_intermediate_log = dfg_factory.apply(intermediate_log, variant=measure)
dfg_filtered_log = dfg_replacement.replace_values(dfg_filtered_log,
                                                  dfg_intermediate_log)
gviz = dfg_vis_factory.apply(dfg_filtered_log, log=intermediate_log)
gviz.view()

#base64 = dfg_visualize.return_diagram_as_base64(activities_count, dfg_filtered_log, measure=measure)
#print(base64)
def makeDFG_connector(ConnectorBasicStructure, frequency_threshold, dfg_path,
                      **keyword_param):
    """Build a DFG from a table of (prev_activity, activity) transitions,
    render it with graphviz and return an activity adjacency matrix.

    :param ConnectorBasicStructure: dataframe with 'activity' and
        'prev_activity' columns; ':Start:' marks the artificial start.
    :param frequency_threshold: minimum relative edge frequency for an edge
        to be kept in the rendered graph.
    :param dfg_path: path the rendered SVG is saved to.
    :param keyword_param: expects 'encryption' (bool), 'visualization'
        (bool) and, when encryption is enabled, 'key'.
    :return: (activity x activity transition-count matrix, sorted activity
        list without ':Start:')
    """
    unique_activities = ConnectorBasicStructure['activity'].unique()
    unique_next_activities = ConnectorBasicStructure['prev_activity'].unique()
    activityList = sorted(set(unique_activities) | set(unique_next_activities))
    activityList.remove(':Start:')

    # --- edges -----------------------------------------------------------
    groupedbyactivityPairs = ConnectorBasicStructure.groupby(
        ['prev_activity', 'activity']).size().reset_index(name='counts')

    # Dense transition-count matrix, ignoring the artificial start marker.
    ActActMatrix = np.zeros([len(activityList), len(activityList)])
    for prev_activity, activity in zip(
            ConnectorBasicStructure['prev_activity'],
            ConnectorBasicStructure['activity']):
        if prev_activity == ":Start:":
            continue
        ActActMatrix[activityList.index(prev_activity)][
            activityList.index(activity)] += 1

    # Total number of real transitions. BUGFIX: the original subtracted only
    # the row at index label 0; ':Start:' can precede several distinct first
    # activities (one grouped row each), so sum over *all* its rows.
    start_mask = groupedbyactivityPairs['prev_activity'] == ":Start:"
    sumFrequency = (groupedbyactivityPairs['counts'].sum()
                    - groupedbyactivityPairs.loc[start_mask, 'counts'].sum())

    edges_dict = {}
    for _, row in groupedbyactivityPairs.iterrows():
        if row['prev_activity'] == ":Start:":
            continue
        if keyword_param['encryption']:
            # NOTE(review): edges encrypt the first 5 plaintext bytes while
            # nodes below take the first 5 bytes of the ciphertext -- the
            # labels may not line up; kept exactly as the original, confirm.
            edge_tuple = (
                Utilities.AES_ECB_Encrypt(
                    row['prev_activity'].encode('utf-8')[0:5],
                    keyword_param['key']),
                Utilities.AES_ECB_Encrypt(
                    row['activity'].encode('utf-8')[0:5],
                    keyword_param['key']))
        else:
            edge_tuple = (row['prev_activity'], row['activity'])
        # Keep only edges above the relative-frequency threshold.
        if row['counts'] / sumFrequency >= frequency_threshold:
            edges_dict[edge_tuple] = row['counts']

    # --- nodes -----------------------------------------------------------
    activity_frequencyDF = ConnectorBasicStructure.groupby(
        ['activity']).size().reset_index(name='counts')
    prev_activity_frequencyDF = ConnectorBasicStructure.groupby(
        ['prev_activity']).size().reset_index(name='counts')
    prev_activity_frequencyDF = prev_activity_frequencyDF.rename(
        columns={'prev_activity': 'activity'})
    final_activity_fequency = pd.concat([
        activity_frequencyDF, prev_activity_frequencyDF
    ]).drop_duplicates(subset='activity', keep="first").reset_index(drop=True)
    nodes = final_activity_fequency.set_index('activity').T.to_dict('records')
    nodes[0].pop(':Start:')

    if keyword_param['encryption']:
        # BUGFIX: only build the encrypted node map (and read
        # keyword_param['key']) when encryption is actually requested --
        # the original did this unconditionally, requiring a 'key' even
        # for unencrypted runs.
        nodes_new = {}
        for label, count in nodes[0].items():
            nodes_new[Utilities.AES_ECB_Encrypt(
                label.encode('utf-8'), keyword_param['key'])[0:5]] = count
        gviz = dfg_vis_factory.apply(edges_dict, activities_count=nodes_new,
                                     parameters={"format": "svg"})
    else:
        gviz = dfg_vis_factory.apply(edges_dict, activities_count=nodes[0],
                                     parameters={"format": "svg"})

    if keyword_param['visualization']:
        dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, dfg_path)

    return ActActMatrix, activityList
def create_graphs(without_error, log, approach):
    """
    creates visualization: Directly-Follows-Graph and Heuristic Net

    :param without_error: selects the "no_error"/"with_error" file-name suffix
    :param log: event log to visualize
    :param approach: approach name, embedded in the output file names
    """
    # create dfg frequency
    path = "common_path"
    vis_type = "dfg_frequency"
    naming_error = "with_error"
    if without_error:
        naming_error = "no_error"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "format": "svg"
    }
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant,
                                 parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG frequency has been stored in '%s' in file '%s'", path,
                  file)

    # create dfg performance (reuses the same parameters dict)
    vis_type = "dfg_performance"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    variant = 'performance'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant,
                                 parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG performance has been stored in '%s' in file '%s'",
                  path, file)

    # create heuristic net
    vis_type = "heuristicnet"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.60
        })
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz)
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net has been stored in '%s' in file '%s'", path,
                  file)

    # save heuristic net in plain-ext format
    file = f"{vis_type}_{approach}_{naming_error}.plain-ext"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.plain-ext"
    gviz = hn_vis.apply(heu_net,
                        parameters={
                            hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT:
                            "plain-ext"
                        })
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .plain-ext has been stored in '%s' "
        "in file '%s'", path, file)

    # save heuristic net in dot format
    file = f"{vis_type}_{approach}_{naming_error}.dot"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.dot"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "dot"})
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .dot has been stored in '%s' "
        "in file '%s'", path, file)

    # save heuristic net in xdot format
    file = f"{vis_type}_{approach}_{naming_error}.xdot"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.xdot"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "xdot"})
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .xdot has been stored in '%s' "
        "in file '%s'", path, file)
def _discover_draw_process_model(
        case_type, hl_activity_types, time_type):
    """Discover a DFG for the selected cases and return its Graphviz source
    plus the activity labels to highlight.

    :param case_type: case type to filter on ('' keeps every case)
    :param hl_activity_types: activity types whose labels get highlighted
    :param time_type: not used in the visible code -- TODO confirm
    :return: (graphviz dot source [str], highlighted activity labels [list])
    """
    from .utilities import _trim_activity_label_tail
    # TODO: handle process model discovery for CSV inputs
    el = session['event_log']
    # Server-side copy of the uploaded log, keyed by the session id.
    fn_server = '{}.log.{}'.format(
        session.sid[:32], session['last_upload_event_log_filetype']
    )
    from pm4py.objects.log.importer.xes import importer as xes_importer
    pm4py_log = xes_importer.apply(join(
        app.config['TEMP'], fn_server
    ))
    exec_mode_miner = session['exec_mode_miner']
    sel_cases = exec_mode_miner.get_values_by_type(case_type) \
        if case_type != '' else set(el['case_id'])
    '''
    # NOTE: CSV only - trim the additional markings appended by Disco
    hl_activity_types = [_trim_activity_label_tail(x, r'-complete')
        for x in hl_activity_types]
    '''
    # filter event log
    from pm4py.objects.log.log import EventLog, Trace
    pm4py_log_filtered = EventLog()
    # filter event log: keep selected cases only
    for trace in pm4py_log:
        if trace.attributes['concept:name'] in sel_cases:
            pm4py_log_filtered.append(trace)
    from pm4py.algo.discovery.dfg import factory as dfg_miner
    dfg = dfg_miner.apply(pm4py_log_filtered)
    from pm4py.visualization.dfg import factory as dfg_vis_factory
    gviz = dfg_vis_factory.apply(dfg,
        log=pm4py_log_filtered, variant='frequency',
        parameters={'maxNoOfEdgesInDiagram': 30}
    )
    gv_source = gviz.source
    # find activity labels that should be highlighted
    hl_activities = []
    for at in hl_activity_types:
        hl_activities.extend(exec_mode_miner.get_values_by_type(at))
    # TODO: delegate to front-end: edit and annotate the graph
    import pygraphviz as pgv
    graph = pgv.AGraph(gviz.source)
    for node in graph.nodes():
        if node.attr['shape'] == 'box' and node.attr['label'] != '':
            # trim the count in the labels from DFG
            node.attr['label'] = _trim_activity_label_tail(
                node.attr['label'], r' \(\d+\)')
            node.attr['fontname'] = 'Helvetica'
            # TODO: NOT an elegant solution for highlighting purpose - need rev.
            if exec_mode_miner._atypes[node.attr['label']] \
                in hl_activity_types:
                # highlight
                node.attr['style'] = 'bold'
                node.attr['fontcolor'] = 'red3'
            else:
                node.attr['style'] = 'filled'
                node.attr['fillcolor'] = 'gainsboro'
    # Re-serialize the annotated graph as dot source.
    gv_source = graph.string()
    return gv_source, hl_activities
def directly_follows_graphs_perf(log_file):
    """Render the performance DFG of *log_file* and save it as a PNG.

    :param log_file: imported event log
    :return: the string "success!"
    """
    dfg = dfg_factory.apply(log_file)
    gviz = dfg_vis_factory.apply(dfg, log=log_file, variant="performance")
    # BUGFIX: save through the DFG visualizer -- the original called the
    # Petri-net factory's save on a DFG gviz object.
    dfg_vis_factory.save(gviz, "static/dag_performance.png")
    return "success!"
# 1. Import libraries
import os
import csv
from pm4py.objects.log.importer.xes import factory as xes_importer
from pm4py.objects.conversion.dfg import factory as dfg_mining_factory
from pm4py.algo.discovery.dfg import factory as dfg_factory
from pm4py.visualization.dfg import factory as dfg_vis_factory
from pm4py.visualization.petrinet import factory as pn_vis_factory

# 2. Preprocessing: read tab-separated "from,to<TAB>freq" rows into a DFG dict
with open('dfr1.txt') as file:
    dfg = {}
    for row in csv.reader(file, delimiter='\t'):
        source, target = row[0].split(',')
        dfg[(source, target)] = int(row[1])

# 3. Visualize Directly-follows-graph (DFG)
gviz = dfg_vis_factory.apply(dfg)
dfg_vis_factory.view(gviz)

# 4. Discover and Visualize Workflow-Net
net, im, fm = dfg_mining_factory.apply(dfg)
gviz = pn_vis_factory.apply(net, im, fm)
pn_vis_factory.view(gviz)
def show(model, tel, file_name, parameters):
    '''
    Show model and its quality measures

    :param model: model type (transition system, state based region, DFG miner, alpha miner)
    :param tel: input log
    :param file_name: img file name to show model
    :param parameters: parmater for transition system (afreq, sfreq)
    :return: (output image path, result/measures dict, max thresholds or None)
    '''
    # Distinguish "tel" logs (custom event type) from plain pm4py logs.
    tel_flag = False
    if isinstance(tel[0][0], tel_event):
        tel_flag = True
    if model in ['ts', 'sbr']:
        # Transition system / state-based regions: the image name encodes
        # the frequency thresholds (tel logs) or direction/window/view.
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images",
                file_name[:file_name.find('.')] + '_' + model + '_' +
                str(parameters['afreq_thresh']) + '_' +
                str(parameters['sfreq_thresh']) + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + "_" + file_name[:file_name.find('.')] + '_' + model +
                '_' + str(parameters[PARAM_KEY_DIRECTION]) + '_' +
                str(parameters[PARAM_KEY_WINDOW]) + "_" +
                str(parameters[PARAM_KEY_VIEW]) + ".png")
        auto = utils.discover_annotated_automaton(tel, parameters=parameters)
        # Maximum arc/state frequencies, reported back to the caller
        # (only populated for tel logs).
        max_thresh = {}
        max_afreq = 0
        max_sfreq = 0
        if tel_flag:
            for trans in auto.transitions:
                max_afreq = max(max_afreq, trans.afreq)
            for state in auto.states:
                max_sfreq = max(max_sfreq, state.sfreq)
            max_thresh['afreq'] = max_afreq
            max_thresh['sfreq'] = max_sfreq
        if model == 'ts':
            result = {}
            gviz = vis_factory.apply(auto)
            vis_factory.save(gviz, output_file_path)
            result['num of transitions'] = len(auto.transitions)
            result['num of states'] = len(auto.states)
        else:
            # State-based region synthesis into a Petri net + evaluation.
            net, im, fm = sb.petri_net_synthesis(auto)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)
    else:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images",
                file_name[:file_name.find('.')] + '_' + model + '_' + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + file_name[:file_name.find('.')] + '_' + model + '_' +
                ".png")
        if model == 'alpha':
            if isinstance(tel[0][0], Event):
                net, im, fm = trans_alpha(tel)
            else:
                net, im, fm = alpha_miner.apply(tel)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)
        else:
            # DFG miner: discover, render and rank edges by frequency.
            dfg = dfg_factory.apply(tel)
            if tel_flag:
                dfg_tel = inductive_revise.get_dfg_graph_trans(tel)
                #dfg = dfg_tel + dfg
                dfg = dfg_tel
            gviz = dfg_vis_factory.apply(dfg, log=tel)
            dfg_vis_factory.save(gviz, output_file_path)
            result = dict(
                sorted(dfg.items(), key=operator.itemgetter(1), reverse=True))
        # No threshold information for alpha/DFG models.
        max_thresh = None
    return output_file_path, result, max_thresh
from pm4py.visualization.dfg import factory as dfg_vis_factory
from pm4pyspark.importer.csv import spark_df_imp as importer
from pm4pyspark.algo.discovery.dfg import factory as dfg_factory

# Render everything as SVG.
parameters = {"format": "svg"}

# Import the running example both as a pm4py event log (used to decorate
# the graphs) and as a Spark dataframe (used for discovery).
event_stream_ex = importer.import_event_stream(os.path.join(
    INPUT_DATA_DIR, "running-example.csv"), parameters={"header": True})
log_ex = importer.transform_event_stream_to_event_log(event_stream_ex)
df_ex = importer.import_sparkdf_from_path(os.path.join(INPUT_DATA_DIR,
                                                       "running-example.csv"),
                                          header=True, inferSchema=True)

# Frequency DFG
dfg_freq = dfg_factory.apply(df_ex)
gviz_freq = dfg_vis_factory.apply(dfg_freq, log=log_ex, parameters=parameters,
                                  variant="frequency")
dfg_vis_factory.save(gviz_freq, os.path.join(OUTPUT_DATA_DIR,
                                             "running-example_freq.svg"))

# Performance DFG
dfg_perf = dfg_factory.apply(df_ex, variant="performance")
gviz_perf = dfg_vis_factory.apply(dfg_perf, log=log_ex, parameters=parameters,
                                  variant="performance")
dfg_vis_factory.save(gviz_perf, os.path.join(OUTPUT_DATA_DIR,
                                             "running-example_perf.svg"))
def directly_follows_graphs_freq(log_file): dfg = dfg_factory.apply(log_file) gviz = dfg_vis_factory.apply(dfg, log=log_file, variant="frequency") pn_vis_factory.save(gviz, "static/dag_frequency.png") return "success!"
# from pm4py.algo.discovery.heuristics import factory as heuristics_miner # heu_net = heuristics_miner.apply_heu(log, parameters={"dependency_thresh": 0.99}) ######################################## #### mean of number of days between each process ####### from pm4py.algo.discovery.dfg import factory as dfg_factory dfg = dfg_factory.apply(conv_log, variant="performance") parameters = {"format": "png"} from pm4py.visualization.dfg import factory as dfg_vis_factory gviz = dfg_vis_factory.apply(dfg, log=conv_log, variant="performance", parameters=parameters) dfg_vis_factory.save(gviz, "performance_invoice.png") dfg_vis_factory.view(gviz) ################################## #### mean of frequency between each process ####### from pm4py.algo.discovery.dfg import factory as dfg_factory from pm4py.visualization.dfg import factory as dfg_vis_factory dfg = dfg_factory.apply(conv_log) parameters = {"format": "png"} gviz1 = dfg_vis_factory.apply(dfg,
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    # Fix: work on a shallow copy — the original mutated the caller's dict
    # ("format", "start_activities", "end_activities" keys were added in
    # place, leaking visualization settings back to the caller).
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    # Cap the number of distinct activities, then auto-filter the log
    # before mining, so the resulting DFG stays readable.
    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(
            filtered_log, parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    # Drop low-frequency edges according to the decreasing factor.
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    # Visualization settings go into the (local copy of the) parameters.
    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)
    # Convert the DFG into a Petri net for export.
    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key
#----------------------------- #libraries import os from pm4py.objects.log.importer.xes import factory as xes_importer log = xes_importer.import_log(os.path.join("pmdata/", "running-example.xes")) log from pm4py.algo.discovery.alpha import factory as alpha_miner net, initial_marking, final_marking = alpha_miner.apply(log) net #direct flow import os from pm4py.objects.log.importer.xes import factory as xes_importer log = xes_importer.import_log(os.path.join("pmdata/running-example.xes")) from pm4py.algo.discovery.dfg import factory as dfg_factory dfg = dfg_factory.apply(log) from pm4py.visualization.dfg import factory as dfg_vis_factory gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency") dfg_vis_factory.view(gviz) #decoration from pm4py.algo.discovery.dfg import factory as dfg_factory from pm4py.visualization.dfg import factory as dfg_vis_factory dfg = dfg_factory.apply(log, variant="performance") gviz = dfg_vis_factory.apply(dfg, log=log, variant="performance") dfg_vis_factory.view(gviz)