def execute_script():
    """Show the DFG of the running-example log and the Petri net derived from it.

    The XES file is looked up at ``../tests/input_data/running-example.xes``
    relative to the current working directory. Both visualizations are
    requested in SVG format and handed to the respective ``view`` function.
    """
    xes_file = os.path.join("..", "tests", "input_data", "running-example.xes")
    event_log = xes_importer.apply(xes_file)

    # Directly-follows graph of the imported log.
    directly_follows = dfg_factory.apply(event_log)
    dfg_rendering = dfg_vis_fact.apply(
        directly_follows, event_log, parameters={"format": "svg"}
    )
    dfg_vis_fact.view(dfg_rendering)

    # Petri net (with initial and final marking) converted from the DFG.
    petri_net, initial_marking, final_marking = dfg_conv_factory.apply(
        directly_follows
    )
    net_rendering = pn_vis_factory.apply(
        petri_net, initial_marking, final_marking, parameters={"format": "svg"}
    )
    pn_vis_factory.view(net_rendering)
def generate_process_model(log):
    """Build the directly-follows graph of ``log`` and display it.

    Args:
        log: Event log handed to ``dfg_factory.apply`` and to the DFG
            visualizer.

    Returns:
        The directly-follows graph returned by ``dfg_factory.apply(log)``.
    """
    directly_follows = dfg_factory.apply(log)

    # The "frequency" variant decorates the graph with activity frequencies.
    rendering = dfg_vis_factory.apply(
        directly_follows, log=log, variant="frequency"
    )
    dfg_vis_factory.view(rendering)

    return directly_follows
def execute_script():
    """Load the mock CSV data and display three process models mined from it.

    Steps, in order:
      1. Directly-follows graph ("frequency" variant).
      2. Heuristics net, rendered as SVG.
      3. Petri net produced by the heuristics miner, rendered as PNG.

    Each model is handed to the ``view`` function of its visualizer.
    """
    # Import the CSV and rename its columns to the case/activity names
    # expected by the log conversion.
    column_names = {
        'coID': 'case:concept:name',
        'Activity': 'concept:name',
    }
    events = csv_import_adapter.import_dataframe_from_path(
        datasourceMockdata(), sep=";")
    events = events.rename(columns=column_names)
    log = conversion_factory.apply(events)

    # Option 1: directly-follows graph (frequency or performance).
    variant = 'frequency'
    dfg_parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    dfg = dfg_factory.apply(log, variant=variant, parameters=dfg_parameters)
    dfg_rendering = dfg_vis_factory.apply(
        dfg, log=log, variant=variant, parameters=dfg_parameters)
    dfg_vis_factory.view(dfg_rendering)

    # Options 2 and 3 run the heuristics miner with the same dependency
    # threshold; a fresh parameter dict is built for each call.
    threshold_key = (
        heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH)

    # Option 2: heuristics net, output as .svg.
    heu_net = heuristics_miner.apply_heu(
        log, parameters={threshold_key: 0.00})
    svg_format = {hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"}
    heu_rendering = hn_vis.apply(heu_net, parameters=svg_format)
    hn_vis.view(heu_rendering)

    # Option 3: Petri net based on the heuristics miner, output as .png.
    net, initial_marking, final_marking = heuristics_miner.apply(
        log, parameters={threshold_key: 0.00})
    png_format = {
        petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png"
    }
    petri_rendering = petri_vis.apply(
        net, initial_marking, final_marking, parameters=png_format)
    petri_vis.view(petri_rendering)
5 load-selection 0.033676 20 blur-scroll 0.033074 26 selection-scroll 0.032246 31 click-0-load 0.029780 ''' #----------------- from pm4py.objects.log.importer.csv import factory as csv_importer excellentLog1A = csv_importer.import_event_stream('Excellent1A_fixed.csv') from pm4py.objects.conversion.log import factory as conversion_factory log1 = conversion_factory.apply(excellentLog1A) from pm4py.visualization.dfg import factory as dfg_vis_factory gviz = dfg_vis_factory.apply(dfg1, log=log1, variant="frequency") dfg_vis_factory.view(gviz) from pm4py.objects.conversion.dfg import factory as dfg_mining_factory net, im, fm = dfg_mining_factory.apply(dfg1) from pm4py.visualization.petrinet import factory as pn_vis_factory gviz = pn_vis_factory.apply(net, im, fm) pn_vis_factory.view(gviz) from pm4py.evaluation.replay_fitness import factory as replay_factory fitness_alpha = replay_factory.apply(log1, net, im, fm) from pm4py.algo.conformance.alignments import factory as align_factory
def create_graphs(without_error, log, approach):
    """Create, display and store the DFG and heuristics-net visualizations.

    Files are written below ``common_path`` and are named
    ``<vis_type>_<approach>_<no_error|with_error>.<extension>``.

    Args:
        without_error: Truthy selects the ``no_error`` name part, otherwise
            ``with_error`` is used.
        log: Event log the models are mined from.
        approach: Value embedded in every output file name.
    """
    path = "common_path"
    naming_error = "no_error" if without_error else "with_error"

    def _target(vis_type, extension):
        # Return (bare file name, file name prefixed with the output path).
        file_name = f"{vis_type}_{approach}_{naming_error}.{extension}"
        return file_name, f"{path}/{file_name}"

    # Directly-follows graphs: frequency first, then performance. Both are
    # viewed and saved as SVG.
    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "format": "svg"
    }
    dfg_jobs = (
        ("frequency",
         "DFG frequency has been stored in '%s' in file '%s'"),
        ("performance",
         "DFG performance has been stored in '%s' in file '%s'"),
    )
    for variant, message in dfg_jobs:
        file_name, full_name = _target(f"dfg_{variant}", "svg")
        dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
        rendering = dfg_vis_factory.apply(dfg,
                                          log=log,
                                          variant=variant,
                                          parameters=parameters)
        dfg_vis_factory.view(rendering)
        dfg_vis_factory.save(rendering, full_name)
        log_info.info(message, path, file_name)

    # Heuristics net: mined once, then rendered in several formats.
    vis_type = "heuristicnet"
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.60
        })
    format_key = hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT

    # The SVG rendering is the only one that is also viewed.
    file_name, full_name = _target(vis_type, "svg")
    rendering = hn_vis.apply(heu_net, parameters={format_key: "svg"})
    hn_vis.view(rendering)
    hn_vis.save(rendering, full_name)
    log_info.info("Heuristic Net has been stored in '%s' in file '%s'", path,
                  file_name)

    # The remaining formats are only saved.
    export_jobs = (
        ("plain-ext",
         "Heuristic Net as .plain-ext has been stored in '%s' in file '%s'"),
        ("dot",
         "Heuristic Net as .dot has been stored in '%s' in file '%s'"),
        ("xdot",
         "Heuristic Net as .xdot has been stored in '%s' in file '%s'"),
    )
    for extension, message in export_jobs:
        file_name, full_name = _target(vis_type, extension)
        rendering = hn_vis.apply(heu_net, parameters={format_key: extension})
        hn_vis.save(rendering, full_name)
        log_info.info(message, path, file_name)
def makeDFG_connector(ConnectorBasicStructure, frequency_threshold, dfg_path,
                      **keyword_param):
    """Build, render and save a directly-follows graph from an activity table.

    Rows whose ``prev_activity`` is ``':Start:'`` are not turned into edges
    and ``':Start:'`` is not a node of the resulting graph.

    Args:
        ConnectorBasicStructure: pandas DataFrame with the columns
            ``'activity'`` and ``'prev_activity'``; every row is one
            directly-follows occurrence.
        frequency_threshold: Minimum share of an edge's count in the total
            count of all non-start edges for the edge to be kept.
        dfg_path: Target passed to ``dfg_vis_factory.save``.
        **keyword_param: Must contain ``'encryption'`` and
            ``'visualization'`` (used as booleans). ``'key'`` is read only
            when ``'encryption'`` is truthy.

    Returns:
        Tuple ``(ActActMatrix, activityList)``: ``activityList`` is the sorted
        list of activities without ``':Start:'`` and ``ActActMatrix[i][j]``
        is how often ``activityList[j]`` directly follows ``activityList[i]``.

    Raises:
        KeyError: If ``'encryption'`` or ``'visualization'`` is missing from
            ``keyword_param``, or ``'key'`` is missing while encryption is
            enabled.
    """
    start_marker = ':Start:'
    encrypt = keyword_param['encryption']

    # Sorted list of all activities seen in either column, minus the
    # artificial start marker (tolerated to be absent).
    activity_set = (set(ConnectorBasicStructure['activity'].unique())
                    | set(ConnectorBasicStructure['prev_activity'].unique()))
    activity_set.discard(start_marker)
    activityList = sorted(activity_set)
    position = {name: idx for idx, name in enumerate(activityList)}

    # Edges: count every (prev_activity, activity) pair and drop the pairs
    # that leave the start marker.
    pair_counts = ConnectorBasicStructure.groupby(
        ['prev_activity', 'activity']).size().reset_index(name='counts')
    inner_pairs = pair_counts[pair_counts['prev_activity'] != start_marker]

    # The threshold is relative to the total of exactly those pairs that can
    # become edges, i.e. all start pairs are excluded from the total.
    sumFrequency = inner_pairs['counts'].sum()

    ActActMatrix = np.zeros([len(activityList), len(activityList)])
    edges_dict = {}
    for prev_activity, activity, count in zip(inner_pairs['prev_activity'],
                                              inner_pairs['activity'],
                                              inner_pairs['counts']):
        # Summing the grouped counts gives the same matrix as adding one per
        # original row.
        ActActMatrix[position[prev_activity]][position[activity]] += count

        if count / sumFrequency < frequency_threshold:
            continue
        if encrypt:
            # NOTE(review): edge labels are truncated to 5 bytes BEFORE
            # encryption, node labels (below) AFTER encryption, so the two
            # may not match - confirm which form is intended.
            edge = (
                Utilities.AES_ECB_Encrypt(
                    prev_activity.encode('utf-8')[0:5], keyword_param['key']),
                Utilities.AES_ECB_Encrypt(
                    activity.encode('utf-8')[0:5], keyword_param['key']),
            )
        else:
            edge = (prev_activity, activity)
        edges_dict[edge] = count

    # Nodes: occurrence count per activity. The count from the 'activity'
    # column wins; the 'prev_activity' count is only used for activities that
    # never appear in the 'activity' column.
    activity_frequency = ConnectorBasicStructure.groupby(
        ['activity']).size().reset_index(name='counts')
    prev_activity_frequency = ConnectorBasicStructure.groupby(
        ['prev_activity']).size().reset_index(name='counts').rename(
            columns={'prev_activity': 'activity'})
    final_activity_frequency = pd.concat(
        [activity_frequency, prev_activity_frequency]).drop_duplicates(
            subset='activity', keep="first")
    nodes = final_activity_frequency.set_index('activity')['counts'].to_dict()
    nodes.pop(start_marker, None)

    # Node labels are encrypted only when encryption was requested, so no
    # key is needed for plain graphs.
    if encrypt:
        nodes = {
            Utilities.AES_ECB_Encrypt(name.encode('utf-8'),
                                      keyword_param['key'])[0:5]: value
            for name, value in nodes.items()
        }

    gviz = dfg_vis_factory.apply(edges_dict,
                                 activities_count=nodes,
                                 parameters={"format": "svg"})
    if keyword_param['visualization']:
        dfg_vis_factory.view(gviz)
    # NOTE(review): the graph is saved regardless of the 'visualization'
    # flag - confirm this matches the intended behaviour.
    dfg_vis_factory.save(gviz, dfg_path)

    return ActActMatrix, activityList
def execute_script():
    """Run the dataframe-based discovery pipeline and print step timings.

    The configuration (``inputLog``, ``SEP``, ``QUOTECHAR``, ``CASEID_GLUE``,
    ``ACTIVITY_KEY``, ``TIMEST_KEY``, ``TIMEST_COLUMNS``, ``TIMEST_FORMAT``
    and the ``enable_*`` / threshold switches) is read from names defined
    outside this function.

    Steps: import the CSV without timestamp conversion, filter it, convert
    case ids and timestamps, compute the frequency and performance DFGs,
    show the frequency DFG, mine a Petri net from it with the inductive miner
    and show that net decorated with performance statistics.
    """
    started_at = time.time()
    dataframe = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
        inputLog, sep=SEP, quotechar=QUOTECHAR)
    imported_at = time.time()
    print("time2 - time1: " + str(imported_at - started_at))

    # Automatic filtering: which implementation is used depends on the
    # enable_auto_filter switch.
    parameters_filtering = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ACTIVITY_KEY
    }
    filter_function = (auto_filter.apply_auto_filter if enable_auto_filter
                       else attributes_filter.apply_auto_filter)
    dataframe = filter_function(dataframe, parameters=parameters_filtering)
    filtered_at = time.time()
    print("time3 - time2: " + str(filtered_at - imported_at))

    # Optionally limit the number of cases.
    if enable_filtering_on_cases:
        dataframe = case_filter.filter_on_ncases(dataframe,
                                                 case_id_glue=CASEID_GLUE,
                                                 max_no_cases=max_no_cases)
    cases_limited_at = time.time()

    # Column conversions were skipped during the import, do them now.
    dataframe = csv_import_adapter.convert_caseid_column_to_str(
        dataframe, case_id_glue=CASEID_GLUE)
    dataframe = csv_import_adapter.convert_timestamp_columns_in_df(
        dataframe, timest_columns=TIMEST_COLUMNS, timest_format=TIMEST_FORMAT)
    converted_at = time.time()
    print("time6 - time4: " + str(converted_at - cases_limited_at))

    # Sorting by 'time:timestamp' is disabled here, so this interval only
    # covers the print above.
    sorted_at = time.time()
    print("time7 - time6: " + str(sorted_at - converted_at))

    # Activity counts and both DFG flavours from the filtered dataframe.
    activities_count = attributes_filter.get_attribute_values(
        dataframe, attribute_key=ACTIVITY_KEY)
    dfg_frequency, dfg_performance = df_statistics.get_dfg_graph(
        dataframe,
        measure="both",
        perf_aggregation_key="median",
        case_id_glue=CASEID_GLUE,
        activity_key=ACTIVITY_KEY,
        timestamp_key=TIMEST_KEY)
    if enable_filtering_df:
        print("len dfg_frequency 0=", len(dfg_frequency))
        dfg_frequency = dfg_filtering.apply(
            dfg_frequency, {"noiseThreshold": filtering_df_noise})
        print("len dfg_frequency 1=", len(dfg_frequency))
    dfg_ready_at = time.time()
    print("time8 - time7: " + str(dfg_ready_at - sorted_at))

    # Show the frequency DFG.
    dfg_rendering = dfg_vis_factory.apply(dfg_frequency,
                                          activities_count=activities_count,
                                          parameters={"format": "svg"})
    dfg_vis_factory.view(dfg_rendering)

    # Mine a Petri net from the frequency DFG (inductive miner).
    net, initial_marking, final_marking = inductive_factory.apply_dfg(
        dfg_frequency)
    spaths = get_shortest_paths(net)
    net_ready_at = time.time()
    print("time9 - time8: " + str(net_ready_at - dfg_ready_at))

    # Decorate the net with performance statistics and render it.
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_performance, spaths, activities_count, variant="performance")
    net_rendering = pn_vis_factory.apply(
        net,
        initial_marking,
        final_marking,
        variant="performance",
        aggregated_statistics=aggregated_statistics,
        parameters={"format": "svg"})
    rendered_at = time.time()
    print("time10 - time9: " + str(rendered_at - net_ready_at))
    print("time10 - time1: " + str(rendered_at - started_at))

    pn_vis_factory.view(net_rendering)