def save_vis_performance_dfg(dfg: dict, start_activities: dict, end_activities: dict, file_path: str,
                             aggregation_measure="mean"):
    """
    Saves the visualization of a performance DFG

    Parameters
    ----------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    file_path
        Destination path
    aggregation_measure
        Aggregation measure (default: mean): mean, median, min, max, sum, stdev
    """
    format = os.path.splitext(file_path)[1][1:]
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    from pm4py.visualization.dfg.variants import performance as dfg_perf_visualizer
    dfg_parameters = dfg_perf_visualizer.Parameters
    parameters = {}
    parameters[dfg_parameters.FORMAT] = format
    parameters[dfg_parameters.START_ACTIVITIES] = start_activities
    parameters[dfg_parameters.END_ACTIVITIES] = end_activities
    parameters[dfg_parameters.AGGREGATION_MEASURE] = aggregation_measure
    gviz = dfg_perf_visualizer.apply(dfg, parameters=parameters)
    dfg_visualizer.save(gviz, file_path)
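# Usage sketch (not part of the original snippet): one way save_vis_performance_dfg could be
# called with pm4py's simplified interface; the log file name and output path are
# illustrative assumptions.
import pm4py

log = pm4py.read_xes("running-example.xes")
perf_dfg, start_activities, end_activities = pm4py.discover_performance_dfg(log)
save_vis_performance_dfg(perf_dfg, start_activities, end_activities, "perf_dfg.png",
                         aggregation_measure="median")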
def execute_script():
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # logPath = os.path.join("..", "tests", "inputData", logName)
        log_path = log_input_directory + "\\" + logName
        log = xes_importer.apply(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = "xescert_exportlogs" + "\\" + "exp_" + logName
        print("exporting log", exp_log_name)
        xes_exporter.apply(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(log, classifiers[0])
                print(classifier_attr_key)
            except:
                print("exception in handling classifier")

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key}
            dfg = dfg_algorithm.apply(log, parameters=parameters)
            gviz = dfg_vis.apply(dfg, log=log, variant="frequency", parameters=parameters)
            # dfg_vis.view(gviz)
            dfg_vis.save(gviz, "xescert_images\\" + logName.replace("xes", "png"))

        print("Reimporting log file just exported - ", exp_log_name)
        log = xes_importer.apply(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
def save_vis_dfg(dfg, start_activities, end_activities, file_path, log=None):
    """
    Saves a DFG visualization to a file

    Parameters
    --------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    file_path
        Destination path
    """
    format = file_path[file_path.index(".") + 1:].lower()
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    parameters = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    gviz = dfg_visualizer.apply(dfg, log=log, variant=dfg_visualizer.Variants.FREQUENCY,
                                parameters={parameters.FORMAT: format,
                                            parameters.START_ACTIVITIES: start_activities,
                                            parameters.END_ACTIVITIES: end_activities})
    dfg_visualizer.save(gviz, file_path)
def save_directly_follows_graph(graph, path):
    """
    Saves a directly-follows graph to the specified path.

    :param graph: the directly-follows graph
    :param path: the path
    """
    log.info('saving directly follows graph %s to path %s', graph, path)
    dfg_vis.save(graph, path)
def test_45(self):
    import os
    from pm4py.objects.log.importer.xes import importer as xes_importer
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    from pm4py.visualization.dfg import visualizer as dfg_visualization
    dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
    parameters = {dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"}
    gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE,
                                   parameters=parameters)
    dfg_visualization.save(gviz, os.path.join("test_output_data", "dfg.svg"))
    os.remove(os.path.join("test_output_data", "dfg.svg"))
def save_full_dfg(log):
    dfg = dfg_discovery.apply(log)
    gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz)
    parameters = {dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"}
    gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY,
                                   parameters=parameters)
    dfg_visualization.save(gviz, "dfg_full.svg")
    print('Full DFG saved as "dfg_full.svg"')
    return gviz
def save_graph_file(type, gviz, path):
    if type == "alpha":
        pn_visualizer.save(gviz, path)
    elif type == "heuristic-heu-net":
        hn_visualizer.save(gviz, path)
    elif type == "heuristic-pet-net":
        pn_visualizer.save(gviz, path)
    elif type == "dfg-discovery-frequency":
        dfg_visualization.save(gviz, path)
    elif type == "dfg-discovery-active-time":
        dfg_visualization.save(gviz, path)
    elif type == "dfg-discovery-pet-net":
        pt_visualizer.save(gviz, path)
    elif type == "inductive-miner-tree":
        pt_visualizer.save(gviz, path)
    elif type == "inductive-miner-petri":
        pn_visualizer.save(gviz, path)
def create_directly_follows_graph(frame: DataFrame, output_format='svg'):
    """
    Creates a Directly Follows Graph from the supplied DataFrame.

    :param frame: the DataFrame
    :param output_format: desired output format
    :return: object representing the created graph
    """
    event_log = _convert_data_frame_to_event_log(frame)
    dfg = dfg_alg.apply(log=event_log, variant=DfgAlgVariants.FREQUENCY)
    gviz = dfg_vis.apply(dfg, log=event_log, variant=DfgVisVariants.FREQUENCY,
                         parameters={VisualisationParams.FORMAT: output_format})
    saved_dfg = tempfile.NamedTemporaryFile(prefix='pm_', suffix=f'.{output_format}', delete=False)
    dfg_vis.save(gviz, saved_dfg.name)
    # close the file here and delete it after final use to work around
    # file-access issues in case anybody tries to run this on Windows
    saved_dfg.close()
    return saved_dfg
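# The helper _convert_data_frame_to_event_log is assumed above but not shown; a minimal
# sketch of what it could look like, assuming the DataFrame uses 'case_id', 'activity'
# and 'timestamp' columns (the column names are assumptions, not from the original code).
from pandas import DataFrame
from pm4py.objects.conversion.log import converter as log_converter


def _convert_data_frame_to_event_log(frame: DataFrame):
    # map the columns to the names pm4py expects before converting to an EventLog
    frame = frame.rename(columns={'case_id': 'case:concept:name',
                                  'activity': 'concept:name',
                                  'timestamp': 'time:timestamp'})
    return log_converter.apply(frame, variant=log_converter.Variants.TO_EVENT_LOG)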
def save_vis_dfg(dfg: dict, start_activities: dict, end_activities: dict, file_path: str,
                 log: Optional[EventLog] = None):
    """
    Saves a DFG visualization to a file

    Parameters
    --------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    file_path
        Destination path
    """
    if log is not None:
        if type(log) not in [pd.DataFrame, EventLog, EventStream]:
            raise Exception("the method can be applied only to a traditional event log!")
    format = os.path.splitext(file_path)[1][1:]
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    dfg_parameters = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    parameters = get_properties(log)
    parameters[dfg_parameters.FORMAT] = format
    parameters[dfg_parameters.START_ACTIVITIES] = start_activities
    parameters[dfg_parameters.END_ACTIVITIES] = end_activities
    gviz = dfg_visualizer.apply(dfg, log=log, variant=dfg_visualizer.Variants.FREQUENCY,
                                parameters=parameters)
    dfg_visualizer.save(gviz, file_path)
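# Usage sketch (not part of the original snippet): saving a frequency DFG with this helper
# via pm4py's simplified interface; the file names are illustrative assumptions.
import pm4py

log = pm4py.read_xes("running-example.xes")
dfg, start_activities, end_activities = pm4py.discover_dfg(log)
save_vis_dfg(dfg, start_activities, end_activities, "dfg_frequency.png", log=log)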
def api_gerar_modelo_pm():
    ramojustica = request.args.get('ramojustica')
    codtribunal = request.args.get('codtribunal')
    atuacao = request.args.get('atuacao')
    cluster = request.args.get('cluster')
    grau = request.args.get('grau')
    codorgaoj = request.args.get('codorgaoj')
    natureza = request.args.get('natureza')
    codclasse = request.args.get('codclasse')
    dtinicio = request.args.get('dtinicio')
    dtfim = request.args.get('dtfim')
    baixado = request.args.get('baixado')
    sensibilidade = request.args.get('sensibilidade')
    metrica = request.args.get('metrica')
    formato = request.args.get('formato')

    if ramojustica is None:
        abort(400, description="ramojustica not provided")
    if atuacao is None:
        abort(400, description="atuacao not provided")
    if codtribunal is None and cluster is None:
        abort(400, description="either codtribunal or cluster must be provided")

    gviz = gerar_view_dfg_model_from_params(ramojustica, codtribunal, atuacao, cluster, grau, codorgaoj,
                                            natureza, codclasse, dtinicio, dtfim, baixado=baixado,
                                            sensibility=sensibilidade, metric_type=metrica, image_format=formato)
    if gviz is not None:
        file_remover = FileRemover()
        tempdir = tempfile.mkdtemp()
        path = tempdir + "/model_mp." + str(formato).lower()
        dfg_visualization.save(gviz, path)
        resp = send_file(path, as_attachment=False)
        file_remover.cleanup_once_done(resp, path)
        return resp
    else:
        print("no data")
        abort(404, description="Not found")
def filter_for_periods(detect_result, event_counts):
    start_element1 = 0 if CHOSEN_PERIOD1 == 1 else detect_result[CHOSEN_PERIOD1 - 2]
    end_element1 = detect_result[CHOSEN_PERIOD1 - 1]
    start_element2 = 0 if CHOSEN_PERIOD2 == 1 else detect_result[CHOSEN_PERIOD2 - 2]
    end_element2 = detect_result[CHOSEN_PERIOD2 - 1]

    days = list(event_counts.keys())
    # print(days[start_element1])
    start_day1 = days[start_element1]
    end_day1 = days[end_element1 - 1]
    days_count1 = end_element1 - start_element1
    start_day2 = days[start_element2]
    end_day2 = days[end_element2 - 1]
    days_count2 = end_element2 - start_element2

    # Traces that are FULLY CONTAINED in the given timeframe
    period_1_log = timestamp_filter.filter_traces_contained(log, start_day1 + " 00:00:00", end_day1 + " 23:59:59")
    period_2_log = timestamp_filter.filter_traces_contained(log, start_day2 + " 00:00:00", end_day2 + " 23:59:59")
    # Traces that INTERSECT with the given timeframe
    # period_1_log = timestamp_filter.filter_traces_intersecting(log, start_day+" 00:00:00", end_day+" 23:59:59")

    dfg1 = dfg_discovery.apply(period_1_log)
    dfg2 = dfg_discovery.apply(period_2_log)

    gviz1 = dfg_visualization.apply(dfg1, log=period_1_log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz1)
    # Saving the DFG
    parameters = {dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"}
    gviz1 = dfg_visualization.apply(dfg1, log=period_1_log, variant=dfg_visualization.Variants.FREQUENCY,
                                    parameters=parameters)
    dfg_visualization.save(gviz1, "dfg1.svg")
    nodes_period1, edges_period1 = dot_to_df(gviz1)

    gviz2 = dfg_visualization.apply(dfg2, log=period_2_log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz2)
    # Saving the DFG
    parameters = {dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"}
    gviz2 = dfg_visualization.apply(dfg2, log=period_2_log, variant=dfg_visualization.Variants.FREQUENCY,
                                    parameters=parameters)
    dfg_visualization.save(gviz2, "dfg2.svg")

    return days_count1, days_count2, period_1_log, period_2_log, gviz1, gviz2
def apply_filter(req):
    sessions[req.session["id"]] = datetime.now()
    filters = {
        "time": True,
        "variants": True,
        "performance": True,
        "activities": True,
        "attribute": True
    }
    req.session.set_expiry(7200)
    # print(str(req.body))
    o = json.loads(req.body)
    print(str(o))

    custom_time_range = []
    for pair in o["filter1"]:
        # custom_time_range.append((dateutil.parser.parse(pair[0]), dateutil.parser.parse(pair[1])))
        custom_time_range.append((pair[0], pair[1]))
    if o["filter1"] == []:
        filters["time"] = False
    # print(o["filter1"][0])
    # print(custom_time_range[0][0])
    # print(custom_time_range)

    custom_path_range = []
    for pair in o["filter2"]:
        custom_path_range.append((float(pair[0]), float(pair[1])))
    if o["filter2"] == []:
        filters["variants"] = False
    # custom_path_range = [(0, 1)]  # filter2

    custom_performance_range = []
    for pair in o["filter3"]:
        custom_performance_range.append((float(pair[0]), float(pair[1])))
    if o["filter3"] == []:
        filters["performance"] = False

    custom_activitiy_range = []
    for pair in o["filter4"]:
        custom_activitiy_range.append((float(pair[0]), float(pair[1])))
    if o["filter4"] == []:
        filters["activities"] = False
    # custom_activitiy_range = [(0, 1)]  # filter3

    custom_attribute_range = []
    for pair in o["filter5"]:
        custom_attribute_range.append((float(pair[0]), float(pair[1])))
    if o["filter5"] == [] or o["filter5attribute"] == "Empty":
        filters["attribute"] = False
    additional_attribute = o["filter5attribute"]

    selected_viz = o["visualization"]
    calc_lev = o["distance"]

    # input_file = os.path.join("webapp", "static", req.session["id"] + "_l0.xes")
    input_file = os.path.join("webapp", "static", "sepsis.xes")
    input_log = xes_importer.apply(input_file)
    not_filtered_logs = {}
    flatten = lambda l: [item for sublist in l for item in sublist]

    time_timestamp_started = datetime.now()

    if filters["time"]:
        # TODO check overlapping for filter
        custom_time_range = sorted(custom_time_range, reverse=False)
        for i in range(0, len(custom_time_range) - 1):
            if custom_time_range[i][1] > custom_time_range[i + 1][0]:
                response = HttpResponse(json.dumps({'error': "Wrong intervals for time filter"}))
                response.status_code = 200
                return response
                # raise ValueError("Overlapping time ranges")
        logs = []
        for (x, y) in custom_time_range:
            logs.append(timestamp_filter.filter_traces_contained(input_log, x, y))
        # log = timestamp_filter.filter_traces_contained(input_log, custom_time_range[0][0], custom_time_range[0][1])
        log = pm4py.objects.log.log.EventLog()
        for timeslice in logs:
            for trace in timeslice:
                log.append(trace)
        print(len(input_log))
        print(len(log))
        # l2
        not_filtered_logs["timestamp_filter"] = pm4py.objects.log.log.EventLog()
        for trace in input_log:
            if trace not in log:
                not_filtered_logs["timestamp_filter"].append(trace)
        print(len(not_filtered_logs["timestamp_filter"]))
    else:
        log = input_log

    time_variants_started = datetime.now()
if filters["variants"]: variants = variants_filter.get_variants(log) variants_count = case_statistics.get_variant_statistics(log) variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False) custom_path_range = sorted(custom_path_range, reverse=False) # check overlapping for i in range(0,len(custom_path_range)-1): if(custom_path_range[i][1] > custom_path_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for variants filter"})) response.status_code = 200 return response #raise ValueError("Overlapping variants ranges") nr_variants = len(variants_count) custom_path_range * nr_variants idx = [(math.floor(x*nr_variants), math.ceil(y*nr_variants)) for (x,y) in custom_path_range] variants_subset = [variants_count[x:y+1] for (x,y) in idx] variants_subset = flatten(variants_subset) filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]} #l2 not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]} filtered_log = variants_filter.apply(log, filtered_variants) #l2 not_filtered_logs["variant_filter"] = variants_filter.apply(log, not_filtered_variants) else: filtered_log = log time_variants_finished = datetime.now() # note: incl log2 generation if filters["performance"]: custom_performance_range = sorted(custom_performance_range, reverse=False) # check overlapping for i in range(0,len(custom_performance_range)-1): if(custom_performance_range[i][1] > custom_performance_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for performance filter"})) response.status_code = 200 return response #raise ValueError("Overlapping performance ranges") #all_case_durations = case_statistics.get_all_casedurations(log, parameters={case_statistics.Parameters.TIMESTAMP_KEY: "time:timestamp"}) #case_filter.filter_case_performance(log, 86400, 864000) performances = [] for i in range(len(filtered_log)): filtered_log[i].attributes["throughput"] = (max([event["time:timestamp"]for event in filtered_log[i]])-min([event["time:timestamp"] for event in filtered_log[i]])).total_seconds() performances.append(filtered_log[i].attributes["throughput"]) nr_cases = len(filtered_log) performances = sorted(performances, reverse=False) idx = [(math.floor(x*nr_cases), math.ceil(y*nr_cases)) for (x,y) in custom_performance_range] perf_subset = [performances[x:y+1] for (x,y) in idx] perf_subset = flatten(perf_subset) performance_log = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] in perf_subset]) #l2 not_filtered_logs["performance_filter"] = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] not in perf_subset]) #print(str(len(not_filtered_logs["performance_filter"]))) else: performance_log = filtered_log time_performance_finished = datetime.now() if filters["activities"]: variants = variants_filter.get_variants(performance_log) variants_count = case_statistics.get_variant_statistics(performance_log) variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False) activities = dict() for variant in variants_count: for activity in variant["variant"].split(","): if (activity not in activities.keys()): activities[activity] = variant["count"] else: activities[activity] += variant["count"] sorted_activities = {k: v for k, v in sorted(activities.items(), key=lambda item: item[1])} activities_sorted_list = list(sorted_activities) custom_activitiy_range = 
        # check overlapping
        for i in range(0, len(custom_activitiy_range) - 1):
            if custom_activitiy_range[i][1] > custom_activitiy_range[i + 1][0]:
                response = HttpResponse(json.dumps({'error': "Wrong intervals for activities filter"}))
                response.status_code = 200
                return response
                # raise ValueError("Overlapping activities ranges")
        nr_activities = len(activities_sorted_list)
        idx = [(math.floor(x * nr_activities), math.ceil(y * nr_activities)) for (x, y) in custom_activitiy_range]
        activities_to_keep = [activities_sorted_list[x:y + 1] for (x, y) in idx]
        activities_to_keep = flatten(activities_to_keep)
        variants_idx = []
        for i in range(len(variants_count)):
            for activity in activities_to_keep:
                if activity in variants_count[i]["variant"].split(",") and (i not in variants_idx):
                    variants_idx.append(i)
        variants_subset = [variants_count[i] for i in variants_idx]
        filtered_variants = {k: v for k, v in variants.items() if k in [x["variant"] for x in variants_subset]}
        # l2
        not_filtered_variants = {k: v for k, v in variants.items() if k not in [x["variant"] for x in variants_subset]}
        filtered_log = variants_filter.apply(performance_log, filtered_variants)
        # l2
        not_filtered_logs["activities_filter"] = variants_filter.apply(performance_log, not_filtered_variants)
        new_log = pm4py.objects.log.log.EventLog()
        # not_filtered_logs["activities_filter_traces"] = pm4py.objects.log.log.EventLog()
        for trace in filtered_log:
            new_trace = pm4py.objects.log.log.Trace()
            not_new_trace = pm4py.objects.log.log.Trace()
            for event in trace:
                if event['concept:name'] in activities_to_keep:
                    new_trace.append(event)
                else:
                    not_new_trace.append(event)
            if len(new_trace) > 0:
                new_log.append(new_trace)
            if len(not_new_trace) > 0:
                not_filtered_logs["activities_filter"].append(not_new_trace)
    else:
        new_log = performance_log

    time_activities_finished = datetime.now()

    if filters["attribute"]:
        custom_attribute_range = sorted(custom_attribute_range, reverse=False)
        # check overlapping
        for i in range(0, len(custom_attribute_range) - 1):
            if custom_attribute_range[i][1] > custom_attribute_range[i + 1][0]:
                response = HttpResponse(json.dumps({'error': "Wrong intervals for additional attribute filter"}))
                response.status_code = 200
                return response
        newest_log = pm4py.objects.log.log.EventLog()
        not_filtered_logs["additional_filter"] = pm4py.objects.log.log.EventLog()
        traces_with_attr = []
        not_traces_with_attr = []
        for trace in new_log:
            if additional_attribute in trace.attributes.keys():
                traces_with_attr.append(trace)
            else:
                not_traces_with_attr.append(trace)
        # check if trace attribute
        if len(traces_with_attr) > 0:
            # check if numeric
            if type(traces_with_attr[0].attributes[additional_attribute]) in [int, float]:
                for trace in traces_with_attr:
                    if any([trace.attributes[additional_attribute] >= x and trace.attributes[additional_attribute] <= y
                            for (x, y) in custom_attribute_range]):
                        newest_log.append(trace)
                    else:
                        not_filtered_logs["additional_filter"].append(trace)
                for trace in not_traces_with_attr:
                    not_filtered_logs["additional_filter"].append(trace)
            else:
                # string
                attribute_frequencies = dict()
                for trace in traces_with_attr:
                    if trace.attributes[additional_attribute] not in attribute_frequencies.keys():
                        attribute_frequencies[trace.attributes[additional_attribute]] = 0
                    attribute_frequencies[trace.attributes[additional_attribute]] += 1
                sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
                frequencies_sorted_list = list(sorted_frequencies)
                nr_values = len(frequencies_sorted_list)
                idx = [(math.floor(x * nr_values), math.ceil(y * nr_values)) for (x, y) in custom_attribute_range]
                values_to_keep = [frequencies_sorted_list[x:y + 1] for (x, y) in idx]
                values_to_keep = flatten(values_to_keep)
                for trace in traces_with_attr:
                    if trace.attributes[additional_attribute] in values_to_keep:
                        newest_log.append(trace)
                    else:
                        not_filtered_logs["additional_filter"].append(trace)
                for trace in not_traces_with_attr:
                    not_filtered_logs["additional_filter"].append(trace)
        else:
            # event attribute
            if [type(event[additional_attribute]) for trace in new_log for event in trace
                    if additional_attribute in event.keys()][0] in [int, float]:
                for trace in new_log:
                    new_trace = pm4py.objects.log.log.Trace()
                    not_new_trace = pm4py.objects.log.log.Trace()
                    for event in trace:
                        if (additional_attribute in event.keys()
                                and any([event[additional_attribute] >= x and event[additional_attribute] <= y
                                         for (x, y) in custom_attribute_range])):
                            new_trace.append(event)
                        else:
                            not_new_trace.append(event)
                    if len(new_trace) > 0:
                        newest_log.append(new_trace)
                    if len(not_new_trace) > 0:
                        not_filtered_logs["additional_filter"].append(not_new_trace)
            else:
                # string
                attribute_frequencies = dict()
                for trace in new_log:
                    for event in trace:
                        if additional_attribute in event.keys():
                            if event[additional_attribute] not in attribute_frequencies.keys():
                                attribute_frequencies[event[additional_attribute]] = 0
                            attribute_frequencies[event[additional_attribute]] += 1
                sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
                frequencies_sorted_list = list(sorted_frequencies)
                nr_values = len(frequencies_sorted_list)
                idx = [(math.floor(x * nr_values), math.ceil(y * nr_values)) for (x, y) in custom_attribute_range]
                values_to_keep = [frequencies_sorted_list[x:y + 1] for (x, y) in idx]
                values_to_keep = flatten(values_to_keep)
                for trace in new_log:
                    new_trace = pm4py.objects.log.log.Trace()
                    not_new_trace = pm4py.objects.log.log.Trace()
                    for event in trace:
                        if additional_attribute in event.keys() and event[additional_attribute] in values_to_keep:
                            new_trace.append(event)
                        else:
                            not_new_trace.append(event)
                    if len(new_trace) > 0:
                        newest_log.append(new_trace)
                    if len(not_new_trace) > 0:
                        not_filtered_logs["additional_filter"].append(not_new_trace)
    else:
        newest_log = new_log

    time_attribute_finished = datetime.now()

    if selected_viz == "dfgf":
        dfg = dfg_discovery.apply(newest_log)
        gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.FREQUENCY)
        dfg_visualization.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l1.png"))
    elif selected_viz == "dfgp":
        dfg = dfg_discovery.apply(newest_log)
        gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.PERFORMANCE)
        dfg_visualization.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l1.png"))
    else:
        heu_net = heuristics_miner.apply_heu(newest_log, parameters={"dependency_thresh": 0.99})
        gviz = hn_vis_factory.apply(heu_net)
        hn_vis_factory.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l1.png"))
    xes_exporter.apply(newest_log, os.path.join("webapp", "static", req.session["id"] + "_l1.xes"))

    # l2
    not_filtered_log = pm4py.objects.log.log.EventLog()
    for part in not_filtered_logs.keys():
        for trace in not_filtered_logs[part]:
            not_filtered_log.append(trace)

    if selected_viz == "dfgf":
        dfg = dfg_discovery.apply(not_filtered_log)
        gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.FREQUENCY)
        dfg_visualization.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l2.png"))
"_l2.png")) elif(selected_viz=="dfgp"): dfg = dfg_discovery.apply(not_filtered_log) gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.PERFORMANCE) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png")) else: heu_net = heuristics_miner.apply_heu(not_filtered_log, parameters={"dependency_thresh": 0.99}) gviz = hn_vis_factory.apply(heu_net) hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png")) xes_exporter.apply(not_filtered_log, os.path.join("webapp","static", req.session["id"] + "_l2.xes")) if(calc_lev): lev_new = [0]*len(newest_log) for i in range(len(newest_log)): lev_new[i] = [hash(event['concept:name']) for event in newest_log[i]] lev_not = [0]*len(not_filtered_log) for i in range(len(not_filtered_log)): lev_not[i] = [hash(event['concept:name']) for event in not_filtered_log[i]] distances = [] for i in range(len(lev_new)): for j in range(len(lev_not)): distances.append(lev_dist(lev_new[i], lev_not[j])) lev_d = sum(distances)/len(distances) print("Levenshtein's distance: "+str(lev_d)) else: lev_d = "null" used_paths = 0 for lower, higher in custom_path_range: used_paths += round((higher-lower)*100) print(f"Using {used_paths}% of paths. {100-used_paths}% of paths are discarded.") print("Timestamp filter: {} seconds. \nVariants filter: {} seconds. \nPerformance filter: {} seconds. \nActivities filter: {} seconds. \nAttribute filter: {} seconds.".format((time_variants_started - time_timestamp_started).total_seconds(), (time_variants_finished - time_variants_started).total_seconds(), (time_performance_finished - time_variants_finished).total_seconds(), (time_activities_finished - time_performance_finished).total_seconds(), (time_attribute_finished - time_activities_finished).total_seconds())) response = HttpResponse(json.dumps({'time':(time_variants_started - time_timestamp_started).total_seconds(), 'variants':(time_variants_finished - time_variants_started).total_seconds(),'performance':(time_performance_finished - time_variants_finished).total_seconds(), 'activities':(time_activities_finished - time_performance_finished).total_seconds(), 'attribute':(time_attribute_finished - time_activities_finished).total_seconds(), 'traces':[len(newest_log), len(not_filtered_log)], 'distance':lev_d})) response.status_code = 200 return response
def discover_process_models(log_path, log_name):
    custom_print('Importing log')
    log_complete = xes_importer.apply(log_path)
    log = variants_filter.filter_log_variants_percentage(log_complete, 0.9)
    # A_ACTIVATED, A_DECLINED, A_CANCELLED
    # log = attributes_filter.apply(log_complete, ["A_ACTIVATED"], parameters={attributes_filter.Parameters.ATTRIBUTE_KEY: "concept:name", attributes_filter.Parameters.POSITIVE: True})
    custom_print('Log imported')

    if (1 == 2):  # Inductive Miner
        custom_print('Starting Inductive Miner')
        parameters = {
            inductive_miner.Variants.IM.value.Parameters.CASE_ID_KEY: 'case:concept:name',
            inductive_miner.Variants.IM.value.Parameters.TIMESTAMP_KEY: 'time:timestamp'
        }
        variant = inductive_miner.Variants.IM
        petrinet = inductive_miner.apply(log, parameters=parameters, variant=variant)
        print_statistics(petrinet[0], 'IM')
        custom_print('Inductive Miner finished\n')

    if (1 == 2):  # Inductive Miner Infrequent 0.2
        custom_print('Starting Inductive Miner Infrequent 0.2')
        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD: 0.2,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY: 'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY: 'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf
        petrinet = inductive_miner.apply(log, parameters=parameters, variant=variant)
        print_statistics(petrinet[0], 'IMf0.2')
        custom_print('Inductive Miner Infrequent 0.2 finished\n')

    if (1 == 1):  # Inductive Miner Infrequent 0.5
        custom_print('Starting Inductive Miner Infrequent 0.5')
        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD: 0.5,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY: 'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY: 'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf
        petrinet, initial_marking, final_marking = inductive_miner.apply(log, parameters=parameters, variant=variant)
        print_statistics(petrinet, 'IMf0.5')
        custom_print('Inductive Miner Infrequent 0.5 finished\n')

        ts = reachability_graph.construct_reachability_graph(petrinet, initial_marking)
        gviz = ts_visualizer.apply(ts, parameters={
            ts_visualizer.Variants.VIEW_BASED.value.Parameters.FORMAT: "png"
        })
        gviz.render('petrinets/simple-reach', cleanup=True)
        pnml_exporter.apply(petrinet, initial_marking, "petrinets/simple-petri.pnml")

    if (1 == 2):  # Inductive Miner Infrequent 0.8
        custom_print('Starting Inductive Miner Infrequent 0.8')
        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD: 0.8,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY: 'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY: 'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf
        petrinet = inductive_miner.apply(log, parameters=parameters, variant=variant)
        print_statistics(petrinet[0], 'IMf0.8')
        custom_print('Inductive Miner Infrequent 0.8 finished\n')

    if (1 == 2):  # Inductive Miner Directly-Follows
        custom_print('Starting Inductive Miner Directly-Follows')
        parameters = {
            inductive_miner.Variants.IMd.value.Parameters.CASE_ID_KEY: 'case:concept:name',
            inductive_miner.Variants.IMd.value.Parameters.TIMESTAMP_KEY: 'time:timestamp'
        }
        variant = inductive_miner.Variants.IMd
        petrinet = inductive_miner.apply(log, parameters=parameters, variant=variant)
        print_statistics(petrinet[0], 'IMd')
        custom_print('Inductive Miner Directly-Follows finished\n')

    if (1 == 2):  # Alpha Miner
        custom_print('Starting Alpha Miner')
        parameters = {}
        variant = alpha_miner.Variants.ALPHA_VERSION_CLASSIC
        petrinet = alpha_miner.apply(log, parameters=parameters, variant=variant)
        print_statistics(petrinet[0], 'Alpha')
        custom_print('Alpha Miner finished\n')

    if (1 == 2):  # Heuristic Miner 0.5
        custom_print('Starting Heuristic Miner 0.5')
        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.5
        }
        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.5')
        custom_print('Heuristic Miner 0.5 finished\n')

    if (1 == 2):  # Heuristic Miner 0.99
        custom_print('Starting Heuristic Miner 0.99')
        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99
        }
        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.99')
        custom_print('Heuristic Miner 0.99 finished\n')

    if (1 == 2):  # Heuristic Miner 0.1
        custom_print('Starting Heuristic Miner 0.1')
        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.1
        }
        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.1')
        custom_print('Heuristic Miner 0.1 finished\n')

    if (1 == 2):  # Heuristic Miner 1.0
        custom_print('Starting Heuristic Miner 1.0')
        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 1.0
        }
        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM1.0')
        custom_print('Heuristic Miner 1.0 finished\n')

    if (1 == 2):  # DFG
        custom_print('Starting DFG')
        dfg = dfg_discovery.apply(log)
        parameters = {
            dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: 'png'
        }
        gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY,
                                       parameters=parameters)
        dfg_visualization.save(gviz, 'petrinets/simple-DFG.png')
        custom_print('DFG finished\n')
# with performance
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization

dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE)
dfg_visualization.view(gviz)

# svg format
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization

dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
parameters = {dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT: "svg"}
gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE, parameters=parameters)
dfg_visualization.save(gviz, "dfg.svg")

# Convert the DF graph to a workflow net
from pm4py.objects.log.importer.xes import importer as xes_importer
import os

filepath = os.path.join('E:/data/pm/running-example.xes')
log = xes_importer.apply(filepath)

from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
dfg = dfg_discovery.apply(log)

from pm4py.objects.conversion.dfg import converter as dfg_mining
net, im, fm = dfg_mining.apply(dfg)
net
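# Follow-up sketch (an assumption, not in the original snippet): render and save the workflow
# net obtained from the DFG above; in older pm4py releases the visualizer module is
# pm4py.visualization.petrinet instead of pm4py.visualization.petri_net.
from pm4py.visualization.petri_net import visualizer as pn_visualizer

gviz_net = pn_visualizer.apply(net, im, fm, parameters={"format": "svg"})
pn_visualizer.save(gviz_net, "workflow_net.svg")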
def create_process_models(output_case_traces_cluster, path_data_sources, dir_runtime_files, dir_dfg_cluster_files,
                          filename_dfg_cluster, rel_proportion_dfg_threshold, logging_level):
    """
    Creates directly follows graphs out of an event log.

    :param output_case_traces_cluster: traces that are visualised
    :param path_data_sources: path of sources and outputs
    :param dir_runtime_files: folder containing files read and written during runtime
    :param dir_dfg_cluster_files: folder containing dfg png files
    :param filename_dfg_cluster: filename of dfg file (per cluster)
    :param rel_proportion_dfg_threshold: threshold for filtering out sensors in dfg relative to max occurrences
        of a sensor
    :param logging_level: level of logging
    :return:
    """
    # keep only needed columns
    output_case_traces_cluster = output_case_traces_cluster.reindex(
        columns={'Case', 'LC_Activity', 'Timestamp', 'Cluster'})
    output_case_traces_cluster = output_case_traces_cluster.rename(columns={
        'Case': 'case:concept:name',
        'LC_Activity': 'concept:name',
        'Timestamp': 'time:timestamp'
    })

    # create directory for dfg pngs
    os.mkdir(path_data_sources + dir_runtime_files + dir_dfg_cluster_files)

    # create dfg for each cluster
    clusters = output_case_traces_cluster.Cluster.unique()
    for cluster in clusters:
        log = output_case_traces_cluster.loc[output_case_traces_cluster.Cluster == cluster]
        log = log.astype(str)
        # convert pandas data frame to pm4py event log for further processing
        log = log_converter.apply(log)

        # keep only activities with more than a certain number of occurrences
        activities = attributes_get.get_attribute_values(log, 'concept:name')
        # determine that number relative to the max number of occurrences of a sensor in a cluster
        # (the result is the threshold at which an activity/activity strand is kept)
        min_number_of_occurrences = round((max(activities.values()) * rel_proportion_dfg_threshold), 0)
        activities = {x: y for x, y in activities.items() if y >= min_number_of_occurrences}
        log = attributes_filter.apply(log, activities)

        # create dfg out of event log
        dfg = dfg_discovery.apply(log)

        # define start and end activities
        start_activities = sa_get.get_start_activities(log)
        end_activities = ea_get.get_end_activities(log)

        # create png of dfg
        # (if the visualisation does not show a graph, the sensors may not have triggered often enough)
        gviz = dfg_visualization.apply(dfg=dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY,
                                       parameters={'start_activities': start_activities,
                                                   'end_activities': end_activities})
        dfg_visualization.save(gviz,
                               path_data_sources + dir_runtime_files + dir_dfg_cluster_files +
                               (filename_dfg_cluster.format(cluster=str(cluster))))

    # logger
    logger = logging.getLogger(inspect.stack()[0][3])
    logger.setLevel(logging_level)
    logger.info("Saved directly follows graphs into '../%s'.",
                path_data_sources + dir_runtime_files + dir_dfg_cluster_files)
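# Hypothetical invocation of create_process_models; every path, file-name pattern and
# threshold below is an illustrative placeholder, and traces_df stands for a DataFrame
# with 'Case', 'LC_Activity', 'Timestamp' and 'Cluster' columns.
import logging

create_process_models(output_case_traces_cluster=traces_df,
                      path_data_sources='data/',
                      dir_runtime_files='runtime/',
                      dir_dfg_cluster_files='dfg/',
                      filename_dfg_cluster='dfg_cluster_{cluster}.png',
                      rel_proportion_dfg_threshold=0.5,
                      logging_level=logging.INFO)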