def execute_script(): df = pm4py.read_csv("../tests/input_data/interval_event_log.csv") log = pm4py.read_xes("../tests/input_data/interval_event_log.xes") heu_net = plusplus.apply_heu(log, parameters={"heu_net_decoration": "performance"}) heu_net_2 = plusplus.apply_heu_pandas(df, parameters={"heu_net_decoration": "performance"}) gviz = visualizer.apply(heu_net, parameters={"format": "svg"}) visualizer.view(gviz) gviz2 = visualizer.apply(heu_net_2, parameters={"format": "svg"}) visualizer.view(gviz2) net1, im1, fm1 = plusplus.apply(log) net2, im2, fm2 = plusplus.apply(log) gviz3 = pn_visualizer.apply(net1, im1, fm1, parameters={"format": "svg"}) pn_visualizer.view(gviz3) gviz4 = pn_visualizer.apply(net2, im2, fm2, parameters={"format": "svg"}) pn_visualizer.view(gviz4)
def execute_script(): log = pm4py.read_xes("../tests/input_data/running-example.xes") dfg, sa, ea = pm4py.discover_dfg(log) tree = pm4py.discover_process_tree_inductive(log) heu_net = pm4py.discover_heuristics_net(log) net, im, fm = pm4py.discover_petri_net_alpha(log) bpmn = pm4py.convert_to_bpmn(tree) ts = ts_discovery.apply(log) x_cases, y_cases = case_statistics.get_kde_caseduration(log) gviz1 = dfg_visualizer.apply(dfg) gviz2 = tree_visualizer.apply(tree) gviz3 = hn_visualizer.apply(heu_net) gviz4 = pn_visualizer.apply(net, im, fm) gviz5 = bpmn_visualizer.apply(bpmn) gviz6 = ts_visualizer.apply(ts) gviz7 = graphs_visualizer.apply(x_cases, y_cases, variant=graphs_visualizer.Variants.CASES, parameters={graphs_visualizer.Variants.CASES.value.Parameters.FORMAT: "svg"}) print("1", len(dfg_visualizer.serialize_dot(gviz1))) print("1", len(dfg_visualizer.serialize(gviz1))) print("2", len(tree_visualizer.serialize_dot(gviz2))) print("2", len(tree_visualizer.serialize(gviz2))) print("3", len(hn_visualizer.serialize(gviz3))) print("4", len(pn_visualizer.serialize_dot(gviz4))) print("4", len(pn_visualizer.serialize(gviz4))) print("5", len(bpmn_visualizer.serialize_dot(gviz5))) print("5", len(bpmn_visualizer.serialize(gviz5))) print("6", len(ts_visualizer.serialize_dot(gviz6))) print("6", len(ts_visualizer.serialize(gviz6))) print("7", len(graphs_visualizer.serialize(gviz7)))
def test_heuplusplus_perf_df(self): df = pd.read_csv(os.path.join(INPUT_DATA_DIR, "interval_event_log.csv")) df = dataframe_utils.convert_timestamp_columns_in_df(df) heu_net = heuristics_miner.Variants.PLUSPLUS.value.apply_heu_pandas( df, parameters={"heu_net_decoration": "performance"}) gviz = hn_vis.apply(heu_net)
def test_heuplusplus_perf_log(self): log = xes_importer.apply( os.path.join(INPUT_DATA_DIR, "interval_event_log.xes")) heu_net = heuristics_miner.apply_heu( log, variant=heuristics_miner.Variants.PLUSPLUS, parameters={"heu_net_decoration": "performance"}) gviz = hn_vis.apply(heu_net)
def test_heunet_running_example(self): # to avoid static method warnings in tests, # that by construction of the unittest package have to be expressed in such way self.dummy_variable = "dummy_value" log = xes_importer.apply(os.path.join(INPUT_DATA_DIR, "running-example.xes")) heu_net = heuristics_miner.apply_heu(log) gviz = hn_vis.apply(heu_net) del gviz
def execute_script(): log = xes_importer.apply(os.path.join("..", "tests", "compressed_input_data", "09_a32f0n00.xes.gz")) heu_net = heuristics_miner.apply_heu(log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99}) gviz = hn_vis.apply(heu_net, parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"}) hn_vis.view(gviz) net, im, fm = heuristics_miner.apply(log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99}) gviz2 = petri_vis.apply(net, im, fm, parameters={petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "svg"}) petri_vis.view(gviz2)
def create_heuristic_net(frame: DataFrame, output_format: str = 'svg'): """ Creates a Heuristic Net from the supplied DataFrame. :param frame: the DataFrame :param output_format: desired output format :return: object representing the created graph """ event_log = _convert_data_frame_to_event_log(frame) log.info('creating heuristic net') heu_net = hn_alg.apply_heu(log=event_log) return hn_vis.apply(heu_net=heu_net, parameters={VisualisationParams.FORMAT: output_format})
def upload_log(req): #input_log_file = req.FILES["input_log_file"] sessions[req.session["id"]] = datetime.now() req.session.set_expiry(7200) #7200 = 2hrs subprocess.call(["rm", "-f", req.session["id"] + "_*"]) #with open(os.path.join("webapp","static", req.session["id"] + "_l0.xes"), 'wb') as file: # file.write(input_log_file.read()) #input_file = os.path.join("webapp","static", req.session["id"] + "_l0.xes") input_file = os.path.join("webapp","static", "sepsis.xes") log = xes_importer.apply(input_file) heu_net = heuristics_miner.apply_heu(log, parameters={"dependency_thresh": 0.99}) gviz = hn_vis_factory.apply(heu_net) hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l0.png")) #Find minimum and maximum timestamps start_time = min([event["time:timestamp"] for trace in log for event in trace]) start_times_a = [event["start_timestamp"] for trace in log for event in trace if "start_timestamp" in event.keys()] if len(start_times_a)>0: start_time = min(start_time, min(start_times_a)) end_time = max([event["time:timestamp"] for trace in log for event in trace]) + timedelta(seconds=1) flatten = lambda l: [item for sublist in l for item in sublist] trace_attributes = list(set(flatten([trace.attributes.keys() for trace in log])).difference(['concept:name'])) trace_attributes = [{'name':attribute, 'level':'trace'} for attribute in trace_attributes] for i in range(len(trace_attributes)): trace_attributes[i]['min'] = min([trace.attributes[trace_attributes[i]['name']] for trace in log if trace_attributes[i]['name'] in trace.attributes.keys()]) trace_attributes[i]['max'] = max([trace.attributes[trace_attributes[i]['name']] for trace in log if trace_attributes[i]['name'] in trace.attributes.keys()]) trace_attributes[i]['type'] = type(trace_attributes[i]['min']).__name__ if trace_attributes[i]['type']=='str': trace_attributes[i]['min']=0 trace_attributes[i]['max']=1 trace_attributes = [attribute for attribute in trace_attributes if attribute['type'] != 'bool'] event_attributes = list(set(flatten([event.keys() for trace in log for event in trace])).difference(['concept:name', 'time:timestamp'])) event_attributes = [{'name':attribute, 'level':'event'} for attribute in event_attributes] for i in range(len(event_attributes)): event_attributes[i]['min'] = min([event[event_attributes[i]['name']] for trace in log for event in trace if event_attributes[i]['name'] in event.keys()]) event_attributes[i]['max'] = max([event[event_attributes[i]['name']] for trace in log for event in trace if event_attributes[i]['name'] in event.keys()]) event_attributes[i]['type'] = type(event_attributes[i]['min']).__name__ if event_attributes[i]['type']=='str': event_attributes[i]['min']=0 event_attributes[i]['max']=1 event_attributes = [attribute for attribute in event_attributes if attribute['type'] != 'bool'] print(trace_attributes) print(event_attributes) response = HttpResponse(json.dumps({'start_time': start_time, 'end_time': end_time, 'traces_u':len(log), 'trace_attributes':trace_attributes, 'event_attributes':event_attributes}, sort_keys=True, indent=1, cls=DjangoJSONEncoder)) response.status_code = 200 return response
def view_heuristics_net(heu_net, format="png"): """ Views an heuristics net Parameters -------------- heu_net Heuristics net format Format of the visualization (default: png) """ from pm4py.visualization.heuristics_net import visualizer as hn_visualizer parameters = hn_visualizer.Variants.PYDOTPLUS.value.Parameters gviz = hn_visualizer.apply(heu_net, parameters={parameters.FORMAT: format}) hn_visualizer.view(gviz)
def save_vis_heuristics_net(heu_net: HeuristicsNet, file_path: str): """ Saves the visualization of an heuristics net Parameters -------------- heu_net Heuristics nte file_path Destination path """ format = os.path.splitext(file_path)[1][1:] from pm4py.visualization.heuristics_net import visualizer as hn_visualizer parameters = hn_visualizer.Variants.PYDOTPLUS.value.Parameters gviz = hn_visualizer.apply(heu_net, parameters={parameters.FORMAT: format}) hn_visualizer.save(gviz, file_path)
def save_vis_heuristics_net(heu_net, file_path): """ Saves the visualization of an heuristics net Parameters -------------- heu_net Heuristics nte file_path Destination path """ format = file_path[file_path.index(".") + 1:].lower() from pm4py.visualization.heuristics_net import visualizer as hn_visualizer parameters = hn_visualizer.Variants.PYDOTPLUS.value.Parameters gviz = hn_visualizer.apply(heu_net, parameters={parameters.FORMAT: format}) hn_visualizer.save(gviz, file_path)
def execute_script(): # import csv & create log dataframe = csv_import_adapter.import_dataframe_from_path( datasourceMockdata(), sep=";") dataframe = dataframe.rename(columns={ 'coID': 'case:concept:name', 'Activity': 'concept:name' }) log = conversion_factory.apply(dataframe) # option 1: Directly-Follows Graph, represent frequency or performance parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"} variant = 'frequency' dfg = dfg_factory.apply(log, variant=variant, parameters=parameters) gviz1 = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters) dfg_vis_factory.view(gviz1) # option 2: Heuristics Miner, acts on the Directly-Follows Graph, find common structures, output: Heuristic Net (.svg) heu_net = heuristics_miner.apply_heu( log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00 }) gviz2 = hn_vis.apply( heu_net, parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"}) hn_vis.view(gviz2) # option 3: Petri Net based on Heuristic Miner (.png) net, im, fm = heuristics_miner.apply( log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00 }) gviz3 = petri_vis.apply( net, im, fm, parameters={ petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png" }) petri_vis.view(gviz3)
def create_graphs_errortypes(log, errortype): """ creates visualization for the error types: Heuristic Net """ path = "documentation_errortypes" vis_type = "heuristicnet" filename = f"{path}/{vis_type}_{errortype}.svg" heu_net = heuristics_miner.apply_heu( log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00 }) gviz_error = hn_vis.apply( heu_net, parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"}) hn_vis.view(gviz_error) hn_vis.save(gviz_error, filename)
def test_42(self): from pm4py.objects.log.importer.xes import importer as xes_importer import os log_path = os.path.join("compressed_input_data", "09_a32f0n00.xes.gz") log = xes_importer.apply(log_path) from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner heu_net = heuristics_miner.apply_heu(log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99}) from pm4py.visualization.heuristics_net import visualizer as hn_visualizer gviz = hn_visualizer.apply(heu_net) from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner net, im, fm = heuristics_miner.apply(log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99}) from pm4py.visualization.petrinet import visualizer as pn_visualizer gviz = pn_visualizer.apply(net, im, fm)
def execute_script(): dataframe2 = csv_import_adapter.import_dataframe_from_path( datasourceMockdata(), sep=",") dataframe2 = dataframe2.rename( columns={ 'timestamp': 'time:timestamp', 'source': 'case:concept:name', 'message': 'concept:name' }) log2 = conversion_factory.apply(dataframe2) # option 1: Heuristics Miner, acts on the Directly-Follows Graph, find common structures, output: Heuristic Net (.svg) heu_net = heuristics_miner.apply_heu( log2, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99 }) gviz2 = hn_vis.apply( heu_net, parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"}) hn_vis.view(gviz2) # option 2: Petri Net based on Heuristic Miner (.png) net, im, fm = heuristics_miner.apply( log2, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99 }) gviz3 = petri_vis.apply( net, im, fm, parameters={ petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png" }) petri_vis.view(gviz3)
def create_graphs(without_error, log, approach): """ creates visualization: Directly-Follows-Graph and Heuristic Net """ # create dfg frequency path = "common_path" vis_type = "dfg_frequency" naming_error = "with_error" if without_error: naming_error = "no_error" file = f"{vis_type}_{approach}_{naming_error}.svg" filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg" parameters = { constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name", "format": "svg" } variant = 'frequency' dfg = dfg_factory.apply(log, variant=variant, parameters=parameters) gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters) dfg_vis_factory.view(gviz) dfg_vis_factory.save(gviz, filename) log_info.info("DFG frequency has been stored in '%s' in file '%s'", path, file) # create dfg performance vis_type = "dfg_performance" file = f"{vis_type}_{approach}_{naming_error}.svg" filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg" variant = 'performance' dfg = dfg_factory.apply(log, variant=variant, parameters=parameters) gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters) dfg_vis_factory.view(gviz) dfg_vis_factory.save(gviz, filename) log_info.info("DFG performance has been stored in '%s' in file '%s'", path, file) # create heuristic net vis_type = "heuristicnet" file = f"{vis_type}_{approach}_{naming_error}.svg" filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg" heu_net = heuristics_miner.apply_heu( log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.60 }) gviz = hn_vis.apply( heu_net, parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"}) hn_vis.view(gviz) hn_vis.save(gviz, filename) log_info.info("Heuristic Net has been stored in '%s' in file '%s'", path, file) # save heuristic net in plain-ext format file = f"{vis_type}_{approach}_{naming_error}.plain-ext" filename = f"{path}/{vis_type}_{approach}_{naming_error}.plain-ext" gviz = hn_vis.apply(heu_net, parameters={ hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "plain-ext" }) hn_vis.save(gviz, filename) log_info.info( "Heuristic Net as .plain-ext has been stored in '%s' " "in file '%s'", path, file) # save heuristic net in dot format file = f"{vis_type}_{approach}_{naming_error}.dot" filename = f"{path}/{vis_type}_{approach}_{naming_error}.dot" gviz = hn_vis.apply( heu_net, parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "dot"}) hn_vis.save(gviz, filename) log_info.info( "Heuristic Net as .dot has been stored in '%s' " "in file '%s'", path, file) # save heuristic net in xdot format file = f"{vis_type}_{approach}_{naming_error}.xdot" filename = f"{path}/{vis_type}_{approach}_{naming_error}.xdot" gviz = hn_vis.apply( heu_net, parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "xdot"}) hn_vis.save(gviz, filename) log_info.info( "Heuristic Net as .xdot has been stored in '%s' " "in file '%s'", path, file)
def heuristic_miner_heu_net(log): heu_net = heuristics_miner.apply_heu(log) gviz = hn_visualizer.apply(heu_net) return gviz, heu_net
# gviz = pt_visualizer.apply(tree) # pt_visualizer.view(gviz) net, initial_marking, final_marking = heuristics_miner.apply( log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99 }) heu_net = heuristics_miner.apply_heu( log, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99 }) gviz = hn_visualizer.apply(heu_net) hn_visualizer.view(gviz) if (net and initial_marking and final_marking) is None: raise Exception("This is for your safety. Check configuration") #%% gviz = pn_visualizer.apply(net, initial_marking, final_marking) pn_visualizer.view(gviz) #%% # fit log against model fitness_eval_TOKEN_BASED = replay_fitness_evaluator.apply( log, net, initial_marking,
def apply_filter(req): sessions[req.session["id"]] = datetime.now() filters = { "time": True, "variants": True, "performance": True, "activities": True, "attribute": True } req.session.set_expiry(7200) #print(str(req.body)) o = json.loads(req.body) print(str(o)) custom_time_range = [] for pair in o["filter1"]: #custom_time_range.append((dateutil.parser.parse(pair[0]),dateutil.parser.parse(pair[1]))) custom_time_range.append((pair[0],pair[1])) if o["filter1"] == []: filters["time"] = False #print(o["filter1"][0]) #print(custom_time_range[0][0]) #print(custom_time_range) custom_path_range = [] for pair in o["filter2"]: custom_path_range.append((float(pair[0]),float(pair[1]))) if o["filter2"] == []: filters["variants"] = False #custom_path_range = [(0,1)] #filter2 custom_performance_range = [] for pair in o["filter3"]: custom_performance_range.append((float(pair[0]),float(pair[1]))) if o["filter3"] == []: filters["performance"] = False custom_activitiy_range = [] for pair in o["filter4"]: custom_activitiy_range.append((float(pair[0]),float(pair[1]))) if o["filter4"] == []: filters["activities"] = False #custom_activitiy_range = [(0,1)] #filter3 custom_attribute_range = [] for pair in o["filter5"]: custom_attribute_range.append((float(pair[0]),float(pair[1]))) if o["filter5"] == [] or o["filter5attribute"] == "Empty": filters["attribute"] = False additional_attribute = o["filter5attribute"] selected_viz = o["visualization"] calc_lev = o["distance"] #input_file = os.path.join("webapp","static", req.session["id"] + "_l0.xes") input_file = os.path.join("webapp","static", "sepsis.xes") input_log = xes_importer.apply(input_file) not_filtered_logs = {} flatten = lambda l: [item for sublist in l for item in sublist] time_timestamp_started = datetime.now() if filters["time"]: #TODO check overlapping for filter custom_time_range = sorted(custom_time_range, reverse=False) for i in range(0,len(custom_time_range)-1): if(custom_time_range[i][1] > custom_time_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for time filter"})) response.status_code = 200 return response #raise ValueError("Overlapping time ranges") logs = [] for (x,y) in custom_time_range: logs.append(timestamp_filter.filter_traces_contained(input_log, x, y)) #log = timestamp_filter.filter_traces_contained(input_log, custom_time_range[0][0], custom_time_range[0][1]) log = pm4py.objects.log.log.EventLog() for timeslice in logs: for trace in timeslice: log.append(trace) print(len(input_log)) print(len(log)) #l2 not_filtered_logs["timestamp_filter"] = pm4py.objects.log.log.EventLog() for trace in input_log: if trace not in log: not_filtered_logs["timestamp_filter"].append(trace) print(len(not_filtered_logs["timestamp_filter"])) else: log = input_log time_variants_started = datetime.now() # where should I start? if filters["variants"]: variants = variants_filter.get_variants(log) variants_count = case_statistics.get_variant_statistics(log) variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False) custom_path_range = sorted(custom_path_range, reverse=False) # check overlapping for i in range(0,len(custom_path_range)-1): if(custom_path_range[i][1] > custom_path_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for variants filter"})) response.status_code = 200 return response #raise ValueError("Overlapping variants ranges") nr_variants = len(variants_count) custom_path_range * nr_variants idx = [(math.floor(x*nr_variants), math.ceil(y*nr_variants)) for (x,y) in custom_path_range] variants_subset = [variants_count[x:y+1] for (x,y) in idx] variants_subset = flatten(variants_subset) filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]} #l2 not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]} filtered_log = variants_filter.apply(log, filtered_variants) #l2 not_filtered_logs["variant_filter"] = variants_filter.apply(log, not_filtered_variants) else: filtered_log = log time_variants_finished = datetime.now() # note: incl log2 generation if filters["performance"]: custom_performance_range = sorted(custom_performance_range, reverse=False) # check overlapping for i in range(0,len(custom_performance_range)-1): if(custom_performance_range[i][1] > custom_performance_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for performance filter"})) response.status_code = 200 return response #raise ValueError("Overlapping performance ranges") #all_case_durations = case_statistics.get_all_casedurations(log, parameters={case_statistics.Parameters.TIMESTAMP_KEY: "time:timestamp"}) #case_filter.filter_case_performance(log, 86400, 864000) performances = [] for i in range(len(filtered_log)): filtered_log[i].attributes["throughput"] = (max([event["time:timestamp"]for event in filtered_log[i]])-min([event["time:timestamp"] for event in filtered_log[i]])).total_seconds() performances.append(filtered_log[i].attributes["throughput"]) nr_cases = len(filtered_log) performances = sorted(performances, reverse=False) idx = [(math.floor(x*nr_cases), math.ceil(y*nr_cases)) for (x,y) in custom_performance_range] perf_subset = [performances[x:y+1] for (x,y) in idx] perf_subset = flatten(perf_subset) performance_log = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] in perf_subset]) #l2 not_filtered_logs["performance_filter"] = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] not in perf_subset]) #print(str(len(not_filtered_logs["performance_filter"]))) else: performance_log = filtered_log time_performance_finished = datetime.now() if filters["activities"]: variants = variants_filter.get_variants(performance_log) variants_count = case_statistics.get_variant_statistics(performance_log) variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False) activities = dict() for variant in variants_count: for activity in variant["variant"].split(","): if (activity not in activities.keys()): activities[activity] = variant["count"] else: activities[activity] += variant["count"] sorted_activities = {k: v for k, v in sorted(activities.items(), key=lambda item: item[1])} activities_sorted_list = list(sorted_activities) custom_activitiy_range = sorted(custom_activitiy_range, reverse=False) # check overlapping for i in range(0,len(custom_activitiy_range)-1): if(custom_activitiy_range[i][1] > custom_activitiy_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for activities filter"})) response.status_code = 200 return response #raise ValueError("Overlapping activities ranges") nr_activities = len(activities_sorted_list) idx = [(math.floor(x*nr_activities), math.ceil(y*nr_activities)) for (x,y) in custom_activitiy_range] activities_to_keep = [activities_sorted_list[x:y+1] for (x,y) in idx] activities_to_keep = flatten(activities_to_keep) variants_idx = [] for i in range(len(variants_count)): for activity in activities_to_keep: if (activity in variants_count[i]["variant"].split(",") and (i not in variants_idx)): variants_idx.append(i) variants_subset = [variants_count[i] for i in variants_idx] filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]} #l2 not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]} filtered_log = variants_filter.apply(performance_log, filtered_variants) #l2 not_filtered_logs["activities_filter"] = variants_filter.apply(performance_log, not_filtered_variants) new_log = pm4py.objects.log.log.EventLog() #not_filtered_logs["activities_filter_traces"] = pm4py.objects.log.log.EventLog() for trace in filtered_log: new_trace = pm4py.objects.log.log.Trace() not_new_trace = pm4py.objects.log.log.Trace() for event in trace: if(event['concept:name'] in activities_to_keep): new_trace.append(event) else: not_new_trace.append(event) if(len(new_trace)>0): new_log.append(new_trace) if(len(not_new_trace)>0): not_filtered_logs["activities_filter"].append(not_new_trace) else: new_log = performance_log time_activities_finished = datetime.now() if filters["attribute"]: custom_attribute_range = sorted(custom_attribute_range, reverse=False) # check overlapping for i in range(0,len(custom_attribute_range)-1): if(custom_attribute_range[i][1] > custom_attribute_range[i+1][0]): response = HttpResponse(json.dumps({'error': "Wrong intervals for additional attribute filter"})) response.status_code = 200 return response newest_log = pm4py.objects.log.log.EventLog() not_filtered_logs["additional_filter"] = pm4py.objects.log.log.EventLog() traces_with_attr = [] not_traces_with_attr = [] for trace in new_log: if additional_attribute in trace.attributes.keys(): traces_with_attr.append(trace) else: not_traces_with_attr.append(trace) #check if trace attribute if len(traces_with_attr)>0: #check if numeric if type(traces_with_attr[0].attributes[additional_attribute]) in [int, float]: for trace in traces_with_attr: if any([trace.attributes[additional_attribute] >= x and trace.attributes[additional_attribute] <= y for (x,y) in custom_attribute_range]): newest_log.append(trace) else: not_filtered_logs["additional_filter"].append(trace) for trace in not_traces_with_attr: not_filtered_logs["additional_filter"].append(trace) else: #string attribute_frequencies = dict() for trace in traces_with_attr: if trace.attributes[additional_attribute] not in attribute_frequencies.keys(): attribute_frequencies[trace.attributes[additional_attribute]] = 0 attribute_frequencies[trace.attributes[additional_attribute]] += 1 sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])} frequencies_sorted_list = list(sorted_frequencies) nr_values = len(frequencies_sorted_list) idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range] values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx] values_to_keep = flatten(values_to_keep) for trace in traces_with_attr: if trace.attributes[additional_attribute] in values_to_keep: newest_log.append(trace) else: not_filtered_logs["additional_filter"].append(trace) for trace in not_traces_with_attr: not_filtered_logs["additional_filter"].append(trace) else: #event attribute if [type(event[additional_attribute]) for trace in new_log for event in trace if additional_attribute in event.keys()][0] in [int, float]: for trace in new_log: new_trace = pm4py.objects.log.log.Trace() not_new_trace = pm4py.objects.log.log.Trace() for event in trace: if(additional_attribute in event.keys() and any([event[additional_attribute] >= x and event[additional_attribute] <= y for (x,y) in custom_attribute_range ])): new_trace.append(event) else: not_new_trace.append(event) if(len(new_trace)>0): newest_log.append(new_trace) if(len(not_new_trace)>0): not_filtered_logs["additional_filter"].append(not_new_trace) else: #string attribute_frequencies = dict() for trace in new_log: for event in trace: if additional_attribute in event.keys(): if event[additional_attribute] not in attribute_frequencies.keys(): attribute_frequencies[event[additional_attribute]] = 0 attribute_frequencies[event[additional_attribute]] += 1 sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])} frequencies_sorted_list = list(sorted_frequencies) nr_values = len(frequencies_sorted_list) idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range] values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx] values_to_keep = flatten(values_to_keep) for trace in new_log: new_trace = pm4py.objects.log.log.Trace() not_new_trace = pm4py.objects.log.log.Trace() for event in trace: if(additional_attribute in event.keys() and event[additional_attribute] in values_to_keep): new_trace.append(event) else: not_new_trace.append(event) if(len(new_trace)>0): newest_log.append(new_trace) if(len(not_new_trace)>0): not_filtered_logs["additional_filter"].append(not_new_trace) else: newest_log = new_log time_attribute_finished = datetime.now() if(selected_viz=="dfgf"): dfg = dfg_discovery.apply(newest_log) gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.FREQUENCY) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png")) elif(selected_viz=="dfgp"): dfg = dfg_discovery.apply(newest_log) gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.PERFORMANCE) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png")) else: heu_net = heuristics_miner.apply_heu(newest_log, parameters={"dependency_thresh": 0.99}) gviz = hn_vis_factory.apply(heu_net) hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png")) xes_exporter.apply(newest_log, os.path.join("webapp","static", req.session["id"] + "_l1.xes")) #l2 not_filtered_log = pm4py.objects.log.log.EventLog() for part in not_filtered_logs.keys(): for trace in not_filtered_logs[part]: not_filtered_log.append(trace) if(selected_viz=="dfgf"): dfg = dfg_discovery.apply(not_filtered_log) gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.FREQUENCY) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png")) elif(selected_viz=="dfgp"): dfg = dfg_discovery.apply(not_filtered_log) gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.PERFORMANCE) dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png")) else: heu_net = heuristics_miner.apply_heu(not_filtered_log, parameters={"dependency_thresh": 0.99}) gviz = hn_vis_factory.apply(heu_net) hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png")) xes_exporter.apply(not_filtered_log, os.path.join("webapp","static", req.session["id"] + "_l2.xes")) if(calc_lev): lev_new = [0]*len(newest_log) for i in range(len(newest_log)): lev_new[i] = [hash(event['concept:name']) for event in newest_log[i]] lev_not = [0]*len(not_filtered_log) for i in range(len(not_filtered_log)): lev_not[i] = [hash(event['concept:name']) for event in not_filtered_log[i]] distances = [] for i in range(len(lev_new)): for j in range(len(lev_not)): distances.append(lev_dist(lev_new[i], lev_not[j])) lev_d = sum(distances)/len(distances) print("Levenshtein's distance: "+str(lev_d)) else: lev_d = "null" used_paths = 0 for lower, higher in custom_path_range: used_paths += round((higher-lower)*100) print(f"Using {used_paths}% of paths. {100-used_paths}% of paths are discarded.") print("Timestamp filter: {} seconds. \nVariants filter: {} seconds. \nPerformance filter: {} seconds. \nActivities filter: {} seconds. \nAttribute filter: {} seconds.".format((time_variants_started - time_timestamp_started).total_seconds(), (time_variants_finished - time_variants_started).total_seconds(), (time_performance_finished - time_variants_finished).total_seconds(), (time_activities_finished - time_performance_finished).total_seconds(), (time_attribute_finished - time_activities_finished).total_seconds())) response = HttpResponse(json.dumps({'time':(time_variants_started - time_timestamp_started).total_seconds(), 'variants':(time_variants_finished - time_variants_started).total_seconds(),'performance':(time_performance_finished - time_variants_finished).total_seconds(), 'activities':(time_activities_finished - time_performance_finished).total_seconds(), 'attribute':(time_attribute_finished - time_activities_finished).total_seconds(), 'traces':[len(newest_log), len(not_filtered_log)], 'distance':lev_d})) response.status_code = 200 return response