def residual_trace_resolution(R, CS, log, output=False):
    """Assign every residual variant in R to the best-fitting cluster in CS.

    For each residual variant, a heuristics-net model is discovered from every
    candidate cluster and the variant's token-based replay fitness against that
    model is computed; the variant is appended to the cluster with the highest
    fitness.

    Parameters:
        R (list): residual variants to resolve.
        CS (list): list of clusters (each a list of variants); mutated in place.
        log: event log the variants are filtered from.
        output (bool): verbosity flag (kept for interface compatibility).

    Returns:
        list: CS, with every residual variant appended to some cluster.
    """
    print("STEP 3 : residual trace resolution ahead step start")
    # LOOK AHEAD STEPS
    for no, r in enumerate(R):
        fit_max = 0
        fit_max_idx = -1
        # The residual variant's log does not depend on the cluster: build it once.
        r_log = variants_filter.apply(log, [r])
        for i in range(len(CS)):
            C_log = variants_filter.apply(log, [CS[i]])
            net, im, fm = heuristics_miner.apply(C_log)
            # net, im, fm = inductive_miner.apply(C_log)
            fit = replay_fitness_evaluator.apply(
                r_log, net, im, fm,
                variant=replay_fitness_evaluator.Variants.TOKEN_BASED
            )['log_fitness']
            if fit_max < fit:
                fit_max = fit
                fit_max_idx = i
        print("{} out of {} is added to {} cluster with fitness{} : {}".format(
            no, len(R), fit_max_idx, round(fit_max, 2), r))
        # BUG FIX: the original appended to CS[i] — the *last* cluster probed —
        # not the best-fitting cluster announced in the message above.  When no
        # cluster scores above 0, fit_max_idx is -1 and the variant lands in the
        # last cluster, which matches the original fallback behaviour.
        CS[fit_max_idx].append(r)
    return CS
def check_model_quality(original_log, anonymized_log, result_path):
    """Mine a model from the anonymized log, score it against the original log,
    and pickle the resulting metrics dictionary to ``result_path``.

    Parameters:
        original_log: event log the metrics are computed against.
        anonymized_log: event log the model is discovered from.
        result_path (str): destination file for the pickled metrics dict.
    """
    model, im, fm = inductive_miner.apply(anonymized_log)

    fitness = replay_fitness_evaluator.apply(
        original_log, model, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    print("Fitness: " + str(fitness))

    precision = precision_evaluator.apply(
        original_log, model, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    print("Precision: " + str(precision))

    gen = generalization_evaluator.apply(original_log, model, im, fm)
    print("Generalization: " + str(gen))

    simp = simplicity_evaluator.apply(model)
    print("Simplicity: " + str(simp))

    # Same key insertion order as the sequence of prints above.
    results = {
        "fitness": fitness,
        "precision": precision,
        "generalization": gen,
        "simplicity": simp,
    }
    with open(result_path, 'wb') as file:
        pickle.dump(results, file)
def evaluate_logwithmodel(logpath):
    """
    Import the event log at ``logpath``, discover a Petri net with the
    inductive miner and score the net on the same log.

    Parameters:
        logpath (str): Path of event log

    Returns:
        fitness (float): token-replay log fitness, rounded to 3 decimals
        precision (float): ETConformance precision, rounded to 3 decimals
        simplicity (float): net simplicity, rounded to 3 decimals
        generalization (float): generalization, rounded to 3 decimals
    """
    event_log = importer.apply(logpath)
    net, im, fm = inductive_miner.apply(event_log)
    fitness = replay_fitness_evaluator.apply(
        event_log, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)['log_fitness']
    precision = precision_evaluator.apply(
        event_log, net, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    simplicity = simplicity_evaluator.apply(net)
    generalization = generalization_evaluator.apply(event_log, net, im, fm)
    return (round(fitness, 3), round(precision, 3),
            round(simplicity, 3), round(generalization, 3))
def test_tokenreplay(self):
    """Run token-based replay (forward and backward) plus TBR-based fitness,
    precision and generalization on the running-example log."""
    example_log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    from pm4py.algo.discovery.alpha import algorithm as alpha_miner
    net, im, fm = alpha_miner.apply(example_log)
    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
    # Replay the log once per available replay variant; the last result
    # (BACKWARDS) is the one evaluated below, as in the original.
    for replay_variant in (token_replay.Variants.TOKEN_REPLAY,
                           token_replay.Variants.BACKWARDS):
        replayed = token_replay.apply(example_log, net, im, fm,
                                      variant=replay_variant)
    from pm4py.evaluation.replay_fitness import evaluator as rp_fitness_evaluator
    fitness = rp_fitness_evaluator.apply(
        example_log, net, im, fm,
        variant=rp_fitness_evaluator.Variants.TOKEN_BASED)
    evaluation = rp_fitness_evaluator.evaluate(
        replayed, variant=rp_fitness_evaluator.Variants.TOKEN_BASED)
    from pm4py.evaluation.precision import evaluator as precision_evaluator
    precision = precision_evaluator.apply(
        example_log, net, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    from pm4py.evaluation.generalization import evaluator as generalization_evaluation
    generalization = generalization_evaluation.apply(
        example_log, net, im, fm,
        variant=generalization_evaluation.Variants.GENERALIZATION_TOKEN)
def evaluate_fitness_tbr(log, petri_net, initial_marking, final_marking):
    """
    Calculates the fitness using token-based replay.

    Parameters
    ---------------
    log
        Event log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ---------------
    fitness_dictionary
        Fitness dictionary (from TBR)
    """
    from pm4py.evaluation.replay_fitness import evaluator as replay_fitness
    chosen_variant = replay_fitness.Variants.TOKEN_BASED
    return replay_fitness.apply(log, petri_net, initial_marking,
                                final_marking, variant=chosen_variant)
def evaluate_fitness_alignments(log, petri_net, initial_marking, final_marking):
    """
    Calculates the fitness using alignments.

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net object
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ---------------
    fitness_dictionary
        Fitness dictionary (from alignments)
    """
    from pm4py.evaluation.replay_fitness import evaluator as replay_fitness
    chosen_variant = replay_fitness.Variants.ALIGNMENT_BASED
    return replay_fitness.apply(log, petri_net, initial_marking,
                                final_marking, variant=chosen_variant)
def evaluate_fitness_alignments(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                                final_marking: Marking) -> Dict[str, float]:
    """
    Calculates the fitness using alignments.

    .. deprecated::
        Use ``fitness_alignments`` instead.

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net object
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ---------------
    fitness_dictionary
        Fitness dictionary (from alignments)
    """
    # BUG FIX: the docstring used to sit *after* the warn() call, making it a
    # dead string statement instead of a docstring; it is now first.
    # stacklevel=2 makes the DeprecationWarning point at the caller's line.
    warnings.warn(
        'evaluate_fitness_alignments is deprecated, use fitness_alignments',
        DeprecationWarning, stacklevel=2)
    from pm4py.evaluation.replay_fitness import evaluator as replay_fitness
    return replay_fitness.apply(
        log, petri_net, initial_marking, final_marking,
        variant=replay_fitness.Variants.ALIGNMENT_BASED)
def test_alignment(self):
    """Align the running-example log on an alpha-miner model with both search
    variants, then compute alignment-based fitness and precision."""
    log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    from pm4py.algo.discovery.alpha import algorithm as alpha_miner
    net, im, fm = alpha_miner.apply(log)
    from pm4py.algo.conformance.alignments import algorithm as alignments
    aligned_traces = alignments.apply(
        log, net, im, fm,
        variant=alignments.Variants.VERSION_STATE_EQUATION_A_STAR)
    aligned_traces = alignments.apply(
        log, net, im, fm,
        variant=alignments.Variants.VERSION_DIJKSTRA_NO_HEURISTICS)
    from pm4py.evaluation.replay_fitness import evaluator as rp_fitness_evaluator
    fitness = rp_fitness_evaluator.apply(
        log, net, im, fm,
        variant=rp_fitness_evaluator.Variants.ALIGNMENT_BASED)
    evaluation = rp_fitness_evaluator.evaluate(
        aligned_traces, variant=rp_fitness_evaluator.Variants.ALIGNMENT_BASED)
    from pm4py.evaluation.precision import evaluator as precision_evaluator
    # BUG FIX: the precision evaluator was called with a *fitness* evaluator
    # variant enum (rp_fitness_evaluator.Variants.ALIGNMENT_BASED); use the
    # precision evaluator's own alignment-based variant instead.
    precision = precision_evaluator.apply(
        log, net, im, fm,
        variant=precision_evaluator.Variants.ALIGN_ETCONFORMANCE)
def fitness_alignments(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                       final_marking: Marking) -> Dict[str, float]:
    """
    Calculates the fitness using alignments.

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net object
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ---------------
    fitness_dictionary
        dictionary describing average fitness (key: average_trace_fitness) and
        the percentage of fitting traces (key: percentage_of_fitting_traces)
    """
    from pm4py.evaluation.replay_fitness import evaluator as replay_fitness
    alignment_variant = replay_fitness.Variants.ALIGNMENT_BASED
    return replay_fitness.apply(log, petri_net, initial_marking,
                                final_marking, variant=alignment_variant)
def evaluate_fitness_tbr(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                         final_marking: Marking) -> Dict[str, float]:
    """
    Calculates the fitness using token-based replay.

    .. deprecated::
        Use ``fitness_token_based_replay`` instead.

    Parameters
    ---------------
    log
        Event log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ---------------
    fitness_dictionary
        Fitness dictionary (from TBR)
    """
    # BUG FIX: the docstring used to sit *after* the warn() call, making it a
    # dead string statement instead of a docstring; it is now first.
    # stacklevel=2 makes the DeprecationWarning point at the caller's line.
    warnings.warn(
        'evaluate_fitness_tbr is deprecated, use fitness_token_based_replay',
        DeprecationWarning, stacklevel=2)
    from pm4py.evaluation.replay_fitness import evaluator as replay_fitness
    return replay_fitness.apply(log, petri_net, initial_marking, final_marking,
                                variant=replay_fitness.Variants.TOKEN_BASED)
def fitness_token_based_replay(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                               final_marking: Marking) -> Dict[str, float]:
    """
    Calculates the fitness using token-based replay.
    The fitness is calculated on a log-based level.

    Parameters
    ---------------
    log
        Event log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ---------------
    fitness_dictionary
        dictionary describing average fitness (key: average_trace_fitness) and
        the percentage of fitting traces (key: percentage_of_fitting_traces)
    """
    from pm4py.evaluation.replay_fitness import evaluator as replay_fitness
    tbr_variant = replay_fitness.Variants.TOKEN_BASED
    return replay_fitness.apply(log, petri_net, initial_marking,
                                final_marking, variant=tbr_variant)
def test_inductiveminer_log(self):
    """Mine the running-example log with the inductive miner and run
    conformance checking plus the four quality metrics on the result."""
    example_log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    net, im, fm = inductive_miner.apply(example_log)
    # Conformance checking: token-based replay and alignments.
    tbr_result = tr_alg.apply(example_log, net, im, fm)
    alignment_result = align_alg.apply(example_log, net, im, fm)
    # Quality metrics: overall evaluation plus the individual dimensions.
    overall_eval = eval_alg.apply(example_log, net, im, fm)
    fitness_result = rp_fit.apply(example_log, net, im, fm)
    precision_result = precision_evaluator.apply(example_log, net, im, fm)
    gen_result = generalization.apply(example_log, net, im, fm)
    sim_result = simplicity.apply(net)
def test_alphaminer_df(self):
    """Mine the running-example dataframe with the alpha miner and run
    conformance checking plus the four quality metrics on the result."""
    frame = pd.read_csv(os.path.join("input_data", "running-example.csv"))
    frame = dataframe_utils.convert_timestamp_columns_in_df(frame)
    net, im, fm = alpha_miner.apply(frame)
    # Conformance checking: token-based replay and alignments.
    tbr_result = tr_alg.apply(frame, net, im, fm)
    alignment_result = align_alg.apply(frame, net, im, fm)
    # Quality metrics: overall evaluation plus the individual dimensions.
    overall_eval = eval_alg.apply(frame, net, im, fm)
    fitness_result = rp_fit.apply(frame, net, im, fm)
    precision_result = precision_evaluator.apply(frame, net, im, fm)
    gen_result = generalization.apply(frame, net, im, fm)
    sim_result = simplicity.apply(net)
def fit_check(log: list, C: list) -> float:
    """Token-replay log fitness of the sub-log made of the variants in C,
    against a heuristics-net model discovered from that same sub-log.

    Parameters:
        log (list): the full event log.
        C (list): the variants defining the sub-log.

    Returns:
        float: the 'log_fitness' value of pm4py's token-based replay.
    """
    # list(C) replaces the original no-op copy `[c for c in C]`; a distinct
    # name avoids reassigning (shadowing) the `log` parameter.
    sub_log = variants_filter.apply(log, list(C))
    net, im, fm = heuristics_miner.apply(sub_log)
    # net, im, fm = inductive_miner.apply(sub_log)
    fit = replay_fitness_evaluator.apply(
        sub_log, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    return fit['log_fitness']
def test_inductiveminer_stream(self):
    """Convert the running-example CSV to an event stream, mine it with the
    inductive miner and run conformance checking plus the quality metrics."""
    frame = pd.read_csv(os.path.join("input_data", "running-example.csv"))
    frame = dataframe_utils.convert_timestamp_columns_in_df(frame)
    stream = log_conversion.apply(frame, variant=log_conversion.TO_EVENT_STREAM)
    net, im, fm = inductive_miner.apply(stream)
    # Conformance checking: token-based replay and alignments.
    tbr_result = tr_alg.apply(stream, net, im, fm)
    alignment_result = align_alg.apply(stream, net, im, fm)
    # Quality metrics: overall evaluation plus the individual dimensions.
    overall_eval = eval_alg.apply(stream, net, im, fm)
    fitness_result = rp_fit.apply(stream, net, im, fm)
    precision_result = precision_evaluator.apply(stream, net, im, fm)
    gen_result = generalization.apply(stream, net, im, fm)
    sim_result = simplicity.apply(net)
def test_evaluation_pm1(self):
    """Mine the running example with the inductive miner and compute the four
    quality metrics; results are discarded — only execution is checked."""
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    example_log = xes_importer.apply(
        os.path.join(INPUT_DATA_DIR, "running-example.xes"))
    net, im, fm = inductive_miner.apply(example_log)
    fitness = fitness_alg.apply(example_log, net, im, fm)
    precision = precision_alg.apply(example_log, net, im, fm)
    generalization = generalization_alg.apply(example_log, net, im, fm)
    simplicity = simplicity_alg.apply(net)
    del fitness, precision, generalization, simplicity
def evaluation_w_hm(log):
    """Discover a heuristics-net model from ``log`` and score it on the same
    log.

    Returns:
        list: [log size, token-replay log fitness, ETConformance precision,
               generalization, simplicity].
    """
    net, im, fm = heuristics_miner.apply(log)
    # net, im, fm = inductive_miner.apply(log)
    fit = replay_fitness_evaluator.apply(
        log, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)['log_fitness']
    precision = precision_evaluator.apply(
        log, net, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    generality = generalization_evaluator.apply(log, net, im, fm)
    simple = simplicity_evaluator.apply(net)
    return [len(log), fit, precision, generality, simple]
def _timed(metric_fn, *args, **kwargs):
    """Call metric_fn(*args, **kwargs) and return (result, elapsed_seconds)."""
    start_time = time.time()
    result = metric_fn(*args, **kwargs)
    return result, time.time() - start_time


def extract_metrics(log, net, im, fm):
    """
    Extracts model quality criteria: fitness, precision, generalization, simplicity
    Also records time spent in each metric

    Returns:
        list: [fitness_time, precision_time, generalization_time,
               simplicity_time, *fitness-dict values, precision,
               generalization, simplicity]
    """
    # The repeated start/stop timing boilerplate is factored into _timed();
    # metric order and the returned list layout are unchanged.
    fitness, fitness_time = _timed(
        replay_fitness_evaluator.apply, log, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    precision, precision_time = _timed(
        precision_evaluator.apply, log, net, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    generalization, generalization_time = _timed(
        generalization_evaluator.apply, log, net, im, fm)
    simplicity, simplicity_time = _timed(simplicity_evaluator.apply, net)
    return [
        fitness_time,
        precision_time,
        generalization_time,
        simplicity_time,
        *fitness.values(),
        precision,
        generalization,
        simplicity,
    ]
def alignment_based_final_cluster_evaluation(clusters):
    """For every cluster, mine a model and compute alignment-based fitness,
    precision and their F1; return size-weighted averages, the per-cluster F1
    scores and the (zero-padded) list of cluster sizes."""
    f1_scores = {}
    f1_weighted = 0
    fitness_weighted = 0
    precision_weighted = 0
    sizes = []
    total_size = 0
    for cluster in clusters:
        size = len(cluster)
        net, im, fm = inductive_miner.apply(cluster)
        fitness = replay_fitness_evaluator.apply(
            cluster, net, im, fm,
            variant=replay_fitness_evaluator.Variants.ALIGNMENT_BASED)
        precision = precision_evaluator.apply(
            cluster, net, im, fm,
            variant=precision_evaluator.Variants.ALIGN_ETCONFORMANCE)
        avg_fitness = fitness["averageFitness"]
        f1 = 2 * (avg_fitness * precision) / (avg_fitness + precision)
        f1_scores['cluster ' + str(size)] = f1
        sizes.append(size)
        f1_weighted += size * f1
        fitness_weighted += size * avg_fitness
        precision_weighted += size * precision
        total_size += size
    # Pad with at most two zeros so the size list reaches five entries
    # (same two-step padding as the original).
    if len(sizes) < 4:
        sizes.append(0)
    if len(sizes) < 5:
        sizes.append(0)
    return (f1_weighted / total_size,
            fitness_weighted / total_size,
            precision_weighted / total_size,
            f1_scores,
            sizes)
def token_based_replay(real_logs, petri_nets):
    """For each mining variant, replay the eight real logs on the matching
    discovered nets and plot the token-based-replay fitness per log.

    Relies on the module-level names ``variants`` and ``plot``.
    """
    scores = {}
    for idx, variant_name in enumerate(variants):
        net, im, fm = petri_nets[idx]
        scores[variant_name] = [
            replay_fitness_evaluator.apply(
                real_logs[k], net[k], im[k], fm[k],
                variant=replay_fitness_evaluator.Variants.TOKEN_BASED,
            )["log_fitness"]
            for k in range(8)
        ]
    plot(
        pd.DataFrame(scores, index=list(range(1, 9))),
        "Fitness using Token-Based Replay",
        "token_based_replay.png",
        ylabel="Fitness",
    )
def cluster_evaluation(sublog, discovery_technique):
    """
    Mine a model from ``sublog`` with the requested discovery technique and
    score it on the same sub-log.

    Parameters:
        sublog: event log of one cluster.
        discovery_technique (str): 'heuristic miner' or 'inductive miner'.

    Returns:
        tuple: (token-replay fitness dict, ETConformance precision, F1 score).

    Raises:
        ValueError: for an unknown discovery technique.  (The original code
            left net/im/fm unbound in that case and failed later with a
            confusing NameError.)
    """
    if discovery_technique == 'heuristic miner':
        net, im, fm = heuristics_miner.apply(
            sublog, parameters={"dependency_thresh": 0.99})
    elif discovery_technique == 'inductive miner':
        net, im, fm = inductive_miner.apply(sublog)
    else:
        raise ValueError(
            "unknown discovery technique: {!r}".format(discovery_technique))
    fitness = replay_fitness_evaluator.apply(
        sublog, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    precision = precision_evaluator.apply(
        sublog, net, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    # F1: harmonic mean of log fitness and precision.
    f1_score = 2 * (fitness["log_fitness"] * precision) / (fitness["log_fitness"] + precision)
    return fitness, precision, f1_score
def look_ahead(log: list, C, R, output=False):
    """Move residual variants from R into cluster C when they fit perfectly.

    A heuristics-net model is discovered from the variants currently in C;
    every residual variant in R whose token-replay log fitness on that model
    equals 1 is removed from R and appended to C.

    Parameters:
        log (list): event log the variants are filtered from.
        C: variants of the current cluster (mutated in place).
        R: residual variants (mutated in place).
        output (bool): when True, print progress details.

    Returns:
        tuple: the mutated (C, R).
    """
    if output:
        print("\n * Look_ahead()")
    C_log = variants_filter.apply(log, C)
    net, im, fm = heuristics_miner.apply(C_log)
    # net, im, fm = inductive_miner.apply(C_log)
    # Iterate over a snapshot: R is mutated inside the loop (the original
    # removed items from the list while iterating it, which skips elements).
    for i, r in enumerate(list(R)):
        if i % 10 == 0:
            print("\t = {} dpi(s) checked".format(i))
        r_log = [variants_filter.apply(log, [r])[0]]
        fit = replay_fitness_evaluator.apply(
            r_log, net, im, fm,
            variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
        # BUG FIX: `fit` is a dict, so the original `fit == 1` was never true
        # and no variant was ever moved; compare the log fitness value instead.
        if fit['log_fitness'] == 1:
            print("fitness:", fit)
            if output:
                print("\tFound a perfect fitness - {}".format(r))
            R.remove(r)
            C.append(r)
    return C, R
# Import the event log once; every repaired net is evaluated against it.
log = xes_importer.apply("../patterns_file/BPI2017Denied" + '.xes')
sub = ['3', '4', '15', '65', '92']
for s in sub:
    # Import the repaired/adjusted Petri net for this sub-model.
    net, initial_marking, final_marking = pnml_importer.apply(
        '../patterns_file/reti_Fahland/repaired_' + s + '_adjusted.pnml')
    print("\nValutazione rete sub_" + s + ":")
    # Four pm4py quality dimensions of the net with respect to the log.
    fitness = replay_evaluator.apply(
        log, net, initial_marking, final_marking,
        variant=replay_evaluator.Variants.ALIGNMENT_BASED)
    precision = precision_evaluator.apply(
        log, net, initial_marking, final_marking,
        variant=precision_evaluator.Variants.ALIGN_ETCONFORMANCE)
    generalization = generalization_evaluator.apply(
        log, net, initial_marking, final_marking)
    simplicity = simplicity_evaluator.apply(net)
    # Report in the same order and format as before.
    for label, value in (("Fitness: ", fitness),
                         ("Precision: ", precision),
                         ("Generalization: ", generalization),
                         ("Simplicity: ", simplicity)):
        print(label, value)
from pm4py.evaluation.generalization import evaluator as generalization_evaluator
from pm4py.objects.log.util import get_log_representation
from sklearn.ensemble import IsolationForest
import pandas as pd
import sys

# Path of the original event log, taken from the command line.
original_log_path = sys.argv[1]
original_log = xes_importer.apply(original_log_path)
# Discover a Petri net from the original log with the inductive miner.
model, initial_marking, final_marking = inductive_miner.apply(original_log)
# Token-based replay fitness of the log on the discovered model.
fitness = replay_fitness_evaluator.apply(
    original_log, model, initial_marking, final_marking,
    variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
print(fitness)
# Keep only the average trace fitness for the F-score below.
fitness = fitness["average_trace_fitness"]
precision = precision_evaluator.apply(
    original_log, model, initial_marking, final_marking,
    variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
print(str(precision))
# F-score: harmonic mean of precision and average trace fitness.
fscore = 2 * precision * fitness / (precision + fitness)
print("Fscore of: " + str(fscore))
# NOTE(review): the statement below is truncated in this view; the call
# continues past this chunk.
generalization = generalization_evaluator.apply(original_log, model,
def calc_and_time_fitness():
    """Measure the fitness of the discovered net (module-level ``petrinet_res``
    with markings) against the module-level ``log``.

    Returns:
        tuple: (fitness result, elapsed seconds of the computation).
    """
    t0 = time.time()
    result = calc_fitness.apply(log, petrinet_res, initial_mark, final_mark)
    return result, time.time() - t0
# For each of 5 noise levels, accumulate fitness/precision over 5 noisy logs.
for x in range(1, 6):
    sum1 = 0  # running total of token-replay log fitness for this level
    sum2 = 0  # NOTE(review): not updated in this view — presumably used past this chunk
    for i in range(1, 6):
        # Windows path of the i-th log of noise level x.
        logfile = "E://noise log//Experiment 2 Ex//Level " + str(
            x) + "//log file//" + str(i) + "//0.xes"
        print(logfile)
        log = xes_importer.apply(logfile)
        from pm4py.algo.discovery.inductive import algorithm as inductive_miner
        net, im, fm = inductive_miner.apply(log)
        from pm4py.evaluation.replay_fitness import evaluator as replay_fitness_evaluator
        # Token-based replay fitness of the log on its own mined model.
        fitness = replay_fitness_evaluator.apply(
            log, net, im, fm,
            variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
        #print("fitness is equal to",fitness)
        sum1 = sum1 + fitness['log_fitness']
        from pm4py.evaluation.precision import evaluator as precision_evaluator
        # ETConformance precision of the same log/model pair.
        prec = precision_evaluator.apply(
            log, net, im, fm,
            variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
        #print("precision is euqal to",prec)
    # (closes a construct opened before this chunk)
})
# Visualise the heuristics net.
gviz = hn_visualizer.apply(heu_net)
hn_visualizer.view(gviz)
# NOTE(review): this guard only fires when the and-chain evaluates to None
# (i.e. the first falsy component is None); it does not catch every
# unset/falsy component — confirm the intent.
if (net and initial_marking and final_marking) is None:
    raise Exception("This is for your safety. Check configuration")
#%%
# Visualise the Petri net.
gviz = pn_visualizer.apply(net, initial_marking, final_marking)
pn_visualizer.view(gviz)
#%%
# fit log against model
fitness_eval_TOKEN_BASED = replay_fitness_evaluator.apply(
    log, net, initial_marking, final_marking,
    variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
results['1-log-token_based'] = fitness_eval_TOKEN_BASED
fitness_eval_ALIGNMENT_BASED = replay_fitness_evaluator.apply(
    log, net, initial_marking, final_marking,
    variant=replay_fitness_evaluator.Variants.ALIGNMENT_BASED)
results['1-log-alignment_based'] = fitness_eval_ALIGNMENT_BASED
#%%
# fit alfons small against model
# NOTE(review): the statement below is truncated in this view; the call
# continues past this chunk.
fitness_eval_TOKEN_BASED = replay_fitness_evaluator.apply(
    log_alfons_small,
generated_log = pt_semantics.generate_log(tree) print("first trace of log", [x["concept:name"] for x in generated_log[0]]) """ t2 = time.time() print("time interlapsed for calculating Inductive Model", (t2 - t1)) if CHECK_SOUNDNESS: print("inductive is_sound_wfnet", check_soundness.check_petri_wfnet_and_soundness(inductive_model, debug=True)) parameters = {fitness_evaluator.Variants.TOKEN_BASED.value.Parameters.ACTIVITY_KEY: activity_key, fitness_evaluator.Variants.TOKEN_BASED.value.Parameters.ATTRIBUTE_KEY: activity_key, "format": "png"} t1 = time.time() fitness_token_alpha[logName] = \ fitness_evaluator.apply(log, alpha_model, alpha_initial_marking, alpha_final_marking, parameters=parameters, variant=fitness_evaluator.Variants.TOKEN_BASED)[ 'perc_fit_traces'] print(str(time.time()) + " fitness_token_alpha for " + logName + " succeeded! " + str( fitness_token_alpha[logName])) t2 = time.time() times_tokenreplay_alpha[logName] = t2 - t1 t1 = time.time() fitness_token_imdf[logName] = \ fitness_evaluator.apply(log, inductive_model, inductive_im, inductive_fm, parameters=parameters, variant=fitness_evaluator.Variants.TOKEN_BASED)[ 'perc_fit_traces'] print(str(time.time()) + " fitness_token_inductive for " + logName + " succeeded! " + str( fitness_token_imdf[logName])) t2 = time.time() times_tokenreplay_imdf[logName] = t2 - t1
    # (continuation of a log_converter parameters dict opened before this chunk)
    log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: 'number'
}
# Load the sublog CSV and convert it into a pm4py event log.
data = pd.read_csv(sublogs_dir + sublog_filename)
log = data2log(data)
log = log_converter.apply(log, parameters=parameters)
# Parameters mapping the dataframe's columns for DFG-based inductive mining.
parameters = {
    inductive_miner.Variants.DFG_BASED.value.Parameters.CASE_ID_KEY: 'number',
    inductive_miner.Variants.DFG_BASED.value.Parameters.ACTIVITY_KEY: 'activity',
}
petrinet_res = inductive_miner.apply(log, parameters=parameters)
# Four quality metrics of the mined net; petrinet_res unpacks to net/markings.
fitness = calc_fitness.apply(log, *petrinet_res, parameters=parameters)
precision = calc_precision.apply(log, *petrinet_res, parameters=parameters)
simplic = calc_simplic.apply(petrinet_res[0], parameters=parameters)
generaliz = calc_generaliz.apply(log, *petrinet_res, parameters=parameters)
# Collect per-sublog values for later averaging.
generaliz_mean.append(generaliz)
precision_mean.append(precision)
fitness_mean.append(fitness)
simplic_mean.append(simplic)
# NOTE(review): the writerow call is truncated in this view; the row list
# continues past this chunk.
spamwriter.writerow([
    sublog_filename.replace('.csv', ''),
        Log (with one trace per variant)
    """
    if parameters is None:
        parameters = {}
    # Keep one representative (the first) trace per variant of the log.
    variants = variants_module.get_variants(log, parameters=parameters)
    new_log = EventLog()
    for var in variants:
        new_log.append(variants[var][0])
    return new_log


def execute_script():
    # Import the BPI Challenge 2012 log from a hard-coded local path.
    log_path = os.path.join("/Users/Julian/Documents/HiWi/PADS/EventLogs/BPI_Challenge_2012.xes")
    log = xes_import.apply(log_path)
    #log = keep_one_trace_per_variant(log)
    #log = log[15:30]
    # Discover a process tree with IMf (noise threshold 0.5), render it as SVG,
    # convert it to a Petri net and print its token-replay fitness.
    ptree = ind_miner.apply_tree(log, parameters={Parameters.NOISE_THRESHOLD: 0.5},
                                 variant=ind_miner.Variants.IMf)
    gviz = pt_vis.apply(ptree,
                        parameters={pt_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "svg"})
    net, im, fm = converter.apply(ptree)
    pt_vis.view(gviz)
    print(evaluator.apply(log, net, im, fm, variant=evaluator.Variants.TOKEN_BASED))


if __name__ == "__main__":
    execute_script()