def test_tokenreplay(self):
    """Smoke test: discover a net with the alpha miner, then exercise both
    token-replay variants and the token-based evaluation metrics."""
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))

    from pm4py.algo.discovery.alpha import algorithm as alpha_miner
    net, im, fm = alpha_miner.apply(log)

    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
    # The first replay result is intentionally discarded: both variants are
    # run purely for coverage, and only the BACKWARDS result is evaluated.
    replayed_traces = token_replay.apply(
        log, net, im, fm, variant=token_replay.Variants.TOKEN_REPLAY)
    replayed_traces = token_replay.apply(
        log, net, im, fm, variant=token_replay.Variants.BACKWARDS)

    from pm4py.evaluation.replay_fitness import evaluator as rp_fitness_evaluator
    fitness = rp_fitness_evaluator.apply(
        log, net, im, fm, variant=rp_fitness_evaluator.Variants.TOKEN_BASED)
    evaluation = rp_fitness_evaluator.evaluate(
        replayed_traces, variant=rp_fitness_evaluator.Variants.TOKEN_BASED)

    from pm4py.evaluation.precision import evaluator as precision_evaluator
    precision = precision_evaluator.apply(
        log, net, im, fm, variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)

    from pm4py.evaluation.generalization import evaluator as generalization_evaluation
    generalization = generalization_evaluation.apply(
        log, net, im, fm,
        variant=generalization_evaluation.Variants.GENERALIZATION_TOKEN)
def evaluate_logwithmodel(logpath):
    """Compute the four pm4py model-quality metrics for an event log.

    The log at *logpath* is imported, a Petri net is discovered with the
    inductive miner, and fitness, precision, simplicity and generalization
    are measured on that net.

    Parameters:
        logpath (str): Path of the event log.

    Returns:
        tuple: (fitness, precision, simplicity, generalization), each a
        float rounded to three decimal places.
    """
    event_log = importer.apply(logpath)
    net, im, fm = inductive_miner.apply(event_log)

    fit = replay_fitness_evaluator.apply(
        event_log, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    precision = precision_evaluator.apply(
        event_log, net, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    simplicity = simplicity_evaluator.apply(net)
    generalization = generalization_evaluator.apply(event_log, net, im, fm)

    return (round(fit['log_fitness'], 3), round(precision, 3),
            round(simplicity, 3), round(generalization, 3))
def model_metrics(model_log_path, metric_log_path, gexp_name):
    """Discover a Petri net from one CSV log and score it against another.

    The model log is mined with the inductive miner (DFG-based); the metric
    log is then aligned against the discovered net to compute fitness,
    precision, generalization, simplicity and the F-score, which are printed
    together with the elapsed time.

    Parameters:
        model_log_path (str): CSV file used to discover the model.
        metric_log_path (str): CSV file used to measure the model.
        gexp_name (str): Experiment name (currently unused in the active
            code path; referenced only by the removed commented-out render).

    Returns:
        The module-level name ``model_base`` (see NOTE below).
    """
    start_time = time()

    # FIX: pass the separator by keyword; a positional second argument to
    # read_csv is deprecated/removed in newer pandas.
    model_log_csv = pd.read_csv(model_log_path, sep=',')
    metric_log_csv = pd.read_csv(metric_log_path, sep=',')

    parameters = {log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: 'number'}
    model_log = log_converter.apply(model_log_csv, parameters=parameters,
                                    variant=log_converter.Variants.TO_EVENT_LOG)
    metric_log = log_converter.apply(metric_log_csv, parameters=parameters,
                                     variant=log_converter.Variants.TO_EVENT_LOG)

    # One parameter dict is shared by mining, alignment and the metrics.
    parameters = {inductive_miner.Variants.DFG_BASED.value.Parameters.CASE_ID_KEY: 'number',
                  inductive_miner.Variants.DFG_BASED.value.Parameters.ACTIVITY_KEY: 'incident_state',
                  inductive_miner.Variants.DFG_BASED.value.Parameters.TIMESTAMP_KEY: 'sys_updated_at',
                  alignments.Variants.VERSION_STATE_EQUATION_A_STAR.value.Parameters.ACTIVITY_KEY: 'incident_state'}
    petrinet, initial_marking, final_marking = inductive_miner.apply(model_log,
                                                                    parameters=parameters)

    gviz = pn_visualizer.apply(petrinet, initial_marking, final_marking)
    gviz.render('test_time\\test.png')
    pn_visualizer.view(gviz)

    alignments_res = alignments.apply_log(metric_log, petrinet, initial_marking,
                                          final_marking, parameters=parameters)
    fitness = replay_fitness.evaluate(alignments_res,
                                      variant=replay_fitness.Variants.ALIGNMENT_BASED,
                                      parameters=parameters)
    precision = calc_precision.apply(metric_log, petrinet, initial_marking,
                                     final_marking, parameters=parameters)
    generaliz = calc_generaliz.apply(metric_log, petrinet, initial_marking,
                                     final_marking, parameters=parameters)
    simplic = calc_simplic.apply(petrinet)

    # Harmonic mean of average fitness and precision.
    f_score = 2 * ((fitness['averageFitness'] * precision) /
                   (fitness['averageFitness'] + precision))

    end_time = time()
    m, s = divmod(end_time - start_time, 60)
    h, m = divmod(m, 60)
    print('Fin %02d:%02d:%02d' % (h, m, s))
    # FIX: this report was originally a single print whose string literal was
    # broken across two source lines (a syntax error); rejoined here.
    print(' F:', '%.10f' % fitness['averageFitness'],
          ' P:', '%.10f' % precision,
          ' FS:', '%.10f' % f_score,
          ' G:', '%.10f' % generaliz,
          ' S:', '%.10f' % simplic,
          ' T:', '%02d:%02d:%02d' % (h, m, s))

    # NOTE(review): `model_base` is never assigned in this function; it must
    # exist at module level, otherwise this raises NameError — confirm with
    # the caller whether this should return something else (e.g. the metrics).
    return model_base
def check_model_quality(original_log, anonymized_log, result_path):
    """Score a model mined from the anonymized log against the original log.

    A Petri net is discovered from *anonymized_log* with the inductive miner;
    fitness, precision, generalization and simplicity are then measured with
    respect to *original_log*, printed, and pickled to *result_path*.
    """
    model, im, fm = inductive_miner.apply(anonymized_log)
    results = dict()

    fitness = replay_fitness_evaluator.apply(
        original_log, model, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    print("Fitness: " + str(fitness))
    results["fitness"] = fitness

    precision = precision_evaluator.apply(
        original_log, model, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    print("Precision: " + str(precision))
    results["precision"] = precision

    gen = generalization_evaluator.apply(original_log, model, im, fm)
    print("Generalization: " + str(gen))
    results["generalization"] = gen

    simp = simplicity_evaluator.apply(model)
    print("Simplicity: " + str(simp))
    results["simplicity"] = simp

    with open(result_path, 'wb') as file:
        pickle.dump(results, file)
def test_inductiveminer_log(self):
    """Smoke test: inductive miner on an XES log, then conformance checking
    and the four quality metrics."""
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net, im, fm = inductive_miner.apply(log)
    # Conformance via token replay and alignments.
    aligned_traces_tr = tr_alg.apply(log, net, im, fm)
    aligned_traces_alignments = align_alg.apply(log, net, im, fm)
    # Overall evaluation plus each individual quality dimension.
    evaluation = eval_alg.apply(log, net, im, fm)
    fitness = rp_fit.apply(log, net, im, fm)
    precision = precision_evaluator.apply(log, net, im, fm)
    gen = generalization.apply(log, net, im, fm)
    sim = simplicity.apply(net)
def test_alphaminer_df(self):
    """Smoke test: alpha miner driven directly from a pandas dataframe,
    followed by conformance checking and the four quality metrics."""
    log = pd.read_csv(os.path.join("input_data", "running-example.csv"))
    log = dataframe_utils.convert_timestamp_columns_in_df(log)
    net, im, fm = alpha_miner.apply(log)
    # Conformance via token replay and alignments.
    aligned_traces_tr = tr_alg.apply(log, net, im, fm)
    aligned_traces_alignments = align_alg.apply(log, net, im, fm)
    # Overall evaluation plus each individual quality dimension.
    evaluation = eval_alg.apply(log, net, im, fm)
    fitness = rp_fit.apply(log, net, im, fm)
    precision = precision_evaluator.apply(log, net, im, fm)
    gen = generalization.apply(log, net, im, fm)
    sim = simplicity.apply(net)
def test_inductiveminer_stream(self):
    """Smoke test: inductive miner on an event *stream* converted from a
    dataframe, followed by conformance checking and the quality metrics."""
    df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    stream = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM)
    net, im, fm = inductive_miner.apply(stream)
    # Conformance via token replay and alignments.
    aligned_traces_tr = tr_alg.apply(stream, net, im, fm)
    aligned_traces_alignments = align_alg.apply(stream, net, im, fm)
    # Overall evaluation plus each individual quality dimension.
    evaluation = eval_alg.apply(stream, net, im, fm)
    fitness = rp_fit.apply(stream, net, im, fm)
    precision = precision_evaluator.apply(stream, net, im, fm)
    gen = generalization.apply(stream, net, im, fm)
    sim = simplicity.apply(net)
def test_evaluation_pm1(self):
    """Smoke test: mine a net with the inductive miner and run each of the
    four quality-metric algorithms on it."""
    # Assigning an instance attribute avoids static-method warnings, since
    # unittest requires tests to be written as instance methods.
    self.dummy_variable = "dummy_value"
    log = xes_importer.apply(os.path.join(INPUT_DATA_DIR, "running-example.xes"))
    net, marking, final_marking = inductive_miner.apply(log)
    fitness = fitness_alg.apply(log, net, marking, final_marking)
    precision = precision_alg.apply(log, net, marking, final_marking)
    generalization = generalization_alg.apply(log, net, marking, final_marking)
    simplicity = simplicity_alg.apply(net)
    # Results are discarded; the deletes make the intent explicit.
    del fitness
    del precision
    del generalization
    del simplicity
def evaluation_w_hm(log):
    """Mine a net with the heuristics miner and evaluate it on *log*.

    Returns:
        list: [log size, token-based log fitness, ETC precision,
        generalization, simplicity].
    """
    net, im, fm = heuristics_miner.apply(log)

    fitness = replay_fitness_evaluator.apply(
        log, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)['log_fitness']
    prec = precision_evaluator.apply(
        log, net, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    gen = generalization_evaluator.apply(log, net, im, fm)
    simp = simplicity_evaluator.apply(net)

    return [len(log), fitness, prec, gen, simp]
def extract_metrics(log, net, im, fm):
    """Extract the model quality criteria (fitness, precision,
    generalization, simplicity) and the time spent computing each.

    Returns:
        list: the four metric timings (seconds), then the fitness dict's
        values, then precision, generalization and simplicity.
    """
    def _timed(fn, *args, **kwargs):
        # Run fn and return (result, elapsed seconds).
        t0 = time.time()
        result = fn(*args, **kwargs)
        return result, time.time() - t0

    fitness, fitness_time = _timed(
        replay_fitness_evaluator.apply, log, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    precision, precision_time = _timed(
        precision_evaluator.apply, log, net, im, fm,
        variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    generalization, generalization_time = _timed(
        generalization_evaluator.apply, log, net, im, fm)
    simplicity, simplicity_time = _timed(simplicity_evaluator.apply, net)

    return [
        fitness_time,
        precision_time,
        generalization_time,
        simplicity_time,
        *fitness.values(),
        precision,
        generalization,
        simplicity,
    ]
'percFitTraces'] print(str(time.time()) + " fitness_token_align for " + logName + " succeeded! " + str( fitness_align_imdf[logName])) t2 = time.time() times_alignments_imdf[logName] = t2 - t1 if ENABLE_PRECISION: precision_alpha[logName] = precision_evaluator.apply(log, alpha_model, alpha_initial_marking, alpha_final_marking, variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN, parameters=parameters) else: precision_alpha[logName] = 0.0 print(str(time.time()) + " precision_alpha for " + logName + " succeeded! " + str(precision_alpha[logName])) generalization_alpha[logName] = generalization_evaluator.apply(log, alpha_model, alpha_initial_marking, alpha_final_marking, parameters=parameters) print(str(time.time()) + " generalization_alpha for " + logName + " succeeded! " + str( generalization_alpha[logName])) simplicity_alpha[logName] = simplicity_evaluator.apply(alpha_model, parameters=parameters) print( str(time.time()) + " simplicity_alpha for " + logName + " succeeded! " + str(simplicity_alpha[logName])) if ENABLE_PRECISION: precision_imdf[logName] = precision_evaluator.apply(log, inductive_model, inductive_im, inductive_fm, variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN, parameters=parameters) else: precision_imdf[logName] = 0.0 print(str(time.time()) + " precision_imdf for " + logName + " succeeded! " + str(precision_imdf[logName])) generalization_imdf[logName] = generalization_evaluator.apply(log, inductive_model, inductive_im,
log = log_converter.apply(log, parameters=parameters) parameters = { inductive_miner.Variants.DFG_BASED.value.Parameters.CASE_ID_KEY: 'number', inductive_miner.Variants.DFG_BASED.value.Parameters.ACTIVITY_KEY: 'activity', } petrinet_res = inductive_miner.apply(log, parameters=parameters) #fitness = calc_fitness.apply(log, *petrinet_res, parameters=parameters) #print('Conformidade',round(fitness['average_trace_fitness'],4)) precision = calc_precision.apply(log, *petrinet_res, parameters=parameters) print('Precisao', round(precision, 4)) simplic = calc_simplic.apply(petrinet_res[0], parameters=parameters) print('Simplicidade', round(simplic, 4)) generaliz = calc_generaliz.apply(log, *petrinet_res, parameters=parameters) print('Generalização', round(generaliz, 4)) # Precisao 0.1023 # Simplicidade 0.5802 # Generalização 0.575 # ---------------------------------------------------------------------------- # Metrics for kmeans # Generate a csv file with the metrics for all the kmeans results def attributes_selection(filename): if 'specialist' in filename: return [ 'number', 'incident_state', 'priority', 'category',
def calc_and_time_generaliz():
    """Compute the generalization metric and how long it took.

    Reads `log`, `petrinet_res`, `initial_mark` and `final_mark` from the
    enclosing scope.

    Returns:
        tuple: (generalization rounded to 4 decimals, elapsed seconds).
    """
    t0 = time.time()
    value = calc_generaliz.apply(log, petrinet_res, initial_mark, final_mark)
    elapsed = time.time() - t0
    return round(value, 4), elapsed
initial_marking, final_marking, variant=replay_fitness_evaluator.Variants.TOKEN_BASED) print(fitness) fitness = fitness["average_trace_fitness"] precision = precision_evaluator.apply( original_log, model, initial_marking, final_marking, variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN) print(str(precision)) fscore = 2 * precision * fitness / (precision + fitness) print("Fscore of: " + str(fscore)) generalization = generalization_evaluator.apply(original_log, model, initial_marking, final_marking) print("Generalization of: " + str(generalization)) log_features, feature_names_log = get_log_representation.get_representation( original_log, str_ev_attr=["concept:name"], str_tr_attr=[], num_ev_attr=[], num_tr_attr=[], str_evsucc_attr=["concept:name"]) log_df = pd.DataFrame(log_features, columns=feature_names_log) model = IsolationForest() model.fit(log_df) log_df["scores"] = model.decision_function(log_df)
sub = ['3', '4', '15', '65', '92']
for s in sub:
    # Import the repaired Petri net for this sub-model.
    net, initial_marking, final_marking = pnml_importer.apply(
        '../patterns_file/reti_Fahland/repaired_' + s + '_adjusted.pnml')
    print("\nValutazione rete sub_" + s + ":")

    # Alignment-based fitness, alignment-based precision, then
    # generalization and simplicity for the imported net.
    fitness = replay_evaluator.apply(
        log, net, initial_marking, final_marking,
        variant=replay_evaluator.Variants.ALIGNMENT_BASED)
    print("Fitness: ", fitness)

    precision = precision_evaluator.apply(
        log, net, initial_marking, final_marking,
        variant=precision_evaluator.Variants.ALIGN_ETCONFORMANCE)
    print("Precision: ", precision)

    generalization = generalization_evaluator.apply(log, net, initial_marking,
                                                    final_marking)
    print("Generalization: ", generalization)

    simplicity = simplicity_evaluator.apply(net)
    print("Simplicity: ", simplicity)
def calc_and_time_generaliz():
    """Compute the generalization metric and how long it took.

    Reads `log` and `petrinet_res` (unpacked into net and markings) from the
    enclosing scope.

    Returns:
        tuple: (generalization rounded to 4 decimals, elapsed seconds).
    """
    t0 = time.time()
    value = calc_generaliz.apply(log, *petrinet_res)
    elapsed = time.time() - t0
    return round(value, 4), elapsed