def test_tokenreplay(self):
    """Smoke-test both token-based replay variants and the TBR-based evaluation metrics."""
    from pm4py.algo.discovery.alpha import algorithm as alpha_miner
    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
    from pm4py.evaluation.replay_fitness import evaluator as rp_fitness_evaluator
    from pm4py.evaluation.precision import evaluator as precision_evaluator
    from pm4py.evaluation.generalization import evaluator as generalization_evaluation

    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net, im, fm = alpha_miner.apply(log)
    # exercise both replay variants; only the last result is evaluated below
    replayed_traces = token_replay.apply(log, net, im, fm,
                                         variant=token_replay.Variants.TOKEN_REPLAY)
    replayed_traces = token_replay.apply(log, net, im, fm,
                                         variant=token_replay.Variants.BACKWARDS)
    fitness = rp_fitness_evaluator.apply(log, net, im, fm,
                                         variant=rp_fitness_evaluator.Variants.TOKEN_BASED)
    evaluation = rp_fitness_evaluator.evaluate(replayed_traces,
                                               variant=rp_fitness_evaluator.Variants.TOKEN_BASED)
    precision = precision_evaluator.apply(log, net, im, fm,
                                          variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
    generalization = generalization_evaluation.apply(
        log, net, im, fm, variant=generalization_evaluation.Variants.GENERALIZATION_TOKEN)
def test_tbr_normal(self):
    """Run plain token-based replay and build its diagnostics dataframe."""
    log = pm4py.read_xes("input_data/running-example.xes")
    net, im, fm = pm4py.discover_petri_net_inductive(log, noise_threshold=0.2)
    traces = token_based_replay.apply(log, net, im, fm)
    diagn_df = token_based_replay.get_diagnostics_dataframe(log, traces)
def test_applyAlphaMinerToProblematicLogs(self):
    """Mine each problematic XES log twice and check both runs produce nets of the same size."""
    # to avoid static method warnings in tests, that by construction of the
    # unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    for log_name in os.listdir(PROBLEMATIC_XES_DIR):
        try:
            full_path = os.path.join(PROBLEMATIC_XES_DIR, log_name)
            # calculate and compare Petri nets obtained on the same log to verify
            # that instances are working correctly
            log1, net1, marking1, fmarking1 = self.obtainPetriNetThroughAlphaMiner(full_path)
            log2, net2, marking2, fmarking2 = self.obtainPetriNetThroughAlphaMiner(full_path)
            self.assertEqual(len(net1.places), len(net2.places))
            self.assertEqual(len(net1.transitions), len(net2.transitions))
            self.assertEqual(len(net1.arcs), len(net2.arcs))
            # build a final marking with one token in each sink place
            final_marking = petri.petrinet.Marking()
            for place in net1.places:
                if not place.out_arcs:
                    final_marking[place] = 1
            aligned_traces = token_replay.apply(log1, net1, marking1, final_marking)
            self.assertEqual(aligned_traces, aligned_traces)
        except SyntaxError as e:
            logging.info("SyntaxError on log " + str(log_name) + ": " + str(e))
        except NoConceptNameException as e:
            logging.info("Concept name error on log " + str(log_name) + ": " + str(e))
def test_applyAlphaMinerToCSV(self):
    """Mine a CSV log twice with the Alpha Miner and check the resulting nets coincide in size."""
    # to avoid static method warnings in tests, that by construction of the
    # unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    csv_path = os.path.join(INPUT_DATA_DIR, "running-example.csv")
    # calculate and compare Petri nets obtained on the same log to verify
    # that instances are working correctly
    log1, net1, marking1, fmarking1 = self.obtainPetriNetThroughAlphaMiner(csv_path)
    log2, net2, marking2, fmarking2 = self.obtainPetriNetThroughAlphaMiner(csv_path)
    log1 = index_attribute.insert_trace_index_as_event_attribute(
        sampling.sample(sorting.sort_timestamp(log1)))
    log2 = index_attribute.insert_trace_index_as_event_attribute(
        sampling.sample(sorting.sort_timestamp(log2)))
    # exercise the PNML exporter, then clean up the produced file
    pnml_path = os.path.join(OUTPUT_DATA_DIR, "running-example.pnml")
    petri_exporter.apply(net1, marking1, pnml_path)
    os.remove(pnml_path)
    self.assertEqual(len(net1.places), len(net2.places))
    self.assertEqual(len(net1.transitions), len(net2.transitions))
    self.assertEqual(len(net1.arcs), len(net2.arcs))
    # build a final marking with one token in each sink place
    final_marking = petri.petrinet.Marking()
    for place in net1.places:
        if not place.out_arcs:
            final_marking[place] = 1
    aligned_traces = token_replay.apply(log1, net1, marking1, final_marking)
    self.assertEqual(aligned_traces, aligned_traces)
def conformance_diagnostics_token_based_replay(
        log: EventLog, petri_net: PetriNet, initial_marking: Marking,
        final_marking: Marking) -> List[Dict[str, Any]]:
    """
    Apply token-based replay for conformance checking analysis, returning the
    full token-based-replay diagnostics.

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    --------------
    replay_results
        One replay result per trace of the log, in the same order as the
        traces appear in the event log
    """
    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay

    return token_replay.apply(log, petri_net, initial_marking, final_marking)
def conformance_tbr(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                    final_marking: Marking) -> List[Dict[str, Any]]:
    """
    Apply token-based replay for conformance checking analysis.

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    --------------
    replay_results
        One replay result per trace of the log
    """
    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay

    return token_replay.apply(log, petri_net, initial_marking, final_marking)
def execute_script():
    """Discover a net with the inductive miner and replay the log with the backwards variant."""
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    log = xes_importer.apply(log_path)
    net, im, fm = inductive_miner.apply(log)
    # perform the backwards token-based replay
    replayed = tr.apply(log, net, im, fm, variant=tr.Variants.BACKWARDS)
    print(replayed)
def conformance_diagnostics_token_based_replay(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                                               final_marking: Marking) -> List[Dict[str, Any]]:
    """
    Apply token-based replay for conformance checking analysis, returning the
    full token-based-replay diagnostics.

    Parameters
    --------------
    log
        Event log (dataframe, EventLog or EventStream)
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    --------------
    replay_results
        One replay result per trace of the log, in the same order as the
        traces appear in the event log

    Raises
    --------------
    Exception
        If the provided log is not a traditional event log
    """
    if type(log) not in (pd.DataFrame, EventLog, EventStream):
        raise Exception("the method can be applied only to a traditional event log!")
    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay

    return token_replay.apply(log, petri_net, initial_marking, final_marking,
                              parameters=get_properties(log))
def conformance_tbr(log: EventLog, petri_net: PetriNet, initial_marking: Marking,
                    final_marking: Marking) -> List[Dict[str, Any]]:
    """
    Apply token-based replay for conformance checking analysis.

    .. deprecated::
        Use ``conformance_token_based_replay`` instead.

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    --------------
    replay_results
        A list of replay results for each trace of the log

    Raises
    --------------
    Exception
        If the provided log is not a traditional event log
    """
    # BUG FIX: the docstring was previously placed *after* this warn call, which
    # turned it into a dead string expression and left __doc__ empty.
    warnings.warn('conformance_tbr is deprecated, use conformance_token_based_replay', DeprecationWarning)
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
    return token_replay.apply(log, petri_net, initial_marking, final_marking,
                              parameters=get_properties(log))
def get_decorations(log, net, initial_marking, final_marking, parameters=None, measure="frequency",
                    ht_perf_method="last"):
    """
    Calculate the decorations used to annotate the Petri net.

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated to the algorithm
    measure
        Measure to represent on the process model (frequency/performance)
    ht_perf_method
        Annotation point for hidden transitions: the performance value can be
        put on the last possible point ("last") or on the first possible
        point ("first")

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}
    aggregation_measure = exec_utils.get_param_value(Parameters.AGGREGATION_MEASURE,
                                                     parameters, None)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    variants_idx = variants_get.get_variants_from_log_trace_idx(log, parameters=parameters)
    variants = variants_get.convert_variants_trace_idx_to_trace_obj(log, variants_idx)
    # replay the log, reusing the precomputed variants
    replay_params_enum = token_replay.Variants.TOKEN_REPLAY.value.Parameters
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking,
                                        parameters={replay_params_enum.ACTIVITY_KEY: activity_key,
                                                    replay_params_enum.VARIANTS: variants})
    # apply petri_reduction technique in order to simplify the Petri net
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})
    element_statistics = performance_map.single_element_statistics(
        log, net, initial_marking, aligned_traces, variants_idx,
        activity_key=activity_key, timestamp_key=timestamp_key, ht_perf_method=ht_perf_method)
    return performance_map.aggregate_statistics(element_statistics, measure=measure,
                                                aggregation_measure=aggregation_measure)
def test_inductiveminer_log(self):
    """Smoke-test conformance checking and evaluation on an XES event log."""
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net, im, fm = inductive_miner.apply(log)
    tbr_result = tr_alg.apply(log, net, im, fm)
    alignments_result = align_alg.apply(log, net, im, fm)
    evaluation = eval_alg.apply(log, net, im, fm)
    fitness = rp_fit.apply(log, net, im, fm)
    precision = precision_evaluator.apply(log, net, im, fm)
    gen = generalization.apply(log, net, im, fm)
    sim = simplicity.apply(net)
def test_tbr_backwards(self):
    """Run the backwards token-based replay variant and build its diagnostics dataframe."""
    log = pm4py.read_xes("input_data/running-example.xes")
    net, im, fm = pm4py.discover_petri_net_inductive(log, noise_threshold=0.2)
    traces = token_based_replay.apply(log, net, im, fm,
                                      variant=token_based_replay.Variants.BACKWARDS)
    diagn_df = token_based_replay.get_diagnostics_dataframe(
        log, traces, variant=token_based_replay.Variants.BACKWARDS)
def test_inductiveminer_df(self):
    """Smoke-test conformance checking and evaluation on a dataframe log."""
    log = pd.read_csv(os.path.join("input_data", "running-example.csv"))
    log = dataframe_utils.convert_timestamp_columns_in_df(log)
    net, im, fm = inductive_miner.apply(log)
    tbr_result = tr_alg.apply(log, net, im, fm)
    alignments_result = align_alg.apply(log, net, im, fm)
    evaluation = eval_alg.apply(log, net, im, fm)
    fitness = rp_fit.apply(log, net, im, fm)
    precision = precision_evaluator.apply(log, net, im, fm)
    gen = generalization.apply(log, net, im, fm)
    sim = simplicity.apply(net)
def test_importingPetriLogTokenReplay(self):
    """Import a PNML net and an XES log, then replay the log on the net."""
    # to avoid static method warnings in tests, that by construction of the
    # unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    net, marking, fmarking = petri_importer.apply(
        os.path.join(INPUT_DATA_DIR, "running-example.pnml"))
    log = xes_importer.apply(os.path.join(INPUT_DATA_DIR, "running-example.xes"))
    replay_result = token_replay.apply(log, net, marking, fmarking)
    del replay_result
def apply(log: EventLog, petri_net: PetriNet, initial_marking: Marking, final_marking: Marking,
          parameters: Optional[Dict[Union[str, Parameters], Any]] = None):
    """
    Calculates generalization on the provided log and Petri net.

    The approach has been suggested by the paper
    Buijs, Joos CAM, Boudewijn F. van Dongen, and Wil MP van der Aalst.
    "Quality dimensions in process discovery: The importance of fitness, precision,
    generalization and simplicity." International Journal of Cooperative
    Information Systems 23.01 (2014): 1440001.

    A token replay is applied and, for each transition, we can measure the
    number of occurrences in the replay. The following formula is applied for
    generalization:

            \\sum_{t \\in transitions} (math.sqrt(1.0/(n_occ_replay(t)))
    1 -  ----------------------------------------------------------
                            # transitions

    Parameters
    -----------
    log
        Trace log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Algorithm parameters

    Returns
    -----------
    generalization
        Generalization measure
    """
    parameters = {} if parameters is None else parameters
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              pmutil.xes_constants.DEFAULT_NAME_KEY)
    # replay the log on the net, then derive generalization from the replay result
    replay_result = token_replay.apply(log, petri_net, initial_marking, final_marking,
                                       parameters={Parameters.ACTIVITY_KEY: activity_key})
    return get_generalization(petri_net, replay_result)
def test_inductiveminer_stream(self):
    """Smoke-test conformance checking and evaluation on an event stream."""
    df = pd.read_csv(os.path.join("input_data", "running-example.csv"))
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    stream = log_conversion.apply(df, variant=log_conversion.TO_EVENT_STREAM)
    net, im, fm = inductive_miner.apply(stream)
    tbr_result = tr_alg.apply(stream, net, im, fm)
    alignments_result = align_alg.apply(stream, net, im, fm)
    evaluation = eval_alg.apply(stream, net, im, fm)
    fitness = rp_fit.apply(stream, net, im, fm)
    precision = precision_evaluator.apply(stream, net, im, fm)
    gen = generalization.apply(stream, net, im, fm)
    sim = simplicity.apply(net)
def apply(log, petri_net, initial_marking, final_marking, parameters=None):
    """
    Apply token replay fitness evaluation.

    Parameters
    -----------
    log
        Trace log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters

    Returns
    -----------
    dictionary
        Containing two keys (percFitTraces and averageFitness)
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)
    replay_variant = exec_utils.get_param_value(Parameters.TOKEN_REPLAY_VARIANT, parameters,
                                                executor.Variants.TOKEN_REPLAY)
    clean_token_flood = exec_utils.get_param_value(Parameters.CLEANING_TOKEN_FLOOD, parameters, False)
    consider_remaining = exec_utils.get_param_value(
        token_replay.Parameters.CONSIDER_REMAINING_IN_FITNESS, parameters, True)
    # forward the relevant options to the chosen replay variant
    replay_parameters = {
        token_replay.Parameters.ACTIVITY_KEY: activity_key,
        token_replay.Parameters.CONSIDER_REMAINING_IN_FITNESS: consider_remaining,
        token_replay.Parameters.CLEANING_TOKEN_FLOOD: clean_token_flood,
    }
    replay_result = executor.apply(log, petri_net, initial_marking, final_marking,
                                   variant=replay_variant, parameters=replay_parameters)
    return evaluate(replay_result)
def execute_script():
    """Mine a net with the Alpha Miner, visualize it, and compute token-replay fitness."""
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    log = xes_importer.apply(log_path)
    net, marking, final_marking = alpha_miner.apply(log)
    for place in marking:
        print("initial marking " + place.name)
    for place in final_marking:
        print("final marking " + place.name)
    viz_parameters = {pn_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "svg"}
    gviz = pn_vis.apply(net, marking, final_marking, parameters=viz_parameters)
    pn_vis.view(gviz)
    print("started token replay")
    replay_result = token_replay.apply(log, net, marking, final_marking)
    # fraction of traces that the replay marks as fitting
    fitting = [res for res in replay_result if res['trace_is_fit']]
    perc_fitness = len(fitting) / len(replay_result) if len(replay_result) > 0 else 0.00
    print("perc_fitness=", perc_fitness)
def filtering_traces(self):
    """
    Calculates the filtered traces which fit the model.

    :return: The filtered traces in a list
    """
    # BUG FIX: the original condition `(self.logtype) == 'CSV' or 'csv'` was
    # always truthy ('csv' is a non-empty string), so the XES branch was
    # unreachable; compare against both spellings instead.
    if self.logtype in ('CSV', 'csv'):
        log = self.csv_loghandler()
    else:
        log = self.ieee_xes_loghandler()
    net = self.petrinethandler()
    replayed_traces = token_replay.apply(log, net[0], net[1], net[2])
    filtered_traces = []
    for i in range(len(log)):
        # skip traces that the replay explicitly marks as non-fitting
        if replayed_traces[i].get('trace_is_fit') == False:
            continue
        # BUG FIX: `newLog + log[i]` concatenated the *events* of the trace into
        # the list; the documented contract is a list of traces, so append the
        # trace object itself.
        filtered_traces.append(log[i])
    return filtered_traces
def test_alphaMinerVisualizationFromXES(self):
    """Mine an XES log with the Alpha Miner, export/visualize the net and replay the log."""
    # to avoid static method warnings in tests, that by construction of the
    # unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    log, net, marking, fmarking = self.obtainPetriNetThroughAlphaMiner(
        os.path.join(INPUT_DATA_DIR, "running-example.xes"))
    log = index_attribute.insert_trace_index_as_event_attribute(
        sampling.sample(sorting.sort_timestamp(log)))
    # exercise the PNML exporter, then clean up the produced file
    pnml_path = os.path.join(OUTPUT_DATA_DIR, "running-example.pnml")
    petri_exporter.apply(net, marking, pnml_path)
    os.remove(pnml_path)
    gviz = pn_viz.graphviz_visualization(net)
    self.assertEqual(gviz, gviz)
    # NOTE(review): a final marking is built here but the replay below uses the
    # miner-provided fmarking — kept exactly as in the original test
    final_marking = petri.petrinet.Marking()
    for place in net.places:
        if not place.out_arcs:
            final_marking[place] = 1
    aligned_traces = token_replay.apply(log, net, marking, fmarking)
    self.assertEqual(aligned_traces, aligned_traces)
def apply(log: EventLog, petri_net: PetriNet, initial_marking: Marking, final_marking: Marking,
          parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Dict[str, float]:
    """
    Apply token replay fitness evaluation.

    Parameters
    -----------
    log
        Trace log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters

    Returns
    -----------
    dictionary
        Containing two keys (percFitTraces and averageFitness)
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)
    replay_variant = exec_utils.get_param_value(Parameters.TOKEN_REPLAY_VARIANT, parameters,
                                                executor.Variants.TOKEN_REPLAY)
    clean_token_flood = exec_utils.get_param_value(Parameters.CLEANING_TOKEN_FLOOD, parameters, False)
    show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
    # forward the relevant options to the chosen replay variant; remaining
    # tokens are always considered in the fitness computation here
    replay_parameters = {
        token_replay.Parameters.ACTIVITY_KEY: activity_key,
        token_replay.Parameters.CONSIDER_REMAINING_IN_FITNESS: True,
        token_replay.Parameters.CLEANING_TOKEN_FLOOD: clean_token_flood,
        token_replay.Parameters.SHOW_PROGRESS_BAR: show_progress_bar,
    }
    replay_result = executor.apply(log, petri_net, initial_marking, final_marking,
                                   variant=replay_variant, parameters=replay_parameters)
    return evaluate(replay_result)
def conformance_tbr(log, petri_net, initial_marking, final_marking):
    """
    Apply token-based replay.

    Parameters
    --------------
    log
        Event log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    --------------
    replay_results
        One replay result per trace of the log
    """
    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay

    return token_replay.apply(log, petri_net, initial_marking, final_marking)
def get_token_replayed_traces_from_params(net, initial_marking, final_marking, ramo_justica,
                                          codtribunal, atuacao, cluster, grau, codorgaoj,
                                          codnatureza, codclasse, dtinicio, dtfim, baixado=None,
                                          sensibility='60'):
    """Build the event log from the given filter parameters and replay it on the net."""
    event_log = gerar_log_eventos(ramo_justica, codtribunal, atuacao, cluster, grau, codorgaoj,
                                  codnatureza, codclasse, dtinicio, dtfim, baixado, sensibility)
    return token_replay.apply(event_log, net, initial_marking, final_marking)
def apply_token_replay(log, net, initial_marking, final_marking, parameters=None):
    """
    Calculates all metrics based on token-based replay and returns a unified dictionary.

    Parameters
    -----------
    log
        Log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters

    Returns
    -----------
    dictionary
        Dictionary containing fitness, precision, generalization and simplicity;
        along with the average weight of these metrics
    """
    if parameters is None:
        parameters = {}
    # fill in the standard keys when the caller did not provide them
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY,
                          pmutil.constants.CASE_ATTRIBUTE_GLUE)
    log = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)
    activity_key = parameters.get(PARAM_ACTIVITY_KEY, log_lib.util.xes.DEFAULT_NAME_KEY)
    fitness_weight = parameters.get(PARAM_FITNESS_WEIGHT, 0.25)
    precision_weight = parameters.get(PARAM_PRECISION_WEIGHT, 0.25)
    simplicity_weight = parameters.get(PARAM_SIMPLICITY_WEIGHT, 0.25)
    generalization_weight = parameters.get(PARAM_GENERALIZATION_WEIGHT, 0.25)
    # normalize the four weights so they sum to one
    total_weight = fitness_weight + precision_weight + simplicity_weight + generalization_weight
    fitness_weight = fitness_weight / total_weight
    precision_weight = precision_weight / total_weight
    simplicity_weight = simplicity_weight / total_weight
    generalization_weight = generalization_weight / total_weight
    replay_parameters = {pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking,
                                        parameters=replay_parameters)
    parameters = {pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}
    fitness = fitness_token_based.evaluate(aligned_traces)
    precision = precision_token_based.apply(log, net, initial_marking, final_marking,
                                            parameters=parameters)
    generalization = generalization_token_based.get_generalization(net, aligned_traces)
    simplicity = simplicity_arc_degree.apply(net)
    metrics_average_weight = (fitness_weight * fitness["log_fitness"]
                              + precision_weight * precision
                              + generalization_weight * generalization
                              + simplicity_weight * simplicity)
    # harmonic mean of fitness and precision, guarding against a zero denominator
    fscore = 0.0
    if (fitness['log_fitness'] + precision) > 0:
        fscore = (2 * fitness['log_fitness'] * precision) / (fitness['log_fitness'] + precision)
    return {
        "fitness": fitness,
        "precision": precision,
        "generalization": generalization,
        "simplicity": simplicity,
        "metricsAverageWeight": metrics_average_weight,
        "fscore": fscore,
    }
def apply(log, net, marking, final_marking, parameters=None):
    """
    Get ET Conformance precision.

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> Activity key

    Returns
    ----------
    precision
        The ET Conformance precision value
    """
    if parameters is None:
        parameters = {}
    cleaning_token_flood = exec_utils.get_param_value(Parameters.CLEANING_TOKEN_FLOOD,
                                                      parameters, False)
    token_replay_variant = exec_utils.get_param_value(Parameters.TOKEN_REPLAY_VARIANT,
                                                      parameters, executor.Variants.TOKEN_REPLAY)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              log_lib.util.xes.DEFAULT_NAME_KEY)
    # default value for precision, when no activated transitions (not even by
    # looking at the initial marking) are found
    precision = 1.0
    escaping_total = 0
    activated_total = 0
    replay_parameters = {
        token_replay.Parameters.CONSIDER_REMAINING_IN_FITNESS: False,
        token_replay.Parameters.TRY_TO_REACH_FINAL_MARKING_THROUGH_HIDDEN: False,
        token_replay.Parameters.STOP_IMMEDIATELY_UNFIT: True,
        token_replay.Parameters.WALK_THROUGH_HIDDEN_TRANS: True,
        token_replay.Parameters.CLEANING_TOKEN_FLOOD: cleaning_token_flood,
        token_replay.Parameters.ACTIVITY_KEY: activity_key,
    }
    prefixes, prefix_count = precision_utils.get_log_prefixes(log, activity_key=activity_key)
    prefixes_keys = list(prefixes.keys())
    fake_log = precision_utils.form_fake_log(prefixes_keys, activity_key=activity_key)
    aligned_traces = executor.apply(fake_log, net, marking, final_marking,
                                    variant=token_replay_variant, parameters=replay_parameters)
    # fix: also the empty prefix should be counted!
    start_activities = set(get_start_activities(log, parameters=parameters))
    trans_en_ini_marking = {x.label for x in
                            get_visible_transitions_eventually_enabled_by_marking(net, marking)}
    diff = trans_en_ini_marking.difference(start_activities)
    activated_total += len(log) * len(trans_en_ini_marking)
    escaping_total += len(log) * len(diff)
    # end fix
    for prefix_key, replay_res in zip(prefixes_keys, aligned_traces):
        if replay_res["trace_is_fit"]:
            log_transitions = set(prefixes[prefix_key])
            enabled_labels = {x.label for x in replay_res["enabled_transitions_in_marking"]
                              if x.label is not None}
            activated_total += len(enabled_labels) * prefix_count[prefix_key]
            escaping = enabled_labels.difference(log_transitions)
            escaping_total += len(escaping) * prefix_count[prefix_key]
    if activated_total > 0:
        precision = 1 - float(escaping_total) / float(activated_total)
    return precision
def compare_element_usage_two_logs(net, im, fm, log1, log2, parameters=None):
    """
    Returns some statistics (also visual) about the comparison of the usage of the
    elements in two logs given an accepting Petri net.

    Parameters
    -------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    log1
        First log
    log2
        Second log
    parameters
        Parameters of the algorithm (to be passed to the token-based replay)

    Returns
    ----------------
    aggregated_statistics
        Statistics about the usage of places, transitions and arcs in the net
    """
    if parameters is None:
        parameters = {}
    replay_parameters = copy(parameters)
    replay_parameters[tr_algorithm.Variants.TOKEN_REPLAY.value.Parameters.ENABLE_PLTR_FITNESS] = True
    # replay each log separately, collecting per-place and per-transition fitness
    rep1, place_fit1, _, _ = tr_algorithm.apply(log1, net, im, fm, parameters=replay_parameters)
    rep2, place_fit2, _, _ = tr_algorithm.apply(log2, net, im, fm, parameters=replay_parameters)
    trans_occ1 = Counter(t for trace in rep1 for t in trace["activated_transitions"])
    trans_occ2 = Counter(t for trace in rep2 for t in trace["activated_transitions"])
    # place usage = consumed + remaining tokens
    place_occ1 = Counter({p: place_fit1[p]["c"] + place_fit1[p]["r"] for p in place_fit1})
    place_occ2 = Counter({p: place_fit2[p]["c"] + place_fit2[p]["r"] for p in place_fit2})
    all_transitions = set(trans_occ1).union(trans_occ2).union(net.transitions)
    all_places = set(place_occ1).union(place_occ2).union(net.places)
    aggregated_statistics = {}
    for place in all_places:
        occ1, occ2 = place_occ1[place], place_occ2[place]
        total = occ1 + occ2
        aggregated_statistics[place] = {"log1_occ": occ1, "log2_occ": occ2, "total_occ": total}
        aggregated_statistics[place]["label"] = "(%d/%d/%d)" % (occ1, occ2, total)
        direction = (occ2 - occ1) / total if total > 0 else 0
        aggregated_statistics[place]["direction"] = direction
        aggregated_statistics[place]["color"] = give_color_to_direction_dynamic(direction)
    for trans in all_transitions:
        occ1, occ2 = trans_occ1[trans], trans_occ2[trans]
        total = occ1 + occ2
        aggregated_statistics[trans] = {"log1_occ": occ1, "log2_occ": occ2, "total_occ": total}
        label_prefix = trans.label + " " if trans.label is not None else ""
        aggregated_statistics[trans]["label"] = label_prefix + "(%d/%d/%d)" % (occ1, occ2, total)
        direction = (occ2 - occ1) / total if total > 0 else 0
        aggregated_statistics[trans]["direction"] = direction
        aggregated_statistics[trans]["color"] = give_color_to_direction_dynamic(direction)
        # arcs inherit the statistics dictionary of their transition
        for arc in trans.in_arcs:
            aggregated_statistics[arc] = aggregated_statistics[trans]
        for arc in trans.out_arcs:
            aggregated_statistics[arc] = aggregated_statistics[trans]
    return aggregated_statistics
def get_transition_performance_with_token_replay(log, net, im, fm):
    """
    Gets the transition performance through the usage of token-based replay

    Parameters
    -------------
    log
        Event log
    net
        Petri net
    im
        Initial marking
    fm
        Final marking

    Returns
    --------------
    transition_performance
        Dictionary where each transition label is associated to performance measures
        (keys: "all_values", "case_association", "mean", "median")
    """
    from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
    from pm4py.statistics.variants.log import get as variants_get
    # replay the log and collect per-element statistics (indexed by net elements)
    variants_idx = variants_get.get_variants_from_log_trace_idx(log)
    aligned_traces = token_replay.apply(log, net, im, fm)
    element_statistics = single_element_statistics(log, net, im, aligned_traces, variants_idx)
    transition_performance = {}
    for el in element_statistics:
        # only visible (labelled) transitions are reported
        if type(el) is PetriNet.Transition and el.label is not None:
            # "log_idx" and "performance" are parallel lists: trace index and
            # measured performance value for each firing of the transition
            if "log_idx" in element_statistics[
                    el] and "performance" in element_statistics[el]:
                if len(element_statistics[el]["performance"]) > 0:
                    transition_performance[str(el)] = {
                        "all_values": [],
                        "case_association": {},
                        "mean": 0.0,
                        "median": 0.0
                    }
                    for i in range(len(element_statistics[el]["log_idx"])):
                        # group performance values by the trace they belong to
                        if not element_statistics[el]["log_idx"][
                                i] in transition_performance[str(
                                    el)]["case_association"]:
                            transition_performance[str(
                                el)]["case_association"][element_statistics[el]
                                                         ["log_idx"][i]] = []
                        transition_performance[str(el)]["case_association"][
                            element_statistics[el]["log_idx"][i]].append(
                                element_statistics[el]["performance"][i])
                        transition_performance[str(el)]["all_values"].append(
                            element_statistics[el]["performance"][i])
                    transition_performance[str(el)]["all_values"] = sorted(
                        transition_performance[str(el)]["all_values"])
                    # summary statistics over all collected values
                    if transition_performance[str(el)]["all_values"]:
                        transition_performance[str(el)]["mean"] = mean(
                            transition_performance[str(el)]["all_values"])
                        transition_performance[str(el)]["median"] = median(
                            transition_performance[str(el)]["all_values"])
    return transition_performance
def apply(df, discovery_algorithm=discover_inductive, parameters=None):
    """
    Discover an object-centric model: for every perspective (each dataframe
    column not prefixed with "event_"), flatten the dataframe to a log,
    discover a Petri net, replay the log on it and collect frequency and
    performance statistics.

    Parameters
    ------------
    df
        Dataframe (succint or exploded form)
    discovery_algorithm
        Discovery algorithm applied to each flattened log
        (default: inductive miner)
    parameters
        Possible parameters of the algorithm, including:
        - "allowed_activities": map perspective -> activities kept in its log
        - "debug": print progress information (default True)
        - "min_node_freq" / "min_edge_freq": frequency cleaning thresholds

    Returns
    ------------
    ret
        Dictionary with, per perspective: nets, activity counts, replay
        results, fitness and aggregated statistics, plus overall
        "computation_statistics" timings
    """
    if parameters is None:
        parameters = {}

    allowed_activities = parameters.get("allowed_activities")
    debug = parameters.get("debug", True)

    try:
        # succint dataframes are exploded (one row per event/object pair)
        if df.type == "succint":
            df = succint_mdl_to_exploded_mdl.apply(df)
            df.type = "exploded"
    except Exception:
        # best-effort: plain dataframes carry no "type" attribute
        pass

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    min_node_freq = parameters.get("min_node_freq", 0)
    min_edge_freq = parameters.get("min_edge_freq", 0)

    df = clean_frequency.apply(df, min_node_freq)
    df = clean_arc_frequency.apply(df, min_edge_freq)

    # cleaning may have emptied the dataframe; restore the minimal schema
    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    # perspectives: every column that is not an event attribute
    persps = [x for x in df.columns if not x.startswith("event_")]

    ret = {}
    ret["nets"] = {}
    ret["act_count"] = {}
    ret["replay"] = {}
    ret["group_size_hist"] = {}
    ret["act_count_replay"] = {}
    ret["group_size_hist_replay"] = {}
    ret["aligned_traces"] = {}
    ret["place_fitness_per_trace"] = {}
    ret["aggregated_statistics_frequency"] = {}
    ret["aggregated_statistics_performance_min"] = {}
    ret["aggregated_statistics_performance_max"] = {}
    ret["aggregated_statistics_performance_median"] = {}
    ret["aggregated_statistics_performance_mean"] = {}

    diff_log = 0
    diff_model = 0
    diff_token_replay = 0
    diff_performance_annotation = 0
    diff_basic_stats = 0

    for persp in persps:
        aa = time.time()
        if debug:
            print(persp, "getting log")
        log = algorithm.apply(df, persp, parameters=parameters)
        if debug:
            print(len(log))

        if allowed_activities is not None:
            if persp not in allowed_activities:
                continue
            filtered_log = attributes_filter.apply_events(
                log, allowed_activities[persp])
        else:
            filtered_log = log
        bb = time.time()
        diff_log += (bb - aa)

        if debug:
            print(len(log))
            print(persp, "got log")

        cc = time.time()
        net, im, fm = discovery_algorithm(filtered_log)
        dd = time.time()
        diff_model += (dd - cc)

        if debug:
            print(persp, "got model")

        xx1 = time.time()
        activ_count = algorithm.apply(df,
                                      persp,
                                      variant="activity_occurrence",
                                      parameters=parameters)
        if debug:
            print(persp, "got activ_count")
        xx2 = time.time()

        ee = time.time()
        variants_idx = variants_module.get_variants_from_log_trace_idx(log)
        if debug:
            print(persp, "got variants")
        aligned_traces, place_fitness_per_trace, transition_fitness_per_trace, notexisting_activities_in_model = tr_factory.apply(
            log, net, im, fm, parameters={
                "enable_pltr_fitness": True,
                "disable_variants": True
            })
        if debug:
            print(persp, "done tbr")
        element_statistics = performance_map.single_element_statistics(
            log, net, im, aligned_traces, variants_idx)
        if debug:
            print(persp, "done element_statistics")
        ff = time.time()
        diff_token_replay += (ff - ee)

        aggregated_statistics = performance_map.aggregate_statistics(
            element_statistics)
        if debug:
            print(persp, "done aggregated_statistics")
        # NOTE(review): identical call to element_statistics above; kept as
        # a separate computation in case aggregate_statistics mutates its
        # input -- confirm before deduplicating
        element_statistics_performance = performance_map.single_element_statistics(
            log, net, im, aligned_traces, variants_idx)
        if debug:
            print(persp, "done element_statistics_performance")

        gg = time.time()
        aggregated_statistics_performance_min = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="min")
        aggregated_statistics_performance_max = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="max")
        aggregated_statistics_performance_median = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="median")
        aggregated_statistics_performance_mean = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="mean")
        hh = time.time()
        # NOTE(review): measured from ee, so this span also includes the
        # token-replay time -- confirm whether gg was the intended start
        diff_performance_annotation += (hh - ee)

        if debug:
            print(persp, "done aggregated_statistics_performance")
        group_size_hist = algorithm.apply(df,
                                          persp,
                                          variant="group_size_hist",
                                          parameters=parameters)
        if debug:
            print(persp, "done group_size_hist")

        # collect (case_id, event_id) pairs of fitting occurrences per
        # activity label
        occurrences = {}
        for trans in transition_fitness_per_trace:
            occurrences[trans.label] = set()
            for trace in transition_fitness_per_trace[trans]["fit_traces"]:
                if not trace in transition_fitness_per_trace[trans][
                        "underfed_traces"]:
                    case_id = trace.attributes["concept:name"]
                    for event in trace:
                        if event["concept:name"] == trans.label:
                            occurrences[trans.label].add(
                                (case_id, event["event_id"]))

        # number of distinct event ids per activity
        len_different_ids = {}
        for act in occurrences:
            len_different_ids[act] = len(set(x[1] for x in occurrences[act]))

        # sorted per-case occurrence counts per activity
        eid_acti_count = {}
        for act in occurrences:
            eid_acti_count[act] = {}
            for x in occurrences[act]:
                # BUG FIX: the membership test previously checked the outer
                # eid_acti_count dict (keyed by activity) instead of the
                # per-activity dict (keyed by case id), so every counter was
                # reset to 0 before each increment and all counts came out 1
                if not x[0] in eid_acti_count[act]:
                    eid_acti_count[act][x[0]] = 0
                eid_acti_count[act][x[0]] = eid_acti_count[act][x[0]] + 1
            eid_acti_count[act] = sorted(list(eid_acti_count[act].values()))

        ii = time.time()
        diff_basic_stats += (ii - hh) + (xx2 - xx1)

        ret["nets"][persp] = [net, im, fm]
        ret["act_count"][persp] = activ_count
        ret["aligned_traces"][persp] = aligned_traces
        ret["place_fitness_per_trace"][persp] = place_fitness_per_trace
        ret["aggregated_statistics_frequency"][persp] = aggregated_statistics
        ret["aggregated_statistics_performance_min"][
            persp] = aggregated_statistics_performance_min
        ret["aggregated_statistics_performance_max"][
            persp] = aggregated_statistics_performance_max
        ret["aggregated_statistics_performance_median"][
            persp] = aggregated_statistics_performance_median
        ret["aggregated_statistics_performance_mean"][
            persp] = aggregated_statistics_performance_mean
        ret["replay"][persp] = aggregated_statistics
        ret["group_size_hist"][persp] = group_size_hist
        ret["act_count_replay"][persp] = len_different_ids
        ret["group_size_hist_replay"][persp] = eid_acti_count

    ret["computation_statistics"] = {
        "diff_log": diff_log,
        "diff_model": diff_model,
        "diff_token_replay": diff_token_replay,
        "diff_performance_annotation": diff_performance_annotation,
        "diff_basic_stats": diff_basic_stats
    }

    return ret
def get_map_from_log_and_net(log, net, initial_marking, final_marking,
                             force_distribution=None, parameters=None):
    """
    Build the stochastic map of a Petri net from an event log: replay the
    log on the net and associate a random variable (duration distribution
    plus firing weight) to every transition with performance data.

    Parameters
    -----------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking of the Petri net
    final_marking
        Final marking of the Petri net
    force_distribution
        If provided, distribution to force usage (e.g. EXPONENTIAL)
    parameters
        Parameters of the algorithm, including:
        Parameters.ACTIVITY_KEY -> activity name
        Parameters.TIMESTAMP_KEY -> timestamp key

    Returns
    -----------
    stochastic_map
        Map that to each transition associates a random variable
    """
    if parameters is None:
        parameters = {}

    replay_variant = exec_utils.get_param_value(
        Parameters.TOKEN_REPLAY_VARIANT, parameters,
        executor.Variants.TOKEN_REPLAY)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)

    # precompute the variants once so the replay does not have to
    variants_idx = variants_module.get_variants_from_log_trace_idx(
        log,
        parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key})
    variants = variants_module.convert_variants_trace_idx_to_trace_obj(
        log, variants_idx)

    # replay the log on the model
    aligned_traces = executor.apply(
        log, net, initial_marking, final_marking,
        variant=replay_variant,
        parameters={
            token_replay.Parameters.ACTIVITY_KEY: activity_key,
            token_replay.Parameters.VARIANTS: variants
        })

    element_statistics = performance_map.single_element_statistics(
        log, net, initial_marking, aligned_traces, variants_idx,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        parameters={"business_hours": True})

    stochastic_map = {}
    for element, stats in element_statistics.items():
        # only transitions carrying performance values get a random variable
        if type(element) is not PetriNet.Transition or "performance" not in stats:
            continue
        random_variable = RandomVariable()
        random_variable.calculate_parameters(
            stats["performance"], force_distribution=force_distribution)
        enabled = stats['no_of_times_enabled']
        activated = stats['no_of_times_activated']
        # weight = firing ratio; 0 when the transition was never enabled
        random_variable.set_weight(
            float(activated) / float(enabled) if enabled > 0 else 0.0)
        stochastic_map[element] = random_variable
    return stochastic_map
def get_attributes(log, decision_points, attributes, use_trace_attributes, trace_attributes, k, net,
                   initial_marking, final_marking, decision_points_names, parameters=None):
    """
    This method aims to construct for each decision place a table where for each decision place a list is given
    with the label of the later decision and as value the given attributes

    :param log: Log on which the method is applied
    :param decision_points: Places that have multiple outgoing arcs
    :param attributes: Attributes that are considered
    :param use_trace_attributes: If trace attributes have to be considered or not
    :param trace_attributes: List of trace attributes that are considered
    :param k: Taking k last activities into account
    :param net: Petri net the log is replayed/aligned on
    :param initial_marking: Initial marking of the Petri net
    :param final_marking: Final marking of the Petri net
    :param decision_points_names: Map from decision place to the labels of its possible successor transitions
    :param parameters: Parameters of the algorithm (forwarded to variants computation, replay and alignments)
    :return: Dictionary that has as keys the decision places. The value for this key is a list.
             The content of these lists are tuples. The first element of these tuples is information regarding the
             attributes, the second element of these tuples is the transition which was chosen in a decision.
    """
    if parameters is None:
        parameters = {}
    # I: decision place -> list of (attribute snapshot, chosen transition label)
    I = {}
    for key in decision_points:
        I[key] = []
    # A: running snapshot of the considered attributes (None until first seen)
    A = {}
    for attri in attributes:
        A[attri] = None
    # NOTE(review): i is never used below -- presumably a leftover; confirm
    i = 0
    # first, take a look at the variants
    variants_idxs = variants_module.get_variants_from_log_trace_idx(
        log, parameters=parameters)
    one_variant = []
    for variant in variants_idxs:
        one_variant.append(variant)
    # TODO: extend the token-based replay code with a parameter so that only the variants are replayed
    replay_result = token_replay.apply(log, net, initial_marking,
                                       final_marking, parameters=parameters)
    replay_result = simplify_token_replay(replay_result)
    # count indexes the current variant inside one_variant / variants_idxs
    count = 0
    for variant in replay_result:
        if variant['trace_fitness'] == 1.0:
            # perfectly fitting variant: the activated transitions of the
            # replay are reliable, use them directly for every trace of it
            for trace_index in variants_idxs[one_variant[count]]:
                # sliding window of the attribute snapshots of the last k events
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                # j is a pointer which points to the current event inside a trace
                for transition in variant['activated_transitions']:
                    for key, value in decision_points_names.items():
                        if transition.label in value:
                            for element in last_k_list:
                                if element != None:
                                    if transition.label != None:
                                        I[key].append(
                                            (element.copy(), transition.label))
                                    else:
                                        I[key].append(
                                            (element.copy(), transition.name))
                    for attri in attributes:
                        if attri in trace[j]:
                            # only add the attribute information if it is present in the event
                            A[attri] = trace[j][attri]
                    # add A to last_k_list. Using modulo to access correct entry
                    last_k_list[j % k] = A.copy()
                    if transition.label != None:
                        if not j + 1 >= len(trace):
                            # Problem otherwise: If there are tau-transition after the last event related transition,
                            # the pointer j which points to the current event in a trace, gets out of range
                            j += 1
        else:
            # non-fitting variant: align one example trace of the variant and
            # reuse that alignment for all traces of the same variant
            example_trace = log[variants_idxs[one_variant[count]][0]]
            align_parameters = copy(parameters)
            align_parameters[star.Parameters.
                             PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE] = True
            alignment = ali.apply(example_trace, net, initial_marking,
                                  final_marking,
                                  parameters=align_parameters)['alignment']
            for trace_index in variants_idxs[one_variant[count]]:
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                for el in alignment:
                    if el[1][1] != '>>':
                        # If move in model
                        for key, value in decision_points.items():
                            if el[0][1] in value:
                                for element in last_k_list:
                                    if element != None:
                                        # only add those entries where information is provided
                                        if el[1][1] == None:
                                            # for some dt algorithms, the entry None might be a problem, since it is left out later
                                            I[key].append(
                                                (element.copy(), el[0][1]))
                                        else:
                                            I[key].append(
                                                (element.copy(), el[1][1]))
                    if el[1][0] != '>>' and el[1][1] != '>>':
                        # If there is a move in log and model
                        for attri in attributes:
                            if attri in trace[j]:
                                # only add the attribute information if it is present in the event
                                A[attri] = trace[j][attri]
                        # add A to last_k_list. Using modulo to access correct entry
                        last_k_list[j % k] = A.copy()
                    if el[1][0] != '>>':
                        # only go to next event in trace if the current event has been aligned
                        # TODO: Discuss if this is correct or can lead to problems
                        j += 1
        count += 1
    return I