def get_decorations(log, net, initial_marking, final_marking, parameters=None, measure="frequency", ht_perf_method="last"):
    """
    Compute the decorations (frequency/performance annotations) to put on a Petri net.

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated to the algorithm
    measure
        Measure to represent on the process model (frequency/performance)
    ht_perf_method
        Method to use in order to annotate hidden transitions (performance value could be
        put on the last possible point (last) or in the first possible point (first))

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}

    agg_measure = exec_utils.get_param_value(Parameters.AGGREGATION_MEASURE, parameters, None)
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    ts_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)

    # group traces into variants so the replay runs once per distinct control flow
    variants_idx = variants_get.get_variants_from_log_trace_idx(log, parameters=parameters)
    variants = variants_get.convert_variants_trace_idx_to_trace_obj(log, variants_idx)

    replay_params = {
        token_replay.Variants.TOKEN_REPLAY.value.Parameters.ACTIVITY_KEY: act_key,
        token_replay.Variants.TOKEN_REPLAY.value.Parameters.VARIANTS: variants,
    }

    # do the replay
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=replay_params)

    # apply petri_reduction technique in order to simplify the Petri net
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})

    element_statistics = performance_map.single_element_statistics(
        log, net, initial_marking, aligned_traces, variants_idx,
        activity_key=act_key, timestamp_key=ts_key, ht_perf_method=ht_perf_method)

    return performance_map.aggregate_statistics(element_statistics, measure=measure,
                                                aggregation_measure=agg_measure)
def apply_log(log, list_nets, parameters=None):
    """
    Apply the recomposition alignment approach to a log and a decomposed Petri net

    Parameters
    --------------
    log
        Log
    list_nets
        Decomposition
    parameters
        Parameters of the algorithm

    Returns
    --------------
    aligned_traces
        For each trace, return its alignment
    """
    if parameters is None:
        parameters = {}

    # share the caches across all alignments of this call
    parameters[Parameters.ICACHE] = exec_utils.get_param_value(Parameters.ICACHE, parameters, dict())
    parameters[Parameters.MCACHE] = exec_utils.get_param_value(Parameters.MCACHE, parameters, dict())

    variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)
    # one representative trace per variant
    representatives = [log[variants_idxs[variant][0]] for variant in variants_idxs]

    max_align_time = exec_utils.get_param_value(Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize)
    started_at = time.time()

    # align one trace per variant; once the global budget is exceeded, emit None
    all_alignments = []
    for trace in representatives:
        if time.time() - started_at <= max_align_time:
            all_alignments.append(apply_trace(trace, list_nets, parameters=parameters))
        else:
            all_alignments.append(None)

    # propagate each variant alignment to every trace belonging to the variant
    al_idx = {}
    for pos, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[pos]

    return [al_idx[i] for i in range(len(log))]
def __get_variants_structure(log, parameters):
    """
    Collect the variants of the log (possibly precomputed via Parameters.VARIANTS_IDX)
    together with one representative trace per variant.
    """
    variants_idxs = exec_utils.get_param_value(Parameters.VARIANTS_IDX, parameters, None)
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)
    # iteration order of the dict fixes the variant order used by callers
    variants_list = list(variants_idxs)
    one_tr_per_var = [log[variants_idxs[var][0]] for var in variants_list]
    return variants_idxs, one_tr_per_var
def get_transition_performance_with_token_replay(log, net, im, fm):
    """
    Gets the transition performance through the usage of token-based replay

    Parameters
    -------------
    log
        Event log
    net
        Petri net
    im
        Initial marking
    fm
        Final marking

    Returns
    --------------
    transition_performance
        Dictionary where each (labelled) transition is associated to performance measures
        ("all_values" sorted list, "case_association" per-case values, "mean", "median")
    """
    from pm4py.algo.conformance.tokenreplay import factory as token_replay
    variants_idx = variants_get.get_variants_from_log_trace_idx(log)
    aligned_traces = token_replay.apply(log, net, im, fm)
    element_statistics = single_element_statistics(log, net, im, aligned_traces, variants_idx)

    transition_performance = {}
    for el, stats in element_statistics.items():
        # only visible (labelled) transitions carry performance annotations
        # (isinstance instead of `type(el) is` — idiomatic and subclass-safe)
        if not (isinstance(el, PetriNet.Transition) and el.label is not None):
            continue
        if "log_idx" not in stats or "performance" not in stats:
            continue
        if len(stats["performance"]) == 0:
            continue
        perf = {"all_values": [], "case_association": {}, "mean": 0.0, "median": 0.0}
        transition_performance[str(el)] = perf
        # log_idx[i] is the case index the i-th performance value belongs to
        for case_idx, value in zip(stats["log_idx"], stats["performance"]):
            perf["case_association"].setdefault(case_idx, []).append(value)
            perf["all_values"].append(value)
        perf["all_values"] = sorted(perf["all_values"])
        if perf["all_values"]:
            perf["mean"] = mean(perf["all_values"])
            perf["median"] = median(perf["all_values"])
    return transition_performance
def __approximate_alignments_for_log(log: EventLog, pt: ProcessTree, max_tl: int, max_th: int, parameters=None):
    """
    Approximate one alignment per variant of the log against the process tree,
    then map every trace of the log to the alignment of its variant.
    """
    if parameters is None:
        parameters = {}
    a_sets, sa_sets, ea_sets, tau_sets = initialize_a_sa_ea_tau_sets(pt)
    variants = get_variants_from_log_trace_idx(log, parameters=parameters)
    max_align_time = exec_utils.get_param_value(Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize)
    started_at = time.time()

    trace_to_alignment = {}
    for var, trace_idxs in variants.items():
        now = time.time()
        if now - started_at <= max_align_time:
            # the per-trace routine reads this key to enforce its own budget
            parameters["trace_alignment_start_time"] = now
            representative = log[trace_idxs[0]]
            alignment = __approximate_alignment_for_trace(
                pt, a_sets, sa_sets, ea_sets, tau_sets, representative,
                max_tl, max_th, parameters=parameters)
            alignment = add_fitness_and_cost_info_to_alignments(
                alignment, pt, representative, parameters=parameters)
        else:
            # global time budget exhausted: remaining variants get no alignment
            alignment = None
        for idx in trace_idxs:
            trace_to_alignment[idx] = alignment

    return [trace_to_alignment[i] for i in range(len(log))]
def apply_multiprocessing(log, net, initial_marking, final_marking, parameters=None, variant=TOKEN_REPLAY):
    """
    Apply token-based replay splitting the variants of the log across CPU cores.

    Parameters
    -----------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm (activity/timestamp/case-id keys are defaulted if absent)
    variant
        Replay variant to use

    Returns
    -----------
    replayed_cases
        One replay result per trace of the log, in log order
    """
    if parameters is None:
        parameters = {}
    # default the standard keys expected by the replay workers
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY not in parameters:
        parameters[
            pmutil.constants.
            PARAMETER_CONSTANT_TIMESTAMP_KEY] = xes_util.DEFAULT_TIMESTAMP_KEY
    if pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY not in parameters:
        parameters[
            pmutil.constants.
            PARAMETER_CONSTANT_CASEID_KEY] = pmutil.constants.CASE_ATTRIBUTE_GLUE
    variants_idxs = parameters[
        VARIANTS_IDX] if VARIANTS_IDX in parameters else None
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(
            log, parameters=parameters)
    # [variant, number of traces] pairs; the net travels to workers as a string
    variants_list = [[x, len(y)] for x, y in variants_idxs.items()]
    no_cores = mp.cpu_count()
    petri_net_string = petri_exporter.export_petri_as_string(
        net, initial_marking, final_marking)
    # chunk the variants so each core gets roughly one chunk
    n = math.ceil(len(variants_list) / no_cores)
    variants_list_split = list(chunks(variants_list, n))
    # Define an output queue
    output = mp.Queue()
    # NOTE(review): `target=` is CALLED here, so VERSIONS_MULTIPROCESSING[variant](...)
    # executes in the parent during list construction and its return value becomes the
    # Process target. If real child-process execution is intended, this should be
    # target=fn, args=(...) — confirm against the worker functions' contract.
    processes = [
        mp.Process(target=VERSIONS_MULTIPROCESSING[variant](
            output, x, petri_net_string, parameters=parameters))
        for x in variants_list_split
    ]
    # Run processes
    for p in processes:
        p.start()
    results = []
    for p in processes:
        result = output.get()
        results.append(result)
    # NOTE(review): results are matched to chunks positionally; this assumes queue
    # items arrive in chunk order, which mp.Queue does not guarantee for true
    # child processes — verify ordering assumptions before changing the target= call.
    al_idx = {}
    for index, el in enumerate(variants_list_split):
        for index2, var_item in enumerate(el):
            variant = var_item[0]
            for trace_idx in variants_idxs[variant]:
                al_idx[trace_idx] = results[index][index2]
    # re-expand variant-level results to one entry per trace, in log order
    replayed_cases = []
    for i in range(len(log)):
        replayed_cases.append(al_idx[i])
    return replayed_cases
def apply_log(log, petri_net, initial_marking, final_marking, parameters=None, variant=DEFAULT_VARIANT):
    """
    apply alignments to a log

    Parameters
    -----------
    log
        object of the form :class:`pm4py.log.log.EventLog` event log
    petri_net
        :class:`pm4py.objects.petri.petrinet.PetriNet` the model to use for the alignment
    initial_marking
        :class:`pm4py.objects.petri.petrinet.Marking` initial marking of the net
    final_marking
        :class:`pm4py.objects.petri.petrinet.Marking` final marking of the net
    variant
        selected variant of the algorithm, possible values: {\'Variants.VERSION_STATE_EQUATION_A_STAR, Variants.VERSION_DIJKSTRA_NO_HEURISTICS \'}
    parameters
        :class:`dict` parameters of the algorithm

    Returns
    -----------
    alignment
        :class:`list` of :class:`dict` with keys **alignment**, **cost**, **visited_states**, **queued_states** and
        **traversed_arcs**
        The alignment is a sequence of labels of the form (a,t), (a,>>), or (>>,t)
        representing synchronous/log/model-moves.
    """
    if parameters is None:
        parameters = dict()

    # alignments require (easy) soundness of the accepting Petri net
    if not check_soundness.check_easy_soundness_net_in_fin_marking(
            petri_net, initial_marking, final_marking):
        raise Exception(
            "trying to apply alignments on a Petri net that is not a easy sound net!!"
        )

    start_time = time.time()
    max_align_time = exec_utils.get_param_value(
        Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize)
    max_align_time_case = exec_utils.get_param_value(
        Parameters.PARAM_MAX_ALIGN_TIME_TRACE, parameters, sys.maxsize)

    # best-worst cost of the model, used below to normalize fitness
    parameters_best_worst = copy(parameters)
    best_worst_cost = exec_utils.get_variant(variant).get_best_worst_cost(
        petri_net, initial_marking, final_marking, parameters=parameters_best_worst)

    variants_idxs = exec_utils.get_param_value(Parameters.VARIANTS_IDX,
                                               parameters, None)
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(
            log, parameters=parameters)

    # one representative trace per variant (alignment is identical within a variant)
    one_tr_per_var = []
    variants_list = []
    for index_variant, var in enumerate(variants_idxs):
        variants_list.append(var)
    for var in variants_list:
        one_tr_per_var.append(log[variants_idxs[var][0]])

    all_alignments = []
    for trace in one_tr_per_var:
        # per-trace budget: at most half of the remaining global budget
        # NOTE: this writes into the caller-supplied `parameters` dict (mutation
        # visible to the caller); apply_trace receives a copy
        this_max_align_time = min(max_align_time_case,
                                  (max_align_time -
                                   (time.time() - start_time)) * 0.5)
        parameters[Parameters.PARAM_MAX_ALIGN_TIME_TRACE] = this_max_align_time
        all_alignments.append(
            apply_trace(trace,
                        petri_net,
                        initial_marking,
                        final_marking,
                        parameters=copy(parameters),
                        variant=variant))

    # map each trace index to the alignment of its variant
    al_idx = {}
    for index_variant, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[index_variant]

    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])

    # assign fitness to traces
    for index, align in enumerate(alignments):
        if align is not None:
            # number of non-synchronous moves, derived from the standard move cost
            unfitness_upper_part = align[
                'cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
            if unfitness_upper_part == 0:
                align['fitness'] = 1
            elif (len(log[index]) + best_worst_cost) > 0:
                align['fitness'] = 1 - (
                    (align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) /
                    (len(log[index]) + best_worst_cost))
            else:
                align['fitness'] = 0
    return alignments
def apply(log, aligned_traces, parameters=None):
    """
    Gets the alignment table visualization from the alignments output

    Parameters
    -------------
    log
        Event log
    aligned_traces
        Aligned traces
    parameters
        Parameters of the algorithm

    Returns
    -------------
    gviz
        Graphviz object
    """
    if parameters is None:
        parameters = {}

    variants_idx_dict = variants_get.get_variants_from_log_trace_idx(
        log, parameters=parameters)

    # sort variants by number of occurrences, most frequent first
    variants_idx_list = []
    for variant in variants_idx_dict:
        variants_idx_list.append((variant, variants_idx_dict[variant]))
    variants_idx_list = sorted(variants_idx_list,
                               key=lambda x: len(x[1]),
                               reverse=True)

    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters,
                                              "png")

    table_alignments_list = [
        "digraph {\n", "tbl [\n", "shape=plaintext\n", "label=<\n"
    ]
    table_alignments_list.append(
        "<table border='0' cellborder='1' color='blue' cellspacing='0'>\n")
    table_alignments_list.append(
        "<tr><td>Variant</td><td>Alignment</td></tr>\n")

    for index, variant in enumerate(variants_idx_list):
        # alignment of the first (representative) trace of the variant
        al_tr = aligned_traces[variant[1][0]]
        table_alignments_list.append("<tr>")
        table_alignments_list.append("<td><font point-size='9'>Variant " +
                                     str(index + 1) + " (" +
                                     str(len(variant[1])) +
                                     " occurrences)</font></td>")
        table_alignments_list.append(
            "<td><font point-size='6'><table border='0'><tr>")
        for move in al_tr['alignment']:
            # BUGFIX: '>' must be escaped as '&gt;' inside the graphviz HTML-like
            # label (a bare '>' — e.g. from '>>' skip symbols — breaks the label)
            move_descr = str(move[1]).replace(">", "&gt;")
            # move[0] holds the (log, model) components: both present -> sync move;
            # model component '>>' -> log move; log component '>>' -> model move.
            # BUGFIX: the original condition `not a == ">>" or b == ">>"` also
            # matched log moves, making the violet branch unreachable.
            if move[0][0] != ">>" and move[0][1] != ">>":
                table_alignments_list.append("<td bgcolor=\"green\">" +
                                             move_descr + "</td>")
            elif move[0][1] == ">>":
                table_alignments_list.append("<td bgcolor=\"violet\">" +
                                             move_descr + "</td>")
            elif move[0][0] == ">>":
                table_alignments_list.append("<td bgcolor=\"gray\">" +
                                             move_descr + "</td>")
        table_alignments_list.append("</tr></table></font></td>")
        table_alignments_list.append("</tr>")

    table_alignments_list.append("</table>\n")
    table_alignments_list.append(">];\n")
    table_alignments_list.append("}\n")
    table_alignments = "".join(table_alignments_list)

    filename = tempfile.NamedTemporaryFile(suffix='.gv')
    gviz = Source(table_alignments, filename=filename.name)
    gviz.format = image_format

    return gviz
def apply_log(log, list_nets, parameters=None):
    """
    Apply the recomposition alignment approach to a log and a decomposed Petri net

    Parameters
    --------------
    log
        Log
    list_nets
        Decomposition
    parameters
        Parameters of the algorithm

    Returns
    --------------
    aligned_traces
        For each trace, return its alignment
    """
    if parameters is None:
        parameters = {}

    show_progress_bar = exec_utils.get_param_value(
        Parameters.SHOW_PROGRESS_BAR, parameters, True)
    # share the caches across all alignments of this call
    parameters[Parameters.ICACHE] = exec_utils.get_param_value(Parameters.ICACHE, parameters, dict())
    parameters[Parameters.MCACHE] = exec_utils.get_param_value(Parameters.MCACHE, parameters, dict())

    variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)

    # optional tqdm progress bar (one tick per variant)
    progress = None
    if pkgutil.find_loader("tqdm") and show_progress_bar:
        from tqdm.auto import tqdm
        progress = tqdm(
            total=len(variants_idxs),
            desc=
            "aligning log with decomposition/recomposition, completed variants :: "
        )

    # one representative trace per variant
    representatives = [log[variants_idxs[variant][0]] for variant in variants_idxs]

    max_align_time = exec_utils.get_param_value(Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize)
    started_at = time.time()

    all_alignments = []
    for trace in representatives:
        if time.time() - started_at <= max_align_time:
            alignment = apply_trace(trace, list_nets, parameters=parameters)
        else:
            # global budget exhausted: no alignment for the remaining variants
            alignment = None
        if progress is not None:
            progress.update()
        all_alignments.append(alignment)

    # propagate each variant alignment to every trace of the variant
    al_idx = {}
    for pos, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[pos]

    alignments = [al_idx[i] for i in range(len(log))]

    # gracefully close progress bar
    if progress is not None:
        progress.close()
    del progress

    return alignments
def apply_log_multiprocessing(log, petri_net, initial_marking, final_marking, parameters=None,
                              version=DEFAULT_VARIANT):
    """
    Deprecated factory entry point: compute alignments for a log, splitting the
    variants of the log across CPU cores, and assign a fitness value per trace.
    """
    warnings.warn('factory methods are deprecated, use algorithm entrypoint instead', DeprecationWarning)

    if parameters is None:
        parameters = dict()

    # alignments require (relaxed) soundness of the accepting Petri net
    if not check_soundness.check_easy_soundness_net_in_fin_marking(petri_net, initial_marking, final_marking):
        raise Exception("trying to apply alignments on a Petri net that is not a relaxed sound net!!")

    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY

    model_cost_function = parameters[
        PARAM_MODEL_COST_FUNCTION] if PARAM_MODEL_COST_FUNCTION in parameters else None
    sync_cost_function = parameters[
        PARAM_SYNC_COST_FUNCTION] if PARAM_SYNC_COST_FUNCTION in parameters else None

    if model_cost_function is None or sync_cost_function is None:
        # reset variables value: default costs — visible transitions cost the
        # standard move cost as model moves and 0 as sync moves; taus cost 1
        model_cost_function = dict()
        sync_cost_function = dict()
        for t in petri_net.transitions:
            if t.label is not None:
                model_cost_function[t] = align_utils.STD_MODEL_LOG_MOVE_COST
                sync_cost_function[t] = 0
            else:
                model_cost_function[t] = 1

    parameters[pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
    parameters[
        PARAM_MODEL_COST_FUNCTION] = model_cost_function
    parameters[
        PARAM_SYNC_COST_FUNCTION] = sync_cost_function

    # best-worst cost is computed without the per-trace time limit
    parameters_best_worst = copy(parameters)
    if PARAM_MAX_ALIGN_TIME_TRACE in parameters_best_worst:
        del parameters_best_worst[PARAM_MAX_ALIGN_TIME_TRACE]

    best_worst_cost = VERSIONS_COST[version](petri_net, initial_marking, final_marking,
                                             parameters=parameters_best_worst)

    variants_idxs = parameters[VARIANTS_IDX] if VARIANTS_IDX in parameters else None
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)

    # [variant, number of traces] pairs; the net travels to workers as a string
    variants_list = [[x, len(y)] for x, y in variants_idxs.items()]

    no_cores = mp.cpu_count()

    petri_net_string = petri_exporter.export_petri_as_string(petri_net, initial_marking, final_marking)

    # chunk the variants so each core gets roughly one chunk
    n = math.ceil(len(variants_list) / no_cores)
    variants_list_split = list(chunks(variants_list, n))

    # Define an output queue
    output = mp.Queue()

    # NOTE(review): `target=` is CALLED here, so the worker function executes in
    # the parent during list construction and its return value becomes the Process
    # target; if child-process execution is intended this should be
    # target=fn, args=(...) — confirm against the worker functions' contract.
    processes = [mp.Process(
        target=VERSIONS_VARIANTS_LIST_MPROCESSING[version](output, x, petri_net_string, parameters=parameters))
        for x in variants_list_split]

    # Run processes
    for p in processes:
        p.start()

    results = []
    for p in processes:
        result = output.get()
        results.append(result)

    al_idx = {}
    for index, el in enumerate(variants_list_split):
        for index2, var_item in enumerate(el):
            variant = var_item[0]
            for trace_idx in variants_idxs[variant]:
                # NOTE(review): indexes the chunk result by the variant KEY, while
                # the sibling multiprocessing function uses the positional index
                # (results[index][index2]) — verify the worker's result structure
                # (dict keyed by variant vs list) before relying on this.
                al_idx[trace_idx] = results[index][variant]

    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])

    # assign fitness to traces
    for index, align in enumerate(alignments):
        if align is not None:
            # number of non-synchronous moves, derived from the standard move cost
            unfitness_upper_part = align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
            if unfitness_upper_part == 0:
                align['fitness'] = 1
            elif (len(log[index]) + best_worst_cost) > 0:
                align['fitness'] = 1 - (
                        (align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) / (len(log[index]) + best_worst_cost))
            else:
                align['fitness'] = 0

    return alignments
def apply_log(log, petri_net, initial_marking, final_marking, parameters=None, version=DEFAULT_VARIANT):
    """
    apply alignments to a log

    Parameters
    -----------
    log
        object of the form :class:`pm4py.log.log.EventLog` event log
    petri_net
        :class:`pm4py.objects.petri.petrinet.PetriNet` the model to use for the alignment
    initial_marking
        :class:`pm4py.objects.petri.petrinet.Marking` initial marking of the net
    final_marking
        :class:`pm4py.objects.petri.petrinet.Marking` final marking of the net
    version
        :class:`str` selected variant of the algorithm, possible values: {\'state_equation_a_star\'}
    parameters
        :class:`dict` parameters of the algorithm, for key \'state_equation_a_star\':
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Attribute in the log that contains the activity
            pm4py.algo.conformance.alignments.variants.state_equation_a_star.PARAM_MODEL_COST_FUNCTION ->
            mapping of each transition in the model to corresponding synchronous costs
            pm4py.algo.conformance.alignments.variants.state_equation_a_star.PARAM_SYNC_COST_FUNCTION ->
            mapping of each transition in the model to corresponding model cost
            pm4py.algo.conformance.alignments.variants.state_equation_a_star.PARAM_TRACE_COST_FUNCTION ->
            mapping of each index of the trace to a positive cost value

    Returns
    -----------
    alignment
        :class:`list` of :class:`dict` with keys **alignment**, **cost**, **visited_states**, **queued_states**
        and **traversed_arcs**
        The alignment is a sequence of labels of the form (a,t), (a,>>), or (>>,t)
        representing synchronous/log/model-moves.
    """
    warnings.warn('factory methods are deprecated, use algorithm entrypoint instead', DeprecationWarning)

    if parameters is None:
        parameters = dict()

    # alignments require (easy) soundness of the accepting Petri net
    if not check_soundness.check_easy_soundness_net_in_fin_marking(petri_net, initial_marking, final_marking):
        raise Exception("trying to apply alignments on a Petri net that is not a easy sound net!!")

    start_time = time.time()

    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY

    model_cost_function = parameters[
        PARAM_MODEL_COST_FUNCTION] if PARAM_MODEL_COST_FUNCTION in parameters else None
    sync_cost_function = parameters[
        PARAM_SYNC_COST_FUNCTION] if PARAM_SYNC_COST_FUNCTION in parameters else None
    max_align_time = parameters[PARAM_MAX_ALIGN_TIME] if PARAM_MAX_ALIGN_TIME in parameters else DEFAULT_MAX_ALIGN_TIME
    max_align_time_case = parameters[
        PARAM_MAX_ALIGN_TIME_TRACE] if PARAM_MAX_ALIGN_TIME_TRACE in parameters else DEFAULT_MAX_ALIGN_TIME_TRACE

    if model_cost_function is None or sync_cost_function is None:
        # reset variables value: default costs — visible transitions cost the
        # standard move cost as model moves and 0 as sync moves; taus cost 1
        model_cost_function = dict()
        sync_cost_function = dict()
        for t in petri_net.transitions:
            if t.label is not None:
                model_cost_function[t] = align_utils.STD_MODEL_LOG_MOVE_COST
                sync_cost_function[t] = 0
            else:
                model_cost_function[t] = 1

    parameters[pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
    parameters[
        PARAM_MODEL_COST_FUNCTION] = model_cost_function
    parameters[
        PARAM_SYNC_COST_FUNCTION] = sync_cost_function

    # best-worst cost is computed without the per-trace time limit
    parameters_best_worst = copy(parameters)
    if PARAM_MAX_ALIGN_TIME_TRACE in parameters_best_worst:
        del parameters_best_worst[PARAM_MAX_ALIGN_TIME_TRACE]

    best_worst_cost = VERSIONS_COST[version](petri_net, initial_marking, final_marking,
                                             parameters=parameters_best_worst)

    variants_idxs = parameters[VARIANTS_IDX] if VARIANTS_IDX in parameters else None
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)

    # one representative trace per variant (alignment is identical within a variant)
    one_tr_per_var = []
    variants_list = []
    for index_variant, variant in enumerate(variants_idxs):
        variants_list.append(variant)
    for variant in variants_list:
        one_tr_per_var.append(log[variants_idxs[variant][0]])

    all_alignments = []
    for trace in one_tr_per_var:
        # per-trace budget: at most half of the remaining global budget
        # NOTE: this writes into the caller-supplied `parameters` dict; the
        # per-trace call receives a copy
        this_max_align_time = min(max_align_time_case, (max_align_time - (time.time() - start_time)) * 0.5)
        parameters[PARAM_MAX_ALIGN_TIME_TRACE] = this_max_align_time
        all_alignments.append(apply_trace(trace, petri_net, initial_marking, final_marking,
                                          parameters=copy(parameters), version=version))

    # map each trace index to the alignment of its variant
    al_idx = {}
    for index_variant, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[index_variant]

    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])

    # assign fitness to traces
    for index, align in enumerate(alignments):
        if align is not None:
            # number of non-synchronous moves, derived from the standard move cost
            unfitness_upper_part = align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
            if unfitness_upper_part == 0:
                align['fitness'] = 1
            elif (len(log[index]) + best_worst_cost) > 0:
                align['fitness'] = 1 - (
                        (align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) / (len(log[index]) + best_worst_cost))
            else:
                align['fitness'] = 0

    return alignments
def apply(log, net, initial_marking, final_marking, parameters=None):
    """
    Method to apply token-based replay

    Parameters
    -----------
    log
        Log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm
    """
    if parameters is None:
        parameters = {}

    # cache, on each transition, the marking it produces on firing ...
    for trans in net.transitions:
        produced = Marking()
        for arc in trans.out_arcs:
            produced[arc.target] = arc.weight
        trans.out_marking = produced
    # ... and the marking it consumes
    for trans in net.transitions:
        consumed = Marking()
        for arc in trans.in_arcs:
            consumed[arc.source] = arc.weight
        trans.in_marking = consumed

    variants_idxs = variants_filter.get_variants_from_log_trace_idx(
        log, parameters=parameters)

    # map each activity label to the visible transitions carrying it
    tmap = {}
    bmap = {}
    for trans in net.transitions:
        if trans.label is not None:
            tmap.setdefault(trans.label, []).append(trans)

    # replay once per variant
    results = []
    for variant in variants_idxs:
        vlist = variants_util.get_activities_from_variant(variant)
        results.append(
            tr_vlist(vlist, net, initial_marking, final_marking, tmap, bmap,
                     parameters=parameters))

    # propagate the variant-level result to every trace of the variant
    al_idx = {}
    for pos, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = results[pos]

    return [al_idx[i] for i in range(len(log))]
def get_map_from_log_and_net(log, net, initial_marking, final_marking, force_distribution=None, parameters=None):
    """
    Get transition stochastic distribution map given the log and the Petri net

    Parameters
    -----------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking of the Petri net
    final_marking
        Final marking of the Petri net
    force_distribution
        If provided, distribution to force usage (e.g. EXPONENTIAL)
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> activity name
            Parameters.TIMESTAMP_KEY -> timestamp key

    Returns
    -----------
    stochastic_map
        Map that to each transition associates a random variable
    """
    if parameters is None:
        parameters = {}

    replay_variant = exec_utils.get_param_value(
        Parameters.TOKEN_REPLAY_VARIANT, parameters,
        executor.Variants.TOKEN_REPLAY)
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                         xes_constants.DEFAULT_NAME_KEY)
    ts_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                        xes_constants.DEFAULT_TIMESTAMP_KEY)

    # group the log into variants, then hand them to the replay
    variants_params = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}
    variants_idx = variants_module.get_variants_from_log_trace_idx(
        log, parameters=variants_params)
    variants = variants_module.convert_variants_trace_idx_to_trace_obj(
        log, variants_idx)

    replay_params = {
        token_replay.Parameters.ACTIVITY_KEY: act_key,
        token_replay.Parameters.VARIANTS: variants
    }

    # do the replay
    aligned_traces = executor.apply(log,
                                    net,
                                    initial_marking,
                                    final_marking,
                                    variant=replay_variant,
                                    parameters=replay_params)

    element_statistics = performance_map.single_element_statistics(
        log,
        net,
        initial_marking,
        aligned_traces,
        variants_idx,
        activity_key=act_key,
        timestamp_key=ts_key,
        parameters={"business_hours": True})

    stochastic_map = {}
    for el in element_statistics:
        if type(el) is PetriNet.Transition and "performance" in element_statistics[el]:
            # fit a random variable on the observed performance values
            values = element_statistics[el]["performance"]
            rand = RandomVariable()
            rand.calculate_parameters(values, force_distribution=force_distribution)
            enabled = element_statistics[el]['no_of_times_enabled']
            activated = element_statistics[el]['no_of_times_activated']
            # weight = activation ratio while the transition was enabled
            if enabled > 0:
                rand.set_weight(float(activated) / float(enabled))
            else:
                rand.set_weight(0.0)
            stochastic_map[el] = rand

    return stochastic_map
def get_attributes(log, decision_points, attributes, use_trace_attributes, trace_attributes, k, net,
                   initial_marking, final_marking, decision_points_names, parameters=None):
    """
    This method aims to construct for each decision place a table where for each decision place a list is given
    with the label of the later decision and as value the given attributes

    :param log: Log on which the method is applied
    :param decision_points: Places that have multiple outgoing arcs
    :param attributes: Attributes that are considered
    :param use_trace_attributes: If trace attributes have to be considered or not
    :param trace_attributes: List of trace attributes that are considered
    :param k: Taking k last activities into account
    :param net: Petri net on which the replay/alignments are computed
    :param initial_marking: Initial marking of the net
    :param final_marking: Final marking of the net
    :param decision_points_names: Mapping from decision places to the labels of their possible successors
    :param parameters: Parameters of the algorithm
    :return: Dictionary that has as keys the decision places. The value for this key is a list.
        The content of these lists are tuples. The first element of these tuples is information regarding the
        attributes, the second element of these tuples is the transition which was chosen in a decision.
    """
    if parameters is None:
        parameters = {}
    # I: per decision place, the collected (attribute snapshot, chosen transition) pairs
    I = {}
    for key in decision_points:
        I[key] = []
    # A: rolling snapshot of the last seen value of every considered attribute
    A = {}
    for attri in attributes:
        A[attri] = None
    i = 0
    # first, take a look at the variants
    variants_idxs = variants_module.get_variants_from_log_trace_idx(
        log, parameters=parameters)
    one_variant = []
    for variant in variants_idxs:
        one_variant.append(variant)
    # TODO: extend the token-based replay code with a parameter so that only the variants are considered
    replay_result = token_replay.apply(log, net, initial_marking, final_marking,
                                       parameters=parameters)
    replay_result = simplify_token_replay(replay_result)
    count = 0
    for variant in replay_result:
        if variant['trace_fitness'] == 1.0:
            # perfectly fitting variant: walk the replayed transitions directly
            for trace_index in variants_idxs[one_variant[count]]:
                # last_k_list: ring buffer of the k most recent attribute snapshots
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                # j is a pointer which points to the current event inside a trace
                for transition in variant['activated_transitions']:
                    for key, value in decision_points_names.items():
                        if transition.label in value:
                            for element in last_k_list:
                                if element != None:
                                    if transition.label != None:
                                        I[key].append(
                                            (element.copy(), transition.label))
                                    else:
                                        I[key].append(
                                            (element.copy(), transition.name))
                    for attri in attributes:
                        # print(variant, transition.label, j)
                        if attri in trace[j]:
                            # only add the attribute information if it is present in the event
                            A[attri] = trace[j][attri]
                    # add A to last_k_list. Using modulo to access correct entry
                    last_k_list[j % k] = A.copy()
                    if transition.label != None:
                        if not j + 1 >= len(trace):
                            # Problem otherwise: If there are tau-transition after the last event related transition,
                            # the pointer j which points to the current event in a trace, gets out of range
                            j += 1
        else:
            # non-fitting variant: align one example trace and walk the alignment
            example_trace = log[variants_idxs[one_variant[count]][0]]
            align_parameters = copy(parameters)
            align_parameters[star.Parameters.
                             PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE] = True
            alignment = ali.apply(example_trace, net, initial_marking, final_marking,
                                  parameters=align_parameters)['alignment']
            for trace_index in variants_idxs[one_variant[count]]:
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                for el in alignment:
                    if el[1][1] != '>>':
                        # If move in model
                        for key, value in decision_points.items():
                            if el[0][1] in value:
                                for element in last_k_list:
                                    if element != None:
                                        # only add those entries where information is provided
                                        if el[1][1] == None:
                                            # for some dt algorithms, the entry None might be a problem, since it is left out later
                                            I[key].append(
                                                (element.copy(), el[0][1]))
                                        else:
                                            I[key].append(
                                                (element.copy(), el[1][1]))
                    if el[1][0] != '>>' and el[1][1] != '>>':
                        # If there is a move in log and model
                        for attri in attributes:
                            if attri in trace[j]:
                                # only add the attribute information if it is present in the event
                                A[attri] = trace[j][attri]
                        # add A to last_k_list. Using modulo to access correct entry
                        last_k_list[j % k] = A.copy()
                    if el[1][0] != '>>':
                        # only go to next event in trace if the current event has been aligned
                        # TODO: Discuss if this is correct or can lead to problems
                        j += 1
        # advance to the next variant's trace-index bucket
        count += 1
    return I
def apply(df, discovery_algorithm=discover_inductive, parameters=None):
    """
    Discover one process model per object perspective from an MDL dataframe,
    and annotate each model with frequency/performance statistics obtained
    through token-based replay.

    Parameters
    ------------
    df
        MDL dataframe (succint or exploded; a succint frame is exploded first)
    discovery_algorithm
        Discovery algorithm applied to each perspective's flattened log
        (default: discover_inductive)
    parameters
        Possible parameters of the algorithm:
        - "allowed_activities": map perspective -> activities to keep
          (perspectives missing from the map are skipped entirely)
        - "debug": print progress information (default True)
        - "min_node_freq" / "min_edge_freq": frequency thresholds used to
          clean the dataframe before discovery (default 0)

    Returns
    ------------
    ret
        Dictionary with, per perspective: discovered net/markings, activity
        counts, replay results, place fitness, aggregated frequency and
        performance statistics, plus overall timing statistics under
        "computation_statistics"
    """
    if parameters is None:
        parameters = {}

    allowed_activities = parameters["allowed_activities"] if "allowed_activities" in parameters else None
    debug = parameters["debug"] if "debug" in parameters else True

    # A "succint" frame has one row per event; explode it so each
    # (event, object) pair gets its own row, as the later steps require.
    try:
        if df.type == "succint":
            df = succint_mdl_to_exploded_mdl.apply(df)
            df.type = "exploded"
    except Exception:
        # df may not expose a "type" attribute at all; in that case assume
        # it is already exploded (best-effort, keep going).
        pass

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    min_node_freq = parameters["min_node_freq"] if "min_node_freq" in parameters else 0
    min_edge_freq = parameters["min_edge_freq"] if "min_edge_freq" in parameters else 0

    # Drop infrequent activities and arcs before discovery
    df = clean_frequency.apply(df, min_node_freq)
    df = clean_arc_frequency.apply(df, min_edge_freq)

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    # Every column not prefixed with "event_" identifies an object perspective
    persps = [x for x in df.columns if not x.startswith("event_")]

    ret = {}
    ret["nets"] = {}
    ret["act_count"] = {}
    ret["replay"] = {}
    ret["group_size_hist"] = {}
    ret["act_count_replay"] = {}
    ret["group_size_hist_replay"] = {}
    ret["aligned_traces"] = {}
    ret["place_fitness_per_trace"] = {}
    ret["aggregated_statistics_frequency"] = {}
    ret["aggregated_statistics_performance_min"] = {}
    ret["aggregated_statistics_performance_max"] = {}
    ret["aggregated_statistics_performance_median"] = {}
    ret["aggregated_statistics_performance_mean"] = {}

    # Cumulative wall-clock timings over all perspectives
    diff_log = 0
    diff_model = 0
    diff_token_replay = 0
    diff_performance_annotation = 0
    diff_basic_stats = 0

    for persp in persps:
        aa = time.time()
        if debug:
            print(persp, "getting log")
        # Flatten the dataframe into an event log for this perspective
        log = algorithm.apply(df, persp, parameters=parameters)
        if debug:
            print(len(log))
        if allowed_activities is not None:
            if persp not in allowed_activities:
                continue
            filtered_log = attributes_filter.apply_events(
                log, allowed_activities[persp])
        else:
            filtered_log = log
        bb = time.time()
        diff_log += (bb - aa)

        # filtered_log = variants_filter.apply_auto_filter(deepcopy(filtered_log), parameters={"decreasingFactor": 0.5})
        if debug:
            print(len(log))
            print(persp, "got log")
        cc = time.time()

        # net, im, fm = inductive_miner.apply(filtered_log)
        net, im, fm = discovery_algorithm(filtered_log)
        """if persp == "items": trans_map = {t.label:t for t in net.transitions} source_place_it = list(trans_map["item out of stock"].in_arcs)[0].source target_place_re = list(trans_map["reorder item"].out_arcs)[0].target skip_trans_1 = PetriNet.Transition(str(uuid.uuid4()), None) net.transitions.add(skip_trans_1) add_arc_from_to(source_place_it, skip_trans_1, net) add_arc_from_to(skip_trans_1, target_place_re, net)"""
        # net = reduce_petri_net(net)
        dd = time.time()
        diff_model += (dd - cc)

        # net, im, fm = alpha_miner.apply(filtered_log)
        if debug:
            print(persp, "got model")

        xx1 = time.time()
        activ_count = algorithm.apply(
            df, persp, variant="activity_occurrence", parameters=parameters)
        if debug:
            print(persp, "got activ_count")
        xx2 = time.time()

        ee = time.time()
        variants_idx = variants_module.get_variants_from_log_trace_idx(log)
        # variants = variants_module.convert_variants_trace_idx_to_trace_obj(log, variants_idx)
        # parameters_tr = {PARAM_ACTIVITY_KEY: "concept:name", "variants": variants}
        if debug:
            print(persp, "got variants")

        # Token-based replay with per-place/per-transition fitness enabled
        aligned_traces, place_fitness_per_trace, transition_fitness_per_trace, notexisting_activities_in_model = tr_factory.apply(
            log, net, im, fm, parameters={
                "enable_pltr_fitness": True, "disable_variants": True
            })
        if debug:
            print(persp, "done tbr")

        element_statistics = performance_map.single_element_statistics(
            log, net, im, aligned_traces, variants_idx)
        if debug:
            print(persp, "done element_statistics")
        ff = time.time()
        diff_token_replay += (ff - ee)

        aggregated_statistics = performance_map.aggregate_statistics(
            element_statistics)
        if debug:
            print(persp, "done aggregated_statistics")

        element_statistics_performance = performance_map.single_element_statistics(
            log, net, im, aligned_traces, variants_idx)
        if debug:
            print(persp, "done element_statistics_performance")
        gg = time.time()

        aggregated_statistics_performance_min = performance_map.aggregate_statistics(
            element_statistics_performance, measure="performance",
            aggregation_measure="min")
        aggregated_statistics_performance_max = performance_map.aggregate_statistics(
            element_statistics_performance, measure="performance",
            aggregation_measure="max")
        aggregated_statistics_performance_median = performance_map.aggregate_statistics(
            element_statistics_performance, measure="performance",
            aggregation_measure="median")
        aggregated_statistics_performance_mean = performance_map.aggregate_statistics(
            element_statistics_performance, measure="performance",
            aggregation_measure="mean")
        hh = time.time()
        diff_performance_annotation += (hh - ee)
        if debug:
            print(persp, "done aggregated_statistics_performance")

        group_size_hist = algorithm.apply(
            df, persp, variant="group_size_hist", parameters=parameters)
        if debug:
            print(persp, "done group_size_hist")

        # For each transition label, collect the (case id, event id) pairs of
        # the events that replayed correctly (fit trace, not underfed)
        occurrences = {}
        for trans in transition_fitness_per_trace:
            occurrences[trans.label] = set()
            for trace in transition_fitness_per_trace[trans]["fit_traces"]:
                if not trace in transition_fitness_per_trace[trans][
                        "underfed_traces"]:
                    case_id = trace.attributes["concept:name"]
                    for event in trace:
                        if event["concept:name"] == trans.label:
                            occurrences[trans.label].add(
                                (case_id, event["event_id"]))
            # print(transition_fitness_per_trace[trans])

        # Number of distinct event ids per activity
        len_different_ids = {}
        for act in occurrences:
            len_different_ids[act] = len(set(x[1] for x in occurrences[act]))

        # Per activity: sorted histogram of how many events each case contains.
        # BUG FIX: the membership test previously checked the OUTER dict
        # (keyed by activity labels) instead of eid_acti_count[act] (keyed by
        # case ids), so the counter was reset to 0 on every occurrence and
        # every count came out as 1.
        eid_acti_count = {}
        for act in occurrences:
            eid_acti_count[act] = {}
            for x in occurrences[act]:
                if not x[0] in eid_acti_count[act]:
                    eid_acti_count[act][x[0]] = 0
                eid_acti_count[act][x[0]] = eid_acti_count[act][x[0]] + 1
            eid_acti_count[act] = sorted(list(eid_acti_count[act].values()))

        ii = time.time()
        diff_basic_stats += (ii - hh) + (xx2 - xx1)

        ret["nets"][persp] = [net, im, fm]
        ret["act_count"][persp] = activ_count
        ret["aligned_traces"][persp] = aligned_traces
        ret["place_fitness_per_trace"][persp] = place_fitness_per_trace
        ret["aggregated_statistics_frequency"][persp] = aggregated_statistics
        ret["aggregated_statistics_performance_min"][
            persp] = aggregated_statistics_performance_min
        ret["aggregated_statistics_performance_max"][
            persp] = aggregated_statistics_performance_max
        ret["aggregated_statistics_performance_median"][
            persp] = aggregated_statistics_performance_median
        ret["aggregated_statistics_performance_mean"][
            persp] = aggregated_statistics_performance_mean
        ret["replay"][persp] = aggregated_statistics
        ret["group_size_hist"][persp] = group_size_hist
        ret["act_count_replay"][persp] = len_different_ids
        ret["group_size_hist_replay"][persp] = eid_acti_count

    ret["computation_statistics"] = {
        "diff_log": diff_log,
        "diff_model": diff_model,
        "diff_token_replay": diff_token_replay,
        "diff_performance_annotation": diff_performance_annotation,
        "diff_basic_stats": diff_basic_stats
    }

    return ret
def get_decorations(log, net, initial_marking, final_marking, parameters=None, measure="frequency", ht_perf_method="last"):
    """
    Compute the decorations with which to annotate a Petri net

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated to the algorithm
    measure
        Measure to represent on the process model (frequency/performance)
    ht_perf_method
        Annotation point for hidden transitions: performance value is placed
        on the last possible point ("last") or the first possible point ("first")

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}

    # Resolve parameters, falling back to the standard XES keys
    aggregation_measure = parameters.get("aggregationMeasure")
    activity_key = parameters.get(PARAM_ACTIVITY_KEY, log_lib.util.xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(PARAM_TIMESTAMP_KEY, "time:timestamp")

    # Group traces by variant so the replay can work per-variant
    variants_idx = variants_get.get_variants_from_log_trace_idx(log, parameters={PARAM_ACTIVITY_KEY: activity_key})
    variants = variants_get.convert_variants_trace_idx_to_trace_obj(log, variants_idx)

    # Token-based replay of the log on the net
    replay_parameters = {PARAM_ACTIVITY_KEY: activity_key, "variants": variants}
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=replay_parameters)

    # apply petri_reduction technique in order to simplify the Petri net
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})

    # Collect per-element statistics from the replay, then aggregate them
    # into the requested measure
    element_statistics = performance_map.single_element_statistics(
        log, net, initial_marking, aligned_traces, variants_idx,
        activity_key=activity_key, timestamp_key=timestamp_key,
        ht_perf_method=ht_perf_method)
    return performance_map.aggregate_statistics(
        element_statistics, measure=measure,
        aggregation_measure=aggregation_measure)