def apply_auto_filter(trace_log, parameters=None):
    """
    Apply a battery of auto filters to the log in order to get a simplified log

    Parameters
    ----------
    trace_log
        Trace log
    parameters
        Eventual parameters applied to the algorithms:
            decreasingFactor -> Decreasing factor (provided to all algorithms)
            activity_key -> Activity key (must be specified if different from concept:name)

    Returns
    ---------
    filtered_log
        Filtered log
    """
    # the following filters are applied, in order:
    # - activity filter (keep only attributes with a reasonable number of occurrences)
    # - paths filter (keep only paths with a reasonable number of occurrences)
    # - variant filter (keep only variants with a reasonable number of occurrences)
    # - start attributes filter (keep only variants that start with a plausible start activity)
    # - end attributes filter (keep only variants that end with a plausible end activity)
    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    decreasing_factor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else filtering_constants.DECREASING_FACTOR

    # parameters forwarded to every child filter
    parameters_child = {"decreasingFactor": decreasing_factor,
                        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key}

    variants = variants_module.get_variants(trace_log, parameters=parameters_child)
    filtered_log1 = attributes_filter.apply_auto_filter(trace_log, variants=variants, parameters=parameters_child)
    # drop the previous stage's log as soon as it is no longer needed to keep peak memory down
    del trace_log

    variants = variants_module.get_variants(filtered_log1, parameters=parameters_child)
    filtered_log2 = paths_filter.apply_auto_filter(filtered_log1, variants=variants, parameters=parameters_child)
    del filtered_log1

    variants = variants_module.get_variants(filtered_log2, parameters=parameters_child)
    filtered_log3 = variants_module.apply_auto_filter(filtered_log2, variants=variants, parameters=parameters_child)
    variants = variants_module.get_variants(filtered_log3, parameters=parameters_child)
    del filtered_log2

    filtered_log4 = start_activities_filter.apply_auto_filter(filtered_log3, variants=variants,
                                                              parameters=parameters_child)
    del filtered_log3
    # NOTE(review): the variants computed on filtered_log3 are reused for the end-activities
    # filter on filtered_log4 (matches the original behavior); they may be stale after the
    # start-activities filter removed traces — confirm intentional
    filtered_log5 = end_activities_filter.apply_auto_filter(filtered_log4, variants=variants,
                                                            parameters=parameters_child)
    del filtered_log4

    return filtered_log5
def calculate_prefix_alignments_multiprocessing(petri_net_filename, log, path_to_files):
    """
    Compute prefix alignments for every variant of the log against the Petri net
    imported from the given PNML file, fanning the work out over a process pool,
    and pickle the collected results into the same directory.

    Parameters
    ----------
    petri_net_filename
        File name of the PNML model inside path_to_files
    log
        Event log whose variants are aligned
    path_to_files
        Directory containing the PNML file; results are written here as well
    """
    pnml_file_path = os.path.join(path_to_files, petri_net_filename)
    net, im, fm = petri.importer.pnml.import_net(pnml_file_path)
    variants = variants_filter.get_variants(log)

    # module-level counter read by the progress callback
    global number_traces
    number_traces = len(variants)

    pool = Pool()
    pending = []
    # one task per variant: align its first trace, carrying the variant's multiplicity
    for variant_key, variant_traces in variants.items():
        async_result = pool.apply_async(
            calculate_prefix_alignment_for_trace,
            args=(variant_traces[0], net, im, fm, len(variant_traces),),
            callback=print_progress_on_console)
        pending.append(async_result)
    pool.close()
    pool.join()

    results = [task.get() for task in pending]

    results_path = os.path.join(
        path_to_files,
        "RESULTS_" + petri_net_filename + '_' + str(date.today()) + ".pickle")
    with open(results_path, 'wb') as handle:
        pickle.dump(results, handle)
def get_variants_from_log(log, activity_key, disable_variants=False):
    """
    Extract the variants of the log, optionally treating every trace as its own variant

    Parameters
    -------------
    log
        Trace log
    activity_key
        Attribute that is the activity
    disable_variants
        Boolean value that disables variant grouping (each trace becomes a
        singleton variant keyed by its hash)

    Returns
    -------------
    variants
        Variants contained in the log
    """
    if disable_variants:
        # one synthetic singleton variant per trace
        return {str(hash(trace)): [trace] for trace in log}
    variant_parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
    }
    return variants_module.get_variants(log, parameters=variant_parameters)
def apply_auto_filter(trace_log, variants=None, parameters=None):
    """
    Apply a start attributes filter detecting automatically a percentage

    NOTE(review): the original docstring said "end attributes filter", but every
    call below uses start-activity helpers (get_start_activities,
    filter_log_by_start_activities) — confirm which half (docstring or code /
    module placement) is wrong.

    Parameters
    ----------
    trace_log
        Trace log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value
    parameters
        Parameters of the algorithm, including:
            decreasingFactor -> Decreasing factor (stops the algorithm when the next
                activity by occurrence is below this factor in comparison to previous)
            attribute_key -> Attribute key (must be specified if different from concept:name)

    Returns
    ---------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY
    decreasing_factor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else DECREASING_FACTOR
    parameters_variants = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key
    }
    # compute variants only when the caller did not supply them
    if variants is None:
        variants = variants_filter.get_variants(trace_log, parameters=parameters_variants)
    vc = variants_filter.get_variants_sorted_by_count(variants)
    start_activities = get_start_activities(trace_log, parameters=parameters_variants)
    salist = start_activities_common.get_sorted_start_activities_list(
        start_activities)
    # threshold below which start activities are considered implausible
    sathreshold = start_activities_common.get_start_activities_threshold(
        salist, decreasing_factor)
    filtered_log = filter_log_by_start_activities(start_activities, variants, vc, sathreshold, attribute_key)
    return filtered_log
def get_variant_statistics(trace_log, parameters=None):
    """
    Build a list of variants with the number of traces sharing each variant,
    sorted by decreasing count

    Parameters
    ----------
    trace_log
        Trace log
    parameters
        Parameters of the algorithm, including:
            activity_key -> Attribute identifying the activity in the log
            max_variants_to_return -> Maximum number of variants to return
            variants -> If provided, avoid recalculation of the variants

    Returns
    ----------
    variants_list
        List of variants along the statistics
    """
    if parameters is None:
        parameters = {}
    limit = parameters["max_variants_to_return"] if "max_variants_to_return" in parameters else None
    if "variants" in parameters:
        variants = parameters["variants"]
    else:
        variants = variants_filter.get_variants(trace_log, parameters=parameters)
    stats = [{"variant": variant, "count": len(traces)} for variant, traces in variants.items()]
    # stable sort: ties keep the variants' original insertion order
    stats.sort(key=lambda entry: entry["count"], reverse=True)
    if limit:
        stats = stats[:min(len(stats), limit)]
    return stats
def apply(log, parameters=None):
    """
    Returns a log from which a sound workflow net could be extracted taking into account
    a discovery algorithm returning models only with visible transitions

    Parameters
    ------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm, including:
            discovery_algorithm -> Discovery algorithm to consider, possible choices: alphaclassic
            max_no_variants -> Maximum number of variants to consider to return a Petri net

    Returns
    ------------
    filtered_log
        Filtered trace log
    """
    if parameters is None:
        parameters = {}
    discovery_algorithm = parameters[
        "discovery_algorithm"] if "discovery_algorithm" in parameters else "alphaclassic"
    max_no_variants = parameters[
        "max_no_variants"] if "max_no_variants" in parameters else 20
    all_variants_dictio = variants_filter.get_variants(log, parameters=parameters)
    all_variants_list = []
    for var in all_variants_dictio:
        all_variants_list.append([var, len(all_variants_dictio[var])])
    # most frequent variants first; variant string breaks count ties deterministically
    all_variants_list = sorted(all_variants_list, key=lambda x: (x[1], x[0]), reverse=True)
    considered_variants = []
    considered_traces = []
    i = 0
    # greedily grow the set of variants: tentatively add one variant, rediscover a
    # model, and roll the addition back (del [-1]) if the model is unsound or unfit
    while i < min(len(all_variants_list), max_no_variants):
        variant = all_variants_list[i][0]
        considered_variants.append(variant)
        considered_traces.append(all_variants_dictio[variant][0])
        filtered_log = TraceLog(considered_traces)
        net = None
        initial_marking = None
        final_marking = None
        if discovery_algorithm == "alphaclassic" or discovery_algorithm == "alpha":
            net, initial_marking, final_marking = alpha_miner.apply(
                filtered_log, parameters=parameters)
        is_sound = check_soundness.check_petri_wfnet_and_soundness(net)
        if not is_sound:
            # unsound model: undo the tentative addition
            del considered_variants[-1]
            del considered_traces[-1]
        else:
            try:
                # alignments are computed only to probe replayability; the result is discarded
                alignments = alignment_factory.apply(filtered_log, net, initial_marking, final_marking)
                del alignments
                fitness = replay_fitness_factory.apply(filtered_log, net, initial_marking, final_marking,
                                                       parameters=parameters)
                # require (near-)perfect fitness before keeping the variant
                if fitness["log_fitness"] < 0.99999:
                    del considered_variants[-1]
                    del considered_traces[-1]
            except TypeError:
                # NOTE(review): only TypeError is rolled back here — presumably raised by the
                # alignment/fitness machinery on degenerate models; confirm other exceptions
                # are meant to propagate
                del considered_variants[-1]
                del considered_traces[-1]
        i = i + 1
    sound_log = TraceLog()
    if considered_variants:
        # keep full traces (not just representatives) of the accepted variants
        sound_log = variants_filter.apply(log, considered_variants, parameters=parameters)
    return sound_log
def apply_log(log, net, initial_marking, final_marking, enable_place_fitness=False,
              consider_remaining_in_fitness=False, activity_key="concept:name",
              reach_mark_through_hidden=True, stop_immediately_unfit=False,
              walk_through_hidden_trans=True, places_shortest_path_by_hidden=None,
              variants=None):
    """
    Apply token-based replay to a log

    Replay is performed once per variant (in a worker thread per variant, batched
    so that at most MAX_NO_THREADS run concurrently) and the per-variant result is
    then fanned back out to every trace of the log sharing that variant.

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    enable_place_fitness
        Enable fitness calculation at place level
    consider_remaining_in_fitness
        Boolean value telling if the remaining tokens should be considered in fitness evaluation
    activity_key
        Name of the attribute that contains the activity
    reach_mark_through_hidden
        Boolean value that decides if we shall try to reach the final marking through hidden transitions
    stop_immediately_unfit
        Boolean value that decides if we shall stop immediately when a non-conformance is detected
    walk_through_hidden_trans
        Boolean value that decides if we shall walk through hidden transitions in order to enable
        visible transitions
    places_shortest_path_by_hidden
        Shortest paths between places by hidden transitions
    variants
        List of variants contained in the event log

    Returns
    ----------
    aligned_traces (and, if enable_place_fitness, also place_fitness_per_trace)
    """
    # caches shared by all replay workers
    post_fix_cache = PostFixCaching()
    marking_to_activity_cache = MarkingToActivityCaching()
    if places_shortest_path_by_hidden is None:
        places_shortest_path_by_hidden = get_places_shortest_path_by_hidden(net)
    place_fitness_per_trace = {}
    aligned_traces = []
    if enable_place_fitness:
        for place in net.places:
            place_fitness_per_trace[place] = {"underfed_traces": set(), "overfed_traces": set()}
    # label -> transition map; NOTE(review): duplicate labels overwrite earlier entries
    trans_map = {}
    for t in net.transitions:
        trans_map[t.label] = t
    if len(log) > 0:
        if len(log[0]) > 0:
            # the first event of the first trace must carry the activity attribute
            if activity_key in log[0][0]:
                if variants is None:
                    parameters_variants = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}
                    variants = variants_module.get_variants(log, parameters=parameters_variants)
                vc = variants_module.get_variants_sorted_by_count(variants)
                threads = {}
                threads_results = {}
                for i in range(len(vc)):
                    variant = vc[i][0]
                    threads_keys = list(threads.keys())
                    # drain the current batch of workers before starting more,
                    # so at most MAX_NO_THREADS (+1) threads are alive at once
                    if len(threads_keys) > MAX_NO_THREADS:
                        for j in range(len(threads_keys)):
                            t = threads[threads_keys[j]]
                            t.join()
                            # snapshot the worker's result fields (copies, since the
                            # thread object is deleted right after)
                            threads_results[threads_keys[j]] = {"trace_is_fit": copy(t.t_fit),
                                                                "trace_fitness": copy(t.t_value),
                                                                "activated_transitions": copy(t.act_trans),
                                                                "reached_marking": copy(t.reached_marking),
                                                                "enabled_transitions_in_marking": copy(
                                                                    t.enabled_trans_in_mark),
                                                                "transitions_with_problems": copy(
                                                                    t.trans_probl)}
                            del threads[threads_keys[j]]
                        del threads_keys
                    # replay only one representative trace per variant
                    threads[variant] = ApplyTraceTokenReplay(variants[variant][0], net, initial_marking,
                                                             final_marking, trans_map, enable_place_fitness,
                                                             place_fitness_per_trace,
                                                             places_shortest_path_by_hidden,
                                                             consider_remaining_in_fitness,
                                                             activity_key=activity_key,
                                                             reach_mark_through_hidden=reach_mark_through_hidden,
                                                             stop_immediately_when_unfit=stop_immediately_unfit,
                                                             walk_through_hidden_trans=walk_through_hidden_trans,
                                                             post_fix_caching=post_fix_cache,
                                                             marking_to_activity_caching=marking_to_activity_cache)
                    threads[variant].start()
                # drain whatever workers remain after the loop
                threads_keys = list(threads.keys())
                for j in range(len(threads_keys)):
                    t = threads[threads_keys[j]]
                    t.join()
                    threads_results[threads_keys[j]] = {"trace_is_fit": copy(t.t_fit),
                                                        "trace_fitness": copy(t.t_value),
                                                        "activated_transitions": copy(t.act_trans),
                                                        "reached_marking": copy(t.reached_marking),
                                                        "enabled_transitions_in_marking": copy(
                                                            t.enabled_trans_in_mark),
                                                        "transitions_with_problems": copy(t.trans_probl)}
                    del threads[threads_keys[j]]
                # fan the per-variant result back out to every trace of the log;
                # the variant key is the comma-joined activity sequence
                for trace in log:
                    trace_variant = ",".join([x[activity_key] for x in trace])
                    t = threads_results[trace_variant]
                    aligned_traces.append(t)
            else:
                raise NoConceptNameException("at least an event is without " + activity_key)
    if enable_place_fitness:
        return aligned_traces, place_fitness_per_trace
    else:
        return aligned_traces