def get_decorations(log, net, initial_marking, final_marking, parameters=None, measure="frequency",
                    ht_perf_method="last"):
    """
    Calculate decorations in order to annotate the Petri net

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated with the algorithm
    measure
        Measure to represent on the process model (frequency/performance)
    ht_perf_method
        Method used to annotate hidden transitions (the performance value can be put at the last
        possible point ("last") or at the first possible point ("first"))

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}

    aggregation_measure = exec_utils.get_param_value(Parameters.AGGREGATION_MEASURE, parameters, None)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)

    variants_idx = variants_get.get_variants_from_log_trace_idx(log, parameters=parameters)
    variants = variants_get.convert_variants_trace_idx_to_trace_obj(log, variants_idx)

    parameters_tr = {token_replay.Variants.TOKEN_REPLAY.value.Parameters.ACTIVITY_KEY: activity_key,
                     token_replay.Variants.TOKEN_REPLAY.value.Parameters.VARIANTS: variants}

    # do the token-based replay
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters_tr)

    # apply petri_reduction technique in order to simplify the Petri net
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})

    element_statistics = performance_map.single_element_statistics(log, net, initial_marking, aligned_traces,
                                                                   variants_idx, activity_key=activity_key,
                                                                   timestamp_key=timestamp_key,
                                                                   ht_perf_method=ht_perf_method)

    aggregated_statistics = performance_map.aggregate_statistics(element_statistics, measure=measure,
                                                                 aggregation_measure=aggregation_measure)

    return aggregated_statistics
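

# Usage sketch (illustrative, not part of the original module): annotate a net with
# mean-aggregated performance values through the get_decorations function defined above.
# The helper name and the surrounding objects (log, net, markings) are assumptions;
# they would normally come from a pm4py discovery step performed by the caller.
def decorate_with_mean_performance(log, net, initial_marking, final_marking):
    # aggregate the per-element durations with the arithmetic mean and annotate
    # hidden transitions at the first possible point instead of the last one
    return get_decorations(log, net, initial_marking, final_marking,
                           parameters={Parameters.AGGREGATION_MEASURE: "mean"},
                           measure="performance", ht_perf_method="first")
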
def get_transition_performance_with_token_replay(log, net, im, fm):
    """
    Gets the transition performance through the usage of token-based replay

    Parameters
    -------------
    log
        Event log
    net
        Petri net
    im
        Initial marking
    fm
        Final marking

    Returns
    --------------
    transition_performance
        Dictionary where each transition label is associated to performance measures
    """
    variants_idx = variants_module.get_variants_from_log_trace_idx(log)
    aligned_traces = token_replay.apply(log, net, im, fm)
    element_statistics = performance_map.single_element_statistics(log, net, im, aligned_traces, variants_idx)

    transition_performance = {}
    for el in element_statistics:
        if type(el) is PetriNet.Transition and el.label is not None:
            stats = element_statistics[el]
            if "log_idx" in stats and "performance" in stats and len(stats["performance"]) > 0:
                perf = {"all_values": [], "case_association": {}, "mean": 0.0, "median": 0.0}
                for i in range(len(stats["log_idx"])):
                    case_idx = stats["log_idx"][i]
                    if case_idx not in perf["case_association"]:
                        perf["case_association"][case_idx] = []
                    perf["case_association"][case_idx].append(stats["performance"][i])
                    perf["all_values"].append(stats["performance"][i])
                perf["all_values"] = sorted(perf["all_values"])
                if perf["all_values"]:
                    perf["mean"] = mean(perf["all_values"])
                    perf["median"] = median(perf["all_values"])
                transition_performance[str(el)] = perf
    return transition_performance
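

# Illustrative helper (not part of the original source): rank visible transitions by the
# mean duration computed by get_transition_performance_with_token_replay. The helper
# name and the parameter k are hypothetical.
def get_slowest_transitions(log, net, im, fm, k=3):
    transition_performance = get_transition_performance_with_token_replay(log, net, im, fm)
    # sort the transition labels by decreasing mean performance and keep the top k
    ranked = sorted(transition_performance.items(), key=lambda kv: kv[1]["mean"], reverse=True)
    return [(label, values["mean"]) for label, values in ranked[:k]]
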
def get_decorations(log, net, initial_marking, final_marking, parameters=None, measure="frequency"):
    """
    Calculate decorations in order to annotate the Petri net

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated with the algorithm
    measure
        Measure to represent on the process model (frequency/performance)

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}

    aggregation_measure = None
    if "aggregationMeasure" in parameters:
        aggregation_measure = parameters["aggregationMeasure"]

    activity_key = parameters[PARAM_ACTIVITY_KEY] if PARAM_ACTIVITY_KEY in parameters else log_lib.util.xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[PARAM_TIMESTAMP_KEY] if PARAM_TIMESTAMP_KEY in parameters else "time:timestamp"

    parameters_variants = {PARAM_ACTIVITY_KEY: activity_key}
    variants_idx = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters_variants)
    variants = variants_module.convert_variants_trace_idx_to_trace_obj(log, variants_idx)

    parameters_tr = {PARAM_ACTIVITY_KEY: activity_key, "variants": variants}

    # do the token-based replay
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters_tr)

    # apply petri_reduction technique in order to simplify the Petri net
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})

    element_statistics = performance_map.single_element_statistics(log, net, initial_marking, aligned_traces,
                                                                   variants_idx, activity_key=activity_key,
                                                                   timestamp_key=timestamp_key)

    aggregated_statistics = performance_map.aggregate_statistics(element_statistics, measure=measure,
                                                                 aggregation_measure=aggregation_measure)

    return aggregated_statistics
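

# Usage sketch (assumed caller code, not part of the original file): compute both frequency
# decorations and median-aggregated performance decorations for the same model with the
# get_decorations variant defined above. The helper name is hypothetical.
def get_frequency_and_performance_decorations(log, net, initial_marking, final_marking):
    frequency_decorations = get_decorations(log, net, initial_marking, final_marking, measure="frequency")
    performance_decorations = get_decorations(log, net, initial_marking, final_marking,
                                              parameters={"aggregationMeasure": "median"},
                                              measure="performance")
    return frequency_decorations, performance_decorations
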
def get_map_from_log_and_net(log, net, initial_marking, final_marking, force_distribution=None, parameters=None):
    """
    Get transition stochastic distribution map given the log and the Petri net

    Parameters
    -----------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking of the Petri net
    final_marking
        Final marking of the Petri net
    force_distribution
        If provided, distribution to force usage (e.g. EXPONENTIAL)
    parameters
        Parameters of the algorithm, including:
        PARAM_ACTIVITY_KEY -> activity name
        PARAM_TIMESTAMP_KEY -> timestamp key

    Returns
    -----------
    stochastic_map
        Map associating a random variable to each transition
    """
    stochastic_map = {}

    if parameters is None:
        parameters = {}

    activity_key = parameters[PARAM_ACTIVITY_KEY] if PARAM_ACTIVITY_KEY in parameters else log_lib.util.xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[PARAM_TIMESTAMP_KEY] if PARAM_TIMESTAMP_KEY in parameters else "time:timestamp"

    parameters_variants = {PARAM_ACTIVITY_KEY: activity_key}
    variants_idx = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters_variants)
    variants = variants_module.convert_variants_trace_idx_to_trace_obj(log, variants_idx)

    parameters_tr = {PARAM_ACTIVITY_KEY: activity_key, "variants": variants}

    # do the token-based replay
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters_tr)

    element_statistics = performance_map.single_element_statistics(log, net, initial_marking, aligned_traces,
                                                                   variants_idx, activity_key=activity_key,
                                                                   timestamp_key=timestamp_key)

    for el in element_statistics:
        if type(el) is PetriNet.Transition and "performance" in element_statistics[el]:
            values = element_statistics[el]["performance"]

            # fit a random variable (optionally of a forced distribution type) to the durations
            rand = RandomVariable()
            rand.calculate_parameters(values, force_distribution=force_distribution)

            no_of_times_enabled = element_statistics[el]['no_of_times_enabled']
            no_of_times_activated = element_statistics[el]['no_of_times_activated']

            # the firing weight is the ratio between activations and enablements of the transition
            if no_of_times_enabled > 0:
                rand.set_weight(float(no_of_times_activated) / float(no_of_times_enabled))
            else:
                rand.set_weight(0.0)

            stochastic_map[el] = rand

    return stochastic_map
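

# Illustrative sketch (not part of the original module): inspect the stochastic map by
# printing, for each visible transition, the fitted distribution and its firing weight.
# The helper name is hypothetical, and get_weight() is assumed to be the getter matching
# the set_weight() call used above.
def print_stochastic_map(log, net, initial_marking, final_marking):
    stochastic_map = get_map_from_log_and_net(log, net, initial_marking, final_marking)
    for trans, random_variable in stochastic_map.items():
        if trans.label is not None:
            # str(random_variable) renders the fitted distribution and its parameters
            print(trans.label, str(random_variable), random_variable.get_weight())
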
def apply(df, discovery_algorithm=discover_inductive, parameters=None):
    """
    Discovers, for each perspective (non-event column) of the dataframe, a Petri net and
    annotates it with frequency and performance information obtained via token-based replay

    Parameters
    ------------
    df
        Dataframe (in "succint" or exploded form)
    discovery_algorithm
        Discovery algorithm applied to the log of each perspective (default: discover_inductive)
    parameters
        Parameters of the algorithm, including:
        - "allowed_activities": per-perspective collection of activities to keep
        - "debug": enables progress prints (default: True)
        - "min_node_freq" / "min_edge_freq": frequency thresholds used to clean the dataframe

    Returns
    ------------
    ret
        Dictionary containing, per perspective, the discovered nets, activity counts, replay results,
        aggregated frequency/performance statistics and computation times
    """
    if parameters is None:
        parameters = {}

    allowed_activities = parameters["allowed_activities"] if "allowed_activities" in parameters else None
    debug = parameters["debug"] if "debug" in parameters else True

    try:
        # explode the dataframe if it is provided in the succint form
        if df.type == "succint":
            df = succint_mdl_to_exploded_mdl.apply(df)
            df.type = "exploded"
    except:
        pass

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    min_node_freq = parameters["min_node_freq"] if "min_node_freq" in parameters else 0
    min_edge_freq = parameters["min_edge_freq"] if "min_edge_freq" in parameters else 0

    df = clean_frequency.apply(df, min_node_freq)
    df = clean_arc_frequency.apply(df, min_edge_freq)

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    # the perspectives are all the columns that do not describe the event itself
    persps = [x for x in df.columns if not x.startswith("event_")]

    ret = {}
    ret["nets"] = {}
    ret["act_count"] = {}
    ret["replay"] = {}
    ret["group_size_hist"] = {}
    ret["act_count_replay"] = {}
    ret["group_size_hist_replay"] = {}
    ret["aligned_traces"] = {}
    ret["place_fitness_per_trace"] = {}
    ret["aggregated_statistics_frequency"] = {}
    ret["aggregated_statistics_performance_min"] = {}
    ret["aggregated_statistics_performance_max"] = {}
    ret["aggregated_statistics_performance_median"] = {}
    ret["aggregated_statistics_performance_mean"] = {}

    diff_log = 0
    diff_model = 0
    diff_token_replay = 0
    diff_performance_annotation = 0
    diff_basic_stats = 0

    for persp in persps:
        aa = time.time()
        if debug:
            print(persp, "getting log")
        # project the dataframe on the current perspective to obtain an event log
        log = algorithm.apply(df, persp, parameters=parameters)
        if debug:
            print(len(log))

        if allowed_activities is not None:
            if persp not in allowed_activities:
                continue
            filtered_log = attributes_filter.apply_events(log, allowed_activities[persp])
        else:
            filtered_log = log
        bb = time.time()
        diff_log += (bb - aa)

        # filtered_log = variants_filter.apply_auto_filter(deepcopy(filtered_log), parameters={"decreasingFactor": 0.5})

        if debug:
            print(len(log))
            print(persp, "got log")

        cc = time.time()
        # net, im, fm = inductive_miner.apply(filtered_log)
        net, im, fm = discovery_algorithm(filtered_log)

        """if persp == "items":
            trans_map = {t.label: t for t in net.transitions}
            source_place_it = list(trans_map["item out of stock"].in_arcs)[0].source
            target_place_re = list(trans_map["reorder item"].out_arcs)[0].target
            skip_trans_1 = PetriNet.Transition(str(uuid.uuid4()), None)
            net.transitions.add(skip_trans_1)
            add_arc_from_to(source_place_it, skip_trans_1, net)
            add_arc_from_to(skip_trans_1, target_place_re, net)"""

        # net = reduce_petri_net(net)
        dd = time.time()
        diff_model += (dd - cc)

        # net, im, fm = alpha_miner.apply(filtered_log)
        if debug:
            print(persp, "got model")

        xx1 = time.time()
        activ_count = algorithm.apply(df, persp, variant="activity_occurrence", parameters=parameters)
        if debug:
            print(persp, "got activ_count")
        xx2 = time.time()

        ee = time.time()
        variants_idx = variants_module.get_variants_from_log_trace_idx(log)
        # variants = variants_module.convert_variants_trace_idx_to_trace_obj(log, variants_idx)
        # parameters_tr = {PARAM_ACTIVITY_KEY: "concept:name", "variants": variants}
        if debug:
            print(persp, "got variants")

        # token-based replay with place/transition fitness computation enabled
        aligned_traces, place_fitness_per_trace, transition_fitness_per_trace, notexisting_activities_in_model = tr_factory.apply(
            log, net, im, fm, parameters={"enable_pltr_fitness": True, "disable_variants": True})
        if debug:
            print(persp, "done tbr")

        element_statistics = performance_map.single_element_statistics(log, net, im, aligned_traces, variants_idx)
        if debug:
            print(persp, "done element_statistics")
        ff = time.time()
        diff_token_replay += (ff - ee)

        aggregated_statistics = performance_map.aggregate_statistics(element_statistics)
        if debug:
            print(persp, "done aggregated_statistics")

        element_statistics_performance = performance_map.single_element_statistics(log, net, im, aligned_traces,
                                                                                   variants_idx)
        if debug:
            print(persp, "done element_statistics_performance")
        gg = time.time()

        aggregated_statistics_performance_min = performance_map.aggregate_statistics(
            element_statistics_performance, measure="performance", aggregation_measure="min")
        aggregated_statistics_performance_max = performance_map.aggregate_statistics(
            element_statistics_performance, measure="performance", aggregation_measure="max")
        aggregated_statistics_performance_median = performance_map.aggregate_statistics(
            element_statistics_performance, measure="performance", aggregation_measure="median")
        aggregated_statistics_performance_mean = performance_map.aggregate_statistics(
            element_statistics_performance, measure="performance", aggregation_measure="mean")
        hh = time.time()
        diff_performance_annotation += (hh - ee)

        if debug:
            print(persp, "done aggregated_statistics_performance")

        group_size_hist = algorithm.apply(df, persp, variant="group_size_hist", parameters=parameters)
        if debug:
            print(persp, "done group_size_hist")

        # collect, for each transition label, the (case id, event id) pairs of the fitting traces
        # in which the corresponding activity occurs
        occurrences = {}
        for trans in transition_fitness_per_trace:
            occurrences[trans.label] = set()
            for trace in transition_fitness_per_trace[trans]["fit_traces"]:
                if not trace in transition_fitness_per_trace[trans]["underfed_traces"]:
                    case_id = trace.attributes["concept:name"]
                    for event in trace:
                        if event["concept:name"] == trans.label:
                            occurrences[trans.label].add((case_id, event["event_id"]))
            # print(transition_fitness_per_trace[trans])

        # number of distinct event identifiers per activity
        len_different_ids = {}
        for act in occurrences:
            len_different_ids[act] = len(set(x[1] for x in occurrences[act]))

        # per activity: sorted list of the number of occurrences per case
        eid_acti_count = {}
        for act in occurrences:
            eid_acti_count[act] = {}
            for x in occurrences[act]:
                if not x[0] in eid_acti_count[act]:
                    eid_acti_count[act][x[0]] = 0
                eid_acti_count[act][x[0]] = eid_acti_count[act][x[0]] + 1
            eid_acti_count[act] = sorted(list(eid_acti_count[act].values()))

        ii = time.time()
        diff_basic_stats += (ii - hh) + (xx2 - xx1)

        ret["nets"][persp] = [net, im, fm]
        ret["act_count"][persp] = activ_count
        ret["aligned_traces"][persp] = aligned_traces
        ret["place_fitness_per_trace"][persp] = place_fitness_per_trace
        ret["aggregated_statistics_frequency"][persp] = aggregated_statistics
        ret["aggregated_statistics_performance_min"][persp] = aggregated_statistics_performance_min
        ret["aggregated_statistics_performance_max"][persp] = aggregated_statistics_performance_max
        ret["aggregated_statistics_performance_median"][persp] = aggregated_statistics_performance_median
        ret["aggregated_statistics_performance_mean"][persp] = aggregated_statistics_performance_mean
        ret["replay"][persp] = aggregated_statistics
        ret["group_size_hist"][persp] = group_size_hist
        ret["act_count_replay"][persp] = len_different_ids
        ret["group_size_hist_replay"][persp] = eid_acti_count

    ret["computation_statistics"] = {"diff_log": diff_log, "diff_model": diff_model,
                                     "diff_token_replay": diff_token_replay,
                                     "diff_performance_annotation": diff_performance_annotation,
                                     "diff_basic_stats": diff_basic_stats}

    return ret
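

# Usage sketch (assumed caller code, not part of the original file): run the multi-perspective
# discovery defined above on an exploded dataframe, pruning rare nodes and arcs and silencing
# the per-perspective progress prints. The helper name is hypothetical.
def discover_all_perspectives(df):
    # min_node_freq / min_edge_freq are the cleaning thresholds read at the top of apply();
    # debug=False disables the progress prints
    return apply(df, parameters={"min_node_freq": 5, "min_edge_freq": 5, "debug": False})
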