def test_tokenreplay(self):
    """Exercise token-based replay plus the token-based evaluation metrics."""
    from pm4py.algo.discovery.alpha import factory as alpha_miner
    from pm4py.algo.conformance.tokenreplay import factory as token_replay
    from pm4py.evaluation.replay_fitness import factory as rp_fitness_evaluator
    from pm4py.evaluation.precision import factory as precision_evaluator
    from pm4py.evaluation.generalization import factory as generalization_evaluation

    log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    net, im, fm = alpha_miner.apply(log)
    # run both replay variants; the "backwards" result overwrites the first
    replayed_traces = token_replay.apply(log, net, im, fm,
                                         variant="token_replay")
    replayed_traces = token_replay.apply(log, net, im, fm,
                                         variant="backwards")
    fitness = rp_fitness_evaluator.apply(
        log, net, im, fm, variant=rp_fitness_evaluator.TOKEN_BASED)
    evaluation = rp_fitness_evaluator.evaluate(
        replayed_traces, variant=rp_fitness_evaluator.TOKEN_BASED)
    precision = precision_evaluator.apply(
        log, net, im, fm, variant=precision_evaluator.ETCONFORMANCE_TOKEN)
    generalization = generalization_evaluation.apply(
        log, net, im, fm,
        variant=generalization_evaluation.GENERALIZATION_TOKEN)
def apply(log, petri_net, initial_marking, final_marking, parameters=None):
    """
    Apply token replay fitness evaluation.

    Parameters
    -----------
    log
        Trace log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters

    Returns
    -----------
    dictionary
        Containing two keys (percFitTraces and averageFitness)
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  DEFAULT_NAME_KEY)
    # remaining tokens must count against fitness for this metric
    replay_parameters = {
        PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key,
        "consider_remaining_in_fitness": True,
    }
    replay_results = token_replay.apply(log, petri_net, initial_marking,
                                        final_marking,
                                        parameters=replay_parameters)
    return evaluate(replay_results)
def execute_script():
    """Discover a model with the inductive miner and print a backwards replay."""
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    log = xes_importer.apply(log_path)
    net, im, fm = inductive_miner.apply(log)
    # perform the backwards token-based replay
    print(tr_factory.apply(log, net, im, fm, variant="backwards"))
def replay2(log, net, initial_marking, final_marking):
    """
    Replay the log on the net and return the fraction of perfectly
    fitting traces.

    Parameters
    ------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ------------
    float
        Fraction of traces with trace_is_fit == True, in [0, 1];
        0.0 when the replay produced no results (previously this
        raised ZeroDivisionError on an empty log).
    """
    replay_result = token_replay.apply(log, net, initial_marking,
                                       final_marking)
    if not replay_result:
        # guard: an empty replay result would otherwise divide by zero
        return 0.0
    fit_count = sum(1 for res in replay_result if res['trace_is_fit'])
    return fit_count / len(replay_result)
def get_transition_performance_with_token_replay(log, net, im, fm):
    """
    Gets the transition performance through the usage of token-based replay

    Parameters
    -------------
    log
        Event log
    net
        Petri net
    im
        Initial marking
    fm
        Final marking

    Returns
    --------------
    transition_performance
        Dictionary where each transition label is associated to performance
        measures: sorted "all_values", per-case "case_association",
        "mean" and "median"
    """
    variants_idx = variants_module.get_variants_from_log_trace_idx(log)
    aligned_traces = token_replay.apply(log, net, im, fm)
    # statistics per model element (places and transitions) from the replay
    element_statistics = performance_map.single_element_statistics(
        log, net, im, aligned_traces, variants_idx)
    transition_performance = {}
    for el in element_statistics:
        # only visible (labeled) transitions are reported
        if type(el) is PetriNet.Transition and el.label is not None:
            if "log_idx" in element_statistics[
                    el] and "performance" in element_statistics[el]:
                if len(element_statistics[el]["performance"]) > 0:
                    transition_performance[str(el)] = {
                        "all_values": [],
                        "case_association": {},
                        "mean": 0.0,
                        "median": 0.0
                    }
                    # group each performance value under the trace index
                    # ("log_idx") it originated from, and collect all values
                    for i in range(len(element_statistics[el]["log_idx"])):
                        if not element_statistics[el]["log_idx"][
                                i] in transition_performance[str(
                                    el)]["case_association"]:
                            transition_performance[str(
                                el)]["case_association"][element_statistics[el]
                                                         ["log_idx"][i]] = []
                        transition_performance[str(el)]["case_association"][
                            element_statistics[el]["log_idx"][i]].append(
                                element_statistics[el]["performance"][i])
                        transition_performance[str(el)]["all_values"].append(
                            element_statistics[el]["performance"][i])
                    transition_performance[str(el)]["all_values"] = sorted(
                        transition_performance[str(el)]["all_values"])
                    if transition_performance[str(el)]["all_values"]:
                        transition_performance[str(el)]["mean"] = mean(
                            transition_performance[str(el)]["all_values"])
                        transition_performance[str(el)]["median"] = median(
                            transition_performance[str(el)]["all_values"])
    return transition_performance
def get_decorations(log, net, initial_marking, final_marking, parameters=None, measure="frequency"):
    """
    Calculate decorations in order to annotate the Petri net

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated to the algorithm
    measure
        Measure to represent on the process model (frequency/performance)

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}
    aggregation_measure = parameters.get("aggregationMeasure")
    activity_key = parameters[
        PARAM_ACTIVITY_KEY] if PARAM_ACTIVITY_KEY in parameters else log_lib.util.xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[PARAM_TIMESTAMP_KEY] if PARAM_TIMESTAMP_KEY in parameters else "time:timestamp"
    parameters_variants = {PARAM_ACTIVITY_KEY: activity_key}
    variants_idx = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters_variants)
    variants = variants_module.convert_variants_trace_idx_to_trace_obj(log, variants_idx)
    parameters_tr = {PARAM_ACTIVITY_KEY: activity_key, "variants": variants}
    # do the replay
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters_tr)
    # apply petri_reduction technique in order to simplify the Petri net
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})
    # fix: `net` was missing from this call — single_element_statistics takes
    # (log, net, initial_marking, aligned_traces, variants_idx), as in the
    # other call sites of this helper
    element_statistics = performance_map.single_element_statistics(log, net, initial_marking,
                                                                   aligned_traces, variants_idx,
                                                                   activity_key=activity_key,
                                                                   timestamp_key=timestamp_key)
    aggregated_statistics = performance_map.aggregate_statistics(element_statistics, measure=measure,
                                                                 aggregation_measure=aggregation_measure)
    return aggregated_statistics
def test_heu_log(self):
    """Run the full token/alignment evaluation pipeline on a heuristics-miner model."""
    log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    net, im, fm = heuristics_miner.apply(log)
    model_args = (log, net, im, fm)
    aligned_traces_tr = tr_factory.apply(*model_args)
    aligned_traces_alignments = align_factory.apply(*model_args)
    evaluation = eval_factory.apply(*model_args)
    fitness = rp_fit_factory.apply(*model_args)
    precision = precision_factory.apply(*model_args)
    generalization = generalization_factory.apply(*model_args)
    simplicity = simplicity_factory.apply(net)
def test_inductiveminer_df(self):
    """Run the evaluation pipeline on an inductive-miner model mined from a dataframe."""
    log = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    net, im, fm = inductive_miner.apply(log)
    model_args = (log, net, im, fm)
    aligned_traces_tr = tr_factory.apply(*model_args)
    aligned_traces_alignments = align_factory.apply(*model_args)
    evaluation = eval_factory.apply(*model_args)
    fitness = rp_fit_factory.apply(*model_args)
    precision = precision_factory.apply(*model_args)
    generalization = generalization_factory.apply(*model_args)
    simplicity = simplicity_factory.apply(net)
def apply(log, net, marking, final_marking, parameters=None):
    """
    Get ET Conformance precision

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key

    Returns
    ----------
    float
        Precision value (0.0 when no transitions were activated)
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(PARAM_ACTIVITY_KEY,
                                  log_lib.util.xes.DEFAULT_NAME_KEY)
    sum_ee = 0
    sum_at = 0
    prefixes, prefix_count = get_log_prefixes(log, activity_key=activity_key)
    prefixes_keys = list(prefixes.keys())
    fake_log = form_fake_log(prefixes_keys, activity_key=activity_key)
    # replay options: stop as soon as a prefix is unfit, do not complete
    # to the final marking, ignore remaining tokens in fitness
    replay_parameters = {
        "consider_remaining_in_fitness": False,
        "try_to_reach_final_marking_through_hidden": False,
        "stop_immediately_unfit": True,
        "walk_through_hidden_trans": True,
        PARAM_ACTIVITY_KEY: activity_key
    }
    replay_results = token_replay.apply(fake_log, net, marking, final_marking,
                                        parameters=replay_parameters)
    for idx, res in enumerate(replay_results):
        if not res["trace_is_fit"]:
            continue
        prefix_key = prefixes_keys[idx]
        observed_activities = set(prefixes[prefix_key])
        enabled_labels = {
            t.label
            for t in res["enabled_transitions_in_marking"]
            if t.label is not None
        }
        # activated transitions weighted by how often the prefix occurs
        sum_at += len(enabled_labels) * prefix_count[prefix_key]
        # escaping edges: enabled in the model but never observed in the log
        sum_ee += len(enabled_labels.difference(observed_activities)) \
            * prefix_count[prefix_key]
    if sum_at > 0:
        return 1 - float(sum_ee) / float(sum_at)
    return 0.0
def execute_script():
    """
    Run token-based replay diagnostics on the receipt log: duration
    diagnostics plus decision-tree root-cause analysis, both for unfit
    transitions and for activities missing from the model.
    """
    log = xes_importer.import_log(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    filtered_log = auto_filter.auto_filter.apply_auto_filter(log)
    # mine the model on the filtered log, but replay the FULL log on it
    net, initial_marking, final_marking = inductive_miner.apply(filtered_log)
    # enable_pltr_fitness makes the replay also return place/transition
    # fitness and the activities that could not be replayed
    replayed_traces, place_fitness, trans_fitness, unwanted_activities = token_based_replay.apply(
        log,
        net,
        initial_marking,
        final_marking,
        parameters={
            "disable_variants": True,
            "enable_pltr_fitness": True
        })
    # duration-based diagnostics for unfit transitions / missing activities
    trans_diagnostics = duration_diagnostics.diagnose_from_trans_fitness(
        log, trans_fitness)
    act_diagnostics = duration_diagnostics.diagnose_from_notexisting_activities(
        log, unwanted_activities)
    for trans in trans_diagnostics:
        print(trans, trans_diagnostics[trans])
    for act in act_diagnostics:
        print(act, act_diagnostics[act])
    # build decision trees
    string_attributes = ["org:group"]
    numeric_attributes = []
    parameters = {
        "string_attributes": string_attributes,
        "numeric_attributes": numeric_attributes
    }
    trans_root_cause = root_cause_analysis.diagnose_from_trans_fitness(
        log, trans_fitness, parameters=parameters)
    print("trans_root_cause=", trans_root_cause)
    for trans in trans_root_cause:
        # fitted classifier plus the metadata needed to visualize it
        clf = trans_root_cause[trans]["clf"]
        feature_names = trans_root_cause[trans]["feature_names"]
        classes = trans_root_cause[trans]["classes"]
        # visualization could be called
        # gviz = dt_vis_factory.apply(clf, feature_names, classes)
        # dt_vis_factory.view(gviz)
    act_root_cause = root_cause_analysis.diagnose_from_notexisting_activities(
        log, unwanted_activities, parameters=parameters)
    print("act_root_cause=", act_root_cause)
    for act in act_root_cause:
        clf = act_root_cause[act]["clf"]
        feature_names = act_root_cause[act]["feature_names"]
        classes = act_root_cause[act]["classes"]
def test_inductiveminer_stream(self):
    """Run the evaluation pipeline on an inductive-miner model mined from a stream."""
    stream = csv_importer.apply(
        os.path.join("input_data", "running-example.csv"))
    net, im, fm = inductive_miner.apply(stream)
    model_args = (stream, net, im, fm)
    aligned_traces_tr = tr_factory.apply(*model_args)
    aligned_traces_alignments = align_factory.apply(*model_args)
    evaluation = eval_factory.apply(*model_args)
    fitness = rp_fit_factory.apply(*model_args)
    precision = precision_factory.apply(*model_args)
    generalization = generalization_factory.apply(*model_args)
    simplicity = simplicity_factory.apply(net)
def apply(log, petri_net, initial_marking, final_marking, parameters=None):
    """
    Calculates generalization on the provided log and Petri net.

    The approach has been suggested by the paper
    Buijs, Joos CAM, Boudewijn F. van Dongen, and Wil MP van der Aalst.
    "Quality dimensions in process discovery: The importance of fitness,
    precision, generalization and simplicity."
    International Journal of Cooperative Information Systems 23.01 (2014): 1440001.

    A token replay is applied and, for each transition, we can measure the
    number of occurrences in the replay. The following formula is applied for
    generalization:

           \\sum_{t \\in transitions} (math.sqrt(1.0/(n_occ_replay(t)))
    1 -    ----------------------------------------------------------
                             # transitions

    Parameters
    -----------
    log
        Trace log
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Algorithm parameters

    Returns
    -----------
    generalization
        Generalization measure
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(PARAM_ACTIVITY_KEY,
                                  log_lib.util.xes.DEFAULT_NAME_KEY)
    replay_parameters = {
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
    }
    replay_results = token_replay.apply(log, petri_net, initial_marking,
                                        final_marking,
                                        parameters=replay_parameters)
    return get_generalization(petri_net, replay_results)
def generate_replay_result(xes_test_log, petri_net_train, initial_marking, final_marking):
    """
    Replay a test log on a trained Petri net, printing the raw replay
    result and the token-based log fitness.

    Parameters
    ------------
    xes_test_log
        Test event log
    petri_net_train
        Petri net trained/discovered on the training log
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    try:
        # apply token replay to the net, initial and final marking
        replay_result = token_replay.apply(xes_test_log, petri_net_train,
                                           initial_marking, final_marking)
        print("replay result: " + str(replay_result) + "\n")
        # verify log fitness
        log_fitness = replay_fitness_factory.evaluate(replay_result,
                                                      variant="token_replay")
        # fix: add the ": " separator (was "log_fitness" + value, printed
        # with no separator, unlike the "replay result: " line above)
        print("log_fitness: " + str(log_fitness) + "\n")
    except TypeError:
        print("Please check input values")
def execute_script():
    """Mine with the alpha miner, visualize the net, replay the log and report the fitting-trace percentage."""
    log = xes_importer.import_log(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    net, marking, final_marking = alpha_factory.apply(log)
    for place in marking:
        print("initial marking " + place.name)
    for place in final_marking:
        print("final marking " + place.name)
    gviz = pn_vis_factory.apply(net, marking, final_marking,
                                parameters={"format": "svg"})
    pn_vis_factory.view(gviz)
    print("started token replay")
    aligned_traces = token_replay.apply(log, net, marking, final_marking)
    num_fit = len([res for res in aligned_traces if res['trace_is_fit']])
    perc_fitness = num_fit / len(aligned_traces) if len(aligned_traces) > 0 else 0.00
    print("perc_fitness=", perc_fitness)
def execute_script():
    """Token-based replay duration diagnostics on the receipt log."""
    log = xes_importer.import_log(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    filtered_log = auto_filter.auto_filter.apply_auto_filter(log)
    # model is mined on the filtered log; the full log is replayed on it
    net, initial_marking, final_marking = inductive_miner.apply(filtered_log)
    replay_parameters = {"disable_variants": True, "enable_pltr_fitness": True}
    replayed_traces, place_fitness, trans_fitness, unwanted_activities = \
        token_based_replay.apply(log, net, initial_marking, final_marking,
                                 parameters=replay_parameters)
    trans_diagnostics = duration_diagnostics.diagnose_from_trans_fitness(
        log, trans_fitness)
    act_diagnostics = duration_diagnostics.diagnose_from_notexisting_activities(
        log, unwanted_activities)
    for trans in trans_diagnostics:
        print(trans, trans_diagnostics[trans])
    for act in act_diagnostics:
        print(act, act_diagnostics[act])
def get_map_from_log_and_net(log, net, initial_marking, final_marking, force_distribution=None, parameters=None):
    """
    Get transition stochastic distribution map given the log and the Petri net

    Parameters
    -----------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking of the Petri net
    final_marking
        Final marking of the Petri net
    force_distribution
        If provided, distribution to force usage (e.g. EXPONENTIAL)
    parameters
        Parameters of the algorithm, including:
            PARAM_ACTIVITY_KEY -> activity name
            PARAM_TIMESTAMP_KEY -> timestamp key

    Returns
    -----------
    stochastic_map
        Map that to each transition associates a random variable
    """
    stochastic_map = {}
    if parameters is None:
        parameters = {}
    activity_key = parameters[
        PARAM_ACTIVITY_KEY] if PARAM_ACTIVITY_KEY in parameters else log_lib.util.xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[
        PARAM_TIMESTAMP_KEY] if PARAM_TIMESTAMP_KEY in parameters else "time:timestamp"
    parameters_variants = {PARAM_ACTIVITY_KEY: activity_key}
    variants_idx = variants_module.get_variants_from_log_trace_idx(
        log, parameters=parameters_variants)
    variants = variants_module.convert_variants_trace_idx_to_trace_obj(
        log, variants_idx)
    parameters_tr = {PARAM_ACTIVITY_KEY: activity_key, "variants": variants}
    # do the replay
    aligned_traces = token_replay.apply(log, net, initial_marking,
                                        final_marking,
                                        parameters=parameters_tr)
    # performance values per model element, derived from the replay
    element_statistics = performance_map.single_element_statistics(
        log,
        net,
        initial_marking,
        aligned_traces,
        variants_idx,
        activity_key=activity_key,
        timestamp_key=timestamp_key)
    for el in element_statistics:
        # only transitions that have recorded performance values get a
        # random variable fitted on those values
        if type(
                el
        ) is PetriNet.Transition and "performance" in element_statistics[el]:
            values = element_statistics[el]["performance"]
            rand = RandomVariable()
            rand.calculate_parameters(values,
                                      force_distribution=force_distribution)
            no_of_times_enabled = element_statistics[el]['no_of_times_enabled']
            no_of_times_activated = element_statistics[el][
                'no_of_times_activated']
            # weight = activation ratio: how often the transition fired
            # out of the times it was enabled during the replay
            if no_of_times_enabled > 0:
                rand.set_weight(
                    float(no_of_times_activated) / float(no_of_times_enabled))
            else:
                rand.set_weight(0.0)
            stochastic_map[el] = rand
    return stochastic_map
def apply(trace_log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an
    initial and final marking

    Parameters
    -----------
    trace_log
        Trace log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of
            the log to use as activity name (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    # apply the reduction by default only on very small logs
    # fix: the default previously evaluated APPLY_REDUCTION_ON_SMALL_LOG and
    # MAX_LOG_SIZE_FOR_REDUCTION (a truthy constant) without ever comparing
    # against the log size, so reduction was enabled regardless of log size
    enable_reduction = parameters[
        "enable_reduction"] if "enable_reduction" in parameters else (
            shared_constants.APPLY_REDUCTION_ON_SMALL_LOG
            and len(trace_log) <= shared_constants.MAX_LOG_SIZE_FOR_REDUCTION)
    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(
        trace_log, parameters={
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
        }).items() if v > 0]
    # get the activities in the log
    activities = attributes_filter.get_attribute_values(
        trace_log, activity_key)
    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in trace_log]
    if traces_length:
        # reuse the already-computed lengths instead of re-scanning the log
        contains_empty_traces = min(traces_length) == 0
    net, initial_marking, final_marking = apply_dfg(
        dfg,
        parameters=parameters,
        activities=activities,
        contains_empty_traces=contains_empty_traces)
    if enable_reduction:
        # do the replay
        aligned_traces = token_replay.apply(trace_log, net, initial_marking,
                                            final_marking,
                                            parameters=parameters)
        # apply petri_reduction technique in order to simplify the Petri net
        net = petri_cleaning.petri_reduction_treplay(
            net, parameters={"aligned_traces": aligned_traces})
    return net, initial_marking, final_marking
def get_replay_result(log, model, initial_marking, final_marking):
    """Run token-based replay of the log on the model and return the per-trace results."""
    return token_replay.apply(log, model, initial_marking, final_marking)
def replay(log, net, initial_marking, final_marking):
    """Replay the log on the net and return the token-based log fitness evaluation."""
    replay_results = token_replay.apply(log, net, initial_marking,
                                        final_marking)
    return replay_fitness_factory.evaluate(replay_results,
                                           variant="token_replay")
def apply_token_replay(log, net, initial_marking, final_marking, parameters=None):
    """
    Calculates all metrics based on token-based replay and returns a unified dictionary

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters (activity key and the four metric weights)

    Returns
    -----------
    dictionary
        Dictionary containing fitness, precision, generalization and simplicity;
        along with the average weight of these metrics
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters[
        PARAM_ACTIVITY_KEY] if PARAM_ACTIVITY_KEY in parameters else log_lib.util.xes.DEFAULT_NAME_KEY
    # each weight defaults to 0.25 (equal weighting of the four metrics)
    fitness_weight = parameters[PARAM_FITNESS_WEIGHT] if PARAM_FITNESS_WEIGHT in parameters else 0.25
    precision_weight = parameters[PARAM_PRECISION_WEIGHT] if PARAM_PRECISION_WEIGHT in parameters else 0.25
    simplicity_weight = parameters[PARAM_SIMPLICITY_WEIGHT] if PARAM_SIMPLICITY_WEIGHT in parameters else 0.25
    generalization_weight = parameters[
        PARAM_GENERALIZATION_WEIGHT] if PARAM_GENERALIZATION_WEIGHT in parameters else 0.25
    # normalize the weights so they sum to 1
    sum_of_weights = (fitness_weight + precision_weight + simplicity_weight + generalization_weight)
    fitness_weight = fitness_weight / sum_of_weights
    precision_weight = precision_weight / sum_of_weights
    simplicity_weight = simplicity_weight / sum_of_weights
    generalization_weight = generalization_weight / sum_of_weights
    parameters_tr = {pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters_tr)
    # NOTE(review): the incoming `parameters` dict is rebound here; only the
    # activity key (under the literal key "activity_key") reaches the
    # precision computation below — confirm this key matches what
    # precision_token_based.apply expects
    parameters = {
        "activity_key": activity_key
    }
    fitness = fitness_token_based.evaluate(aligned_traces)
    precision = precision_token_based.apply(log, net, initial_marking, final_marking, parameters=parameters)
    generalization = generalization_token_based.get_generalization(net, aligned_traces)
    simplicity = simplicity_arc_degree.apply(net)
    # weighted average of the four metrics (fitness contributes via its
    # "averageFitness" entry)
    metrics_average_weight = fitness_weight * fitness["averageFitness"] + precision_weight * precision \
                             + generalization_weight * generalization + simplicity_weight * simplicity
    dictionary = {
        "fitness": fitness,
        "precision": precision,
        "generalization": generalization,
        "simplicity": simplicity,
        "metricsAverageWeight": metrics_average_weight
    }
    return dictionary
def apply(log, net, marking, final_marking, parameters=None):
    """
    Get ET Conformance precision

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key

    Returns
    ----------
    precision : float
        Precision value in [0, 1]
    """
    if parameters is None:
        parameters = {}
    cleaning_token_flood = parameters[
        "cleaning_token_flood"] if "cleaning_token_flood" in parameters else False
    activity_key = parameters[
        PARAM_ACTIVITY_KEY] if PARAM_ACTIVITY_KEY in parameters else log_lib.util.xes.DEFAULT_NAME_KEY
    # default value for precision, when no activated transitions (not even
    # by looking at the initial marking) are found
    precision = 1.0
    sum_ee = 0
    sum_at = 0
    # replay options: stop on the first unfit event, do not try to reach
    # the final marking, ignore remaining tokens in fitness
    parameters_tr = {
        "consider_remaining_in_fitness": False,
        "try_to_reach_final_marking_through_hidden": False,
        "stop_immediately_unfit": True,
        "walk_through_hidden_trans": True,
        "cleaning_token_flood": cleaning_token_flood,
        PARAM_ACTIVITY_KEY: activity_key
    }
    prefixes, prefix_count = precision_utils.get_log_prefixes(
        log, activity_key=activity_key)
    prefixes_keys = list(prefixes.keys())
    fake_log = precision_utils.form_fake_log(prefixes_keys,
                                             activity_key=activity_key)
    aligned_traces = token_replay.apply(fake_log,
                                        net,
                                        marking,
                                        final_marking,
                                        parameters=parameters_tr)
    # fix: also the empty prefix should be counted!
    # transitions enabled at the initial marking that never start a trace
    # in the log are escaping edges of the empty prefix
    start_activities = set(
        start_activities_filter.get_start_activities(log,
                                                     parameters=parameters))
    trans_en_ini_marking = set([
        x.label
        for x in get_visible_transitions_eventually_enabled_by_marking(
            net, marking)
    ])
    diff = trans_en_ini_marking.difference(start_activities)
    sum_at += len(log) * len(trans_en_ini_marking)
    sum_ee += len(log) * len(diff)
    # end fix
    for i in range(len(aligned_traces)):
        # only prefixes that replay perfectly contribute to the measure
        if aligned_traces[i]["trace_is_fit"]:
            log_transitions = set(prefixes[prefixes_keys[i]])
            activated_transitions_labels = set([
                x.label
                for x in aligned_traces[i]["enabled_transitions_in_marking"]
                if x.label is not None
            ])
            # weight by how many times this prefix occurs in the log
            sum_at += len(activated_transitions_labels) * prefix_count[
                prefixes_keys[i]]
            # escaping edges: enabled in the model but not observed in the log
            escaping_edges = activated_transitions_labels.difference(
                log_transitions)
            sum_ee += len(escaping_edges) * prefix_count[prefixes_keys[i]]
    if sum_at > 0:
        precision = 1 - float(sum_ee) / float(sum_at)
    return precision