def test_filtering_attributes_traces(self):
    # Smoke test: import the running-example log and run the trace-level
    # attribute filter on the value "reject request" in both modes. The test
    # passes as long as neither call raises; no result is inspected.

    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    input_log = os.path.join(INPUT_DATA_DIR, "running-example.xes")
    log = xes_importer.import_log(input_log)
    # Exercise the positive and the negative mode of the filter, so that both
    # settings of the "positive" parameter are covered by this test.
    log_positive = attributes_filter.apply(log, ["reject request"], parameters={"positive": True})
    log_negative = attributes_filter.apply(log, ["reject request"], parameters={"positive": False})
    del log_positive
    del log_negative
def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
    """
    Compare the case duration of fit and underfed executions of each
    transition that has at least one underfed trace

    Parameters
    -------------
    log
        Trace log
    trans_fitness
        For each transition, keeps track of unfit executions; the
        "underfed_traces" entry of each value is the one read here
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each problematic transition, a dictionary with the keys
        n_fit, n_underfed, fit_median_time, underfed_median_time and
        relative_throughput. Transitions for which either group of cases
        is empty are not reported.
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)

    # The filter is run on a private copy of the parameters so that the
    # "positive" flag does not leak into the caller's dictionary.
    filter_params = deepcopy(parameters)
    filter_params["positive"] = True

    diagnostics = {}
    for trans in trans_fitness:
        underfed_traces = trans_fitness[trans]["underfed_traces"]
        if len(underfed_traces) == 0:
            # Nothing to diagnose for this transition.
            continue

        log_with_activity = attributes_filter.apply(log, [trans.label], parameters=filter_params)

        # Split the log: underfed traces take precedence, the remaining
        # traces count as fit only if they survive the label filter.
        fit_cases = []
        underfed_cases = []
        for trace in log:
            if trace in underfed_traces:
                underfed_cases.append(trace)
                continue
            if trace in log_with_activity:
                fit_cases.append(trace)

        if not (fit_cases and underfed_cases):
            # A comparison needs both groups to be non-empty.
            continue

        fit_median_time = get_median_case_duration(fit_cases, timestamp_key=timestamp_key)
        underfed_median_time = get_median_case_duration(underfed_cases, timestamp_key=timestamp_key)

        # Avoid a division by zero when the fit cases have no duration.
        if fit_median_time > 0:
            relative_throughput = underfed_median_time / fit_median_time
        else:
            relative_throughput = 0

        diagnostics[trans] = {
            "n_fit": len(fit_cases),
            "n_underfed": len(underfed_cases),
            "fit_median_time": fit_median_time,
            "underfed_median_time": underfed_median_time,
            "relative_throughput": relative_throughput,
        }

    return diagnostics
def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, parameters=None):
    """
    Compare the case duration of traces containing an activity that is
    missing from the model against the traces left by the negative filter

    Parameters
    -------------
    log
        Trace log
    notexisting_activities_in_model
        Not existing activities in the model; each activity is associated
        with the collection of traces that is checked for membership here
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each problematic activity, a dictionary with the keys
        n_containing, n_fit, fit_median_time, containing_median_time and
        relative_throughput. Activities for which either group of cases
        is empty are not reported.
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)

    # Negative filtering on all the missing activities at once; done on a
    # private copy of the parameters so the caller's dictionary is untouched.
    filter_params = deepcopy(parameters)
    filter_params["positive"] = False
    missing_activities = list(notexisting_activities_in_model)
    log_without_missing = attributes_filter.apply(log, missing_activities, parameters=filter_params)

    diagnostics = {}
    for act in notexisting_activities_in_model:
        traces_with_act = notexisting_activities_in_model[act]

        # Traces recorded for this activity take precedence; the others are
        # considered fit only if they are part of the filtered log.
        fit_cases = []
        containing_cases = []
        for trace in log:
            if trace in traces_with_act:
                containing_cases.append(trace)
                continue
            if trace in log_without_missing:
                fit_cases.append(trace)

        if not (containing_cases and fit_cases):
            # A comparison needs both groups to be non-empty.
            continue

        fit_median_time = get_median_case_duration(fit_cases, timestamp_key=timestamp_key)
        containing_median_time = get_median_case_duration(containing_cases, timestamp_key=timestamp_key)

        # Avoid a division by zero when the fit cases have no duration.
        if fit_median_time > 0:
            relative_throughput = containing_median_time / fit_median_time
        else:
            relative_throughput = 0

        diagnostics[act] = {
            "n_containing": len(containing_cases),
            "n_fit": len(fit_cases),
            "fit_median_time": fit_median_time,
            "containing_median_time": containing_median_time,
            "relative_throughput": relative_throughput,
        }

    return diagnostics
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep (default: 20)
            discovery_algorithm -> Discovery algorithm to use; matched by substring
            ("alpha" or "inductive", default: "alphaclassic")
            desidered_output -> Desidered output of the algorithm (default: "petri")
            include_filtered_log -> Include the filtered log in the output (default: True)
            include_dfg_frequency -> Include the DFG of frequencies in the output (default: True)
            include_dfg_performance -> Include the DFG of performance in the output (default: True)
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            (default: True)
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
            (default: True)
        NOTE: when a dictionary is passed, it is updated in place with the
        resolved PARAMETER_CONSTANT_ATTRIBUTE_KEY and, if missing,
        PARAMETER_CONSTANT_ACTIVITY_KEY.
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary

    Returns
    -------------
    result
        The tuple (net, initial_marking, final_marking) when classic_output is
        set, a net has been discovered and desidered_output is "petri";
        otherwise a dictionary holding the requested objects that could be
        computed (possible keys: filtered_log, net, initial_marking,
        final_marking, bpmn_graph, dfg_frequency, dfg_performance,
        filtered_dfg_frequency, filtered_dfg_performance).
    """
    if parameters is None:
        parameters = {}

    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alphaclassic")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_log = parameters.get("include_filtered_log", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", True)
    include_filtered_dfg_frequency = parameters.get("include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get("include_filtered_dfg_performance", True)

    # Resolve the attribute used as activity: an explicit parameter wins,
    # otherwise ask the classifier search and fall back to the default name key.
    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    # Keep only the most frequent activities. The sort is stable, so ties keep
    # the iteration order of the counts; slicing already clamps to the length.
    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_by_frequency = sorted(activities_count_dictio,
                                     key=lambda activity: activities_count_dictio[activity],
                                     reverse=True)
    activities_keep_list = activities_by_frequency[:maximum_number_activities]
    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    # Algorithm-specific filtering; stays None for an unrecognized algorithm.
    filtered_log = None
    if "alpha" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "inductive" in discovery_algorithm:
        filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    dfg_frequency = None
    dfg_performance = None
    if include_dfg_frequency:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")

    # The filtered DFGs can only be computed when a filtered log exists;
    # without this guard an unrecognized discovery algorithm would hand
    # None to the DFG factory.
    filtered_dfg_frequency = None
    filtered_dfg_performance = None
    if filtered_log is not None:
        if include_filtered_dfg_frequency:
            filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters,
                                                       variant="frequency")
        if include_filtered_dfg_performance:
            filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters,
                                                         variant="performance")

    net = None
    initial_marking = None
    final_marking = None
    # No branch of this function produces a BPMN graph; the slot is kept so
    # that the output dictionary layout stays the same.
    bpmn_graph = None
    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)

    petri_requested = desidered_output == "petri"
    bpmn_requested = desidered_output == "bpmn"

    # (key, value, requested) triples in output order; an entry is reported
    # only when it was both computed and requested.
    candidates = [
        ("filtered_log", filtered_log, include_filtered_log),
        ("net", net, petri_requested),
        ("initial_marking", initial_marking, petri_requested),
        ("final_marking", final_marking, petri_requested),
        ("bpmn_graph", bpmn_graph, bpmn_requested),
        ("dfg_frequency", dfg_frequency, include_dfg_frequency),
        ("dfg_performance", dfg_performance, include_dfg_performance),
        ("filtered_dfg_frequency", filtered_dfg_frequency, include_filtered_dfg_frequency),
        ("filtered_dfg_performance", filtered_dfg_performance, include_filtered_dfg_performance),
    ]
    returned_dictionary = {}
    for key, value, requested in candidates:
        if value is not None and requested:
            returned_dictionary[key] = value

    # The classic output is only available for a discovered Petri net;
    # in every other case the detailed dictionary is returned.
    if classic_output and net is not None and petri_requested:
        return net, initial_marking, final_marking

    return returned_dictionary