def end_activities(log):
    """
    Computes summary statistics over the end activities of a log

    Parameters
    ------------
    log
        Event log

    Returns
    ------------
    features
        List containing, in this order: the number of entries of the dictionary returned by
        end_activities_filter.get_end_activities, followed by min, max, mean, median,
        standard deviation, variance, 25th percentile, 75th percentile, interquartile range,
        skewness and kurtosis of the values of that dictionary
    """
    occurrences_by_activity = end_activities_filter.get_end_activities(log)
    counts = list(occurrences_by_activity.values())

    return [
        len(occurrences_by_activity),
        np.min(counts),
        np.max(counts),
        np.mean(counts),
        np.median(counts),
        np.std(counts),
        np.var(counts),
        np.percentile(counts, 25),
        np.percentile(counts, 75),
        stats.iqr(counts),
        stats.skew(counts),
        stats.kurtosis(counts),
    ]
def test_21(self):
    """
    Smoke test: reads the end activities of the running example dataframe
    and applies the end activities filter keeping "pay compensation"
    """
    from pm4py.algo.filtering.pandas.end_activities import end_activities_filter
    from pm4py.util import constants

    running_example = self.load_running_example_df()
    observed_end_activities = end_activities_filter.get_end_activities(running_example)

    # column names are passed explicitly instead of relying on the filter defaults
    filter_parameters = {
        end_activities_filter.Parameters.CASE_ID_KEY: "case:concept:name",
        end_activities_filter.Parameters.ACTIVITY_KEY: "concept:name",
    }
    kept_rows = end_activities_filter.apply(running_example, ["pay compensation"],
                                            parameters=filter_parameters)
def apply(log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an initial and final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm (None is treated as an empty dictionary), including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)
        Missing activity/attribute keys are written into the passed dictionary.

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}

    activity_const = pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    attribute_const = pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY
    if activity_const not in parameters:
        parameters[activity_const] = xes_util.DEFAULT_NAME_KEY
    if attribute_const not in parameters:
        # the attribute key falls back to the activity key
        parameters[attribute_const] = parameters[activity_const]
    activity_key = parameters[activity_const]

    # get the DFG (only the activity key is forwarded; non-positive entries are dropped)
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={activity_const: activity_key}).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(start_activities_filter.get_start_activities(log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(end_activities_filter.get_end_activities(log, parameters=parameters).keys())

    # check if the log contains empty traces: a single pass, False for an empty log
    contains_empty_traces = any(len(trace) == 0 for trace in log)

    net, initial_marking, final_marking = apply_dfg(dfg, parameters=parameters, activities=activities,
                                                    contains_empty_traces=contains_empty_traces,
                                                    start_activities=start_activities,
                                                    end_activities=end_activities)

    return net, initial_marking, final_marking
def apply(log, parameters=None):
    """
    Gets the performance HNet

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm ("decreasingFactor" and the activity key are read here,
        the whole dictionary is forwarded to the filters and to the DFG factory)

    Returns
    ------------
    result
        Tuple of 14 elements: the result of get_base64_from_file on the "svg" rendering, None, "",
        "xes", the activities, the start activities, the end activities, the result of
        get_base64_from_file on the "dot" rendering, an empty list, "heuristics", "perf", None, ""
        and the activity key
    """
    if parameters is None:
        parameters = {}

    if "decreasingFactor" in parameters:
        dec_factor = parameters["decreasingFactor"]
    else:
        dec_factor = constants.DEFAULT_DEC_FACTOR

    if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = xes.DEFAULT_NAME_KEY

    # bound the number of activities first, then apply the automatic filter
    log = attributes_filter.filter_log_on_max_no_activities(log, max_no_activities=constants.MAX_NO_ACTIVITIES,
                                                            parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    occurrences = attributes_filter.get_attribute_values(filtered_log, activity_key)
    start_occurrences = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
    end_occurrences = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
    activities = list(occurrences.keys())
    start_activities = list(start_occurrences.keys())
    end_activities = list(end_occurrences.keys())

    frequency_dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    performance_dfg = dfg_factory.apply(filtered_log, variant="performance", parameters=parameters)

    heu_net = HeuristicsNet(frequency_dfg, performance_dfg=performance_dfg, activities=activities,
                            start_activities=start_activities, end_activities=end_activities,
                            activities_occurrences=occurrences)
    heu_net.calculate(dfg_pre_cleaning_noise_thresh=constants.DEFAULT_DFG_CLEAN_MULTIPLIER * dec_factor)

    # the net is rendered twice: once as svg and once as dot
    svg_rendering = heu_vis_factory.apply(heu_net, parameters={"format": "svg"})
    dot_rendering = heu_vis_factory.apply(heu_net, parameters={"format": "dot"})
    gviz_base64 = get_base64_from_file(dot_rendering.name)
    svg_base64 = get_base64_from_file(svg_rendering.name)

    return (svg_base64, None, "", "xes", activities, start_activities, end_activities, gviz_base64, [],
            "heuristics", "perf", None, "", activity_key)
def get_end_activities(self, parameters=None):
    """
    Gets the end activities from the log

    Parameters
    -------------
    parameters
        Optional dictionary of parameters; its activity key and attribute key entries
        are overwritten with self.activity_key before the call

    Returns
    -------------
    end_activities_dict
        Dictionary of end activities
    """
    effective_parameters = {} if parameters is None else parameters
    # both keys always point to the activity key configured on this object
    effective_parameters.update({
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: self.activity_key,
        constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: self.activity_key,
    })
    return end_activities_filter.get_end_activities(self.log, parameters=effective_parameters)
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = xes.DEFAULT_NAME_KEY

    start_activities = log_sa_filter.get_start_activities(log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())

    # plain DFG
    dfg = dfg_factory.apply(log, parameters=parameters)

    # DFG computed with "window" set to 2, on a deep copy so the caller's parameters are not touched
    window_two_parameters = deepcopy(parameters)
    window_two_parameters["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=window_two_parameters)

    # result of the "freq_triples" variant of the DFG factory
    freq_triples = dfg_factory.apply(log, parameters=parameters, variant="freq_triples")

    return apply_heu_dfg(dfg, activities=activities, activities_occurrences=activities_occurrences,
                         start_activities=start_activities, end_activities=end_activities,
                         dfg_window_2=dfg_window_2, freq_triples=freq_triples, parameters=parameters)
def apply_tree(log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm (None is treated as an empty dictionary), including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)
        A missing activity key is written into the passed dictionary.

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    activity_const = pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    if activity_const not in parameters:
        parameters[activity_const] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[activity_const]

    # get the DFG (only the activity key is forwarded; non-positive entries are dropped)
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={activity_const: activity_key}).items() if v > 0]

    # gets the start activities from the log
    start_activities = start_activities_filter.get_start_activities(log, parameters=parameters)
    # gets the end activities from the log
    end_activities = end_activities_filter.get_end_activities(log, parameters=parameters)

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # check if the log contains empty traces: a single pass, False for an empty log
    contains_empty_traces = any(len(trace) == 0 for trace in log)

    return apply_tree_dfg(dfg, parameters, activities=activities, contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities, end_activities=end_activities)
def read_xes(data_dir, dataset, aggregate_type, mode="pruning"):
    """
    Loads a dataset and returns its directly-follows graph

    Parameters
    ------------
    data_dir
        Directory containing the dataset file
    dataset
        Name of the dataset without extension. "BPIC14" and "Unrineweginfectie" are read from
        "<dataset>.csv"; every other name is read from "<dataset>.xes"
    aggregate_type
        AggregateType.FREQ selects the frequency DFG; any other value delegates to get_dfg_time
    mode
        Not used by this function (kept for backward compatibility)

    Returns
    ------------
    dfg
        Directly-follows graph
    """
    # CSV datasets: separator, then source columns for case id, timestamp and activity
    csv_layouts = {
        "BPIC14": (";", 'Incident ID', 'DateStamp', 'IncidentActivity_Type'),
        "Unrineweginfectie": (",", 'Patientnummer', 'Starttijd', 'Aciviteit'),
    }

    # exact match on the dataset name: the previous `dataset in "BPIC14"` was a substring
    # test, which also matched "", "BPIC", "14", ...
    layout = csv_layouts.get(dataset)
    if layout is not None:
        separator, case_column, timestamp_column, activity_column = layout
        data = csv_import_adapter.import_dataframe_from_path(os.path.join(data_dir, dataset + ".csv"),
                                                             sep=separator)
        data['case:concept:name'] = data[case_column]
        data['time:timestamp'] = data[timestamp_column]
        data['concept:name'] = data[activity_column]
        log = conversion_factory.apply(data)
    else:
        log = xes_import_factory.apply(os.path.join(data_dir, dataset + ".xes"))
        data = get_dataframe_from_event_stream(log)

    if aggregate_type == AggregateType.FREQ:
        dfg = dfg_factory.apply(log, variant="frequency")
    else:
        dfg = get_dfg_time(data, aggregate_type, dataset)

    return dfg
def apply(log, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm ("decreasingFactor" and the activity key are read here;
        the "format" entry of the dictionary is set to "svg")

    Returns
    ------------
    result
        Tuple of 14 elements: the result of get_base64_from_gviz on the tree visualization, None, "",
        "xes", the activities, the start activities, the end activities, the base64-encoded string
        form of the visualization object, an empty list, "tree", "freq", None, "" and the activity key
    """
    if parameters is None:
        parameters = {}

    if "decreasingFactor" in parameters:
        dec_factor = parameters["decreasingFactor"]
    else:
        dec_factor = constants.DEFAULT_DEC_FACTOR

    if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = xes.DEFAULT_NAME_KEY

    # bound the number of activities first, then apply the automatic filter
    log = attributes_filter.filter_log_on_max_no_activities(log, max_no_activities=constants.MAX_NO_ACTIVITIES,
                                                            parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    occurrences = attributes_filter.get_attribute_values(filtered_log, activity_key)
    activities = list(occurrences.keys())
    start_occurrences = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
    start_activities = list(start_occurrences.keys())
    end_occurrences = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
    end_activities = list(end_occurrences.keys())

    raw_dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    noise_threshold = dec_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER
    cleaned_dfg = clean_dfg_based_on_noise_thresh(raw_dfg, activities, noise_threshold, parameters=parameters)

    tree = inductive_miner.apply_tree_dfg(cleaned_dfg, parameters=parameters, activities=activities,
                                          start_activities=start_activities, end_activities=end_activities)

    # the visualization is requested in svg format
    parameters["format"] = "svg"
    gviz = pt_vis_factory.apply(tree, parameters=parameters)
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    return (get_base64_from_gviz(gviz), None, "", "xes", activities, start_activities, end_activities,
            gviz_base64, [], "tree", "freq", None, "", activity_key)
## Start activities from pm4py.algo.filtering.log.start_activities import start_activities_filter log_start = start_activities_filter.get_start_activities(log) log = start_activities_filter.apply_auto_filter( log, parameters={"decreasingFactor": 0.6}) print(start_activities_filter.get_start_activities(log)) ## End activities from pm4py.algo.filtering.log.end_activities import end_activities_filter log_end = end_activities_filter.get_end_activities(log) log_af_ea = end_activities_filter.apply_auto_filter( log, parameters={"decreasingFactor": 0.6}) print(end_activities_filter.get_end_activities(log_af_ea)) ## Other Events from pm4py.algo.filtering.log.attributes import attributes_filter from pm4py.util import constants log = attributes_filter.apply_auto_filter( log, parameters={ constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "concept:name", "decreasingFactor": 0.6 })
def test_20(self):
    """
    Smoke test: reads the end activities of the running example log
    and applies the end activities filter keeping "pay compensation"
    """
    from pm4py.algo.filtering.log.end_activities import end_activities_filter

    running_example = self.load_running_example_xes()
    observed_end_activities = end_activities_filter.get_end_activities(running_example)
    allowed_end_activities = ["pay compensation"]
    kept_traces = end_activities_filter.apply(running_example, allowed_end_activities)
result_num = []
print(model)
# only the tree with index 5 is processed here
tree = 5
tree_num = 0
tree_avg = 0
print(tree)
# samples are numbered 1..10
for sam in range(1, 11):
    print(sam)
    input_file_path = os.path.join("input_data", "df_complete_logs",
                                   "%d_1000_%d.xes" % (tree, sam))
    log = xes_importer.apply(input_file_path)
    # reference DFG and start/end activity sets of the imported log
    dfg_org = dfg_factory.apply(log)
    start_act = set(get_start_activities(log).keys())
    end_act = set(get_end_activities(log).keys())
    # make petri net for simulation
    net, im, fm = heu_factory.apply(log)
    num = len(dfg_org.keys())
    sim_log = sim_factory.apply(net,
                                im,
                                parameters={
                                    'maxTraceLength': 20,
                                    'noTraces': 1000
                                })
    # NOTE(review): dfg_algo is computed from `log`, while the start/end activity sets
    # below are computed from `sim_log` - confirm whether `sim_log` was intended here
    dfg_algo = dfg_factory.apply(log)
    start_act_algo = set(get_start_activities(sim_log).keys())
    end_act_algo = set(get_end_activities(sim_log).keys())
    score = 0
def detect_cut(self, second_iteration=False, parameters=None):
    """
    Detects a cut on the current log and creates one child subtree per resulting sub-log

    The base cases (empty log, single activity) are checked first; then xor, sequence,
    parallel and loop cuts are tried in this order. If nothing is found, the fall through
    is applied. The outcome is stored in self.detected_cut and the children are appended
    to self.children.

    Parameters
    ------------
    second_iteration
        Not used by this method
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name (default concept:name)

    Raises
    ------------
    Exception
        If networkx is not available
    """
    # pkgutil.find_loader is deprecated since Python 3.12 and removed in Python 3.14;
    # importlib.util.find_spec is the documented replacement
    import importlib.util

    if importlib.util.find_spec("networkx") is None:
        msg = "networkx is not available. inductive miner cannot be used!"
        logging.error(msg)
        raise Exception(msg)
    import networkx as nx

    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              pmutil.xes_constants.DEFAULT_NAME_KEY)

    def one_trace_per_variant(new_logs):
        # replaces (in place) every sub-log with its one-trace-per-variant version
        for index, sublog in enumerate(new_logs):
            new_logs[index] = filtering_utils.keep_one_trace_per_variant(sublog, parameters=parameters)
        return new_logs

    def append_children(new_logs):
        # builds one child subtree per sub-log; recursion happens through the child objects
        for sublog in new_logs:
            new_dfg = [(k, v) for k, v in dfg_inst.apply(sublog, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(sublog, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(sublog, parameters=parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(sublog, parameters=parameters).keys())
            self.children.append(
                SubtreePlain(sublog, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts,
                             self.rec_depth + 1,
                             noise_threshold=self.noise_threshold,
                             start_activities=start_activities,
                             end_activities=end_activities,
                             initial_start_activities=self.initial_start_activities,
                             initial_end_activities=self.initial_end_activities,
                             parameters=parameters))

    # check base cases:
    empty_log = base_case.empty_log(self.log)
    single_activity = base_case.single_activity(self.log, activity_key)
    if empty_log:
        self.detected_cut = 'empty_log'
        return
    if single_activity:
        self.detected_cut = 'single_activity'
        return

    # if no base cases are found, search for a cut:
    conn_components = detection_utils.get_connected_components(self.ingoing, self.outgoing, self.activities)
    this_nx_graph = transform_dfg_to_directed_nx_graph(self.dfg, activities=self.activities)
    strongly_connected_components = [list(x) for x in nx.strongly_connected_components(this_nx_graph)]

    # the following part searches for a cut in the current log
    # if a cut is found, the log is split according to the cut, the resulting logs are saved in new_logs
    # and one child is created for each of them
    xor_cut = self.detect_xor(conn_components)
    if xor_cut[0]:
        logging.debug("xor_cut")
        # NOTE(review): the xor cut is recorded with the label 'concurrent'; kept unchanged,
        # confirm that the consumers of detected_cut expect this label
        self.detected_cut = 'concurrent'
        new_logs = one_trace_per_variant(split.split_xor(xor_cut[1], self.log, activity_key))
        append_children(new_logs)
        return

    sequence_cut = cut_detection.detect_sequential_cut(self, self.dfg, strongly_connected_components)
    if sequence_cut[0]:
        logging.debug("sequence_cut")
        new_logs = one_trace_per_variant(split.split_sequence(sequence_cut[1], self.log, activity_key))
        self.detected_cut = "sequential"
        append_children(new_logs)
        return

    parallel_cut = self.detect_concurrent()
    if parallel_cut[0]:
        logging.debug("parallel_cut")
        new_logs = one_trace_per_variant(split.split_parallel(parallel_cut[1], self.log, activity_key))
        self.detected_cut = "parallel"
        append_children(new_logs)
        return

    loop_cut = self.detect_loop()
    if loop_cut[0]:
        logging.debug("loop_cut")
        new_logs = one_trace_per_variant(split.split_loop(loop_cut[1], self.log, activity_key))
        self.detected_cut = "loopCut"
        append_children(new_logs)
        return

    # if the code gets to this point, there is no base_case and no cut found in the log
    # therefore, we now apply fall through:
    self.apply_fall_through(parameters)
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep (default: 20)
            discovery_algorithm -> Discovery algorithm to use; this function handles values containing
            "alpha" or "dfg_mining" (default: alpha)
            desidered_output -> Desidered output of the algorithm (default: petri)
            include_filtered_log -> Include the filtered log in the output (default: True)
            include_dfg_frequency -> Include the DFG of frequencies in the output (default: True)
            include_dfg_performance -> Include the DFG of performance in the output (default: False)
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            (default: True)
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
            (default: False)
        The attribute key and the activity key are written into the passed dictionary when missing.
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary

    Returns
    -------------
    result
        (net, initial_marking, final_marking) when classic_output is set, a net was discovered and the
        desidered output is "petri"; otherwise a dictionary holding the requested objects that could
        be computed
    """
    if parameters is None:
        parameters = {}

    returned_dictionary = {}

    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alpha")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_log = parameters.get("include_filtered_log", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", False)
    include_filtered_dfg_frequency = parameters.get("include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get("include_filtered_dfg_performance", False)

    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        # no attribute key given: look for a classifier, fall back to the default name key
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    # keep only the most frequent activities (stable sort: ties keep the dictionary order)
    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_count_list = sorted(activities_count_dictio.items(), key=lambda x: x[1], reverse=True)
    activities_keep_list = [activity for activity, _ in activities_count_list[:maximum_number_activities]]

    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    filtered_log = None

    if "alpha" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)

    # the frequency DFG is also needed as input of the dfg_mining conversion
    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    # filtered_log stays None when the discovery algorithm is neither alpha nor dfg_mining;
    # in that case the filtered DFGs are skipped instead of passing None to the DFG factory
    if filtered_log is not None:
        if include_filtered_dfg_frequency:
            filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
        if include_filtered_dfg_performance:
            filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters,
                                                         variant="performance")

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)

        parameters_conv = {"start_activities": start_activities, "end_activities": end_activities}

        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)

    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log
    if net is not None and desidered_output == "petri":
        returned_dictionary["net"] = net
    if initial_marking is not None and desidered_output == "petri":
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and desidered_output == "petri":
        returned_dictionary["final_marking"] = final_marking
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking

    return returned_dictionary
sum_tree_num = 0
print(tree)
tree_avg_tel = 0
tree_avg_log = 0
tree_avg_sum = 0
# samples are numbered 1..10
for sam in range(1, 11):
    print(sam)
    path = os.path.join("input_data", "df_complete_logs",
                        "%d_1000_%d.xes" % (tree, sam))
    # the same file is imported twice: `log` is left as imported,
    # `tel` is additionally passed to xes_utils.set_enabled
    log = xes_importer.apply(path)
    tel = xes_importer.apply(path)
    xes_utils.set_enabled(tel)
    # reference DFG and start/end activity sets of the imported log
    dfg_100 = dfg_factory.apply(log)
    start_act = set(get_start_activities(log).keys())
    end_act = set(get_end_activities(log).keys())
    result_norm = []
    result_tel = []
    num = len(dfg_100.keys())
    score_tel = 0
    score_log = 0
    score_sum = 0
    su_tel = 0
    su_log = 0
    su_sum = 0
    # 10 repetitions per sample; the flags are reset at the start of each repetition
    for k in range(10):
        found_tel = False
        found_log = False
        found_sum = False
def apply_tree(log, parameters):
    """
    Apply the IM_FF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log (a pandas dataframe is handled through its variants)
    parameters
        Parameters of the algorithm (None is treated as an empty dictionary), including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)
            Parameters.NOISE_THRESHOLD -> noise threshold
            (default shared_constants.NOISE_THRESHOLD_IMF)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    # isinstance also accepts DataFrame subclasses
    if isinstance(log, pd.DataFrame):
        variants_count = variants_get.get_variants_count(log, parameters=parameters)
        return apply_tree_variants(variants_count, parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    noise_threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD, parameters,
                                                 shared_constants.NOISE_THRESHOLD_IMF)

    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters=parameters).items() if v > 0]
    c = Counts()
    activities = attributes_filter.get_attribute_values(log, activity_key)
    start_activities = list(start_activities_filter.get_start_activities(log, parameters=parameters).keys())
    end_activities = list(end_activities_filter.get_end_activities(log, parameters=parameters).keys())

    # check if the log contains empty traces: a single pass, False for an empty log
    contains_empty_traces = any(len(trace) == 0 for trace in log)

    # set the threshold parameter based on f and the max value in the dfg
    # (0 for an empty dfg; every value kept in the dfg is positive)
    max_value = max((value for _, value in dfg), default=0)
    threshold = noise_threshold * max_value

    recursion_depth = 0
    # the same dfg is passed as current, master and initial dfg, and the same start/end
    # activities as current and initial ones
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth, noise_threshold, threshold,
                            start_activities, end_activities, start_activities, end_activities,
                            parameters=parameters)

    process_tree = get_tree_repr_implain.get_repr(sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = util.fold(process_tree)

    return process_tree
def apply(log, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm ("decreasingFactor" and the activity key are read here;
        the "format" entry of the dictionary is set to "svg")

    Returns
    ------------
    result
        Tuple of 14 elements: the result of get_base64_from_gviz on the visualization, the result of
        export_petri_as_string on the net, ".pnml", "xes", the activities, the start activities,
        the end activities, the base64-encoded string form of the visualization object, the result
        of get_graph.get_graph_from_petri, "inductive", "freq", None, "" and the activity key
    """
    if parameters is None:
        parameters = {}

    if "decreasingFactor" in parameters:
        dec_factor = parameters["decreasingFactor"]
    else:
        dec_factor = constants.DEFAULT_DEC_FACTOR

    if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = xes.DEFAULT_NAME_KEY

    # reduce the depth of the search done by token-based replay
    # (these are module-level settings of token_replay, changed for every later caller as well)
    token_replay.MAX_REC_DEPTH = 1
    token_replay.MAX_IT_FINAL1 = 1
    token_replay.MAX_IT_FINAL2 = 1
    token_replay.MAX_REC_DEPTH_HIDTRANSENABL = 1

    # bound the number of activities first, then apply the automatic filter
    log = attributes_filter.filter_log_on_max_no_activities(log, max_no_activities=constants.MAX_NO_ACTIVITIES,
                                                            parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    occurrences = attributes_filter.get_attribute_values(filtered_log, activity_key)
    activities = list(occurrences.keys())
    start_occurrences = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
    start_activities = list(start_occurrences.keys())
    end_occurrences = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
    end_activities = list(end_occurrences.keys())

    raw_dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    noise_threshold = dec_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER
    cleaned_dfg = clean_dfg_based_on_noise_thresh(raw_dfg, activities, noise_threshold, parameters=parameters)

    net, im, fm = inductive_miner.apply_dfg(cleaned_dfg, parameters=parameters, activities=activities,
                                            start_activities=start_activities, end_activities=end_activities)

    # the visualization is requested in svg format
    parameters["format"] = "svg"
    gviz = pn_vis_factory.apply(net, im, fm, log=filtered_log, variant="frequency", parameters=parameters)

    svg = get_base64_from_gviz(gviz)
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))
    ret_graph = get_graph.get_graph_from_petri(net, im, fm)
    petri_string = export_petri_as_string(net, im, fm)

    return (svg, petri_string, ".pnml", "xes", activities, start_activities, end_activities, gviz_base64,
            ret_graph, "inductive", "freq", None, "", activity_key)
# trees and samples are both numbered 1..10
for tree in range(1, 11):
    tel_tree_num = 0
    log_tree_num = 0
    print(tree)
    tree_avg_tel = 0
    tree_avg_log = 0
    for sam in range(1, 11):
        print(sam)
        path = os.path.join("input_data", "df_complete_logs",
                            "%d_1000_%d.xes" % (tree, sam))
        log = xes_importer.apply(path)
        # reference DFG and start/end activity sets of the imported log
        dfg_org = dfg_factory.apply(log)
        start_act = set(get_start_activities(log).keys())
        end_act = set(get_end_activities(log).keys())
        # second log for the same tree/sample, read from the "df_complete_alpha" sub-folder
        alpha_path = os.path.join("input_data", "df_complete_logs",
                                  "df_complete_alpha",
                                  "%d_alpha_%d.xes" % (tree, sam))
        alpha_log = xes_importer.apply(alpha_path)
        num = len(dfg_org.keys())
        score_tel = 0
        su_tel = 0
        # 10 repetitions; in each one the alpha log is sampled with n = 1..999 traces
        for k in range(10):
            for n in range(1, 1000):
                sampled_log = sampling.sample_log(alpha_log, no_traces=n)
                dfg_log = dfg_factory.apply(sampled_log)
                yes_dfg = 0
def detect_loop(self):
    """
    Tries to detect a loop cut on the DFG of this subtree

    The candidate partition p0 is built as follows: its first element (p1)
    collects all start and end activities of the log; the remaining elements
    are the connected components of the DFG once every edge touching an
    activity of p1 has been dropped. Two passes then merge into p0[0] the
    components that violate the conditions checked below.

    Returns
    ------------
    result
        [True, p0] if the final partition has more than one element,
        [False, []] otherwise (also when the log contains an empty trace)
    """
    # p0 is part of return value, it contains the partition of activities
    # write all start and end activities in p1
    if self.contains_empty_trace():
        return [False, []]
    start_activities = list(
        start_activities_filter.get_start_activities(
            self.log, parameters=self.parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(
            self.log, parameters=self.parameters).keys())
    # p1 = start activities followed by end activities, without duplicates
    p1 = []
    for act in start_activities:
        if act not in p1:
            p1.append(act)
    for act in end_activities:
        if act not in p1:
            p1.append(act)
    # create new dfg without the transitions to start and end activities
    new_dfg = copy(self.dfg)
    # iterate over a second copy, since elements are removed from new_dfg
    copy_dfg = copy(new_dfg)
    for ele in copy_dfg:
        # ele[0] is the (source, target) pair of the DFG entry
        if ele[0][0] in p1 or ele[0][1] in p1:
            new_dfg.remove(ele)
    # get connected components of this new dfg
    new_ingoing = get_ingoing_edges(new_dfg)
    new_outgoing = get_outgoing_edges(new_dfg)
    # build a dictionary of the activities that are not in p1 (the value 1 is
    # a placeholder), as the dfg itself cannot be used for this purpose
    current_activities = {}
    for element in self.activities:
        if element not in p1:
            current_activities.update({element: 1})
    p0 = detection_utils.get_connected_components(new_ingoing, new_outgoing, current_activities)
    p0.insert(0, p1)
    # list of the (source, target) pairs of self.dfg, used for membership tests
    iterable_dfg = []
    for i in range(0, len(self.dfg)):
        iterable_dfg.append(self.dfg[i][0])
    # p0 is like P1,P2,...,Pn in line 3 on page 190 of the IM Thesis
    # check for subsets in p0 that have connections to an end or from a start activity
    # iterate over a snapshot of p0, since elements are removed from p0 in the loop
    p0_copy = []
    for int_el in p0:
        p0_copy.append(int_el)
    for element in p0_copy:  # for every set in p0
        removed = False
        if element in p0 and element != p0[0]:
            for act in element:  # for every activity in this set
                for e in end_activities:  # for every end activity
                    if e not in start_activities:
                        if (act, e) in iterable_dfg:  # check if connected
                            # is there an element in dfg pointing from any act in a subset of p0 to an end activity
                            for activ in element:
                                if activ not in p0[0]:
                                    p0[0].append(activ)
                            if element in p0:
                                p0.remove(element)  # remove subsets that are connected to an end activity
                            removed = True
                            break
                if removed:
                    break
                for s in start_activities:
                    if s not in end_activities:
                        if not removed:
                            if (s, act) in iterable_dfg:
                                for acti in element:
                                    if acti not in p0[0]:
                                        p0[0].append(acti)
                                if element in p0:
                                    p0.remove(element)  # remove subsets that are connected from a start activity
                                removed = True
                                break
                        else:
                            break
                if removed:
                    break
    # second pass
    # NOTE(review): this pass reads self.start_activities / self.end_activities,
    # while the first pass uses the lists recomputed from the log above -
    # confirm that the difference is intended
    iterable_dfg = []
    for i in range(0, len(self.dfg)):
        iterable_dfg.append(self.dfg[i][0])
    p0_copy = []
    for int_el in p0:
        p0_copy.append(int_el)
    for element in p0_copy:
        if element in p0 and element != p0[0]:
            for act in element:
                for e in self.end_activities:
                    if (e, act) in iterable_dfg:  # get those act, that are connected from an end activity
                        for e2 in self.end_activities:  # check, if the act is connected from all end activities
                            if (e2, act) not in iterable_dfg:
                                for acti in element:
                                    if acti not in p0[0]:
                                        p0[0].append(acti)
                                if element in p0:
                                    p0.remove(element)  # remove subsets not connected from all end activities
                                break
                for s in self.start_activities:
                    if (act, s) in iterable_dfg:  # same as above (in this case for activities connected to
                        # a start activity)
                        for s2 in self.start_activities:
                            if (act, s2) not in iterable_dfg:
                                for acti in element:
                                    if acti not in p0[0]:
                                        p0[0].append(acti)
                                if element in p0:
                                    p0.remove(element)  # remove subsets not connected to all start activities
                                break
    if len(p0) > 1:
        return [True, p0]
    else:
        return [False, []]
def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
    """
    Splits the log according to an already detected cut and appends one
    child subtree per resulting sub-log

    Parameters
    ------------
    type_of_cut
        Type of the cut: 'concurrent', 'sequential', 'parallel' or 'loopCut'.
        For any other value nothing is done
    cut
        Detected cut; its element at index 1 is given to the splitting function
    activity_key
        Attribute of the log to use as activity name
    """
    # every cut type differs only in the function used to split the log
    split_functions = {
        'concurrent': split.split_xor,
        'sequential': split.split_sequence,
        'parallel': split.split_parallel,
        'loopCut': split.split_loop,
    }
    split_function = split_functions.get(type_of_cut)
    if split_function is None:
        return

    self.detected_cut = type_of_cut
    new_logs = split_function(cut[1], self.log, activity_key)
    for sub_log in new_logs:
        new_dfg = [(k, v) for k, v in dfg_inst.apply(
            sub_log, parameters=self.parameters).items() if v > 0]
        activities = attributes_filter.get_attribute_values(
            sub_log, activity_key)
        start_activities = list(
            start_activities_filter.get_start_activities(
                sub_log, parameters=self.parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                sub_log, parameters=self.parameters).keys())
        self.children.append(
            SubtreeInfrequent(
                sub_log,
                new_dfg,
                self.master_dfg,
                self.initial_dfg,
                activities,
                self.counts,
                self.rec_depth + 1,
                self.f,
                noise_threshold=self.noise_threshold,
                start_activities=start_activities,
                end_activities=end_activities,
                initial_start_activities=self.initial_start_activities,
                initial_end_activities=self.initial_end_activities,
                parameters=self.parameters))
def detect_cut_if(self, second_iteration=False, parameters=None):
    """
    Detects the operator of this node of the infrequent inductive miner

    The steps are tried in this order: base cases (empty log, single
    activity), a cut on the current DFG, a cut on the DFG filtered on the
    threshold (using the splitting functions for infrequent behavior) and
    eventually the fall throughs.

    Parameters
    ------------
    second_iteration
        Not used by this method
    parameters
        Parameters given to the discovery of the children and to the fall
        throughs. The activity key is read from self.parameters
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, self.parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    # check base cases:
    empty_log = base_case.empty_log(self.log)
    single_activity = base_case.single_activity(self.log, activity_key)
    if empty_log:
        self.detected_cut = 'empty_log'
        return
    if single_activity:
        self.detected_cut = 'single_activity'
        return

    # if no base cases are found, search for a cut:
    # use the cutting and splitting functions of im_plain:
    found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
    if found_plain_cut:
        self.apply_cut_im_plain(type_of_cut, cut, activity_key)
        return

    # if im_plain does not find a cut, we filter on our threshold and then again apply the im_cut detection
    # but this time, we have to use different splitting functions:
    self.filter_dfg_on_threshold()
    found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
    if not found_plain_cut:
        self.apply_fall_through_infrequent(parameters)
        return

    # cut type -> (splitting function, debug message)
    split_functions = {
        'concurrent': (splitting_infrequent.split_xor_infrequent,
                       "concurrent_cut_if"),
        'sequential': (splitting_infrequent.split_sequence_infrequent,
                       "sequential_if"),
        'parallel': (split.split_parallel, "parallel_if"),
        'loopCut': (splitting_infrequent.split_loop_infrequent,
                    "loopCut_if"),
    }
    if type_of_cut not in split_functions:
        # unknown cut type: the node is left as it is
        return
    split_function, debug_message = split_functions[type_of_cut]

    logging.debug(debug_message)
    self.detected_cut = type_of_cut
    new_logs = split_function(cut[1], self.log, activity_key)
    for sub_log in new_logs:
        new_dfg = [(k, v) for k, v in dfg_inst.apply(
            sub_log, parameters=parameters).items() if v > 0]
        activities = attributes_filter.get_attribute_values(
            sub_log, activity_key)
        start_activities = list(
            start_activities_filter.get_start_activities(
                sub_log, parameters=parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                sub_log, parameters=parameters).keys())
        self.children.append(
            SubtreeInfrequent(
                sub_log,
                new_dfg,
                self.master_dfg,
                self.initial_dfg,
                activities,
                self.counts,
                self.rec_depth + 1,
                self.f,
                noise_threshold=self.noise_threshold,
                start_activities=start_activities,
                end_activities=end_activities,
                initial_start_activities=self.initial_start_activities,
                initial_end_activities=self.initial_end_activities,
                parameters=parameters))
def apply_tree(log, parameters=None):
    """
    Apply the IM algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log. If pandas is installed and the log is a dataframe, the process
        tree is discovered from the variants of the dataframe
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    # local import: importlib.util.find_spec replaces pkgutil.find_loader,
    # which is deprecated in recent Python versions
    import importlib.util

    if parameters is None:
        parameters = {}

    if importlib.util.find_spec("pandas") is not None:
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get

        if isinstance(log, pd.DataFrame):
            variants = variants_get.get_variants_count(log,
                                                       parameters=parameters)
            return apply_tree_variants(variants, parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # since basic IM is influenced once per variant, it makes sense to keep one trace per variant
    log = filtering_utils.keep_one_trace_per_variant(log,
                                                     parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    dfg = [(k, v)
           for k, v in dfg_inst.apply(log, parameters=parameters).items()
           if v > 0]
    c = Counts()
    activities = attributes_filter.get_attribute_values(log, activity_key)
    start_activities = list(
        start_activities_filter.get_start_activities(
            log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(
            log, parameters=parameters).keys())

    # False for a log without traces
    contains_empty_traces = any(len(trace) == 0 for trace in log)

    recursion_depth = 0
    # the same DFG is used as current, master and initial DFG of the root
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c,
                            recursion_depth, 0.0, start_activities,
                            end_activities, start_activities, end_activities,
                            parameters)

    process_tree = get_tree_repr_implain.get_repr(
        sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = util.fold(process_tree)

    return process_tree
def apply_fall_through_infrequent(self, parameters=None):
    """
    Applies the fall throughs of the infrequent inductive miner to this node

    The fall throughs are tried in this order and the first one that applies
    wins: empty trace, activity once per trace, activity concurrent, strict
    tau loop, tau loop, flower model. Each of them, except the flower model,
    can be disabled by setting the corresponding key to a false value.

    Parameters
    ------------
    parameters
        Parameters of the algorithm, including the flags
        Parameters.EMPTY_TRACE_KEY, Parameters.ONCE_PER_TRACE_KEY,
        Parameters.CONCURRENT_KEY, Parameters.STRICT_TAU_LOOP_KEY and
        Parameters.TAU_LOOP_KEY (all enabled when missing). The activity key
        is read from self.parameters
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, self.parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    def is_enabled(flag):
        # a fall through is enabled unless it is explicitly switched off
        return (flag not in parameters) or parameters[flag]

    def add_child(child_log, dfg_parameters, with_boundaries=True):
        # discovers DFG and activities of child_log and appends the subtree;
        # without boundaries the start/end activities are not provided
        child_dfg = [(k, v) for k, v in dfg_inst.apply(
            child_log, parameters=dfg_parameters).items() if v > 0]
        child_activities = attributes_filter.get_attribute_values(
            child_log, activity_key)
        boundaries = {}
        if with_boundaries:
            boundaries["start_activities"] = list(
                start_activities_filter.get_start_activities(
                    child_log, parameters=parameters).keys())
            boundaries["end_activities"] = list(
                end_activities_filter.get_end_activities(
                    child_log, parameters=parameters).keys())
        self.children.append(
            SubtreeInfrequent(
                child_log,
                child_dfg,
                self.master_dfg,
                self.initial_dfg,
                child_activities,
                self.counts,
                self.rec_depth + 1,
                self.f,
                noise_threshold=self.noise_threshold,
                initial_start_activities=self.initial_start_activities,
                initial_end_activities=self.initial_end_activities,
                parameters=parameters,
                **boundaries))

    # set flags for fall_throughs, base case is True (enabled)
    use_empty_trace = is_enabled(Parameters.EMPTY_TRACE_KEY)
    use_act_once_per_trace = is_enabled(Parameters.ONCE_PER_TRACE_KEY)
    use_act_concurrent = is_enabled(Parameters.CONCURRENT_KEY)
    use_strict_tau_loop = is_enabled(Parameters.STRICT_TAU_LOOP_KEY)
    use_tau_loop = is_enabled(Parameters.TAU_LOOP_KEY)

    if use_empty_trace:
        empty_traces_present, enough_traces, new_log = fall_through_infrequent.empty_trace_filtering(
            self.log, self.f)
        self.log = new_log
    else:
        empty_traces_present = False
        enough_traces = False

    # if an empty trace is found, the empty trace fallthrough applies
    if empty_traces_present and enough_traces:
        logging.debug("empty_trace_if")
        self.detected_cut = 'empty_trace'
        # the DFG of this child is discovered with the parameters of the node
        add_child(new_log, self.parameters)
        return
    if empty_traces_present:
        # no node is added to the PT, instead we just use recursion on the log without the empty traces
        # the parameters are forwarded, otherwise the recursion would
        # silently fall back to the default settings
        self.detect_cut_if(parameters=parameters)
        return

    if use_act_once_per_trace:
        activity_once, new_log, small_log = fall_through.act_once_per_trace(
            self.log, self.activities, activity_key)
        if activity_once:
            self.detected_cut = 'parallel'
            # append the chosen activity as leaf:
            add_child(small_log, parameters, with_boundaries=False)
            # continue with the recursion on the new log
            add_child(new_log, parameters)
            return

    if use_act_concurrent:
        activity_concurrent, new_log, small_log, _ = fall_through.activity_concurrent(
            self, self.log, self.activities, activity_key,
            parameters=parameters)
        if activity_concurrent:
            self.detected_cut = 'parallel'
            # append the concurrent activity as leaf:
            add_child(small_log, parameters, with_boundaries=False)
            # continue with the recursion on the new log:
            add_child(new_log, parameters)
            return

    if use_strict_tau_loop:
        strict_tau_loop, new_log = fall_through.strict_tau_loop(
            self.log, self.start_activities, self.end_activities,
            activity_key)
        if strict_tau_loop:
            self.detected_cut = 'strict_tau_loop'
            add_child(new_log, parameters)
            return

    if use_tau_loop:
        tau_loop, new_log = fall_through.tau_loop(
            self.log, self.start_activities, activity_key)
        if tau_loop:
            self.detected_cut = 'tau_loop'
            add_child(new_log, parameters)
            return

    # no fall through applied
    logging.debug("flower_if")
    self.detected_cut = 'flower'
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm. "decreasingFactor" and the activity key
        are read here; the dictionary is also forwarded to the filtering,
        discovery and visualization calls. The dictionary passed by the
        caller is never modified.

    Returns
    ------------
    tuple
        A 14-items tuple, in this order:
        base64 of the SVG representing the model, text representation of the
        Petri net obtained from the DFG, ".pnml", "xes", list of activities,
        list of start activities, list of end activities, base64 of the
        string form of the visualization object, graph obtained from the
        DFG, "dfg", "freq", None, "" and the activity key
    """
    # work on a private copy: "format", "start_activities" and
    # "end_activities" are written below and must not leak into the
    # dictionary owned by the caller
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(
            filtered_log, parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))
    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    # the Petri net is obtained by converting the DFG
    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return (get_base64_from_gviz(gviz), export_petri_as_string(net, im, fm),
            ".pnml", "xes", activities, start_activities, end_activities,
            gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key)
def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an initial and final marking

    Parameters
    -----------
    log
        Log. An empty log, or a log starting with an empty trace, is accepted
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)
        When the activity key or the attribute key are missing, their default
        values are stored in this dictionary

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[
            pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    dfg_parameters = {
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
    }

    # the kind of log is recognised from the first available event; looking
    # for the first non-empty trace avoids an IndexError on an empty log or
    # on a log starting with an empty trace
    first_event = next((trace[0] for trace in log if len(trace) > 0), None)

    # get the DFG
    if isinstance(first_event, tel.Event):
        dfg_counts = inductive_revise.get_dfg_graph_trans(
            log, parameters=dfg_parameters)
    else:
        dfg_counts = dfg_inst.apply(log, parameters=dfg_parameters)
    dfg = [(k, v) for k, v in dfg_counts.items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)
    # gets the start activities from the log
    start_activities = list(
        start_activities_filter.get_start_activities(
            log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(
        end_activities_filter.get_end_activities(
            log, parameters=parameters).keys())

    # check if the log contains empty traces (False for a log without traces)
    contains_empty_traces = any(len(trace) == 0 for trace in log)

    # NOTE: no reduction of the Petri net is performed on the result
    net, initial_marking, final_marking = apply_dfg(
        dfg,
        parameters=parameters,
        activities=activities,
        contains_empty_traces=contains_empty_traces,
        start_activities=start_activities,
        end_activities=end_activities)

    return net, initial_marking, final_marking
def apply_fall_through(self, parameters=None):
    """
    Applies the fall throughs of the inductive miner to this node

    The fall throughs are tried in this order and the first one that applies
    wins: empty trace, activity once per trace, activity concurrent, strict
    tau loop, tau loop, flower model. Each of them, except the flower model,
    can be disabled by setting the corresponding key to a false value.

    Parameters
    ------------
    parameters
        Parameters of the algorithm, including Parameters.ACTIVITY_KEY and
        the flags Parameters.EMPTY_TRACE_KEY, Parameters.ONCE_PER_TRACE_KEY,
        Parameters.CONCURRENT_KEY, Parameters.STRICT_TAU_LOOP_KEY and
        Parameters.TAU_LOOP_KEY (all enabled when missing)
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    def is_enabled(flag):
        # a fall through is enabled unless it is explicitly switched off
        return (flag not in parameters) or parameters[flag]

    def add_child(child_log, with_boundaries=True):
        # discovers DFG and activities of child_log and appends the subtree;
        # without boundaries the start/end activities are not provided
        child_dfg = [(k, v) for k, v in dfg_inst.apply(
            child_log, parameters=parameters).items() if v > 0]
        child_activities = attributes_filter.get_attribute_values(
            child_log, activity_key)
        boundaries = {}
        if with_boundaries:
            # start/end activities are discovered with the parameters of the node
            boundaries["start_activities"] = list(
                start_activities_filter.get_start_activities(
                    child_log, parameters=self.parameters).keys())
            boundaries["end_activities"] = list(
                end_activities_filter.get_end_activities(
                    child_log, parameters=self.parameters).keys())
        self.children.append(
            SubtreePlain(
                child_log,
                child_dfg,
                self.master_dfg,
                self.initial_dfg,
                child_activities,
                self.counts,
                self.rec_depth + 1,
                noise_threshold=self.noise_threshold,
                initial_start_activities=self.initial_start_activities,
                initial_end_activities=self.initial_end_activities,
                parameters=parameters,
                **boundaries))

    # set flags for fall_throughs, base case is True (enabled)
    use_empty_trace = is_enabled(Parameters.EMPTY_TRACE_KEY)
    use_act_once_per_trace = is_enabled(Parameters.ONCE_PER_TRACE_KEY)
    use_act_concurrent = is_enabled(Parameters.CONCURRENT_KEY)
    use_strict_tau_loop = is_enabled(Parameters.STRICT_TAU_LOOP_KEY)
    use_tau_loop = is_enabled(Parameters.TAU_LOOP_KEY)

    if use_empty_trace:
        empty_trace, new_log = fall_through.empty_trace(self.log)
        # if an empty trace is found, the empty trace fallthrough applies
        if empty_trace:
            logging.debug("empty_trace")
            self.detected_cut = 'empty_trace'
            add_child(new_log)
            return

    if use_act_once_per_trace:
        activity_once, new_log, small_log = fall_through.act_once_per_trace(
            self.log, self.activities, activity_key)
        small_log = filtering_utils.keep_one_trace_per_variant(
            small_log, parameters=parameters)
        if activity_once:
            self.detected_cut = 'parallel'
            # append the chosen activity as leaf:
            add_child(small_log, with_boundaries=False)
            # continue with the recursion on the new log
            add_child(new_log)
            return

    if use_act_concurrent:
        activity_concurrent, new_log, small_log, _ = fall_through.activity_concurrent(
            self, self.log, self.activities, activity_key,
            parameters=parameters)
        small_log = filtering_utils.keep_one_trace_per_variant(
            small_log, parameters=parameters)
        if activity_concurrent:
            self.detected_cut = 'parallel'
            # append the concurrent activity as leaf:
            add_child(small_log, with_boundaries=False)
            # continue with the recursion on the new log:
            add_child(new_log)
            return

    if use_strict_tau_loop:
        strict_tau_loop, new_log = fall_through.strict_tau_loop(
            self.log, self.start_activities, self.end_activities,
            activity_key)
        new_log = filtering_utils.keep_one_trace_per_variant(
            new_log, parameters=parameters)
        if strict_tau_loop:
            self.detected_cut = 'strict_tau_loop'
            add_child(new_log)
            return

    if use_tau_loop:
        tau_loop, new_log = fall_through.tau_loop(
            self.log, self.start_activities, activity_key)
        new_log = filtering_utils.keep_one_trace_per_variant(
            new_log, parameters=parameters)
        if tau_loop:
            self.detected_cut = 'tau_loop'
            add_child(new_log)
            return

    # no fall through applied
    logging.debug("flower model")
    self.detected_cut = 'flower'
def apply(log, parameters=None):
    """
    Discovers a Petri net with the Inductive Miner (DFG-based), decorates it
    with performance statistics and returns its renderings/serialisations

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm. Keys read directly by this function:
            "decreasingFactor" -> factor multiplied by
                constants.DEFAULT_DFG_CLEAN_MULTIPLIER to obtain the DFG
                cleaning threshold (default constants.DEFAULT_DEC_FACTOR)
            pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute used as
                activity name (default xes.DEFAULT_NAME_KEY)
        The same dictionary is forwarded to the filtering, DFG, mining and
        decoration helpers.

    Returns
    ------------
    svg
        Result of get_base64_from_file on the "svg" rendering
    model
        Result of export_petri_as_string on the discovered net and markings
    format
        The constant ".pnml"
    log_type
        The constant "xes"
    activities
        List of the activities of the filtered log
    start_activities
        List of the start activities of the filtered log
    end_activities
        List of the end activities of the filtered log
    gviz_base64
        Result of get_base64_from_file on the "dot" rendering
    ret_graph
        Result of get_graph.get_graph_from_petri on the net and markings
    model_type
        The constant "indbpmn"
    decoration
        The constant "perf"
    bpmn_string
        String obtained from the exporter for the laid-out BPMN graph
    bpmn_format
        The constant ".bpmn"
    activity_key
        The activity key that has been used

    Notes
    ------------
    The token-based replay settings changed below are module-level attributes
    of token_replay; this function does not restore them afterwards.
    """
    if parameters is None:
        parameters = {}

    dec_factor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    # reduce the depth of the search done by token-based replay
    token_replay.MAX_REC_DEPTH = 1
    token_replay.MAX_IT_FINAL1 = 1
    token_replay.MAX_IT_FINAL2 = 1
    token_replay.MAX_REC_DEPTH_HIDTRANSENABL = 1

    # cap the number of activities first, then apply the automatic filter
    capped_log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters,
    )
    filtered_log = auto_filter.apply_auto_filter(capped_log, parameters=parameters)

    # activities, start activities and end activities of the filtered log
    activity_counts = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activity_counts.keys())
    start_counts = start_activities_filter.get_start_activities(
        filtered_log, parameters=parameters)
    start_activities = list(start_counts.keys())
    end_counts = end_activities_filter.get_end_activities(
        filtered_log, parameters=parameters)
    end_activities = list(end_counts.keys())

    # DFG of the filtered log, cleaned with the noise threshold
    raw_dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    noise_threshold = dec_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER
    cleaned_dfg = clean_dfg_based_on_noise_thresh(
        raw_dfg, activities, noise_threshold, parameters=parameters)

    # Petri net discovery from the cleaned DFG
    net, initial_marking, final_marking = inductive_miner.apply_dfg(
        cleaned_dfg,
        parameters=parameters,
        activities=activities,
        start_activities=start_activities,
        end_activities=end_activities,
    )

    # BPMN conversion; only the inverse element correspondence is used below
    bpmn_graph, _el_corr, inv_el_corr, _el_corr_keys_map = petri_to_bpmn.apply(
        net, initial_marking, final_marking)

    performance_stats = token_decoration.get_decorations(
        filtered_log,
        net,
        initial_marking,
        final_marking,
        parameters=parameters,
        measure="performance",
    )

    # the converted statistics are computed but currently not embedded into
    # the BPMN graph (nothing below reads them)
    _bpmn_performance_stats = (
        convert_performance_map.convert_performance_map_to_bpmn(
            performance_stats, inv_el_corr))

    laid_out_bpmn = bpmn_diagram_layouter.apply(bpmn_graph)
    bpmn_string = bpmn_exporter.get_string_from_bpmn(laid_out_bpmn)

    # render the decorated net twice: once as "svg" and once as "dot"
    svg_render, dot_render = [
        bpmn_vis_factory.apply_petri(
            net,
            initial_marking,
            final_marking,
            aggregated_statistics=performance_stats,
            variant="performance",
            parameters={"format": output_format},
        )
        for output_format in ("svg", "dot")
    ]

    svg_base64 = get_base64_from_file(svg_render.name)
    dot_base64 = get_base64_from_file(dot_render.name)

    ret_graph = get_graph.get_graph_from_petri(net, initial_marking, final_marking)
    petri_string = export_petri_as_string(net, initial_marking, final_marking)

    return (
        svg_base64,
        petri_string,
        ".pnml",
        "xes",
        activities,
        start_activities,
        end_activities,
        dot_base64,
        ret_graph,
        "indbpmn",
        "perf",
        bpmn_string,
        ".bpmn",
        activity_key,
    )