def slice_dist_suc(log_1, log_2, unit):
    """
    Compute a frequency-weighted distance between two logs sliced into variants.

    Each log is sliced into variant sub-logs via
    ``filter_subsets.logslice_percent``; every variant is converted to a DFG
    (``native.apply``) and an activity-occurrence vector
    (``act_dist_calc.occu_var_act``).  For each variant of the larger slice
    list the closest variant of the smaller list (cosine distance) is found,
    and the distances are averaged weighted by the product of the two
    variants' frequencies.  Variants of the smaller list that were matched
    exactly (distance ~ 0) are excluded from the reverse pass.

    Parameters
    ----------
    log_1, log_2
        The two logs to compare (must produce different variant lists).
    unit
        Slicing granularity forwarded to ``logslice_percent``
        (presumably a percentage — TODO confirm against filter_subsets).

    Returns
    -------
    dist
        Weighted average cosine distance; 0 if both variant lists are equal
        (a warning is printed in that case).
    """
    (log1_list, freq1_list) = filter_subsets.logslice_percent(log_1, unit)
    (log2_list, freq2_list) = filter_subsets.logslice_percent(log_2, unit)
    # orient the computation so the outer loop runs over the larger list
    if len(freq1_list) >= len(freq2_list):
        max_len = len(freq1_list)
        min_len = len(freq2_list)
        max_log = log1_list
        min_log = log2_list
        var_count_max = freq1_list
        var_count_min = freq2_list
    else:
        max_len = len(freq2_list)
        min_len = len(freq1_list)
        max_log = log2_list
        min_log = log1_list
        var_count_max = freq2_list
        var_count_min = freq1_list
    dist_matrix = np.zeros((max_len, min_len))
    max_per_var = np.zeros(max_len)
    max_freq = np.zeros(max_len)
    min_freq = np.zeros(min_len)
    min_per_var = np.zeros(min_len)
    # indices of min-side variants not yet matched exactly by a max-side variant
    index_rec = set(list(range(min_len)))
    if log1_list == log2_list:
        print("Please give different variant lists!")
        dist = 0
    else:
        for i in range(max_len):
            dist_vec = np.zeros(min_len)
            dfg1 = native.apply(max_log[i])
            df1_dfg = act_dist_calc.occu_var_act(dfg1)
            for j in range(min_len):
                dfg2 = native.apply(min_log[j])
                df2_dfg = act_dist_calc.occu_var_act(dfg2)
                # outer-join the occurrence tables; absent activities count 0
                df_dfg = pd.merge(df1_dfg, df2_dfg, how='outer',
                                  on='var').fillna(0)
                dist_vec[j] = pdist(
                    np.array([df_dfg['freq_x'].values,
                              df_dfg['freq_y'].values]), 'cosine')[0]
                dist_matrix[i][j] = dist_vec[j]
                if j == (min_len - 1):
                    # after the row is complete, take the best match for variant i
                    max_loc_col = np.argmin(dist_vec)
                    if abs(dist_vec[max_loc_col]) <= 1e-8:
                        # exact match: drop it from the reverse pass and double
                        # its weight (NOTE(review): the distance contribution is
                        # multiplied by max_freq[i] * 2 although max_freq[i] is
                        # itself already doubled — looks intentional but verify)
                        index_rec.discard(max_loc_col)
                        max_freq[i] = var_count_max[i] * \
                            var_count_min[max_loc_col] * 2
                        max_per_var[i] = dist_vec[max_loc_col] * \
                            max_freq[i] * 2
                    else:
                        max_freq[i] = var_count_max[i] * \
                            var_count_min[max_loc_col]
                        max_per_var[i] = dist_vec[max_loc_col] * max_freq[i]
        # reverse pass: min-side variants never matched exactly contribute
        # their own best match over the max side
        if (len(index_rec) != 0):
            for i in list(index_rec):
                min_loc_row = np.argmin(dist_matrix[:, i])
                min_freq[i] = var_count_max[min_loc_row] * var_count_min[i]
                min_per_var[i] = dist_matrix[min_loc_row, i] * min_freq[i]
        # frequency-weighted average over both passes
        dist = (np.sum(max_per_var) + np.sum(min_per_var)) / (
            np.sum(max_freq) + np.sum(min_freq))
    return dist
def create_dfg(self, parameters=None):
    """
    Compute the directly-follows graph of ``self.log``.

    Parameters
    ----------
    parameters
        Optional parameter dictionary forwarded to the DFG computation.

    Returns
    -------
    list
        ``(edge, frequency)`` pairs, restricted to edges observed at
        least once.
    """
    if parameters is None:
        parameters = {}
    edge_counts = dfg_inst.apply(self.log, parameters=parameters)
    return [(edge, count) for edge, count in edge_counts.items() if count > 0]
def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an
    initial and final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of
            the log to use as activity name (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    # BUG FIX: 'parameters' previously had no default although the body
    # handles None, so apply(log) raised a TypeError
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG, keeping only edges observed at least once
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(
        start_activities_filter.get_start_activities(log, parameters=parameters).keys())

    # gets the end activities from the log
    end_activities = list(
        end_activities_filter.get_end_activities(log, parameters=parameters).keys())

    # check if the log contains empty traces (shortest trace has length 0)
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    net, initial_marking, final_marking = apply_dfg(
        dfg, parameters=parameters, activities=activities,
        contains_empty_traces=contains_empty_traces,
        start_activities=start_activities,
        end_activities=end_activities)

    return net, initial_marking, final_marking
def apply_tree(log, parameters=None):
    """
    Apply the IMDF algorithm to a log, obtaining a process tree.

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of
            the log to use as activity name (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    act_key_param = pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # setdefault mirrors the original "if key not in parameters" mutation
    parameters.setdefault(act_key_param, xes_util.DEFAULT_NAME_KEY)
    activity_key = parameters[act_key_param]

    # directly-follows graph, restricted to positive-frequency edges
    raw_counts = dfg_inst.apply(log, parameters={act_key_param: activity_key})
    dfg = [(edge, freq) for edge, freq in raw_counts.items() if freq > 0]

    # start / end activities and the activity set of the log
    start_activities = log_start_act_stats.get_start_activities(
        log, parameters=parameters)
    end_activities = log_end_act_stats.get_end_activities(
        log, parameters=parameters)
    activities = log_attributes_stats.get_attribute_values(log, activity_key)

    # empty traces are present iff some trace has length zero
    trace_lengths = [len(trace) for trace in log]
    contains_empty_traces = bool(trace_lengths) and min(trace_lengths) == 0

    return apply_tree_dfg(dfg,
                          parameters=parameters,
                          activities=activities,
                          contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities,
                          end_activities=end_activities)
def apply(trace_log, parameters=None):
    """
    Discover a Petri net from a trace log using the classic alpha miner [1]_.

    Parameters
    ----------
    trace_log: :class:`pm4py.log.log.TraceLog`
        Event log to mine; note that it should be a TraceLog.
    parameters
        Optional parameter dictionary; the activity key (default
        'concept:name') is read from
        pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY.

    Returns
    -------
    net: :class:`pm4py.entities.petri.petrinet.PetriNet`
        Petri net describing the input event log.
    initial marking: :class:`pm4py.models.net.Marking`
        Initial marking of the net.
    final marking: :class:`pm4py.models.net.Marking`
        Final marking of the net; not guaranteed to be reachable.

    References
    ----------
    .. [1] Wil M. P. van der Aalst et al., "Workflow Mining: Discovering
        Process Models from Event Logs", IEEE Trans. Knowl. Data Eng., 16,
        1128-1142, 2004. `DOI <https://doi.org/10.1109/TKDE.2004.47>`_.
    """
    if parameters is None:
        parameters = {}
    act_key_param = pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # setdefault mirrors the original "if key not in parameters" mutation
    parameters.setdefault(act_key_param, log_util.xes.DEFAULT_NAME_KEY)
    activity_key = parameters[act_key_param]

    # directly-follows pairs observed at least once
    dfg = {}
    for pair, freq in dfg_inst.apply(trace_log, parameters=parameters).items():
        if freq > 0:
            dfg[pair] = freq

    start_activities = endpoints.derive_start_activities_from_tracelog(
        trace_log, activity_key)
    end_activities = endpoints.derive_end_activities_from_tracelog(
        trace_log, activity_key)

    return apply_dfg_sa_ea(dfg, start_activities, end_activities,
                           parameters=parameters)
def check_for_cut(self, test_log, deleted_activity=None, parameters=None):
    """
    Check whether any cut (xor, sequence, parallel, loop) can be detected
    on the given log.

    Parameters
    ----------
    test_log
        Log to test for the presence of a cut.
    deleted_activity
        Optional activity to remove from ``self.activities`` before testing.
    parameters
        Optional parameter dictionary forwarded to the DFG computation.

    Returns
    -------
    bool
        True as soon as one of the cut detectors fires, False otherwise.

    Raises
    ------
    Exception
        If networkx is not installed.
    """
    # guard clause: without networkx the inductive miner cannot run at all
    if not pkgutil.find_loader("networkx"):
        msg = "networkx is not available. inductive miner cannot be used!"
        logging.error(msg)
        raise Exception(msg)
    import networkx as nx

    if deleted_activity is not None:
        del self.activities[deleted_activity]
    if parameters is None:
        parameters = {}

    # recompute the state of this subtree for the test log
    self.dfg = [(edge, freq) for edge, freq in dfg_inst.apply(
        test_log, parameters=parameters).items() if freq > 0]
    self.outgoing = get_outgoing_edges(self.dfg)
    self.ingoing = get_ingoing_edges(self.dfg)
    self.log = test_log

    conn_components = detection_utils.get_connected_components(
        self.ingoing, self.outgoing, self.activities)
    nx_graph = transform_dfg_to_directed_nx_graph(
        self.dfg, activities=self.activities)
    scc = [list(comp) for comp in nx.strongly_connected_components(nx_graph)]

    # try each cut type in order; succeed as soon as one applies
    if self.detect_xor(conn_components)[0]:
        return True
    if cut_detection.detect_sequential_cut(self, self.dfg, scc)[0]:
        return True
    if self.detect_concurrent()[0]:
        return True
    if self.detect_loop()[0]:
        return True
    return False
def apply(trace_log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an
    initial and final marking

    Parameters
    -----------
    trace_log
        Trace log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of
            the log to use as activity name (default concept:name)
            "enable_reduction" -> explicitly enable/disable the token-replay
            based net reduction (default: only on very small logs)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    # BUG FIX: 'parameters' previously had no default although the body
    # handles None, so apply(trace_log) raised a TypeError
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # apply the reduction by default only on very small logs.
    # BUG FIX: the previous default ANDed the two constants together and never
    # looked at the log size, so "only on very small logs" was not enforced
    enable_reduction = parameters[
        "enable_reduction"] if "enable_reduction" in parameters else (
            shared_constants.APPLY_REDUCTION_ON_SMALL_LOG
            and len(trace_log) <= shared_constants.MAX_LOG_SIZE_FOR_REDUCTION)

    # get the DFG, keeping only edges observed at least once
    dfg = [(k, v) for k, v in dfg_inst.apply(trace_log, parameters={
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(trace_log, activity_key)

    # check if the log contains empty traces (shortest trace has length 0)
    contains_empty_traces = False
    traces_length = [len(trace) for trace in trace_log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    net, initial_marking, final_marking = apply_dfg(
        dfg, parameters=parameters, activities=activities,
        contains_empty_traces=contains_empty_traces)

    if enable_reduction:
        # do the replay
        aligned_traces = token_replay.apply(trace_log, net, initial_marking,
                                            final_marking,
                                            parameters=parameters)

        # apply petri_reduction technique in order to simplify the Petri net
        net = petri_cleaning.petri_reduction_treplay(
            net, parameters={"aligned_traces": aligned_traces})

    return net, initial_marking, final_marking
def apply_fall_through(self, parameters=None):
    """
    Fall-through chain of the plain inductive miner.

    Invoked when no cut could be detected on the current log; tries, in
    order: empty-trace, activity-once-per-trace, activity-concurrent,
    strict tau loop, tau loop, and finally the flower model.  The first
    applicable fall-through sets ``self.detected_cut`` and appends the
    corresponding ``SubtreePlain`` children for the recursion.

    Parameters
    ----------
    parameters
        Optional parameter dictionary; also carries per-fall-through
        enable/disable flags (all enabled by default).
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    # set flags for fall_throughs, base case is True (enabled)
    use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                       ) or parameters[Parameters.EMPTY_TRACE_KEY]
    use_act_once_per_trace = (
        Parameters.ONCE_PER_TRACE_KEY
        not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
    use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                          ) or parameters[Parameters.CONCURRENT_KEY]
    use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                           ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
    use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                    ) or parameters[Parameters.TAU_LOOP_KEY]

    if use_empty_trace:
        empty_trace, new_log = fall_through.empty_trace(self.log)
        # if an empty trace is found, the empty trace fallthrough applies
    else:
        empty_trace = False
    if empty_trace:
        logging.debug("empty_trace")
        # collect the activities that remain in the log without empty traces
        activites_left = []
        for trace in new_log:
            for act in trace:
                if act[activity_key] not in activites_left:
                    activites_left.append(act[activity_key])
        self.detected_cut = 'empty_trace'
        new_dfg = [(k, v) for k, v in dfg_inst.apply(
            new_log, parameters=parameters).items() if v > 0]
        activities = attributes_filter.get_attribute_values(
            new_log, activity_key)
        start_activities = list(
            start_activities_filter.get_start_activities(
                new_log, parameters=self.parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                new_log, parameters=self.parameters).keys())
        self.children.append(
            SubtreePlain(
                new_log,
                new_dfg,
                self.master_dfg,
                self.initial_dfg,
                activities,
                self.counts,
                self.rec_depth + 1,
                noise_threshold=self.noise_threshold,
                start_activities=start_activities,
                end_activities=end_activities,
                initial_start_activities=self.initial_start_activities,
                initial_end_activities=self.initial_end_activities,
                parameters=parameters))
    else:
        if use_act_once_per_trace:
            activity_once, new_log, small_log = fall_through.act_once_per_trace(
                self.log, self.activities, activity_key)
            small_log = filtering_utils.keep_one_trace_per_variant(
                small_log, parameters=parameters)
        else:
            activity_once = False
        if use_act_once_per_trace and activity_once:
            self.detected_cut = 'parallel'
            # create two new dfgs as we need them to append to self.children later
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            small_dfg = [(k, v) for k, v in dfg_inst.apply(
                small_log, parameters=parameters).items() if v > 0]
            small_activities = attributes_filter.get_attribute_values(
                small_log, activity_key)
            # append the once-per-trace activity as a leaf
            self.children.append(
                SubtreePlain(
                    small_log,
                    small_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    small_activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
            # continue with the recursion on the new log
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=self.parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_concurrent:
                activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(
                    self, self.log, self.activities, activity_key,
                    parameters=parameters)
                small_log = filtering_utils.keep_one_trace_per_variant(
                    small_log, parameters=parameters)
            else:
                activity_concurrent = False
            if use_act_concurrent and activity_concurrent:
                self.detected_cut = 'parallel'
                # create two new dfgs on to append later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                # append the concurrent activity as leaf:
                self.children.append(
                    SubtreePlain(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.
                        initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log:
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=self.parameters).keys())
                self.children.append(
                    SubtreePlain(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.
                        initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
            else:
                if use_strict_tau_loop:
                    strict_tau_loop, new_log = fall_through.strict_tau_loop(
                        self.log, self.start_activities, self.end_activities,
                        activity_key)
                    new_log = filtering_utils.keep_one_trace_per_variant(
                        new_log, parameters=parameters)
                else:
                    strict_tau_loop = False
                if use_strict_tau_loop and strict_tau_loop:
                    # collect the activities left in the unrolled log
                    activites_left = []
                    for trace in new_log:
                        for act in trace:
                            if act[activity_key] not in activites_left:
                                activites_left.append(act[activity_key])
                    self.detected_cut = 'strict_tau_loop'
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=self.parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=self.parameters).keys())
                    self.children.append(
                        SubtreePlain(new_log,
                                     new_dfg,
                                     self.master_dfg,
                                     self.initial_dfg,
                                     activities,
                                     self.counts,
                                     self.rec_depth + 1,
                                     noise_threshold=self.noise_threshold,
                                     start_activities=start_activities,
                                     end_activities=end_activities,
                                     initial_start_activities=self.
                                     initial_start_activities,
                                     initial_end_activities=self.
                                     initial_end_activities,
                                     parameters=parameters))
                else:
                    if use_tau_loop:
                        tau_loop, new_log = fall_through.tau_loop(
                            self.log, self.start_activities, activity_key)
                        new_log = filtering_utils.keep_one_trace_per_variant(
                            new_log, parameters=parameters)
                    else:
                        tau_loop = False
                    if use_tau_loop and tau_loop:
                        # collect the activities left in the unrolled log
                        activites_left = []
                        for trace in new_log:
                            for act in trace:
                                if act[activity_key] not in activites_left:
                                    activites_left.append(
                                        act[activity_key])
                        self.detected_cut = 'tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=self.parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=self.parameters).keys())
                        self.children.append(
                            SubtreePlain(
                                new_log,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                    else:
                        # nothing else applies: flower model over all
                        # remaining activities
                        logging.debug("flower model")
                        activites_left = []
                        for trace in self.log:
                            for act in trace:
                                if act[activity_key] not in activites_left:
                                    activites_left.append(
                                        act[activity_key])
                        self.detected_cut = 'flower'
def detect_cut(self, second_iteration=False, parameters=None):
    """
    Detect and apply a cut on the current log (plain inductive miner).

    First checks the base cases (empty log, single activity).  Otherwise
    tries the cut types in order (xor, sequence, parallel, loop); on
    success the log is split accordingly, each sub-log is reduced to one
    trace per variant, and a ``SubtreePlain`` child is created per sub-log
    for the recursion.  If no cut applies, the fall-through chain is used.

    Parameters
    ----------
    second_iteration
        Flag kept for interface compatibility (not read in this body).
    parameters
        Optional parameter dictionary (activity key etc.).

    Raises
    ------
    Exception
        If networkx is not installed.
    """
    if pkgutil.find_loader("networkx"):
        import networkx as nx

        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # check base cases:
        empty_log = base_case.empty_log(self.log)
        single_activity = base_case.single_activity(self.log, activity_key)
        if empty_log:
            self.detected_cut = 'empty_log'
        elif single_activity:
            self.detected_cut = 'single_activity'
        # if no base cases are found, search for a cut:
        else:
            conn_components = detection_utils.get_connected_components(
                self.ingoing, self.outgoing, self.activities)
            this_nx_graph = transform_dfg_to_directed_nx_graph(
                self.dfg, activities=self.activities)
            strongly_connected_components = [
                list(x)
                for x in nx.strongly_connected_components(this_nx_graph)
            ]
            xor_cut = self.detect_xor(conn_components)
            # the following part searches for a cut in the current log
            # if a cut is found, the log is split according to the cut, the
            # resulting logs are saved in new_logs
            # recursion is used on all the logs in new_logs
            if xor_cut[0]:
                logging.debug("xor_cut")
                self.detected_cut = 'concurrent'
                new_logs = split.split_xor(xor_cut[1], self.log, activity_key)
                for i in range(len(new_logs)):
                    new_logs[
                        i] = filtering_utils.keep_one_trace_per_variant(
                            new_logs[i], parameters=parameters)
                for l in new_logs:
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        l, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        l, activity_key)
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            l, parameters=parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            l, parameters=parameters).keys())
                    self.children.append(
                        SubtreePlain(l,
                                     new_dfg,
                                     self.master_dfg,
                                     self.initial_dfg,
                                     activities,
                                     self.counts,
                                     self.rec_depth + 1,
                                     noise_threshold=self.noise_threshold,
                                     start_activities=start_activities,
                                     end_activities=end_activities,
                                     initial_start_activities=self.
                                     initial_start_activities,
                                     initial_end_activities=self.
                                     initial_end_activities,
                                     parameters=parameters))
            else:
                sequence_cut = cut_detection.detect_sequential_cut(
                    self, self.dfg, strongly_connected_components)
                if sequence_cut[0]:
                    logging.debug("sequence_cut")
                    new_logs = split.split_sequence(
                        sequence_cut[1], self.log, activity_key)
                    for i in range(len(new_logs)):
                        new_logs[
                            i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                    self.detected_cut = "sequential"
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(
                                l,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                else:
                    parallel_cut = self.detect_concurrent()
                    if parallel_cut[0]:
                        logging.debug("parallel_cut")
                        new_logs = split.split_parallel(
                            parallel_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[
                                i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                        self.detected_cut = "parallel"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.
                                get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    else:
                        loop_cut = self.detect_loop()
                        if loop_cut[0]:
                            logging.debug("loop_cut")
                            new_logs = split.split_loop(
                                loop_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[
                                    i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                            self.detected_cut = "loopCut"
                            for l in new_logs:
                                new_dfg = [
                                    (k, v) for k, v in dfg_inst.apply(
                                        l, parameters=parameters).items()
                                    if v > 0
                                ]
                                activities = attributes_filter.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_filter.
                                    get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_filter.
                                    get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l,
                                        new_dfg,
                                        self.master_dfg,
                                        self.initial_dfg,
                                        activities,
                                        self.counts,
                                        self.rec_depth + 1,
                                        noise_threshold=self.
                                        noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.
                                        initial_start_activities,
                                        initial_end_activities=self.
                                        initial_end_activities,
                                        parameters=parameters))
                        # if the code gets to this point, there is no
                        # base_case and no cut found in the log
                        # therefore, we now apply fall through:
                        else:
                            self.apply_fall_through(parameters)
    else:
        msg = "networkx is not available. inductive miner cannot be used!"
        logging.error(msg)
        raise Exception(msg)
def trans_alpha(log, parameters=None):
    """
    Variant of the classic alpha miner that refines the parallel relation
    using transition information from the log.

    A pair of the parallel relation is confirmed as truly parallel only if
    the log shows the second activity enabled while the first executes
    (``trace[i].enabled`` — assumes events carry an ``enabled`` set; TODO
    confirm against the event class used by the caller).  Unconfirmed
    pairs are treated as loop pairs and contribute causal place
    candidates instead.

    Parameters
    ----------
    log
        Log to mine; events must expose 'concept:name'.
    parameters
        Optional parameter dictionary; the activity key (default
        'concept:name') is read from
        pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY.

    Returns
    -------
    net, initial_marking, final_marking
        The discovered Petri net and its two markings.
    """
    # directly-follows pairs observed at least once
    dfg = {k: v for k, v in dfg_inst.apply(log).items() if v > 0}
    if parameters is None:
        parameters = {}
    if pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pm_util.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    start_activities = endpoints.derive_start_activities_from_log(
        log, parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])
    end_activities = endpoints.derive_end_activities_from_log(
        log, parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])

    # collect every activity label seen in the DFG or at the endpoints
    labels = set()
    for el in dfg:
        labels.add(el[0])
        labels.add(el[1])
    for a in start_activities:
        labels.add(a)
    for a in end_activities:
        labels.add(a)
    labels = list(labels)

    alpha_abstraction = alpha_classic_abstraction.ClassicAlphaAbstraction(
        start_activities, end_activities, dfg)
    new_parallel_set = set()
    loop_set = set()
    # confirm parallel pairs via the transition information in the log
    for par in alpha_abstraction.parallel_relation:
        for trace in log:
            for i in range(len(trace) - 1):
                if trace[i]['concept:name'] == par[0] and trace[
                        i + 1]['concept:name'] == par[1]:
                    if trace[i + 1]['concept:name'] in trace[i].enabled:
                        new_parallel_set.add(par)
    # initial causal place candidates, as in the classic alpha miner
    pairs = list(
        map(
            lambda p: ({p[0]}, {p[1]}),
            filter(
                lambda p: classic.__initial_filter(
                    alpha_abstraction.parallel_relation, p),
                alpha_abstraction.causal_relation)))
    # unconfirmed parallel pairs become loop pairs and extra candidates
    for par in alpha_abstraction.parallel_relation:
        if par not in new_parallel_set and (
                par[1], par[0]) not in new_parallel_set and par[1] != par[0]:
            loop_set.add(par)
            pairs.append(({par[0]}, {par[1]}))
        else:
            new_parallel_set.add(par)
    # merge compatible candidate pairs (alpha-style pair expansion)
    for i in range(0, len(pairs)):
        t1 = pairs[i]
        for j in range(i, len(pairs)):
            t2 = pairs[j]
            if t1 != t2:
                if t1[0].issubset(t2[0]) or t1[1].issubset(t2[1]):
                    if not (classic.__check_is_unrelated(
                            (new_parallel_set.union(loop_set)),
                            alpha_abstraction.causal_relation, t1[0], t2[0])
                            or classic.__check_is_unrelated(
                                (new_parallel_set.union(loop_set)),
                                alpha_abstraction.causal_relation, t1[1],
                                t2[1])):
                        new_alpha_pair = (t1[0] | t2[0], t1[1] | t2[1])
                        if new_alpha_pair not in pairs:
                            pairs.append((t1[0] | t2[0], t1[1] | t2[1]))
    # keep only maximal pairs; each becomes an internal place
    internal_places = filter(lambda p: classic.__pair_maximizer(pairs, p),
                             pairs)
    net = petri.petrinet.PetriNet('alpha_classic_net_' + str(time.time()))
    label_transition_dict = {}
    for i in range(0, len(labels)):
        label_transition_dict[labels[i]] = petri.petrinet.PetriNet.Transition(
            labels[i], labels[i])
        net.transitions.add(label_transition_dict[labels[i]])
    # source / sink places wired to the start / end transitions
    src = classic.__add_source(net, alpha_abstraction.start_activities,
                               label_transition_dict)
    sink = classic.__add_sink(net, alpha_abstraction.end_activities,
                              label_transition_dict)
    for pair in internal_places:
        place = petri.petrinet.PetriNet.Place(str(pair))
        net.places.add(place)
        for in_arc in pair[0]:
            petri.utils.add_arc_from_to(label_transition_dict[in_arc], place,
                                        net)
        for out_arc in pair[1]:
            petri.utils.add_arc_from_to(place, label_transition_dict[out_arc],
                                        net)
    return net, Marking({src: 1}), Marking({sink: 1})
def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an
    initial and final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of
            the log to use as activity name (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    # BUG FIX: 'parameters' previously had no default although the body
    # handles None, so apply(log) raised a TypeError
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG: transition logs (tel.Event) use the transition-aware DFG
    # extraction, classic event logs use the standard one.
    # ROBUSTNESS FIX: guard the log[0][0] peek so empty logs / empty first
    # traces no longer raise IndexError (they fall back to the standard DFG)
    if len(log) > 0 and len(log[0]) > 0 and isinstance(log[0][0], tel.Event):
        dfg = [(k, v) for k, v in inductive_revise.get_dfg_graph_trans(
            log, parameters={
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
            }).items() if v > 0]
    else:
        dfg = [(k, v) for k, v in dfg_inst.apply(
            log, parameters={
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
            }).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(
        start_activities_filter.get_start_activities(
            log, parameters=parameters).keys())

    # gets the end activities from the log
    end_activities = list(
        end_activities_filter.get_end_activities(log,
                                                 parameters=parameters).keys())

    # check if the log contains empty traces (shortest trace has length 0)
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    # NOTE: the token-replay-based reduction step that used to follow here was
    # dead (disabled inside a string literal); it was removed together with the
    # unused 'enable_reduction' flag it referenced
    net, initial_marking, final_marking = apply_dfg(
        dfg,
        parameters=parameters,
        activities=activities,
        contains_empty_traces=contains_empty_traces,
        start_activities=start_activities,
        end_activities=end_activities)

    return net, initial_marking, final_marking
def apply_fall_through_infrequent(self, parameters=None):
    """
    Try the IMf fall-throughs on self.log, in order: empty trace,
    activity-once-per-trace, activity-concurrent, strict tau loop, tau loop,
    and finally the flower model. The first applicable fall-through sets
    self.detected_cut and (except for the flower case) appends one or two
    SubtreeInfrequent children for the recursion.

    :param parameters: optional dict; per-fall-through flags (Parameters.*_KEY)
        default to True when absent.
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, self.parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)
    # set flags for fall_throughs, base case is True (enabled)
    use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                       ) or parameters[Parameters.EMPTY_TRACE_KEY]
    use_act_once_per_trace = (
        Parameters.ONCE_PER_TRACE_KEY
        not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
    use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                          ) or parameters[Parameters.CONCURRENT_KEY]
    use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                           ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
    use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                    ) or parameters[Parameters.TAU_LOOP_KEY]
    if use_empty_trace:
        # note: self.log is replaced by the filtered log even when the
        # empty-trace fall-through does not end up applying below
        empty_traces_present, enough_traces, new_log = fall_through_infrequent.empty_trace_filtering(
            self.log, self.f)
        self.log = new_log
    else:
        empty_traces_present = False
        enough_traces = False
    # if an empty trace is found, the empty trace fallthrough applies
    if empty_traces_present and enough_traces:
        logging.debug("empty_trace_if")
        self.detected_cut = 'empty_trace'
        # NOTE(review): the dfg is built with self.parameters while the
        # start/end activities below use the local parameters — confirm the
        # asymmetry is intended
        new_dfg = [(k, v) for k, v in dfg_inst.apply(
            new_log, parameters=self.parameters).items() if v > 0]
        activities = attributes_filter.get_attribute_values(
            new_log, activity_key)
        start_activities = list(
            start_activities_filter.get_start_activities(
                new_log, parameters=parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                new_log, parameters=parameters).keys())
        self.children.append(
            SubtreeInfrequent(
                new_log,
                new_dfg,
                self.master_dfg,
                self.initial_dfg,
                activities,
                self.counts,
                self.rec_depth + 1,
                self.f,
                noise_threshold=self.noise_threshold,
                start_activities=start_activities,
                end_activities=end_activities,
                initial_start_activities=self.initial_start_activities,
                initial_end_activities=self.initial_end_activities,
                parameters=parameters))
    elif empty_traces_present and not enough_traces:
        # no node is added to the PT, instead we just use recursion on the log without the empty traces
        self.detect_cut_if()
    else:
        if use_act_once_per_trace:
            activity_once, new_log, small_log = fall_through.act_once_per_trace(
                self.log, self.activities, activity_key)
        else:
            activity_once = False
        if activity_once:
            self.detected_cut = 'parallel'
            # create two new dfgs as we need them to append to self.children later
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            small_dfg = [(k, v) for k, v in dfg_inst.apply(
                small_log, parameters=parameters).items() if v > 0]
            small_activities = attributes_filter.get_attribute_values(
                small_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=parameters).keys())
            # append the chosen activity as leaf:
            # (the leaf child is created without start/end activities)
            self.children.append(
                SubtreeInfrequent(
                    small_log,
                    small_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    small_activities,
                    self.counts,
                    self.rec_depth + 1,
                    self.f,
                    noise_threshold=self.noise_threshold,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
            # continue with the recursion on the new log
            self.children.append(
                SubtreeInfrequent(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    self.f,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_concurrent:
                # `key` (the chosen concurrent activity) is returned but not
                # used further here
                activity_concurrent, new_log, small_log, key = fall_through.activity_concurrent(
                    self,
                    self.log,
                    self.activities,
                    activity_key,
                    parameters=parameters)
            else:
                activity_concurrent = False
            if activity_concurrent:
                self.detected_cut = 'parallel'
                # create two new dfgs on to append later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=parameters).keys())
                # append the concurrent activity as leaf:
                self.children.append(
                    SubtreeInfrequent(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.
                        initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log:
                self.children.append(
                    SubtreeInfrequent(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.
                        initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
            else:
                if use_strict_tau_loop:
                    strict_tau_loop, new_log = fall_through.strict_tau_loop(
                        self.log, self.start_activities, self.end_activities,
                        activity_key)
                else:
                    strict_tau_loop = False
                if strict_tau_loop:
                    self.detected_cut = 'strict_tau_loop'
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=parameters).keys())
                    self.children.append(
                        SubtreeInfrequent(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.
                            initial_end_activities,
                            parameters=parameters))
                else:
                    if use_tau_loop:
                        tau_loop, new_log = fall_through.tau_loop(
                            self.log, self.start_activities, activity_key)
                    else:
                        tau_loop = False
                    if tau_loop:
                        self.detected_cut = 'tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                new_log,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                    else:
                        # no fall-through applied: give up and model the log
                        # as a flower model
                        logging.debug("flower_if")
                        self.detected_cut = 'flower'
def detect_cut_if(self, second_iteration=False, parameters=None):
    """
    Detect a cut on self.log for the infrequent inductive miner.

    Order of attempts: base cases (empty log, single activity), then a plain
    IM cut; if none is found, filter the dfg on the noise threshold and retry
    with the infrequent splitting functions; if still no cut is found, apply
    the fall-throughs.

    :param second_iteration: NOTE(review) currently unused in this body —
        confirm whether callers rely on it
    :param parameters: optional dict of algorithm parameters
    """
    # dfg_viz = dfg_factory.apply(self.log)
    # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
    # dfg_vis_factory.view(gviz)
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, self.parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)
    # check base cases:
    empty_log = base_case.empty_log(self.log)
    single_activity = base_case.single_activity(self.log, activity_key)
    if empty_log:
        self.detected_cut = 'empty_log'
    elif single_activity:
        self.detected_cut = 'single_activity'
    # if no base cases are found, search for a cut:
    # use the cutting and splitting functions of im_plain:
    else:
        found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
        if found_plain_cut:
            self.apply_cut_im_plain(type_of_cut, cut, activity_key)
        # if im_plain does not find a cut, we filter on our threshold and then again apply the im_cut detection
        # but this time, we have to use different splitting functions:
        else:
            self.filter_dfg_on_threshold()
            """
            dfg_viz = dfg_factory.apply(self.log)
            gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
            dfg_vis_factory.view(gviz)
            """
            found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
            if found_plain_cut:
                if type_of_cut == 'concurrent':
                    logging.debug("concurrent_cut_if")
                    self.detected_cut = 'concurrent'
                    # split with the infrequent xor splitter and recurse on
                    # every sub-log
                    new_logs = splitting_infrequent.split_xor_infrequent(
                        cut[1], self.log, activity_key)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                l,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                elif type_of_cut == 'sequential':
                    logging.debug("sequential_if")
                    new_logs = splitting_infrequent.split_sequence_infrequent(
                        cut[1], self.log, activity_key)
                    self.detected_cut = "sequential"
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                l,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                elif type_of_cut == 'parallel':
                    logging.debug("parallel_if")
                    # parallel splitting has no infrequent variant, reuse the
                    # plain splitter
                    new_logs = split.split_parallel(
                        cut[1], self.log, activity_key)
                    self.detected_cut = "parallel"
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                l,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                elif type_of_cut == 'loopCut':
                    logging.debug("loopCut_if")
                    new_logs = splitting_infrequent.split_loop_infrequent(
                        cut[1], self.log, activity_key)
                    self.detected_cut = "loopCut"
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                l,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
            else:
                # no cut even after filtering: fall through
                self.apply_fall_through_infrequent(parameters)
def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
    """
    Apply a cut found by the plain IM cut detection: record the cut type in
    self.detected_cut, split self.log with the matching plain splitter, and
    append one SubtreeInfrequent child per resulting sub-log.

    :param type_of_cut: one of 'concurrent', 'sequential', 'parallel',
        'loopCut' (anything else is a no-op, as before)
    :param cut: cut description; cut[1] holds the activity partition
    :param activity_key: attribute used as activity name
    """
    # dfg_viz = dfg_factory.apply(self.log)
    # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
    # dfg_vis_factory.view(gviz)
    splitters = {
        'concurrent': split.split_xor,
        'sequential': split.split_sequence,
        'parallel': split.split_parallel,
        'loopCut': split.split_loop,
    }
    splitter = splitters.get(type_of_cut)
    if splitter is None:
        return
    # the recorded cut label is exactly the cut type in every branch
    self.detected_cut = type_of_cut
    sub_logs = splitter(cut[1], self.log, activity_key)
    for sub_log in sub_logs:
        # derive the per-child dfg / activities / endpoints, all with the
        # subtree's own parameters
        child_dfg = [(edge, freq) for edge, freq in dfg_inst.apply(
            sub_log, parameters=self.parameters).items() if freq > 0]
        child_activities = attributes_filter.get_attribute_values(
            sub_log, activity_key)
        child_start = list(
            start_activities_filter.get_start_activities(
                sub_log, parameters=self.parameters).keys())
        child_end = list(
            end_activities_filter.get_end_activities(
                sub_log, parameters=self.parameters).keys())
        self.children.append(
            SubtreeInfrequent(
                sub_log,
                child_dfg,
                self.master_dfg,
                self.initial_dfg,
                child_activities,
                self.counts,
                self.rec_depth + 1,
                self.f,
                noise_threshold=self.noise_threshold,
                start_activities=child_start,
                end_activities=child_end,
                initial_start_activities=self.initial_start_activities,
                initial_end_activities=self.initial_end_activities,
                parameters=self.parameters))
def apply_tree(log, parameters):
    """
    Apply the IM_FF algorithm to a log, obtaining a process tree.

    Parameters
    ----------
    log
        Log (EventLog or pandas DataFrame; a DataFrame is routed through the
        variants-based implementation)
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as
            activity name (default concept:name)
            Parameters.NOISE_THRESHOLD -> noise threshold f (default
            shared_constants.NOISE_THRESHOLD_IMF)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    if type(log) is pd.DataFrame:
        # dataframe input: delegate to the variants-based tree construction
        # (renamed from `vars`, which shadowed the builtin)
        variants = variants_get.get_variants_count(log, parameters=parameters)
        return apply_tree_variants(variants, parameters=parameters)
    else:
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)
        log = converter.apply(log, parameters=parameters)
        # keep only the activity attribute (since the others are not used)
        log = filtering_utils.keep_only_one_attribute_per_event(
            log, activity_key)
        noise_threshold = exec_utils.get_param_value(
            Parameters.NOISE_THRESHOLD, parameters,
            shared_constants.NOISE_THRESHOLD_IMF)
        dfg = [(k, v)
               for k, v in dfg_inst.apply(log, parameters=parameters).items()
               if v > 0]
        c = Counts()
        activities = attributes_filter.get_attribute_values(log, activity_key)
        start_activities = list(
            start_activities_filter.get_start_activities(
                log, parameters=parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                log, parameters=parameters).keys())
        # check for empty traces (compute the trace lengths only once)
        traces_length = [len(trace) for trace in log]
        contains_empty_traces = bool(traces_length) and min(traces_length) == 0
        # set the threshold parameter based on f and the max value in the dfg:
        max_value = max((value for _, value in dfg), default=0)
        threshold = noise_threshold * max_value
        recursion_depth = 0
        sub = subtree.make_tree(
            log,
            dfg,
            dfg,
            dfg,
            activities,
            c,
            recursion_depth,
            noise_threshold,
            threshold,
            start_activities,
            end_activities,
            start_activities,
            end_activities,
            parameters=parameters)
        process_tree = get_tree_repr_implain.get_repr(
            sub, 0, contains_empty_traces=contains_empty_traces)
        # Ensures consistency to the parent pointers in the process tree
        tree_consistency.fix_parent_pointers(process_tree)
        # Fixes a 1 child XOR that is added when single-activities flowers are found
        tree_consistency.fix_one_child_xor_flower(process_tree)
        # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
        process_tree = util.fold(process_tree)
        return process_tree
def trans_alpha(log, parameters=None):
    """
    Variant of the classic alpha miner extended with loop detection based on
    enabling information: each event is expected to carry an 'enabled'
    attribute (set of activities enabled when the event occurred), i.e. a
    translucent event log. Two-length loops that the parallel relation would
    otherwise hide are re-wired into the net at the end.

    :param log: event log; events must provide 'concept:name' and 'enabled'
    :param parameters: optional dict; may set the activity key constant
    :return: (net, initial_marking, final_marking)
    """
    # positive-frequency directly-follows graph of the log
    dfg = {k: v for k, v in dfg_inst.apply(log).items() if v > 0}
    if parameters is None:
        parameters = {}
    if pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pm_util.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    start_activities = endpoints.derive_start_activities_from_log(
        log, parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])
    end_activities = endpoints.derive_end_activities_from_log(
        log, parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])
    # collect every label occurring in the dfg or as a start/end activity
    labels = set()
    for el in dfg:
        labels.add(el[0])
        labels.add(el[1])
    for a in start_activities:
        labels.add(a)
    for a in end_activities:
        labels.add(a)
    labels = list(labels)
    alpha_abstraction = alpha_classic_abstraction.ClassicAlphaAbstraction(
        start_activities, end_activities, dfg)
    # initial ({a}, {b}) pairs from the causal relation, filtered against the
    # parallel relation as in the classic alpha miner
    pairs = list(
        map(
            lambda p: ({p[0]}, {p[1]}),
            filter(
                lambda p: classic.__initial_filter(
                    alpha_abstraction.parallel_relation, p),
                alpha_abstraction.causal_relation)))
    # this part added: find loop candidates among parallel pairs using the
    # 'enabled' information
    parallel_set = alpha_abstraction.parallel_relation
    loop_cand_set = set()
    for rel in parallel_set.copy():
        not_loop_flag = False
        pre_act = rel[0]
        post_act = rel[1]
        for trace in log:
            for i in range(len(trace) - 1):
                if trace[i]['concept:name'] == pre_act and trace[
                        i + 1]['concept:name'] == post_act:
                    pre_en = trace[i]['enabled']
                    if post_act in pre_en:
                        # not loop: post_act was already enabled while
                        # pre_act executed, so they are truly concurrent
                        not_loop_flag = True
                        break
                    else:
                        # loop: keep scanning this trace
                        continue
            # NOTE(review): this unconditional break means only the first
            # trace of the log is ever inspected for each relation — confirm
            # this is intended and not a leftover from debugging
            break
        if not not_loop_flag:
            loop_cand_set.add((pre_act, post_act))
    # a candidate is a loop only if its reverse is also a candidate and it is
    # not a self-loop
    loop_set = set()
    for loop_cand in loop_cand_set:
        if loop_cand[::-1] in loop_cand_set and loop_cand[0] != loop_cand[1]:
            loop_set.add(loop_cand)
    # find loops based on enabling information
    # this part added (end)
    # merge compatible pairs into larger place candidates (classic alpha
    # pair-expansion); note that j starts at i, so each pair is also compared
    # with later ones exactly once
    for i in range(0, len(pairs)):
        t1 = pairs[i]
        for j in range(i, len(pairs)):
            t2 = pairs[j]
            if t1 != t2:
                if t1[0].issubset(t2[0]) or t1[1].issubset(t2[1]):
                    if not (classic.__check_is_unrelated(
                            alpha_abstraction.parallel_relation,
                            alpha_abstraction.causal_relation, t1[0], t2[0]) or
                            classic.__check_is_unrelated(
                                alpha_abstraction.parallel_relation,
                                alpha_abstraction.causal_relation, t1[1],
                                t2[1])):
                        new_alpha_pair = (t1[0] | t2[0], t1[1] | t2[1])
                        if new_alpha_pair not in pairs:
                            pairs.append((t1[0] | t2[0], t1[1] | t2[1]))
    # only maximal pairs become internal places
    internal_places = filter(lambda p: classic.__pair_maximizer(pairs, p),
                             pairs)
    net = petri.petrinet.PetriNet('alpha_classic_net_' + str(time.time()))
    label_transition_dict = {}
    for i in range(0, len(labels)):
        label_transition_dict[labels[i]] = petri.petrinet.PetriNet.Transition(
            labels[i], labels[i])
        net.transitions.add(label_transition_dict[labels[i]])
    # wire each maximal pair as a place with its input/output transitions
    for pair in internal_places:
        place = petri.petrinet.PetriNet.Place(str(pair))
        net.places.add(place)
        for in_arc in pair[0]:
            petri.utils.add_arc_from_to(label_transition_dict[in_arc], place,
                                        net)
        for out_arc in pair[1]:
            petri.utils.add_arc_from_to(place, label_transition_dict[out_arc],
                                        net)
    src = classic.__add_source(net, alpha_abstraction.start_activities,
                               label_transition_dict)
    sink = classic.__add_sink(net, alpha_abstraction.end_activities,
                              label_transition_dict)
    # transitions left fully disconnected are candidates for two-length-loop
    # tails
    loop_tail_set = set()
    for t in label_transition_dict.values():
        # check if two-length-loop
        if len(t.in_arcs) == 0 and len(t.out_arcs) == 0:
            loop_tail_set.add(t)
    for loop_tail in loop_tail_set:
        # NOTE(review): loop_set is always a set here, so `is not None` is
        # always true — possibly meant to test non-emptiness
        if loop_set is not None:
            loop_body = None
            for loop in loop_set:
                if loop[0] == loop_tail.name:
                    # last matching loop wins if several share the same tail
                    loop_body = label_transition_dict[loop[1]]
            if loop_body is not None:
                # connect the tail transition into the loop-body's places in
                # both directions (only the first producing place, all
                # consuming places)
                for place in net.places:
                    for in_arc in place.in_arcs:
                        if in_arc.source == loop_body:
                            petri.utils.add_arc_from_to(
                                place, label_transition_dict[loop_tail.name],
                                net)
                            break
                    for out_arc in place.out_arcs:
                        if out_arc.target == loop_body:
                            petri.utils.add_arc_from_to(
                                label_transition_dict[loop_tail.name], place,
                                net)
    return net, Marking({src: 1}), Marking({sink: 1})