def __init__(self, dfg, master_dfg, initial_dfg, activities, counts, rec_depth, noise_threshold=0,
             start_activities=None, end_activities=None, initial_start_activities=None,
             initial_end_activities=None):
    """
    Constructor

    Parameters
    -----------
    dfg
        Directly follows graph of this subtree
    master_dfg
        Original DFG
    initial_dfg
        Referral directly follows graph that should be taken in account adding hidden/loop transitions
    activities
        Activities of this subtree
    counts
        Shared variable
    rec_depth
        Current recursion depth
    noise_threshold
        Noise threshold
    start_activities
        Start activities of this subtree (defaults to an empty list)
    end_activities
        End activities of this subtree (defaults to an empty list)
    initial_start_activities
        Start activities of the log (inferred from the master DFG when not given)
    initial_end_activities
        End activities of the log (inferred from the master DFG when not given)
    """
    self.master_dfg = copy(master_dfg)
    self.initial_dfg = copy(initial_dfg)
    self.counts = counts
    self.rec_depth = rec_depth
    self.noise_threshold = noise_threshold

    # explicit empty lists when start/end activities are not supplied
    self.start_activities = [] if start_activities is None else start_activities
    self.end_activities = [] if end_activities is None else end_activities

    # when not supplied, derive the log-level start/end activities from the master DFG
    if initial_start_activities is None:
        initial_start_activities = infer_start_activities(master_dfg)
    self.initial_start_activities = initial_start_activities
    if initial_end_activities is None:
        initial_end_activities = infer_end_activities(master_dfg)
    self.initial_end_activities = initial_end_activities

    # state populated later by initialize_tree / cut detection
    self.second_iteration = None
    self.activities = None
    self.dfg = None
    self.outgoing = None
    self.ingoing = None
    self.self_loop_activities = None
    self.initial_ingoing = None
    self.initial_outgoing = None
    self.activities_direction = None
    self.activities_dir_list = None
    self.negated_dfg = None
    self.negated_activities = None
    self.negated_outgoing = None
    self.negated_ingoing = None
    self.detected_cut = None
    self.children = None
    self.must_insert_skip = False
    self.need_loop_on_subtree = False

    self.initialize_tree(dfg, initial_dfg, activities)
def apply(dfg, output_path, parameters=None):
    """
    Exports a DFG into a .dfg file

    Parameters
    ----------------
    dfg
        Directly-Follows Graph
    output_path
        Output path
    parameters
        Parameters of the algorithm, including:
            Parameters.START_ACTIVITIES => Start activities of the DFG
            Parameters.END_ACTIVITIES => End activities of the DFG

    Raises
    ----------------
    Exception
        If the start (or end) activities are neither provided through the
        parameters nor inferrable from the DFG.
    """
    if parameters is None:
        parameters = {}

    start_activities = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters,
        Counter(dfg_utils.infer_start_activities(dfg)))
    end_activities = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters,
        Counter(dfg_utils.infer_end_activities(dfg)))

    if len(start_activities) == 0:
        raise Exception(
            "error: impossible to determine automatically the start activities from the DFG. Please specify them manually through the START_ACTIVITIES parameter"
        )
    if len(end_activities) == 0:
        raise Exception(
            "error: impossible to determine automatically the end activities from the DFG. Please specify them manually through the END_ACTIVITIES parameter"
        )

    # the list position of each activity is the numeric index used by the format
    activities = list(set(x[0] for x in dfg).union(set(x[1] for x in dfg)))

    # FIX: use a context manager so the file handle is closed even if a write
    # raises (the original used open()/close() with no try/finally)
    with open(output_path, "w") as F:
        # header: number of activities, then one activity label per line
        F.write("%d\n" % (len(activities)))
        for act in activities:
            F.write("%s\n" % (act))
        # start activities: count, then "<activity index>x<frequency>" lines
        F.write("%d\n" % (len(start_activities)))
        for act, count in start_activities.items():
            F.write("%dx%d\n" % (activities.index(act), count))
        # end activities: same encoding as the start activities
        F.write("%d\n" % (len(end_activities)))
        for act, count in end_activities.items():
            F.write("%dx%d\n" % (activities.index(act), count))
        # edges: "<source index>><target index>x<frequency>"
        for el, count in dfg.items():
            F.write("%d>%dx%d\n" %
                    (activities.index(el[0]), activities.index(el[1]), count))
def export_line_by_line(dfg, parameters=None):
    """
    Exports a DFG into the .dfg format, yielding the file content line by line

    Parameters
    --------------
    dfg
        DFG
    parameters
        Parameters of the algorithm

    Returns
    --------------
    line
        Lines of the .dfg file (yielded one-by-one)
    """
    if parameters is None:
        parameters = {}

    sa = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters,
        Counter(dfg_utils.infer_start_activities(dfg)))
    ea = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters,
        Counter(dfg_utils.infer_end_activities(dfg)))

    if not sa:
        raise Exception(
            "error: impossible to determine automatically the start activities from the DFG. Please specify them manually through the START_ACTIVITIES parameter"
        )
    if not ea:
        raise Exception(
            "error: impossible to determine automatically the end activities from the DFG. Please specify them manually through the END_ACTIVITIES parameter"
        )

    # the list position of each activity is the numeric index used by the format
    acts = list({x[0] for x in dfg} | {x[1] for x in dfg})

    # header: number of activities, then one activity label per line
    yield "%d\n" % (len(acts))
    for act in acts:
        yield "%s\n" % (act)
    # start activities: count, then "<activity index>x<frequency>" lines
    yield "%d\n" % (len(sa))
    for act, count in sa.items():
        yield "%dx%d\n" % (acts.index(act), count)
    # end activities: same encoding as the start activities
    yield "%d\n" % (len(ea))
    for act, count in ea.items():
        yield "%dx%d\n" % (acts.index(act), count)
    # edges: "<source index>><target index>x<frequency>"
    for edge, count in dfg.items():
        yield "%d>%dx%d\n" % (acts.index(edge[0]), acts.index(edge[1]), count)
def detect_loop_cut(dfg, activities, start_activities, end_activities):
    """
    Detect loop cut

    Parameters
    --------------
    dfg
        DFG
    activities
        Activities of the subtree
    start_activities
        Start activities of the subtree
    end_activities
        End activities of the subtree

    Returns
    --------------
    result
        On success: [True, [do_part + exit_part, redo_part], True, <bool: some
        end activities are also start activities>]; otherwise [False, [], False]
    """
    all_sa = start_activities
    # keep only the end activities that are still end activities of this DFG
    all_ea = list(set(end_activities).intersection(set(infer_end_activities(dfg))))
    start_activities = all_sa
    end_activities = list(set(all_ea) - set(all_sa))
    start_act_that_are_also_end = list(set(all_ea) - set(end_activities))

    # seed the "do" part with the start activities and the "exit" part with the ends
    do_part = list(start_activities)
    exit_part = list(end_activities)
    redo_part = []
    dangerous_redo_part = []

    for act in activities:
        if act in start_activities or act in end_activities:
            continue
        inputs = get_all_activities_connected_as_input_to_activity(dfg, act)
        outputs = get_all_activities_connected_as_output_to_activity(dfg, act)
        # an activity belongs to the redo part iff its outputs are exactly
        # the set of start activities
        if set(outputs) == set(start_activities):
            if inputs.intersection(exit_part):
                # a redo activity fed directly by an exit activity
                dangerous_redo_part.append(act)
            redo_part.append(act)
        else:
            do_part.append(act)

    if (len(do_part) + len(exit_part)) > 0 and len(redo_part) > 0:
        return [
            True, [do_part + exit_part, redo_part], True,
            len(start_act_that_are_also_end) > 0
        ]
    return [False, [], False]
def apply(dfg, parameters=None):
    """
    Applies the DFG mining on a given object (if it is a Pandas dataframe or a
    log_skeleton, the DFG is calculated)

    Parameters
    -------------
    dfg
        Object (DFG) (if it is a Pandas dataframe or a log_skeleton, the DFG is calculated)
    parameters
        Parameters

    Returns
    -------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    if parameters is None:
        parameters = {}
    # FIX: removed the no-op self-assignment "dfg = dfg" present in the original

    start_activities = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters,
        dfg_utils.infer_start_activities(dfg))
    end_activities = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters,
        dfg_utils.infer_end_activities(dfg))
    activities = dfg_utils.get_activities_from_dfg(dfg)

    net = PetriNet("")
    im = Marking()
    fm = Marking()

    # single source place (initially marked) and sink place (finally marked)
    source = PetriNet.Place("source")
    net.places.add(source)
    im[source] = 1
    sink = PetriNet.Place("sink")
    net.places.add(sink)
    fm[sink] = 1

    # one place per activity
    places_corr = {}
    index = 0
    for act in activities:
        places_corr[act] = PetriNet.Place(act)
        net.places.add(places_corr[act])

    # visible transition from the source into each start activity's place;
    # "index" makes every transition name unique across the three loops
    for act in start_activities:
        if act in places_corr:
            index = index + 1
            trans = PetriNet.Transition(act + "_" + str(index), act)
            net.transitions.add(trans)
            pn_util.add_arc_from_to(source, trans, net)
            pn_util.add_arc_from_to(trans, places_corr[act], net)

    # invisible (unlabelled) transition from each end activity's place to the sink
    for act in end_activities:
        if act in places_corr:
            index = index + 1
            inv_trans = PetriNet.Transition(act + "_" + str(index), None)
            net.transitions.add(inv_trans)
            pn_util.add_arc_from_to(places_corr[act], inv_trans, net)
            pn_util.add_arc_from_to(inv_trans, sink, net)

    # one visible transition per DFG edge, labelled with the target activity
    for act1, act2 in dfg.keys():
        index = index + 1
        trans = PetriNet.Transition(act2 + "_" + str(index), act2)
        net.transitions.add(trans)
        pn_util.add_arc_from_to(places_corr[act1], trans, net)
        pn_util.add_arc_from_to(trans, places_corr[act2], net)

    return net, im, fm
def __init__(self, dfg, master_dfg, initial_dfg, activities, counts, rec_depth, noise_threshold=0,
             initial_start_activities=None, initial_end_activities=None):
    """
    Constructor

    Parameters
    -----------
    dfg
        Directly follows graph of this subtree
    master_dfg
        Original DFG
    initial_dfg
        Referral directly follows graph that should be taken in account adding hidden/loop transitions
    activities
        Activities of this subtree
    counts
        Shared variable
    rec_depth
        Current recursion depth
    noise_threshold
        Noise threshold
    initial_start_activities
        Start activities of the log (inferred from the master DFG when not given)
    initial_end_activities
        End activities of the log (inferred from the master DFG when not given)
    """
    self.master_dfg = copy(master_dfg)
    self.initial_dfg = copy(initial_dfg)
    self.counts = counts
    self.rec_depth = rec_depth
    self.noise_threshold = noise_threshold

    # when not supplied, derive the log-level start/end activities from the master DFG
    if initial_start_activities is None:
        initial_start_activities = infer_start_activities(master_dfg)
    self.initial_start_activities = initial_start_activities
    if initial_end_activities is None:
        initial_end_activities = infer_end_activities(master_dfg)
    self.initial_end_activities = initial_end_activities

    # state populated by initialize_tree / cut detection
    self.second_iteration = None
    self.activities = None
    self.dfg = None
    self.outgoing = None
    self.ingoing = None
    self.self_loop_activities = None
    self.initial_ingoing = None
    self.initial_outgoing = None
    self.activities_direction = None
    self.activities_dir_list = None
    self.negated_dfg = None
    self.negated_activities = None
    self.negated_outgoing = None
    self.negated_ingoing = None
    self.detected_cut = None
    self.children = None
    self.must_insert_skip = False
    self.need_loop_on_subtree = False

    self.initialize_tree(dfg, initial_dfg, activities)

    # restrict the log-level start/end activities to this subtree's alphabet
    subtree_acts = set(self.activities)
    self.initial_start_activities = list(set(self.initial_start_activities) & subtree_acts)
    self.initial_end_activities = list(set(self.initial_end_activities) & subtree_acts)

    if rec_depth > 0:
        # deeper subtrees also pick up start/end activities inferred from the
        # local DFG and from connections with the referral DFG
        sa = set(self.initial_start_activities)
        sa = sa.union(infer_start_activities(self.dfg))
        sa = sa.union(infer_start_activities_from_prev_connections_and_current_dfg(
            self.initial_dfg, self.dfg, self.activities))
        self.start_activities = list(sa.intersection(self.activities))

        ea = set(self.initial_end_activities)
        ea = ea.union(infer_end_activities(self.dfg))
        ea = ea.union(infer_end_activities_from_succ_connections_and_current_dfg(
            self.initial_dfg, self.dfg, self.activities))
        self.end_activities = list(ea.intersection(self.activities))
    else:
        # the root subtree uses the log-level start/end activities directly
        self.start_activities = self.initial_start_activities
        self.end_activities = self.initial_end_activities

    self.detect_cut()
def apply_dfg_sa_ea(
        dfg: Dict[str, int],
        start_activities: Union[None, Dict[str, int]],
        end_activities: Union[None, Dict[str, int]],
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Tuple[PetriNet, Marking, Marking]:
    """
    Applying Alpha Miner starting from the knowledge of the Directly Follows graph,
    and of the start activities and end activities in the log (possibly inferred from the DFG)

    Parameters
    ------------
    dfg
        Directly-Follows graph
    start_activities
        Start activities (inferred from the DFG when None)
    end_activities
        End activities (inferred from the DFG when None)
    parameters
        Parameters of the algorithm including:
            activity key -> name of the attribute that contains the activity

    Returns
    -------
    net : :class:`pm4py.entities.petri.petrinet.PetriNet`
        A Petri net describing the event log that is provided as an input
    initial marking : :class:`pm4py.models.net.Marking`
        marking object representing the initial marking
    final marking : :class:`pm4py.models.net.Marking`
        marking object representing the final marking, not guaranteed that it is actually reachable!
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pm_util.xes_constants.DEFAULT_NAME_KEY)
    # infer start/end activities from the DFG when not explicitly provided
    if start_activities is None:
        start_activities = dfg_utils.infer_start_activities(dfg)
    if end_activities is None:
        end_activities = dfg_utils.infer_end_activities(dfg)
    # collect every activity label appearing in the DFG or among start/end activities
    labels = set()
    for el in dfg:
        labels.add(el[0])
        labels.add(el[1])
    for a in start_activities:
        labels.add(a)
    for a in end_activities:
        labels.add(a)
    labels = list(labels)
    alpha_abstraction = alpha_classic_abstraction.ClassicAlphaAbstraction(
        start_activities, end_activities, dfg, activity_key=activity_key)
    # seed the candidate place pairs with singleton causal relations that pass
    # the initial parallelism filter
    pairs = list(
        map(
            lambda p: ({p[0]}, {p[1]}),
            filter(
                lambda p: __initial_filter(alpha_abstraction.parallel_relation,
                                           p),
                alpha_abstraction.causal_relation)))
    # NOTE: "pairs" grows while being iterated. The outer range is fixed at loop
    # entry, but range(i, len(pairs)) is re-evaluated for each i, so pairs merged
    # and appended during iteration are still compared against earlier seeds.
    for i in range(0, len(pairs)):
        t1 = pairs[i]
        for j in range(i, len(pairs)):
            t2 = pairs[j]
            if t1 != t2:
                if t1[0].issubset(t2[0]) or t1[1].issubset(t2[1]):
                    # merge two pairs only when neither side mixes unrelated activities
                    if not (__check_is_unrelated(
                            alpha_abstraction.parallel_relation,
                            alpha_abstraction.causal_relation, t1[0],
                            t2[0]) or __check_is_unrelated(
                                alpha_abstraction.parallel_relation,
                                alpha_abstraction.causal_relation, t1[1],
                                t2[1])):
                        new_alpha_pair = (t1[0] | t2[0], t1[1] | t2[1])
                        if new_alpha_pair not in pairs:
                            pairs.append((t1[0] | t2[0], t1[1] | t2[1]))
    # keep only maximal pairs; each surviving pair becomes an internal place
    internal_places = filter(lambda p: __pair_maximizer(pairs, p), pairs)
    net = PetriNet('alpha_classic_net_' + str(time.time()))
    # one visible transition per activity label
    label_transition_dict = {}
    for i in range(0, len(labels)):
        label_transition_dict[labels[i]] = PetriNet.Transition(
            labels[i], labels[i])
        net.transitions.add(label_transition_dict[labels[i]])

    src = __add_source(net, alpha_abstraction.start_activities,
                       label_transition_dict)
    sink = __add_sink(net, alpha_abstraction.end_activities,
                      label_transition_dict)

    # wire each maximal pair as a place between its input and output transitions
    for pair in internal_places:
        place = PetriNet.Place(str(pair))
        net.places.add(place)
        for in_arc in pair[0]:
            add_arc_from_to(label_transition_dict[in_arc], place, net)
        for out_arc in pair[1]:
            add_arc_from_to(place,
                            label_transition_dict[out_arc], net)
    return net, Marking({src: 1}), Marking({sink: 1})
def detect_cut(initial_dfg, dfg, parent, conf, process, initial_start_activities, initial_end_activities,
               activities):
    """
    Detect generally a cut in the graph (applying all the algorithms)

    The cut types are tried in a fixed order: xor, seq, par, loop, seq2, flower.
    Each detected cut is persisted through save_cut, once per component.

    Parameters
    ------------
    initial_dfg
        Referral DFG (passed to the parallel-cut detection)
    dfg
        DFG of the current subtree
    parent
        Identifier of the parent cut; "m" presumably marks the root/master
        call (TODO confirm with the caller)
    conf
        Configuration value forwarded to save_cut
    process
        Process identifier forwarded to save_cut
    initial_start_activities
        Start activities of the log (recomputed here when parent == "m")
    initial_end_activities
        End activities of the log (recomputed here when parent == "m")
    activities
        Activities of the current subtree (ignored and recomputed when parent == "m")

    Returns
    ------------
    found_cut
        String naming the detected cut ("xor", "seq", "par", "loop", "seq2",
        "flower"), or "base_xor" when the DFG is empty
    """
    if dfg:
        # print('DFG' + str(dfg) + ' will be cut on ' + str(conf))
        # print(dfg)
        # Find in order: xor, seq, par, loop, seq, flower
        ingoing = get_ingoing_edges(dfg)
        outgoing = get_outgoing_edges(dfg)
        start_activities = infer_start_activities(dfg)
        end_activities = infer_end_activities(dfg)
        if parent == "m":
            # root call: derive the log-level start/end activities and the
            # activity set directly from the DFG
            initial_start_activities = start_activities
            initial_end_activities = end_activities
            activities = get_activities_from_dfg(dfg)
        else:
            activities = set(activities)
        conn_components = detection_utils.get_connected_components(ingoing, outgoing, activities)
        # print("Init Start: " + str(initial_start_activities) + ", Init End: " + str(initial_end_activities))
        # print(activities)

        # 1) xor cut: one component per weakly-connected component
        xor_cut = detect_xor_cut(dfg, conn_components)
        if xor_cut[0]:
            found_cut = "xor"
            print(found_cut)
            for index, comp in enumerate(xor_cut[1]):
                # print(comp)
                filtered_dfg = filter_dfg_on_act(dfg, comp)
                save_cut(filtered_dfg, comp, parent, found_cut, index, conf, process, initial_start_activities,
                         initial_end_activities)
        else:
            # 2) sequential cut, based on the strongly connected components
            this_nx_graph = detection_utils.transform_dfg_to_directed_nx_graph(activities, dfg)
            strongly_connected_components = [list(x) for x in nx.strongly_connected_components(this_nx_graph)]
            # print(strongly_connected_components)
            seq_cut = detect_sequential_cut(dfg, strongly_connected_components)
            if seq_cut[0]:
                found_cut = "seq"
                print("seq")
                for index, comp in enumerate(seq_cut[1]):
                    # print(comp)
                    filter_dfg = filter_dfg_on_act(dfg, comp)
                    print(filter_dfg)
                    save_cut(filter_dfg, comp, parent, found_cut, index, conf, process, initial_start_activities,
                             initial_end_activities)
                # self.put_skips_in_seq_cut()?
            else:
                # 3) parallel cut, detected on the negated DFG
                negated_dfg = detection_utils.negate(dfg)
                negated_ingoing = get_ingoing_edges(negated_dfg)
                negated_outgoing = get_outgoing_edges(negated_dfg)
                par_cut = detect_parallel_cut(this_nx_graph, strongly_connected_components, negated_ingoing,
                                              negated_outgoing, activities, dfg, initial_start_activities,
                                              initial_end_activities, initial_dfg)
                if par_cut[0]:
                    found_cut = "par"
                    print("par")
                    # NOTE: unlike the other branches, the component index here
                    # is 1-based (i is incremented before use)
                    i = 0
                    for comp in par_cut[1]:
                        i += 1
                        # print(comp)
                        filtter_dfg = filter_dfg_on_act(dfg, comp)
                        save_cut(filtter_dfg, comp, parent, found_cut, i, conf, process, initial_start_activities,
                                 initial_end_activities)
                else:
                    # 4) loop cut
                    start_activities = infer_start_activities(dfg)
                    end_activities = infer_end_activities(dfg)
                    loop_cut = detect_loop_cut(dfg, activities, start_activities, end_activities)
                    if loop_cut[0]:
                        if loop_cut[2]:
                            found_cut = "loop"
                            print("loop")
                            for index, comp in enumerate(loop_cut[1]):
                                # print(comp)
                                filter_dfg = filter_dfg_on_act(dfg, comp)
                                save_cut(filter_dfg, comp, parent, found_cut, index, conf, process,
                                         initial_start_activities, initial_end_activities)
                            # if loop_cut[3]:
                            #     insert_skip
                        else:
                            # loop cut found but flagged as a sequence-like split
                            found_cut = "seq2"
                            print('seq 2')
                            # self.need_loop_on_subtree = True
                            for index, comp in enumerate(loop_cut[1]):
                                # print(comp)
                                filter_dfg = filter_dfg_on_act(dfg, comp)
                                save_cut(filter_dfg, comp, parent, found_cut, index, conf, process,
                                         initial_start_activities, initial_end_activities)
                            # insert_skip
                    else:
                        # 5) fallback: flower model (nothing is persisted here)
                        pass
                        found_cut = "flower"
                        print("flower")
                        # save_cut(dfg, comp, parent, found_cut, 0, conf, process)
        return found_cut
    else:
        # empty DFG: nothing to cut
        print("no DFG or base_xor")
        return "base_xor"
def __init__(self, frequency_dfg, activities=None, start_activities=None, end_activities=None,
             activities_occurrences=None, default_edges_color="#000000", performance_dfg=None,
             dfg_window_2=None, freq_triples=None, net_name=DEFAULT_NET_NAME):
    """
    Initialize an Hueristics Net

    The implementation is based on the original paper on Heuristics Miner, namely:

    Weijters, A. J. M. M., Wil MP van Der Aalst, and AK Alves De Medeiros.
    "Process mining with the heuristics miner-algorithm."
    Technische Universiteit Eindhoven, Tech. Rep. WP 166 (2006): 1-34.

    and it manages to calculate the dependency matrix, the loops of length one and two, and the AND measure

    Parameters
    -------------
    frequency_dfg
        Directly-Follows graph (frequency)
    activities
        Activities
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities occurrences
    default_edges_color
        (If provided) Default edges color
    performance_dfg
        Performance DFG
    dfg_window_2
        DFG window 2
    freq_triples
        Frequency triples
    net_name
        (If provided) name of the heuristics net
    """
    self.net_name = [net_name]
    self.nodes = {}
    self.dependency_matrix = {}
    self.dfg_matrix = {}

    self.dfg = frequency_dfg
    self.performance_dfg = performance_dfg
    # a performance DFG switches the annotation mode of the net
    self.node_type = "performance" if self.performance_dfg is not None else "frequency"

    # derive the activity set from the DFG when it is not given
    self.activities = activities if activities is not None else dfg_utils.get_activities_from_dfg(frequency_dfg)

    # start/end activities are stored wrapped in singleton lists
    if start_activities is not None:
        self.start_activities = [start_activities]
    else:
        self.start_activities = [dfg_utils.infer_start_activities(frequency_dfg)]
    if end_activities is not None:
        self.end_activities = [end_activities]
    else:
        self.end_activities = [dfg_utils.infer_end_activities(frequency_dfg)]

    self.activities_occurrences = activities_occurrences
    if self.activities_occurrences is None:
        # fall back to summing the DFG counts per activity
        self.activities_occurrences = {
            act: dfg_utils.sum_activities_count(frequency_dfg, [act])
            for act in self.activities
        }

    self.default_edges_color = [default_edges_color]
    self.dfg_window_2 = dfg_window_2
    self.dfg_window_2_matrix = {}
    self.freq_triples = freq_triples
    self.freq_triples_matrix = {}
def apply(dfg: Dict[Tuple[str, str], int], parameters: Optional[Dict[Any, Any]] = None):
    """
    Applies the DFG mining on a given object (if it is a Pandas dataframe or a log,
    the DFG is calculated)

    Parameters
    -------------
    dfg
        Object (DFG) (if it is a Pandas dataframe or a log, the DFG is calculated)
    parameters
        Parameters:
        - Parameters.START_ACTIVITIES: the start activities of the DFG
        - Parameters.END_ACTIVITIES: the end activities of the DFG

    Returns
    -------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    if parameters is None:
        parameters = {}

    start_activities = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters,
        {x: 1 for x in dfg_utils.infer_start_activities(dfg)})
    end_activities = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters,
        {x: 1 for x in dfg_utils.infer_end_activities(dfg)})
    artificial_start_activity = exec_utils.get_param_value(
        Parameters.PARAM_ARTIFICIAL_START_ACTIVITY, parameters,
        constants.DEFAULT_ARTIFICIAL_START_ACTIVITY)
    artificial_end_activity = exec_utils.get_param_value(
        Parameters.PARAM_ARTIFICIAL_END_ACTIVITY, parameters,
        constants.DEFAULT_ARTIFICIAL_END_ACTIVITY)

    # extend the DFG with edges from the artificial start activity and to the
    # artificial end activity, weighted by the start/end activity counts
    enriched_dfg = copy(dfg)
    for act in start_activities:
        enriched_dfg[(artificial_start_activity, act)] = start_activities[act]
    for act in end_activities:
        enriched_dfg[(act, artificial_end_activity)] = end_activities[act]

    # every endpoint of the enriched DFG is an activity
    activities = set()
    for couple in enriched_dfg:
        activities.add(couple[0])
        activities.add(couple[1])

    net = PetriNet("")
    im = Marking()
    fm = Marking()

    # per activity: an entry place, the activity transition, and an exit place
    entry_place = {}
    exit_place = {}
    act_trans = {}
    for act in activities:
        p_in = PetriNet.Place("source_" + act)
        p_out = PetriNet.Place("sink_" + act)
        t = PetriNet.Transition("trans_" + act, act)
        # the artificial activities become invisible (unlabelled) transitions
        if act == artificial_start_activity or act == artificial_end_activity:
            t.label = None
        net.places.add(p_in)
        net.places.add(p_out)
        net.transitions.add(t)
        petri_utils.add_arc_from_to(p_in, t, net)
        petri_utils.add_arc_from_to(t, p_out, net)
        entry_place[act] = p_in
        exit_place[act] = p_out
        act_trans[act] = t

    # connect each DFG edge through an invisible transition between the
    # source's exit place and the target's entry place
    for src, trg in enriched_dfg:
        hidden = PetriNet.Transition(src + "_" + trg, None)
        net.transitions.add(hidden)
        petri_utils.add_arc_from_to(exit_place[src], hidden, net)
        petri_utils.add_arc_from_to(hidden, entry_place[trg], net)

    # the artificial start's entry place is initially marked; the artificial
    # end's exit place is finally marked
    im[entry_place[artificial_start_activity]] = 1
    fm[exit_place[artificial_end_activity]] = 1

    return net, im, fm