def initialize_tree(self, dfg, initial_dfg, activities, second_iteration=False):
    """
    Set up (or refresh) the structures derived from the dfg of this subtree

    Parameters
    -----------
    dfg
        Directly follows graph of this subtree
    initial_dfg
        Referral directly follows graph that should be taken in account
        adding hidden/loop transitions
    activities
        Activities of this subtree (when None, they are read from dfg)
    second_iteration
        Boolean that indicates if we are executing this method for the
        second time
    """
    self.second_iteration = second_iteration

    self.activities = (
        get_activities_from_dfg(dfg) if activities is None else copy(activities)
    )

    # On the second pass the graph already stored on the subtree is filtered
    # with the noise threshold; on the first pass the argument is copied.
    self.dfg = (
        clean_dfg_based_on_noise_thresh(
            self.dfg, self.activities, self.noise_threshold)
        if second_iteration
        else copy(dfg)
    )
    self.initial_dfg = initial_dfg

    # Edge indexes of the working graph and of the referral graph
    self.outgoing = get_outgoing_edges(self.dfg)
    self.ingoing = get_ingoing_edges(self.dfg)
    self.self_loop_activities = get_activities_self_loop(self.dfg)
    self.initial_outgoing = get_outgoing_edges(self.initial_dfg)
    self.initial_ingoing = get_ingoing_edges(self.initial_dfg)

    # Direction information of the activities
    directions = get_activities_direction(self.dfg, self.activities)
    self.activities_direction = directions
    self.activities_dir_list = get_activities_dirlist(directions)

    # Negated graph and the structures derived from it
    negated = negate(self.dfg)
    self.negated_dfg = negated
    self.negated_activities = get_activities_from_dfg(negated)
    self.negated_outgoing = get_outgoing_edges(negated)
    self.negated_ingoing = get_ingoing_edges(negated)

    self.detected_cut = None
    self.children = []

    # Cut detection is triggered from here only on the second pass
    if second_iteration:
        self.detect_cut(second_iteration=second_iteration)
def apply(dfg, parameters=None):
    """
    Clean Directly-Follows graph based on noise threshold

    Parameters
    -----------
    dfg
        Directly-Follows graph
    parameters
        Possible parameters of the algorithm, including:
            noiseThreshold -> Threshold of noise in the algorithm
            (falls back to filtering_constants.DEFAULT_NOISE_THRESH_DF)

    Returns
    ----------
    newDfg
        Cleaned dfg based on noise threshold
    """
    options = {} if parameters is None else parameters

    if "noiseThreshold" in options:
        threshold = options["noiseThreshold"]
    else:
        threshold = filtering_constants.DEFAULT_NOISE_THRESH_DF

    return clean_dfg_based_on_noise_thresh(
        dfg, get_activities_from_dfg(dfg), threshold)
def apply(dfg, log=None, parameters=None, activities_count=None, measure="frequency"):
    """
    Build the visualization of a directly-follows graph

    Parameters
    -----------
    dfg
        Directly-follows graph
    log
        (if provided) Log used to count the activities when
        activities_count is not given
    parameters
        Parameters of the visualization; the keys read here are
        Parameters.ACTIVITY_KEY, Parameters.FORMAT ("png" by default),
        Parameters.MAX_NO_EDGES_IN_DIAGRAM (75 by default),
        Parameters.START_ACTIVITIES and Parameters.END_ACTIVITIES
    activities_count
        (if provided) Count associated to each activity
    measure
        Measure forwarded to graphviz_visualization

    Returns
    -----------
    gviz
        Result of graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    def option(key, fallback):
        # Single access point to the parameters dictionary
        return exec_utils.get_param_value(key, parameters, fallback)

    activity_key = option(Parameters.ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    image_format = option(Parameters.FORMAT, "png")
    max_edges = option(Parameters.MAX_NO_EDGES_IN_DIAGRAM, 75)
    start_activities = option(Parameters.START_ACTIVITIES, [])
    end_activities = option(Parameters.END_ACTIVITIES, [])

    if activities_count is None:
        if log is None:
            # Without a log every activity of the graph gets a count of 1
            activities_count = dict.fromkeys(
                dfg_utils.get_activities_from_dfg(dfg), 1)
        else:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure=measure,
        max_no_of_edges_in_diagram=max_edges,
        start_activities=start_activities,
        end_activities=end_activities)
def apply(dfg, log=None, parameters=None, activities_count=None, soj_time=None):
    """
    Build the visualization of a directly-follows graph using the
    "performance" measure

    Parameters
    -----------
    dfg
        Directly-follows graph
    log
        (if provided) Log used to obtain activities_count and soj_time
        when they are not given
    parameters
        Parameters of the visualization; the keys read here are
        Parameters.ACTIVITY_KEY, Parameters.FORMAT ("png" by default),
        Parameters.MAX_NO_EDGES_IN_DIAGRAM (100000 by default),
        Parameters.START_ACTIVITIES, Parameters.END_ACTIVITIES and
        Parameters.FONT_SIZE (12 by default, passed on as a string)
    activities_count
        (if provided) Count associated to each activity
    soj_time
        (if provided) Sojourn time associated to each activity

    Returns
    -----------
    gviz
        Result of graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    def option(key, fallback):
        # Single access point to the parameters dictionary
        return exec_utils.get_param_value(key, parameters, fallback)

    activity_key = option(Parameters.ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    image_format = option(Parameters.FORMAT, "png")
    max_edges = option(Parameters.MAX_NO_EDGES_IN_DIAGRAM, 100000)
    start_activities = option(Parameters.START_ACTIVITIES, [])
    end_activities = option(Parameters.END_ACTIVITIES, [])
    font_size = str(option(Parameters.FONT_SIZE, 12))

    activities = dfg_utils.get_activities_from_dfg(dfg)

    if activities_count is None:
        if log is None:
            # Without a log every activity of the graph gets a count of 1
            activities_count = dict.fromkeys(activities, 1)
        else:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)

    if soj_time is None:
        if log is None:
            # Without a log every activity gets a sojourn time of 0
            soj_time = dict.fromkeys(activities, 0)
        else:
            soj_time = soj_time_get.apply(log, parameters=parameters)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure="performance",
        max_no_of_edges_in_diagram=max_edges,
        start_activities=start_activities,
        end_activities=end_activities,
        soj_time=soj_time,
        font_size=font_size)
def initialize_tree(self):
    """
    Initialize the tree: resolve traces and activities of this subtree and
    rebuild every structure derived from them
    """
    if self.activities is not None:
        under_loop = (self.parent is not None
                      and self.parent.detected_cut_add_info == "loop")
        if under_loop:
            self.get_traces_loop()
        else:
            self.traces = self.get_traces_general()
    else:
        # No activities given: take every activity occurring in the traces
        self.activities = list(
            {act for trace in self.traces for act in trace})

    if self.second_iteration:
        self.traces, self.activities = self.clean_traces_noise()

    # Empty traces contribute neither a start nor an end activity
    self.start_activities = list(
        {trace[0] for trace in self.traces if trace})
    self.end_activities = list(
        {trace[-1] for trace in self.traces if trace})
    self.activities_occurrences = Counter(
        act for trace in self.traces for act in trace)

    # The dfg is kept as a list of ((source, target), count) pairs
    follows_count = Counter(
        pair for trace in self.traces for pair in zip(trace, trace[1:]))
    self.dfg = list(follows_count.items())
    self.initial_dfg = self.dfg

    self.outgoing = get_outgoing_edges(self.dfg)
    self.ingoing = get_ingoing_edges(self.dfg)
    self.self_loop_activities = get_activities_self_loop(self.dfg)

    directions = get_activities_direction(self.dfg, self.activities)
    self.activities_direction = directions
    self.activities_dir_list = get_activities_dirlist(directions)

    negated = negate(self.dfg)
    self.negated_dfg = negated
    self.negated_activities = get_activities_from_dfg(negated)
    self.negated_outgoing = get_outgoing_edges(negated)
    self.negated_ingoing = get_ingoing_edges(negated)

    self.contains_empty_traces = any(
        len(trace) == 0 for trace in self.traces)

    # A skip is required by empty traces, unless the parent cut is a xor;
    # the flag stored in rec_must_insert_skip forces it in any case.
    needs_skip = self.contains_empty_traces
    if self.parent is not None and self.parent.detected_cut == "xor":
        needs_skip = False
    self.must_insert_skip = self.rec_must_insert_skip or needs_skip

    if not self.second_iteration:
        self.second_tree = self.clone_second_it()

    self.detected_cut = None
    self.children = []
def dfg_vis(dfg, log=None, parameters=None, activities_count=None, measure="frequency"):
    """
    Build the visualization of a directly-follows graph

    Parameters
    -----------
    dfg
        Directly-follows graph
    log
        (if provided) Log used to count the activities when
        activities_count is not given
    parameters
        Parameters of the visualization; the keys read here are
        PARAMETER_CONSTANT_ACTIVITY_KEY, "maxNoOfEdgesInDiagram"
        (75 by default), "start_activities" and "end_activities"
    activities_count
        (if provided) Count associated to each activity
    measure
        Measure forwarded to graphviz_visualization

    Returns
    -----------
    gviz
        Result of graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(
        PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    max_edges = parameters.get("maxNoOfEdgesInDiagram", 75)
    start_activities = parameters.get("start_activities", [])
    end_activities = parameters.get("end_activities", [])

    if activities_count is None:
        # A "start" entry is added only to counts computed here:
        # the number of traces when a log is available, None otherwise.
        if log is None:
            activities_count = dict.fromkeys(
                dfg_utils.get_activities_from_dfg(dfg), 1)
            activities_count["start"] = None
        else:
            activities_count = attributes_filter.get_attribute_values(
                log, activity_key, parameters=parameters)
            activities_count["start"] = len(log)

    return graphviz_visualization(
        activities_count,
        dfg,
        measure=measure,
        max_no_of_edges_in_diagram=max_edges,
        start_activities=start_activities,
        end_activities=end_activities,
    )
def apply(dfg, log=None, parameters=None, activities_count=None, measure="frequency"):
    """
    Build the visualization of a directly-follows graph

    Parameters
    -----------
    dfg
        Directly-follows graph
    log
        (if provided) Log used to count the activities when
        activities_count is not given
    parameters
        Parameters of the visualization; the keys read here are
        PARAMETER_CONSTANT_ACTIVITY_KEY, "format" ("png" by default),
        "maxNoOfEdgesInDiagram" (75 by default), "start_activities" and
        "end_activities"
    activities_count
        (if provided) Count associated to each activity
    measure
        Measure forwarded to graphviz_visualization

    Returns
    -----------
    gviz
        Result of graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(
        PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    image_format = parameters.get("format", "png")
    max_edges = parameters.get("maxNoOfEdgesInDiagram", 75)
    start_activities = parameters.get("start_activities", [])
    end_activities = parameters.get("end_activities", [])

    if activities_count is None:
        if log is None:
            # Without a log every activity of the graph gets a count of 1
            activities_count = dict.fromkeys(
                dfg_utils.get_activities_from_dfg(dfg), 1)
        else:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure=measure,
        max_no_of_edges_in_diagram=max_edges,
        start_activities=start_activities,
        end_activities=end_activities)
def apply(dfg, parameters=None):
    """
    Converts a DFG into a Petri net with initial and final marking

    The net contains a "source" place, a "sink" place and one place per
    activity. A visible transition links the source to each start activity,
    an invisible transition links each end activity to the sink, and every
    edge (a, b) of the DFG becomes a transition labeled b between the places
    of a and b.

    Parameters
    -------------
    dfg
        Directly-follows graph (its keys are indexed as (source, target))
    parameters
        Parameters; Parameters.START_ACTIVITIES and
        Parameters.END_ACTIVITIES are read, defaulting to the activities
        inferred from the DFG

    Returns
    -------------
    net
        Petri net
    im
        Initial marking (one token in the source place)
    fm
        Final marking (one token in the sink place)
    """
    if parameters is None:
        parameters = {}

    start_activities = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters,
        dfg_utils.infer_start_activities(dfg))
    end_activities = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters,
        dfg_utils.infer_end_activities(dfg))
    activities = dfg_utils.get_activities_from_dfg(dfg)

    net = PetriNet("")
    im = Marking()
    fm = Marking()

    source = PetriNet.Place("source")
    net.places.add(source)
    im[source] = 1

    sink = PetriNet.Place("sink")
    net.places.add(sink)
    fm[sink] = 1

    # One place per activity
    places_corr = {}
    for act in activities:
        act_place = PetriNet.Place(act)
        places_corr[act] = act_place
        net.places.add(act_place)

    # Running counter that keeps the transition names unique
    index = 0

    for act in start_activities:
        if act not in places_corr:
            continue
        index += 1
        trans = PetriNet.Transition(act + "_" + str(index), act)
        net.transitions.add(trans)
        pn_util.add_arc_from_to(source, trans, net)
        pn_util.add_arc_from_to(trans, places_corr[act], net)

    for act in end_activities:
        if act not in places_corr:
            continue
        index += 1
        # Invisible transition (no label) leading to the sink
        inv_trans = PetriNet.Transition(act + "_" + str(index), None)
        net.transitions.add(inv_trans)
        pn_util.add_arc_from_to(places_corr[act], inv_trans, net)
        pn_util.add_arc_from_to(inv_trans, sink, net)

    for edge in dfg.keys():
        src_act, tgt_act = edge[0], edge[1]
        index += 1
        trans = PetriNet.Transition(tgt_act + "_" + str(index), tgt_act)
        net.transitions.add(trans)
        pn_util.add_arc_from_to(places_corr[src_act], trans, net)
        pn_util.add_arc_from_to(trans, places_corr[tgt_act], net)

    return net, im, fm
def detect_cut(initial_dfg, dfg, parent, conf, process, initial_start_activities, initial_end_activities,
               activities):
    """
    Detect generally a cut in the graph (applying all the algorithms)

    The cuts are tried in order: xor, sequential, parallel, loop. For the
    first one that is found, the dfg is filtered on each component and the
    result is handed to save_cut.

    Parameters
    -----------
    initial_dfg
        Referral dfg, forwarded to the parallel cut detection
    dfg
        Directly-follows graph on which the cut is searched
    parent
        Identifier of the parent, forwarded to save_cut; when it is "m" the
        initial start/end activities and the activities are taken from dfg
    conf
        Forwarded to save_cut
    process
        Forwarded to save_cut
    initial_start_activities
        Initial start activities (replaced when parent is "m")
    initial_end_activities
        Initial end activities (replaced when parent is "m")
    activities
        Activities of the graph (replaced when parent is "m")

    Returns
    -----------
    found_cut
        One of "xor", "seq", "par", "loop", "seq2", "flower", or "base_xor"
        when the dfg is empty
    """
    if not dfg:
        print("no DFG or base_xor")
        return "base_xor"

    ingoing = get_ingoing_edges(dfg)
    outgoing = get_outgoing_edges(dfg)
    start_activities = infer_start_activities(dfg)
    end_activities = infer_end_activities(dfg)

    if parent == "m":
        initial_start_activities = start_activities
        initial_end_activities = end_activities
        activities = get_activities_from_dfg(dfg)
    else:
        activities = set(activities)

    def store_components(cut_label, components, first_index=0, show_dfg=False):
        # Filter the dfg on each component and pass the result to save_cut
        for position, component in enumerate(components, start=first_index):
            sub_dfg = filter_dfg_on_act(dfg, component)
            if show_dfg:
                print(sub_dfg)
            save_cut(sub_dfg, component, parent, cut_label, position, conf, process,
                     initial_start_activities, initial_end_activities)

    # 1) xor cut, based on the connected components
    conn_components = detection_utils.get_connected_components(ingoing, outgoing, activities)
    xor_cut = detect_xor_cut(dfg, conn_components)
    if xor_cut[0]:
        print("xor")
        store_components("xor", xor_cut[1])
        return "xor"

    # 2) sequential cut, based on the strongly connected components
    this_nx_graph = detection_utils.transform_dfg_to_directed_nx_graph(activities, dfg)
    strongly_connected_components = [
        list(scc) for scc in nx.strongly_connected_components(this_nx_graph)]
    seq_cut = detect_sequential_cut(dfg, strongly_connected_components)
    if seq_cut[0]:
        print("seq")
        store_components("seq", seq_cut[1], show_dfg=True)
        return "seq"

    # 3) parallel cut, based on the negated dfg
    negated_dfg = detection_utils.negate(dfg)
    negated_ingoing = get_ingoing_edges(negated_dfg)
    negated_outgoing = get_outgoing_edges(negated_dfg)
    par_cut = detect_parallel_cut(this_nx_graph, strongly_connected_components, negated_ingoing,
                                  negated_outgoing, activities, dfg, initial_start_activities,
                                  initial_end_activities, initial_dfg)
    if par_cut[0]:
        print("par")
        # The components of a parallel cut are numbered starting from 1
        store_components("par", par_cut[1], first_index=1)
        return "par"

    # 4) loop cut; without one the result is a flower and nothing is saved
    start_activities = infer_start_activities(dfg)
    end_activities = infer_end_activities(dfg)
    loop_cut = detect_loop_cut(dfg, activities, start_activities, end_activities)
    if not loop_cut[0]:
        print("flower")
        return "flower"

    if loop_cut[2]:
        found_cut = "loop"
        print("loop")
    else:
        found_cut = "seq2"
        print('seq 2')
    store_components(found_cut, loop_cut[1])
    return found_cut
def __init__(self, frequency_dfg, activities=None, start_activities=None, end_activities=None,
             activities_occurrences=None, default_edges_color="#000000", performance_dfg=None,
             dfg_window_2=None, freq_triples=None, net_name=DEFAULT_NET_NAME):
    """
    Initialize a Heuristics Net

    The implementation is based on the original paper on Heuristics Miner, namely:

    Weijters, A. J. M. M., Wil MP van Der Aalst, and AK Alves De Medeiros.
    "Process mining with the heuristics miner-algorithm."
    Technische Universiteit Eindhoven, Tech. Rep. WP 166 (2006): 1-34.

    and it manages to calculate the dependency matrix, the loops of length one and two, and
    the AND measure

    Parameters
    -------------
    frequency_dfg
        Directly-Follows graph (frequency)
    activities
        Activities (read from the frequency DFG when not provided)
    start_activities
        Start activities (inferred from the frequency DFG when not provided)
    end_activities
        End activities (inferred from the frequency DFG when not provided)
    activities_occurrences
        Activities occurrences (computed from the frequency DFG when not provided)
    default_edges_color
        (If provided) Default edges color
    performance_dfg
        Performance DFG
    dfg_window_2
        DFG window 2
    freq_triples
        Frequency triples
    net_name
        (If provided) name of the heuristics net
    """
    # Resolve the optional arguments that can be derived from the frequency DFG
    if activities is None:
        activities = dfg_utils.get_activities_from_dfg(frequency_dfg)
    if start_activities is None:
        start_activities = dfg_utils.infer_start_activities(frequency_dfg)
    if end_activities is None:
        end_activities = dfg_utils.infer_end_activities(frequency_dfg)
    if activities_occurrences is None:
        activities_occurrences = {
            act: dfg_utils.sum_activities_count(frequency_dfg, [act])
            for act in activities
        }

    # Name, start/end activities and edges color are stored as one-element lists
    self.net_name = [net_name]
    self.start_activities = [start_activities]
    self.end_activities = [end_activities]
    self.default_edges_color = [default_edges_color]

    self.dfg = frequency_dfg
    self.performance_dfg = performance_dfg
    self.node_type = "performance" if performance_dfg is not None else "frequency"
    self.activities = activities
    self.activities_occurrences = activities_occurrences
    self.dfg_window_2 = dfg_window_2
    self.freq_triples = freq_triples

    # Containers that start empty
    self.nodes = {}
    self.dependency_matrix = {}
    self.dfg_matrix = {}
    self.dfg_window_2_matrix = {}
    self.freq_triples_matrix = {}
def apply(dfg: Dict[Tuple[str, str], int], log: EventLog = None, parameters: Optional[Dict[Any, Any]] = None,
          activities_count: Dict[str, int] = None, soj_time: Dict[str, float] = None) -> Digraph:
    """
    Visualize a frequency directly-follows graph

    Parameters
    -----------------
    dfg
        Frequency Directly-follows graph
    log
        (if provided) Event log for the calculation of statistics
    activities_count
        (if provided) Dictionary associating to each activity the number of occurrences in the log.
    soj_time
        (if provided) Dictionary associating to each activity the average sojourn time
    parameters
        Variant-specific parameters

    Returns
    -----------------
    gviz
        Graphviz digraph
    """
    if parameters is None:
        parameters = {}

    def option(key, fallback):
        # Single access point to the parameters dictionary
        return exec_utils.get_param_value(key, parameters, fallback)

    activity_key = option(Parameters.ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    image_format = option(Parameters.FORMAT, "png")
    max_edges = option(Parameters.MAX_NO_EDGES_IN_DIAGRAM, 100000)
    start_activities = option(Parameters.START_ACTIVITIES, {})
    end_activities = option(Parameters.END_ACTIVITIES, {})
    font_size = str(option(Parameters.FONT_SIZE, 12))
    activities = dfg_utils.get_activities_from_dfg(dfg)
    bgcolor = option(Parameters.BGCOLOR, "transparent")
    stat_locale = option(Parameters.STAT_LOCALE, None)
    if stat_locale is None:
        stat_locale = {}

    if activities_count is None:
        if log is None:
            # Lower bound of the frequency of an activity: the sum of the
            # weights of its incoming arcs, plus its start frequency when
            # the start activities come with their counts (a dictionary).
            activities_count = Counter(dict.fromkeys(activities, 0))
            for edge, weight in dfg.items():
                activities_count[edge[1]] += weight
            if isinstance(start_activities, dict):
                for act, weight in start_activities.items():
                    activities_count[act] += weight
        else:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)

    if soj_time is None:
        if log is None:
            # Without a log every activity gets a sojourn time of 0
            soj_time = dict.fromkeys(activities, 0)
        else:
            soj_time = soj_time_get.apply(log, parameters=parameters)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure="frequency",
        max_no_of_edges_in_diagram=max_edges,
        start_activities=start_activities,
        end_activities=end_activities,
        soj_time=soj_time,
        font_size=font_size,
        bgcolor=bgcolor,
        stat_locale=stat_locale)