def initialize_tree(self, dfg, log, initial_dfg, activities, second_iteration=False, end_call=True,
                    parameters=None):
    """
    (Re)initialize the state of this subtree and launch the cut detection

    Parameters
    -----------
    dfg
        Directly follows graph of this subtree
    log
        Event log, stored both as the current and as the original log of this subtree
    initial_dfg
        Referral directly follows graph that should be taken in account adding hidden/loop transitions
    activities
        Activities of this subtree (when None, they are extracted from dfg)
    second_iteration
        Boolean that indicates if we are executing this method for the second time. In that case
        the DFG already stored on the object is cleaned using the noise threshold of the object,
        and the dfg argument is not copied
    end_call
        Not read by this method
    parameters
        Parameters, stored on the object and forwarded to the cut detection
    """
    # plain bookkeeping that does not depend on any computed structure
    self.second_iteration = second_iteration
    self.initial_dfg = initial_dfg
    self.log = log
    self.original_log = log
    self.parameters = parameters
    self.detected_cut = None
    self.children = []

    self.activities = get_activities_from_dfg(dfg) if activities is None else copy(activities)

    # on the second pass the DFG stored on the object is reused (and filtered),
    # otherwise a copy of the provided one is kept
    self.dfg = (
        clean_dfg_based_on_noise_thresh(self.dfg, self.activities, self.noise_threshold)
        if second_iteration
        else copy(dfg)
    )

    # structures derived from the DFG of this subtree
    self.outgoing = get_outgoing_edges(self.dfg)
    self.ingoing = get_ingoing_edges(self.dfg)
    self.self_loop_activities = get_activities_self_loop(self.dfg)

    # structures derived from the referral DFG
    self.initial_outgoing = get_outgoing_edges(self.initial_dfg)
    self.initial_ingoing = get_ingoing_edges(self.initial_dfg)

    # structures derived from the negated DFG
    negated = negate(self.dfg)
    self.negated_dfg = negated
    self.negated_activities = get_activities_from_dfg(negated)
    self.negated_outgoing = get_outgoing_edges(negated)
    self.negated_ingoing = get_ingoing_edges(negated)

    # NOTE(review): the cut detection is always invoked with second_iteration=False,
    # regardless of the flag received by this call - confirm this is intended
    self.detect_cut(second_iteration=False, parameters=parameters)
def apply(dfg, parameters=None):
    """
    Clean Directly-Follows graph based on noise threshold

    Parameters
    -----------
    dfg
        Directly-Follows graph
    parameters
        Possible parameters of the algorithm, including:
            noiseThreshold -> Threshold of noise in the algorithm
            (when missing, filtering_constants.DEFAULT_NOISE_THRESH_DF is used)

    Returns
    ----------
    newDfg
        Cleaned dfg based on noise threshold
    """
    params = {} if parameters is None else parameters

    if "noiseThreshold" in params:
        threshold = params["noiseThreshold"]
    else:
        threshold = filtering_constants.DEFAULT_NOISE_THRESH_DF

    # the set of activities is always derived from the graph itself
    return clean_dfg_based_on_noise_thresh(dfg, get_activities_from_dfg(dfg), threshold)
def apply(dfg, log=None, parameters=None, activities_count=None, measure="frequency"):
    """
    Prepare the inputs and delegate the rendering of a Directly-Follows graph
    to graphviz_visualization

    Parameters
    -------------
    dfg
        Directly-Follows graph
    log
        (Optional) log from which the activities count is obtained when
        activities_count is not provided
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute used as activity
            (default: xes.DEFAULT_NAME_KEY)
            format -> format of the image (default: png)
            maxNoOfEdgesInDiagram -> maximum number of edges in the diagram (default: 75)
    activities_count
        (Optional) count associated to each activity; when neither this nor the
        log is provided, every activity of the DFG gets a count of 1
    measure
        Measure forwarded to graphviz_visualization (default: frequency)

    Returns
    -------------
    viz
        Whatever graphviz_visualization returns
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    image_format = parameters.get("format", "png")
    max_no_of_edges_in_diagram = parameters.get("maxNoOfEdgesInDiagram", 75)

    if activities_count is None and log is not None:
        # count the activities on the provided log
        activities_count = attributes_filter.get_attribute_values(log, activity_key, parameters=parameters)
    elif activities_count is None:
        # no log available: give a unitary count to each activity of the graph
        activities_count = {act: 1 for act in dfg_utils.get_activities_from_dfg(dfg)}

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure=measure,
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
    )
def apply(dfg, parameters=None):
    """
    Builds a Petri net, along with an initial and a final marking, from a
    Directly-Follows graph

    The provided object is used as it is: its keys are read as
    (source activity, target activity) couples.

    Parameters
    -------------
    dfg
        Directly-Follows graph (mapping having couples of activities as keys)
    parameters
        Possible parameters of the algorithm, including:
            PARAM_KEY_START_ACTIVITIES -> start activities
            (when missing, they are inferred from the DFG)
            PARAM_KEY_END_ACTIVITIES -> end activities
            (when missing, they are inferred from the DFG)

    Returns
    -------------
    net
        Petri net
    im
        Initial marking (one token in the "source" place)
    fm
        Final marking (one token in the "sink" place)
    """
    if parameters is None:
        parameters = {}

    # explicitly provided start/end activities take precedence over the inferred ones
    if PARAM_KEY_START_ACTIVITIES in parameters:
        start_activities = parameters[PARAM_KEY_START_ACTIVITIES]
    else:
        start_activities = dfg_utils.infer_start_activities(dfg)
    if PARAM_KEY_END_ACTIVITIES in parameters:
        end_activities = parameters[PARAM_KEY_END_ACTIVITIES]
    else:
        end_activities = dfg_utils.infer_end_activities(dfg)
    activities = dfg_utils.get_activities_from_dfg(dfg)

    net = PetriNet("")
    initial_marking = Marking()
    final_marking = Marking()

    source_place = PetriNet.Place("source")
    net.places.add(source_place)
    initial_marking[source_place] = 1

    sink_place = PetriNet.Place("sink")
    net.places.add(sink_place)
    final_marking[sink_place] = 1

    # one place for each activity of the graph
    place_of = {}
    for act in activities:
        act_place = PetriNet.Place(act)
        place_of[act] = act_place
        net.places.add(act_place)

    # progressive number shared by all the transitions, used to build their names
    counter = 0

    # source -> labeled transition -> place of the start activity
    for act in start_activities:
        if act not in place_of:
            continue
        counter += 1
        start_trans = PetriNet.Transition(act + "_" + str(counter), act)
        net.transitions.add(start_trans)
        add_arc_from_to(source_place, start_trans, net)
        add_arc_from_to(start_trans, place_of[act], net)

    # place of the end activity -> transition without label (None) -> sink
    for act in end_activities:
        if act not in place_of:
            continue
        counter += 1
        end_trans = PetriNet.Transition(act + "_" + str(counter), None)
        net.transitions.add(end_trans)
        add_arc_from_to(place_of[act], end_trans, net)
        add_arc_from_to(end_trans, sink_place, net)

    # one transition, labeled with the target activity, for each edge of the graph
    for edge in dfg.keys():
        source_act, target_act = edge[0], edge[1]
        counter += 1
        edge_trans = PetriNet.Transition(target_act + "_" + str(counter), target_act)
        net.transitions.add(edge_trans)
        add_arc_from_to(place_of[source_act], edge_trans, net)
        add_arc_from_to(edge_trans, place_of[target_act], net)

    return net, initial_marking, final_marking
def __init__(self, frequency_dfg, activities=None, start_activities=None, end_activities=None,
             activities_occurrences=None, default_edges_color="#000000", performance_dfg=None,
             net_name=DEFAULT_NET_NAME):
    """
    Initialize a Heuristics Net

    The implementation is based on the original paper on Heuristics Miner, namely:

    Weijters, A. J. M. M., Wil MP van Der Aalst, and AK Alves De Medeiros.
    "Process mining with the heuristics miner-algorithm."
    Technische Universiteit Eindhoven, Tech. Rep. WP 166 (2006): 1-34.

    and it manages to calculate the dependency matrix, the loops of length one and two, and
    the AND measure

    Parameters
    -------------
    frequency_dfg
        Directly-Follows graph (frequency)
    activities
        Activities (if not provided, they are extracted from the frequency DFG)
    start_activities
        Start activities (if not provided, they are inferred from the frequency DFG)
    end_activities
        End activities (if not provided, they are inferred from the frequency DFG)
    activities_occurrences
        Activities occurrences (if not provided, they are calculated on the frequency DFG
        for each activity)
    default_edges_color
        (If provided) Default edges color
    performance_dfg
        Performance DFG
    net_name
        (If provided) name of the heuristics net
    """
    # structures that start empty
    self.nodes = {}
    self.dependency_matrix = {}
    self.dfg_matrix = {}

    self.dfg = frequency_dfg
    self.performance_dfg = performance_dfg
    self.node_type = "performance" if performance_dfg is not None else "frequency"

    # the following attributes are kept wrapped in a list having a single element
    self.net_name = [net_name]
    self.default_edges_color = [default_edges_color]

    if activities is None:
        activities = dfg_utils.get_activities_from_dfg(frequency_dfg)
    self.activities = activities

    if start_activities is None:
        start_activities = dfg_utils.infer_start_activities(frequency_dfg)
    self.start_activities = [start_activities]

    if end_activities is None:
        end_activities = dfg_utils.infer_end_activities(frequency_dfg)
    self.end_activities = [end_activities]

    if activities_occurrences is None:
        # fall back to the count obtained from the frequency DFG, activity by activity
        activities_occurrences = {
            act: dfg_utils.sum_activities_count(frequency_dfg, [act])
            for act in self.activities
        }
    self.activities_occurrences = activities_occurrences