def __init__(self, df, parameters=None):
    """
    Build the proclet derivation model from a dataframe.

    Four generalized-framework models are mined from *df* (via
    ``gen_fram_factory``) with different relation / frequency variants,
    then a per-class inductive model plus start/end activities are
    derived from the DFG model's edge frequencies.

    Parameters
    ----------
    df
        Input dataframe (event data); passed straight to ``gen_fram_factory``.
    parameters
        Optional parameters dictionary (currently only defaulted, not read here).
    """
    if parameters is None:
        parameters = {}
    self.type = "proclet_derivation_model"
    self.df = df
    self.model_dfg = gen_fram_factory.apply(
        df, model_type_variant="model3", rel_ev_variant="rel_dfg",
        node_freq_variant="type31", edge_freq_variant="type11")
    self.model_3_1 = gen_fram_factory.apply(
        df, model_type_variant="model3", rel_ev_variant="being_produced",
        node_freq_variant="type31", edge_freq_variant="type11")
    self.model_3_2 = gen_fram_factory.apply(
        df, model_type_variant="model3", rel_ev_variant="being_produced",
        node_freq_variant="type32", edge_freq_variant="type13")
    self.model_link = gen_fram_factory.apply(
        df, model_type_variant="model3", rel_ev_variant="link",
        node_freq_variant="type32", edge_freq_variant="type11")
    self.model_inductive = {}
    self.start_activities = {}
    self.end_activities = {}
    # edge_freq maps class -> {"src@@dst": frequency}; rebuild each inner
    # dict with (src, dst) tuple keys as expected by the inductive miner.
    for cl, this_dfg in self.model_dfg.edge_freq.items():
        new_dfg = {}
        for key, freq in this_dfg.items():
            # split each key once (the original split every key twice)
            parts = key.split("@@")
            new_dfg[(parts[0], parts[1])] = freq
        self.model_inductive[cl] = inductive_miner.apply_dfg(new_dfg)
        self.start_activities[cl] = dfg_utils.infer_start_activities(new_dfg)
        self.end_activities[cl] = dfg_utils.infer_end_activities(new_dfg)
    self.linked_perspectives = {}
    self.find_linked_perspectives()
def __init__(self, log, dfg, master_dfg, initial_dfg, activities, counts, rec_depth, noise_threshold=0, start_activities=None, end_activities=None, initial_start_activities=None, initial_end_activities=None, parameters=None, real_init=True):
    """
    Build one subtree node of the inductive-miner recursion.

    Parameters
    -----------
    dfg
        Directly follows graph of this subtree
    master_dfg
        Original DFG
    initial_dfg
        Referral directly follows graph that should be taken in account adding hidden/loop transitions
    activities
        Activities of this subtree
    counts
        Shared variable
    rec_depth
        Current recursion depth
    """
    # real_init=False leaves the instance empty (e.g. for copying/deserialization)
    if not real_init:
        return
    self.master_dfg = copy(master_dfg)
    self.initial_dfg = copy(initial_dfg)
    self.counts = counts
    self.rec_depth = rec_depth
    self.noise_threshold = noise_threshold
    self.start_activities = [] if start_activities is None else start_activities
    self.end_activities = [] if end_activities is None else end_activities
    # when not provided, infer the global start/end activities from the master DFG
    if initial_start_activities is None:
        initial_start_activities = infer_start_activities(master_dfg)
    self.initial_start_activities = initial_start_activities
    if initial_end_activities is None:
        initial_end_activities = infer_end_activities(master_dfg)
    self.initial_end_activities = initial_end_activities
    # per-subtree state, populated later by initialize_tree / cut detection
    for attr_name in ("second_iteration", "activities", "dfg", "outgoing",
                      "ingoing", "self_loop_activities", "initial_ingoing",
                      "initial_outgoing", "activities_direction",
                      "activities_dir_list", "negated_dfg",
                      "negated_activities", "negated_outgoing",
                      "negated_ingoing", "detected_cut", "children",
                      "inverted_dfg"):
        setattr(self, attr_name, None)
    self.must_insert_skip = False
    self.log = log
    self.original_log = log
    self.initialize_tree(dfg, log, initial_dfg, activities, parameters=parameters)
def apply(dfg, parameters=None):
    """
    Build a Petri net directly from a DFG.

    One place is created per activity; each DFG edge (a1, a2) becomes a
    visible transition labeled a2 between the two activity places. Start
    activities get a visible transition from the source place; end
    activities get an invisible transition into the sink place.

    Parameters
    -------------
    dfg
        Directly-follows graph: dict keyed by (act1, act2) edge tuples
    parameters
        Parameters; may override start/end activities via
        PARAM_KEY_START_ACTIVITIES / PARAM_KEY_END_ACTIVITIES

    Returns
    -------------
    net
        Petri net
    im
        Initial marking (one token in the source place)
    fm
        Final marking (one token in the sink place)
    """
    if parameters is None:
        parameters = {}
    # explicit overrides win; otherwise infer start/end activities from the DFG
    start_activities = parameters[
        PARAM_KEY_START_ACTIVITIES] if PARAM_KEY_START_ACTIVITIES in parameters else dfg_utils.infer_start_activities(
        dfg)
    end_activities = parameters[
        PARAM_KEY_END_ACTIVITIES] if PARAM_KEY_END_ACTIVITIES in parameters else dfg_utils.infer_end_activities(
        dfg)
    activities = dfg_utils.get_activities_from_dfg(dfg)
    net = PetriNet("")
    im = Marking()
    fm = Marking()
    source = PetriNet.Place("source")
    net.places.add(source)
    im[source] = 1
    sink = PetriNet.Place("sink")
    net.places.add(sink)
    fm[sink] = 1
    # one place per activity
    places_corr = {}
    for act in activities:
        places_corr[act] = PetriNet.Place(act)
        net.places.add(places_corr[act])
    # index makes every transition name unique (same label may occur many times)
    index = 0
    for act in start_activities:
        if act in places_corr:
            index = index + 1
            trans = PetriNet.Transition(act + "_" + str(index), act)
            net.transitions.add(trans)
            add_arc_from_to(source, trans, net)
            add_arc_from_to(trans, places_corr[act], net)
    for act in end_activities:
        if act in places_corr:
            index = index + 1
            # invisible (None-labeled) transition drains the place into the sink
            inv_trans = PetriNet.Transition(act + "_" + str(index), None)
            net.transitions.add(inv_trans)
            add_arc_from_to(places_corr[act], inv_trans, net)
            add_arc_from_to(inv_trans, sink, net)
    for act1, act2 in dfg:
        index = index + 1
        trans = PetriNet.Transition(act2 + "_" + str(index), act2)
        net.transitions.add(trans)
        add_arc_from_to(places_corr[act1], trans, net)
        add_arc_from_to(trans, places_corr[act2], net)
    return net, im, fm
def __init__(self, frequency_dfg, activities=None, start_activities=None, end_activities=None, activities_occurrences=None, default_edges_color="#000000", performance_dfg=None, net_name=DEFAULT_NET_NAME):
    """
    Initialize a Heuristics Net.

    The implementation is based on the original paper on Heuristics Miner, namely:

    Weijters, A. J. M. M., Wil MP van Der Aalst, and AK Alves De Medeiros.
    "Process mining with the heuristics miner-algorithm."
    Technische Universiteit Eindhoven, Tech. Rep. WP 166 (2006): 1-34.

    and it manages to calculate the dependency matrix, the loops of length one and two,
    and the AND measure.

    Parameters
    -------------
    frequency_dfg
        Directly-Follows graph (frequency)
    activities
        Activities (inferred from the DFG when omitted)
    start_activities
        Start activities (inferred from the DFG when omitted)
    end_activities
        End activities (inferred from the DFG when omitted)
    activities_occurrences
        Activities occurrences (computed from the DFG when omitted)
    default_edges_color
        (If provided) Default edges color
    performance_dfg
        Performance DFG; when given, the net is rendered in performance mode
    net_name
        (If provided) name of the heuristics net
    """
    self.net_name = [net_name]
    self.nodes = {}
    self.dependency_matrix = {}
    self.dfg_matrix = {}
    self.dfg = frequency_dfg
    self.performance_dfg = performance_dfg
    # frequencies are shown unless a performance DFG was supplied
    self.node_type = "performance" if self.performance_dfg is not None else "frequency"
    if activities is None:
        activities = dfg_utils.get_activities_from_dfg(frequency_dfg)
    self.activities = activities
    # start/end activities are stored as single-element lists
    self.start_activities = [
        dfg_utils.infer_start_activities(frequency_dfg)
        if start_activities is None else start_activities
    ]
    self.end_activities = [
        dfg_utils.infer_end_activities(frequency_dfg)
        if end_activities is None else end_activities
    ]
    if activities_occurrences is None:
        activities_occurrences = {
            act: dfg_utils.sum_activities_count(frequency_dfg, [act])
            for act in self.activities
        }
    self.activities_occurrences = activities_occurrences
    self.default_edges_color = [default_edges_color]
def put_skips_in_seq_cut(self):
    """
    Puts the skips in sequential cut.

    Marks children of a detected sequential cut with must_insert_skip=True
    in four passes, so that traces which do not traverse every part of the
    sequence can still be replayed.
    """
    # first, put skips when in some cut there is an ending activity:
    # everything AFTER a child that can end the trace must be skippable
    in_end_act = set(self.initial_end_activities)
    i = 0
    while i < len(self.children) - 1:
        activities_set = set(self.children[i].activities)
        intersection = activities_set.intersection(in_end_act)
        if len(intersection) > 0:
            j = i + 1
            while j < len(self.children):
                self.children[j].must_insert_skip = True
                j = j + 1
        i = i + 1
    # second, put skips when in some cut you are not sure to pass through:
    # if child i can jump directly to child max_value, the children strictly
    # between them must be skippable
    i = 0
    while i < len(self.children) - 1:
        act_i = self.children[i].activities
        act_i_output_appearences = {}
        max_value = i
        for act in act_i:
            # NOTE(review): assumes self.outgoing has an entry for every
            # activity of the child — verify against how outgoing is built
            for out_act in self.outgoing[act]:
                # sentinel: len(children)-1 means "not yet located in a child"
                act_i_output_appearences[out_act] = len(self.children) - 1
        j = i + 1
        while j < len(self.children):
            act_children = self.children[j].activities
            for act in act_children:
                # record the FIRST child index where this successor appears
                if act in act_i_output_appearences and act_i_output_appearences[
                        act] == len(self.children) - 1:
                    act_i_output_appearences[act] = j
                    if j > max_value:
                        max_value = j
            j = j + 1
        j = i + 1
        while j < max_value:
            self.children[j].must_insert_skip = True
            j = j + 1
        i = i + 1
    this_start_activities = set(infer_start_activities(self.dfg))
    # third, put skips when some input activities do not pass there:
    # for entry points coming from outside this subtree, any child not
    # reached by those entries must be skippable
    out_start_activities = infer_start_activities_from_prev_connections_and_current_dfg(
        self.initial_dfg, self.dfg, self.activities, include_self=False)
    out_start_activities_diff = out_start_activities - set(self.activities)
    for act in out_start_activities_diff:
        out_act_here = set()
        for el in self.initial_dfg:
            # el is presumably ((source_act, target_act), count) — TODO confirm
            if el[0][0] == act and el[0][1] in self.activities:
                out_act_here.add(el[0][1])
        i = 0
        while i < len(self.children):
            child_act = set(self.children[i].activities)
            inte = child_act.intersection(out_act_here)
            if inte:
                for el in inte:
                    out_act_here.remove(el)
            # still-unreached targets remain: this child may be bypassed
            if len(out_act_here) > 0:
                self.children[i].must_insert_skip = True
            i = i + 1
    # fourth, put skips until all start activities are reached: children
    # before the last externally-entered start activity must be skippable
    remaining_act = (out_start_activities -
                     this_start_activities).intersection(self.activities)
    i = 0
    while i < len(self.children):
        child_act = set(self.children[i].activities)
        inte = child_act.intersection(remaining_act)
        if inte:
            for el in inte:
                remaining_act.remove(el)
        if len(remaining_act) > 0:
            self.children[i].must_insert_skip = True
        i = i + 1
def apply_dfg_sa_ea(dfg, start_activities, end_activities, parameters=None):
    """
    Applying Alpha Miner starting from the knowledge of the Directly Follows graph,
    and of the start activities and end activities in the log (possibly inferred from the DFG)

    Parameters
    ------------
    dfg
        Directly-Follows graph
    start_activities
        Start activities (inferred from the DFG when None)
    end_activities
        End activities (inferred from the DFG when None)
    parameters
        Parameters of the algorithm including:
            activity key -> name of the attribute that contains the activity

    Returns
    -------
    net : :class:`pm4py.entities.petri.petrinet.PetriNet`
        A Petri net describing the event log that is provided as an input
    initial marking : :class:`pm4py.models.net.Marking`
        marking object representing the initial marking
    final marking : :class:`pm4py.models.net.Marking`
        marking object representing the final marking, not guaranteed that it is actually reachable!
    """
    if parameters is None:
        parameters = {}
    if pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[
            pm_util.constants.
            PARAMETER_CONSTANT_ACTIVITY_KEY] = log_util.xes.DEFAULT_NAME_KEY
    if start_activities is None:
        start_activities = dfg_utils.infer_start_activities(dfg)
    if end_activities is None:
        end_activities = dfg_utils.infer_end_activities(dfg)
    # collect every activity label appearing in the DFG or as a start/end activity
    labels = set()
    for el in dfg:
        labels.add(el[0])
        labels.add(el[1])
    for a in start_activities:
        labels.add(a)
    for a in end_activities:
        labels.add(a)
    labels = list(labels)
    alpha_abstraction = alpha_classic_abstraction.ClassicAlphaAbstraction(
        start_activities, end_activities, dfg,
        activity_key=parameters[PARAMETER_CONSTANT_ACTIVITY_KEY])
    # seed pairs: every causal relation (a, b) not discarded by the initial
    # filter becomes a ({a}, {b}) candidate place
    pairs = list(
        map(
            lambda p: ({p[0]}, {p[1]}),
            filter(
                lambda p: __initial_filter(alpha_abstraction.parallel_relation,
                                           p),
                alpha_abstraction.causal_relation)))
    # merge compatible pairs into larger (input-set, output-set) pairs.
    # NOTE(review): len(pairs) in the range() calls is evaluated once per loop
    # start, so pairs appended here are NOT revisited by the outer loop —
    # presumably intentional (classic alpha formulation); confirm if changing.
    for i in range(0, len(pairs)):
        t1 = pairs[i]
        for j in range(i, len(pairs)):
            t2 = pairs[j]
            if t1 != t2:
                if t1[0].issubset(t2[0]) or t1[1].issubset(t2[1]):
                    if not (__check_is_unrelated(
                            alpha_abstraction.parallel_relation,
                            alpha_abstraction.causal_relation, t1[0],
                            t2[0]) or __check_is_unrelated(
                                alpha_abstraction.parallel_relation,
                                alpha_abstraction.causal_relation, t1[1],
                                t2[1])):
                        new_alpha_pair = (t1[0] | t2[0], t1[1] | t2[1])
                        if new_alpha_pair not in pairs:
                            pairs.append((t1[0] | t2[0], t1[1] | t2[1]))
    # keep only maximal pairs; each becomes an internal place of the net
    internal_places = filter(lambda p: __pair_maximizer(pairs, p), pairs)
    net = petri.petrinet.PetriNet('alpha_classic_net_' + str(time.time()))
    # one visible transition per activity label
    label_transition_dict = {}
    for i in range(0, len(labels)):
        label_transition_dict[labels[i]] = petri.petrinet.PetriNet.Transition(
            labels[i], labels[i])
        net.transitions.add(label_transition_dict[labels[i]])
    src = __add_source(net, alpha_abstraction.start_activities,
                       label_transition_dict)
    sink = __add_sink(net, alpha_abstraction.end_activities,
                      label_transition_dict)
    # wire each maximal pair as a place between its input and output transitions
    for pair in internal_places:
        place = petri.petrinet.PetriNet.Place(str(pair))
        net.places.add(place)
        for in_arc in pair[0]:
            petri.utils.add_arc_from_to(label_transition_dict[in_arc], place,
                                        net)
        for out_arc in pair[1]:
            petri.utils.add_arc_from_to(place,
                                        label_transition_dict[out_arc], net)
    return net, Marking({src: 1}), Marking({sink: 1})