def detect(log: EventLog, alphabet: Dict[str, int], act_key: str, use_msd: bool) -> Optional[str]:
    """
    Look for an activity that occurs in every trace and whose removal from
    the log makes a sequence, xor, concurrent or loop cut detectable.

    Parameters
    ----------
    log
        Event log to analyse
    alphabet
        Dictionary whose keys are the candidate activities
    act_key
        Event attribute holding the activity label
    use_msd
        If True, minimum self distance witnesses of the projected log are
        handed to the concurrent cut detection

    Returns
    -------
    The first candidate activity for which one of the cuts is found on the
    projected log, or None if there is no such activity.
    """
    # Only activities that occur in every single trace are candidates.
    candidates = set(alphabet.keys())
    for trace in log:
        candidates = candidates.intersection({event[act_key] for event in trace})
        if not candidates:
            return None

    for a in candidates:
        # Project the candidate activity out of every trace.
        proj = EventLog()
        for trace in log:
            proj.append(pm4py.filter_trace(lambda e: e[act_key] != a, trace))

        # A projection containing an empty trace is not inspected further.
        if any(len(trace) == 0 for trace in proj):
            continue

        dfg_proj = discover_dfg.apply(proj, parameters={
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        alphabet_proj = pm4py.get_attribute_values(proj, act_key)
        start_act_proj = get_starters.get_start_activities(proj, parameters={
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        # Fixed: the end activities must come from the projected log, not
        # from the original one (which may still end with the activity that
        # was just removed).
        end_act_proj = get_ends.get_end_activities(proj, parameters={
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        pre_proj, post_proj = dfg_utils.get_transitive_relations(dfg_proj, alphabet_proj)

        cut = sequence_cut.detect(alphabet_proj, pre_proj, post_proj)
        if cut is not None:
            return a

        cut = xor_cut.detect(dfg_proj, alphabet_proj)
        if cut is not None:
            return a

        if use_msd:
            # Fixed: distances and witnesses are both derived from the
            # projected log, so that the distances match the traces in which
            # the witnesses are searched.
            msd_proj = msd_algo.apply(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            witnesses_proj = msdw_algo.derive_msd_witnesses(proj, msd_proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        else:
            witnesses_proj = None
        cut = concurrent_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj,
                                    msd=witnesses_proj)
        if cut is not None:
            return a

        cut = loop_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj)
        if cut is not None:
            return a

    return None
def derive_msd_witnesses(
    log: EventLog,
    msd: Optional[Dict[Any, int]] = None,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[str, Set[str]]:
    """
    Derive the minimum self distance witnesses of an event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in
    <a,b,a> is 1, etc. The minimum self distance is the smallest self
    distance observed in the event log. A 'witness' of an activity is an
    activity that lies between two occurrences realising that minimum: if
    the minimum self distance of a in log L is 2 and <a,b,c,a> is in L,
    then b and c are witnesses of a.

    Parameters
    ----------
    log
        Event Log to use
    msd
        Optional minimum self distance dictionary; computed from the log
        when not given
    parameters
        Optional parameters dictionary

    Returns
    -------
    Dictionary mapping each activity with a minimum self distance greater
    than zero to its set of witnesses.
    """
    log = converter.apply(log, variant=converter.Variants.TO_EVENT_LOG, parameters=parameters)
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    alphabet = pm4py.get_event_attribute_values(log, act_key)
    if msd is None:
        msd = msd_algo.apply(log, parameters)

    # Reduce every trace to its plain sequence of activity labels.
    label_traces = [[event[act_key] for event in trace] for trace in log]

    witnesses = {}
    for activity in alphabet:
        if not (activity in msd and msd[activity] > 0):
            continue
        found = set()
        witnesses[activity] = found
        for labels in label_traces:
            positions = [pos for pos, label in enumerate(labels) if label == activity]
            # Consecutive occurrences; yields nothing for fewer than two.
            for left, right in zip(positions, positions[1:]):
                if right - left - 1 == msd[activity]:
                    found.update(labels[left + 1:right])
    return witnesses
def get_minimum_self_distances(log: EventLog) -> Dict[str, int]:
    """
    Compute the minimum self-distance of every activity observed in an
    event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in
    <a,b,a> is 1, etc. The minimum self distance is the smallest self
    distance value observed in the event log.

    Parameters
    ----------
    log
        event log (either pandas.DataFrame, EventLog or EventStream)

    Returns
    -------
    dict mapping an activity to its self-distance, if it exists, otherwise
    it is not part of the dict.
    """
    from pm4py.algo.discovery.minimum_self_distance import algorithm as self_distance_algorithm

    distances = self_distance_algorithm.apply(log)
    return distances
def derive_minimum_self_distance(log: Union[DataFrame, EventLog, EventStream]) -> Dict[str, int]:
    """
    Compute the minimum self-distance of every activity observed in an
    event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in
    <a,b,a> is 1, etc. The activity key 'concept:name' is used.

    Parameters
    ----------
    log
        event log (either pandas.DataFrame, EventLog or EventStream)

    Returns
    -------
    dict mapping an activity to its self-distance, if it exists, otherwise
    it is not part of the dict.
    """
    from pm4py.algo.discovery.minimum_self_distance import algorithm as self_distance_algorithm

    distances = self_distance_algorithm.apply(log)
    return distances
def get_minimum_self_distances(log: EventLog) -> Dict[str, int]:
    """
    Compute the minimum self-distance of every activity observed in an
    event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in
    <a,b,a> is 1, etc. The minimum self distance is the smallest self
    distance value observed in the event log.

    Parameters
    ----------
    log
        event log (either pandas.DataFrame, EventLog or EventStream)

    Returns
    -------
    dict mapping an activity to its self-distance, if it exists, otherwise
    it is not part of the dict.

    Raises
    ------
    Exception
        if log is not a pandas.DataFrame, EventLog or EventStream
    """
    # isinstance instead of an exact type() comparison, so that subclasses
    # of the supported containers are accepted as well.
    if not isinstance(log, (pd.DataFrame, EventLog, EventStream)):
        raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.discovery.minimum_self_distance import algorithm as msd_algo
    return msd_algo.apply(log, parameters=get_properties(log))
def get_minimum_self_distance_witnesses(log: EventLog) -> Dict[str, Set[str]]:
    """
    Derive the minimum self distance witnesses of an event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in
    <a,b,a> is 1, etc. The minimum self distance is the smallest self
    distance value observed in the event log. A 'witness' is an activity
    that witnesses the minimum self distance: if the minimum self distance
    of activity a in some log L is 2 and trace <a,b,c,a> is in L, then b
    and c are witnesses of a.

    Parameters
    ----------
    log
        Event Log to use

    Returns
    -------
    Dictionary mapping each activity to a set of witnesses.
    """
    from pm4py.algo.discovery.minimum_self_distance import algorithm as self_distance_algorithm
    from pm4py.algo.discovery.minimum_self_distance import utils as self_distance_utils

    # The distances are computed first and then handed to the witness
    # derivation together with the same log.
    distances = self_distance_algorithm.apply(log)
    return self_distance_utils.derive_msd_witnesses(log, distances)
def get_minimum_self_distance_witnesses(log: EventLog) -> Dict[str, Set[str]]:
    """
    Derive the minimum self distance witnesses of an event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in
    <a,b,a> is 1, etc. The minimum self distance is the smallest self
    distance value observed in the event log. A 'witness' is an activity
    that witnesses the minimum self distance: if the minimum self distance
    of activity a in some log L is 2 and trace <a,b,c,a> is in L, then b
    and c are witnesses of a.

    Parameters
    ----------
    log
        Event Log to use

    Returns
    -------
    Dictionary mapping each activity to a set of witnesses.

    Raises
    ------
    Exception
        if log is not a pandas.DataFrame, EventLog or EventStream
    """
    # isinstance instead of an exact type() comparison, so that subclasses
    # of the supported containers are accepted as well.
    if not isinstance(log, (pd.DataFrame, EventLog, EventStream)):
        raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.discovery.minimum_self_distance import algorithm as msd_algo
    from pm4py.algo.discovery.minimum_self_distance import utils as msdw_algo

    # Distances first, then the witnesses derived from them; both calls get
    # the properties of the same log.
    msd = msd_algo.apply(log, parameters=get_properties(log))
    return msdw_algo.derive_msd_witnesses(log, msd, parameters=get_properties(log))
def __inductive_miner_internal(log, dfg, threshold, root, act_key, use_msd, remove_noise=False):
    """
    One recursion step of the inductive miner on a log and its
    directly-follows graph.

    The steps are tried in this order, and the first that applies decides
    the result:

    1. optional filtering of the dfg (only if threshold > 0 and remove_noise)
    2. empty-trace handling (xor between an empty log and the non-empty rest)
    3. base cases
    4. sequence, xor, concurrent and loop cut
    5. fall-throughs: activity once per trace, activity concurrent,
       strict tau loop, tau loop
    6. a retry with remove_noise=True (only if threshold > 0 and noise
       removal has not been tried yet)
    7. flower model

    Parameters
    ----------
    log
        Event log of the current recursion step
    dfg
        Directly-follows graph belonging to the log
    threshold
        Noise threshold
    root
        Parent node under which new process tree nodes are created
    act_key
        Event attribute holding the activity label
    use_msd
        If True, minimum self distance witnesses are passed to the
        concurrent cut detection
    remove_noise
        If True (and threshold > 0), the dfg is filtered before cut detection

    Returns
    -------
    The result of the helper that handles the first applicable step.
    """
    alphabet = pm4py.get_event_attribute_values(log, act_key)

    if threshold > 0 and remove_noise:
        end_activities = get_ends.get_end_activities(
            log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        dfg = __filter_dfg_on_threshold(dfg, end_activities, threshold)

    total_traces = len(log)
    log = pm4py.filter_log(lambda trace: len(trace) > 0, log)
    removed_traces = total_traces - len(log)

    # revised EMPTYSTRACES
    if removed_traces > total_traces * threshold:
        xor_node = pt.ProcessTree(pt.Operator.XOR, root)
        return __add_operator_recursive_logs(
            xor_node, threshold, act_key, [EventLog(), log], use_msd)

    start_activities = get_starters.get_start_activities(
        log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    end_activities = get_ends.get_end_activities(
        log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})

    if __is_base_case_act(log, act_key) or __is_base_case_silent(log):
        return __apply_base_case(log, root, act_key)

    pre, post = dfg_utils.get_transitive_relations(dfg, alphabet)

    # --- cuts -------------------------------------------------------------
    cut = sequence_cut.detect(alphabet, pre, post)
    if cut is not None:
        node = pt.ProcessTree(pt.Operator.SEQUENCE, root)
        sublogs = sequence_cut.project(log, cut, act_key)
        return __add_operator_recursive_logs(node, threshold, act_key, sublogs, use_msd)

    cut = xor_cut.detect(dfg, alphabet)
    if cut is not None:
        node = pt.ProcessTree(pt.Operator.XOR, root)
        sublogs = xor_cut.project(log, cut, act_key)
        return __add_operator_recursive_logs(node, threshold, act_key, sublogs, use_msd)

    if use_msd:
        distances = msd_algo.apply(
            log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        msd_witnesses = msdw_algo.derive_msd_witnesses(
            log, distances,
            parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    else:
        msd_witnesses = None
    cut = concurrent_cut.detect(
        dfg, alphabet, start_activities, end_activities, msd=msd_witnesses)
    if cut is not None:
        node = pt.ProcessTree(pt.Operator.PARALLEL, root)
        sublogs = concurrent_cut.project(log, cut, act_key)
        return __add_operator_recursive_logs(node, threshold, act_key, sublogs, use_msd)

    cut = loop_cut.detect(dfg, alphabet, start_activities, end_activities)
    if cut is not None:
        node = pt.ProcessTree(pt.Operator.LOOP, root)
        sublogs = loop_cut.project(log, cut, act_key)
        return __add_operator_recursive_logs(node, threshold, act_key, sublogs, use_msd)

    # --- fall-throughs ----------------------------------------------------
    aopt = activity_once_per_trace.detect(log, alphabet, act_key)
    if aopt is not None:
        # The detected activity becomes a leaf under a new parallel node.
        operator = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root)
        leaf = pt.ProcessTree(operator=None, parent=operator, label=aopt)
        operator.children.append(leaf)
        sublogs = activity_once_per_trace.project(log, aopt, act_key)
        return __add_operator_recursive_logs(operator, threshold, act_key, sublogs, use_msd)

    act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd)
    if act_conc is not None:
        node = pt.ProcessTree(pt.Operator.PARALLEL, root)
        sublogs = activity_concurrent.project(log, act_conc, act_key)
        return __add_operator_recursive_logs(node, threshold, act_key, sublogs, use_msd)

    stl = strict_tau_loop.detect(log, start_activities, end_activities, act_key)
    if stl is not None:
        node = pt.ProcessTree(pt.Operator.LOOP, root)
        return __add_operator_recursive_logs(
            node, threshold, act_key, [stl, EventLog()], use_msd)

    tl = tau_loop.detect(log, start_activities, act_key)
    if tl is not None:
        node = pt.ProcessTree(pt.Operator.LOOP, root)
        return __add_operator_recursive_logs(
            node, threshold, act_key, [tl, EventLog()], use_msd)

    # Nothing applied: retry once with noise removal before giving up.
    if threshold > 0 and not remove_noise:
        return __inductive_miner(log, dfg, threshold, root, act_key, use_msd,
                                 remove_noise=True)

    return __flower(alphabet, root)
def test_minimum_self_distance(self):
    """
    Smoke test: read the running-example XES log and run the minimum self
    distance algorithm on it. No assertion is made on the result; the test
    only fails if one of the calls raises.
    """
    import pm4py
    from pm4py.algo.discovery.minimum_self_distance import algorithm as msd_algorithm

    xes_path = os.path.join("input_data", "running-example.xes")
    event_log = pm4py.read_xes(xes_path)
    msd = msd_algorithm.apply(event_log)
def inductive_miner(log, dfg, threshold, root, act_key, use_msd):
    """
    One recursion step of the inductive miner on a log and its
    directly-follows graph.

    If the log contains empty traces, an xor between an empty log and the
    non-empty traces is created. Otherwise the steps are tried in this
    order, and the first that applies decides the result: base cases;
    sequence, xor, concurrent and loop cut; the fall-throughs activity once
    per trace, activity concurrent, strict tau loop and tau loop; and
    finally the flower model.

    Parameters
    ----------
    log
        Event log of the current recursion step
    dfg
        Directly-follows graph belonging to the log
    threshold
        Threshold value, forwarded unchanged to the recursive calls
    root
        Parent node under which new process tree nodes are created
    act_key
        Event attribute holding the activity label
    use_msd
        If True, minimum self distance witnesses are passed to the
        concurrent cut detection

    Returns
    -------
    The result of the helper that handles the first applicable step.
    """
    alphabet = pm4py.get_attribute_values(log, act_key)
    start_activities = get_starters.get_start_activities(
        log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    end_activities = get_ends.get_end_activities(
        log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    empty_traces = pm4py.filter_log(lambda trace: len(trace) == 0, log)

    # Empty traces present: base cases and cuts are skipped and the log is
    # split into "nothing happens" and the non-empty remainder.
    if len(empty_traces) > 0:
        nempty = pm4py.filter_log(lambda t: len(t) > 0, log)
        xor_node = pt.ProcessTree(pt.Operator.XOR, root)
        return _add_operator_recursive(
            xor_node, threshold, act_key, [EventLog(), nempty], use_msd)

    if _is_base_case_act(log, act_key) or _is_base_case_silent(log):
        return _apply_base_case(log, root, act_key)

    pre, post = dfg_utils.get_transitive_relations(dfg, alphabet)

    # --- cuts -------------------------------------------------------------
    cut = sequence_cut.detect(alphabet, pre, post)
    if cut is not None:
        node = pt.ProcessTree(pt.Operator.SEQUENCE, root)
        sublogs = sequence_cut.project(log, cut, act_key)
        return _add_operator_recursive(node, threshold, act_key, sublogs, use_msd)

    cut = xor_cut.detect(dfg, alphabet)
    if cut is not None:
        node = pt.ProcessTree(pt.Operator.XOR, root)
        sublogs = xor_cut.project(log, cut, act_key)
        return _add_operator_recursive(node, threshold, act_key, sublogs, use_msd)

    if use_msd:
        distances = msd_algo.apply(
            log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        msd_witnesses = msdw_algo.derive_msd_witnesses(
            log, distances,
            parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    else:
        msd_witnesses = None
    cut = concurrent_cut.detect(
        dfg, alphabet, start_activities, end_activities, msd=msd_witnesses)
    if cut is not None:
        node = pt.ProcessTree(pt.Operator.PARALLEL, root)
        sublogs = concurrent_cut.project(log, cut, act_key)
        return _add_operator_recursive(node, threshold, act_key, sublogs, use_msd)

    cut = loop_cut.detect(dfg, alphabet, start_activities, end_activities)
    if cut is not None:
        node = pt.ProcessTree(pt.Operator.LOOP, root)
        sublogs = loop_cut.project(log, cut, act_key)
        return _add_operator_recursive(node, threshold, act_key, sublogs, use_msd)

    # --- fall-throughs ----------------------------------------------------
    aopt = activity_once_per_trace.detect(log, alphabet, act_key)
    if aopt is not None:
        # The detected activity becomes a leaf under a new parallel node.
        operator = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root)
        leaf = pt.ProcessTree(operator=None, parent=operator, label=aopt)
        operator.children.append(leaf)
        sublogs = activity_once_per_trace.project(log, aopt, act_key)
        return _add_operator_recursive(operator, threshold, act_key, sublogs, use_msd)

    act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd)
    if act_conc is not None:
        node = pt.ProcessTree(pt.Operator.PARALLEL, root)
        sublogs = activity_concurrent.project(log, act_conc, act_key)
        return _add_operator_recursive(node, threshold, act_key, sublogs, use_msd)

    stl = strict_tau_loop.detect(log, start_activities, end_activities, act_key)
    if stl is not None:
        node = pt.ProcessTree(pt.Operator.LOOP, root)
        return _add_operator_recursive(
            node, threshold, act_key, [stl, EventLog()], use_msd)

    tl = tau_loop.detect(log, start_activities, act_key)
    if tl is not None:
        node = pt.ProcessTree(pt.Operator.LOOP, root)
        return _add_operator_recursive(
            node, threshold, act_key, [tl, EventLog()], use_msd)

    return _flower(alphabet, root)