def project(log, groups, activity_key): # currently not 'noise' proof # assumes that no empty traces are in the log logs = list() for group in groups: logs.append( pm4py.filter_log(lambda t: t[0][activity_key] in group, log)) return logs
def split(): ps = process_args(sys.argv[1:]) dpn = DPN(read_pnml_input(ps["model"])) (log, has_uncertainty) = read_log(ps["log"]) print("number of traces: %d" % len(log)) #naive_part = NaivePartitioning(list(logd.values())) #interval_part = IntervalPartitioning(dpn, naive_part.representatives()) i = 0 ts = [] for t in log: tp = preprocess_trace(t, dpn) if not tp in ts: log1 = pm4py.filter_log(lambda x: x == t, log) print(len(log1), i) xes_exporter.apply( log1, 'data/hospital_billing/single_traces/' + str(i) + '.xes') i += 1 ts.append(tp)
def __inductive_miner_internal(log, dfg, threshold, root, act_key, use_msd, remove_noise=False): alphabet = pm4py.get_event_attribute_values(log, act_key) if threshold > 0 and remove_noise: end_activities = get_ends.get_end_activities( log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) dfg = __filter_dfg_on_threshold(dfg, end_activities, threshold) original_length = len(log) log = pm4py.filter_log(lambda t: len(t) > 0, log) # revised EMPTYSTRACES if original_length - len(log) > original_length * threshold: return __add_operator_recursive_logs( pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key, [EventLog(), log], use_msd) start_activities = get_starters.get_start_activities( log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) end_activities = get_ends.get_end_activities( log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) if __is_base_case_act(log, act_key) or __is_base_case_silent(log): return __apply_base_case(log, root, act_key) pre, post = dfg_utils.get_transitive_relations(dfg, alphabet) cut = sequence_cut.detect(alphabet, pre, post) if cut is not None: return __add_operator_recursive_logs( pt.ProcessTree(pt.Operator.SEQUENCE, root), threshold, act_key, sequence_cut.project(log, cut, act_key), use_msd) cut = xor_cut.detect(dfg, alphabet) if cut is not None: return __add_operator_recursive_logs( pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key, xor_cut.project(log, cut, act_key), use_msd) cut = concurrent_cut.detect( dfg, alphabet, start_activities, end_activities, msd=msdw_algo.derive_msd_witnesses( log, msd_algo.apply(log, parameters={ constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key }), parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) if use_msd else None) if cut is not None: return __add_operator_recursive_logs( pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key, concurrent_cut.project(log, cut, act_key), use_msd) cut = loop_cut.detect(dfg, alphabet, start_activities, end_activities) if cut is not None: return __add_operator_recursive_logs( pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key, loop_cut.project(log, cut, act_key), use_msd) aopt = activity_once_per_trace.detect(log, alphabet, act_key) if aopt is not None: operator = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root) operator.children.append( pt.ProcessTree(operator=None, parent=operator, label=aopt)) return __add_operator_recursive_logs( operator, threshold, act_key, activity_once_per_trace.project(log, aopt, act_key), use_msd) act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd) if act_conc is not None: return __add_operator_recursive_logs( pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key, activity_concurrent.project(log, act_conc, act_key), use_msd) stl = strict_tau_loop.detect(log, start_activities, end_activities, act_key) if stl is not None: return __add_operator_recursive_logs( pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key, [stl, EventLog()], use_msd) tl = tau_loop.detect(log, start_activities, act_key) if tl is not None: return __add_operator_recursive_logs( pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key, [tl, EventLog()], use_msd) if threshold > 0 and not remove_noise: return __inductive_miner(log, dfg, threshold, root, act_key, use_msd, remove_noise=True) return __flower(alphabet, root)
def inductive_miner(log, dfg, threshold, root, act_key, use_msd): alphabet = pm4py.get_attribute_values(log, act_key) start_activities = get_starters.get_start_activities( log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) end_activities = get_ends.get_end_activities( log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) empty_traces = pm4py.filter_log(lambda trace: len(trace) == 0, log) if len(empty_traces) == 0: if _is_base_case_act(log, act_key) or _is_base_case_silent(log): return _apply_base_case(log, root, act_key) pre, post = dfg_utils.get_transitive_relations(dfg, alphabet) cut = sequence_cut.detect(alphabet, pre, post) if cut is not None: return _add_operator_recursive( pt.ProcessTree(pt.Operator.SEQUENCE, root), threshold, act_key, sequence_cut.project(log, cut, act_key), use_msd) cut = xor_cut.detect(dfg, alphabet) if cut is not None: return _add_operator_recursive( pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key, xor_cut.project(log, cut, act_key), use_msd) cut = concurrent_cut.detect( dfg, alphabet, start_activities, end_activities, msd=msdw_algo.derive_msd_witnesses( log, msd_algo.apply(log, parameters={ constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key }), parameters={ constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key }) if use_msd else None) if cut is not None: return _add_operator_recursive( pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key, concurrent_cut.project(log, cut, act_key), use_msd) cut = loop_cut.detect(dfg, alphabet, start_activities, end_activities) if cut is not None: return _add_operator_recursive( pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key, loop_cut.project(log, cut, act_key), use_msd) if len(empty_traces) > 0: nempty = pm4py.filter_log(lambda t: len(t) > 0, log) return _add_operator_recursive(pt.ProcessTree(pt.Operator.XOR, root), threshold, act_key, [EventLog(), nempty], use_msd) aopt = activity_once_per_trace.detect(log, alphabet, act_key) if aopt is not None: operator = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root) operator.children.append( pt.ProcessTree(operator=None, parent=operator, label=aopt)) return _add_operator_recursive( operator, threshold, act_key, activity_once_per_trace.project(log, aopt, act_key), use_msd) act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd) if act_conc is not None: return _add_operator_recursive( pt.ProcessTree(pt.Operator.PARALLEL, root), threshold, act_key, activity_concurrent.project(log, act_conc, act_key), use_msd) stl = strict_tau_loop.detect(log, start_activities, end_activities, act_key) if stl is not None: return _add_operator_recursive(pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key, [stl, EventLog()], use_msd) tl = tau_loop.detect(log, start_activities, act_key) if tl is not None: return _add_operator_recursive(pt.ProcessTree(pt.Operator.LOOP, root), threshold, act_key, [tl, EventLog()], use_msd) return _flower(alphabet, root)