def project(log: EventLog, cut: Cut, activity_key: str) -> List[EventLog]:
    """
    Split an event log according to a loop cut.

    The first group of the cut is the "do" part; the remaining groups are the
    "redo" parts. Each trace is scanned once: maximal runs of do-activities go
    into the do-log, and each maximal run of non-do activities is routed to
    the redo log of the matching group.

    Parameters
    ----------
    log
        Event log to project
    cut
        Loop cut (first group = do part, remaining groups = redo parts)
    activity_key
        Attribute of the events carrying the activity name

    Returns
    -------
    list
        The do-log followed by one log per redo group
    """
    do_part = cut[0]
    redo_parts = cut[1:]
    do_log = EventLog()
    redo_logs = [EventLog() for _ in redo_parts]
    for trace in log:
        current_do = Trace()
        current_redo = Trace()
        for event in trace:
            if event[activity_key] in do_part:
                # a do-activity closes any pending redo sub-trace
                if len(current_redo) > 0:
                    redo_logs = _append_trace_to_redo_log(current_redo, redo_logs, redo_parts, activity_key)
                    current_redo = Trace()
                current_do.append(event)
            else:
                # a redo-activity closes any pending do sub-trace
                if len(current_do) > 0:
                    do_log.append(current_do)
                    current_do = Trace()
                current_redo.append(event)
        if len(current_redo) > 0:
            redo_logs = _append_trace_to_redo_log(current_redo, redo_logs, redo_parts, activity_key)
        # the trailing do part is appended even when empty (a loop execution
        # always terminates in the do part)
        do_log.append(current_do)
    result = [do_log]
    result.extend(redo_logs)
    return result
def detect(log: EventLog, start_activities, act_key: str) -> Optional[EventLog]:
    """
    Cut every trace at each occurrence (beyond the first event) of a start
    activity, producing a log of sub-traces.

    Returns the projected log only if at least one trace was actually split
    (i.e. the projection contains more traces than the original log);
    otherwise returns None.
    """
    projected = EventLog()
    for trace in log:
        last_cut = 0
        for idx in range(1, len(trace)):
            if trace[idx][act_key] in start_activities:
                projected.append(Trace(trace[last_cut:idx]))
                last_cut = idx
        projected.append(Trace(trace[last_cut:len(trace)]))
    if len(projected) > len(log):
        return projected
    return None
def form_log_from_dictio_couple(first_cases_repr, second_cases_repr, enable_multiplier=False):
    """
    Form a log from a couple of case-representation dictionaries, to use for
    root cause analysis. Each case representation becomes a one-event trace.

    Parameters
    -------------
    first_cases_repr
        First cases representation
    second_cases_repr
        Second cases representation
    enable_multiplier
        Enable balancing of classes (each group is replicated so the two
        groups have comparable sizes)

    Returns
    ------------
    log
        Trace log object
    """
    log = EventLog()
    if enable_multiplier:
        # replicate the smaller group to roughly balance the class sizes
        mult_first = int(max(float(len(second_cases_repr)) / float(len(first_cases_repr)), 1))
        mult_second = int(max(float(len(first_cases_repr)) / float(len(second_cases_repr)), 1))
    else:
        mult_first = 1
        mult_second = 1
    for cases_repr, multiplier in ((first_cases_repr, mult_first), (second_cases_repr, mult_second)):
        for _ in range(multiplier):
            for case_repr in cases_repr:
                trace = Trace()
                trace.append(Event(case_repr))
                log.append(trace)
    return log
def get_log_with_log_prefixes(log, parameters=None):
    """
    Gets an extended log that contains, in order, all the prefixes
    (from length 0 up to the full trace) for each case of the original log.

    Parameters
    --------------
    log
        Original log
    parameters
        Possible parameters of the algorithm (unused)

    Returns
    -------------
    all_prefixes_log
        Log with all the prefixes
    change_indexes
        Indexes of the extended log where there was a change between cases
    """
    all_prefixes_log = EventLog()
    change_indexes = []
    for trace in log:
        growing_prefix = Trace()
        for event in trace:
            # snapshot the prefix before extending it, so lengths 0..n-1 are recorded
            all_prefixes_log.append(deepcopy(growing_prefix))
            growing_prefix.append(event)
        # the full trace (length n) is the last prefix
        all_prefixes_log.append(deepcopy(growing_prefix))
        change_indexes.append([len(all_prefixes_log) - 1] * len(trace))
    return all_prefixes_log, change_indexes
def sort_timestamp_trace(trace, timestamp_key=xes.DEFAULT_TIMESTAMP_KEY, reverse_sort=False):
    """
    Sort a trace based on timestamp key

    Parameters
    -----------
    trace
        Trace
    timestamp_key
        Timestamp key
    reverse_sort
        If true, reverses the direction in which the sort is done (ascending)

    Returns
    -----------
    trace
        Sorted trace
    """
    # iterate the trace directly instead of reaching into the private
    # `_list` attribute; a Trace is an iterable over its events
    events = sorted(trace, key=lambda x: x[timestamp_key], reverse=reverse_sort)
    new_trace = Trace(events, attributes=trace.attributes)
    return new_trace
def apply_tree_variants(variants, parameters=None):
    """
    Apply the IM algorithm to a dictionary of variants, obtaining a process tree

    Parameters
    ----------
    variants
        Variants
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    # rebuild a log with one trace per variant, then run the tree discovery on it
    log = EventLog()
    for variant in list(variants.keys()):
        trace = Trace()
        for act in variants_util.get_activities_from_variant(variant):
            trace.append(Event({activity_key: act}))
        log.append(trace)
    return apply_tree(log, parameters=parameters)
def get_prefixes_from_log(log: EventLog, length: int) -> EventLog:
    """
    Gets the prefixes of a log of a given length

    Parameters
    ----------------
    log
        Event log
    length
        Length

    Returns
    ----------------
    prefix_log
        Log containing the prefixes:
        - a trace with lower or identical length is included as-is
        - a trace with greater length is cut to the given length
    """
    prefix_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions,
                          classifiers=log.classifiers, omni_present=log.omni_present,
                          properties=log.properties)
    for trace in log:
        # short-enough traces are kept unchanged (same object, not copied)
        if len(trace) <= length:
            prefix_log.append(trace)
            continue
        cut_trace = Trace(attributes=trace.attributes)
        for idx in range(length):
            cut_trace.append(trace[idx])
        prefix_log.append(cut_trace)
    return prefix_log
def list_of_str_to_trace(activities: List[str]) -> Trace:
    """Build a Trace whose events carry the given activity names under concept:name."""
    trace = Trace()
    for name in activities:
        event = Event()
        event["concept:name"] = name
        trace.append(event)
    return trace
def apply(df, parameters=None):
    """
    Convert a dataframe into a log containing 1 case per variant (only
    control-flow perspective is considered)

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    -------------
    log
        Event log
    """
    from pm4py.statistics.traces.pandas import case_statistics
    if parameters is None:
        parameters = {}
    variant_stats = case_statistics.get_variant_statistics(df, parameters=parameters)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    log = EventLog()
    for variant_descr in variant_stats:
        # each variant is a comma-separated list of activity names
        trace = Trace()
        for activity in variant_descr['variant'].split(","):
            event = Event()
            event[activity_key] = activity
            trace.append(event)
        log.append(trace)
    return log
def variant_to_trace(variant, parameters=None):
    """
    Convert a variant into a Trace object.

    Parameters
    ------------
    variant
        Variant, expressed either as a tuple/list of activity names or as a
        single string with activities separated by the variant delimiter
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute to use as activity
            Parameters.PARAMETER_VARIANT_DELIMITER -> delimiter between
            activities of a string variant

    Returns
    ------------
    trace
        Trace object with one event per activity (empty for unsupported types)
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    variant_delimiter = exec_utils.get_param_value(Parameters.PARAMETER_VARIANT_DELIMITER, parameters,
                                                   constants.DEFAULT_VARIANT_SEP)
    from pm4py.objects.log.obj import Trace, Event
    trace = Trace()
    # idiomatic isinstance check (also accepts subclasses of tuple/list/str)
    if isinstance(variant, (tuple, list)):
        activities = variant
    elif isinstance(variant, str):
        activities = variant.split(variant_delimiter)
    else:
        # unsupported variant type: keep the original behavior of
        # returning an empty trace
        activities = []
    for act in activities:
        trace.append(Event({activity_key: act}))
    return trace
def apply(tree: ProcessTree, parameters : Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Performs an extensive playout of the process tree

    Parameters
    -------------
    tree
        Process tree
    parameters
        Possible parameters, including:
        - Parameters.MIN_TRACE_LENGTH => minimum length of a trace (default: 1)
        - Parameters.MAX_TRACE_LENGTH => maximum length of a trace (default: min_allowed_trace_length)
        - Parameters.MAX_LOOP_OCC => maximum number of occurrences for a loop (default: MAX_TRACE_LENGTH)
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.MAX_LIMIT_NUM_TRACES => maximum number to the limit of traces; the playout shall stop
          when the number is reached (default: 100000)

    Returns
    -------------
    log
        Event log (or a set of trace-strings if Parameters.RETURN_SET_STRINGS is True)
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    # to save memory in the returned log, allocate each activity once. to know the list of activities of the
    # process tree, use the footprints module
    fp_tree = fp_discovery.apply(tree, parameters=parameters)
    activities = fp_tree["activities"]
    activities = {act: Event({activity_key: act}) for act in activities}
    # the minimum length of a complete trace through the tree bounds the defaults below
    min_allowed_trace_length = bottomup_discovery.get_min_trace_length(tree, parameters=parameters)
    min_trace_length = exec_utils.get_param_value(Parameters.MIN_TRACE_LENGTH, parameters, 1)
    max_trace_length = exec_utils.get_param_value(Parameters.MAX_TRACE_LENGTH, parameters, min_allowed_trace_length)
    max_loop_occ = exec_utils.get_param_value(Parameters.MAX_LOOP_OCC, parameters, int(max_trace_length / 2))
    max_limit_num_traces = exec_utils.get_param_value(Parameters.MAX_LIMIT_NUM_TRACES, parameters, 100000)
    return_set_strings = exec_utils.get_param_value(Parameters.RETURN_SET_STRINGS, parameters, False)
    # bottom-up node order guarantees children are played out before their parents
    bottomup = bottomup_discovery.get_bottomup_nodes(tree, parameters=parameters)
    min_rem_dict = bottomup_discovery.get_min_rem_dict(tree, parameters=parameters)
    max_rem_dict = bottomup_discovery.get_max_rem_dict(tree, parameters=parameters)
    playout_dictio = {}
    for i in range(len(bottomup)):
        # fills playout_dictio[node] for each node, bottom-up
        get_playout(bottomup[i], playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
                    max_rem_dict, max_limit_num_traces)
    tree_playout_traces = playout_dictio[tree][TRACES]
    if return_set_strings:
        return tree_playout_traces
    # materialize the traces as an EventLog, re-using the pre-allocated Event objects
    log = EventLog()
    for tr0 in tree_playout_traces:
        trace = Trace()
        for act in tr0:
            trace.append(activities[act])
        log.append(trace)
    return log
def apply(df, parameters=None):
    """
    Convert a dataframe into a log containing N cases per variant (only
    control-flow perspective is considered)

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    -------------
    log
        Event log (and, if RETURN_VARIANTS is set, a dictionary from variant
        string to the indexes of its traces in the log)
    """
    from pm4py.statistics.traces.pandas import case_statistics
    if parameters is None:
        parameters = {}
    return_variants = parameters.get(RETURN_VARIANTS, False)
    case_glue = parameters.get(pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, pm4_constants.CASE_CONCEPT_NAME)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    variant_stats = case_statistics.get_variant_statistics(df, parameters=parameters)
    log = EventLog()
    all_variants_log = {}
    for variant_descr in variant_stats:
        variant_string = variant_descr['variant']
        trace = Trace()
        for activity in variant_string.split(","):
            event = Event()
            event[activity_key] = activity
            trace.append(event)
        # NOTE: the very same Trace object is appended once per occurrence
        # of the variant (no copies are made)
        positions = []
        for _ in range(variant_descr[case_glue]):
            log.append(trace)
            positions.append(len(log) - 1)
        all_variants_log[variant_string] = positions
    if return_variants:
        return log, all_variants_log
    return log
def apply(log: EventLog, act1: str, act2: str, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Given an event log, filters all the subtraces going from an event with activity "act1" to an event with
    activity "act2"

    Parameters
    ----------------
    log
        Event log
    act1
        First activity
    act2
        Second activity
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => activity key

    Returns
    ----------------
    filtered_log
        Log with all the subtraces going from "act1" to "act2"
    """
    if parameters is None:
        parameters = {}
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    filtered_log = EventLog(attributes=log.attributes, extensions=log.extensions, omni_present=log.omni_present,
                            classifiers=log.classifiers, properties=log.properties)
    for trace in log:
        act1_encountered = False
        filt_trace = None
        i = 0
        # BUGFIX: iterate up to len(trace) (not len(trace) - 1) so that a
        # subtrace ending with act2 at the last event of the trace is kept
        while i < len(trace):
            if not act1_encountered and trace[i][activity_key] == act1:
                # open a new subtrace at the first act1 occurrence
                act1_encountered = True
                filt_trace = Trace(attributes=trace.attributes)
                filt_trace.append(trace[i])
            elif act1_encountered and trace[i][activity_key] == act2:
                # close the open subtrace at act2 and emit it
                filt_trace.append(trace[i])
                filtered_log.append(filt_trace)
                act1_encountered = False
                filt_trace = None
            elif filt_trace is not None:
                # intermediate event inside an open subtrace
                filt_trace.append(trace[i])
            i = i + 1
    return filtered_log
def filter_log_by_attributes_threshold(log, attributes, variants, vc, threshold, attribute_key=xes.DEFAULT_NAME_KEY):
    """
    Keep only attributes whose number of occurrences is above the threshold
    (or that belong to the first variant)

    Parameters
    ----------
    log
        Log
    attributes
        Dictionary of attributes associated with their count
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count
    threshold
        Cutting threshold (remove attributes whose number of occurrences is below the threshold)
    attribute_key
        (If specified) Specify the attribute key in the log (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions,
                            classifiers=log.classifiers, omni_present=log.omni_present,
                            properties=log.properties)
    # attribute values appearing in the first trace of the most frequent variant
    first_variant_values = [ev[attribute_key] for ev in variants[vc[0][0]][0] if attribute_key in ev]
    for trace in log:
        kept_trace = Trace()
        for event in trace:
            if attribute_key not in event:
                continue
            value = event[attribute_key]
            if value not in attributes:
                continue
            # keep the event if its value survives the threshold, or if it is
            # an activity of the most frequent variant
            if (value in first_variant_values and attribute_key == xes.DEFAULT_NAME_KEY) \
                    or attributes[value] >= threshold:
                kept_trace.append(event)
        if len(kept_trace) > 0:
            for attr in trace.attributes:
                kept_trace.attributes[attr] = trace.attributes[attr]
            filtered_log.append(kept_trace)
    return filtered_log
def acyclic_net_variants(net, initial_marking, final_marking, activity_key=xes_util.DEFAULT_NAME_KEY):
    """
    Given an acyclic accepting Petri net, initial and final marking, extracts
    the set of variants (as Trace objects) replayable on the net.

    Warning: this function is based on a marking exploration. If the
    accepting Petri net contains loops, the method will not work properly, as
    it stops the search when a specific marking has already been encountered.

    Parameters
    ----------
    :param net: An acyclic workflow net
    :param initial_marking: The initial marking of the net.
    :param final_marking: The final marking of the net.
    :param activity_key: activity key to use

    Returns
    -------
    :return: variants: :class:`list` Set of variants - in the form of Trace
        objects - obtainable executing the net
    """
    active = {(initial_marking, ())}
    visited = set()
    variants = set()
    while active:
        marking, partial_trace = active.pop()
        pair = (marking, partial_trace)
        for transition in semantics.enabled_transitions(net, marking):
            # silent transitions do not extend the partial trace
            if transition.label is None:
                extended_trace = partial_trace
            else:
                extended_trace = partial_trace + (transition.label,)
            successor_marking = semantics.execute(transition, net, marking)
            successor_pair = (successor_marking, extended_trace)
            if successor_marking == final_marking:
                variants.add(extended_trace)
            elif successor_pair not in visited and successor_pair != pair:
                # continue the exploration only on unseen marking/trace pairs
                active.add(successor_pair)
        visited.add(pair)
    # materialize each variant as a Trace of Events
    trace_variants = []
    for variant in variants:
        trace = Trace()
        for activity_label in variant:
            trace.append(Event({activity_key: activity_label}))
        trace_variants.append(trace)
    return trace_variants
def filter_log_traces_attr(log, values, parameters=None):
    """
    Filter log by keeping only traces that have (or have not) events with an
    attribute value that belongs to the provided values list

    Parameters
    -----------
    log
        Trace log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            attribute_key -> Attribute identifying the activity in the log
            positive -> Indicate if matching traces should be kept (True) or removed (False)

    Returns
    -----------
    filtered_log
        Filtered log
    """
    # CODE SAVING FROM FILTERS
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    filtered_log = EventLog()
    for trace in log:
        # does any event of the trace carry one of the allowed values?
        found = any(attribute_key in ev and ev[attribute_key] in values for ev in trace)
        if found == positive:
            # trace matches the filter polarity: keep it unchanged
            new_trace = trace
        else:
            # non-matching trace: an empty trace carrying the original
            # attributes (never appended, since its length is 0)
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
def apply(log, values, parameters=None):
    """
    Filter log by keeping only traces that have (or have not) events with an
    attribute value that belongs to the provided values list

    Parameters
    -----------
    log
        Trace log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> Attribute identifying the activity in the log
            Parameters.POSITIVE -> Indicate if matching traces should be kept (True) or removed (False)

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions,
                            classifiers=log.classifiers, omni_present=log.omni_present,
                            properties=log.properties)
    for trace in log:
        # does any event of the trace carry one of the allowed values?
        found = any(attribute_key in ev and ev[attribute_key] in values for ev in trace)
        if found == positive:
            # trace matches the filter polarity: keep it unchanged
            new_trace = trace
        else:
            # non-matching trace: an empty trace carrying the original
            # attributes (never appended, since its length is 0)
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
def apply_trace_attributes(log, list_of_values, parameters=None):
    """
    Filter log by keeping only traces whose case attribute value belongs
    (or does not belong) to the provided values list

    Parameters
    -----------
    log
        Trace log
    list_of_values
        Allowed attribute values (if it's a numerical value, [] is needed to make it a list)
    parameters
        Parameters of the algorithm, including:
            attribute_key -> Attribute identifying the case in the log
            positive -> Indicate if matching traces should be kept (True) or removed (False)

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    filtered_log = EventLog()
    for trace in log:
        # check the trace-level (case) attribute, not the events
        found = (attribute_key in trace.attributes
                 and trace.attributes[attribute_key] in list_of_values)
        if found == positive:
            # trace matches the filter polarity: keep it unchanged
            new_trace = trace
        else:
            # non-matching trace: an empty trace carrying the original
            # attributes (never appended, since its length is 0)
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
def read_trace(self) -> Trace:
    """
    Read the next trace from the columnar buffers (case ids, activities,
    timestamps) held on this reader.

    Returns the next Trace, or None (implicitly) when all traces have
    been consumed.
    """
    if self.i >= self.no_traces:
        return None
    case_id = self.c_unq[self.i]
    # events of case i occupy the contiguous slice [start, end) of the buffers
    start = self.c_ind[self.i]
    end = start + self.c_counts[self.i]
    trace = Trace(attributes={xes_constants.DEFAULT_TRACEID_KEY: case_id})
    for j in range(start, end):
        trace.append(Event({
            xes_constants.DEFAULT_NAME_KEY: self.activities[j],
            xes_constants.DEFAULT_TIMESTAMP_KEY: self.timestamps[j]
        }))
    self.i += 1
    return trace
def parse_event_log_string(
        traces: Collection[str], sep: str = ",",
        activity_key: str = xes_constants.DEFAULT_NAME_KEY,
        timestamp_key: str = xes_constants.DEFAULT_TIMESTAMP_KEY,
        case_id_key: str = xes_constants.DEFAULT_TRACEID_KEY) -> EventLog:
    """
    Parse a collection of traces expressed as strings
    (e.g., ["A,B,C,D", "A,C,B,D", "A,D"]) to an event log

    Parameters
    ------------------
    traces
        Collection of traces expressed as strings
    sep
        Separator used to split the activities of a string trace
    activity_key
        The attribute that should be used as activity
    timestamp_key
        The attribute that should be used as timestamp
    case_id_key
        The attribute that should be used as case identifier

    Returns
    -----------------
    log
        Event log
    """
    log = EventLog()
    # synthetic, strictly increasing timestamps starting from a fixed epoch offset
    current_timestamp = 10000000
    for index, trace_string in enumerate(traces):
        trace = Trace()
        trace.attributes[case_id_key] = str(index)
        for act in trace_string.split(sep):
            trace.append(Event({
                activity_key: act,
                timestamp_key: datetime.datetime.fromtimestamp(current_timestamp)
            }))
            current_timestamp += 1
        log.append(trace)
    return log
def project(log, groups, activity_key):
    """
    Project the log on the given activity groups: each trace is assigned to
    the group covering the largest number of its events (ties broken by the
    highest group index), and only the events of that group are kept.

    Refactored to support both IM and IMf.
    """
    logs = [EventLog() for _ in groups]
    for trace in log:
        # count, per group, how many events of the trace belong to it
        counts = []
        for index, group in enumerate(groups):
            hits = 0
            for event in trace:
                if event[activity_key] in group:
                    hits += 1
            counts.append((index, hits))
        # highest count first; ties resolved in favor of the larger index
        counts.sort(key=lambda pair: (pair[1], pair[0]), reverse=True)
        best_index = counts[0][0]
        projected = Trace()
        for event in trace:
            if event[activity_key] in groups[best_index]:
                projected.append(event)
        logs[best_index].append(projected)
    return logs
def execute_script():
    """Build a 4-event trace (A, B, C, D), append 10000 deep copies of it to a
    log, and print the resulting log size."""
    template = Trace()
    for name in ("A", "B", "C", "D"):
        event = Event()
        event["concept:name"] = name
        template.append(event)
    L = EventLog()
    for _ in range(10000):
        L.append(deepcopy(template))
    print(len(L))
def generate_log(pt0, no_traces=100):
    """
    Generate a log out of a process tree

    Parameters
    ------------
    pt0
        Process tree
    no_traces
        Number of traces to generate

    Returns
    ------------
    log
        Trace log object
    """
    # different taus must give different IDs in log generation, so the default
    # process tree class cannot be used: wrap a copy in GenerationTree
    gen_tree = GenerationTree(deepcopy(pt0))
    log = EventLog()
    # assign to each event an increasing timestamp starting from a fixed epoch offset
    curr_timestamp = 10000000
    for i in range(no_traces):
        ex_seq = execute(gen_tree)
        labels = pt_util.project_execution_sequence_to_labels(ex_seq)
        trace = Trace()
        trace.attributes[xes.DEFAULT_NAME_KEY] = str(i)
        for label in labels:
            event = Event()
            event[xes.DEFAULT_NAME_KEY] = label
            event[xes.DEFAULT_TIMESTAMP_KEY] = datetime.datetime.fromtimestamp(curr_timestamp)
            trace.append(event)
            curr_timestamp += 1
        log.append(trace)
    return log
def apply(tree: ProcessTree, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Gets the top-bottom playout of a process tree

    Parameters
    ---------------
    tree
        Process tree
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY: activity key
        - Parameters.NO_TRACES: number of traces that should be returned

    Returns
    ---------------
    log
        Event log
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    no_traces = exec_utils.get_param_value(Parameters.NO_TRACES, parameters, 1000)
    log = EventLog()
    for seq in get_num_ex_sequences(tree, no_traces):
        trace = Trace()
        for node in seq:
            # silent nodes (label None) produce no event
            if node.label is not None:
                trace.append(Event({activity_key: node.label}))
        log.append(trace)
    return log
def keep_only_one_attribute_per_event(log, attribute_key):
    """
    Keeps only one attribute per event

    Parameters
    ---------------
    log
        Event log
    attribute_key
        The only attribute key to retain on each event
    """
    new_log = EventLog()
    if log is None:
        return new_log
    for trace in log:
        stripped_trace = Trace()
        for event in trace:
            stripped_trace.append(Event({attribute_key: event[attribute_key]}))
        new_log.append(stripped_trace)
    return new_log
def form_fake_log(prefixes_keys, activity_key=xes_util.DEFAULT_NAME_KEY):
    """
    Form a fake log for replay (putting each prefix as a separate trace to align)

    Parameters
    ----------
    prefixes_keys
        Keys of the prefixes (to form a log with a given order)
    activity_key
        Activity key (must be provided if different from concept:name)
    """
    fake_log = EventLog()
    for prefix in prefixes_keys:
        trace = Trace()
        # each prefix key is a delimiter-separated sequence of activities
        for activity in prefix.split(constants.DEFAULT_VARIANT_SEP):
            event = Event()
            event[activity_key] = activity
            trace.append(event)
        fake_log.append(trace)
    return fake_log
def check_is_fitting(*args, activity_key=xes_constants.DEFAULT_NAME_KEY):
    """
    Checks if a trace object is fit against a process model

    Parameters
    -----------------
    trace
        Trace object (trace / variant)
    model
        Model (process tree, Petri net, BPMN, ...)
    activity_key
        Activity key (optional)

    Returns
    -----------------
    is_fit
        Boolean value (True if the trace fits; False if the trace does not);
        None if the model cannot be handled
    """
    from pm4py.util import variants_util
    from pm4py.convert import convert_to_process_tree, convert_to_petri_net

    trace = args[0]
    model = args[1:]

    try:
        model = convert_to_process_tree(*model)
    except Exception:
        # BUGFIX: narrowed from a bare `except:` (which would also swallow
        # KeyboardInterrupt/SystemExit). The model cannot be expressed as a
        # process tree; check if it can at least be expressed as a Petri net
        model = convert_to_petri_net(*model)

    if not isinstance(trace, Trace):
        # the first argument is a variant: materialize it as a Trace
        activities = variants_util.get_activities_from_variant(trace)
        trace = Trace()
        for act in activities:
            trace.append(Event({activity_key: act}))

    if isinstance(model, ProcessTree):
        return __check_is_fit_process_tree(trace, model, activity_key=activity_key)
    elif isinstance(model, tuple) and isinstance(model[0], PetriNet):
        return __check_is_fit_petri_net(trace, model[0], model[1], model[2], activity_key=activity_key)
def project(log, groups, activity_key):
    '''
    This method projects the log based on a presumed sequence cut and a list of activity groups

    Parameters
    ----------
    log
        original log
    groups
        list of activity sets to be used in projection (activities can only appear in one group)
    activity_key
        key to use in the event to derive the activity name

    Returns
    -------
        list of corresponding logs according to the sequence cut.
    '''
    # refactored to support both IM and IMf
    logs = [EventLog() for _ in groups]
    for trace in log:
        split_point = 0
        seen_activities = set()
        for idx, group in enumerate(groups):
            # find where the segment belonging to this group ends
            new_split_point = find_split_point(trace, group, split_point, seen_activities, activity_key)
            segment = Trace()
            for pos in range(split_point, new_split_point):
                if trace[pos][activity_key] in group:
                    segment.append(trace[pos])
            logs[idx].append(segment)
            split_point = new_split_point
            seen_activities = seen_activities.union(set(group))
    return logs
def from_dicts_to_trace(event_dicts, trace_info_dict):
    """Assemble a Trace from per-event dictionaries plus trace-level attributes."""
    converted_events = []
    for event_dict in event_dicts:
        converted_events.append(from_dict_to_event(event_dict))
    return Trace(converted_events, attributes=trace_info_dict)
def __approximate_alignment_on_sequence(pt: ProcessTree, trace: Trace, a_sets: Dict[ProcessTree, Set[str]],
                                        sa_sets: Dict[ProcessTree, Set[str]], ea_sets: Dict[ProcessTree, Set[str]],
                                        tau_flags: Dict[ProcessTree, bool], tl: int, th: int,
                                        parameters=None):
    """
    Approximate an alignment for a SEQUENCE node of a process tree by solving
    an ILP that assigns each trace event to one child subtree (preserving the
    trace order), then recursively aligning each child with its sub-trace.

    Parameters: pt (SEQUENCE tree node), trace (non-empty trace), a_sets /
    sa_sets / ea_sets (activities, start- and end-activities per subtree),
    tau_flags (whether a subtree can be skipped silently), tl / th
    (trace-length thresholds forwarded to the recursive call).

    Returns the composed alignment, or None if a sub-alignment did not
    terminate correctly.
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)
    assert pt.operator == Operator.SEQUENCE
    assert len(pt.children) > 0
    assert len(trace) > 0
    # x_i_j = 1 <=> assigns activity i to subtree j
    x__variables = {}
    # s_i_j = 1 <=> activity i is a start activity in the current sub-trace assigned to subtree j
    s__variables = {}
    # e_i_j = 1 <=> activity i is an end activity in the current sub-trace assigned to subtree j
    e__variables = {}
    # auxiliary u_j <=> u_j=1 if an activity is assigned to subtree j
    u__variables = {}
    # v_i_j = 1 <=> activity i is neither a start nor end-activity in the current sub-trace assigned to subtree j
    v__variables = {}
    s__costs = {}
    e__costs = {}
    u__costs = {}
    v__costs = {}
    # all_variables maps each ILP variable to a dense index; the *__variables
    # dicts store that index per (event i, subtree j)
    all_variables = []
    for i, a in enumerate(trace):
        x__variables[i] = {}
        s__variables[i] = {}
        s__costs[i] = {}
        e__variables[i] = {}
        e__costs[i] = {}
        v__variables[i] = {}
        v__costs[i] = {}
        for j, subtree in enumerate(pt.children):
            all_variables.append('x_' + str(i) + '_' + str(j))
            x__variables[i][j] = len(all_variables) - 1
            all_variables.append('s_' + str(i) + '_' + str(j))
            s__variables[i][j] = len(all_variables) - 1
            all_variables.append('e_' + str(i) + '_' + str(j))
            e__variables[i][j] = len(all_variables) - 1
            all_variables.append('v_' + str(i) + '_' + str(j))
            v__variables[i][j] = len(all_variables) - 1
            # cost 0 if the activity is a plausible start/end/member of the subtree, else 1
            s__costs[i][j] = 0 if a[activity_key] in sa_sets[subtree] else 1
            e__costs[i][j] = 0 if a[activity_key] in ea_sets[subtree] else 1
            v__costs[i][j] = 0 if a[activity_key] in a_sets[subtree] else 1
    for j in range(len(pt.children)):
        all_variables.append('u_' + str(j))
        u__variables[j] = len(all_variables) - 1
        # define costs to not assign anything to subtree j
        if tau_flags[pt.children[j]]:
            u__costs[j] = 0
        elif sa_sets[pt.children[j]] & ea_sets[pt.children[j]]:
            # intersection of start-activities and end-activities is not empty
            u__costs[j] = 1
        else:
            # intersection of start-activities and end-activities is empty
            u__costs[j] = 2
    # objective function
    c = [0] * len(all_variables)
    for i in range(len(trace)):
        for j in range(len(pt.children)):
            c[v__variables[i][j]] = v__costs[i][j]
    for i in range(len(trace)):
        for j in range(len(pt.children)):
            c[s__variables[i][j]] = s__costs[i][j]
    for i in range(len(trace)):
        for j in range(len(pt.children)):
            c[e__variables[i][j]] = e__costs[i][j]
    for j in range(len(pt.children)):
        # u_j enters with negative cost: leaving a subtree empty is penalized
        c[u__variables[j]] = -u__costs[j]
    Aub = []
    bub = []
    Aeq = []
    beq = []
    # every activity is assigned to one subtree
    for i in range(len(trace)):
        r = [0] * len(all_variables)
        for j in range(len(pt.children)):
            r[x__variables[i][j]] = 1
        Aeq.append(r)
        beq.append(1)
    for j in range(len(pt.children)):
        r1 = [0] * len(all_variables)
        r2 = [0] * len(all_variables)
        # first activity is start activity
        r1[x__variables[0][j]] = 1
        r1[s__variables[0][j]] = -1
        # last activity is an end activity
        r2[x__variables[len(trace) - 1][j]] = 1
        r2[e__variables[len(trace) - 1][j]] = -1
        Aub.append(r1)
        Aub.append(r2)
        bub.append(0)
        bub.append(0)
    # define s_i_j variables
    for i in range(1, len(trace)):
        for j in range(len(pt.children)):
            r1 = [0] * len(all_variables)
            r2 = [0] * len(all_variables)
            r3 = [0] * len(all_variables)
            r1[s__variables[i][j]] = -1
            r1[x__variables[i][j]] = 1
            r1[x__variables[i - 1][j]] = -1
            r2[s__variables[i][j]] = 1
            r2[x__variables[i][j]] = -1
            r3[s__variables[i][j]] = 1
            r3[x__variables[i - 1][j]] = 1
            Aub.append(r1)
            Aub.append(r2)
            Aub.append(r3)
            bub.append(0)
            bub.append(0)
            bub.append(1)
    for i in range(len(trace)):
        # an activity can be a start-activity for at most one subtree
        r = [0] * len(all_variables)
        for j in range(len(pt.children)):
            r[s__variables[i][j]] = 1
        Aub.append(r)
        bub.append(1)
    # define e_i_j variables
    for i in range(len(trace) - 1):
        for j in range(len(pt.children)):
            r1 = [0] * len(all_variables)
            r2 = [0] * len(all_variables)
            r3 = [0] * len(all_variables)
            r1[e__variables[i][j]] = -1
            r1[x__variables[i][j]] = 1
            r1[x__variables[i + 1][j]] = -1
            r2[e__variables[i][j]] = 1
            r2[x__variables[i][j]] = -1
            r3[e__variables[i][j]] = 1
            r3[x__variables[i + 1][j]] = 1
            Aub.append(r1)
            Aub.append(r2)
            Aub.append(r3)
            bub.append(0)
            bub.append(0)
            bub.append(1)
    for i in range(len(trace)):
        # activity can be only for one subtree an end-activity
        r = [0] * len(all_variables)
        for j in range(len(pt.children)):
            r[e__variables[i][j]] = 1
        Aub.append(r)
        bub.append(1)
    # constraint - preserving sequence when assigning activities to subtrees
    for i in range(len(trace) - 1):
        for j in range(len(pt.children)):
            r = [0] * len(all_variables)
            for k in range(j, len(pt.children)):
                r[x__variables[i + 1][k]] = -1
            r[x__variables[i][j]] = 1
            Aub.append(r)
            bub.append(0)
    # define u_j variables
    for j in range(len(pt.children)):
        for i in range(len(trace)):
            r = [0] * len(all_variables)
            r[u__variables[j]] = -1
            r[x__variables[i][j]] = 1
            Aub.append(r)
            bub.append(0)
        # u_j can be 1 only if subtree j has both a start and an end activity
        r1 = [0] * len(all_variables)
        r2 = [0] * len(all_variables)
        r1[u__variables[j]] = 1
        r2[u__variables[j]] = 1
        for i in range(len(trace)):
            r1[s__variables[i][j]] = -1
            r2[e__variables[i][j]] = -1
        Aub.append(r1)
        Aub.append(r2)
        bub.append(0)
        bub.append(0)
    # define v_i_j variables
    for i in range(len(trace)):
        # NOTE(review): range(2) looks suspicious — the analogous loops above
        # iterate range(len(pt.children)); confirm this is intentional for
        # sequences with more than two children
        for j in range(2):
            r1 = [0] * len(all_variables)
            r2 = [0] * len(all_variables)
            r3 = [0] * len(all_variables)
            r4 = [0] * len(all_variables)
            r1[v__variables[i][j]] = -1
            r1[s__variables[i][j]] = -1
            r1[e__variables[i][j]] = -1
            r1[x__variables[i][j]] = 1
            r2[v__variables[i][j]] = 1
            r2[x__variables[i][j]] = -1
            r3[v__variables[i][j]] = 1
            r3[e__variables[i][j]] = 1
            r4[v__variables[i][j]] = 1
            r4[s__variables[i][j]] = 1
            Aub.append(r1)
            Aub.append(r2)
            Aub.append(r3)
            Aub.append(r4)
            bub.append(0)
            bub.append(0)
            bub.append(1)
            bub.append(1)
    # bound every variable to [0, 1]
    for idx, v in enumerate(all_variables):
        r = [0] * len(all_variables)
        r[idx] = -1
        Aub.append(r)
        bub.append(0)
        r = [0] * len(all_variables)
        r[idx] = 1
        Aub.append(r)
        bub.append(1)
    points = __ilp_solve(c, Aub, bub, Aeq, beq)
    # replace the stored variable indexes with booleans of the solution
    for i in x__variables:
        for j in x__variables[i]:
            x__variables[i][j] = True if points[x__variables[i][j]] == 1 else False
    x_variables = x__variables
    # build, per subtree, the sub-trace of events assigned to it
    alignments_to_calculate: List[Tuple[ProcessTree, Trace]] = []
    for j in range(len(pt.children)):
        sub_trace = Trace()
        for i in range(len(trace)):
            if x_variables[i][j] == 1:
                sub_trace.append(trace[i])
        alignments_to_calculate.append((pt.children[j], sub_trace))
    # calculate and compose alignments
    res = []
    for subtree, sub_trace in alignments_to_calculate:
        align_result = __approximate_alignment_for_trace(subtree, a_sets, sa_sets, ea_sets, tau_flags, sub_trace,
                                                         tl, th, parameters=parameters)
        if align_result is None:
            # the alignment did not terminate correctly
            return None
        res.extend(align_result)
    return res