def apply(trace_log, admitted_variants, parameters=None):
    """
    Filter a log by variant, keeping either the admitted variants or all the others

    Parameters
    -----------
    trace_log
        Trace log object
    admitted_variants
        Collection of variants the selection is based on
    parameters
        Parameters of the algorithm (forwarded unchanged to get_variants), including:
            positive -> If true (default) only the traces of the admitted variants are kept,
            otherwise only the traces of the non-admitted variants are kept

    Returns
    -----------
    filtered_log
        New trace log containing the selected traces
    """
    if parameters is None:
        parameters = {}
    keep_admitted = bool(parameters.get("positive", True))

    variants = get_variants(trace_log, parameters=parameters)
    filtered_log = TraceLog()
    for variant, traces in variants.items():
        # A variant is selected when its membership matches the requested polarity
        if (variant in admitted_variants) == keep_admitted:
            for trace in traces:
                filtered_log.append(trace)
    return filtered_log
def generate_log(pt, no_traces=100):
    """
    Generate a log by repeatedly executing a process tree

    Parameters
    ------------
    pt
        Process tree
    no_traces
        Number of traces that the generated log should contain

    Returns
    ------------
    log
        Trace log object (one trace per execution; the trace name is its index as string)
    """
    name_key = xes.DEFAULT_NAME_KEY
    log = TraceLog()
    for trace_index in range(no_traces):
        # One execution of the tree gives one sequence of labels, i.e. one trace
        labels = pt_util.project_execution_sequence_to_labels(execute(pt))
        trace = Trace()
        trace.attributes[name_key] = str(trace_index)
        for label in labels:
            event = Event()
            event[name_key] = label
            trace.append(event)
        log.append(trace)
    return log
def filter_log_by_start_activities(start_activities, variants, vc, threshold, activity_key="concept:name"):
    """
    Keep only the variants whose start activity occurs at least `threshold` times
    (the start activity of the variant listed first in vc is always accepted)

    Parameters
    ----------
    start_activities
        Dictionary of start activities associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count; only the first entry is read
    threshold
        Cutting threshold (variants whose start activity count is below it are removed)
    activity_key
        (If specified) Specify the activity key in the log (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = TraceLog()
    # Start activity of the first trace of the variant listed first in vc
    reference_start = variants[vc[0][0]][0][0][activity_key]
    for variant in variants:
        traces = variants[variant]
        # The first event of the first trace is taken as representative of the variant
        start_activity = traces[0][0][activity_key]
        if start_activity not in start_activities:
            continue
        if start_activity == reference_start or start_activities[start_activity] >= threshold:
            for trace in traces:
                filtered_log.append(trace)
    return filtered_log
def sample_tracelog(trace_log, no_traces=100):
    """
    Randomly sample a fixed number of traces from the original log

    The traces are drawn without replacement, so the sample contains exactly
    min(no_traces, number of traces in the log) distinct traces.

    Parameters
    -----------
    trace_log
        Trace log
    no_traces
        Number of traces that the sample should have (capped at the size of the log;
        a negative value gives an empty sample)

    Returns
    -----------
    new_log
        Sampled log: shares attributes, extensions, globals and classifiers with the original log
        and contains shallow copies of the sampled traces, in their original relative order
    """
    new_log = TraceLog(attributes=trace_log.attributes, extensions=trace_log.extensions, globals=trace_log._omni,
                       classifiers=trace_log.classifiers)
    traces = trace_log._list
    # Clamp the sample size: random.sample rejects sizes that are negative or exceed the population
    sample_size = max(0, min(no_traces, len(traces)))
    # Drawing with randrange into a set could pick the same index twice and return
    # fewer traces than requested; random.sample guarantees distinct indexes
    chosen_indexes = sorted(random.sample(range(len(traces)), sample_size))
    for index in chosen_indexes:
        new_log.append(copy(traces[index]))
    return new_log
def filter_log_by_variants_percentage(trace_log, variants, variants_percentage=0.0):
    """
    Filter the log keeping variants, in the order given by get_variants_sorted_by_count,
    until the wanted share of traces is covered

    Parameters
    ----------
    trace_log
        Trace log (only its length is used)
    variants
        Dictionary with variant as the key and the list of traces as the value
    variants_percentage
        Share of traces (compared against added traces / total traces) that should be covered;
        the first variant of the ordering is always kept

    Returns
    ----------
    filtered_log
        Filtered trace log
    """
    filtered_log = TraceLog()
    no_of_traces = len(trace_log)
    variant_count = get_variants_sorted_by_count(variants)

    covered_traces = 0
    for entry in variant_count:
        variant, count = entry[0], entry[1]
        covered_share = covered_traces / no_of_traces
        # The check happens before adding, so the variant crossing the threshold is still included
        if covered_traces == 0 or covered_share < variants_percentage:
            for trace in variants[variant]:
                filtered_log.append(trace)
            covered_traces += count
    return filtered_log
def filter_log_by_paths(trace_log, paths, variants, vc, threshold, attribute_key="concept:name"):
    """
    Keep only paths which number of occurrences is at least the threshold
    (or that belong to the first trace of the variant listed first in vc)

    Parameters
    ----------
    trace_log
        Trace log
    paths
        Dictionary of paths (two attribute values joined by a comma) associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count; only the first entry is read
    threshold
        Cutting threshold (paths which number of occurrences is below the threshold are removed)
    attribute_key
        (If specified) Specify the attribute key to use (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log (traces that end up without events are dropped)
    """
    filtered_log = TraceLog()

    # Paths of the reference trace are accepted regardless of their count
    reference_trace = variants[vc[0][0]][0]
    reference_paths = {
        reference_trace[i][attribute_key] + "," + reference_trace[i + 1][attribute_key]
        for i in range(len(reference_trace) - 1)
    }

    for trace in trace_log:
        new_trace = Trace()
        trace_length = len(trace)
        # The first event is always kept
        if trace_length > 0:
            new_trace.append(trace[0])
        # NOTE(review): the scan starts at index 1, so the pair (first, second event) is never evaluated
        for position in range(1, trace_length - 1):
            current, following = trace[position], trace[position + 1]
            if attribute_key not in current or attribute_key not in following:
                continue
            path = current[attribute_key] + "," + following[attribute_key]
            if path in paths and (path in reference_paths or paths[path] >= threshold):
                # NOTE(review): both events of an accepted pair are appended, so an event shared
                # by two consecutive accepted pairs appears twice in the filtered trace
                new_trace.append(current)
                new_trace.append(following)
        # The last event is always kept (possibly in addition to a copy appended by the loop)
        if trace_length > 1:
            new_trace.append(trace[-1])
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
def apply(trace_log, values, parameters=None):
    """
    Filter log by keeping only the non-empty traces that have / do not have at least one event
    whose attribute value belongs to the provided values

    Parameters
    -----------
    trace_log
        Trace log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY -> Attribute whose value is compared (default DEFAULT_NAME_KEY)
            positive -> If true (default) keep the traces with a matching event,
            otherwise keep the traces without any matching event

    Returns
    -----------
    filtered_log
        Filtered log (the selected traces are the original trace objects, not copies)
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY, DEFAULT_NAME_KEY)
    keep_matching = bool(parameters.get("positive", True))

    filtered_log = TraceLog()
    for trace in trace_log:
        found = any(attribute_key in event and event[attribute_key] in values for event in trace)
        # Empty traces are never added, whatever the polarity
        if found == keep_matching and len(trace) > 0:
            filtered_log.append(trace)
    return filtered_log
def filter_log_by_attributes_threshold(trace_log, attributes, variants, vc, threshold, attribute_key="concept:name"):
    """
    Keep only the events whose attribute value occurs at least `threshold` times
    (or appears in the first trace of the variant listed first in vc)

    Parameters
    ----------
    trace_log
        Trace log
    attributes
        Dictionary of attribute values associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count; only the first entry is read
    threshold
        Cutting threshold (attribute values which number of occurrences is below the threshold are removed)
    attribute_key
        (If specified) Specify the attribute key to use (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log (traces that end up without events are dropped)
    """
    filtered_log = TraceLog()
    # Values of the reference trace are accepted regardless of their count
    reference_values = [event[attribute_key] for event in variants[vc[0][0]][0] if attribute_key in event]

    for trace in trace_log:
        new_trace = Trace()
        for event in trace:
            if attribute_key not in event:
                continue
            value = event[attribute_key]
            # Values without a count are always discarded, even when part of the reference trace
            if value not in attributes:
                continue
            if value in reference_values or attributes[value] >= threshold:
                new_trace.append(event)
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
def _trace_from_prefix(prefix, activity_key):
    """
    Build a trace having one event per comma-separated activity of the prefix
    """
    trace = Trace()
    for activity in prefix.split(","):
        event = Event()
        event[activity_key] = activity
        trace.append(event)
    return trace


def form_fake_log(prefixes_keys, activity_key=xes_util.DEFAULT_NAME_KEY):
    """
    Form fake log for replay (each prefix becomes a separate trace to align)

    Parameters
    ----------
    prefixes_keys
        Keys of the prefixes, as comma-separated activity strings (the log follows their order)
    activity_key
        Activity key (must be provided if different from concept:name)

    Returns
    ----------
    fake_log
        Trace log with one trace per prefix
    """
    fake_log = TraceLog()
    for prefix in prefixes_keys:
        fake_log.append(_trace_from_prefix(prefix, activity_key))
    return fake_log
def project_tracelog(log, allowed_activities, parameters=None):
    """
    Project a log on a given list of allowed (by the user) activities

    Parameters
    -------------
    log
        Trace log
    allowed_activities
        List of allowed activities
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> the activity name to use in the projection
            (default xes.DEFAULT_NAME_KEY)

    Returns
    ------------
    projected_log
        Projected trace log: deep copies of the allowed events, grouped in new traces;
        traces left without events are dropped
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    projected_log = TraceLog()
    for trace in log:
        projected_trace = Trace()
        for event in trace:
            # An event without the activity attribute cannot be an allowed activity:
            # skip it instead of failing with a KeyError
            if activity_key in event and event[activity_key] in allowed_activities:
                projected_trace.append(deepcopy(event))
        if len(projected_trace) > 0:
            projected_log.append(projected_trace)
    return projected_log
def apply(trace_log, paths, parameters=None):
    """
    Apply a filter on traces containing / not containing a path

    Parameters
    -----------
    trace_log
        Trace log
    paths
        Paths that we are looking for (expressed as tuple of 2 strings)
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY -> Attribute used to build the paths (default xes.DEFAULT_NAME_KEY)
            positive -> If true (default) keep the traces containing at least one of the paths,
            otherwise keep the traces containing none of them

    Returns
    -----------
    filtered_log
        Filtered trace log (the selected traces are the original trace objects, not copies)
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY, xes.DEFAULT_NAME_KEY)
    keep_matching = bool(parameters.get("positive", True))

    filtered_log = TraceLog()
    for trace in trace_log:
        found = False
        for i in range(len(trace) - 1):
            current, following = trace[i], trace[i + 1]
            # A pair with a missing attribute cannot form one of the wanted paths:
            # treat it as a non-match instead of failing with a KeyError
            if attribute_key not in current or attribute_key not in following:
                continue
            if (current[attribute_key], following[attribute_key]) in paths:
                found = True
                break
        if found == keep_matching:
            filtered_log.append(trace)
    return filtered_log