def event_trace_sampling(log, k):
    # Keep only the traces whose events all belong to k randomly sampled activities.
    new_log = EventLog()
    unique_ev = unique_activities(log)
    sampled = random.sample(list(unique_ev), k)
    for trace in log:
        valid = all(event['concept:name'] in sampled for event in trace)
        if valid:
            new_log.append(trace)
    print(f'length of sampled log is: {len(new_log)}')
    return new_log
def filter_on_case_performance(log, inf_perf, sup_perf, parameters=None):
    """
    Gets a filtered log keeping only the traces that satisfy the given performance requirements

    Parameters
    ------------
    log
        Log
    inf_perf
        Lower bound on the performance
    sup_perf
        Upper bound on the performance
    parameters
        Parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    filtered_log = EventLog([trace for trace in log if satisfy_perf(trace, inf_perf, sup_perf, timestamp_key)])
    return filtered_log
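# A minimal sketch of the satisfy_perf helper assumed above (it is not shown in
# this snippet): a trace satisfies the bounds when its duration, measured as the
# seconds between its first and last event, lies in [inf_perf, sup_perf].
def satisfy_perf(trace, inf_perf, sup_perf, timestamp_key):
    # convention assumed here: an empty trace has zero duration
    duration = (trace[-1][timestamp_key] - trace[0][timestamp_key]).total_seconds() if trace else 0
    return inf_perf <= duration <= sup_perf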
def filter_cases_exceeding_specified_acti_performance(log, transition_performance, activity, lower_bound):
    """
    Filter cases exceeding the specified activity performance threshold

    Parameters
    ------------
    log
        Event log
    transition_performance
        Dictionary where each transition label is associated to performance measures
    activity
        Target activity (of the filter)
    lower_bound
        Lower bound (keep the cases in which the duration of the activity exceeds it)

    Returns
    ------------
    filtered_log
        Filtered log
    """
    satisfying_indexes = get_idx_exceeding_specified_acti_performance(log, transition_performance,
                                                                      activity, lower_bound)
    new_log = EventLog(list(log[i] for i in satisfying_indexes))
    return new_log
def apply(log, admitted_start_activities, parameters=None):
    """
    Filter the log on the specified start activities

    Parameters
    -----------
    log
        Log
    admitted_start_activities
        Admitted start activities
    parameters
        Algorithm parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)
    filtered_log = EventLog([
        trace for trace in log
        if trace and trace[0][attribute_key] in admitted_start_activities
    ])
    return filtered_log
def createEventLog(self, original_log, simplifiedlog, event_attributes, life_cycle, all_life_cycle,
                   sensitive_attributes, time_accuracy):
    time_prefix = 'time:timestamp'
    life_cycle_prefix = ['lifecycle:transition']
    delete_indexes = []
    log = copy.deepcopy(original_log)
    for i in range(len(log)):
        case_id = log[i].attributes["concept:name"]
        if case_id not in simplifiedlog.keys():
            delete_indexes.append(i)
            continue
        trace = simplifiedlog[case_id]["trace"]
        del_list = []
        simple_trace, sens = self.create_trace(log[i], event_attributes, life_cycle, all_life_cycle,
                                               life_cycle_prefix, time_prefix, sensitive_attributes,
                                               time_accuracy, 0, 0)
        # collect the events whose simplified representation is not part of the kept trace
        for j in range(len(log[i])):
            if simple_trace[j] not in trace:
                del_list.append(log[i][j])
        for x in del_list:
            log[i]._list.remove(x)
    # delete the marked traces from the back, so the collected indexes stay valid
    for i in sorted(delete_indexes, reverse=True):
        del log._list[i]
    log2 = EventLog([trace for trace in log], classifiers=original_log.classifiers)
    return log2
def apply(log, admitted_end_activities, parameters=None):
    """
    Filter the log on the specified end activities

    Parameters
    -----------
    log
        Log
    admitted_end_activities
        Admitted end activities
    parameters
        Algorithm parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] \
        if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY
    filtered_log = [
        trace for trace in log
        if trace and trace[-1][attribute_key] in admitted_end_activities
    ]
    return EventLog(filtered_log)
def variant_filter(log):
    # Keep the traces belonging to (up to) 1000 randomly sampled variants.
    new_log = EventLog()
    variant_list = get.get_variants(log)
    variant_list_count = case_statistics.get_variant_statistics(log)
    sampled = random.sample(variant_list_count, min(1000, len(variant_list_count)))
    vlist = [v['variant'] for v in variant_list_count]
    vlist_s = {v['variant'] for v in sampled}
    for v in vlist:
        if v in vlist_s:
            for trace in variant_list[v]:
                new_log.append(trace)
    return new_log
def apply(log, values, parameters=None):
    """
    Filter the log by keeping only the traces that do (or do not) contain an event
    with an attribute value that belongs to the provided values list

    Parameters
    -----------
    log
        Trace log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Attribute to consider in the filtering
            Parameters.POSITIVE -> Indicate if traces should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    filtered_log = EventLog()
    for trace in log:
        new_trace = Trace()
        found = False
        for j in range(len(trace)):
            if attribute_key in trace[j]:
                attribute_value = trace[j][attribute_key]
                if attribute_value in values:
                    found = True
        if (found and positive) or (not found and not positive):
            new_trace = trace
        else:
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
def filter_log_by_attributes_threshold(log, attributes, variants, vc, threshold, attribute_key="concept:name"):
    """
    Keep only the attribute values whose number of occurrences is at least the threshold
    (or that belong to the first variant)

    Parameters
    ----------
    log
        Log
    attributes
        Dictionary of attributes associated with their count
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count
    threshold
        Cutting threshold (remove attribute values whose number of occurrences is below the threshold)
    attribute_key
        (If specified) Specify the activity key in the log (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = EventLog()
    # attribute values appearing in the most frequent variant are always kept
    fva = [x[attribute_key] for x in variants[vc[0][0]][0] if attribute_key in x]
    for trace in log:
        new_trace = Trace()
        for j in range(len(trace)):
            if attribute_key in trace[j]:
                attribute_value = trace[j][attribute_key]
                if attribute_value in attributes:
                    if attribute_value in fva or attributes[attribute_value] >= threshold:
                        new_trace.append(trace[j])
        if len(new_trace) > 0:
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
            filtered_log.append(new_trace)
    return filtered_log
def apply(log, parameters=None):
    """
    Apply PCA + DBSCAN clustering after creating a representation of the log containing
    the wanted attributes and the wanted succession of attributes

    Parameters
    -----------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            pca_components -> Number of components for the PCA
            dbscan_eps -> EPS value for the DBSCAN clustering
            str_tr_attr -> String trace attributes to consider in the feature representation
            str_ev_attr -> String event attributes to consider in the feature representation
            num_tr_attr -> Numeric trace attributes to consider in the feature representation
            num_ev_attr -> Numeric event attributes to consider in the feature representation
            str_evsucc_attr -> Succession between event attributes to consider in the feature representation

    Returns
    -----------
    log_list
        A list containing, for each cluster, a different log
    """
    if parameters is None:
        parameters = {}
    pca_components = parameters["pca_components"] if "pca_components" in parameters else 3
    dbscan_eps = parameters["dbscan_eps"] if "dbscan_eps" in parameters else 0.3
    log_list = []
    data, feature_names = get_.get_representation(log, str_ev_attr=['concept:name'], str_tr_attr=[],
                                                  num_ev_attr=[], num_tr_attr=[], str_evsucc_attr=[])
    pca = PCA(n_components=pca_components)
    pca.fit(data)
    data2d = pca.transform(data)
    db = DBSCAN(eps=dbscan_eps).fit(data2d)
    labels = db.labels_
    already_seen = {}
    for i in range(len(log)):
        if labels[i] not in already_seen:
            already_seen[labels[i]] = len(already_seen)
            log_list.append(EventLog())
        trace = Trace(log[i])
        for attribute in log[i].attributes:
            trace.attributes[attribute] = log[i].attributes[attribute]
        log_list[already_seen[labels[i]]].append(trace)
    return log_list
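# Hedged usage sketch (not part of the original module): cluster a log imported
# from XES and report the cluster sizes; the file name is a placeholder.
def _example_cluster_log():
    from pm4py.objects.log.importer.xes import importer as xes_importer
    log = xes_importer.apply("receipt.xes")
    clusters = apply(log, parameters={"pca_components": 3, "dbscan_eps": 0.3})
    print([len(cluster) for cluster in clusters])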
def execute_script():
    log = xes_importer.apply(os.path.join("..", "tests", "input_data", "receipt.xes"))
    log = sorting.sort_timestamp(log)
    net, im, fm = inductive_miner.apply(log)
    # compare element usage between the first 500 and the last 500 traces
    log1 = EventLog(log[:500])
    log2 = EventLog(log[len(log) - 500:])
    statistics = element_usage_comparison.compare_element_usage_two_logs(net, im, fm, log1, log2)
    gviz = pn_vis.apply(net, im, fm, variant=pn_vis.Variants.FREQUENCY,
                        aggregated_statistics=statistics,
                        parameters={pn_vis.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"})
    pn_vis.view(gviz)
def merge_log(path, cate, iter):
    loglist = []
    mergedlog = EventLog()
    for i in range(1, cate + 1):
        for j in range(1, iter + 1):
            # os.path.join keeps the path portable (the original concatenated with "\\")
            log = xes_importer.apply(os.path.join(path, 'log_1_' + str(i) + '_' + str(j) + ".xes"))
            for trace in log:
                trace.attributes["concept:name"] = str(iter * (i - 1) + j)
                trace.attributes["index"] = str(iter * (i - 1) + j)
            loglist.append(log)
    for log in loglist:
        for trace in log:
            mergedlog.append(trace)
    return loglist, mergedlog
def apply_trace_attributes(log, list_of_values, parameters=None):
    """
    Filter the log by keeping only the traces that do (or do not) have a case attribute
    value that belongs to the provided values list

    Parameters
    -----------
    log
        Trace log
    list_of_values
        Allowed attribute values (for a numerical value, wrap it in [] to make it a list)
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Attribute identifying the case in the log
            Parameters.POSITIVE -> Indicate if traces should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    filtered_log = EventLog()
    for trace in log:
        new_trace = Trace()
        found = False
        if attribute_key in trace.attributes:
            attribute_value = trace.attributes[attribute_key]
            if attribute_value in list_of_values:
                found = True
        if (found and positive) or (not found and not positive):
            new_trace = trace
        else:
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
def project(log: EventLog, cut: Cut, activity_key: str) -> List[EventLog]:
    do = cut[0]
    redo = cut[1:]
    do_log = EventLog()
    # one redo log per redo group; a comprehension is needed here, since
    # [EventLog()] * len(redo) would alias the same EventLog in every slot
    redo_logs = [EventLog() for _ in redo]
    for t in log:
        do_trace = Trace()
        redo_trace = Trace()
        for e in t:
            if e[activity_key] in do:
                do_trace.append(e)
                if len(redo_trace) > 0:
                    redo_logs = _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key)
                    redo_trace = Trace()
            else:
                redo_trace.append(e)
                if len(do_trace) > 0:
                    do_log.append(do_trace)
                    do_trace = Trace()
        if len(redo_trace) > 0:
            redo_logs = _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key)
        do_log.append(do_trace)
    logs = [do_log]
    logs.extend(redo_logs)
    return logs
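# A minimal sketch of the _append_trace_to_redo_log helper used above (an
# assumption, since it is not shown here): the collected redo subtrace is routed
# to the redo group whose activity set it overlaps the most.
def _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key):
    activities = set(e[activity_key] for e in redo_trace)
    overlaps = [len(activities.intersection(group)) for group in redo]
    redo_logs[overlaps.index(max(overlaps))].append(redo_trace)
    return redo_logs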
def apply_numeric(log, int1, int2, parameters=None):
    """
    Apply a filter on cases (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm

    Returns
    --------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] \
        if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    case_key = parameters[PARAMETER_CONSTANT_CASEID_KEY] \
        if PARAMETER_CONSTANT_CASEID_KEY in parameters else xes.DEFAULT_TRACEID_KEY
    # stream_filter_key is helpful to filter on cases containing an event with an attribute
    # in the specified value set, but such events shall have an activity in particular.
    stream_filter_key1 = parameters["stream_filter_key1"] if "stream_filter_key1" in parameters else None
    stream_filter_value1 = parameters["stream_filter_value1"] if "stream_filter_value1" in parameters else None
    stream_filter_key2 = parameters["stream_filter_key2"] if "stream_filter_key2" in parameters else None
    stream_filter_value2 = parameters["stream_filter_value2"] if "stream_filter_value2" in parameters else None
    positive = parameters["positive"] if "positive" in parameters else True
    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if stream_filter_key1 is not None:
        stream = EventStream(list(filter(
            lambda x: stream_filter_key1 in x and x[stream_filter_key1] == stream_filter_value1, stream)))
    if stream_filter_key2 is not None:
        stream = EventStream(list(filter(
            lambda x: stream_filter_key2 in x and x[stream_filter_key2] == stream_filter_value2, stream)))
    if positive:
        stream = EventStream(list(filter(
            lambda x: attribute_key in x and int1 <= x[attribute_key] <= int2, stream)))
    else:
        stream = EventStream(list(filter(
            lambda x: attribute_key in x and (x[attribute_key] < int1 or x[attribute_key] > int2), stream)))
    all_cases_ids = set(x["case:" + case_key] for x in stream)
    filtered_log = EventLog()
    for case in log:
        if case.attributes[case_key] in all_cases_ids:
            filtered_log.append(case)
    return filtered_log
def execute_script():
    L = EventLog()
    t = Trace()
    for name in ("A", "B", "C", "D"):
        e = Event()
        e["concept:name"] = name
        t.append(e)
    for i in range(10000):
        L.append(deepcopy(t))
    print(len(L))
def replay_prediction(replay_job: Job, training_initial_job: Job, trace_id) -> list:
    """Creates a log of increasing prefixes of the given trace and sends it to the
    replay_prediction endpoint, simulating the log as time passes

    :param trace_id: id of the trace to replay
    :param replay_job: job dictionary
    :param training_initial_job: job dictionary
    :return: list of requests
    """
    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()
    eventlog = EventLog()
    trace = log[int(trace_id)]
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    for index in range(len(trace)):
        new_trace = Trace(trace[0:index])
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)
    replay_job.case_id = trace_id
    replay_job.event_number = len(trace)
    replay_job.save()
    try:
        logger.info("Sending request for replay_prediction task.")
        r = requests.post(
            url="http://127.0.0.1:8000/runtime/replay_prediction/",
            data=export_log_as_string(eventlog),
            params={'jobId': replay_job.id, 'training_job': training_initial_job.id},
            headers={'Content-Type': 'text/plain', 'charset': 'UTF-8'})
        requests_list.append(str(r))
    except Exception as e:
        requests_list.append(str(e))
        logger.warning(str(e))
    return requests_list
def apply(log, admitted_start_activities, parameters=None):
    """
    Filter the log on the specified start activities

    Parameters
    -----------
    log
        Log
    admitted_start_activities
        Admitted start activities
    parameters
        Algorithm parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    if positive:
        filtered_log = EventLog(
            [trace for trace in log if trace and trace[0][attribute_key] in admitted_start_activities],
            attributes=log.attributes, extensions=log.extensions,
            classifiers=log.classifiers, omni_present=log.omni_present)
    else:
        filtered_log = EventLog(
            [trace for trace in log if trace and trace[0][attribute_key] not in admitted_start_activities],
            attributes=log.attributes, extensions=log.extensions,
            classifiers=log.classifiers, omni_present=log.omni_present)
    return filtered_log
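# Hedged usage sketch for the start-activity filter above (not part of the
# original module; the pm4py >= 2.2 import path is assumed for EventLog/Trace/Event).
def _example_filter_start_activities():
    from pm4py.objects.log.obj import EventLog, Trace, Event
    toy = EventLog()
    for seq in (["A", "B"], ["B", "C"]):
        t = Trace()
        for name in seq:
            e = Event()
            e["concept:name"] = name
            t.append(e)
        toy.append(t)
    kept = apply(toy, {"A"})  # only the trace starting with "A" remains
    dropped = apply(toy, {"A"}, parameters={Parameters.POSITIVE: False})  # the complement
    print(len(kept), len(dropped))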
def generate_log(pt0, no_traces=100):
    """
    Generate a log out of a process tree

    Parameters
    ------------
    pt0
        Process tree
    no_traces
        Number of traces to generate

    Returns
    ------------
    log
        Trace log object
    """
    pt = deepcopy(pt0)
    # different taus must get different IDs in log generation,
    # so we cannot use the default process tree class: we use GenerationTree instead
    pt = GenerationTree(pt)
    log = EventLog()
    # assign to each event an increasing timestamp starting from 1970
    curr_timestamp = 10000000
    for i in range(no_traces):
        ex_seq = execute(pt)
        ex_seq_labels = pt_util.project_execution_sequence_to_labels(ex_seq)
        trace = Trace()
        trace.attributes[xes.DEFAULT_NAME_KEY] = str(i)
        for label in ex_seq_labels:
            event = Event()
            event[xes.DEFAULT_NAME_KEY] = label
            event[xes.DEFAULT_TIMESTAMP_KEY] = datetime.datetime.fromtimestamp(curr_timestamp)
            trace.append(event)
            curr_timestamp = curr_timestamp + 1
        log.append(trace)
    return log
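# Hedged usage sketch (not part of the original module): simulate 50 traces from
# a process tree imported from PTML; the importer path follows pm4py 2.x and the
# file name is a placeholder.
def _example_generate_log():
    from pm4py.objects.process_tree.importer import importer as ptml_importer
    tree = ptml_importer.apply("tree.ptml")
    return generate_log(tree, no_traces=50)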
def import_log(filename, parameters=None):
    """
    Imports an XES file into a log object

    Parameters
    ----------
    filename:
        Absolute filename
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)
            Parameters.SHOW_PROGRESS_BAR -> Enables/disables the progress bar (default: True)
            Parameters.ENCODING -> regulates the encoding (default: utf-8)

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        A log
    """
    from lxml import etree

    if parameters is None:
        parameters = {}

    encoding = exec_utils.get_param_value(Parameters.ENCODING, parameters, constants.DEFAULT_ENCODING)
    show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
    is_compressed = filename.lower().endswith(".gz")

    if pkgutil.find_loader("tqdm") and show_progress_bar:
        if is_compressed:
            f = gzip.open(filename, "rb")
        else:
            f = open(filename, "rb")
        context = etree.iterparse(f, events=[_EVENT_START, _EVENT_END], encoding=encoding)
        num_traces = count_traces(context)
    else:
        # avoid iterating over the file to count the traces if "tqdm" is not used
        num_traces = 0
        if is_compressed:
            f = gzip.open(filename, "rb")
        else:
            f = open(filename, "rb")
        context = etree.iterparse(f, events=[_EVENT_START, _EVENT_END], encoding=encoding)

    log = EventLog()
    return import_from_context(context, num_traces, log, parameters=parameters)
def apply(log, parameters=None):
    """
    Discovers a footprint object from an event log
    (the footprints are returned case-by-case)

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        List of footprints for the cases of the log
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    log = converter.apply(log, variant=converter.TO_EVENT_LOG, parameters=parameters)
    ret = []
    for trace in log:
        dfg = dfg_discovery.apply(EventLog([trace]), parameters=parameters)
        parallel = {(x, y) for (x, y) in dfg if (y, x) in dfg}
        sequence = {(x, y) for (x, y) in dfg if (y, x) not in dfg}
        trace = tuple(x[activity_key] for x in trace)
        activities = set(trace)
        if len(trace) > 0:
            start_activities = {trace[0]}
            end_activities = {trace[-1]}
        else:
            start_activities = set()
            end_activities = set()
        ret.append({
            Outputs.DFG.value: dfg,
            Outputs.SEQUENCE.value: sequence,
            Outputs.PARALLEL.value: parallel,
            Outputs.ACTIVITIES.value: activities,
            Outputs.START_ACTIVITIES.value: start_activities,
            Outputs.END_ACTIVITIES.value: end_activities,
            Outputs.MIN_TRACE_LENGTH.value: len(trace),
            Outputs.TRACE.value: trace
        })
    return ret
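# Hedged usage sketch (not part of the original module): the per-case footprints
# of an imported log; the file name is a placeholder.
def _example_case_footprints():
    from pm4py.objects.log.importer.xes import importer as xes_importer
    log = xes_importer.apply("running-example.xes")
    footprints = apply(log)
    # e.g. the strictly sequential directly-follows pairs of the first case
    print(footprints[0][Outputs.SEQUENCE.value])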
def create_event_log(log):
    # each comma-separated token is a trace; each character becomes one event
    traces = list()
    for events in log.split(", "):
        trace = Trace()
        for e in list(events):
            event = Event()
            event["concept:name"] = e
            trace.append(event)
        traces.append(trace)
    return EventLog(traces)
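# Quick check of the string format accepted above (an illustration, not part of
# the original module): "abc, ab" yields two traces with 3 and 2 events.
def _example_create_event_log():
    log = create_event_log("abc, ab")
    assert len(log) == 2
    assert [e["concept:name"] for e in log[0]] == ["a", "b", "c"]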
def event_sampling(input_log, k):
    # Note: this filters input_log in place; the returned log is the same object.
    sampled_log = input_log
    unique_event = unique_activities(sampled_log)
    sampled_events = random.sample(list(unique_event), k)
    for t in sampled_log:
        t[:] = [e for e in t if e['concept:name'] in sampled_events]
    sampled_log[:] = [t for t in sampled_log if len(t) != 0]
    print(f'length of sampled log is: {len(sampled_log)}')
    return sampled_log
def apply(log):
    """
    Filter the log by keeping only the traces where the label is not NaN
    (adapted from the pm4py filtering method on attribute values)

    Parameters
    -----------
    log
        Trace log

    Returns
    -----------
    filtered_log
        Filtered log
    """
    attribute_key = "label"
    positive = False
    filtered_log = EventLog()
    for trace in log:
        new_trace = Trace()
        found = False
        for j in range(len(trace)):
            if attribute_key in trace[j]:
                attribute_value = trace[j][attribute_key]
                if np.isnan(attribute_value):
                    found = True
        if (found and positive) or (not found and not positive):
            new_trace = trace
        else:
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
def list_to_xes(log):
    traces = list()
    for t in log:
        trace = Trace()
        for e in t.split(", "):
            event = Event()
            event["concept:name"] = e
            trace.append(event)
        traces.append(trace)
    return EventLog(traces)
def filter_variants_variants_percentage(log, variants, variants_percentage=0.0):
    """
    Filter the log by variants percentage

    Parameters
    ----------
    log
        Log
    variants
        Dictionary with variant as the key and the list of traces as the value
    variants_percentage
        Percentage of variants that should be kept (the most common variant is always kept)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions,
                            classifiers=log.classifiers, omni_present=log.omni_present)
    no_of_traces = len(log)
    variant_count = get_variants_sorted_by_count(variants)
    already_added_sum = 0
    shall_break_under = -1
    for i in range(len(variant_count)):
        variant = variant_count[i][0]
        varcount = variant_count[i][1]
        # once the target percentage is reached, only variants tying with the
        # last admitted count are still added, then the loop stops
        if varcount < shall_break_under:
            break
        percentage_already_added = already_added_sum / no_of_traces
        for trace in variants[variant]:
            filtered_log.append(trace)
        already_added_sum = already_added_sum + varcount
        if percentage_already_added >= variants_percentage:
            shall_break_under = varcount
    return filtered_log
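# Hedged usage sketch (not part of the original module): obtain the variants
# dictionary through pm4py and keep the variants covering ~80% of the traces;
# the file name is a placeholder.
def _example_filter_variants():
    from pm4py.objects.log.importer.xes import importer as xes_importer
    from pm4py.algo.filtering.log.variants import variants_filter
    log = xes_importer.apply("receipt.xes")
    variants = variants_filter.get_variants(log)
    return filter_variants_variants_percentage(log, variants, variants_percentage=0.8)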
def create_sample_logs(clus_dict, cluster_labels, log):
    """
    Computes the sample logs from the full log given a dictionary mapping cluster labels to case ids,
    building one log with the traces of each cluster

    Parameters
    -----------
    clus_dict : dict
        Dictionary using the cluster labels as keys and the corresponding list of case ids as values
    cluster_labels : list
        The labels of the clusters to be discovered
    log
        EventLog object

    Returns
    -----------
    sample_logs : list
        List of EventLog objects
    """
    sample_logs = []
    for cluster_label in cluster_labels:
        caseids = clus_dict[cluster_label]
        args = {
            'attributes': log.attributes,
            'extensions': log.extensions,
            'omni_present': log.omni_present,
            'classifiers': log.classifiers
        }
        samplelog = EventLog(**args)
        for idx in range(len(log)):
            if log[idx].attributes['concept:name'] in caseids:
                samplelog.append(deepcopy(log[idx]))
        sample_logs.append(samplelog)
    return sample_logs
def apply_numeric(log, int1, int2, parameters=None):
    """
    Apply a filter on cases (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm

    Returns
    --------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    case_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, xes.DEFAULT_TRACEID_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    # stream_filter_key is helpful to filter on cases containing an event with an attribute
    # in the specified value set, but such events shall have an activity in particular.
    stream_filter_key1 = exec_utils.get_param_value(Parameters.STREAM_FILTER_KEY1, parameters, None)
    stream_filter_value1 = exec_utils.get_param_value(Parameters.STREAM_FILTER_VALUE1, parameters, None)
    stream_filter_key2 = exec_utils.get_param_value(Parameters.STREAM_FILTER_KEY2, parameters, None)
    stream_filter_value2 = exec_utils.get_param_value(Parameters.STREAM_FILTER_VALUE2, parameters, None)
    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    if stream_filter_key1 is not None:
        stream = EventStream(list(filter(
            lambda x: stream_filter_key1 in x and x[stream_filter_key1] == stream_filter_value1, stream)))
    if stream_filter_key2 is not None:
        stream = EventStream(list(filter(
            lambda x: stream_filter_key2 in x and x[stream_filter_key2] == stream_filter_value2, stream)))
    if positive:
        stream = EventStream(list(filter(
            lambda x: attribute_key in x and int1 <= x[attribute_key] <= int2, stream)))
    else:
        stream = EventStream(list(filter(
            lambda x: attribute_key in x and (x[attribute_key] < int1 or x[attribute_key] > int2), stream)))
    all_cases_ids = set(x["case:" + case_key] for x in stream)
    filtered_log = EventLog()
    for case in log:
        if case.attributes[case_key] in all_cases_ids:
            filtered_log.append(case)
    return filtered_log
def apply_events(log, values, parameters=None):
    """
    Filter the log by keeping only the events with an attribute value that belongs
    to the provided values list

    Parameters
    -----------
    log
        Log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            attribute_key -> Attribute identifying the activity in the log
            positive -> Indicate if events should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] \
        if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    positive = parameters["positive"] if "positive" in parameters else True
    filtered_log = EventLog()
    for trace in log:
        new_trace = Trace()
        for j in range(len(trace)):
            if attribute_key in trace[j]:
                attribute_value = trace[j][attribute_key]
                if (positive and attribute_value in values) or (not positive and attribute_value not in values):
                    new_trace.append(trace[j])
        if len(new_trace) > 0:
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
            filtered_log.append(new_trace)
    return filtered_log
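# Hedged usage sketch (not part of the original module): keep only the "A"/"B"
# events in every trace; traces left empty are dropped by the function above.
def _example_keep_events(log):
    return apply_events(log, {"A", "B"}, parameters={"positive": True})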
def apply(log, paths, parameters=None):
    """
    Apply a filter on traces containing / not containing a path

    Parameters
    -----------
    log
        Log
    paths
        Paths that we are looking for (expressed as tuples of 2 strings)
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Attribute identifying the activity in the log
            Parameters.POSITIVE -> Indicate if traces should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, xes.DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions,
                            classifiers=log.classifiers, omni_present=log.omni_present)
    for trace in log:
        found = False
        for i in range(len(trace) - 1):
            path = (trace[i][attribute_key], trace[i + 1][attribute_key])
            if path in paths:
                found = True
                break
        if (found and positive) or (not found and not positive):
            filtered_log.append(trace)
    return filtered_log
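# Hedged usage sketch (not part of the original module): keep only the traces
# in which "A" is directly followed by "B".
def _example_filter_paths(log):
    return apply(log, {("A", "B")})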