def sample_log(log, no_traces=100):
    """
    Draw a random sample of traces from a log, without replacement

    Parameters
    -----------
    log
        Log to sample from
    no_traces
        Number of traces the sample should have; when the log holds fewer
        traces than this, every trace is returned (in random order)

    Returns
    -----------
    new_log
        New log holding the sampled traces, created with the attributes,
        extensions, globals and classifiers of the input log
    """
    sampled_log = EventLog(
        attributes=log.attributes,
        extensions=log.extensions,
        globals=log._omni,
        classifiers=log.classifiers,
    )
    # Cap the request at the log size: random.sample raises ValueError when
    # asked for more items than the population contains.
    sample_size = min(no_traces, len(log))
    sampled_log._list = random.sample(log, sample_size)
    return sampled_log
def filter_log_by_relevance(topK, log, relevance_scores):
    """
    Keep, for every label, only the topK highest-scoring entries and build a
    log restricted to the label's traces and to those entries

    Parameters
    -----------
    topK
        Maximum number of score entries to keep per label
    log
        Log to filter
    relevance_scores
        Dictionary keyed by label; each value is a dictionary with a
        'scores' entry (mapping of "concept:name" values to a sortable
        score) and a 'traces' entry (passed to filter_log_by_caseid).
        NOTE: this dictionary is modified in place - each label's 'scores'
        entry is replaced by its truncated, score-descending version

    Returns
    -----------
    relevance_scores
        The same (mutated) dictionary that was passed in
    log_new
        New log concatenating, label by label, the filtered traces
    """
    log_new = EventLog()
    for label in relevance_scores:
        label_info = relevance_scores[label]
        ranked = sorted(
            label_info['scores'].items(),
            key=lambda item: item[1],
            reverse=True,
        )
        # Slicing already copes with labels that have fewer than topK
        # scores. topK itself must stay untouched: shrinking it here would
        # silently lower the limit for every label processed afterwards.
        top_scores = dict(ranked[:topK])
        label_info['scores'] = top_scores

        log_dummy = filter_log_by_caseid(log, label_info['traces'])
        log_dummy = attributes_filter.apply_events(
            log_dummy,
            top_scores.keys(),
            parameters={
                attributes_filter.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "concept:name",
                "positive": True
            })
        # Extend in place rather than rebuilding the whole list per label.
        log_new._list.extend(log_dummy._list)
    return relevance_scores, log_new