def tfidf_filter_events(events, threshold, deduplicate=True):
    """Filter events with ``tfidf_filter_namedtuple`` and optionally deduplicate.

    Args:
        events: Events handed unchanged to ``tfidf_filter_namedtuple``.
        threshold: Threshold handed unchanged to ``tfidf_filter_namedtuple``.
        deduplicate: When false, the filtered events are returned as-is.
            When true (the default), the ``template_ids`` of the filtered
            events are converted to frozensets, reduced by
            ``get_nonsubsets``, and wrapped in brand-new ``Event`` objects.

    Returns:
        The result of ``tfidf_filter_namedtuple`` when ``deduplicate`` is
        false; otherwise a list of newly built ``Event`` objects, one per
        template-id set returned by ``get_nonsubsets``.
    """
    kept = tfidf_filter_namedtuple(events, threshold, Event)
    if not deduplicate:
        return kept

    # Deduplicating rebuilds every Event, so each one receives a fresh random
    # uuid4 ID; IDs of the incoming events are not carried over.
    logger.info("Removing subsets from tfidf_filter result...")
    # NOTE(review): presumably get_nonsubsets drops sets that are subsets of
    # another set in the list -- confirm against its definition.
    surviving_id_sets = get_nonsubsets(
        [frozenset(kept_event.template_ids) for kept_event in kept]
    )
    return [
        Event(id=str(uuid.uuid4()), template_ids=id_set)
        for id_set in surviving_id_sets
    ]
def tf_idf_filter_window(windows, threshold):
    """Filter windows by delegating to ``tfidf_filter_namedtuple``.

    Args:
        windows: Windows handed unchanged to ``tfidf_filter_namedtuple``.
        threshold: Threshold handed unchanged to ``tfidf_filter_namedtuple``.

    Returns:
        Whatever ``tfidf_filter_namedtuple`` returns when called with
        ``ModelGenWindow`` as its third argument.
    """
    filtered_windows = tfidf_filter_namedtuple(windows, threshold, ModelGenWindow)
    return filtered_windows