def apply_auto_filter(log, variants=None, parameters=None):
    """
    Filter a log on an attribute, using an automatically detected threshold

    The threshold is computed from the sorted list of attribute values and
    the decreasing factor. An empty log is returned as-is.

    Parameters
    ----------
    log
        Log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value.
        When None, the variants are computed from the log.
    parameters
        Parameters of the algorithm, including:
            Parameters.DECREASING_FACTOR -> Decreasing factor (stops the algorithm when the next
            activity by occurrence is below this factor in comparison to previous);
            defaults to filtering_constants.DECREASING_FACTOR
            Parameters.ATTRIBUTE_KEY -> Attribute key; defaults to DEFAULT_NAME_KEY

    Returns
    ---------
    filtered_log
        Filtered log (the input log itself when it contains no elements)
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(
        Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    decreasing_factor = exec_utils.get_param_value(
        Parameters.DECREASING_FACTOR, parameters,
        filtering_constants.DECREASING_FACTOR)

    # The same key is used both as attribute key and as activity key for the
    # helper calls below.
    parameters_variants = {
        PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key,
        PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key,
    }

    # Nothing to filter: hand the log back untouched.
    if len(log) == 0:
        return log

    if variants is None:
        variants = variants_filter.get_variants(
            log, parameters=parameters_variants)
    variants_by_count = variants_filter.get_variants_sorted_by_count(variants)

    attr_values = get_attribute_values(
        log, attribute_key, parameters=parameters_variants)
    sorted_values = attributes_common.get_sorted_attributes_list(attr_values)
    threshold = attributes_common.get_attributes_threshold(
        sorted_values, decreasing_factor)

    return filter_log_by_attributes_threshold(
        log, attr_values, variants, variants_by_count, threshold,
        attribute_key)
def apply_auto_filter(df, parameters=None):
    """
    Filter a dataframe on activity values, using an automatically detected threshold

    The threshold is computed from the sorted list of activity values and
    the decreasing factor. An empty dataframe is returned as-is.

    Parameters
    ------------
    df
        Dataframe
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> Column containing the activity
            (defaults to DEFAULT_NAME_KEY)
            "decreasingFactor" -> Decreasing factor that should be passed to the algorithm
            (defaults to DECREASING_FACTOR)
            PARAM_MOST_COMMON_VARIANT -> Most common variant, forwarded to the filtering step
            (a missing or None value is replaced by an empty list)

    Returns
    ------------
    df
        Filtered dataframe (the input dataframe itself when it is empty)
    """
    if parameters is None:
        parameters = {}

    # A missing entry and an explicit None are both normalised to an empty list.
    if PARAM_MOST_COMMON_VARIANT in parameters:
        most_common_variant = parameters[PARAM_MOST_COMMON_VARIANT]
    else:
        most_common_variant = None
    if most_common_variant is None:
        most_common_variant = []

    if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = DEFAULT_NAME_KEY

    if "decreasingFactor" in parameters:
        decreasing_factor = parameters["decreasingFactor"]
    else:
        decreasing_factor = DECREASING_FACTOR

    # Nothing to filter: hand the dataframe back untouched.
    if len(df) == 0:
        return df

    activity_values = get_attribute_values(df, activity_key)
    sorted_values = attributes_common.get_sorted_attributes_list(
        activity_values)
    threshold = attributes_common.get_attributes_threshold(
        sorted_values, decreasing_factor)

    return filter_df_keeping_activ_exc_thresh(
        df,
        threshold,
        activity_key=activity_key,
        act_count0=activity_values,
        most_common_variant=most_common_variant,
    )