def apply_auto_filter(df, parameters=None):
    """
    Automatically filter a dataframe on its start activities

    The start activities of the dataframe are counted, sorted, and turned
    into a threshold using the decreasing factor; the dataframe is then
    filtered against that threshold.

    Parameters
    -----------
    df
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
            Parameters.CASE_ID_KEY -> Case ID column in the dataframe
            Parameters.ACTIVITY_KEY -> Column that represents the activity
            Parameters.DECREASING_FACTOR -> Decreasing factor used to compute the threshold
            Parameters.GROUP_DATAFRAME -> Optional grouped dataframe forwarded to the filter (default: None)

    Returns
    -----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters

    def _lookup(key, default):
        # Single place where the parameters dictionary is queried.
        return exec_utils.get_param_value(key, parameters, default)

    case_column = _lookup(Parameters.CASE_ID_KEY, CASE_CONCEPT_NAME)
    activity_column = _lookup(Parameters.ACTIVITY_KEY, DEFAULT_NAME_KEY)
    pre_grouped = _lookup(Parameters.GROUP_DATAFRAME, None)
    factor = _lookup(Parameters.DECREASING_FACTOR, filtering_constants.DECREASING_FACTOR)

    sa_counts = get_start_activities(df, parameters=parameters)
    # The threshold is derived from the start activities ordered by count.
    threshold = start_activities_common.get_start_activities_threshold(
        start_activities_common.get_sorted_start_activities_list(sa_counts),
        factor,
    )

    return filter_df_on_start_activities_nocc(
        df,
        threshold,
        sa_count0=sa_counts,
        case_id_glue=case_column,
        activity_key=activity_column,
        grouped_df=pre_grouped,
    )
def apply_auto_filter(log, variants=None, parameters=None):
    """
    Apply a start activities filter, detecting the threshold automatically

    Parameters
    ----------
    log
        Log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value;
        when omitted, the variants are computed from the log
    parameters
        Parameters of the algorithm, including:
            Parameters.DECREASING_FACTOR -> Decreasing factor (stops the algorithm when the next activity by occurrence is below
            this factor in comparison to previous)
            Parameters.ACTIVITY_KEY -> Attribute key (must be specified if different from concept:name)

    Returns
    ---------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters

    key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)
    factor = exec_utils.get_param_value(Parameters.DECREASING_FACTOR, parameters, DECREASING_FACTOR)

    # Only the activity key is forwarded to the variants / start-activities helpers.
    helper_parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: key}

    if variants is None:
        variants = variants_filter.get_variants(log, parameters=helper_parameters)
    variants_by_count = variants_filter.get_variants_sorted_by_count(variants)

    sa_counts = get_start_activities(log, parameters=helper_parameters)
    threshold = start_activities_common.get_start_activities_threshold(
        start_activities_common.get_sorted_start_activities_list(sa_counts),
        factor,
    )

    return filter_log_by_start_activities(sa_counts, variants, variants_by_count, threshold, key)
def apply_auto_filter(df, parameters=None):
    """
    Automatically filter a dataframe on its start activities

    Parameters
    -----------
    df
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_CASEID_KEY -> Case ID column in the dataframe
            PARAMETER_CONSTANT_ACTIVITY_KEY -> Column that represents the activity
            "decreasingFactor" -> Decreasing factor that should be passed to the algorithm
            GROUPED_DATAFRAME -> Optional grouped dataframe forwarded to the filter (default: None)

    Returns
    -----------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters

    # Each setting falls back to its default when the key is absent.
    case_column = parameters.get(PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)
    activity_column = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, DEFAULT_NAME_KEY)
    factor = parameters.get("decreasingFactor", filtering_constants.DECREASING_FACTOR)
    pre_grouped = parameters.get(GROUPED_DATAFRAME, None)

    sa_counts = get_start_activities(df, parameters=parameters)
    # The threshold is derived from the start activities ordered by count.
    threshold = start_activities_common.get_start_activities_threshold(
        start_activities_common.get_sorted_start_activities_list(sa_counts),
        factor,
    )

    return filter_df_on_start_activities_nocc(
        df,
        threshold,
        sa_count0=sa_counts,
        case_id_glue=case_column,
        activity_key=activity_column,
        grouped_df=pre_grouped,
    )