Esempio n. 1
0
def apply_auto_filter(log, variants=None, parameters=None):
    """
    Filter a log on the values of an attribute, with the threshold computed
    automatically from the sorted attribute occurrences

    Parameters
    ----------
    log
        Log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value;
        when omitted it is obtained through variants_filter.get_variants
    parameters
        Parameters of the algorithm, including:
            Parameters.DECREASING_FACTOR -> Decreasing factor handed to the threshold computation
            (defaults to filtering_constants.DECREASING_FACTOR)
            Parameters.ATTRIBUTE_KEY -> Attribute key (defaults to DEFAULT_NAME_KEY)

    Returns
    ---------
    filtered_log
        Filtered log; an empty input log is returned as it is
    """
    parameters = {} if parameters is None else parameters

    attribute_key = exec_utils.get_param_value(
        Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    decreasing_factor = exec_utils.get_param_value(
        Parameters.DECREASING_FACTOR, parameters,
        filtering_constants.DECREASING_FACTOR)

    # The helpers receive the chosen key both as attribute key and as activity key
    helper_parameters = {}
    helper_parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = attribute_key
    helper_parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = attribute_key

    # Nothing to filter in an empty log
    if len(log) == 0:
        return log

    if variants is None:
        variants = variants_filter.get_variants(
            log, parameters=helper_parameters)
    variants_by_count = variants_filter.get_variants_sorted_by_count(variants)

    values = get_attribute_values(
        log, attribute_key, parameters=helper_parameters)
    threshold = attributes_common.get_attributes_threshold(
        attributes_common.get_sorted_attributes_list(values),
        decreasing_factor)

    return filter_log_by_attributes_threshold(
        log, values, variants, variants_by_count, threshold, attribute_key)
def apply_auto_filter(df, parameters=None):
    """
    Filter a dataframe on activity values, with the threshold computed
    automatically from the sorted activity occurrences

    Parameters
    ------------
    df
        Dataframe
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> Column containing the activity (defaults to DEFAULT_NAME_KEY)
            decreasingFactor -> Decreasing factor that should be passed to the algorithm
            (defaults to DECREASING_FACTOR)
            PARAM_MOST_COMMON_VARIANT -> Most common variant forwarded to the filtering step
            (an empty list is used when missing or None)

    Returns
    ------------
    df
        Filtered dataframe; an empty input dataframe is returned as it is
    """
    if parameters is None:
        parameters = {}

    # A missing entry and an explicit None are both replaced by an empty list
    most_common_variant = None
    if PARAM_MOST_COMMON_VARIANT in parameters:
        most_common_variant = parameters[PARAM_MOST_COMMON_VARIANT]
    if most_common_variant is None:
        most_common_variant = []

    if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = DEFAULT_NAME_KEY

    if "decreasingFactor" in parameters:
        decreasing_factor = parameters["decreasingFactor"]
    else:
        decreasing_factor = DECREASING_FACTOR

    # Nothing to filter in an empty dataframe
    if len(df) == 0:
        return df

    activity_counts = get_attribute_values(df, activity_key)
    threshold = attributes_common.get_attributes_threshold(
        attributes_common.get_sorted_attributes_list(activity_counts),
        decreasing_factor)

    return filter_df_keeping_activ_exc_thresh(
        df,
        threshold,
        activity_key=activity_key,
        act_count0=activity_counts,
        most_common_variant=most_common_variant)