Example #1
0
def apply_auto_filter(df, parameters=None):
    """
    Automatically filter a dataframe on its activity values

    The activity values are collected with get_attribute_values, sorted with
    attributes_common.get_sorted_attributes_list, and a threshold is derived
    from them through attributes_common.get_attributes_threshold. The
    dataframe is then handed to filter_df_keeping_activ_exc_thresh together
    with that threshold.

    Parameters
    ------------
    df
        Dataframe
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> Column containing the activity
            (DEFAULT_NAME_KEY is used when it is not provided)
            decreasingFactor -> Decreasing factor that should be passed to the algorithm
            (DECREASING_FACTOR is used when it is not provided)

    Returns
    ------------
    df
        Filtered dataframe
    """
    params = {} if parameters is None else parameters
    column = params.get(PARAMETER_CONSTANT_ACTIVITY_KEY, DEFAULT_NAME_KEY)
    factor = params.get("decreasingFactor", DECREASING_FACTOR)

    activity_values = get_attribute_values(df, column)
    sorted_activities = attributes_common.get_sorted_attributes_list(
        activity_values)
    threshold = attributes_common.get_attributes_threshold(
        sorted_activities, factor)

    # The unsorted activity values are forwarded as well (act_count0), in
    # addition to the threshold computed from them.
    return filter_df_keeping_activ_exc_thresh(
        df, threshold, activity_key=column, act_count0=activity_values)
def apply_auto_filter(df, parameters=None):
    """
    Applies auto filter on activity values

    Parameters
    ------------
    df
        Dataframe; df.count() is compared against 0 to decide whether any
        filtering is attempted
    parameters
        Possible parameters of the algorithm, including:
            PARAM_MOST_COMMON_VARIANT -> Value forwarded to the filter as
            most_common_variant (an empty list when missing or None)
            PARAMETER_CONSTANT_ACTIVITY_KEY -> Column containing the activity
            (DEFAULT_NAME_KEY is used when it is not provided)
            decreasingFactor -> Decreasing factor that should be passed to the algorithm
            (DECREASING_FACTOR is used when it is not provided)

    Returns
    ------------
    df
        Filtered dataframe, or the input dataframe unchanged when
        df.count() is not greater than 0
    """
    params = {} if parameters is None else parameters

    # A missing entry and an explicit None both end up as an empty list.
    variant = params.get(PARAM_MOST_COMMON_VARIANT)
    if variant is None:
        variant = []

    column = params.get(PARAMETER_CONSTANT_ACTIVITY_KEY, DEFAULT_NAME_KEY)
    factor = params.get("decreasingFactor", DECREASING_FACTOR)

    # Guard clause: nothing to filter, hand the dataframe back as it is.
    if not df.count() > 0:
        return df

    activity_values = get_attribute_values(df, column)
    sorted_activities = attributes_common.get_sorted_attributes_list(
        activity_values)
    threshold = attributes_common.get_attributes_threshold(
        sorted_activities, factor)

    return filter_df_keeping_activ_exc_thresh(
        df,
        threshold,
        activity_key=column,
        act_count0=activity_values,
        most_common_variant=variant)
def apply_auto_filter(log, variants=None, parameters=None):
    """
    Apply an attributes filter whose threshold is detected automatically

    Parameters
    ----------
    log
        Log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value;
        when omitted it is computed through variants_filter.get_variants
    parameters
        Parameters of the algorithm, including:
            decreasingFactor -> Decreasing factor (stops the algorithm when the next activity by occurrence is
            below this factor in comparison to previous); filtering_constants.DECREASING_FACTOR when omitted
            PARAMETER_CONSTANT_ATTRIBUTE_KEY -> Attribute key (DEFAULT_NAME_KEY when omitted)

    Returns
    ---------
    filtered_log
        Filtered log
    """
    params = {} if parameters is None else parameters
    key = params.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY, DEFAULT_NAME_KEY)
    factor = params.get("decreasingFactor",
                        filtering_constants.DECREASING_FACTOR)

    # The same key is passed on both as attribute key and as activity key.
    inner_parameters = {
        PARAMETER_CONSTANT_ATTRIBUTE_KEY: key,
        PARAMETER_CONSTANT_ACTIVITY_KEY: key,
    }

    if variants is None:
        variants = variants_filter.get_variants(
            log, parameters=inner_parameters)
    variants_by_count = variants_filter.get_variants_sorted_by_count(variants)

    values = get_attribute_values(log, key, parameters=inner_parameters)
    sorted_values = attributes_common.get_sorted_attributes_list(values)
    threshold = attributes_common.get_attributes_threshold(
        sorted_values, factor)

    return filter_log_by_attributes_threshold(
        log, values, variants, variants_by_count, threshold, key)