Beispiel #1
0
def apply_auto_filter(df, parameters=None):
    """
    Run the enabled automatic filters, one after the other, on a
    Pandas dataframe and return the reduced dataframe

    Parameters
    ------------
    df
        Dataframe to filter
    parameters
        Optional dictionary of settings. It is forwarded unchanged to every
        filter that gets executed. Keys read by this function:
            enable_activities_filter -> Run the auto filter on the activities
            (can be switched off when the dataframe was already filtered
            on the activities). Default is True
            enable_variants_filter -> Run the auto filter on the variants
            (slower than the others). Default is False
            enable_start_activities_filter -> Run the auto filter on the
            start activities. Default is False
            enable_end_activities_filter -> Run the auto filter on the
            end activities. Default is True
        Keys meant for the individual filters:
            case_id_glue -> Column where the case ID is present
            activity_key -> Column where the activity is present
            decreasingFactor -> Decreasing factor (provided to all algorithms)

    Returns
    ------------
    df
        Filtered dataframe
    """
    parameters = {} if parameters is None else parameters

    # Read all the switches up front, before any filter is executed
    use_activities = parameters.get("enable_activities_filter", True)
    use_variants = parameters.get("enable_variants_filter", False)
    use_start_activities = parameters.get("enable_start_activities_filter",
                                          False)
    use_end_activities = parameters.get("enable_end_activities_filter", True)

    # Execution order: activities, variants, end activities, start activities.
    # Each step works on the output of the previous one.
    if use_activities:
        df = attributes_filter.apply_auto_filter(df, parameters=parameters)

    if use_variants:
        df = variants_filter.apply_auto_filter(df, parameters=parameters)

    if use_end_activities:
        df = end_activities_filter.apply_auto_filter(df, parameters=parameters)

    if use_start_activities:
        df = start_activities_filter.apply_auto_filter(
            df, parameters=parameters)

    return df
Beispiel #2
0
def apply_auto_filter(df, parameters=None):
    """
    Apply some filters to Pandas dataframe in order to get
    a simpler dataframe

    Parameters
    ------------
    df
        Dataframe
    parameters
        Eventual parameters passed to the algorithms (the dictionary provided
        by the caller is never modified; a shallow copy is used internally):
            Parameters.CASE_ID_KEY -> Column where the case ID is present
            Parameters.ACTIVITY_KEY -> Column where the activity is present
            Parameters.DECREASING_FACTOR -> Decreasing factor (provided to all algorithms)
            Parameters.ENABLE_ACTIVITES_FILTER -> Enables or disables auto filter on activities number
            (it is useful to disable if the dataframe has been already filtered by activities number before).
            Default is True
            Parameters.ENABLE_VARIANTS_FILTER -> Enables or disables auto filter on variants (that is slower than others).
            Default is False
            Parameters.ENABLE_START_ACTIVITIES_FILTER -> Enables or disables auto filter on start activities. Default is False
            Parameters.ENABLE_END_ACTIVITIES_FILTER -> Enables or disables auto filter on end activities. Default is True
            Parameters.RETURN_EA_COUNT -> If true, a tuple is returned whose second element is the
            additional value produced by the end activities filter. Default is False

    Returns
    ------------
    df
        Filtered dataframe
    ea_dict
        Returned (as second element of a tuple) only when Parameters.RETURN_EA_COUNT is true.
        It is the second value returned by the end activities auto filter
        (presumably the end activities count - confirm against that filter),
        or None when the end activities filter is disabled.
    """

    # Work on a shallow copy: the end activities step below stores a flag in
    # the parameters, and that write must not leak into the caller's dictionary.
    parameters = {} if parameters is None else dict(parameters)

    enable_activities_filter = exec_utils.get_param_value(
        Parameters.ENABLE_ACTIVITES_FILTER, parameters, True)
    enable_variants_filter = exec_utils.get_param_value(
        Parameters.ENABLE_VARIANTS_FILTER, parameters, False)
    enable_start_activities_filter = exec_utils.get_param_value(
        Parameters.ENABLE_START_ACTIVITIES_FILTER, parameters, False)
    enable_end_activities_filter = exec_utils.get_param_value(
        Parameters.ENABLE_END_ACTIVITIES_FILTER, parameters, True)
    return_dict = exec_utils.get_param_value(Parameters.RETURN_EA_COUNT,
                                             parameters, False)

    # stays None if the end activities filter is not executed
    ea_dict = None

    # list of filters that are applied:
    # - activities (if enabled)
    # - variants filter (if enabled)
    # - end activities filter (if enabled)
    # - start activities filter (if enabled)
    if enable_activities_filter:
        df = attributes_filter.apply_auto_filter(df, parameters=parameters)
    if enable_variants_filter:
        df = variants_filter.apply_auto_filter(df, parameters=parameters)
    if enable_end_activities_filter:
        # tell the end activities filter whether it has to return the
        # additional value along with the dataframe (written to the local
        # copy only)
        parameters[constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = return_dict
        if return_dict:
            df, ea_dict = end_activities_filter.apply_auto_filter(
                df, parameters=parameters)
        else:
            df = end_activities_filter.apply_auto_filter(df,
                                                         parameters=parameters)
    if enable_start_activities_filter:
        df = start_activities_filter.apply_auto_filter(df,
                                                       parameters=parameters)

    if return_dict:
        return df, ea_dict

    return df
Beispiel #3
0
def execute_script():
    """
    Demo pipeline driven by module-level settings: load a CSV into a
    dataframe, filter it, compute the directly-follows graphs, show the
    frequency DFG, then build a Petri net from the DFG and show it with
    performance decorations.

    The elapsed time of the stages is printed to standard output.
    Nothing is returned.
    """

    def report(label, later, earlier):
        # prints the label immediately followed by the elapsed seconds
        print(label + str(later - earlier))

    t1 = time.time()
    df = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
        inputLog, sep=SEP, quotechar=QUOTECHAR)
    t2 = time.time()
    report("time2 - time1: ", t2, t1)

    filter_params = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ACTIVITY_KEY
    }
    # either the complete auto filter or only the filter on the attributes
    filter_module = auto_filter if enable_auto_filter else attributes_filter
    df = filter_module.apply_auto_filter(df, parameters=filter_params)
    t3 = time.time()
    report("time3 - time2: ", t3, t2)

    if enable_filtering_on_cases:
        df = case_filter.filter_on_ncases(
            df, case_id_glue=CASEID_GLUE, max_no_cases=max_no_cases)
    t4 = time.time()

    # the conversions of the case ID and timestamp columns are done
    # after the filtering steps
    df = csv_import_adapter.convert_caseid_column_to_str(
        df, case_id_glue=CASEID_GLUE)
    df = csv_import_adapter.convert_timestamp_columns_in_df(
        df, timest_columns=TIMEST_COLUMNS, timest_format=TIMEST_FORMAT)
    t6 = time.time()
    report("time6 - time4: ", t6, t4)
    # (disabled step) df = df.sort_values('time:timestamp')
    t7 = time.time()
    report("time7 - time6: ", t7, t6)

    # activities count and both DFGs (frequency and performance)
    activities_count = attributes_filter.get_attribute_values(
        df, attribute_key=ACTIVITY_KEY)
    dfg_frequency, dfg_performance = df_statistics.get_dfg_graph(
        df,
        measure="both",
        perf_aggregation_key="median",
        case_id_glue=CASEID_GLUE,
        activity_key=ACTIVITY_KEY,
        timestamp_key=TIMEST_KEY)
    if enable_filtering_df:
        print("len dfg_frequency 0=", len(dfg_frequency))
        noise_settings = {"noiseThreshold": filtering_df_noise}
        dfg_frequency = dfg_filtering.apply(dfg_frequency, noise_settings)
        print("len dfg_frequency 1=", len(dfg_frequency))
    t8 = time.time()
    report("time8 - time7: ", t8, t7)

    # show the frequency DFG on the screen
    dfg_gviz = dfg_vis_factory.apply(
        dfg_frequency,
        activities_count=activities_count,
        parameters={"format": "svg"})
    dfg_vis_factory.view(dfg_gviz)

    # Petri net from the frequency DFG
    # (disabled alternative) alpha_factory.apply_dfg(dfg_frequency)
    net, initial_marking, final_marking = inductive_factory.apply_dfg(
        dfg_frequency)
    spaths = get_shortest_paths(net)
    t9 = time.time()
    report("time9 - time8: ", t9, t8)

    # decorate the net using the performance DFG and show it
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_performance, spaths, activities_count, variant="performance")
    net_gviz = pn_vis_factory.apply(
        net,
        initial_marking,
        final_marking,
        variant="performance",
        aggregated_statistics=aggregated_statistics,
        parameters={"format": "svg"})
    t10 = time.time()
    report("time10 - time9: ", t10, t9)
    report("time10 - time1: ", t10, t1)
    pn_vis_factory.view(net_gviz)