def apply_auto_filter(df, parameters=None):
    """
    Simplify a Pandas dataframe by running the enabled automatic filters

    The filters run in a fixed order: activities, variants, end activities,
    start activities. Each one is skipped when its enabling flag is falsy.

    Parameters
    ------------
    df
        Dataframe
    parameters
        Eventual parameters passed to the algorithms:
            case_id_glue -> Column where the case ID is present
            activity_key -> Column where the activity is present
            decreasingFactor -> Decreasing factor (provided to all algorithms)
            enable_activities_filter -> Enables or disables auto filter on activities number
            (it is useful to disable if the dataframe has been already filtered by activities
            number before). Default is True
            enable_variants_filter -> Enables or disables auto filter on variants
            (that is slower than others). Default is False
            enable_start_activities_filter -> Enables or disables auto filter on start
            activities. Default is False
            enable_end_activities_filter -> Enables or disables auto filter on end
            activities. Default is True

    Returns
    ------------
    df
        Filtered dataframe
    """
    if parameters is None:
        parameters = {}

    # Resolve every switch up front, falling back to its documented default.
    use_activities = parameters.get("enable_activities_filter", True)
    use_variants = parameters.get("enable_variants_filter", False)
    use_start_activities = parameters.get("enable_start_activities_filter", False)
    use_end_activities = parameters.get("enable_end_activities_filter", True)

    # The same parameters dictionary is forwarded untouched to every filter.
    result = df
    if use_activities:
        result = attributes_filter.apply_auto_filter(result, parameters=parameters)
    if use_variants:
        result = variants_filter.apply_auto_filter(result, parameters=parameters)
    if use_end_activities:
        result = end_activities_filter.apply_auto_filter(result, parameters=parameters)
    if use_start_activities:
        result = start_activities_filter.apply_auto_filter(result, parameters=parameters)

    return result
def apply_auto_filter(df, parameters=None):
    """
    Apply some filters to Pandas dataframe in order to get a simpler dataframe

    The filters run in a fixed order: activities, variants, end activities,
    start activities. Each one is skipped when its enabling flag is falsy.
    The caller's parameters dictionary is never modified.

    Parameters
    ------------
    df
        Dataframe
    parameters
        Eventual parameters passed to the algorithms:
            Parameters.CASE_ID_KEY -> Column where the case ID is present
            Parameters.ACTIVITY_KEY -> Column where the activity is present
            Parameters.DECREASING_FACTOR -> Decreasing factor (provided to all algorithms)
            Parameters.ENABLE_ACTIVITES_FILTER -> Enables or disables auto filter on activities
            number (it is useful to disable if the dataframe has been already filtered by
            activities number before). Default is True
            Parameters.ENABLE_VARIANTS_FILTER -> Enables or disables auto filter on variants
            (that is slower than others). Default is False
            Parameters.ENABLE_START_ACTIVITIES_FILTER -> Enables or disables auto filter on
            start activities. Default is False
            Parameters.ENABLE_END_ACTIVITIES_FILTER -> Enables or disables auto filter on end
            activities. Default is True
            Parameters.RETURN_EA_COUNT -> If True, the extra value produced by the end
            activities filter is returned together with the dataframe. Default is False

    Returns
    ------------
    df
        Filtered dataframe. When Parameters.RETURN_EA_COUNT is enabled, a tuple
        (df, ea_dict) is returned instead, where ea_dict is the second value returned by
        the end activities filter (presumably the end activities count dictionary), or
        None if the end activities filter is disabled
    """
    # Work on a shallow copy: the end activities step below stores an internal
    # flag in the parameters, and that flag must not leak into the dictionary
    # owned by the caller (which may be reused for unrelated calls).
    parameters = {} if parameters is None else dict(parameters)

    enable_activities_filter = exec_utils.get_param_value(
        Parameters.ENABLE_ACTIVITES_FILTER, parameters, True)
    enable_variants_filter = exec_utils.get_param_value(
        Parameters.ENABLE_VARIANTS_FILTER, parameters, False)
    enable_start_activities_filter = exec_utils.get_param_value(
        Parameters.ENABLE_START_ACTIVITIES_FILTER, parameters, False)
    enable_end_activities_filter = exec_utils.get_param_value(
        Parameters.ENABLE_END_ACTIVITIES_FILTER, parameters, True)
    return_dict = exec_utils.get_param_value(Parameters.RETURN_EA_COUNT, parameters, False)

    # Stays None unless the end activities filter runs with return_dict enabled.
    ea_dict = None

    # list of filters that are applied:
    # - activities (if enabled)
    # - variants filter (if enabled)
    # - end activities filter (if enabled)
    # - start activities filter (if enabled)
    if enable_activities_filter:
        df = attributes_filter.apply_auto_filter(df, parameters=parameters)
    if enable_variants_filter:
        df = variants_filter.apply_auto_filter(df, parameters=parameters)
    if enable_end_activities_filter:
        # Tell the end activities filter which return shape is expected; the
        # flag is set on the local copy and is therefore also visible to the
        # start activities filter below, exactly as before.
        parameters[constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = return_dict
        if return_dict:
            df, ea_dict = end_activities_filter.apply_auto_filter(
                df, parameters=parameters)
        else:
            df = end_activities_filter.apply_auto_filter(df, parameters=parameters)
    if enable_start_activities_filter:
        df = start_activities_filter.apply_auto_filter(df, parameters=parameters)

    if return_dict:
        return df, ea_dict

    return df
def execute_script():
    """
    Run the end-to-end benchmark on the configured CSV log

    Steps, in order: import the dataframe, filter it (full auto filter or
    attributes filter only), optionally keep a limited number of cases,
    convert the case ID and timestamp columns, compute the frequency and
    performance DFG, optionally filter the frequency DFG, render and view
    the DFG, discover a Petri net from the frequency DFG, decorate it with
    performance statistics, and view it. Elapsed times between checkpoints
    are printed along the way.

    All inputs are read from module-level settings; nothing is returned.
    """

    def report(prefix, later, earlier):
        # Print one timing line in the form "<prefix><elapsed seconds>".
        print(prefix + str(later - earlier))

    t_start = time.time()
    dataframe = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
        inputLog, sep=SEP, quotechar=QUOTECHAR)
    t_imported = time.time()
    report("time2 - time1: ", t_imported, t_start)

    parameters_filtering = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ACTIVITY_KEY,
    }
    # Either the complete auto filter or only the attributes filter is used.
    chosen_filter = auto_filter if enable_auto_filter else attributes_filter
    dataframe = chosen_filter.apply_auto_filter(
        dataframe, parameters=parameters_filtering)
    t_filtered = time.time()
    report("time3 - time2: ", t_filtered, t_imported)

    if enable_filtering_on_cases:
        dataframe = case_filter.filter_on_ncases(
            dataframe, case_id_glue=CASEID_GLUE, max_no_cases=max_no_cases)
    t_cases = time.time()

    dataframe = csv_import_adapter.convert_caseid_column_to_str(
        dataframe, case_id_glue=CASEID_GLUE)
    dataframe = csv_import_adapter.convert_timestamp_columns_in_df(
        dataframe, timest_columns=TIMEST_COLUMNS, timest_format=TIMEST_FORMAT)
    t_converted = time.time()
    report("time6 - time4: ", t_converted, t_cases)

    # Sorting by timestamp is currently disabled, so this interval covers no work:
    # dataframe = dataframe.sort_values('time:timestamp')
    t_sorted = time.time()
    report("time7 - time6: ", t_sorted, t_converted)

    # show the filtered dataframe on the screen
    activities_count = attributes_filter.get_attribute_values(
        dataframe, attribute_key=ACTIVITY_KEY)
    dfg_frequency, dfg_performance = df_statistics.get_dfg_graph(
        dataframe,
        measure="both",
        perf_aggregation_key="median",
        case_id_glue=CASEID_GLUE,
        activity_key=ACTIVITY_KEY,
        timestamp_key=TIMEST_KEY)
    if enable_filtering_df:
        print("len dfg_frequency 0=", len(dfg_frequency))
        dfg_frequency = dfg_filtering.apply(
            dfg_frequency, {"noiseThreshold": filtering_df_noise})
        print("len dfg_frequency 1=", len(dfg_frequency))
    t_dfg = time.time()
    report("time8 - time7: ", t_dfg, t_sorted)

    dfg_gviz = dfg_vis_factory.apply(
        dfg_frequency,
        activities_count=activities_count,
        parameters={"format": "svg"})
    dfg_vis_factory.view(dfg_gviz)

    net, initial_marking, final_marking = inductive_factory.apply_dfg(dfg_frequency)
    # Alternative discovery that is currently disabled:
    # net, initial_marking, final_marking = alpha_factory.apply_dfg(dfg_frequency)
    spaths = get_shortest_paths(net)
    t_discovered = time.time()
    report("time9 - time8: ", t_discovered, t_dfg)

    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_performance, spaths, activities_count, variant="performance")
    net_gviz = pn_vis_factory.apply(
        net,
        initial_marking,
        final_marking,
        variant="performance",
        aggregated_statistics=aggregated_statistics,
        parameters={"format": "svg"})
    t_rendered = time.time()
    report("time10 - time9: ", t_rendered, t_discovered)
    report("time10 - time1: ", t_rendered, t_start)

    pn_vis_factory.view(net_gviz)