def apply_auto_filter(log, parameters=None):
    """
    Apply some filters in battery to the log in order to get a simplified log

    Parameters
    ----------
    log
        Log
    parameters
        Eventual parameters applied to the algorithms:
            Parameters.DECREASING_FACTOR -> Decreasing factor (not read here; forwarded to all the filters)
            Parameters.ATTRIBUTE_KEY -> Attribute used as activity (default: xes.DEFAULT_NAME_KEY);
                the same value is also forwarded as Parameters.ACTIVITY_KEY
            Parameters.ENABLE_ACTIVITES_FILTER -> Enables the activity filter (default: True)
            Parameters.ENABLE_VARIANTS_FILTER -> Enables the variant filter (default: False)
            Parameters.ENABLE_START_ACTIVITIES_FILTER -> Enables the start activities filter (default: False)
            Parameters.ENABLE_END_ACTIVITIES_FILTER -> Enables the end activities filter (default: True)

    Returns
    ---------
    filtered_log
        Filtered log
    """

    # the following filters are applied:
    # - activity filter (keep only attributes with a reasonable number of occurrences) (if enabled)
    # - variant filter (keep only variants with a reasonable number of occurrences) (if enabled)
    # - start attributes filter (keep only variants that start with a plausible start activity) (if enabled)
    # - end attributes filter (keep only variants that end with a plausible end activity) (if enabled)

    # work on a shallow copy: the keys written below must not leak into the
    # dictionary owned by the caller
    parameters = {} if parameters is None else dict(parameters)

    enable_activities_filter = exec_utils.get_param_value(Parameters.ENABLE_ACTIVITES_FILTER, parameters, True)
    enable_variants_filter = exec_utils.get_param_value(Parameters.ENABLE_VARIANTS_FILTER, parameters, False)
    enable_start_activities_filter = exec_utils.get_param_value(Parameters.ENABLE_START_ACTIVITIES_FILTER,
                                                                parameters, False)
    enable_end_activities_filter = exec_utils.get_param_value(Parameters.ENABLE_END_ACTIVITIES_FILTER,
                                                              parameters, True)

    # every child filter receives the same key both as attribute key and as activity key
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, xes.DEFAULT_NAME_KEY)
    parameters[Parameters.ATTRIBUTE_KEY] = attribute_key
    parameters[Parameters.ACTIVITY_KEY] = attribute_key

    variants = variants_module.get_variants(log, parameters=parameters)

    filtered_log = log
    if enable_activities_filter:
        filtered_log = attributes_filter.apply_auto_filter(log, variants=variants, parameters=parameters)
        # the log changed, so the variants have to be recomputed for the next filter
        variants = variants_module.get_variants(filtered_log, parameters=parameters)
    if enable_variants_filter:
        filtered_log = variants_module.apply_auto_filter(filtered_log, variants=variants, parameters=parameters)
        variants = variants_module.get_variants(filtered_log, parameters=parameters)
    if enable_start_activities_filter:
        filtered_log = start_activities_filter.apply_auto_filter(filtered_log, variants=variants,
                                                                 parameters=parameters)
    # NOTE(review): the variants are not recomputed after the start activities filter, so the
    # end activities filter receives the variants computed before it - confirm this is intended
    if enable_end_activities_filter:
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, variants=variants,
                                                               parameters=parameters)

    return filtered_log
def test_logfiltering_filtering1(self):
    """
    Smoke test: imports the running-example log and applies, one after the other,
    the automatic attributes, variants, start activities, end activities and paths filters
    """
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"

    log_path = os.path.join(INPUT_DATA_DIR, "running-example.xes")
    log = xes_importer.import_log(log_path)

    # each filter is applied on the output of the previous one
    filter_chain = (
        attributes_filter,
        variants_module,
        start_activities_filter,
        end_activities_filter,
        paths_filter,
    )
    for filter_module in filter_chain:
        log = filter_module.apply_auto_filter(log)

    del log
def load_from_json(self, file_name, filter_start_activities=False, filter_end_activities=False):
    """
    Load an event log stored as a gzip-compressed JSON file from disk
    and store it in self._event_log

    Parameters
    ----------
    file_name
        Path of the gzip-compressed JSON file; it must contain a 'cases' list, where
        each case has an 'id', an 'attributes' dictionary and an 'events' list
        (each event having 'name', 'timestamp' and 'attributes')
    filter_start_activities
        If True, the automatic start activities filter is applied (decreasingFactor 0.6)
    filter_end_activities
        If True, the automatic end activities filter is applied (decreasingFactor 0.6)
    """
    with gzip.open(file_name, 'rb') as handle:
        payload = json.load(handle)

    event_log = log_lib.log.EventLog()

    # convert every case of the JSON document into a trace
    for case in payload['cases']:
        case_attributes = case['attributes']
        case_events = case['events']

        trace = log_lib.log.Trace()
        trace.attributes['concept:name'] = case['id']

        # the case attributes are attached afterwards, so they can override the name
        for attr_name, attr_value in case_attributes.items():
            trace.attributes[attr_name] = attr_value

        for raw_event in case_events:
            event = log_lib.log.Event()
            event['concept:name'] = raw_event['name']

            raw_timestamp = raw_event['timestamp']
            if raw_timestamp is not None:
                event['time:timestamp'] = ciso8601.parse_datetime(raw_timestamp)

            # attach other event attributes
            for attr_name, attr_value in raw_event['attributes'].items():
                event[attr_name] = attr_value

            trace.append(event)

        event_log.append(trace)

    self._event_log = event_log

    # optional filters: the end activities filter comes first, then the start activities one
    optional_filters = (
        (filter_end_activities, end_activities_filter),
        (filter_start_activities, start_activities_filter),
    )
    for enabled, filter_module in optional_filters:
        if enabled:
            self._event_log = filter_module.apply_auto_filter(
                self._event_log, parameters={"decreasingFactor": 0.6})
def load_from_xes(self, file_name, sort=True, filter_start_activities=False, filter_end_activities=False):
    """
    Load an event log from a XES file and store it in self._event_log

    Parameters
    ----------
    file_name
        Path of the XES file
    sort
        Value passed to the importer as the 'timestamp_sort' parameter
    filter_start_activities
        If True, the automatic start activities filter is applied (decreasingFactor 0.6)
    filter_end_activities
        If True, the automatic end activities filter is applied (decreasingFactor 0.6)
    """
    # load file
    self._event_log = xes_import_factory.apply(file_name, parameters={'timestamp_sort': sort})

    # optional filters: the end activities filter comes first, then the start activities one
    optional_filters = (
        (filter_end_activities, end_activities_filter),
        (filter_start_activities, start_activities_filter),
    )
    for enabled, filter_module in optional_filters:
        if enabled:
            self._event_log = filter_module.apply_auto_filter(
                self._event_log, parameters={"decreasingFactor": 0.6})
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep (default: 20)
            discovery_algorithm -> Discovery algorithm to use; a model is built only when the value
                contains "alpha" or "dfg_mining" (default: alpha)
            desidered_output -> Desidered output of the algorithm (default: petri)
            include_filtered_log -> Include the filtered log in the output (default: True)
            include_dfg_frequency -> Include the DFG of frequencies in the output (default: True)
            include_dfg_performance -> Include the DFG of performance in the output (default: False)
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
                (default: True)
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
                (default: False)
        The attribute key and the activity key are written into this dictionary when they are missing.
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary

    Returns
    -------------
    output
        The tuple (net, initial_marking, final_marking) when classic_output is set, a net has been
        discovered and the desidered output is "petri"; otherwise a dictionary containing the
        requested objects that could be computed
    """
    if parameters is None:
        parameters = {}

    returned_dictionary = {}

    net = None
    initial_marking = None
    final_marking = None
    # never assigned in this function: no BPMN conversion is performed here
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alpha")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_log = parameters.get("include_filtered_log", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", False)
    include_filtered_dfg_frequency = parameters.get("include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get("include_filtered_dfg_performance", False)

    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        # no attribute specified: look for a classifier in the log, falling back to the default name key
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    # keep only the most frequent activities (the sort is stable, so ties keep their original order)
    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_count_list = sorted(activities_count_dictio.items(), key=lambda x: x[1], reverse=True)
    activities_keep_list = [x[0] for x in activities_count_list[:maximum_number_activities]]
    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    filtered_log = None

    if "alpha" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)

    # the frequency DFG is needed by the dfg_mining conversion even when it is not included in the output
    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")

    # the filtered log exists only for the supported discovery algorithms: with any other
    # algorithm there is nothing to compute the filtered DFGs on
    if filtered_log is not None:
        if include_filtered_dfg_frequency:
            filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
        if include_filtered_dfg_performance:
            filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters,
                                                         variant="performance")

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
        parameters_conv = {
            "start_activities": start_activities,
            "end_activities": end_activities,
        }
        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)

    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log

    if desidered_output == "petri":
        if net is not None:
            returned_dictionary["net"] = net
        if initial_marking is not None:
            returned_dictionary["initial_marking"] = initial_marking
        if final_marking is not None:
            returned_dictionary["final_marking"] = final_marking

    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph

    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output and net is not None and desidered_output == "petri":
        return net, initial_marking, final_marking

    return returned_dictionary
def apply_auto_filter(log, parameters=None):
    """
    Run the automatic filters one after the other on the log, obtaining a simplified log

    Parameters
    ----------
    log
        Log
    parameters
        Eventual parameters applied to the algorithms:
            decreasingFactor -> Decreasing factor (provided to all algorithms,
                default: filtering_constants.DECREASING_FACTOR)
            PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key (must be specified if different from
                xes.DEFAULT_NAME_KEY)
            enable_activities_filter -> Enables the activity filter (default: True)
            enable_variants_filter -> Enables the variant filter (default: False)
            enable_start_activities_filter -> Enables the start activities filter (default: False)
            enable_end_activities_filter -> Enables the end activities filter (default: True)

    Returns
    ---------
    filtered_log
        Filtered log
    """

    # filters applied, in this order, when enabled:
    # - activity filter (keeps the attributes with a reasonable number of occurrences)
    # - variant filter (keeps the variants with a reasonable number of occurrences)
    # - start attributes filter (keeps the variants starting with a plausible start activity)
    # - end attributes filter (keeps the variants ending with a plausible end activity)

    if parameters is None:
        parameters = {}

    act_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    factor = parameters.get("decreasingFactor", filtering_constants.DECREASING_FACTOR)

    # the child filters get a dedicated dictionary, with the same key used
    # both as activity key and as attribute key
    parameters_child = {
        "decreasingFactor": factor,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key,
        constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: act_key,
    }

    use_activities = parameters.get("enable_activities_filter", True)
    use_variants = parameters.get("enable_variants_filter", False)
    use_start_activities = parameters.get("enable_start_activities_filter", False)
    use_end_activities = parameters.get("enable_end_activities_filter", True)

    variants = variants_module.get_variants(log, parameters=parameters_child)
    result = log

    if use_activities:
        result = attributes_filter.apply_auto_filter(log, variants=variants, parameters=parameters_child)
        variants = variants_module.get_variants(result, parameters=parameters_child)

    if use_variants:
        result = variants_module.apply_auto_filter(result, variants=variants, parameters=parameters_child)
        variants = variants_module.get_variants(result, parameters=parameters_child)

    if use_start_activities:
        result = start_activities_filter.apply_auto_filter(result, variants=variants,
                                                           parameters=parameters_child)

    if use_end_activities:
        result = end_activities_filter.apply_auto_filter(result, variants=variants,
                                                         parameters=parameters_child)

    return result
def test_19(self):
    """
    Applies the automatic start activities filter (decreasing factor 0.6)
    to the running-example dataframe
    """
    from pm4py.algo.filtering.pandas.start_activities import start_activities_filter

    dataframe = self.load_running_example_df()
    filter_parameters = {start_activities_filter.Parameters.DECREASING_FACTOR: 0.6}
    df_auto_sa = start_activities_filter.apply_auto_filter(dataframe, parameters=filter_parameters)
def test_18(self):
    """
    Applies the automatic start activities filter (decreasing factor 0.6)
    to the running-example XES log
    """
    from pm4py.algo.filtering.log.start_activities import start_activities_filter

    log = self.load_running_example_xes()
    filter_parameters = {start_activities_filter.Parameters.DECREASING_FACTOR: 0.6}
    log_af_sa = start_activities_filter.apply_auto_filter(log, parameters=filter_parameters)
"sort_field": "time:timestamp" }) log = conversion_factory.apply(event_stream, parameters={"timestamp_sort": True}) ####### Filter the event log ## Start activities from pm4py.algo.filtering.log.start_activities import start_activities_filter log_start = start_activities_filter.get_start_activities(log) log = start_activities_filter.apply_auto_filter( log, parameters={"decreasingFactor": 0.6}) print(start_activities_filter.get_start_activities(log)) ## End activities from pm4py.algo.filtering.log.end_activities import end_activities_filter log_end = end_activities_filter.get_end_activities(log) log_af_ea = end_activities_filter.apply_auto_filter( log, parameters={"decreasingFactor": 0.6}) print(end_activities_filter.get_end_activities(log_af_ea)) ## Other Events from pm4py.algo.filtering.log.attributes import attributes_filter