コード例 #1
0
def apply_auto_filter(log, parameters=None):
    """
    Apply some filters in battery to the log in order to get a simplified log

    Parameters
    ----------
    log
        Log
    parameters
        Eventual parameters applied to the algorithms (the whole dictionary is forwarded to every filter):
            Parameters.DECREASING_FACTOR -> Decreasing factor (provided to all algorithms)
            Parameters.ATTRIBUTE_KEY -> Attribute the filters work on (takes precedence over the activity key)
            Parameters.ACTIVITY_KEY -> Activity key (must be specified if different from concept:name)
            Parameters.ENABLE_ACTIVITES_FILTER -> Enables the activities filter (default: True)
            Parameters.ENABLE_VARIANTS_FILTER -> Enables the variants filter (default: False)
            Parameters.ENABLE_START_ACTIVITIES_FILTER -> Enables the start activities filter (default: False)
            Parameters.ENABLE_END_ACTIVITIES_FILTER -> Enables the end activities filter (default: True)
        The dictionary provided by the caller is not modified.

    Returns
    ---------
    filtered_log
        Filtered log
    """

    # the following filters are applied:
    # - activity filter (keep only attributes with a reasonable number of occurrences) (if enabled)
    # - variant filter (keep only variants with a reasonable number of occurrences) (if enabled)
    # - start attributes filter (keep only variants that start with a plausible start activity) (if enabled)
    # - end attributes filter (keep only variants that end with a plausible end activity) (if enabled)

    # work on a private copy: the keys injected below must neither leak into the caller's dictionary
    # nor overwrite an activity key the caller has provided
    parameters = {} if parameters is None else dict(parameters)

    enable_activities_filter = exec_utils.get_param_value(Parameters.ENABLE_ACTIVITES_FILTER, parameters, True)
    enable_variants_filter = exec_utils.get_param_value(Parameters.ENABLE_VARIANTS_FILTER, parameters, False)
    enable_start_activities_filter = exec_utils.get_param_value(Parameters.ENABLE_START_ACTIVITIES_FILTER, parameters,
                                                                False)
    enable_end_activities_filter = exec_utils.get_param_value(Parameters.ENABLE_END_ACTIVITIES_FILTER, parameters, True)

    # honour the documented activity key when no explicit attribute key is given
    # (previously it was ignored and silently replaced by concept:name)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, activity_key)

    # every child filter has to look at the same attribute
    parameters[Parameters.ATTRIBUTE_KEY] = attribute_key
    parameters[Parameters.ACTIVITY_KEY] = attribute_key

    variants = variants_module.get_variants(log, parameters=parameters)

    filtered_log = log
    if enable_activities_filter:
        filtered_log = attributes_filter.apply_auto_filter(log, variants=variants, parameters=parameters)
        # the log changed, so the variants have to be recomputed for the next filter
        variants = variants_module.get_variants(filtered_log, parameters=parameters)
    if enable_variants_filter:
        filtered_log = variants_module.apply_auto_filter(filtered_log, variants=variants, parameters=parameters)
        variants = variants_module.get_variants(filtered_log, parameters=parameters)
    if enable_start_activities_filter:
        filtered_log = start_activities_filter.apply_auto_filter(filtered_log, variants=variants,
                                                                 parameters=parameters)
    # NOTE(review): the variants are not recomputed after the start activities filter, so the end
    # activities filter receives variants computed before it - confirm that this is intended
    if enable_end_activities_filter:
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, variants=variants,
                                                               parameters=parameters)

    return filtered_log
コード例 #2
0
 def test_logfiltering_filtering1(self):
     """Import the running-example log and chain every automatic log filter on it."""
     # to avoid static method warnings in tests,
     # that by construction of the unittest package have to be expressed in such way
     self.dummy_variable = "dummy_value"
     log_path = os.path.join(INPUT_DATA_DIR, "running-example.xes")
     log = xes_importer.import_log(log_path)
     # each filter consumes the output of the previous one, in this exact order
     filters_in_order = (attributes_filter, variants_module, start_activities_filter,
                         end_activities_filter, paths_filter)
     for filter_module in filters_in_order:
         log = filter_module.apply_auto_filter(log)
     del log
コード例 #3
0
ファイル: eventlog.py プロジェクト: lukeaust/case2vec
    def load_from_json(self,
                       file_name,
                       filter_start_activities=False,
                       filter_end_activities=False):
        """
        Load an event log stored as a gzip-compressed json file from disk.

        The json document must provide a 'cases' list; every case carries an
        'id', an 'attributes' dictionary and an 'events' list, and every event
        carries a 'name', a 'timestamp' (may be null) and an 'attributes'
        dictionary. The resulting log is stored in ``self._event_log``.

        :file_name: path of the gzip-compressed json file
        :filter_start_activities: if true, apply the automatic start activities filter
        :filter_end_activities: if true, apply the automatic end activities filter
        """
        with gzip.open(file_name, 'rb') as handle:
            payload = json.load(handle)

        event_log = log_lib.log.EventLog()

        # rebuild one trace per json case
        for case in payload['cases']:
            case_attributes = case['attributes']
            case_events = case['events']

            trace = log_lib.log.Trace()
            trace.attributes['concept:name'] = case['id']

            # case attributes are copied afterwards, so they can override the name set from the id
            for name, value in case_attributes.items():
                trace.attributes[name] = value

            for raw_event in case_events:
                event = log_lib.log.Event()
                event['concept:name'] = raw_event['name']

                # events without a timestamp simply get no time:timestamp entry
                if raw_event['timestamp'] is not None:
                    event['time:timestamp'] = ciso8601.parse_datetime(
                        raw_event['timestamp'])

                # remaining event attributes are attached verbatim
                for name, value in raw_event['attributes'].items():
                    event[name] = value

                trace.append(event)

            event_log.append(trace)

        self._event_log = event_log

        # optional filtering: end activities first, then start activities
        if filter_end_activities:
            self._event_log = end_activities_filter.apply_auto_filter(
                self._event_log, parameters={"decreasingFactor": 0.6})

        if filter_start_activities:
            self._event_log = start_activities_filter.apply_auto_filter(
                self._event_log, parameters={"decreasingFactor": 0.6})
コード例 #4
0
ファイル: eventlog.py プロジェクト: lukeaust/case2vec
    def load_from_xes(self,
                      file_name,
                      sort=True,
                      filter_start_activities=False,
                      filter_end_activities=False):
        """
        Load an event log from a XES file and store it in ``self._event_log``.

        :file_name: path of the XES file
        :sort: value handed to the importer as its 'timestamp_sort' parameter
        :filter_start_activities: if true, apply the automatic start activities filter
        :filter_end_activities: if true, apply the automatic end activities filter
        """
        # import the file
        import_parameters = {'timestamp_sort': sort}
        imported_log = xes_import_factory.apply(file_name,
                                                parameters=import_parameters)
        self._event_log = imported_log

        # optional filtering: end activities first, then start activities
        if filter_end_activities:
            self._event_log = end_activities_filter.apply_auto_filter(
                self._event_log, parameters={"decreasingFactor": 0.6})

        if filter_start_activities:
            self._event_log = start_activities_filter.apply_auto_filter(
                self._event_log, parameters={"decreasingFactor": 0.6})
コード例 #5
0
ファイル: classic.py プロジェクト: zhengyuxin/pm4py-source
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep (default: 20)
            discovery_algorithm -> Discovery algorithm to use (default: alpha). Only names containing
                "alpha" or "dfg_mining" produce a filtered log and a model; for any other value
                only the unfiltered DFGs are computed
            desidered_output -> Desidered output of the algorithm (default: petri)
            include_filtered_log -> Include the filtered log in the output (default: True)
            include_dfg_frequency -> Include the DFG of frequencies in the output (default: True)
            include_dfg_performance -> Include the DFG of performance in the output (default: False)
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output (default: True)
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
                (default: False)
        The attribute key and the activity key are written into this dictionary when they are missing
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary

    Returns
    -------------
    result
        The tuple (net, initial_marking, final_marking) when classic_output is set, a net has been
        discovered and the desidered output is "petri"; otherwise a dictionary with the requested objects
    """
    if parameters is None:
        parameters = {}

    returned_dictionary = {}

    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alpha")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_log = parameters.get("include_filtered_log", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", False)
    include_filtered_dfg_frequency = parameters.get("include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get("include_filtered_dfg_performance", False)

    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        # no key provided: look for a classifier in the log, falling back to the default name key
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    # keep only the most frequent activities (ties keep the iteration order of the counts)
    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_by_frequency = sorted(activities_count_dictio, key=lambda act: activities_count_dictio[act],
                                     reverse=True)
    activities_keep_list = activities_by_frequency[:maximum_number_activities]

    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    filtered_log = None

    if "alpha" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)

    # the frequency DFG is also needed to build the model in the dfg_mining case
    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    # a filtered log exists only for the supported discovery algorithms: without this guard the
    # (enabled by default) filtered DFG would be computed on None for any other algorithm name
    if filtered_log is not None:
        if include_filtered_dfg_frequency:
            filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
        if include_filtered_dfg_performance:
            filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters,
                                                         variant="performance")

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)

        parameters_conv = {
            "start_activities": start_activities,
            "end_activities": end_activities,
        }

        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)

    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log
    if net is not None and desidered_output == "petri":
        returned_dictionary["net"] = net
    if initial_marking is not None and desidered_output == "petri":
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and desidered_output == "petri":
        returned_dictionary["final_marking"] = final_marking
    # bpmn_graph is never assigned in this function, so this entry is currently never produced
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking

    return returned_dictionary
コード例 #6
0
def apply_auto_filter(log, parameters=None):
    """
    Run a battery of automatic filters on the log in order to get a simplified log

    Parameters
    ----------
    log
        Log
    parameters
        Eventual parameters applied to the algorithms:
            decreasingFactor -> Decreasing factor (provided to all algorithms)
            activity_key -> Activity key (must be specified if different from concept:name)
            enable_activities_filter -> Enables the activities filter (default: True)
            enable_variants_filter -> Enables the variants filter (default: False)
            enable_start_activities_filter -> Enables the start activities filter (default: False)
            enable_end_activities_filter -> Enables the end activities filter (default: True)

    Returns
    ---------
    filtered_log
        Filtered log
    """

    # filters applied, in this order and only if enabled:
    # - activity filter (keep only attributes with a reasonable number of occurrences)
    # - variant filter (keep only variants with a reasonable number of occurrences)
    # - start attributes filter (keep only variants that start with a plausible start activity)
    # - end attributes filter (keep only variants that end with a plausible end activity)

    if parameters is None:
        parameters = {}

    use_activities_filter = parameters.get("enable_activities_filter", True)
    use_variants_filter = parameters.get("enable_variants_filter", False)
    use_start_activities_filter = parameters.get("enable_start_activities_filter", False)
    use_end_activities_filter = parameters.get("enable_end_activities_filter", True)

    attribute_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    decreasing_factor = parameters.get("decreasingFactor", filtering_constants.DECREASING_FACTOR)

    # the child filters only receive the decreasing factor and the key to work on
    parameters_child = {
        "decreasingFactor": decreasing_factor,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key,
        constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key
    }

    variants = variants_module.get_variants(log, parameters=parameters_child)
    filtered_log = log

    if use_activities_filter:
        filtered_log = attributes_filter.apply_auto_filter(log, variants=variants, parameters=parameters_child)
        # the log changed: refresh the variants for the next filter
        variants = variants_module.get_variants(filtered_log, parameters=parameters_child)

    if use_variants_filter:
        filtered_log = variants_module.apply_auto_filter(filtered_log, variants=variants,
                                                         parameters=parameters_child)
        variants = variants_module.get_variants(filtered_log, parameters=parameters_child)

    if use_start_activities_filter:
        filtered_log = start_activities_filter.apply_auto_filter(filtered_log, variants=variants,
                                                                 parameters=parameters_child)

    # NOTE(review): the variants are not refreshed after the start activities filter - confirm intended
    if use_end_activities_filter:
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, variants=variants,
                                                               parameters=parameters_child)

    return filtered_log
コード例 #7
0
 def test_19(self):
     """Apply the automatic start activities filter (decreasing factor 0.6) to the running-example dataframe."""
     from pm4py.algo.filtering.pandas.start_activities import start_activities_filter
     running_example_df = self.load_running_example_df()
     filter_parameters = {start_activities_filter.Parameters.DECREASING_FACTOR: 0.6}
     df_auto_sa = start_activities_filter.apply_auto_filter(running_example_df, parameters=filter_parameters)
コード例 #8
0
 def test_18(self):
     """Apply the automatic start activities filter (decreasing factor 0.6) to the running-example log."""
     from pm4py.algo.filtering.log.start_activities import start_activities_filter
     running_example_log = self.load_running_example_xes()
     filter_parameters = {start_activities_filter.Parameters.DECREASING_FACTOR: 0.6}
     log_af_sa = start_activities_filter.apply_auto_filter(running_example_log, parameters=filter_parameters)
コード例 #9
0
                                                    "sort_field":
                                                    "time:timestamp"
                                                })

# Build the log object from the event stream, passing timestamp_sort=True to the converter
log = conversion_factory.apply(event_stream,
                               parameters={"timestamp_sort": True})

####### Filter the event log

## Start activities

from pm4py.algo.filtering.log.start_activities import start_activities_filter

# start activities of the log before any filtering
log_start = start_activities_filter.get_start_activities(log)

# the automatic start activities filter (decreasing factor 0.6) REPLACES `log`,
# so everything below works on the already filtered log
log = start_activities_filter.apply_auto_filter(
    log, parameters={"decreasingFactor": 0.6})
# show the start activities that are left after filtering
print(start_activities_filter.get_start_activities(log))

## End activities

from pm4py.algo.filtering.log.end_activities import end_activities_filter

# end activities of the (start-filtered) log
log_end = end_activities_filter.get_end_activities(log)

# unlike above, the result goes into a separate variable: `log` itself is left untouched
log_af_ea = end_activities_filter.apply_auto_filter(
    log, parameters={"decreasingFactor": 0.6})
# show the end activities that are left after filtering
print(end_activities_filter.get_end_activities(log_af_ea))

## Other Events

from pm4py.algo.filtering.log.attributes import attributes_filter