Example #1
# NOTE: the pm4py-internal import paths below are assumptions based on the
# helpers this snippet uses; adjust them to the installed pm4py layout.
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist
from pm4py.algo.discovery.dfg.variants import native
from pm4py.algo.clustering.trace_attribute_driven.util import filter_subsets
from pm4py.algo.clustering.trace_attribute_driven.variants import act_dist_calc


def slice_dist_suc(log_1, log_2, unit):
    (log1_list, freq1_list) = filter_subsets.logslice_percent(log_1, unit)
    (log2_list, freq2_list) = filter_subsets.logslice_percent(log_2, unit)

    if len(freq1_list) >= len(freq2_list):
        max_len = len(freq1_list)
        min_len = len(freq2_list)
        max_log = log1_list
        min_log = log2_list
        var_count_max = freq1_list
        var_count_min = freq2_list

    else:
        max_len = len(freq2_list)
        min_len = len(freq1_list)
        max_log = log2_list
        min_log = log1_list
        var_count_max = freq2_list
        var_count_min = freq1_list

    dist_matrix = np.zeros((max_len, min_len))
    max_per_var = np.zeros(max_len)
    max_freq = np.zeros(max_len)
    min_freq = np.zeros(min_len)
    min_per_var = np.zeros(min_len)
    index_rec = set(range(min_len))

    if log1_list == log2_list:
        # identical variant lists cannot be compared meaningfully; distance is 0
        print("Please give different variant lists!")
        dist = 0
    else:
        for i in range(max_len):
            dist_vec = np.zeros(min_len)
            dfg1 = native.apply(max_log[i])
            df1_dfg = act_dist_calc.occu_var_act(dfg1)
            for j in range(min_len):
                dfg2 = native.apply(min_log[j])
                df2_dfg = act_dist_calc.occu_var_act(dfg2)
                df_dfg = pd.merge(df1_dfg, df2_dfg, how='outer', on='var').fillna(0)
                dist_vec[j] = pdist(np.array([df_dfg['freq_x'].values, df_dfg['freq_y'].values]), 'cosine')[0]
                dist_matrix[i][j] = dist_vec[j]
            # best match for variant i of the larger log
            max_loc_col = np.argmin(dist_vec)
            if abs(dist_vec[max_loc_col]) <= 1e-8:
                # an exact match is weighted double and removed from the unmatched set
                index_rec.discard(max_loc_col)
                max_freq[i] = var_count_max[i] * var_count_min[max_loc_col] * 2
                max_per_var[i] = dist_vec[max_loc_col] * max_freq[i] * 2
            else:
                max_freq[i] = var_count_max[i] * var_count_min[max_loc_col]
                max_per_var[i] = dist_vec[max_loc_col] * max_freq[i]

        # handle variants of the smaller log that were never exactly matched
        if index_rec:
            for i in list(index_rec):
                min_loc_row = np.argmin(dist_matrix[:, i])
                min_freq[i] = var_count_max[min_loc_row] * var_count_min[i]
                min_per_var[i] = dist_matrix[min_loc_row, i] * min_freq[i]
        dist = (np.sum(max_per_var) + np.sum(min_per_var)) / (np.sum(max_freq) + np.sum(min_freq))

    return dist
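A hedged usage sketch for this distance (assumed, not part of the source; the XES paths and the 0.3 slicing percentage are placeholders):

# Hypothetical call site for slice_dist_suc, assuming pm4py's XES importer.
from pm4py.objects.log.importer.xes import importer as xes_importer

log_a = xes_importer.apply("log_a.xes")  # placeholder paths
log_b = xes_importer.apply("log_b.xes")

# unit is the fraction of variants kept per slice (illustrative value)
print("DFG-based cosine distance:", slice_dist_suc(log_a, log_b, 0.3))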
Example #2
    def create_dfg(self, parameters=None):
        if parameters is None:
            parameters = {}

        dfg = [(k, v) for k, v in dfg_inst.apply(
            self.log, parameters=parameters).items() if v > 0]

        return dfg
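The comprehension above keeps only directly-follows pairs that were actually observed (count > 0). A toy sketch of the same filtering in plain Python, with made-up activity names:

# dfg_inst.apply returns {(act_a, act_b): count}; drop zero-count arcs.
raw = {("register", "check"): 3, ("check", "reject"): 0}
dfg = [(k, v) for k, v in raw.items() if v > 0]
print(dfg)  # [(('register', 'check'), 3)]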
Example #3
def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log, obtaining a Petri net along with an initial and a final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(start_activities_filter.get_start_activities(log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(end_activities_filter.get_end_activities(log, parameters=parameters).keys())

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    net, initial_marking, final_marking = apply_dfg(dfg, parameters=parameters, activities=activities,
                                                    contains_empty_traces=contains_empty_traces,
                                                    start_activities=start_activities, end_activities=end_activities)

    return net, initial_marking, final_marking
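A minimal sketch of calling this entry point (assumed usage; the file name is a placeholder):

# Hypothetical: discover a Petri net with IMDF from an XES log.
from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply("example.xes")  # placeholder path
net, initial_marking, final_marking = apply(log)
print(len(net.places), "places,", len(net.transitions), "transitions")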
Example #4
def apply_tree(log, parameters=None):
    """
    Apply the IMDF algorithm to a log, obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(
        log,
        parameters={
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
        }).items() if v > 0]

    # gets the start activities from the log
    start_activities = log_start_act_stats.get_start_activities(
        log, parameters=parameters)
    # gets the end activities from the log
    end_activities = log_end_act_stats.get_end_activities(
        log, parameters=parameters)

    # get the activities in the log
    activities = log_attributes_stats.get_attribute_values(log, activity_key)

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    return apply_tree_dfg(dfg,
                          parameters=parameters,
                          activities=activities,
                          contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities,
                          end_activities=end_activities)
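Unlike the previous entry point, this one returns a process tree instead of a Petri net. A hedged sketch:

# Hypothetical: discover a process tree with IMDF ('log' as imported above).
tree = apply_tree(log)
print(tree)  # pm4py process trees render as a readable string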
Example #5
def apply(trace_log, parameters=None):
    """
    This method calls the \"classic\" alpha miner [1]_.

    Parameters
    ----------
    trace_log: :class:`pm4py.log.log.TraceLog`
        Event log to use in the alpha miner, note that it should be a TraceLog!
    parameters:
        Parameters of the algorithm, including:
            activity_key : :class:`str`, optional
                Key to use within events to identify the underlying activity.
                By default, the value 'concept:name' is used.

    Returns
    -------
    net: :class:`pm4py.entities.petri.petrinet.PetriNet`
        A Petri net describing the event log that is provided as an input
    initial marking: :class:`pm4py.models.net.Marking`
        marking object representing the initial marking
    final marking: :class:`pm4py.models.net.Marking`
        marking object representing the final marking, not guaranteed that it is actually reachable!

    References
    ----------
    .. [1] Wil M. P. van der Aalst et al., "Workflow Mining: Discovering Process Models from Event Logs",
      IEEE Trans. Knowl. Data Eng., 16, 1128-1142, 2004. `DOI <https://doi.org/10.1109/TKDE.2004.47>`_.

    """
    if parameters is None:
        parameters = {}
    if pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[
            pm_util.constants.
            PARAMETER_CONSTANT_ACTIVITY_KEY] = log_util.xes.DEFAULT_NAME_KEY
    dfg = {
        k: v
        for k, v in dfg_inst.apply(trace_log, parameters=parameters).items()
        if v > 0
    }
    start_activities = endpoints.derive_start_activities_from_tracelog(
        trace_log,
        parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])
    end_activities = endpoints.derive_end_activities_from_tracelog(
        trace_log,
        parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])
    return apply_dfg_sa_ea(dfg,
                           start_activities,
                           end_activities,
                           parameters=parameters)
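A hedged sketch of invoking the classic alpha miner; note the docstring's caveat that the final marking is not guaranteed to be reachable:

# Hypothetical call site; 'trace_log' is a pm4py TraceLog.
net, initial_marking, final_marking = apply(trace_log)
print(initial_marking, final_marking)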
Example #6
    def check_for_cut(self, test_log, deleted_activity=None, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if deleted_activity is not None:
                del self.activities[deleted_activity]
            if parameters is None:
                parameters = {}
            dfg = [(k, v) for k, v in dfg_inst.apply(
                test_log, parameters=parameters).items() if v > 0]
            self.dfg = dfg
            self.outgoing = get_outgoing_edges(self.dfg)
            self.ingoing = get_ingoing_edges(self.dfg)
            self.log = test_log
            conn_components = detection_utils.get_connected_components(
                self.ingoing, self.outgoing, self.activities)
            this_nx_graph = transform_dfg_to_directed_nx_graph(
                self.dfg, activities=self.activities)
            strongly_connected_components = [
                list(x)
                for x in nx.strongly_connected_components(this_nx_graph)
            ]
            # search for a cut; return True as soon as one is found:
            xor_cut = self.detect_xor(conn_components)
            if xor_cut[0]:
                return True
            sequence_cut = cut_detection.detect_sequential_cut(
                self, self.dfg, strongly_connected_components)
            if sequence_cut[0]:
                return True
            parallel_cut = self.detect_concurrent()
            if parallel_cut[0]:
                return True
            loop_cut = self.detect_loop()
            if loop_cut[0]:
                return True
            return False
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
Example #7
def apply(trace_log, parameters=None):
    """
    Apply the IMDF algorithm to a log, obtaining a Petri net along with an initial and a final marking

    Parameters
    -----------
    trace_log
        Trace log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    # apply the reduction by default only on very small logs
    # (assumption: the original fallback combined two constants, which always
    # yields a truthy value; comparing the log size to the threshold matches
    # the comment's stated intent)
    enable_reduction = parameters.get(
        "enable_reduction",
        shared_constants.APPLY_REDUCTION_ON_SMALL_LOG
        and len(trace_log) <= shared_constants.MAX_LOG_SIZE_FOR_REDUCTION)

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(
        trace_log,
        parameters={
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
        }).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(
        trace_log, activity_key)

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in trace_log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    net, initial_marking, final_marking = apply_dfg(
        dfg,
        parameters=parameters,
        activities=activities,
        contains_empty_traces=contains_empty_traces)

    if enable_reduction:
        # do the replay
        aligned_traces = token_replay.apply(trace_log,
                                            net,
                                            initial_marking,
                                            final_marking,
                                            parameters=parameters)

        # apply petri_reduction technique in order to simplify the Petri net
        net = petri_cleaning.petri_reduction_treplay(
            net, parameters={"aligned_traces": aligned_traces})

    return net, initial_marking, final_marking
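The reduction can be forced on or off through the "enable_reduction" key read above. A hedged sketch:

# Hypothetical: discover and explicitly enable the token-replay-based reduction.
net, im, fm = apply(trace_log, parameters={"enable_reduction": True})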
Example #8
    def apply_fall_through(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall_throughs; default is True (enabled)
        use_empty_trace = parameters.get(Parameters.EMPTY_TRACE_KEY, True)
        use_act_once_per_trace = parameters.get(Parameters.ONCE_PER_TRACE_KEY, True)
        use_act_concurrent = parameters.get(Parameters.CONCURRENT_KEY, True)
        use_strict_tau_loop = parameters.get(Parameters.STRICT_TAU_LOOP_KEY, True)
        use_tau_loop = parameters.get(Parameters.TAU_LOOP_KEY, True)

        if use_empty_trace:
            empty_trace, new_log = fall_through.empty_trace(self.log)
            # if an empty trace is found, the empty-trace fall-through applies
        else:
            empty_trace = False
        if empty_trace:
            logging.debug("empty_trace")
            activities_left = []
            for trace in new_log:
                for act in trace:
                    if act[activity_key] not in activities_left:
                        activities_left.append(act[activity_key])
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=self.parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
                small_log = filtering_utils.keep_one_trace_per_variant(
                    small_log, parameters=parameters)
            else:
                activity_once = False
            if use_act_once_per_trace and activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs as we need them to append to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                self.children.append(
                    SubtreePlain(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=self.parameters).keys())
                self.children.append(
                    SubtreePlain(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                    small_log = filtering_utils.keep_one_trace_per_variant(
                        small_log, parameters=parameters)
                else:
                    activity_concurrent = False
                if use_act_concurrent and activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs on to append later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_filter.get_attribute_values(
                        small_log, activity_key)
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreePlain(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log:
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=self.parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=self.parameters).keys())
                    self.children.append(
                        SubtreePlain(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                        new_log = filtering_utils.keep_one_trace_per_variant(
                            new_log, parameters=parameters)
                    else:
                        strict_tau_loop = False
                    if use_strict_tau_loop and strict_tau_loop:
                        activities_left = []
                        for trace in new_log:
                            for act in trace:
                                if act[activity_key] not in activities_left:
                                    activities_left.append(act[activity_key])
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=self.parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=self.parameters).keys())
                        self.children.append(
                            SubtreePlain(new_log,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                            new_log = filtering_utils.keep_one_trace_per_variant(
                                new_log, parameters=parameters)
                        else:
                            tau_loop = False
                        if use_tau_loop and tau_loop:
                            activities_left = []
                            for trace in new_log:
                                for act in trace:
                                    if act[activity_key] not in activities_left:
                                        activities_left.append(
                                            act[activity_key])
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower model")
                            activities_left = []
                            for trace in self.log:
                                for act in trace:
                                    if act[activity_key] not in activities_left:
                                        activities_left.append(
                                            act[activity_key])
                            self.detected_cut = 'flower'
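Each fall-through can be disabled individually via the flags read at the top of the method. A hedged sketch (assuming 'subtree' is a SubtreePlain instance and Parameters is the inductive miner's parameter enum):

# Hypothetical: skip the empty-trace and tau-loop fall-throughs.
params = {Parameters.EMPTY_TRACE_KEY: False, Parameters.TAU_LOOP_KEY: False}
subtree.apply_fall_through(parameters=params)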
Example #9
    def detect_cut(self, second_iteration=False, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if parameters is None:
                parameters = {}
            activity_key = exec_utils.get_param_value(
                Parameters.ACTIVITY_KEY, parameters,
                pmutil.xes_constants.DEFAULT_NAME_KEY)

            # check base cases:
            empty_log = base_case.empty_log(self.log)
            single_activity = base_case.single_activity(self.log, activity_key)
            if empty_log:
                self.detected_cut = 'empty_log'
            elif single_activity:
                self.detected_cut = 'single_activity'
            # if no base cases are found, search for a cut:
            else:
                conn_components = detection_utils.get_connected_components(
                    self.ingoing, self.outgoing, self.activities)
                this_nx_graph = transform_dfg_to_directed_nx_graph(
                    self.dfg, activities=self.activities)
                strongly_connected_components = [
                    list(x)
                    for x in nx.strongly_connected_components(this_nx_graph)
                ]
                xor_cut = self.detect_xor(conn_components)
                # the following part searches for a cut in the current log
                # if a cut is found, the log is split according to the cut, the resulting logs are saved in new_logs
                # recursion is used on all the logs in new_logs
                if xor_cut[0]:
                    logging.debug("xor_cut")
                    self.detected_cut = 'concurrent'
                    new_logs = split.split_xor(xor_cut[1], self.log,
                                               activity_key)
                    for i in range(len(new_logs)):
                        new_logs[i] = filtering_utils.keep_one_trace_per_variant(
                            new_logs[i], parameters=parameters)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(l,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                else:
                    sequence_cut = cut_detection.detect_sequential_cut(
                        self, self.dfg, strongly_connected_components)
                    if sequence_cut[0]:
                        logging.debug("sequence_cut")
                        new_logs = split.split_sequence(
                            sequence_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    else:
                        parallel_cut = self.detect_concurrent()
                        if parallel_cut[0]:
                            logging.debug("parallel_cut")
                            new_logs = split.split_parallel(
                                parallel_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                            self.detected_cut = "parallel"
                            for l in new_logs:
                                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                    l, parameters=parameters).items() if v > 0]
                                activities = attributes_filter.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_filter.
                                    get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_filter.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l,
                                        new_dfg,
                                        self.master_dfg,
                                        self.initial_dfg,
                                        activities,
                                        self.counts,
                                        self.rec_depth + 1,
                                        noise_threshold=self.noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.
                                        initial_start_activities,
                                        initial_end_activities=self.
                                        initial_end_activities,
                                        parameters=parameters))
                        else:
                            loop_cut = self.detect_loop()
                            if loop_cut[0]:
                                logging.debug("loop_cut")
                                new_logs = split.split_loop(
                                    loop_cut[1], self.log, activity_key)
                                for i in range(len(new_logs)):
                                    new_logs[i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                                self.detected_cut = "loopCut"
                                for l in new_logs:
                                    new_dfg = [
                                        (k, v) for k, v in dfg_inst.apply(
                                            l, parameters=parameters).items()
                                        if v > 0
                                    ]
                                    activities = attributes_filter.get_attribute_values(
                                        l, activity_key)
                                    start_activities = list(
                                        start_activities_filter.
                                        get_start_activities(
                                            l, parameters=parameters).keys())
                                    end_activities = list(
                                        end_activities_filter.
                                        get_end_activities(
                                            l, parameters=parameters).keys())
                                    self.children.append(
                                        SubtreePlain(
                                            l,
                                            new_dfg,
                                            self.master_dfg,
                                            self.initial_dfg,
                                            activities,
                                            self.counts,
                                            self.rec_depth + 1,
                                            noise_threshold=self.
                                            noise_threshold,
                                            start_activities=start_activities,
                                            end_activities=end_activities,
                                            initial_start_activities=self.
                                            initial_start_activities,
                                            initial_end_activities=self.
                                            initial_end_activities,
                                            parameters=parameters))

                            # if the code gets to this point, there is no base_case and no cut found in the log
                            # therefore, we now apply fall through:
                            else:
                                self.apply_fall_through(parameters)
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
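detect_cut tries the cuts in the same fixed order as check_for_cut in Example #6 (xor, sequence, parallel, loop) and only applies a fall-through when none is found. A hedged sketch of starting the recursion (assuming 'root' is an already-constructed SubtreePlain):

# Hypothetical: run cut detection on the root subtree.
root.detect_cut(parameters={})
print(root.detected_cut, len(root.children))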
Example #10
def trans_alpha(log, parameters=None):
    dfg = {k: v for k, v in dfg_inst.apply(log).items() if v > 0}
    if parameters is None:
        parameters = {}
    if pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pm_util.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    start_activities = endpoints.derive_start_activities_from_log(
        log, parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])
    end_activities = endpoints.derive_end_activities_from_log(
        log, parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])

    labels = set()
    for el in dfg:
        labels.add(el[0])
        labels.add(el[1])
    for a in start_activities:
        labels.add(a)
    for a in end_activities:
        labels.add(a)
    labels = list(labels)

    alpha_abstraction = alpha_classic_abstraction.ClassicAlphaAbstraction(
        start_activities, end_activities, dfg)
    new_parallel_set = set()
    loop_set = set()

    for par in alpha_abstraction.parallel_relation:
        for trace in log:
            for i in range(len(trace) - 1):
                if trace[i]['concept:name'] == par[0] and trace[
                        i + 1]['concept:name'] == par[1]:
                    if trace[i + 1]['concept:name'] in trace[i].enabled:
                        new_parallel_set.add(par)

    pairs = list(
        map(
            lambda p: ({p[0]}, {p[1]}),
            filter(
                lambda p: classic.__initial_filter(
                    alpha_abstraction.parallel_relation, p),
                alpha_abstraction.causal_relation)))

    for par in alpha_abstraction.parallel_relation:
        if par not in new_parallel_set and (
                par[1], par[0]) not in new_parallel_set and par[1] != par[0]:
            loop_set.add(par)
            pairs.append(({par[0]}, {par[1]}))
        else:
            new_parallel_set.add(par)

    for i in range(len(pairs)):
        t1 = pairs[i]
        for j in range(i, len(pairs)):
            t2 = pairs[j]
            if t1 != t2:
                if t1[0].issubset(t2[0]) or t1[1].issubset(t2[1]):
                    if not (classic.__check_is_unrelated(
                        (new_parallel_set.union(loop_set)),
                            alpha_abstraction.causal_relation, t1[0], t2[0])
                            or classic.__check_is_unrelated(
                                (new_parallel_set.union(loop_set)),
                                alpha_abstraction.causal_relation, t1[1],
                                t2[1])):
                        new_alpha_pair = (t1[0] | t2[0], t1[1] | t2[1])
                        if new_alpha_pair not in pairs:
                            pairs.append(new_alpha_pair)
    internal_places = filter(lambda p: classic.__pair_maximizer(pairs, p),
                             pairs)
    net = petri.petrinet.PetriNet('alpha_classic_net_' + str(time.time()))
    label_transition_dict = {}

    for label in labels:
        label_transition_dict[label] = petri.petrinet.PetriNet.Transition(
            label, label)
        net.transitions.add(label_transition_dict[label])

    src = classic.__add_source(net, alpha_abstraction.start_activities,
                               label_transition_dict)
    sink = classic.__add_sink(net, alpha_abstraction.end_activities,
                              label_transition_dict)

    for pair in internal_places:
        place = petri.petrinet.PetriNet.Place(str(pair))
        net.places.add(place)
        for in_arc in pair[0]:
            petri.utils.add_arc_from_to(label_transition_dict[in_arc], place,
                                        net)
        for out_arc in pair[1]:
            petri.utils.add_arc_from_to(place, label_transition_dict[out_arc],
                                        net)
    return net, Marking({src: 1}), Marking({sink: 1})
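The pair-merging loop above unions causal pairs whose sides overlap and pass the unrelatedness checks; each maximal pair then becomes one place. A toy illustration of the set arithmetic (plain Python, not pm4py API):

# Merging ({a},{b}) and ({a},{c}) yields one place with input a, outputs b and c.
p1 = ({"a"}, {"b"})
p2 = ({"a"}, {"c"})
print((p1[0] | p2[0], p1[1] | p2[1]))  # ({'a'}, {'b', 'c'})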
Example #11
def apply(log, parameters=None):
    """
    Apply the IMDF algorithm to a log, obtaining a Petri net along with an initial and a final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[
            pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    # apply the reduction by default only on very small logs
    enable_reduction = parameters.get("enable_reduction", True)

    # get the DFG
    if isinstance(log[0][0], tel.Event):
        dfg = [(k, v) for k, v in inductive_revise.get_dfg_graph_trans(
            log,
            parameters={
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
            }).items() if v > 0]
    else:
        dfg = [(k, v) for k, v in dfg_inst.apply(
            log,
            parameters={
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
            }).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(
        start_activities_filter.get_start_activities(
            log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(
        end_activities_filter.get_end_activities(log,
                                                 parameters=parameters).keys())

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    net, initial_marking, final_marking = apply_dfg(
        dfg,
        parameters=parameters,
        activities=activities,
        contains_empty_traces=contains_empty_traces,
        start_activities=start_activities,
        end_activities=end_activities)
    """if enable_reduction:
        vis_trans = [x for x in net.transitions if x.label]
        hid_trans = [x for x in net.transitions if x.label is None]
        if vis_trans:
            ratio = len(hid_trans) / len(vis_trans)

            if ratio < 2.0:
                # avoid reducting too much complicated processes
                reduction_parameters = copy(parameters)
                if "is_reduction" not in reduction_parameters:
                    reduction_parameters["is_reduction"] = True
                if "thread_maximum_ex_time" not in reduction_parameters:
                    reduction_parameters["thread_maximum_ex_time"] = shared_constants.RED_MAX_THR_EX_TIME

                # do the replay
                aligned_traces = token_replay.apply(log, net, initial_marking, final_marking,
                                                    parameters=reduction_parameters)

                # apply petri_reduction technique in order to simplify the Petri net
                net = petri_cleaning.petri_reduction_treplay(net, parameters={"aligned_traces": aligned_traces})"""

    return net, initial_marking, final_marking
Example #12
    def apply_fall_through_infrequent(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall_throughs; default is True (enabled)
        use_empty_trace = parameters.get(Parameters.EMPTY_TRACE_KEY, True)
        use_act_once_per_trace = parameters.get(Parameters.ONCE_PER_TRACE_KEY, True)
        use_act_concurrent = parameters.get(Parameters.CONCURRENT_KEY, True)
        use_strict_tau_loop = parameters.get(Parameters.STRICT_TAU_LOOP_KEY, True)
        use_tau_loop = parameters.get(Parameters.TAU_LOOP_KEY, True)

        if use_empty_trace:
            empty_traces_present, enough_traces, new_log = fall_through_infrequent.empty_trace_filtering(
                self.log, self.f)
            self.log = new_log
        else:
            empty_traces_present = False
            enough_traces = False
        # if an empty trace is found, the empty trace fallthrough applies
        if empty_traces_present and enough_traces:
            logging.debug("empty_trace_if")
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=self.parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=parameters).keys())
            self.children.append(
                SubtreeInfrequent(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    self.f,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        elif empty_traces_present and not enough_traces:
            # no node is added to the PT, instead we just use recursion on the log without the empty traces
            self.detect_cut_if()
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
            else:
                activity_once = False
            if activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs as we need them to append to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=parameters).keys())
                # append the chosen activity as leaf:
                self.children.append(
                    SubtreeInfrequent(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log
                self.children.append(
                    SubtreeInfrequent(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, key = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                else:
                    activity_concurrent = False
                if activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs on to append later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_filter.get_attribute_values(
                        small_log, activity_key)
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=parameters).keys())
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreeInfrequent(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log:
                    self.children.append(
                        SubtreeInfrequent(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                    else:
                        strict_tau_loop = False
                    if strict_tau_loop:
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                new_log,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.initial_start_activities,
                                initial_end_activities=self.initial_end_activities,
                                parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                        else:
                            tau_loop = False
                        if tau_loop:
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    new_log, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    new_log, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.initial_start_activities,
                                    initial_end_activities=self.initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower_if")
                            self.detected_cut = 'flower'
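
The chain above tries the IMf fall-throughs in a fixed order: factoring out a concurrent activity, then the strict tau loop, then the plain tau loop, and finally the flower model. A condensed sketch of that cascade (a sketch only; the use_* flags are simplified away and the signatures are taken from the calls above):

# condensed sketch of the fall-through order above, not the original code
def pick_fall_through(self, activity_key, parameters):
    # 1) try to factor out an activity that is concurrent to the rest
    ok, new_log, small_log, _ = fall_through.activity_concurrent(
        self, self.log, self.activities, activity_key, parameters=parameters)
    if ok:
        return 'parallel', (new_log, small_log)
    # 2) try a strict tau loop (loop-backs from end to start activities)
    ok, new_log = fall_through.strict_tau_loop(
        self.log, self.start_activities, self.end_activities, activity_key)
    if ok:
        return 'strict_tau_loop', (new_log,)
    # 3) try a plain tau loop (cut whenever a start activity re-occurs)
    ok, new_log = fall_through.tau_loop(
        self.log, self.start_activities, activity_key)
    if ok:
        return 'tau_loop', (new_log,)
    # 4) give up: the log is modelled as a flower
    return 'flower', ()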
Beispiel #13
0
    def detect_cut_if(self, second_iteration=False, parameters=None):
        # dfg_viz = dfg_factory.apply(self.log)
        # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
        # dfg_vis_factory.view(gviz)
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # check base cases:
        empty_log = base_case.empty_log(self.log)
        single_activity = base_case.single_activity(self.log, activity_key)
        if empty_log:
            self.detected_cut = 'empty_log'
        elif single_activity:
            self.detected_cut = 'single_activity'
        # if no base cases are found, search for a cut:
        # use the cutting and splitting functions of im_plain:
        else:
            found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()

            if found_plain_cut:
                self.apply_cut_im_plain(type_of_cut, cut, activity_key)
            # if im_plain does not find a cut, we filter the dfg on our noise threshold
            # and then run the cut detection again, this time using different splitting functions:
            else:
                self.filter_dfg_on_threshold()
                """
                dfg_viz = dfg_factory.apply(self.log)
                gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
                dfg_vis_factory.view(gviz)
                """
                found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
                if found_plain_cut:
                    if type_of_cut == 'concurrent':
                        logging.debug("concurrent_cut_if")
                        self.detected_cut = 'concurrent'
                        new_logs = splitting_infrequent.split_xor_infrequent(
                            cut[1], self.log, activity_key)
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.initial_start_activities,
                                    initial_end_activities=self.initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'sequential':
                        logging.debug("sequential_if")
                        new_logs = splitting_infrequent.split_sequence_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.initial_start_activities,
                                    initial_end_activities=self.initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'parallel':
                        logging.debug("parallel_if")
                        new_logs = split.split_parallel(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "parallel"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.initial_start_activities,
                                    initial_end_activities=self.initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'loopCut':
                        logging.debug("loopCut_if")
                        new_logs = splitting_infrequent.split_loop_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "loopCut"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.initial_start_activities,
                                    initial_end_activities=self.initial_end_activities,
                                    parameters=parameters))

                else:
                    self.apply_fall_through_infrequent(parameters)
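
Taken together, detect_cut_if proceeds in a fixed order: base cases, a plain IM cut on the unfiltered dfg, the same cut detection on the threshold-filtered dfg (with infrequent-aware splitting), and only then the fall-throughs. A compact sketch of that decision order (method names from the code above; the return value is illustrative only):

# schematic of detect_cut_if's decision order; a sketch, not the original code
def detect_cut_order(self, activity_key, parameters):
    if base_case.empty_log(self.log):
        return 'empty_log'
    if base_case.single_activity(self.log, activity_key):
        return 'single_activity'
    found, type_of_cut, cut = self.check_cut_im_plain()
    if found:  # cut found on the unfiltered dfg: plain IM splitting applies
        self.apply_cut_im_plain(type_of_cut, cut, activity_key)
        return type_of_cut
    self.filter_dfg_on_threshold()  # drop dfg edges below the noise threshold
    found, type_of_cut, cut = self.check_cut_im_plain()
    if found:  # same detection, but infrequent-aware splitting (see above)
        return type_of_cut
    self.apply_fall_through_infrequent(parameters)
    return self.detected_cut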
Beispiel #14
0
    def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
        # dfg_viz = dfg_factory.apply(self.log)
        # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
        # dfg_vis_factory.view(gviz)
        if type_of_cut == 'concurrent':
            self.detected_cut = 'concurrent'
            new_logs = split.split_xor(cut[1], self.log, activity_key)
            for l in new_logs:
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    l, parameters=self.parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    l, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        l, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        l, parameters=self.parameters).keys())
                self.children.append(
                    SubtreeInfrequent(
                        l,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=self.parameters))
        elif type_of_cut == 'sequential':
            new_logs = split.split_sequence(cut[1], self.log, activity_key)
            self.detected_cut = "sequential"
            for l in new_logs:
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    l, parameters=self.parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    l, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        l, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        l, parameters=self.parameters).keys())
                self.children.append(
                    SubtreeInfrequent(
                        l,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=self.parameters))
        elif type_of_cut == 'parallel':
            new_logs = split.split_parallel(cut[1], self.log, activity_key)
            self.detected_cut = "parallel"
            for l in new_logs:
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    l, parameters=self.parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    l, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        l, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        l, parameters=self.parameters).keys())
                self.children.append(
                    SubtreeInfrequent(
                        l,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=self.parameters))
        elif type_of_cut == 'loopCut':
            new_logs = split.split_loop(cut[1], self.log, activity_key)
            self.detected_cut = "loopCut"
            for l in new_logs:
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    l, parameters=self.parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    l, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        l, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        l, parameters=self.parameters).keys())
                self.children.append(
                    SubtreeInfrequent(
                        l,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=self.parameters))
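
All four branches above repeat the same recompute-and-recurse step on each sublog. A factored sketch of that shared step (the helper name _recurse_on is hypothetical, not part of the original class):

# hypothetical helper capturing the block repeated in every branch above
def _recurse_on(self, sublog, activity_key):
    new_dfg = [(k, v) for k, v in dfg_inst.apply(
        sublog, parameters=self.parameters).items() if v > 0]
    activities = attributes_filter.get_attribute_values(sublog, activity_key)
    start_acts = list(start_activities_filter.get_start_activities(
        sublog, parameters=self.parameters).keys())
    end_acts = list(end_activities_filter.get_end_activities(
        sublog, parameters=self.parameters).keys())
    self.children.append(
        SubtreeInfrequent(
            sublog, new_dfg, self.master_dfg, self.initial_dfg, activities,
            self.counts, self.rec_depth + 1, self.f,
            noise_threshold=self.noise_threshold,
            start_activities=start_acts, end_activities=end_acts,
            initial_start_activities=self.initial_start_activities,
            initial_end_activities=self.initial_end_activities,
            parameters=self.parameters))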
Beispiel #15
0
def apply_tree(log, parameters):
    """
    Apply the IM_FF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    if type(log) is pd.DataFrame:
        vars = variants_get.get_variants_count(log, parameters=parameters)
        return apply_tree_variants(vars, parameters=parameters)
    else:
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        log = converter.apply(log, parameters=parameters)
        # keep only the activity attribute (since the others are not used)
        log = filtering_utils.keep_only_one_attribute_per_event(
            log, activity_key)

        noise_threshold = exec_utils.get_param_value(
            Parameters.NOISE_THRESHOLD, parameters,
            shared_constants.NOISE_THRESHOLD_IMF)

        dfg = [(k, v)
               for k, v in dfg_inst.apply(log, parameters=parameters).items()
               if v > 0]
        c = Counts()
        activities = attributes_filter.get_attribute_values(log, activity_key)
        start_activities = list(
            start_activities_filter.get_start_activities(
                log, parameters=parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                log, parameters=parameters).keys())
        contains_empty_traces = False
        traces_length = [len(trace) for trace in log]
        if traces_length:
            contains_empty_traces = min(traces_length) == 0

        # set the filtering threshold from the noise threshold and the maximum edge frequency in the dfg:
        max_value = 0
        for key, value in dfg:
            if value > max_value:
                max_value = value
        threshold = noise_threshold * max_value
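        # e.g. with noise_threshold = 0.2 and a most frequent dfg edge seen
        # 50 times, threshold = 10: edges observed fewer than 10 times are
        # treated as infrequent during cut detection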

        recursion_depth = 0
        sub = subtree.make_tree(log,
                                dfg,
                                dfg,
                                dfg,
                                activities,
                                c,
                                recursion_depth,
                                noise_threshold,
                                threshold,
                                start_activities,
                                end_activities,
                                start_activities,
                                end_activities,
                                parameters=parameters)

        process_tree = get_tree_repr_implain.get_repr(
            sub, 0, contains_empty_traces=contains_empty_traces)
        # Ensures consistency of the parent pointers in the process tree
        tree_consistency.fix_parent_pointers(process_tree)
        # Fixes a one-child XOR that is added when single-activity flowers are found
        tree_consistency.fix_one_child_xor_flower(process_tree)
        # folds the process tree (to simplify it in case fall-throughs/filtering were applied)
        process_tree = util.fold(process_tree)

        return process_tree
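
A minimal usage sketch for apply_tree (the importer path and file name are assumptions for illustration, not part of this snippet):

# hedged usage sketch, assuming pm4py's XES importer at this import path
from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply('running-example.xes')  # placeholder file name
tree = apply_tree(log, parameters={})  # IM_FF process tree for the log
print(tree)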
Beispiel #16
0
def trans_alpha(log, parameters=None):
    dfg = {k: v for k, v in dfg_inst.apply(log).items() if v > 0}
    if parameters is None:
        parameters = {}
    if pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    start_activities = endpoints.derive_start_activities_from_log(
        log, parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])
    end_activities = endpoints.derive_end_activities_from_log(
        log, parameters[pm_util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY])

    labels = set()
    for el in dfg:
        labels.add(el[0])
        labels.add(el[1])
    for a in start_activities:
        labels.add(a)
    for a in end_activities:
        labels.add(a)
    labels = list(labels)

    alpha_abstraction = alpha_classic_abstraction.ClassicAlphaAbstraction(
        start_activities, end_activities, dfg)

    pairs = list(
        map(
            lambda p: ({p[0]}, {p[1]}),
            filter(
                lambda p: classic.__initial_filter(
                    alpha_abstraction.parallel_relation, p),
                alpha_abstraction.causal_relation)))
    # this part added: collect length-two-loop candidates among parallel-related pairs, using the 'enabled' event attribute
    parallel_set = alpha_abstraction.parallel_relation
    loop_cand_set = set()
    for rel in parallel_set.copy():
        not_loop_flag = False
        pre_act = rel[0]
        post_act = rel[1]
        for trace in log:
            for i in range(len(trace) - 1):
                if (trace[i]['concept:name'] == pre_act
                        and trace[i + 1]['concept:name'] == post_act):
                    pre_en = trace[i]['enabled']
                    if post_act in pre_en:  # post_act already enabled: not a loop
                        not_loop_flag = True
                        break
            if not_loop_flag:  # stop scanning traces once counter-evidence is found
                break
        if not not_loop_flag:
            loop_cand_set.add((pre_act, post_act))
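    # a genuine length-two loop a -> b -> a requires both orderings among the
    # candidates and two distinct activities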
    loop_set = set()
    for loop_cand in loop_cand_set:
        if loop_cand[::-1] in loop_cand_set and loop_cand[0] != loop_cand[1]:
            loop_set.add(loop_cand)

    # expand causal pairs into maximal set pairs, as in the classic alpha miner
    for i in range(0, len(pairs)):
        t1 = pairs[i]
        for j in range(i, len(pairs)):
            t2 = pairs[j]
            if t1 != t2:
                if t1[0].issubset(t2[0]) or t1[1].issubset(t2[1]):
                    if not (classic.__check_is_unrelated(
                            alpha_abstraction.parallel_relation,
                            alpha_abstraction.causal_relation, t1[0], t2[0])
                            or classic.__check_is_unrelated(
                                alpha_abstraction.parallel_relation,
                                alpha_abstraction.causal_relation, t1[1],
                                t2[1])):
                        new_alpha_pair = (t1[0] | t2[0], t1[1] | t2[1])
                        if new_alpha_pair not in pairs:
                            pairs.append((t1[0] | t2[0], t1[1] | t2[1]))

    internal_places = filter(lambda p: classic.__pair_maximizer(pairs, p),
                             pairs)
    net = petri.petrinet.PetriNet('alpha_classic_net_' + str(time.time()))
    label_transition_dict = {}

    for i in range(0, len(labels)):
        label_transition_dict[labels[i]] = petri.petrinet.PetriNet.Transition(
            labels[i], labels[i])
        net.transitions.add(label_transition_dict[labels[i]])

    for pair in internal_places:
        place = petri.petrinet.PetriNet.Place(str(pair))
        net.places.add(place)

        for in_arc in pair[0]:
            petri.utils.add_arc_from_to(label_transition_dict[in_arc], place,
                                        net)
        for out_arc in pair[1]:
            petri.utils.add_arc_from_to(place, label_transition_dict[out_arc],
                                        net)

    src = classic.__add_source(net, alpha_abstraction.start_activities,
                               label_transition_dict)
    sink = classic.__add_sink(net, alpha_abstraction.end_activities,
                              label_transition_dict)

    loop_tail_set = set()
    for t in label_transition_dict.values():  # unconnected transitions are length-two-loop tail candidates
        if len(t.in_arcs) == 0 and len(t.out_arcs) == 0:
            loop_tail_set.add(t)

    for loop_tail in loop_tail_set:
        if loop_set:  # only if length-two loops were detected
            loop_body = None
            for loop in loop_set:
                if loop[0] == loop_tail.name:
                    loop_body = label_transition_dict[loop[1]]
            if loop_body is not None:
                for place in net.places:
                    for in_arc in place.in_arcs:
                        if in_arc.source == loop_body:
                            petri.utils.add_arc_from_to(
                                place, label_transition_dict[loop_tail.name],
                                net)
                            break
                    for out_arc in place.out_arcs:
                        if out_arc.target == loop_body:
                            petri.utils.add_arc_from_to(
                                label_transition_dict[loop_tail.name], place,
                                net)

    return net, Marking({src: 1}), Marking({sink: 1})
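
A short usage sketch for trans_alpha (the importer path and file name are assumptions; the log must carry the custom 'enabled' event attribute that the loop detection above relies on):

# hedged usage sketch; 'enabled' must list the activities enabled per event
from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply('log_with_enabled_info.xes')  # placeholder file name
net, initial_marking, final_marking = trans_alpha(log)
print(net)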