Esempio n. 1
0
    def initialize_tree(self,
                        dfg,
                        initial_dfg,
                        activities,
                        second_iteration=False):
        """
        Set up the state of this subtree starting from a directly-follows graph

        Parameters
        -----------
        dfg
            Directly follows graph of this subtree
        initial_dfg
            Reference directly follows graph that should be taken into account when adding
            hidden/loop transitions
        activities
            Activities of this subtree (when None, they are read from dfg)
        second_iteration
            Boolean that indicates if we are executing this method for the second time
        """
        self.second_iteration = second_iteration

        self.activities = (get_activities_from_dfg(dfg)
                           if activities is None else copy(activities))

        if not second_iteration:
            self.dfg = copy(dfg)
        else:
            # second pass: the graph already stored on the object (not the dfg
            # argument) is cleaned using the noise threshold
            self.dfg = clean_dfg_based_on_noise_thresh(
                self.dfg, self.activities, self.noise_threshold)

        self.initial_dfg = initial_dfg

        # edge maps and self loops of the current graph
        self.outgoing = get_outgoing_edges(self.dfg)
        self.ingoing = get_ingoing_edges(self.dfg)
        self.self_loop_activities = get_activities_self_loop(self.dfg)

        # edge maps of the reference graph
        self.initial_outgoing = get_outgoing_edges(self.initial_dfg)
        self.initial_ingoing = get_ingoing_edges(self.initial_dfg)

        # direction of the activities, also in list form
        self.activities_direction = get_activities_direction(
            self.dfg, self.activities)
        self.activities_dir_list = get_activities_dirlist(
            self.activities_direction)

        # negated graph and the structures derived from it
        negated = negate(self.dfg)
        self.negated_dfg = negated
        self.negated_activities = get_activities_from_dfg(negated)
        self.negated_outgoing = get_outgoing_edges(negated)
        self.negated_ingoing = get_ingoing_edges(negated)

        self.detected_cut = None
        self.children = []

        # the cut detection is started from here only on the second pass
        if second_iteration:
            self.detect_cut(second_iteration=second_iteration)
Esempio n. 2
0
def apply(dfg, parameters=None):
    """
    Clean a Directly-Follows graph based on a noise threshold

    Parameters
    -----------
    dfg
        Directly-Follows graph
    parameters
        Possible parameters of the algorithm, including:
            noiseThreshold -> Threshold of noise in the algorithm
            (filtering_constants.DEFAULT_NOISE_THRESH_DF is used when missing)

    Returns
    ----------
    newDfg
        Cleaned dfg based on noise threshold
    """
    if parameters is None:
        parameters = {}

    if "noiseThreshold" in parameters:
        noise_threshold = parameters["noiseThreshold"]
    else:
        noise_threshold = filtering_constants.DEFAULT_NOISE_THRESH_DF

    # the activities are taken from the graph itself
    dfg_activities = get_activities_from_dfg(dfg)
    cleaned_dfg = clean_dfg_based_on_noise_thresh(dfg, dfg_activities,
                                                  noise_threshold)
    return cleaned_dfg
Esempio n. 3
0
def apply(dfg,
          log=None,
          parameters=None,
          activities_count=None,
          measure="frequency"):
    """
    Build the Graphviz visualization of a directly-follows graph

    Parameters
    -----------
    dfg
        Directly-follows graph
    log
        (if provided) Log from which the activity counts are read
    parameters
        Parameters of the visualization (activity key, format, maximum number of edges,
        start activities, end activities)
    activities_count
        (if provided) Count associated to each activity
    measure
        Measure forwarded to graphviz_visualization

    Returns
    -----------
    The object returned by graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    def read_param(key, default):
        # shorthand for the parameter lookup with a fallback value
        return exec_utils.get_param_value(key, parameters, default)

    activity_key = read_param(Parameters.ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    image_format = read_param(Parameters.FORMAT, "png")
    max_no_of_edges_in_diagram = read_param(
        Parameters.MAX_NO_EDGES_IN_DIAGRAM, 75)
    start_activities = read_param(Parameters.START_ACTIVITIES, [])
    end_activities = read_param(Parameters.END_ACTIVITIES, [])

    if activities_count is None:
        if log is None:
            # without a log, every activity of the graph gets a count of 1
            activities_count = dict.fromkeys(
                dfg_utils.get_activities_from_dfg(dfg), 1)
        else:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure=measure,
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities)
Esempio n. 4
0
def apply(dfg, log=None, parameters=None, activities_count=None, soj_time=None):
    """
    Build the Graphviz visualization of a directly-follows graph using the "performance" measure

    Parameters
    -----------
    dfg
        Directly-follows graph
    log
        (if provided) Log from which activity counts and sojourn times are read
    parameters
        Parameters of the visualization (activity key, format, maximum number of edges,
        start activities, end activities, font size)
    activities_count
        (if provided) Count associated to each activity
    soj_time
        (if provided) Sojourn time associated to each activity

    Returns
    -----------
    The object returned by graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    max_no_of_edges_in_diagram = exec_utils.get_param_value(Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 100000)
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters, [])
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters, [])
    # the font size is forwarded as a string
    font_size = str(exec_utils.get_param_value(Parameters.FONT_SIZE, parameters, 12))
    activities = dfg_utils.get_activities_from_dfg(dfg)

    has_log = log is not None

    if activities_count is None:
        # fallback without a log: a count of 1 for every activity of the graph
        activities_count = (attr_get.get_attribute_values(log, activity_key, parameters=parameters)
                            if has_log else dict.fromkeys(activities, 1))

    if soj_time is None:
        # fallback without a log: a sojourn time of 0 for every activity of the graph
        soj_time = (soj_time_get.apply(log, parameters=parameters)
                    if has_log else dict.fromkeys(activities, 0))

    return graphviz_visualization(activities_count, dfg, image_format=image_format, measure="performance",
                                  max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
                                  start_activities=start_activities, end_activities=end_activities,
                                  soj_time=soj_time, font_size=font_size)
Esempio n. 5
0
    def initialize_tree(self):
        """
        Compute the state of this subtree (activities, DFG and derived structures)
        starting from its traces
        """
        if self.activities is None:
            self.activities = list(
                {act for trace in self.traces for act in trace})
        else:
            parent_is_loop = (self.parent is not None
                              and self.parent.detected_cut_add_info == "loop")
            if parent_is_loop:
                # NOTE(review): the return value is not stored here, unlike the
                # general case below - confirm get_traces_loop updates self.traces
                self.get_traces_loop()
            else:
                self.traces = self.get_traces_general()

            if self.second_iteration:
                self.traces, self.activities = self.clean_traces_noise()

        # first/last activity of every non-empty trace
        self.start_activities = list(
            {trace[0] for trace in self.traces if trace})
        self.end_activities = list(
            {trace[-1] for trace in self.traces if trace})
        self.activities_occurrences = Counter(
            act for trace in self.traces for act in trace)

        # the DFG is stored as a list of ((source, target), count) pairs
        edge_counts = Counter(
            pair for trace in self.traces for pair in zip(trace, trace[1:]))
        self.dfg = list(edge_counts.items())
        self.initial_dfg = self.dfg

        self.outgoing = get_outgoing_edges(self.dfg)
        self.ingoing = get_ingoing_edges(self.dfg)
        self.self_loop_activities = get_activities_self_loop(self.dfg)
        self.activities_direction = get_activities_direction(
            self.dfg, self.activities)
        self.activities_dir_list = get_activities_dirlist(
            self.activities_direction)

        negated = negate(self.dfg)
        self.negated_dfg = negated
        self.negated_activities = get_activities_from_dfg(negated)
        self.negated_outgoing = get_outgoing_edges(negated)
        self.negated_ingoing = get_ingoing_edges(negated)

        self.contains_empty_traces = any(
            len(trace) == 0 for trace in self.traces)

        # a skip is required when there are empty traces, unless the parent cut
        # is a xor; rec_must_insert_skip forces it in any case
        under_xor = self.parent is not None and self.parent.detected_cut == "xor"
        skip_for_empty_traces = self.contains_empty_traces and not under_xor
        self.must_insert_skip = self.rec_must_insert_skip or skip_for_empty_traces

        if not self.second_iteration:
            self.second_tree = self.clone_second_it()

        self.detected_cut = None
        self.children = []
Esempio n. 6
0
def dfg_vis(dfg, log=None, parameters=None, activities_count=None, measure="frequency"):
    """
    Build the Graphviz visualization of a directly-follows graph

    When activities_count is not provided, it is computed here and extended with a
    "start" entry: the number of items of the log, or None if no log is given.

    Parameters
    -----------
    dfg
        Directly-follows graph
    log
        (if provided) Log from which the activity counts are read
    parameters
        Parameters of the visualization (activity key, "maxNoOfEdgesInDiagram",
        "start_activities", "end_activities")
    activities_count
        (if provided) Count associated to each activity
    measure
        Measure forwarded to graphviz_visualization

    Returns
    -----------
    The object returned by graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    max_no_of_edges_in_diagram = parameters.get("maxNoOfEdgesInDiagram", 75)
    start_activities = parameters.get("start_activities", [])
    end_activities = parameters.get("end_activities", [])

    if activities_count is None:
        if log is None:
            # without a log, every activity of the graph gets a count of 1
            activities_count = dict.fromkeys(dfg_utils.get_activities_from_dfg(dfg), 1)
            start_count = None
        else:
            activities_count = attributes_filter.get_attribute_values(
                log, activity_key, parameters=parameters
            )
            start_count = len(log)
        activities_count["start"] = start_count

    return graphviz_visualization(
        activities_count,
        dfg,
        measure=measure,
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities,
    )
def apply(dfg,
          log=None,
          parameters=None,
          activities_count=None,
          measure="frequency"):
    """
    Build the Graphviz visualization of a directly-follows graph

    Parameters
    -----------
    dfg
        Directly-follows graph
    log
        (if provided) Log from which the activity counts are read
    parameters
        Parameters of the visualization (activity key, "format", "maxNoOfEdgesInDiagram",
        "start_activities", "end_activities")
    activities_count
        (if provided) Count associated to each activity
    measure
        Measure forwarded to graphviz_visualization

    Returns
    -----------
    The object returned by graphviz_visualization
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)

    # settings read from the parameters, each with its fallback value
    image_format = parameters.get("format", "png")
    max_no_of_edges_in_diagram = parameters.get("maxNoOfEdgesInDiagram", 75)
    start_activities = parameters.get("start_activities", [])
    end_activities = parameters.get("end_activities", [])

    if activities_count is None and log is not None:
        activities_count = attr_get.get_attribute_values(
            log, activity_key, parameters=parameters)
    elif activities_count is None:
        # without a log, every activity of the graph gets a count of 1
        activities_count = dict.fromkeys(
            dfg_utils.get_activities_from_dfg(dfg), 1)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure=measure,
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities)
Esempio n. 8
0
def apply(dfg, parameters=None):
    """
    Builds a Petri net (with initial and final marking) out of a directly-follows graph

    The net contains a "source" place, a "sink" place and one place per activity.
    A labeled transition goes from the source to the place of each start activity,
    an invisible transition goes from the place of each end activity to the sink,
    and each edge (a, b) of the DFG becomes a transition labeled b between the
    places of a and b.

    Parameters
    -------------
    dfg
        Directly-follows graph
    parameters
        Parameters; start and end activities are read from here, and inferred
        from the DFG when missing

    Returns
    -------------
    net
        Petri net
    im
        Initial marking (one token in the source place)
    fm
        Final marking (one token in the sink place)
    """
    if parameters is None:
        parameters = {}

    start_activities = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters,
        dfg_utils.infer_start_activities(dfg))
    end_activities = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters,
        dfg_utils.infer_end_activities(dfg))
    activities = dfg_utils.get_activities_from_dfg(dfg)

    net = PetriNet("")
    im = Marking()
    fm = Marking()

    source = PetriNet.Place("source")
    net.places.add(source)
    im[source] = 1
    sink = PetriNet.Place("sink")
    net.places.add(sink)
    fm[sink] = 1

    # one place per activity
    places_corr = {}
    for act in activities:
        act_place = PetriNet.Place(act)
        places_corr[act] = act_place
        net.places.add(act_place)

    # running counter used as suffix of the transition names
    index = 0

    # source -> start activities (only the ones that have a place)
    for act in start_activities:
        if act not in places_corr:
            continue
        index += 1
        trans = PetriNet.Transition(act + "_" + str(index), act)
        net.transitions.add(trans)
        pn_util.add_arc_from_to(source, trans, net)
        pn_util.add_arc_from_to(trans, places_corr[act], net)

    # end activities -> sink, through transitions without label
    for act in end_activities:
        if act not in places_corr:
            continue
        index += 1
        inv_trans = PetriNet.Transition(act + "_" + str(index), None)
        net.transitions.add(inv_trans)
        pn_util.add_arc_from_to(places_corr[act], inv_trans, net)
        pn_util.add_arc_from_to(inv_trans, sink, net)

    # one transition per edge, labeled with the target activity
    for edge in dfg:
        source_act = edge[0]
        target_act = edge[1]

        index += 1
        trans = PetriNet.Transition(target_act + "_" + str(index), target_act)
        net.transitions.add(trans)

        pn_util.add_arc_from_to(places_corr[source_act], trans, net)
        pn_util.add_arc_from_to(trans, places_corr[target_act], net)

    return net, im, fm
Esempio n. 9
0
def detect_cut(initial_dfg, dfg, parent, conf, process, initial_start_activities, initial_end_activities, activities):
    """
    Detect generally a cut in the graph (applying all the algorithms)

    The cuts are tried in this order: xor, sequential, parallel, loop (with its
    sequential fallback "seq2") and, when nothing else applies, flower.
    For each component of the detected cut, the DFG restricted to the component
    is passed to save_cut. Nothing is saved for the flower fallback.

    Parameters
    -----------
    initial_dfg
        Reference DFG, only forwarded to the parallel cut detection
    dfg
        Directly-follows graph on which the cut is searched
    parent
        Identifier of the parent, forwarded to save_cut. With the value "m",
        the initial start/end activities and the activities are computed from
        dfg, ignoring the corresponding arguments
    conf
        Forwarded to save_cut
    process
        Forwarded to save_cut
    initial_start_activities
        Initial start activities (ignored when parent is "m")
    initial_end_activities
        Initial end activities (ignored when parent is "m")
    activities
        Activities of the graph (ignored when parent is "m")

    Returns
    -----------
    found_cut
        Name of the detected cut: "xor", "seq", "par", "loop", "seq2" or "flower";
        "base_xor" when dfg is empty
    """
    if not dfg:
        print("no DFG or base_xor")
        return "base_xor"

    ingoing = get_ingoing_edges(dfg)
    outgoing = get_outgoing_edges(dfg)

    start_activities = infer_start_activities(dfg)
    end_activities = infer_end_activities(dfg)
    if parent == "m":
        initial_start_activities = start_activities
        initial_end_activities = end_activities
        activities = get_activities_from_dfg(dfg)
    else:
        activities = set(activities)
    conn_components = detection_utils.get_connected_components(ingoing, outgoing, activities)

    # 1) xor cut
    xor_cut = detect_xor_cut(dfg, conn_components)
    if xor_cut[0]:
        found_cut = "xor"
        print(found_cut)
        for index, comp in enumerate(xor_cut[1]):
            filtered_dfg = filter_dfg_on_act(dfg, comp)
            save_cut(filtered_dfg, comp, parent, found_cut, index, conf, process,
                     initial_start_activities, initial_end_activities)
        return found_cut

    # 2) sequential cut
    this_nx_graph = detection_utils.transform_dfg_to_directed_nx_graph(activities, dfg)
    strongly_connected_components = [list(x) for x in nx.strongly_connected_components(this_nx_graph)]
    seq_cut = detect_sequential_cut(dfg, strongly_connected_components)
    if seq_cut[0]:
        found_cut = "seq"
        print("seq")
        for index, comp in enumerate(seq_cut[1]):
            filtered_dfg = filter_dfg_on_act(dfg, comp)
            print(filtered_dfg)
            save_cut(filtered_dfg, comp, parent, found_cut, index, conf, process,
                     initial_start_activities, initial_end_activities)
        return found_cut

    # 3) parallel cut
    negated_dfg = detection_utils.negate(dfg)
    negated_ingoing = get_ingoing_edges(negated_dfg)
    negated_outgoing = get_outgoing_edges(negated_dfg)
    par_cut = detect_parallel_cut(this_nx_graph, strongly_connected_components, negated_ingoing,
                                  negated_outgoing, activities, dfg, initial_start_activities,
                                  initial_end_activities, initial_dfg)
    if par_cut[0]:
        found_cut = "par"
        print("par")
        # the components of the parallel cut are numbered from 1 (the other cuts start from 0)
        for index, comp in enumerate(par_cut[1], start=1):
            filtered_dfg = filter_dfg_on_act(dfg, comp)
            save_cut(filtered_dfg, comp, parent, found_cut, index, conf, process,
                     initial_start_activities, initial_end_activities)
        return found_cut

    # 4) loop cut; when loop_cut[2] is false the components are saved as "seq2"
    start_activities = infer_start_activities(dfg)
    end_activities = infer_end_activities(dfg)
    loop_cut = detect_loop_cut(dfg, activities, start_activities, end_activities)
    if loop_cut[0]:
        if loop_cut[2]:
            found_cut = "loop"
            print("loop")
        else:
            found_cut = "seq2"
            print('seq 2')
        for index, comp in enumerate(loop_cut[1]):
            filtered_dfg = filter_dfg_on_act(dfg, comp)
            save_cut(filtered_dfg, comp, parent, found_cut, index, conf, process,
                     initial_start_activities, initial_end_activities)
        # return the cut that was actually saved: previously the result was
        # overwritten with "flower" even after a loop/seq2 cut had been found
        return found_cut

    # 5) no cut found: flower fallback
    found_cut = "flower"
    print("flower")
    return found_cut
Esempio n. 10
0
    def __init__(self,
                 frequency_dfg,
                 activities=None,
                 start_activities=None,
                 end_activities=None,
                 activities_occurrences=None,
                 default_edges_color="#000000",
                 performance_dfg=None,
                 dfg_window_2=None,
                 freq_triples=None,
                 net_name=DEFAULT_NET_NAME):
        """
        Initialize a Heuristics Net

        The implementation is based on the original paper on Heuristics Miner, namely:

        Weijters, A. J. M. M., Wil MP van Der Aalst, and AK Alves De Medeiros.
        "Process mining with the heuristics miner-algorithm."
        Technische Universiteit Eindhoven, Tech. Rep. WP 166 (2006): 1-34.

        and it manages to calculate the dependency matrix, the loops of length one and two, and
        the AND measure

        Parameters
        -------------
        frequency_dfg
            Directly-Follows graph (frequency)
        activities
            Activities (read from the frequency DFG when not provided)
        start_activities
            Start activities (inferred from the frequency DFG when not provided)
        end_activities
            End activities (inferred from the frequency DFG when not provided)
        activities_occurrences
            Activities occurrences (computed from the frequency DFG when not provided)
        default_edges_color
            (If provided) Default edges color
        performance_dfg
            Performance DFG
        dfg_window_2
            DFG window 2
        freq_triples
            Frequency triples
        net_name
            (If provided) name of the heuristics net
        """
        # name, start/end activities and edges color are stored as
        # one-element lists
        self.net_name = [net_name]
        self.default_edges_color = [default_edges_color]

        # structures that are empty at construction time
        self.nodes = {}
        self.dependency_matrix = {}
        self.dfg_matrix = {}
        self.dfg_window_2_matrix = {}
        self.freq_triples_matrix = {}

        self.dfg = frequency_dfg
        self.performance_dfg = performance_dfg
        self.dfg_window_2 = dfg_window_2
        self.freq_triples = freq_triples

        if performance_dfg is None:
            self.node_type = "frequency"
        else:
            self.node_type = "performance"

        if activities is None:
            activities = dfg_utils.get_activities_from_dfg(frequency_dfg)
        self.activities = activities

        if start_activities is None:
            start_activities = dfg_utils.infer_start_activities(frequency_dfg)
        self.start_activities = [start_activities]

        if end_activities is None:
            end_activities = dfg_utils.infer_end_activities(frequency_dfg)
        self.end_activities = [end_activities]

        if activities_occurrences is None:
            activities_occurrences = {
                act: dfg_utils.sum_activities_count(frequency_dfg, [act])
                for act in self.activities
            }
        self.activities_occurrences = activities_occurrences
Esempio n. 11
0
def apply(dfg: Dict[Tuple[str, str], int],
          log: EventLog = None,
          parameters: Optional[Dict[Any, Any]] = None,
          activities_count: Dict[str, int] = None,
          soj_time: Dict[str, float] = None) -> Digraph:
    """
    Visualize a frequency directly-follows graph

    Parameters
    -----------------
    dfg
        Frequency Directly-follows graph
    log
        (if provided) Event log for the calculation of statistics
    parameters
        Variant-specific parameters
    activities_count
        (if provided) Dictionary associating to each activity the number of occurrences in the log.
    soj_time
        (if provided) Dictionary associating to each activity the average sojourn time

    Returns
    -----------------
    gviz
        Graphviz digraph
    """
    if parameters is None:
        parameters = {}

    def read_param(key, default):
        # shorthand for the parameter lookup with a fallback value
        return exec_utils.get_param_value(key, parameters, default)

    activity_key = read_param(Parameters.ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    image_format = read_param(Parameters.FORMAT, "png")
    max_no_of_edges_in_diagram = read_param(
        Parameters.MAX_NO_EDGES_IN_DIAGRAM, 100000)
    start_activities = read_param(Parameters.START_ACTIVITIES, {})
    end_activities = read_param(Parameters.END_ACTIVITIES, {})
    # the font size is forwarded as a string
    font_size = str(read_param(Parameters.FONT_SIZE, 12))
    activities = dfg_utils.get_activities_from_dfg(dfg)
    bgcolor = read_param(Parameters.BGCOLOR, "transparent")
    stat_locale = read_param(Parameters.STAT_LOCALE, None)
    if stat_locale is None:
        stat_locale = {}

    if activities_count is None:
        if log is None:
            # the frequency of an activity in the log is at least the number of occurrences of
            # incoming arcs in the DFG.
            activities_count = Counter(dict.fromkeys(activities, 0))
            for edge, frequency in dfg.items():
                activities_count[edge[1]] += frequency
            # if the frequency of the start activities nodes is also provided, use also that.
            if isinstance(start_activities, dict):
                for act, frequency in start_activities.items():
                    activities_count[act] += frequency
        else:
            activities_count = attr_get.get_attribute_values(
                log, activity_key, parameters=parameters)

    if soj_time is None:
        if log is None:
            # without a log, a sojourn time of 0 is used for every activity
            soj_time = dict.fromkeys(activities, 0)
        else:
            soj_time = soj_time_get.apply(log, parameters=parameters)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure="frequency",
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities,
        soj_time=soj_time,
        font_size=font_size,
        bgcolor=bgcolor,
        stat_locale=stat_locale)