Ejemplo n.º 1
0
    def initialize_tree(self,
                        dfg,
                        log,
                        initial_dfg,
                        activities,
                        second_iteration=False,
                        end_call=True,
                        parameters=None):
        """
        Set up the state of this subtree and start the cut detection

        Parameters
        -----------
        dfg
            Directly follows graph of this subtree
        log
            Event log; stored as both ``self.log`` and ``self.original_log``
        initial_dfg
            Reference directly follows graph that should be taken into
            account when adding hidden/loop transitions
        activities
            Activities of this subtree; when None they are derived from ``dfg``
        second_iteration
            Boolean that indicates if we are executing this method for the
            second time. In that case the DFG already stored on the object is
            cleaned with ``self.noise_threshold`` instead of being replaced by
            a copy of ``dfg``
        end_call
            Not read anywhere in this method body
        parameters
            Stored on the object and forwarded to ``detect_cut``
        """
        self.second_iteration = second_iteration

        # Activities: either inferred from the given DFG or a copy of the
        # caller-provided collection.
        self.activities = (get_activities_from_dfg(dfg)
                           if activities is None else copy(activities))

        if not second_iteration:
            self.dfg = copy(dfg)
        else:
            # Second pass: reuse the DFG stored by a previous initialisation
            # and prune it according to the noise threshold.
            self.dfg = clean_dfg_based_on_noise_thresh(
                self.dfg, self.activities, self.noise_threshold)

        self.initial_dfg = initial_dfg

        # Edge views of the (possibly cleaned) DFG of this subtree.
        self.outgoing = get_outgoing_edges(self.dfg)
        self.ingoing = get_ingoing_edges(self.dfg)
        self.self_loop_activities = get_activities_self_loop(self.dfg)

        # Edge views of the reference DFG.
        self.initial_outgoing = get_outgoing_edges(self.initial_dfg)
        self.initial_ingoing = get_ingoing_edges(self.initial_dfg)

        # Negated DFG and everything derived from it.
        negated = negate(self.dfg)
        self.negated_dfg = negated
        self.negated_activities = get_activities_from_dfg(negated)
        self.negated_outgoing = get_outgoing_edges(negated)
        self.negated_ingoing = get_ingoing_edges(negated)

        # Plain bookkeeping attributes.
        self.detected_cut = None
        self.children = []
        self.log = self.original_log = log
        self.parameters = parameters

        # detect_cut is always invoked with second_iteration=False here,
        # regardless of the flag this method was called with.
        self.detect_cut(second_iteration=False, parameters=parameters)
Ejemplo n.º 2
0
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm. The dictionary passed by the caller is
        not modified: the function works on a shallow copy of it.

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model (Petri net exported as string)
    format
        Format of the model (".pnml")
    log_type
        The constant "xes"
    activities
        List of the activities found in the filtered log
    start_activities
        List of the start activities of the filtered log
    end_activities
        List of the end activities of the filtered log
    gviz_base64
        Base64 encoding of ``str(gviz)``
    ret_graph
        Result of ``get_graph.get_graph_from_dfg`` on the cleaned DFG
    model_type
        The constant "dfg"
    decoration
        The constant "freq"
    second_model
        Always None for this variant
    second_format
        Always the empty string for this variant
    activity_key
        The activity key that has been used
    """
    # Shallow copy: "format", "start_activities" and "end_activities" are
    # written below and must not leak into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(
            filtered_log, parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    # Settings consumed by the DFG visualisation.
    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    net, im, fm = dfg_conv_factory.apply(
        dfg,
        parameters={
            "start_activities": start_activities,
            "end_activities": end_activities,
        })

    return (get_base64_from_gviz(gviz), export_petri_as_string(net, im, fm),
            ".pnml", "xes", activities, start_activities, end_activities,
            gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key)
Ejemplo n.º 3
0
def apply(dataframe, parameters=None):
    """
    Gets the performance DFG

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. The dictionary passed by the caller is
        not modified: the function works on a shallow copy of it.

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model (Petri net exported as string)
    format
        Format of the model (".pnml")
    log_type
        The constant "parquet"
    activities
        List of the activities found in the filtered dataframe
    start_activities
        List of the start activities of the filtered dataframe
    end_activities
        List of the end activities returned by the auto filter
    gviz_base64
        Base64 encoding of ``str(gviz)``
    ret_graph
        Result of ``get_graph.get_graph_from_dfg`` on the cleaned DFG
    model_type
        The constant "dfg"
    decoration
        The constant "perf"
    second_model
        Always None for this variant
    second_format
        Always the empty string for this variant
    activity_key
        The activity key that has been used
    """
    # Shallow copy: the auto-filter flag set below must not leak into the
    # dictionary owned by the caller (it would otherwise affect any later use
    # of the same dictionary).
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    # With this flag set, the result of apply_auto_filter is unpacked below as
    # (dataframe, end activities dictionary).
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())

    # measure="both" yields the frequency DFG and the performance DFG.
    dfg, dfg_perf = df_statistics.get_dfg_graph(
        dataframe,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_glue=case_id_glue,
        sort_caseid_required=False,
        sort_timestamp_along_case_id=False,
        measure="both")

    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())

    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    # Keep performance values only for the edges that survived the cleaning.
    dfg_perf = {x: y for x, y in dfg_perf.items() if x in dfg}

    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())

    gviz = dfg_vis_factory.apply(dfg_perf,
                                 activities_count=activities_count,
                                 variant="performance",
                                 parameters={
                                     "format": "svg",
                                     "start_activities": start_activities,
                                     "end_activities": end_activities,
                                 })

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    net, im, fm = dfg_conv_factory.apply(
        dfg,
        parameters={
            "start_activities": start_activities,
            "end_activities": end_activities,
        })

    return (get_base64_from_gviz(gviz), export_petri_as_string(net, im, fm),
            ".pnml", "parquet", activities, start_activities, end_activities,
            gviz_base64, ret_graph, "dfg", "perf", None, "", activity_key)
Ejemplo n.º 4
0
    def calculate(
            self,
            dependency_thresh=defaults.DEFAULT_DEPENDENCY_THRESH,
            and_measure_thresh=defaults.DEFAULT_AND_MEASURE_THRESH,
            min_act_count=defaults.DEFAULT_MIN_ACT_COUNT,
            min_dfg_occurrences=defaults.DEFAULT_MIN_DFG_OCCURRENCES,
            dfg_pre_cleaning_noise_thresh=defaults.
        DEFAULT_DFG_PRE_CLEANING_NOISE_THRESH,
            loops_length_two_thresh=defaults.DEFAULT_LOOP_LENGTH_TWO_THRESH):
        """
        Build the dependency, DFG and performance matrices from ``self.dfg``
        and create the nodes (with their connections) for the accepted edges

        Parameters
        -------------
        dependency_thresh
            (Optional) minimum dependency value an edge needs to be kept
        and_measure_thresh
            (Optional) AND measure threshold, forwarded to the nodes
        min_act_count
            (Optional) minimum number of occurrences of an activity
        min_dfg_occurrences
            (Optional) minimum number of occurrences of a DFG edge
        dfg_pre_cleaning_noise_thresh
            (Optional) DFG pre cleaning noise threshold; the cleaning is
            applied only when the value is greater than 0
        loops_length_two_thresh
            (Optional) loops length two threshold (not read in this method
            body)
        """
        self.dependency_matrix = {}
        self.dfg_matrix = {}
        self.performance_matrix = {}

        if dfg_pre_cleaning_noise_thresh > 0.0:
            self.dfg = clean_dfg_based_on_noise_thresh(
                self.dfg, self.activities, dfg_pre_cleaning_noise_thresh)

        # First pass: fill the three matrices edge by edge.
        for edge, count in self.dfg.items():
            source, target = edge[0], edge[1]
            # Without a performance DFG the frequency doubles as the
            # represented value.
            if self.performance_dfg is None:
                perf = count
            else:
                perf = self.performance_dfg[edge]

            if source not in self.dependency_matrix:
                self.dependency_matrix[source] = {}
                self.dfg_matrix[source] = {}
                self.performance_matrix[source] = {}
            self.dfg_matrix[source][target] = count
            self.performance_matrix[source][target] = perf

            reverse_edge = (target, source)
            if not source == target and reverse_edge in self.dfg:
                # Both directions observed: normalised difference of counts.
                reverse_count = self.dfg[reverse_edge]
                dependency = (count - reverse_count) / (count +
                                                        reverse_count + 1)
            else:
                # Self loop, or edge observed in one direction only.
                dependency = count / (count + 1)
            self.dependency_matrix[source][target] = dependency

        # Second pass: keep the edges passing all the thresholds and wire the
        # corresponding nodes together.
        occurrences = self.activities_occurrences
        for n1, row in self.dependency_matrix.items():
            for n2, dependency in row.items():
                accepted = (n1 in occurrences
                            and occurrences[n1] >= min_act_count
                            and n2 in occurrences
                            and occurrences[n2] >= min_act_count
                            and self.dfg_matrix[n1][n2] >= min_dfg_occurrences
                            and dependency >= dependency_thresh)
                if not accepted:
                    continue

                # Create the endpoints on first use (source first).
                for act in (n1, n2):
                    if act not in self.nodes:
                        self.nodes[act] = Node(
                            self,
                            act,
                            occurrences[act],
                            is_start_node=(act in self.start_activities),
                            is_end_node=(act in self.end_activities),
                            default_edges_color=self.default_edges_color[0],
                            node_type=self.node_type,
                            net_name=self.net_name[0])

                source_node = self.nodes[n1]
                target_node = self.nodes[n2]
                edge_count = self.dfg_matrix[n1][n2]
                repr_value = self.performance_matrix[n1][n2]
                source_node.add_output_connection(target_node,
                                                  dependency,
                                                  edge_count,
                                                  repr_value=repr_value)
                target_node.add_input_connection(source_node,
                                                 dependency,
                                                 edge_count,
                                                 repr_value=repr_value)

        # Final pass: let every node compute its AND measures.
        for node in self.nodes.values():
            node.calculate_and_measure_out(
                and_measure_thresh=and_measure_thresh)
            node.calculate_and_measure_in(
                and_measure_thresh=and_measure_thresh)
Ejemplo n.º 5
0
    def calculate(
            self,
            dependency_thresh=defaults.DEFAULT_DEPENDENCY_THRESH,
            and_measure_thresh=defaults.DEFAULT_AND_MEASURE_THRESH,
            min_act_count=defaults.DEFAULT_MIN_ACT_COUNT,
            min_dfg_occurrences=defaults.DEFAULT_MIN_DFG_OCCURRENCES,
            dfg_pre_cleaning_noise_thresh=defaults.
        DEFAULT_DFG_PRE_CLEANING_NOISE_THRESH,
            loops_length_two_thresh=defaults.DEFAULT_LOOP_LENGTH_TWO_THRESH,
            parameters=None):
        """
        Calculate the dependency matrix, populate the nodes

        The method fills the dependency, DFG and performance matrices from
        ``self.dfg``, creates the nodes for the edges passing the thresholds,
        lets each node compute its AND measures and loops of length two, and
        finally adds the connections for the loops of length two that are not
        already covered by an accepted dependency. If no node has been created
        at all, one unconnected node per activity is added.

        Parameters
        -------------
        dependency_thresh
            (Optional) dependency threshold
        and_measure_thresh
            (Optional) AND measure threshold
        min_act_count
            (Optional) minimum number of occurrences of an activity
        min_dfg_occurrences
            (Optional) minimum dfg occurrences
        dfg_pre_cleaning_noise_thresh
            (Optional) DFG pre cleaning noise threshold; the cleaning is
            applied only when the value is greater than 0
        loops_length_two_thresh
            (Optional) loops length two threshold
        parameters
            Other parameters of the algorithm (forwarded to the DFG cleaning)
        """
        if parameters is None:
            parameters = {}

        self.dependency_matrix = {}
        self.dfg_matrix = {}
        self.performance_matrix = {}

        if dfg_pre_cleaning_noise_thresh > 0.0:
            self.dfg = clean_dfg_based_on_noise_thresh(
                self.dfg,
                self.activities,
                dfg_pre_cleaning_noise_thresh,
                parameters=parameters)

        # Nested-dictionary view of the window-2 DFG, when available.
        if self.dfg_window_2 is not None:
            for el in self.dfg_window_2:
                act1 = el[0]
                act2 = el[1]
                if act1 not in self.dfg_window_2_matrix:
                    self.dfg_window_2_matrix[act1] = {}
                self.dfg_window_2_matrix[act1][act2] = self.dfg_window_2[el]

        # Nested-dictionary view of the triples of the form (a, b, a).
        if self.freq_triples is not None:
            for el in self.freq_triples:
                act1 = el[0]
                act2 = el[1]
                act3 = el[2]
                # avoid to consider self-loops
                if act1 == act3 and not act1 == act2:
                    if act1 not in self.freq_triples_matrix:
                        self.freq_triples_matrix[act1] = {}
                    self.freq_triples_matrix[act1][act2] = self.freq_triples[
                        el]

        # Fill the dependency / DFG / performance matrices edge by edge.
        for el in self.dfg:
            act1 = el[0]
            act2 = el[1]
            value = self.dfg[el]
            # Without a performance DFG the frequency doubles as the
            # represented value.
            perf_value = self.performance_dfg[
                el] if self.performance_dfg is not None else value
            if act1 not in self.dependency_matrix:
                self.dependency_matrix[act1] = {}
                self.dfg_matrix[act1] = {}
                self.performance_matrix[act1] = {}
            self.dfg_matrix[act1][act2] = value
            self.performance_matrix[act1][act2] = perf_value
            inv_couple = (act2, act1)
            if not act1 == act2 and inv_couple in self.dfg:
                # Both directions observed: normalised difference of counts.
                c2 = self.dfg[inv_couple]
                dep = (value - c2) / (value + c2 + 1)
            else:
                # Self loop, or edge observed in one direction only.
                dep = value / (value + 1)
            self.dependency_matrix[act1][act2] = dep

        def is_frequent(act):
            # The activity is known and occurs often enough.
            return (act in self.activities_occurrences
                    and self.activities_occurrences[act] >= min_act_count)

        def has_dependency(src, tgt):
            # The edge src -> tgt exists and passes the dependency threshold.
            return (src in self.dependency_matrix
                    and tgt in self.dependency_matrix[src]
                    and self.dependency_matrix[src][tgt] >= dependency_thresh)

        def build_node(act):
            # Create the node of the heuristics net for the given activity.
            return Node(self,
                        act,
                        self.activities_occurrences[act],
                        is_start_node=(act in self.start_activities),
                        is_end_node=(act in self.end_activities),
                        default_edges_color=self.default_edges_color[0],
                        node_type=self.node_type,
                        net_name=self.net_name[0],
                        nodes_dictionary=self.nodes)

        # Keep the edges passing all the thresholds and connect their nodes.
        for n1 in self.dependency_matrix:
            for n2 in self.dependency_matrix[n1]:
                if not (is_frequent(n1) and is_frequent(n2)
                        and self.dfg_matrix[n1][n2] >= min_dfg_occurrences
                        and has_dependency(n1, n2)):
                    continue
                if n1 not in self.nodes:
                    self.nodes[n1] = build_node(n1)
                if n2 not in self.nodes:
                    self.nodes[n2] = build_node(n2)

                repr_value = self.performance_matrix[n1][n2]
                self.nodes[n1].add_output_connection(
                    self.nodes[n2],
                    self.dependency_matrix[n1][n2],
                    self.dfg_matrix[n1][n2],
                    repr_value=repr_value)
                self.nodes[n2].add_input_connection(
                    self.nodes[n1],
                    self.dependency_matrix[n1][n2],
                    self.dfg_matrix[n1][n2],
                    repr_value=repr_value)

        for node in self.nodes:
            self.nodes[node].calculate_and_measure_out(
                and_measure_thresh=and_measure_thresh)
            self.nodes[node].calculate_and_measure_in(
                and_measure_thresh=and_measure_thresh)
            self.nodes[node].calculate_loops_length_two(
                self.dfg_matrix,
                self.freq_triples_matrix,
                loops_length_two_thresh=loops_length_two_thresh)

        # Snapshot of the keys: self.nodes may grow inside the loop below.
        nodes = list(self.nodes.keys())
        added_loops = set()
        for n1 in nodes:
            for n2 in self.nodes[n1].loop_length_two:
                if not (n1 in self.dfg_matrix and n2 in self.dfg_matrix[n1]
                        and self.dfg_matrix[n1][n2] >= min_dfg_occurrences
                        and is_frequent(n1) and is_frequent(n2)):
                    continue
                # Loops already covered by an accepted dependency (in either
                # direction) have been connected above.
                if has_dependency(n1, n2) or has_dependency(n2, n1):
                    continue

                if n2 not in self.nodes:
                    self.nodes[n2] = build_node(n2)

                v_n1_n2 = self.dfg_matrix.get(n1, {}).get(n2, 0)
                v_n2_n1 = self.dfg_matrix.get(n2, {}).get(n1, 0)
                # Represented values are taken from the performance matrix for
                # the same edge whose DFG count is passed along. Previously the
                # value left over from the last edge accepted in the loop above
                # was reused here, so loop edges carried the value of an
                # unrelated edge (and the name could even be unbound).
                # The fallback 0 mirrors the fallback of the DFG counts.
                repr_n1_n2 = self.performance_matrix.get(n1, {}).get(n2, 0)
                repr_n2_n1 = self.performance_matrix.get(n2, {}).get(n1, 0)

                # NOTE(review): the input connections below receive the count
                # of the opposite direction with respect to the matching
                # output connection, unlike the main loop above where both
                # sides get the same count. Kept as in the original; confirm
                # whether this is intended.
                if (n1, n2) not in added_loops:
                    added_loops.add((n1, n2))
                    self.nodes[n1].add_output_connection(
                        self.nodes[n2], 0, v_n1_n2, repr_value=repr_n1_n2)
                    self.nodes[n2].add_input_connection(
                        self.nodes[n1], 0, v_n2_n1, repr_value=repr_n2_n1)

                if (n2, n1) not in added_loops:
                    added_loops.add((n2, n1))
                    self.nodes[n2].add_output_connection(
                        self.nodes[n1], 0, v_n2_n1, repr_value=repr_n2_n1)
                    self.nodes[n1].add_input_connection(
                        self.nodes[n2], 0, v_n1_n2, repr_value=repr_n1_n2)

        # No edge survived the thresholds: expose every activity as an
        # unconnected node.
        if len(self.nodes) == 0:
            for act in self.activities:
                self.nodes[act] = build_node(act)
Ejemplo n.º 6
0
def apply(dataframe, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by performance metric

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. The dictionary passed by the caller is
        not modified: the function works on a shallow copy of it.

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model (Petri net exported as string)
    format
        Format of the model (".pnml")
    log_type
        The constant "parquet"
    activities
        List of the activities found in the filtered dataframe
    start_activities
        List of the start activities of the filtered dataframe
    end_activities
        List of the end activities returned by the auto filter
    gviz_base64
        Base64 of the file produced by the "dot" rendering of the model
    ret_graph
        Result of ``get_graph.get_graph_from_petri`` on the discovered net
    model_type
        The constant "indbpmn"
    decoration
        The constant "perf"
    second_model
        String representation of the BPMN graph
    second_format
        Format of the second model (".bpmn")
    activity_key
        The activity key that has been used
    """
    # Shallow copy: the auto-filter flag set below must not leak into the
    # dictionary owned by the caller (it would otherwise affect any later use
    # of the same dictionary).
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    # With this flag set, the result of apply_auto_filter is unpacked below as
    # (dataframe, end activities dictionary).
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())

    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())

    # measure="both" yields the frequency DFG and the performance DFG.
    dfg, dfg_perf = df_statistics.get_dfg_graph(
        dataframe,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_glue=case_id_glue,
        sort_caseid_required=False,
        sort_timestamp_along_case_id=False,
        measure="both")
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    # Keep performance values only for the edges that survived the cleaning.
    dfg_perf = {x: y for x, y in dfg_perf.items() if x in dfg}

    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)
    spaths = get_shortest_paths(net)

    bpmn_graph, el_corr, inv_el_corr, el_corr_keys_map = petri_to_bpmn.apply(
        net, im, fm)

    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_perf, spaths, activities_count, variant="performance")

    # NOTE: the embedding of these statistics into the BPMN graph is disabled
    # (see the commented call below), so the converted map is currently
    # computed but not used afterwards.
    bpmn_aggreg_statistics = convert_performance_map.convert_performance_map_to_bpmn(
        aggregated_statistics, inv_el_corr)
    #bpmn_graph = bpmn_embedding.embed_info_into_bpmn(bpmn_graph, bpmn_aggreg_statistics, "performance")
    bpmn_graph = bpmn_diagram_layouter.apply(bpmn_graph)
    bpmn_string = bpmn_exporter.get_string_from_bpmn(bpmn_graph)

    # Same model rendered twice: SVG for the picture, DOT for gviz_base64.
    gviz = bpmn_vis_factory.apply_petri(
        net,
        im,
        fm,
        aggregated_statistics=aggregated_statistics,
        variant="performance",
        parameters={"format": "svg"})
    gviz2 = bpmn_vis_factory.apply_petri(
        net,
        im,
        fm,
        aggregated_statistics=aggregated_statistics,
        variant="performance",
        parameters={"format": "dot"})

    gviz_base64 = get_base64_from_file(gviz2.name)

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return (get_base64_from_file(gviz.name),
            export_petri_as_string(net, im, fm), ".pnml", "parquet",
            activities, start_activities, end_activities, gviz_base64,
            ret_graph, "indbpmn", "perf", bpmn_string, ".bpmn", activity_key)
Ejemplo n.º 7
0
def apply(dataframe, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. The dictionary passed by the caller is
        not modified: the function works on a shallow copy of it.

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model (Petri net exported as string)
    format
        Format of the model (".pnml")
    log_type
        The constant "parquet"
    activities
        List of the activities found in the filtered dataframe
    start_activities
        List of the start activities of the filtered dataframe
    end_activities
        List of the end activities returned by the auto filter
    gviz_base64
        Base64 encoding of ``str(gviz)``
    ret_graph
        Result of ``get_graph.get_graph_from_petri`` on the discovered net
    model_type
        The constant "inductive"
    decoration
        The constant "freq"
    second_model
        Always None for this variant
    second_format
        Always the empty string for this variant
    activity_key
        The activity key that has been used
    """
    # Shallow copy: the auto-filter flag set below must not leak into the
    # dictionary owned by the caller (it would otherwise affect any later use
    # of the same dictionary).
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    # With this flag set, the result of apply_auto_filter is unpacked below as
    # (dataframe, end activities dictionary).
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())

    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())

    dfg = df_statistics.get_dfg_graph(dataframe,
                                      activity_key=activity_key,
                                      timestamp_key=timestamp_key,
                                      case_id_glue=case_id_glue,
                                      sort_caseid_required=False,
                                      sort_timestamp_along_case_id=False)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)
    spaths = get_shortest_paths(net)
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg, spaths, activities_count, variant="frequency")
    gviz = pn_vis_factory.apply(net,
                                im,
                                fm,
                                parameters={"format": "svg"},
                                variant="frequency",
                                aggregated_statistics=aggregated_statistics)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return (get_base64_from_gviz(gviz), export_petri_as_string(net, im, fm),
            ".pnml", "parquet", activities, start_activities, end_activities,
            gviz_base64, ret_graph, "inductive", "freq", None, "",
            activity_key)
Ejemplo n.º 8
0
def apply(log, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    The log is capped to constants.MAX_NO_ACTIVITIES activities and
    auto-filtered; the directly-follows graph of the filtered log is cleaned
    with a noise threshold before the tree is discovered and rendered as SVG.

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm. Keys read directly by this function:
        "decreasingFactor" (default constants.DEFAULT_DEC_FACTOR) and
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY
        (default xes.DEFAULT_NAME_KEY). The dictionary is copied and the
        caller's object is never modified.

    Returns
    ------------
    A 14-element tuple, in this order:
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model (always None here)
    format
        Format of the model (always "" here)
    log_type
        The constant "xes" (presumably the kind of log handled - confirm
        against the caller)
    activities
        Activities of the filtered log
    start_activities
        Start activities of the filtered log
    end_activities
        End activities of the filtered log
    gviz_base64
        Base64 encoding of the UTF-8 bytes of str(gviz)
    graph
        Always an empty list here
    model_kind
        The constant "tree"
    decoration
        The constant "freq"
    second_model
        Always None here
    second_format
        Always "" here
    activity_key
        Attribute used as activity
    """
    # Work on a shallow copy: "format" is written below and must not leak
    # into the dictionary owned by the caller.
    parameters = dict(parameters) if parameters is not None else {}

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    # All statistics below are computed on the filtered log, so that they
    # agree with the discovered model.
    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(
            filtered_log, parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    tree = inductive_miner.apply_tree_dfg(dfg,
                                          parameters=parameters,
                                          activities=activities,
                                          start_activities=start_activities,
                                          end_activities=end_activities)

    # Only the local copy receives the rendering format.
    parameters["format"] = "svg"
    gviz = pt_vis_factory.apply(tree, parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    return (get_base64_from_gviz(gviz), None, "", "xes", activities,
            start_activities, end_activities, gviz_base64, [], "tree",
            "freq", None, "", activity_key)
Ejemplo n.º 9
0
def apply(log, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    The log is capped to constants.MAX_NO_ACTIVITIES activities and
    auto-filtered; the directly-follows graph of the filtered log is cleaned
    with a noise threshold before the net is discovered and rendered as SVG.

    Note: this function overrides four module-level settings of
    token_replay (MAX_REC_DEPTH, MAX_IT_FINAL1, MAX_IT_FINAL2,
    MAX_REC_DEPTH_HIDTRANSENABL) to 1 and does not restore them.

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm. Keys read directly by this function:
        "decreasingFactor" (default constants.DEFAULT_DEC_FACTOR) and
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY
        (default xes.DEFAULT_NAME_KEY). The dictionary is copied and the
        caller's object is never modified.

    Returns
    ------------
    A 14-element tuple, in this order:
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model (result of export_petri_as_string)
    format
        Format of the model (the constant ".pnml")
    log_type
        The constant "xes" (presumably the kind of log handled - confirm
        against the caller)
    activities
        Activities of the filtered log
    start_activities
        Start activities of the filtered log
    end_activities
        End activities of the filtered log
    gviz_base64
        Base64 encoding of the UTF-8 bytes of str(gviz)
    graph
        Result of get_graph.get_graph_from_petri on the discovered net
    model_kind
        The constant "inductive"
    decoration
        The constant "freq"
    second_model
        Always None here
    second_format
        Always "" here
    activity_key
        Attribute used as activity
    """
    # Work on a shallow copy: "format" is written below and must not leak
    # into the dictionary owned by the caller.
    parameters = dict(parameters) if parameters is not None else {}

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    # reduce the depth of the search done by token-based replay
    # (module-level settings: the change outlives this call)
    token_replay.MAX_REC_DEPTH = 1
    token_replay.MAX_IT_FINAL1 = 1
    token_replay.MAX_IT_FINAL2 = 1
    token_replay.MAX_REC_DEPTH_HIDTRANSENABL = 1

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    # All statistics below are computed on the filtered log, so that they
    # agree with the discovered model.
    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(
            filtered_log, parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters=parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)

    # Only the local copy receives the rendering format.
    parameters["format"] = "svg"
    gviz = pn_vis_factory.apply(net,
                                im,
                                fm,
                                log=filtered_log,
                                variant="frequency",
                                parameters=parameters)

    svg = get_base64_from_gviz(gviz)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return (svg, export_petri_as_string(net, im, fm), ".pnml", "xes",
            activities, start_activities, end_activities, gviz_base64,
            ret_graph, "inductive", "freq", None, "", activity_key)
Ejemplo n.º 10
0
def apply(dataframe, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    The dataframe is restricted through
    attributes_filter.filter_df_keeping_spno_activities (bounded by
    constants.MAX_NO_ACTIVITIES) and auto-filtered; the directly-follows
    graph of the filtered dataframe is cleaned with a noise threshold before
    the tree is discovered and rendered as SVG.

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. Keys read directly by this function:
        "decreasingFactor" (default constants.DEFAULT_DEC_FACTOR),
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY
        (default xes.DEFAULT_NAME_KEY),
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
        (default xes.DEFAULT_TIMESTAMP_KEY) and
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY
        (default CASE_CONCEPT_NAME). The dictionary is copied and the
        caller's object is never modified.

    Returns
    ------------
    A 14-element tuple, in this order:
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model (always None here)
    format
        Format of the model (always "" here)
    log_type
        The constant "parquet" (presumably the kind of log handled -
        confirm against the caller)
    activities
        Activities of the filtered dataframe
    start_activities
        Start activities of the filtered dataframe
    end_activities
        End activities, as returned by the auto filter
    gviz_base64
        Base64 encoding of the UTF-8 bytes of str(gviz)
    graph
        Always an empty list here
    model_kind
        The constant "tree"
    decoration
        The constant "freq"
    second_model
        Always None here
    second_format
        Always "" here
    activity_key
        Attribute used as activity
    """
    # Work on a shallow copy: the RETURN_EA_COUNT_DICT_AUTOFILTER flag set
    # below changes what the auto filter returns, and must not leak into the
    # dictionary owned by the caller.
    parameters = dict(parameters) if parameters is not None else {}

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    # With this flag the auto filter returns a (dataframe, end activities)
    # pair, which is unpacked below.
    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())

    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())

    dfg = df_statistics.get_dfg_graph(dataframe,
                                      activity_key=activity_key,
                                      timestamp_key=timestamp_key,
                                      case_id_glue=case_id_glue,
                                      sort_caseid_required=False,
                                      sort_timestamp_along_case_id=False)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    tree = inductive_miner.apply_tree_dfg(dfg,
                                          parameters,
                                          activities=activities,
                                          start_activities=start_activities,
                                          end_activities=end_activities)
    # The visualizer only receives the rendering format, not the full
    # parameters dictionary.
    gviz = pt_vis_factory.apply(tree, parameters={"format": "svg"})

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    return (get_base64_from_gviz(gviz), None, "", "parquet", activities,
            start_activities, end_activities, gviz_base64, [], "tree",
            "freq", None, "", activity_key)
Ejemplo n.º 11
0
def apply(log, parameters=None):
    """
    Discovers a Petri net through Inductive Miner and renders it as a BPMN
    diagram decorated by the performance metric

    Note: this function overrides four module-level settings of
    token_replay (MAX_REC_DEPTH, MAX_IT_FINAL1, MAX_IT_FINAL2,
    MAX_REC_DEPTH_HIDTRANSENABL) to 1 and does not restore them.

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm. Keys read directly by this function:
        "decreasingFactor" (default constants.DEFAULT_DEC_FACTOR) and
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY
        (default xes.DEFAULT_NAME_KEY)

    Returns
    ------------
    A 14-element tuple, in this order:
    base64
        Base64 of the file produced by the SVG rendering
    model
        Text representation of the model (result of export_petri_as_string)
    format
        Format of the model (the constant ".pnml")
    log_type
        The constant "xes" (presumably the kind of log handled - confirm
        against the caller)
    activities
        Activities of the filtered log
    start_activities
        Start activities of the filtered log
    end_activities
        End activities of the filtered log
    gviz_base64
        Base64 of the file produced by the "dot" rendering
    graph
        Result of get_graph.get_graph_from_petri on the discovered net
    model_kind
        The constant "indbpmn"
    decoration
        The constant "perf"
    bpmn_string
        BPMN diagram exported as a string
    bpmn_format
        The constant ".bpmn"
    activity_key
        Attribute used as activity
    """
    if parameters is None:
        parameters = {}

    if "decreasingFactor" in parameters:
        noise_factor = parameters["decreasingFactor"]
    else:
        noise_factor = constants.DEFAULT_DEC_FACTOR

    if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[
            pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = xes.DEFAULT_NAME_KEY

    # keep the search performed by token-based replay shallow
    for replay_setting in ("MAX_REC_DEPTH", "MAX_IT_FINAL1", "MAX_IT_FINAL2",
                           "MAX_REC_DEPTH_HIDTRANSENABL"):
        setattr(token_replay, replay_setting, 1)

    capped_log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(capped_log,
                                                 parameters=parameters)

    # statistics are taken from the filtered log
    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_counts = start_activities_filter.get_start_activities(
        filtered_log, parameters=parameters)
    start_activities = list(start_counts.keys())
    end_counts = end_activities_filter.get_end_activities(
        filtered_log, parameters=parameters)
    end_activities = list(end_counts.keys())

    raw_dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    noise_threshold = noise_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER
    cleaned_dfg = clean_dfg_based_on_noise_thresh(raw_dfg,
                                                  activities,
                                                  noise_threshold,
                                                  parameters=parameters)
    net, im, fm = inductive_miner.apply_dfg(
        cleaned_dfg,
        parameters=parameters,
        activities=activities,
        start_activities=start_activities,
        end_activities=end_activities)

    # only the BPMN graph and the inverse element correspondence are needed
    bpmn_graph, _el_corr, inv_el_corr, _el_corr_keys_map = petri_to_bpmn.apply(
        net, im, fm)

    performance_stats = token_decoration.get_decorations(
        filtered_log,
        net,
        im,
        fm,
        parameters=parameters,
        measure="performance")

    # The statistics are still converted to the BPMN elements, but the
    # result is not used in this function: they are not embedded into the
    # BPMN graph before it is laid out and exported.
    convert_performance_map.convert_performance_map_to_bpmn(
        performance_stats, inv_el_corr)
    laid_out_bpmn = bpmn_diagram_layouter.apply(bpmn_graph)
    bpmn_string = bpmn_exporter.get_string_from_bpmn(laid_out_bpmn)

    # render the decorated net twice: once as SVG, once in "dot" format
    renderings = {}
    for output_format in ("svg", "dot"):
        renderings[output_format] = bpmn_vis_factory.apply_petri(
            net,
            im,
            fm,
            aggregated_statistics=performance_stats,
            variant="performance",
            parameters={"format": output_format})

    svg = get_base64_from_file(renderings["svg"].name)
    gviz_base64 = get_base64_from_file(renderings["dot"].name)

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)
    pnml_string = export_petri_as_string(net, im, fm)

    return (svg, pnml_string, ".pnml", "xes", activities, start_activities,
            end_activities, gviz_base64, ret_graph, "indbpmn", "perf",
            bpmn_string, ".bpmn", activity_key)