Ejemplo n.º 1
0
def end_activities(log):
    log_end = end_activities_filter.get_end_activities(log)

    n_unique_end_activities = len(log_end)

    end_activities_occurrences = list(log_end.values())
    end_activities_min = np.min(end_activities_occurrences)
    end_activities_max = np.max(end_activities_occurrences)
    end_activities_mean = np.mean(end_activities_occurrences)
    end_activities_median = np.median(end_activities_occurrences)
    end_activities_std = np.std(end_activities_occurrences)
    end_activities_variance = np.var(end_activities_occurrences)
    end_activities_q1 = np.percentile(end_activities_occurrences, 25)
    end_activities_q3 = np.percentile(end_activities_occurrences, 75)
    end_activities_iqr = stats.iqr(end_activities_occurrences)
    end_activities_skewness = stats.skew(end_activities_occurrences)
    end_activities_kurtosis = stats.kurtosis(end_activities_occurrences)

    return [
        n_unique_end_activities,
        end_activities_min,
        end_activities_max,
        end_activities_mean,
        end_activities_median,
        end_activities_std,
        end_activities_variance,
        end_activities_q1,
        end_activities_q3,
        end_activities_iqr,
        end_activities_skewness,
        end_activities_kurtosis,
    ]
Ejemplo n.º 2
0
 def test_21(self):
     from pm4py.algo.filtering.pandas.end_activities import end_activities_filter
     from pm4py.util import constants
     df = self.load_running_example_df()
     end_acitivites = end_activities_filter.get_end_activities(df)
     filtered_df = end_activities_filter.apply(df, ["pay compensation"],
                                               parameters={
                                                   end_activities_filter.Parameters.CASE_ID_KEY: "case:concept:name",
                                                   end_activities_filter.Parameters.ACTIVITY_KEY: "concept:name"})
Ejemplo n.º 3
0
def apply(log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an initial and final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(start_activities_filter.get_start_activities(log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(end_activities_filter.get_end_activities(log, parameters=parameters).keys())

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min([len(trace) for trace in log]) == 0

    net, initial_marking, final_marking = apply_dfg(dfg, parameters=parameters, activities=activities,
                                                    contains_empty_traces=contains_empty_traces,
                                                    start_activities=start_activities, end_activities=end_activities)

    return net, initial_marking, final_marking
Ejemplo n.º 4
0
def apply(log, parameters=None):
    """
    Gets the performance HNet

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    log = attributes_filter.filter_log_on_max_no_activities(log, max_no_activities=constants.MAX_NO_ACTIVITIES,
                                                            parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(filtered_log, activity_key)
    start_activities_count = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
    end_activities_count = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(start_activities_count.keys())
    end_activities = list(end_activities_count.keys())

    dfg_freq = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg_perf = dfg_factory.apply(filtered_log, variant="performance", parameters=parameters)

    heu_net = HeuristicsNet(dfg_freq, performance_dfg=dfg_perf, activities=activities, start_activities=start_activities, end_activities=end_activities, activities_occurrences=activities_count)

    heu_net.calculate(dfg_pre_cleaning_noise_thresh=constants.DEFAULT_DFG_CLEAN_MULTIPLIER * decreasingFactor)

    vis = heu_vis_factory.apply(heu_net, parameters={"format": "svg"})
    vis2 = heu_vis_factory.apply(heu_net, parameters={"format": "dot"})

    gviz_base64 = get_base64_from_file(vis2.name)

    return get_base64_from_file(vis.name), None, "", "xes", activities, start_activities, end_activities, gviz_base64, [], "heuristics", "perf", None, "", activity_key
Ejemplo n.º 5
0
    def get_end_activities(self, parameters=None):
        """
        Gets the end activities from the log

        Returns
        -------------
        end_activities_dict
            Dictionary of end activities
        """
        if parameters is None:
            parameters = {}
        parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = self.activity_key
        parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = self.activity_key
        return end_activities_filter.get_end_activities(self.log, parameters=parameters)
Ejemplo n.º 6
0
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    start_activities = log_sa_filter.get_start_activities(
        log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log,
                                                      parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_factory.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=parameters_w2)
    freq_triples = dfg_factory.apply(log,
                                     parameters=parameters,
                                     variant="freq_triples")

    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
Ejemplo n.º 7
0
def apply_tree(log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}).items() if v > 0]

    # gets the start activities from the log
    start_activities = start_activities_filter.get_start_activities(log, parameters=parameters)
    # gets the end activities from the log
    end_activities = end_activities_filter.get_end_activities(log, parameters=parameters)

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min([len(trace) for trace in log]) == 0

    return apply_tree_dfg(dfg, parameters, activities=activities, contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities, end_activities=end_activities)
Ejemplo n.º 8
0
def read_xes(data_dir, dataset, aggregate_type, mode="pruning"):
    prune_parameter_freq = 350
    prune_parameter_time = -1  #keep all
    #read the xes file
    if dataset in "BPIC14":
        # log = csv_importer.import_event_stream(os.path.join(data_dir, dataset + ".csv"))
        data = csv_import_adapter.import_dataframe_from_path(os.path.join(
            data_dir, dataset + ".csv"),
                                                             sep=";")
        data['case:concept:name'] = data['Incident ID']
        data['time:timestamp'] = data['DateStamp']
        data['concept:name'] = data['IncidentActivity_Type']
        log = conversion_factory.apply(data)
    elif dataset == "Unrineweginfectie":
        data = csv_import_adapter.import_dataframe_from_path(os.path.join(
            data_dir, dataset + ".csv"),
                                                             sep=",")
        data['case:concept:name'] = data['Patientnummer']
        data['time:timestamp'] = data['Starttijd']
        data['concept:name'] = data['Aciviteit']
        log = conversion_factory.apply(data)
    else:
        log = xes_import_factory.apply(os.path.join(data_dir,
                                                    dataset + ".xes"))
        data = get_dataframe_from_event_stream(log)

    # dataframe = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)
    # dfg_freq = dfg_factory.apply(log,variant="frequency")
    # dfg_time =get_dfg_time(data,aggregate_type,dataset)

    if aggregate_type == AggregateType.FREQ:
        dfg = dfg_factory.apply(log, variant="frequency")
    else:
        dfg = get_dfg_time(data, aggregate_type, dataset)
    """Getting Start and End activities"""
    # log = xes_importer.import_log(xes_file)
    log_start = start_activities_filter.get_start_activities(log)
    log_end = end_activities_filter.get_end_activities(log)
    # return dfg_freq,dfg_time
    return dfg
Ejemplo n.º 9
0
def apply(log, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    tree = inductive_miner.apply_tree_dfg(dfg,
                                          parameters=parameters,
                                          activities=activities,
                                          start_activities=start_activities,
                                          end_activities=end_activities)
    parameters["format"] = "svg"
    gviz = pt_vis_factory.apply(tree, parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    return get_base64_from_gviz(gviz), None, "", "xes", activities, start_activities, end_activities, gviz_base64, [], "tree", "freq", None, "", activity_key
Ejemplo n.º 10
0
## Start activities

from pm4py.algo.filtering.log.start_activities import start_activities_filter

log_start = start_activities_filter.get_start_activities(log)

log = start_activities_filter.apply_auto_filter(
    log, parameters={"decreasingFactor": 0.6})
print(start_activities_filter.get_start_activities(log))

## End activities

from pm4py.algo.filtering.log.end_activities import end_activities_filter

log_end = end_activities_filter.get_end_activities(log)

log_af_ea = end_activities_filter.apply_auto_filter(
    log, parameters={"decreasingFactor": 0.6})
print(end_activities_filter.get_end_activities(log_af_ea))

## Other Events

from pm4py.algo.filtering.log.attributes import attributes_filter
from pm4py.util import constants
log = attributes_filter.apply_auto_filter(
    log,
    parameters={
        constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "concept:name",
        "decreasingFactor": 0.6
    })
Ejemplo n.º 11
0
 def test_20(self):
     from pm4py.algo.filtering.log.end_activities import end_activities_filter
     log = self.load_running_example_xes()
     end_activities = end_activities_filter.get_end_activities(log)
     filtered_log = end_activities_filter.apply(log, ["pay compensation"])
Ejemplo n.º 12
0
result_num = []

print(model)

tree = 5
tree_num = 0
tree_avg = 0
print(tree)
for sam in range(1, 11):
    print(sam)
    input_file_path = os.path.join("input_data", "df_complete_logs",
                                   "%d_1000_%d.xes" % (tree, sam))
    log = xes_importer.apply(input_file_path)
    dfg_org = dfg_factory.apply(log)
    start_act = set(get_start_activities(log).keys())
    end_act = set(get_end_activities(log).keys())
    #make petri net for simulation
    net, im, fm = heu_factory.apply(log)

    num = len(dfg_org.keys())
    sim_log = sim_factory.apply(net,
                                im,
                                parameters={
                                    'maxTraceLength': 20,
                                    'noTraces': 1000
                                })
    dfg_algo = dfg_factory.apply(log)
    start_act_algo = set(get_start_activities(sim_log).keys())
    end_act_algo = set(get_end_activities(sim_log).keys())

    score = 0
Ejemplo n.º 13
0
    def detect_cut(self, second_iteration=False, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if parameters is None:
                parameters = {}
            activity_key = exec_utils.get_param_value(
                Parameters.ACTIVITY_KEY, parameters,
                pmutil.xes_constants.DEFAULT_NAME_KEY)

            # check base cases:
            empty_log = base_case.empty_log(self.log)
            single_activity = base_case.single_activity(self.log, activity_key)
            if empty_log:
                self.detected_cut = 'empty_log'
            elif single_activity:
                self.detected_cut = 'single_activity'
            # if no base cases are found, search for a cut:
            else:
                conn_components = detection_utils.get_connected_components(
                    self.ingoing, self.outgoing, self.activities)
                this_nx_graph = transform_dfg_to_directed_nx_graph(
                    self.dfg, activities=self.activities)
                strongly_connected_components = [
                    list(x)
                    for x in nx.strongly_connected_components(this_nx_graph)
                ]
                xor_cut = self.detect_xor(conn_components)
                # the following part searches for a cut in the current log_skeleton
                # if a cut is found, the log_skeleton is split according to the cut, the resulting logs are saved in new_logs
                # recursion is used on all the logs in new_logs
                if xor_cut[0]:
                    logging.debug("xor_cut")
                    self.detected_cut = 'concurrent'
                    new_logs = split.split_xor(xor_cut[1], self.log,
                                               activity_key)
                    for i in range(len(new_logs)):
                        new_logs[
                            i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(l,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                else:
                    sequence_cut = cut_detection.detect_sequential_cut(
                        self, self.dfg, strongly_connected_components)
                    if sequence_cut[0]:
                        logging.debug("sequence_cut")
                        new_logs = split.split_sequence(
                            sequence_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[
                                i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    else:
                        parallel_cut = self.detect_concurrent()
                        if parallel_cut[0]:
                            logging.debug("parallel_cut")
                            new_logs = split.split_parallel(
                                parallel_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[
                                    i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                            self.detected_cut = "parallel"
                            for l in new_logs:
                                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                    l, parameters=parameters).items() if v > 0]
                                activities = attributes_filter.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_filter.
                                    get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_filter.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l,
                                        new_dfg,
                                        self.master_dfg,
                                        self.initial_dfg,
                                        activities,
                                        self.counts,
                                        self.rec_depth + 1,
                                        noise_threshold=self.noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.
                                        initial_start_activities,
                                        initial_end_activities=self.
                                        initial_end_activities,
                                        parameters=parameters))
                        else:
                            loop_cut = self.detect_loop()
                            if loop_cut[0]:
                                logging.debug("loop_cut")
                                new_logs = split.split_loop(
                                    loop_cut[1], self.log, activity_key)
                                for i in range(len(new_logs)):
                                    new_logs[
                                        i] = filtering_utils.keep_one_trace_per_variant(
                                            new_logs[i], parameters=parameters)
                                self.detected_cut = "loopCut"
                                for l in new_logs:
                                    new_dfg = [
                                        (k, v) for k, v in dfg_inst.apply(
                                            l, parameters=parameters).items()
                                        if v > 0
                                    ]
                                    activities = attributes_filter.get_attribute_values(
                                        l, activity_key)
                                    start_activities = list(
                                        start_activities_filter.
                                        get_start_activities(
                                            l, parameters=parameters).keys())
                                    end_activities = list(
                                        end_activities_filter.
                                        get_end_activities(
                                            l, parameters=parameters).keys())
                                    self.children.append(
                                        SubtreePlain(
                                            l,
                                            new_dfg,
                                            self.master_dfg,
                                            self.initial_dfg,
                                            activities,
                                            self.counts,
                                            self.rec_depth + 1,
                                            noise_threshold=self.
                                            noise_threshold,
                                            start_activities=start_activities,
                                            end_activities=end_activities,
                                            initial_start_activities=self.
                                            initial_start_activities,
                                            initial_end_activities=self.
                                            initial_end_activities,
                                            parameters=parameters))

                            # if the code gets to this point, there is no base_case and no cut found in the log_skeleton
                            # therefore, we now apply fall through:
                            else:
                                self.apply_fall_through(parameters)
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
Ejemplo n.º 14
0
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive)
            desidered_output -> Desidered output of the algorithm (default: Petri)
            include_filtered_log -> Include the filtered log in the output
            include_dfg_frequency -> Include the DFG of frequencies in the output
            include_dfg_performance -> Include the DFG of performance in the output
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary
    """
    if parameters is None:
        parameters = {}

    returned_dictionary = {}

    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    maximum_number_activities = parameters[
        "maximum_number_activities"] if "maximum_number_activities" in parameters else 20
    discovery_algorithm = parameters["discovery_algorithm"] if "discovery_algorithm" in parameters else "alpha"
    desidered_output = parameters["desidered_output"] if "desidered_output" in parameters else "petri"
    include_filtered_log = parameters["include_filtered_log"] if "include_filtered_log" in parameters else True
    include_dfg_frequency = parameters["include_dfg_frequency"] if "include_dfg_frequency" in parameters else True
    include_dfg_performance = parameters[
        "include_dfg_performance"] if "include_dfg_performance" in parameters else False
    include_filtered_dfg_frequency = parameters[
        "include_filtered_dfg_frequency"] if "include_filtered_dfg_frequency" in parameters else True
    include_filtered_dfg_performance = parameters[
        "include_filtered_dfg_performance"] if "include_filtered_dfg_performance" in parameters else False

    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[
            PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key
    else:
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_count_list = []
    for activity in activities_count_dictio:
        activities_count_list.append([activity, activities_count_dictio[activity]])

    activities_count_list = sorted(activities_count_list, key=lambda x: x[1], reverse=True)
    activities_count_list = activities_count_list[:min(len(activities_count_list), maximum_number_activities)]
    activities_keep_list = [x[0] for x in activities_count_list]

    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    filtered_log = None

    if "alpha" in discovery_algorithm:
        # parameters_sa = deepcopy(parameters)
        # parameters_sa["decreasingFactor"] = 1.0
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)

    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    if include_filtered_dfg_frequency:
        filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
    if include_filtered_dfg_performance:
        filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters, variant="performance")

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)

        parameters_conv = {}
        parameters_conv["start_activities"] = start_activities
        parameters_conv["end_activities"] = end_activities

        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)

    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log
    if net is not None and desidered_output == "petri":
        returned_dictionary["net"] = net
    if initial_marking is not None and desidered_output == "petri":
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and desidered_output == "petri":
        returned_dictionary["final_marking"] = final_marking
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking

    return returned_dictionary
Ejemplo n.º 15
0
    sum_tree_num = 0
    print(tree)
    tree_avg_tel = 0
    tree_avg_log = 0
    tree_avg_sum = 0
    for sam in range(1, 11):
        print(sam)

        path = os.path.join("input_data", "df_complete_logs",
                            "%d_1000_%d.xes" % (tree, sam))
        log = xes_importer.apply(path)
        tel = xes_importer.apply(path)
        xes_utils.set_enabled(tel)
        dfg_100 = dfg_factory.apply(log)
        start_act = set(get_start_activities(log).keys())
        end_act = set(get_end_activities(log).keys())

        result_norm = []
        result_tel = []

        num = len(dfg_100.keys())
        score_tel = 0
        score_log = 0
        score_sum = 0
        su_tel = 0
        su_log = 0
        su_sum = 0
        for k in range(10):
            found_tel = False
            found_log = False
            found_sum = False
Ejemplo n.º 16
0
def apply_tree(log, parameters):
    """
    Apply the IM_FF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    if type(log) is pd.DataFrame:
        vars = variants_get.get_variants_count(log, parameters=parameters)
        return apply_tree_variants(vars, parameters=parameters)
    else:
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        log = converter.apply(log, parameters=parameters)
        # keep only the activity attribute (since the others are not used)
        log = filtering_utils.keep_only_one_attribute_per_event(
            log, activity_key)

        noise_threshold = exec_utils.get_param_value(
            Parameters.NOISE_THRESHOLD, parameters,
            shared_constants.NOISE_THRESHOLD_IMF)

        dfg = [(k, v)
               for k, v in dfg_inst.apply(log, parameters=parameters).items()
               if v > 0]
        c = Counts()
        activities = attributes_filter.get_attribute_values(log, activity_key)
        start_activities = list(
            start_activities_filter.get_start_activities(
                log, parameters=parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                log, parameters=parameters).keys())
        contains_empty_traces = False
        traces_length = [len(trace) for trace in log]
        if traces_length:
            contains_empty_traces = min([len(trace) for trace in log]) == 0

        # set the threshold parameter based on f and the max value in the dfg:
        max_value = 0
        for key, value in dfg:
            if value > max_value:
                max_value = value
        threshold = noise_threshold * max_value

        recursion_depth = 0
        sub = subtree.make_tree(log,
                                dfg,
                                dfg,
                                dfg,
                                activities,
                                c,
                                recursion_depth,
                                noise_threshold,
                                threshold,
                                start_activities,
                                end_activities,
                                start_activities,
                                end_activities,
                                parameters=parameters)

        process_tree = get_tree_repr_implain.get_repr(
            sub, 0, contains_empty_traces=contains_empty_traces)
        # Ensures consistency to the parent pointers in the process tree
        tree_consistency.fix_parent_pointers(process_tree)
        # Fixes a 1 child XOR that is added when single-activities flowers are found
        tree_consistency.fix_one_child_xor_flower(process_tree)
        # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
        process_tree = util.fold(process_tree)

        return process_tree
Ejemplo n.º 17
0
def apply(log, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    # reduce the depth of the search done by token-based replay
    token_replay.MAX_REC_DEPTH = 1
    token_replay.MAX_IT_FINAL1 = 1
    token_replay.MAX_IT_FINAL2 = 1
    token_replay.MAX_REC_DEPTH_HIDTRANSENABL = 1

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters=parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)

    parameters["format"] = "svg"
    gviz = pn_vis_factory.apply(net,
                                im,
                                fm,
                                log=filtered_log,
                                variant="frequency",
                                parameters=parameters)

    svg = get_base64_from_gviz(gviz)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return svg, export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "inductive", "freq", None, "", activity_key
Ejemplo n.º 18
0
for tree in range(1, 11):
    tel_tree_num = 0
    log_tree_num = 0
    print(tree)
    tree_avg_tel = 0
    tree_avg_log = 0
    for sam in range(1, 11):
        print(sam)

        path = os.path.join("input_data", "df_complete_logs",
                            "%d_1000_%d.xes" % (tree, sam))
        log = xes_importer.apply(path)
        dfg_org = dfg_factory.apply(log)
        start_act = set(get_start_activities(log).keys())
        end_act = set(get_end_activities(log).keys())

        alpha_path = os.path.join("input_data", "df_complete_logs",
                                  "df_complete_alpha",
                                  "%d_alpha_%d.xes" % (tree, sam))
        alpha_log = xes_importer.apply(alpha_path)

        num = len(dfg_org.keys())
        score_tel = 0
        su_tel = 0
        for k in range(10):
            for n in range(1, 1000):
                sampled_log = sampling.sample_log(alpha_log, no_traces=n)
                dfg_log = dfg_factory.apply(sampled_log)

                yes_dfg = 0
Ejemplo n.º 19
0
    def detect_loop(self):
        # p0 is part of return value, it contains the partition of activities
        # write all start and end activities in p1
        if self.contains_empty_trace():
            return [False, []]
        start_activities = list(
            start_activities_filter.get_start_activities(
                self.log, parameters=self.parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                self.log, parameters=self.parameters).keys())
        p1 = []
        for act in start_activities:
            if act not in p1:
                p1.append(act)
        for act in end_activities:
            if act not in p1:
                p1.append(act)

        # create new dfg without the transitions to start and end activities
        new_dfg = copy(self.dfg)
        copy_dfg = copy(new_dfg)
        for ele in copy_dfg:
            if ele[0][0] in p1 or ele[0][1] in p1:
                new_dfg.remove(ele)
        # get connected components of this new dfg
        new_ingoing = get_ingoing_edges(new_dfg)
        new_outgoing = get_outgoing_edges(new_dfg)
        # it was a pain in the *** to get a working directory of the current_activities, as we can't iterate ove the dfg
        current_activities = {}
        for element in self.activities:
            if element not in p1:
                current_activities.update({element: 1})
        p0 = detection_utils.get_connected_components(new_ingoing,
                                                      new_outgoing,
                                                      current_activities)
        p0.insert(0, p1)

        iterable_dfg = []
        for i in range(0, len(self.dfg)):
            iterable_dfg.append(self.dfg[i][0])
        # p0 is like P1,P2,...,Pn in line 3 on page 190 of the IM Thesis
        # check for subsets in p0 that have connections to and end or from a start activity
        p0_copy = []
        for int_el in p0:
            p0_copy.append(int_el)
        for element in p0_copy:  # for every set in p0
            removed = False
            if element in p0 and element != p0[0]:
                for act in element:  # for every activity in this set
                    for e in end_activities:  # for every end activity
                        if e not in start_activities:
                            if (act, e) in iterable_dfg:  # check if connected
                                # is there an element in dfg pointing from any act in a subset of p0 to an end activity
                                for activ in element:
                                    if activ not in p0[0]:
                                        p0[0].append(activ)
                                if element in p0:
                                    p0.remove(
                                        element
                                    )  # remove subsets that are connected to an end activity
                                removed = True
                                break
                    if removed:
                        break
                    for s in start_activities:
                        if s not in end_activities:
                            if not removed:
                                if (s, act) in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(
                                            element
                                        )  # remove subsets that are connected to an end activity
                                    removed = True
                                    break
                            else:
                                break
                    if removed:
                        break

        iterable_dfg = []
        for i in range(0, len(self.dfg)):
            iterable_dfg.append(self.dfg[i][0])

        p0_copy = []
        for int_el in p0:
            p0_copy.append(int_el)
        for element in p0_copy:
            if element in p0 and element != p0[0]:
                for act in element:
                    for e in self.end_activities:
                        if (
                                e, act
                        ) in iterable_dfg:  # get those act, that are connected from an end activity
                            for e2 in self.end_activities:  # check, if the act is connected from all end activities
                                if (e2, act) not in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(
                                            element
                                        )  # remove subsets that are connected to an end activity
                                    break
                    for s in self.start_activities:
                        if (
                                act, s
                        ) in iterable_dfg:  # same as above (in this case for activities connected to
                            # a start activity)
                            for s2 in self.start_activities:
                                if (act, s2) not in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(
                                            element
                                        )  # remove subsets that are connected to an end activity
                                    break

        if len(p0) > 1:
            return [True, p0]
        else:
            return [False, []]
Ejemplo n.º 20
0
 def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
     # dfg_viz = dfg_factory.apply(self.log)
     # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
     # dfg_vis_factory.view(gviz)
     if type_of_cut == 'concurrent':
         self.detected_cut = 'concurrent'
         new_logs = split.split_xor(cut[1], self.log, activity_key)
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'sequential':
         new_logs = split.split_sequence(cut[1], self.log, activity_key)
         self.detected_cut = "sequential"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'parallel':
         new_logs = split.split_parallel(cut[1], self.log, activity_key)
         self.detected_cut = "parallel"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'loopCut':
         new_logs = split.split_loop(cut[1], self.log, activity_key)
         self.detected_cut = "loopCut"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
Ejemplo n.º 21
0
    def detect_cut_if(self, second_iteration=False, parameters=None):
        # dfg_viz = dfg_factory.apply(self.log)
        # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
        # dfg_vis_factory.view(gviz)
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # check base cases:
        empty_log = base_case.empty_log(self.log)
        single_activity = base_case.single_activity(self.log, activity_key)
        if empty_log:
            self.detected_cut = 'empty_log'
        elif single_activity:
            self.detected_cut = 'single_activity'
        # if no base cases are found, search for a cut:
        # use the cutting and splitting functions of im_plain:
        else:
            found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()

            if found_plain_cut:
                self.apply_cut_im_plain(type_of_cut, cut, activity_key)
            # if im_plain does not find a cut, we filter on our threshold and then again apply the im_cut detection
            # but this time, we have to use different splitting functions:
            else:
                self.filter_dfg_on_threshold()
                """
                dfg_viz = dfg_factory.apply(self.log)
                gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
                dfg_vis_factory.view(gviz)
                """
                found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
                if found_plain_cut:
                    if type_of_cut == 'concurrent':
                        logging.debug("concurrent_cut_if")
                        self.detected_cut = 'concurrent'
                        new_logs = splitting_infrequent.split_xor_infrequent(
                            cut[1], self.log, activity_key)
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'sequential':
                        logging.debug("sequential_if")
                        new_logs = splitting_infrequent.split_sequence_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'parallel':
                        logging.debug("parallel_if")
                        new_logs = split.split_parallel(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "parallel"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'loopCut':
                        logging.debug("loopCut_if")
                        new_logs = splitting_infrequent.split_loop_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "loopCut"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))

                else:
                    self.apply_fall_through_infrequent(parameters)
Ejemplo n.º 22
0
def apply_tree(log, parameters=None):
    """
    Apply the IM algorithm to a log_skeleton obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log_skeleton to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    if pkgutil.find_loader("pandas"):
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get

        if type(log) is pd.DataFrame:
            vars = variants_get.get_variants_count(log, parameters=parameters)
            return apply_tree_variants(vars, parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # since basic IM is influenced once per variant, it makes sense to keep one trace per variant
    log = filtering_utils.keep_one_trace_per_variant(log,
                                                     parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    dfg = [(k, v)
           for k, v in dfg_inst.apply(log, parameters=parameters).items()
           if v > 0]
    c = Counts()
    activities = attributes_filter.get_attribute_values(log, activity_key)
    start_activities = list(
        start_activities_filter.get_start_activities(
            log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(log,
                                                 parameters=parameters).keys())
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min([len(trace) for trace in log]) == 0

    recursion_depth = 0
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth,
                            0.0, start_activities, end_activities,
                            start_activities, end_activities, parameters)

    process_tree = get_tree_repr_implain.get_repr(
        sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = util.fold(process_tree)

    return process_tree
Ejemplo n.º 23
0
    def apply_fall_through_infrequent(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall_throughs, base case is True (enabled)
        use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                           ) or parameters[Parameters.EMPTY_TRACE_KEY]
        use_act_once_per_trace = (
            Parameters.ONCE_PER_TRACE_KEY
            not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
        use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                              ) or parameters[Parameters.CONCURRENT_KEY]
        use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                               ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
        use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                        ) or parameters[Parameters.TAU_LOOP_KEY]

        if use_empty_trace:
            empty_traces_present, enough_traces, new_log = fall_through_infrequent.empty_trace_filtering(
                self.log, self.f)
            self.log = new_log
        else:
            empty_traces_present = False
            enough_traces = False
        # if an empty trace is found, the empty trace fallthrough applies
        if empty_traces_present and enough_traces:
            logging.debug("empty_trace_if")
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=self.parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=parameters).keys())
            self.children.append(
                SubtreeInfrequent(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    self.f,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        elif empty_traces_present and not enough_traces:
            # no node is added to the PT, instead we just use recursion on the log without the empty traces
            self.detect_cut_if()
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
            else:
                activity_once = False
            if activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs as we need them to append to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=parameters).keys())
                # append the chosen activity as leaf:
                self.children.append(
                    SubtreeInfrequent(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log
                self.children.append(
                    SubtreeInfrequent(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, key = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                else:
                    activity_concurrent = False
                if activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs on to append later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_filter.get_attribute_values(
                        small_log, activity_key)
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=parameters).keys())
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreeInfrequent(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log:
                    self.children.append(
                        SubtreeInfrequent(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                    else:
                        strict_tau_loop = False
                    if strict_tau_loop:
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                new_log,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                        else:
                            tau_loop = False
                        if tau_loop:
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    new_log, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    new_log, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower_if")
                            self.detected_cut = 'flower'
Ejemplo n.º 24
0
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key
Ejemplo n.º 25
0
def apply(log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an initial and final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[
            pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    # apply the reduction by default only on very small logs
    enable_reduction = parameters[
        "enable_reduction"] if "enable_reduction" in parameters else True

    # get the DFG
    if isinstance(log[0][0], tel.Event):
        dfg = [(k, v) for k, v in inductive_revise.get_dfg_graph_trans(
            log,
            parameters={
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
            }).items() if v > 0]
    else:
        dfg = [(k, v) for k, v in dfg_inst.apply(
            log,
            parameters={
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
            }).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(
        start_activities_filter.get_start_activities(
            log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(
        end_activities_filter.get_end_activities(log,
                                                 parameters=parameters).keys())

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min([len(trace) for trace in log]) == 0

    net, initial_marking, final_marking = apply_dfg(
        dfg,
        parameters=parameters,
        activities=activities,
        contains_empty_traces=contains_empty_traces,
        start_activities=start_activities,
        end_activities=end_activities)
    """if enable_reduction:
        vis_trans = [x for x in net.transitions if x.label]
        hid_trans = [x for x in net.transitions if x.label is None]
        if vis_trans:
            ratio = len(hid_trans) / len(vis_trans)

            if ratio < 2.0:
                # avoid reducting too much complicated processes
                reduction_parameters = copy(parameters)
                if "is_reduction" not in reduction_parameters:
                    reduction_parameters["is_reduction"] = True
                if "thread_maximum_ex_time" not in reduction_parameters:
                    reduction_parameters["thread_maximum_ex_time"] = shared_constants.RED_MAX_THR_EX_TIME

                # do the replay
                aligned_traces = token_replay.apply(log, net, initial_marking, final_marking,
                                                    parameters=reduction_parameters)

                # apply petri_reduction technique in order to simplify the Petri net
                net = petri_cleaning.petri_reduction_treplay(net, parameters={"aligned_traces": aligned_traces})"""

    return net, initial_marking, final_marking
Ejemplo n.º 26
0
    def apply_fall_through(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall_throughs, base case is True (enabled)
        use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                           ) or parameters[Parameters.EMPTY_TRACE_KEY]
        use_act_once_per_trace = (
            Parameters.ONCE_PER_TRACE_KEY
            not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
        use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                              ) or parameters[Parameters.CONCURRENT_KEY]
        use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                               ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
        use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                        ) or parameters[Parameters.TAU_LOOP_KEY]

        if use_empty_trace:
            empty_trace, new_log = fall_through.empty_trace(self.log)
            # if an empty trace is found, the empty trace fallthrough applies
            #
        else:
            empty_trace = False
        if empty_trace:
            logging.debug("empty_trace")
            activites_left = []
            for trace in new_log:
                for act in trace:
                    if act[activity_key] not in activites_left:
                        activites_left.append(act[activity_key])
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=self.parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
                small_log = filtering_utils.keep_one_trace_per_variant(
                    small_log, parameters=parameters)
            else:
                activity_once = False
            if use_act_once_per_trace and activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs as we need them to append to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                self.children.append(
                    SubtreePlain(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log_skeleton
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=self.parameters).keys())
                self.children.append(
                    SubtreePlain(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                    small_log = filtering_utils.keep_one_trace_per_variant(
                        small_log, parameters=parameters)
                else:
                    activity_concurrent = False
                if use_act_concurrent and activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs on to append later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_filter.get_attribute_values(
                        small_log, activity_key)
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreePlain(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log_skeleton:
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=self.parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=self.parameters).keys())
                    self.children.append(
                        SubtreePlain(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                        new_log = filtering_utils.keep_one_trace_per_variant(
                            new_log, parameters=parameters)
                    else:
                        strict_tau_loop = False
                    if use_strict_tau_loop and strict_tau_loop:
                        activites_left = []
                        for trace in new_log:
                            for act in trace:
                                if act[activity_key] not in activites_left:
                                    activites_left.append(act[activity_key])
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=self.parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=self.parameters).keys())
                        self.children.append(
                            SubtreePlain(new_log,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                            new_log = filtering_utils.keep_one_trace_per_variant(
                                new_log, parameters=parameters)
                        else:
                            tau_loop = False
                        if use_tau_loop and tau_loop:
                            activites_left = []
                            for trace in new_log:
                                for act in trace:
                                    if act[activity_key] not in activites_left:
                                        activites_left.append(
                                            act[activity_key])
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower model")
                            activites_left = []
                            for trace in self.log:
                                for act in trace:
                                    if act[activity_key] not in activites_left:
                                        activites_left.append(
                                            act[activity_key])
                            self.detected_cut = 'flower'
Ejemplo n.º 27
0
def apply(log, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by performance metric

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    # reduce the depth of the search done by token-based replay
    token_replay.MAX_REC_DEPTH = 1
    token_replay.MAX_IT_FINAL1 = 1
    token_replay.MAX_IT_FINAL2 = 1
    token_replay.MAX_REC_DEPTH_HIDTRANSENABL = 1

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters=parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)
    #parameters["format"] = "svg"
    #gviz = pn_vis_factory.apply(net, im, fm, log=log, variant="performance", parameters=parameters)

    bpmn_graph, el_corr, inv_el_corr, el_corr_keys_map = petri_to_bpmn.apply(
        net, im, fm)

    aggregated_statistics = token_decoration.get_decorations(
        filtered_log,
        net,
        im,
        fm,
        parameters=parameters,
        measure="performance")

    bpmn_aggreg_statistics = convert_performance_map.convert_performance_map_to_bpmn(
        aggregated_statistics, inv_el_corr)
    #bpmn_graph = bpmn_embedding.embed_info_into_bpmn(bpmn_graph, bpmn_aggreg_statistics, "performance")
    bpmn_graph = bpmn_diagram_layouter.apply(bpmn_graph)
    bpmn_string = bpmn_exporter.get_string_from_bpmn(bpmn_graph)

    gviz = bpmn_vis_factory.apply_petri(
        net,
        im,
        fm,
        aggregated_statistics=aggregated_statistics,
        variant="performance",
        parameters={"format": "svg"})
    gviz2 = bpmn_vis_factory.apply_petri(
        net,
        im,
        fm,
        aggregated_statistics=aggregated_statistics,
        variant="performance",
        parameters={"format": "dot"})

    svg = get_base64_from_file(gviz.name)

    gviz_base64 = get_base64_from_file(gviz2.name)

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return svg, export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "indbpmn", "perf", bpmn_string, ".bpmn", activity_key