Beispiel #1
0
 def test_filtering_attributes_traces(self):
     """
     Smoke test of the trace-level attributes filter with both settings of "positive"

     No assertion is made on the filtered logs: the test only checks that both
     calls complete without raising.
     """
     # to avoid static method warnings in tests,
     # that by construction of the unittest package have to be expressed in such way
     self.dummy_variable = "dummy_value"
     input_log = os.path.join(INPUT_DATA_DIR, "running-example.xes")
     log = xes_importer.import_log(input_log)
     log1 = attributes_filter.apply(log, ["reject request"],
                                    parameters={"positive": True})
     # the second call flips the flag so the test does not exercise the same path twice
     # (presumably the branch dropping the matching traces -- confirm against attributes_filter)
     log2 = attributes_filter.apply(log, ["reject request"],
                                    parameters={"positive": False})
     del log1
     del log2
Beispiel #2
0
def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
    """
    Compare the case duration of fit and underfed cases for each problematic transition

    Parameters
    -------------
    log
        Trace log
    trans_fitness
        For each transition, a dictionary whose "underfed_traces" entry collects
        the traces where the transition was executed in an unfit manner
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each transition that has both fit and underfed cases, a dictionary with the keys
        n_fit, n_underfed, fit_median_time, underfed_median_time and relative_throughput
        (underfed median divided by fit median, or 0 when the fit median is not positive)
    """
    parameters = {} if parameters is None else parameters

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)

    # the filter works on its own copy, so the dictionary of the caller is left untouched
    filter_params = deepcopy(parameters)
    filter_params["positive"] = True

    result = {}
    for transition, fitness_info in trans_fitness.items():
        underfed_traces = fitness_info["underfed_traces"]
        if len(underfed_traces) == 0:
            # nothing to diagnose for transitions that were never underfed
            continue

        # filtering of the log on the label of the transition; the traces it returns
        # (and that are not underfed) are the ones counted as fit below
        traces_with_label = attributes_filter.apply(log, [transition.label], parameters=filter_params)

        regular_cases = []
        problematic_cases = []
        for case in log:
            if case in underfed_traces:
                problematic_cases.append(case)
            elif case in traces_with_label:
                regular_cases.append(case)

        # a comparison is meaningful only if both groups are populated
        if not (regular_cases and problematic_cases):
            continue

        regular_median = get_median_case_duration(regular_cases, timestamp_key=timestamp_key)
        problematic_median = get_median_case_duration(problematic_cases, timestamp_key=timestamp_key)
        if regular_median > 0:
            ratio = problematic_median / regular_median
        else:
            ratio = 0

        result[transition] = {
            "n_fit": len(regular_cases),
            "n_underfed": len(problematic_cases),
            "fit_median_time": regular_median,
            "underfed_median_time": problematic_median,
            "relative_throughput": ratio,
        }
    return result
def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, parameters=None):
    """
    Compare the case duration of the cases containing an activity missing from the model
    with the duration of the remaining (fit) cases

    Parameters
    -------------
    log
        Trace log
    notexisting_activities_in_model
        For each activity that is not in the model, the traces containing it
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each activity that has both containing and fit cases, a dictionary with the keys
        n_containing, n_fit, fit_median_time, containing_median_time and relative_throughput
        (containing median divided by fit median, or 0 when the fit median is not positive)
    """
    parameters = {} if parameters is None else parameters

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)

    # the filter works on its own copy, so the dictionary of the caller is left untouched
    filter_params = deepcopy(parameters)
    filter_params["positive"] = False

    # single negative filtering on all the missing activities, shared by every iteration
    # (presumably it keeps the traces containing none of them -- confirm against attributes_filter)
    missing_activities = list(notexisting_activities_in_model.keys())
    reference_log = attributes_filter.apply(log, missing_activities, parameters=filter_params)

    result = {}
    for activity, traces_with_activity in notexisting_activities_in_model.items():
        regular_cases = []
        affected_cases = []
        for case in log:
            if case in traces_with_activity:
                affected_cases.append(case)
            elif case in reference_log:
                regular_cases.append(case)

        # a comparison is meaningful only if both groups are populated
        if not (affected_cases and regular_cases):
            continue

        regular_median = get_median_case_duration(regular_cases, timestamp_key=timestamp_key)
        affected_median = get_median_case_duration(affected_cases, timestamp_key=timestamp_key)
        if regular_median > 0:
            ratio = affected_median / regular_median
        else:
            ratio = 0

        result[activity] = {
            "n_containing": len(affected_cases),
            "n_fit": len(regular_cases),
            "fit_median_time": regular_median,
            "containing_median_time": affected_median,
            "relative_throughput": ratio,
        }
    return result
Beispiel #4
0
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    The log is first restricted to its most frequent activities, then filtered
    according to the chosen discovery algorithm; the directly-follows graphs are
    computed on request and, for the alpha family, a Petri net is mined on the filtered log.

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep (default: 20)
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive; default: alphaclassic).
                The value is matched by substring. A net is mined only when it contains "alpha";
                when it contains "inductive" only the automatic filtering of the log is applied here.
            desidered_output -> Desidered output of the algorithm (default: petri)
            include_filtered_log -> Include the filtered log in the output (default: True)
            include_dfg_frequency -> Include the DFG of frequencies in the output (default: True)
            include_dfg_performance -> Include the DFG of performance in the output (default: True)
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
                (default: True)
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
                (default: True)
        NOTE: when the attribute key / activity key are missing, they are written
        into this dictionary, so the dictionary passed by the caller is modified.
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary

    Returns
    -------------
    output
        The tuple (net, initial_marking, final_marking) when classic_output is set, a net has been
        mined and the desidered output is "petri"; otherwise a dictionary that may contain the keys
        filtered_log, net, initial_marking, final_marking, dfg_frequency, dfg_performance,
        filtered_dfg_frequency and filtered_dfg_performance (only the available/requested ones)
    """
    if parameters is None:
        parameters = {}

    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alphaclassic")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_log = parameters.get("include_filtered_log", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", True)
    include_filtered_dfg_frequency = parameters.get("include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get("include_filtered_dfg_performance", True)

    # resolve the attribute used as activity: explicit parameter first, then the
    # classifier found in the log, and the default name key as last resort
    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    # keep only the most frequent activities; the sort is stable, so activities with
    # the same count keep the order in which the dictionary provides them
    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_by_count = sorted(activities_count_dictio.items(),
                                 key=lambda item: item[1],
                                 reverse=True)
    activities_keep_list = [activity for activity, _ in activities_by_count[:maximum_number_activities]]

    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    # stays None when the discovery algorithm is neither of the alpha nor of the inductive family
    filtered_log = None
    if "alpha" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "inductive" in discovery_algorithm:
        filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    if include_dfg_frequency:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    # the filtered DFGs can be computed only if a filtered log has actually been produced:
    # without this guard an unrecognized discovery algorithm would pass None to the DFG factory
    if filtered_log is not None:
        if include_filtered_dfg_frequency:
            filtered_dfg_frequency = dfg_factory.apply(filtered_log,
                                                       parameters=parameters,
                                                       variant="frequency")
        if include_filtered_dfg_performance:
            filtered_dfg_performance = dfg_factory.apply(filtered_log,
                                                         parameters=parameters,
                                                         variant="performance")

    net = None
    initial_marking = None
    final_marking = None
    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)

    petri_requested = desidered_output == "petri"

    # (key, value, requested) triples, listed in the order in which the keys are inserted
    candidates = [
        ("filtered_log", filtered_log, include_filtered_log),
        ("net", net, petri_requested),
        ("initial_marking", initial_marking, petri_requested),
        ("final_marking", final_marking, petri_requested),
        ("dfg_frequency", dfg_frequency, include_dfg_frequency),
        ("dfg_performance", dfg_performance, include_dfg_performance),
        ("filtered_dfg_frequency", filtered_dfg_frequency, include_filtered_dfg_frequency),
        ("filtered_dfg_performance", filtered_dfg_performance, include_filtered_dfg_performance),
    ]
    returned_dictionary = {key: value for key, value, requested in candidates
                           if value is not None and requested}

    if classic_output and net is not None and petri_requested:
        return net, initial_marking, final_marking

    return returned_dictionary