예제 #1
0
def get_log_traces_to_activities(log, activities, parameters=None):
    """
    Get the sublogs leading to each one of the specified activities

    For every activity the log is filtered twice through
    basic_filter.filter_log_traces_attr (positively on the activity itself,
    then negatively on all the other specified activities) and the result is
    passed to get_log_traces_until_activity.

    Parameters
    -------------
    log
        Trace log object
    activities
        List of activities in the log
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> activity
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> timestamp
        The dictionary provided by the caller is never modified.

    Returns
    -------------
    list_logs
        List of event logs taking to the first occurrence of each activity
    considered_activities
        Activities whose resulting log is non-empty and has therefore been inserted in list_logs
        (same order as list_logs)
    """
    # Work on a private copy: the attribute key written below must not leak
    # into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    # the attribute filter has to work on the activity attribute
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    list_logs = []
    considered_activities = []
    for act in activities:
        other_acts = [ac for ac in activities if ac != act]
        # each filter call receives its own copy of the parameters
        parameters_filt1 = deepcopy(parameters)
        parameters_filt2 = deepcopy(parameters)
        parameters_filt1["positive"] = True
        parameters_filt2["positive"] = False

        # step 0: positive filter on the current activity
        filtered_log = basic_filter.filter_log_traces_attr(
            log, [act], parameters=parameters_filt1)
        logging.info(
            "get_log_traces_to_activities activities=%s act=%s 0 len(filtered_log)=%s",
            activities, act, len(filtered_log))

        # step 1: negative filter on all the other activities
        filtered_log = basic_filter.filter_log_traces_attr(
            filtered_log, other_acts, parameters=parameters_filt2)
        logging.info(
            "get_log_traces_to_activities activities=%s act=%s 1 len(filtered_log)=%s",
            activities, act, len(filtered_log))

        # step 2: the second returned value is not needed here
        filtered_log, _ = get_log_traces_until_activity(
            filtered_log, act, parameters=parameters)
        logging.info(
            "get_log_traces_to_activities activities=%s act=%s 2 len(filtered_log)=%s",
            activities, act, len(filtered_log))

        # only non-empty logs are reported back
        if filtered_log:
            list_logs.append(filtered_log)
            considered_activities.append(act)

    return list_logs, considered_activities
예제 #2
0
def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
    """
    Compare the case duration of fit and underfed executions of the problematic transitions

    Parameters
    -------------
    log
        Trace log
    trans_fitness
        For each transition, a dictionary whose "underfed_traces" entry holds the traces where the
        transition has been executed in an unfit manner
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each problematic transition, a dictionary with the keys n_fit, n_underfed, fit_median_time,
        underfed_median_time and relative_throughput
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)

    positive_filter_params = deepcopy(parameters)
    positive_filter_params["positive"] = True

    diagnostics = {}
    for trans in trans_fitness:
        underfed_traces = trans_fitness[trans]["underfed_traces"]
        if len(underfed_traces) == 0:
            # the transition has no unfit execution: nothing to diagnose
            continue

        log_with_label = basic_filter.filter_log_traces_attr(
            log, [trans.label], parameters=positive_filter_params)

        # split the traces of the log: underfed ones first, then the ones
        # kept by the positive filter on the transition label
        fit_cases = []
        underfed_cases = []
        for trace in log:
            if trace in underfed_traces:
                underfed_cases.append(trace)
            elif trace in log_with_label:
                fit_cases.append(trace)

        if not (fit_cases and underfed_cases):
            # both groups are needed for a comparison
            continue

        fit_median_time = get_median_case_duration(
            fit_cases, timestamp_key=timestamp_key)
        underfed_median_time = get_median_case_duration(
            underfed_cases, timestamp_key=timestamp_key)

        # a non-positive fit median would make the ratio meaningless
        if fit_median_time > 0:
            relative_throughput = underfed_median_time / fit_median_time
        else:
            relative_throughput = 0

        diagnostics[trans] = {
            "n_fit": len(fit_cases),
            "n_underfed": len(underfed_cases),
            "fit_median_time": fit_median_time,
            "underfed_median_time": underfed_median_time,
            "relative_throughput": relative_throughput
        }
    return diagnostics
def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, parameters=None):
    """
    Compare the duration of the cases containing an activity that is missing from the model with the
    duration of the cases kept by the negative filter on all the missing activities

    Parameters
    -------------
    log
        Trace log
    notexisting_activities_in_model
        Not existing activities in the model (for each activity, the traces that contain it)
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each problematic activity, a dictionary with the keys n_containing, n_fit, fit_median_time,
        containing_median_time and relative_throughput
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)

    negative_filter_params = deepcopy(parameters)
    negative_filter_params["positive"] = False
    missing_activities = list(notexisting_activities_in_model.keys())

    # computed once: it does not depend on the single activity
    log_without_missing = basic_filter.filter_log_traces_attr(
        log, missing_activities, parameters=negative_filter_params)

    diagnostics = {}
    for act in notexisting_activities_in_model:
        traces_with_act = notexisting_activities_in_model[act]

        containing_cases = []
        fit_cases = []
        for trace in log:
            if trace in traces_with_act:
                containing_cases.append(trace)
            elif trace in log_without_missing:
                fit_cases.append(trace)

        if not (containing_cases and fit_cases):
            # both groups are needed for a comparison
            continue

        fit_median_time = get_median_case_duration(fit_cases, timestamp_key=timestamp_key)
        containing_median_time = get_median_case_duration(containing_cases, timestamp_key=timestamp_key)

        # a non-positive fit median would make the ratio meaningless
        if fit_median_time > 0:
            relative_throughput = containing_median_time / fit_median_time
        else:
            relative_throughput = 0

        diagnostics[act] = {
            "n_containing": len(containing_cases),
            "n_fit": len(fit_cases),
            "fit_median_time": fit_median_time,
            "containing_median_time": containing_median_time,
            "relative_throughput": relative_throughput,
        }
    return diagnostics
예제 #4
0
def diagnose_from_notexisting_activities(log,
                                         notexisting_activities_in_model,
                                         parameters=None):
    """
    Train, for each activity that is not present in the model, a decision tree
    separating the fit cases from the cases containing the activity

    Parameters
    -------------
    log
        Trace log object
    notexisting_activities_in_model
        Not existing activities in the model (for each activity, a mapping
        from the traces containing it to their representation)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.STRING_ATTRIBUTES -> List of string event attributes
                to consider in building the decision tree
            Parameters.NUMERIC_ATTRIBUTES -> List of numeric event attributes
                to consider in building the decision tree
            Parameters.ENABLE_MULTIPLIER -> Repeats the labels of the smaller
                class to balance the two classes (default: False)

    Returns
    -----------
    diagnostics
        For each problematic activity, a dictionary with:
            - clf: the fitted decision tree
            - data: the data the tree has been fitted on
            - feature_names: the names of the features
            - target: the class index of each row (0 = fit, 1 = containing)
            - classes: the class names
    """
    from sklearn import tree

    if parameters is None:
        parameters = {}

    string_attributes = exec_utils.get_param_value(
        Parameters.STRING_ATTRIBUTES, parameters, [])
    numeric_attributes = exec_utils.get_param_value(
        Parameters.NUMERIC_ATTRIBUTES, parameters, [])
    enable_multiplier = exec_utils.get_param_value(
        Parameters.ENABLE_MULTIPLIER, parameters, False)

    negative_filter_params = deepcopy(parameters)
    negative_filter_params["positive"] = False
    missing_activities = list(notexisting_activities_in_model.keys())

    # computed once: it does not depend on the single activity
    log_without_missing = basic_filter.filter_log_traces_attr(
        log, missing_activities, parameters=negative_filter_params)

    diagnostics = {}
    for act in notexisting_activities_in_model:
        traces_with_act = notexisting_activities_in_model[act]

        fit_cases_repr = []
        containing_cases_repr = []
        for trace in log:
            if trace in traces_with_act:
                containing_cases_repr.append(traces_with_act[trace])
            elif trace in log_without_missing:
                # a fit case is represented by its last event
                fit_cases_repr.append(dict(trace[-1]))

        if not (fit_cases_repr and containing_cases_repr):
            # both classes are needed to train a classifier
            continue

        data, feature_names = form_representation_from_dictio_couple(
            fit_cases_repr,
            containing_cases_repr,
            string_attributes,
            numeric_attributes,
            enable_multiplier=enable_multiplier)

        n_fit = len(fit_cases_repr)
        n_containing = len(containing_cases_repr)

        if enable_multiplier:
            # repeat the smaller class by the integer ratio between the two
            fit_repeats = int(max(float(n_containing) / float(n_fit), 1))
            containing_repeats = int(
                max(float(n_fit) / float(n_containing), 1))
        else:
            fit_repeats = 1
            containing_repeats = 1

        # class 0 = "fit", class 1 = "containing"
        classes = ["fit", "containing"]
        labels = [0] * (fit_repeats * n_fit)
        labels += [1] * (containing_repeats * n_containing)
        target = np.asarray(labels)

        clf = tree.DecisionTreeClassifier(max_depth=7)
        clf.fit(data, target)

        diagnostics[act] = {
            "clf": clf,
            "data": data,
            "feature_names": feature_names,
            "target": target,
            "classes": classes
        }

    return diagnostics