def test_filtering_attributes_traces(self):
    # assign a dummy instance attribute to avoid static-method warnings,
    # which tests built on the unittest package have to suppress this way
    self.dummy_variable = "dummy_value"
    input_log = os.path.join(INPUT_DATA_DIR, "running-example.xes")
    log = xes_importer.import_log(input_log)
    log1 = attributes_filter.apply(log, ["reject request"],
                                   parameters={"positive": True})
    log2 = attributes_filter.apply(log, ["reject request"],
                                   parameters={"positive": True})
    del log1
    del log2
def get_log_traces_to_activities(log, activities, parameters=None):
    """
    Get sublogs taking to each one of the specified activities

    Parameters
    -------------
    log
        Trace log object
    activities
        List of activities in the log
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> activity
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> timestamp

    Returns
    -------------
    list_logs
        List of event logs taking to the first occurrence of each activity
    considered_activities
        All activities that are effectively have been inserted in the list of logs (in some of them, the resulting log
        may be empty)
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    list_logs = []
    considered_activities = []
    for act in activities:
        other_acts = [ac for ac in activities if ac != act]
        parameters_filt1 = deepcopy(parameters)
        parameters_filt2 = deepcopy(parameters)
        parameters_filt1["positive"] = True
        parameters_filt2["positive"] = False
        filtered_log = attributes_filter.apply(log, [act], parameters=parameters_filt1)
        logging.info("get_log_traces_to_activities activities=%s act=%s 0 len(filtered_log)=%d",
                     activities, act, len(filtered_log))
        filtered_log = attributes_filter.apply(filtered_log, other_acts, parameters=parameters_filt2)
        logging.info("get_log_traces_to_activities activities=%s act=%s 1 len(filtered_log)=%d",
                     activities, act, len(filtered_log))
        filtered_log, act_durations = get_log_traces_until_activity(filtered_log, act, parameters=parameters)
        logging.info("get_log_traces_to_activities activities=%s act=%s 2 len(filtered_log)=%d",
                     activities, act, len(filtered_log))
        if filtered_log:
            list_logs.append(filtered_log)
            considered_activities.append(act)

    return list_logs, considered_activities
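
A minimal usage sketch for the function above; it assumes a XES log imported with pm4py, and the file path and activity names are placeholders that must exist in the log at hand:

from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply("running-example.xes")  # path is an assumption
# each returned sublog contains the traces leading up to the first
# occurrence of the corresponding activity
list_logs, considered_activities = get_log_traces_to_activities(
    log, ["decide", "pay compensation"])
for act, sublog in zip(considered_activities, list_logs):
    print(act, len(sublog))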
Example #3
def test_35(self):
    from pm4py.util import constants
    from pm4py.algo.filtering.pandas.attributes import attributes_filter
    df = self.load_receipt_df()
    df_traces_pos = attributes_filter.apply(df, ["Resource10"],
                                            parameters={
                                                attributes_filter.Parameters.CASE_ID_KEY: "case:concept:name",
                                                attributes_filter.Parameters.ATTRIBUTE_KEY: "org:resource",
                                                attributes_filter.Parameters.POSITIVE: True})
    df_traces_neg = attributes_filter.apply(df, ["Resource10"],
                                            parameters={
                                                attributes_filter.Parameters.CASE_ID_KEY: "case:concept:name",
                                                attributes_filter.Parameters.ATTRIBUTE_KEY: "org:resource",
                                                attributes_filter.Parameters.POSITIVE: False})
def apply(dataframe, filter, parameters=None):
    """
    Apply a filter to the current log (attributes filter)

    Parameters
    ------------
    log
        Event log
    filter
        Filter to apply
    parameters
        Parameters of the algorithm

    Returns
    ------------
    log
        Event log
    """
    if parameters is None:
        parameters = {}

    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = filter[1][0]
    parameters["positive"] = False

    return attributes_filter.apply(dataframe,
                                   filter[1][1],
                                   parameters=parameters)
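
The shape of the filter argument is only implied by the indexing above (filter[1][0] is the attribute key, filter[1][1] the list of values); a hedged sketch of a call under that assumption, with illustrative column names and values:

import pandas as pd

# illustrative dataframe; the column names follow pm4py's XES-style defaults
df = pd.DataFrame({
    "case:concept:name": ["1", "1", "2", "2"],
    "concept:name": ["register request", "reject request",
                     "register request", "pay compensation"],
})
# positive is hardcoded to False above, so cases containing the value are removed
attr_filter = ("attribute", ("concept:name", ["reject request"]))
filtered_df = apply(df, attr_filter)
print(filtered_df)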
def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
    """
    Provide some conformance diagnostics related to transitions that are executed in an unfit manner

    Parameters
    -------------
    log
        Trace log
    trans_fitness
        For each transition, keeps track of unfit executions
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each problematic transition, diagnostics about case duration
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)
    diagnostics = {}

    parameters_filtering = deepcopy(parameters)
    parameters_filtering["positive"] = True

    for trans in trans_fitness:
        if len(trans_fitness[trans]["underfed_traces"]) > 0:
            filtered_log_act = attributes_filter.apply(
                log, [trans.label], parameters=parameters_filtering)
            fit_cases = []
            underfed_cases = []
            for trace in log:
                if trace in trans_fitness[trans]["underfed_traces"]:
                    underfed_cases.append(trace)
                elif trace in filtered_log_act:
                    fit_cases.append(trace)
            if fit_cases and underfed_cases:
                n_fit = len(fit_cases)
                n_underfed = len(underfed_cases)
                fit_median_time = get_median_case_duration(
                    fit_cases, timestamp_key=timestamp_key)
                underfed_median_time = get_median_case_duration(
                    underfed_cases, timestamp_key=timestamp_key)
                relative_throughput = underfed_median_time / fit_median_time if fit_median_time > 0 else 0

                diagn_dict = {
                    "n_fit": n_fit,
                    "n_underfed": n_underfed,
                    "fit_median_time": fit_median_time,
                    "underfed_median_time": underfed_median_time,
                    "relative_throughput": relative_throughput
                }
                diagnostics[trans] = diagn_dict
    return diagnostics
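
A hedged invocation sketch; the shape of trans_fitness (a dict mapping Petri net transitions to a dict with an "underfed_traces" list) is inferred from the accesses above, and in pm4py such a structure typically comes from token-based replay with transition fitness tracking enabled:

# trans_fitness is assumed to be produced upstream by token-based replay;
# only the keys accessed in the function above are relied upon here
diagnostics = diagnose_from_trans_fitness(log, trans_fitness)
for trans, diagn in diagnostics.items():
    print(trans, diagn["n_fit"], diagn["n_underfed"], diagn["relative_throughput"])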
Example #6
def filter_log_by_caseid(log, values):
    """
    Filters log by case ID.
    :param log: log to be filtered
    :param values: value that should be filtered
    :return: filtered log
    """
    parameters = {constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "caseid"}
    return attributes_filter.apply(log, values, parameters=parameters)
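
A short usage sketch, assuming the log's traces carry a "caseid" attribute as the parameter key above implies:

kept = filter_log_by_caseid(log, ["case_1", "case_7"])  # placeholder case IDs
print(len(kept), "traces kept")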
Example #7
def filterfile(sourceFile, outputFile, patternText, inclusive):
    log = importer.apply(sourceFile)
    activities = attributes_filter.get_attribute_values(log, CONCEPT_NAME)
    filteredLog = attributes_filter.apply(
        log, [patternText],
        parameters={
            attributes_filter.Parameters.ATTRIBUTE_KEY: CONCEPT_NAME,
            attributes_filter.Parameters.POSITIVE: inclusive
        })
    # export the filtered log
    xes_exporter.apply(filteredLog, outputFile)
Example #8
def filter_open_cases(log):
    log_selected = attributes_filter.apply(
        log, ["Payment Handled"],
        parameters={
            xes_constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY:
            constants.concept_key,
            "positive": True
        })
    util.print_filtered_cases_count(len(log), len(log_selected))
    return log_selected
def filter_cases(log, starts, ends, parameters):
    for classifier_attributes in [starts, ends]:
        log = attributes_filter.apply(
            log,
            classifier_attributes,
            parameters={
                constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY:
                    parameters[performance_constants.EVENT_CLASSIFIER],
                "positive": True
            })
    return log
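
A hedged sketch of calling this helper; performance_constants.EVENT_CLASSIFIER is assumed to name the parameter holding the classifier attribute key, and the activity names are placeholders:

# keeps only the traces containing both a chosen start and a chosen end activity
params = {performance_constants.EVENT_CLASSIFIER: "concept:name"}
log = filter_cases(log, ["register request"], ["pay compensation"], params)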
Example #10
def average_case_duration(
        log: EventLog,
        t1: Union[datetime, str],
        t2: Union[datetime, str],
        r: str,
        parameters: Optional[Dict[Union[str, Parameters],
                                  Any]] = None) -> float:
    """
    The average duration of cases completed during a given time slot in which a given resource was involved.

    Metric RBI 4.4 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs." ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    Parameters
    -----------------
    log
        Event log
    t1
        Left interval
    t2
        Right interval
    r
        Resource

    Returns
    ----------------
    metric
        Value of the metric
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(
        Parameters.RESOURCE_KEY, parameters,
        xes_constants.DEFAULT_RESOURCE_KEY)

    from pm4py.algo.filtering.log.attributes import attributes_filter
    parameters_filter = {
        attributes_filter.Parameters.ATTRIBUTE_KEY: resource_key
    }
    log = attributes_filter.apply(log, [r], parameters=parameters_filter)

    from pm4py.algo.filtering.log.timestamp import timestamp_filter
    log = timestamp_filter.filter_traces_intersecting(log,
                                                      t1,
                                                      t2,
                                                      parameters=parameters)

    from pm4py.statistics.traces.generic.log import case_statistics
    cd = case_statistics.get_cases_description(log,
                                               parameters=parameters).values()
    return mean(x["caseDuration"] for x in cd)
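
A usage sketch under assumptions: the resource name and interval bounds are placeholders that must match the log at hand, and date strings are accepted alongside datetime objects per the type hints above:

# placeholder resource and interval; pm4py case durations are in seconds
avg = average_case_duration(log, "2011-01-01 00:00:00",
                            "2011-02-01 00:00:00", "Sue")
print("RBI 4.4:", avg)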
Example #11
def social_position(
        log: EventLog,
        t1_0: Union[datetime, str],
        t2_0: Union[datetime, str],
        r: str,
        parameters: Optional[Dict[Union[str, Parameters],
                                  Any]] = None) -> float:
    """
    The fraction of resources involved in the same cases with a given resource during a given time slot with
    respect to the total number of resources active during the time slot.

    Metric RBI 5.2 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs." ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    Parameters
    -----------------
    log
        Event log
    t1_0
        Left interval
    t2_0
        Right interval
    r
        Resource

    Returns
    ----------------
    metric
        Value of the metric
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(
        Parameters.RESOURCE_KEY, parameters,
        xes_constants.DEFAULT_RESOURCE_KEY)

    from pm4py.algo.filtering.log.timestamp import timestamp_filter
    log = timestamp_filter.apply_events(log, t1_0, t2_0, parameters=parameters)

    from pm4py.algo.filtering.log.attributes import attributes_filter
    parameters_filter = {
        attributes_filter.Parameters.ATTRIBUTE_KEY: resource_key
    }

    filtered_log = attributes_filter.apply(log, [r],
                                           parameters=parameters_filter)

    q1 = float(len(filtered_log))
    q2 = float(len(log))

    return q1 / q2 if q2 > 0 else 0.0
Example #12
def filter_abnormal_cases(log, criteria):
    tofilter_log = copy.deepcopy(log)
    for key, values in criteria.items():
        for value in values:
            tofilter_log = attributes_filter.apply(
                tofilter_log, [value],
                parameters={
                    xes_constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: key,
                    "positive": True
                })
    tofilter_cases = [
        case.attributes[constants.concept_key] for case in tofilter_log
    ]
Example #13
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive)
            desidered_output -> Desired output of the algorithm (default: Petri)
            include_filtered_log -> Include the filtered log in the output
            include_dfg_frequency -> Include the DFG of frequencies in the output
            include_dfg_performance -> Include the DFG of performance in the output
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
    classic_output
        Determines whether the output shall directly contain the objects (e.g. net, initial_marking, final_marking)
        or a more detailed dictionary
    """
    if parameters is None:
        parameters = {}

    returned_dictionary = {}

    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alpha")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_log = parameters.get("include_filtered_log", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", False)
    include_filtered_dfg_frequency = parameters.get("include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get("include_filtered_dfg_performance", False)

    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_count_list = []
    for activity in activities_count_dictio:
        activities_count_list.append([activity, activities_count_dictio[activity]])

    activities_count_list = sorted(activities_count_list, key=lambda x: x[1], reverse=True)
    activities_count_list = activities_count_list[:min(len(activities_count_list), maximum_number_activities)]
    activities_keep_list = [x[0] for x in activities_count_list]

    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    filtered_log = None

    if "alpha" in discovery_algorithm:
        # parameters_sa = deepcopy(parameters)
        # parameters_sa["decreasingFactor"] = 1.0
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)

    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    if include_filtered_dfg_frequency and filtered_log is not None:
        filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
    if include_filtered_dfg_performance and filtered_log is not None:
        filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters, variant="performance")

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)

        parameters_conv = {}
        parameters_conv["start_activities"] = start_activities
        parameters_conv["end_activities"] = end_activities

        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)

    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log
    if net is not None and desidered_output == "petri":
        returned_dictionary["net"] = net
    if initial_marking is not None and desidered_output == "petri":
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and desidered_output == "petri":
        returned_dictionary["final_marking"] = final_marking
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking

    return returned_dictionary
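
A hedged invocation sketch for the model extractor above, using only parameter keys listed in its docstring; the XES path and the import style are assumptions (older pm4py versions exposed xes_importer.import_log instead):

from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply("running-example.xes")  # path is an assumption
result = apply(log, parameters={"maximum_number_activities": 10,
                                "discovery_algorithm": "alpha",
                                "include_dfg_performance": True})
print(sorted(result.keys()))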
def diagnose_from_notexisting_activities(log,
                                         notexisting_activities_in_model,
                                         parameters=None):
    """
    Perform root cause analysis related to activities that are not present in the model

    Parameters
    -------------
    log
        Trace log object
    notexisting_activities_in_model
        Not existing activities in the model
    parameters
        Possible parameters of the algorithm, including:
            string_attributes -> List of string event attributes to consider
                in building the decision tree
            numeric_attributes -> List of numeric event attributes to consider
                in building the decision tree

    Returns
    -----------
    diagnostics
        For each problematic transition:
            - a decision tree comparing fit and unfit executions
            - feature names
            - classes
    """
    if parameters is None:
        parameters = {}

    diagnostics = {}
    string_attributes = parameters.get("string_attributes", [])
    numeric_attributes = parameters.get("numeric_attributes", [])
    enable_multiplier = parameters.get("enable_multiplier", False)

    parameters_filtering = deepcopy(parameters)
    parameters_filtering["positive"] = False
    values = list(notexisting_activities_in_model.keys())

    filtered_log = attributes_filter.apply(log,
                                           values,
                                           parameters=parameters_filtering)

    for act in notexisting_activities_in_model:
        fit_cases_repr = []
        containing_cases_repr = []
        for trace in log:
            if trace in notexisting_activities_in_model[act]:
                containing_cases_repr.append(
                    notexisting_activities_in_model[act][trace])
            elif trace in filtered_log:
                fit_cases_repr.append(dict(trace[-1]))

        if fit_cases_repr and containing_cases_repr:
            data, feature_names = form_representation_from_dictio_couple(
                fit_cases_repr,
                containing_cases_repr,
                string_attributes,
                numeric_attributes,
                enable_multiplier=enable_multiplier)

            target = []
            classes = []

            if enable_multiplier:
                multiplier_first = int(
                    max(
                        float(len(containing_cases_repr)) /
                        float(len(fit_cases_repr)), 1))
                multiplier_second = int(
                    max(
                        float(len(fit_cases_repr)) /
                        float(len(containing_cases_repr)), 1))
            else:
                multiplier_first = 1
                multiplier_second = 1

            for j in range(multiplier_first):
                for i in range(len(fit_cases_repr)):
                    target.append(0)
            classes.append("fit")

            for j in range(multiplier_second):
                for i in range(len(containing_cases_repr)):
                    target.append(1)
            classes.append("containing")

            target = np.asarray(target)
            clf = tree.DecisionTreeClassifier(max_depth=7)
            clf.fit(data, target)
            diagn_dict = {
                "clf": clf,
                "data": data,
                "feature_names": feature_names,
                "target": target,
                "classes": classes
            }

            diagnostics[act] = diagn_dict

    return diagnostics
Example #15
variants_count_filtered_log1 = case_statistics.get_variant_statistics(
    filtered_log1)
print(variants_count_filtered_log1)

#---
from pm4py.algo.filtering.log.attributes import attributes_filter
activities = attributes_filter.get_attribute_values(log, "concept:name")
resources = attributes_filter.get_attribute_values(log, "org:resource")
print(activities)
print(resources)

# keep only the traces that contain "Resource10"
from pm4py.util import constants
tracefilter_log_pos = attributes_filter.apply(
    log, ["Resource10"],
    parameters={
        constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "org:resource",
        "positive": True
    })
# keep only the traces that do not contain "Resource10"
tracefilter_log_neg = attributes_filter.apply(
    log, ["Resource10"],
    parameters={
        constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "org:resource",
        "positive": False
    })

eventsfilter_log = attributes_filter.apply_events(
    log, ["Resource10"],
    parameters={
        constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "org:resource",
        "positive": True
    })
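
The two trace-level calls above keep or drop whole traces depending on whether any event carries the value, while apply_events keeps or drops individual events. A quick check of that difference on the logs just computed:

# trace-level filters act on whole traces; the event-level filter keeps
# only the matching events inside the traces
print(len(tracefilter_log_pos), len(tracefilter_log_neg), len(eventsfilter_log))
print(sum(len(t) for t in tracefilter_log_pos), "events in traces containing Resource10")
print(sum(len(t) for t in eventsfilter_log), "events performed by Resource10")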
def diagnose_from_notexisting_activities(log,
                                         notexisting_activities_in_model,
                                         parameters=None):
    """
    Provide some conformance diagnostics related to activities that are not present in the model

    Parameters
    -------------
    log
        Trace log
    notexisting_activities_in_model
        Not existing activities in the model
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each problematic activity, diagnostics about case duration
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)
    diagnostics = {}

    parameters_filtering = deepcopy(parameters)
    parameters_filtering["positive"] = False
    values = list(notexisting_activities_in_model.keys())

    filtered_log = attributes_filter.apply(log,
                                           values,
                                           parameters=parameters_filtering)

    for act in notexisting_activities_in_model:
        fit_cases = []
        containing_cases = []
        for trace in log:
            if trace in notexisting_activities_in_model[act]:
                containing_cases.append(trace)
            elif trace in filtered_log:
                fit_cases.append(trace)
        if containing_cases and fit_cases:
            n_containing = len(containing_cases)
            n_fit = len(fit_cases)
            fit_median_time = get_median_case_duration(
                fit_cases, timestamp_key=timestamp_key)
            containing_median_time = get_median_case_duration(
                containing_cases, timestamp_key=timestamp_key)
            relative_throughput = containing_median_time / fit_median_time if fit_median_time > 0 else 0

            diagn_dict = {
                "n_containing": n_containing,
                "n_fit": n_fit,
                "fit_median_time": fit_median_time,
                "containing_median_time": containing_median_time,
                "relative_throughput": relative_throughput
            }
            diagnostics[act] = diagn_dict
    return diagnostics
Example #17
from grm import preprocessing, GRM
from grm.util import get_activities
from pm4py.algo.filtering.log.attributes import attributes_filter
from pm4py.util import constants
from pm4py.objects.log.util import sampling

model_path = '../best_models/sp2020/2020-05-06-05-40_best_model.pickle'
logfile = "sp2020.csv"
name_of_case_id = "CASE_ID"
name_of_activity = "ACTIVITY"
name_of_timestamp = "TIMESTAMP"
name_of_label = "REPAIR_IN_TIME_5D"

log = preprocessing.import_data("data", logfile, separator=";", quote='"', case_id=name_of_case_id,
                                activity=name_of_activity,
                                time_stamp=name_of_timestamp, target=name_of_label)

activities = get_activities(log)
grm_model = GRM.GRM(log, activities, restore_file=model_path)

log = attributes_filter.apply(log, [0],
                              parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "label", "positive": True})
log = sampling.sample(log, n=5000)
grm_model.visualize_dfg(save_file=True, log=log, file_name="sp2020_", variant="all")
Example #18
    if sb_1_trib:
        list_ojs, list_classes, list_ojs_cod, list_classes_cod = filtra_tribunal(sb_1_trib)
    sb_1_classes = st.selectbox("Classe", list_classes, 2)
    sb_1_OJ = st.selectbox("Órgão Julgador 1", list_ojs, 0)
    sb_2_OJ = st.selectbox("Órgão Julgador 2", list_ojs, 1)

    rd_metrica = st.radio("Métrica", ('Frequência', 'Tempo'))

tracefilter_log_pos = log
if sb_1_trib:
    tracefilter_log_pos = attributes_filter.apply(
        tracefilter_log_pos,
        sb_1_trib,
        parameters={
            # attributes_filter.Parameters.CASE_ID_KEY: 'case:concept:name',
            attributes_filter.Parameters.ATTRIBUTE_KEY: "org:siglaTribunal",
            attributes_filter.Parameters.POSITIVE: True
        })
print("trib", sb_1_trib, len(tracefilter_log_pos))
if sb_1_classes:
    tracefilter_log_pos = attributes_filter.apply(
        tracefilter_log_pos,
        sb_1_classes,
        parameters={
            # attributes_filter.Parameters.CASE_ID_KEY: 'case:concept:name',
            attributes_filter.Parameters.ATTRIBUTE_KEY: "org:Classe",
            attributes_filter.Parameters.POSITIVE: True
        })
print("classes", sb_1_classes, len(tracefilter_log_pos))
if sb_1_OJ:
Example #19
events_2017 = 0
for trace in fil_log_17:
    events_2017 += len(trace)
print("2017 events", events_2017)

# activities
activities = attributes_filter.get_attribute_values(fil_log_17, "concept:name")
print("2017 activities", len(activities))

# class distribution
labels = attributes_filter.get_attribute_values(fil_log_17, "Accepted")
print(labels)

tracefilter_log_pos = attributes_filter.apply(
    fil_log_17, [True],
    parameters={
        attributes_filter.PARAMETER_CONSTANT_ATTRIBUTE_KEY: name_of_label,
        "positive": True
    })
tracefilter_log_neg = attributes_filter.apply(
    fil_log_17, [True],
    parameters={
        attributes_filter.PARAMETER_CONSTANT_ATTRIBUTE_KEY: name_of_label,
        "positive": False
    })
pos = len(tracefilter_log_pos)
neg = len(tracefilter_log_neg)
print("2017 pos", pos, ", part: ", pos / (pos + neg))
print("2017 neg", neg, ", part: ", neg / (pos + neg))

# sp2020____________________________________________________________________________________________________________
log_file = "coffeemachine_service_repair.csv"
def create_process_models(output_case_traces_cluster, path_data_sources,
                          dir_runtime_files, dir_dfg_cluster_files,
                          filename_dfg_cluster, rel_proportion_dfg_threshold,
                          logging_level):
    """
    Creates directly-follows graphs out of an event log.
    :param output_case_traces_cluster: traces that are visualised
    :param path_data_sources: path of sources and outputs
    :param dir_runtime_files: folder containing files read and written during runtime
    :param dir_dfg_cluster_files: folder containing dfg png files
    :param filename_dfg_cluster: filename of dfg file (per cluster)
    :param rel_proportion_dfg_threshold: threshold for filtering out sensors in dfg relative to max occurrences of a sensor
    :param logging_level: level of logging
    :return:
    """

    # keep only needed columns
    output_case_traces_cluster = output_case_traces_cluster.reindex(
        columns=['Case', 'LC_Activity', 'Timestamp', 'Cluster'])
    output_case_traces_cluster = output_case_traces_cluster.rename(
        columns={
            'Case': 'case:concept:name',
            'LC_Activity': 'concept:name',
            'Timestamp': 'time:timestamp'
        })

    # create directory for dfg pngs
    os.mkdir(path_data_sources + dir_runtime_files + dir_dfg_cluster_files)
    # create dfg for each cluster
    clusters = output_case_traces_cluster.Cluster.unique()
    for cluster in clusters:
        log = output_case_traces_cluster.loc[output_case_traces_cluster.Cluster
                                             == cluster]
        log = log.astype(str)

        # convert pandas data frame to pm4py event log for further processing
        log = log_converter.apply(log)

        # keep only activities with more than certain number of occurrences
        activities = attributes_get.get_attribute_values(log, 'concept:name')
        # determine that number relative to the max number of occurrences of a sensor in the cluster
        # (the result is the threshold at which an activity/activity strand is kept)
        min_number_of_occurrences = round(
            (max(activities.values()) * rel_proportion_dfg_threshold), 0)
        activities = {
            x: y
            for x, y in activities.items() if y >= min_number_of_occurrences
        }
        log = attributes_filter.apply(log, activities)

        # create dfg out of event log
        dfg = dfg_discovery.apply(log)

        # define start and end activities
        start_activities = sa_get.get_start_activities(log)
        end_activities = ea_get.get_end_activities(log)

        # create png of dfg (if the image does not show a graph, the sensors may not have triggered often enough)
        gviz = dfg_visualization.apply(
            dfg=dfg,
            log=log,
            variant=dfg_visualization.Variants.FREQUENCY,
            parameters={
                'start_activities': start_activities,
                'end_activities': end_activities
            })
        dfg_visualization.save(
            gviz,
            path_data_sources + dir_runtime_files + dir_dfg_cluster_files +
            (filename_dfg_cluster.format(cluster=str(cluster))))

    # logger
    logger = logging.getLogger(inspect.stack()[0][3])
    logger.setLevel(logging_level)
    logger.info("Saved directly follows graphs into '../%s'.",
                path_data_sources + dir_runtime_files + dir_dfg_cluster_files)
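
A hedged usage sketch with a tiny synthetic dataframe; all paths, the filename pattern and the threshold are placeholders, and the expected input column names are taken from the reindex/rename calls above:

import logging
import pandas as pd

# synthetic placeholder data in the column layout the function expects
df = pd.DataFrame({
    "Case": [1, 1, 2, 2],
    "LC_Activity": ["sensor_a", "sensor_b", "sensor_a", "sensor_b"],
    "Timestamp": pd.to_datetime(["2020-01-01 08:00", "2020-01-01 08:05",
                                 "2020-01-02 09:00", "2020-01-02 09:04"]),
    "Cluster": [0, 0, 0, 0],
})
create_process_models(df, "data/", "run_01/", "dfg/",
                      "dfg_cluster_{cluster}.png", 0.5, logging.INFO)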