Example #1
    def detect_cut_if(self, second_iteration=False, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)
        # check base cases:
        empty_log = base_case.empty_log(self.log)
        single_activity = base_case.single_activity(self.log, activity_key)
        if empty_log:
            self.detected_cut = 'empty_log'
        elif single_activity:
            self.detected_cut = 'single_activity'
        # if no base cases are found, search for a cut:
        # use the cutting and splitting functions of im_plain:
        else:
            found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()

            if found_plain_cut:
                self.apply_cut_im_plain(type_of_cut, cut, activity_key)
            # if im_plain does not find a cut, we filter the dfg on the noise threshold and then
            # apply the cut detection again; this time, different splitting functions have to be used:
            else:
                self.filter_dfg_on_threshold()
                found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
                if found_plain_cut:
                    if type_of_cut == 'concurrent':
                        logging.debug("concurrent_cut_if")
                        self.detected_cut = 'concurrent'
                        new_logs = splitting_infrequent.split_xor_infrequent(
                            cut[1], self.log, activity_key)
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'sequential':
                        logging.debug("sequential_if")
                        new_logs = splitting_infrequent.split_sequence_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'parallel':
                        logging.debug("parallel_if")
                        new_logs = split.split_parallel(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "parallel"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'loopCut':
                        logging.debug("loopCut_if")
                        new_logs = splitting_infrequent.split_loop_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "loopCut"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))

                else:
                    self.apply_fall_through_infrequent(parameters)
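Note that the four cut branches above repeat the same per-sublog child construction verbatim. A minimal refactoring sketch (hypothetical helper, not part of the pm4py source) that each branch could call right after its splitting function:

    def _append_children_for_sublogs(self, new_logs, activity_key, parameters):
        # hypothetical helper: builds one SubtreeInfrequent child per sublog,
        # exactly as each cut branch in detect_cut_if does inline
        for sublog in new_logs:
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                sublog, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                sublog, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    sublog, parameters=parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    sublog, parameters=parameters).keys())
            self.children.append(
                SubtreeInfrequent(
                    sublog, new_dfg, self.master_dfg, self.initial_dfg,
                    activities, self.counts, self.rec_depth + 1, self.f,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))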
Example #2
def apply(
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> ProcessTree:
    """
    Generate a process tree

    Parameters
    ------------
    parameters
        Parameters of the algorithm, including:
            Parameters.REC_DEPTH -> current recursion depth
            Parameters.MIN_REC_DEPTH -> minimum recursion depth
            Parameters.MAX_REC_DEPTH -> maximum recursion depth
            Parameters.PROB_LEAF -> Probability to get a leaf

    Returns
    ------------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    rec_depth = exec_utils.get_param_value(Parameters.REC_DEPTH, parameters, 0)
    min_rec_depth = exec_utils.get_param_value(Parameters.MIN_REC_DEPTH,
                                               parameters, 1)
    max_rec_depth = exec_utils.get_param_value(Parameters.MAX_REC_DEPTH,
                                               parameters, 3)
    prob_leaf = exec_utils.get_param_value(Parameters.PROB_LEAF, parameters,
                                           0.25)

    next_parameters = {
        Parameters.REC_DEPTH: rec_depth + 1,
        Parameters.MIN_REC_DEPTH: min_rec_depth,
        Parameters.MAX_REC_DEPTH: max_rec_depth,
        Parameters.PROB_LEAF: prob_leaf
    }

    is_leaf = False

    if min_rec_depth <= rec_depth <= max_rec_depth:
        r = random.random()
        if r < prob_leaf:
            is_leaf = True
    elif rec_depth > max_rec_depth:
        is_leaf = True

    if is_leaf:
        current_tree = ProcessTree(label=generate_random_string(6))
    elif rec_depth == 0:
        current_tree = ProcessTree(operator=Operator.SEQUENCE)
        start = ProcessTree(label=generate_random_string(6),
                            parent=current_tree)
        current_tree.children.append(start)
        node = apply(parameters=next_parameters)
        node.parent = current_tree
        current_tree.children.append(node)
        end = ProcessTree(label=generate_random_string(6))
        end.parent = current_tree
        current_tree.children.append(end)
    else:
        o = get_random_operator()

        current_tree = ProcessTree(operator=o)
        if o == Operator.SEQUENCE:
            n_min = 2
            n_max = 6
            selected_n = random.randrange(n_min, n_max)
            for i in range(selected_n):
                child = apply(parameters=next_parameters)
                child.parent = current_tree
                current_tree.children.append(child)
        elif o == Operator.LOOP:
            do = apply(parameters=next_parameters)
            do.parent = current_tree
            current_tree.children.append(do)
            redo = apply(parameters=next_parameters)
            redo.parent = current_tree
            current_tree.children.append(redo)
            exit_node = ProcessTree(parent=current_tree)  # avoid shadowing the 'exit' builtin
            current_tree.children.append(exit_node)
        elif o == Operator.XOR:
            n_min = 2
            n_max = 5
            selected_n = random.randrange(n_min, n_max)
            for i in range(selected_n):
                child = apply(parameters=next_parameters)
                child.parent = current_tree
                current_tree.children.append(child)
        elif o == Operator.PARALLEL:
            n_min = 2
            n_max = 4
            selected_n = random.randrange(n_min, n_max)
            for i in range(selected_n):
                child = apply(parameters=next_parameters)
                child.parent = current_tree
                current_tree.children.append(child)
    return current_tree
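A minimal usage sketch for the generator above (the Parameters members are the ones referenced in the code; the seed is only for reproducibility):

import random

random.seed(42)
tree = apply(parameters={
    Parameters.MIN_REC_DEPTH: 1,
    Parameters.MAX_REC_DEPTH: 3,
    Parameters.PROB_LEAF: 0.25,
})
print(tree)  # pm4py's ProcessTree renders as a process tree string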
Example #3
def preprocess_log(log,
                   activities=None,
                   activities_counter=None,
                   parameters=None):
    """
    Preprocess the log to get a grouped list of simplified traces (per activity)

    Parameters
    --------------
    log
        Log object
    activities
        (if provided) activities of the log
    activities_counter
        (if provided) counter of the activities of the log
    parameters
        Parameters of the algorithm

    Returns
    --------------
    traces_list
        List of simplified traces of the log
    trace_grouped_list
        Grouped list of simplified traces (per activity)
    activities
        Activities of the log
    activities_counter
        Activities counter
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    caseid_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters,
                                            constants.CASE_CONCEPT_NAME)
    index_key = exec_utils.get_param_value(Parameters.INDEX_KEY, parameters,
                                           DEFAULT_INDEX_KEY)

    if type(log) is pd.DataFrame:
        # keep only the needed columns before conversion
        log = log[list(
            set([activity_key, timestamp_key, start_timestamp_key,
                 caseid_key]))]

    log = converter.apply(log,
                          variant=converter.Variants.TO_EVENT_LOG,
                          parameters=parameters)

    traces_list = []
    for trace in log:
        trace_stream = [{
            activity_key:
            trace[i][activity_key],
            timestamp_key:
            trace[i][timestamp_key].timestamp(),
            start_timestamp_key:
            trace[i][start_timestamp_key].timestamp(),
            index_key:
            i
        } for i in range(len(trace))]
        trace_stream = sorted(
            trace_stream,
            key=lambda x:
            (x[start_timestamp_key], x[timestamp_key], x[index_key]))
        traces_list.append(trace_stream)

    if activities is None:
        activities = sorted(
            list(set(y[activity_key] for x in traces_list for y in x)))

    trace_grouped_list = []
    for trace in traces_list:
        gr = []
        for act in activities:
            act_gr = [x for x in trace if x[activity_key] == act]
            gr.append(act_gr)
        trace_grouped_list.append(gr)

    if activities_counter is None:
        activities_counter = Counter(y[activity_key] for x in traces_list
                                     for y in x)

    return traces_list, trace_grouped_list, activities, activities_counter
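A usage sketch, assuming an XES log read through pm4py's top-level API (file path hypothetical):

import pm4py

log = pm4py.read_xes("interval_log.xes")  # hypothetical path
traces_list, trace_grouped_list, activities, activities_counter = \
    preprocess_log(log)
print(len(traces_list), "traces,", len(activities), "activities")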
Example #4
def apply(log, parameters=None):
    """
    Calculates the Subcontracting metric

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm:
            Parameters.N -> the 'n' of the subcontracting algorithm, as proposed in the SNA paper by Wil van der Aalst

    Returns
    -----------
    list
        List containing the metric matrix, the resources list and a boolean flag (the metric is directed)
    """
    if parameters is None:
        parameters = {}

    import numpy
    from pm4py.statistics.traces.pandas import case_statistics

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY,
                                              parameters,
                                              xes.DEFAULT_RESOURCE_KEY)
    n = exec_utils.get_param_value(Parameters.N, parameters, 2)

    parameters_variants = {
        case_statistics.Parameters.ACTIVITY_KEY: resource_key,
        case_statistics.Parameters.ATTRIBUTE_KEY: resource_key
    }
    variants_occ = {
        x["variant"]: x["case:concept:name"]
        for x in case_statistics.get_variant_statistics(
            log, parameters=parameters_variants)
    }
    variants_resources = list(variants_occ.keys())
    resources = [x.split(",") for x in variants_resources]
    flat_list = sorted(
        list(set([item for sublist in resources for item in sublist])))

    metric_matrix = numpy.zeros((len(flat_list), len(flat_list)))

    sum_i_to_j = {}

    for rv in resources:
        for i in range(len(rv) - n):
            res_i = flat_list.index(rv[i])
            res_i_n = flat_list.index(rv[i + n])
            if res_i == res_i_n:
                if res_i not in sum_i_to_j:
                    sum_i_to_j[res_i] = {}
                    for j in range(i + 1, i + n):
                        res_j = flat_list.index(rv[j])
                        if res_j not in sum_i_to_j[res_i]:
                            sum_i_to_j[res_i][res_j] = 0
                        sum_i_to_j[res_i][res_j] += variants_occ[",".join(rv)]

    dividend = 0
    for rv in resources:
        dividend = dividend + variants_occ[",".join(rv)] * (len(rv) - 1)

    for key1 in sum_i_to_j:
        for key2 in sum_i_to_j[key1]:
            metric_matrix[key1][key2] = sum_i_to_j[key1][key2] / dividend

    return [metric_matrix, flat_list, True]
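A usage sketch on a pandas dataframe log; the CSV path and column names are hypothetical, and the log must carry an org:resource column for the metric to be meaningful:

import pandas as pd
import pm4py

df = pd.read_csv("running_example.csv")  # hypothetical path
df = pm4py.format_dataframe(df, case_id="case_id", activity_key="activity",
                            timestamp_key="timestamp")
metric_matrix, resources, is_directed = apply(df)
print(resources)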
Example #5
def get_decorations(log,
                    net,
                    initial_marking,
                    final_marking,
                    parameters=None,
                    measure="frequency",
                    ht_perf_method="last"):
    """
    Calculate decorations in order to annotate the Petri net

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated to the algorithm
    measure
        Measure to represent on the process model (frequency/performance)
    ht_perf_method
        Method to use to annotate hidden transitions (the performance value can be put on the last
        possible point (last) or on the first possible point (first))

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}

    aggregation_measure = exec_utils.get_param_value(
        Parameters.AGGREGATION_MEASURE, parameters, None)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    stat_locale = exec_utils.get_param_value(Parameters.STAT_LOCALE,
                                             parameters, {})

    variants_idx = variants_get.get_variants_from_log_trace_idx(
        log, parameters=parameters)
    variants = variants_get.convert_variants_trace_idx_to_trace_obj(
        log, variants_idx)

    parameters_tr = {
        token_replay.Variants.TOKEN_REPLAY.value.Parameters.ACTIVITY_KEY:
        activity_key,
        token_replay.Variants.TOKEN_REPLAY.value.Parameters.VARIANTS: variants
    }

    # do the replay
    aligned_traces = token_replay.apply(log,
                                        net,
                                        initial_marking,
                                        final_marking,
                                        parameters=parameters_tr)

    # optionally, the petri_reduction technique could be applied to simplify the Petri net (disabled here):
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})

    element_statistics = performance_map.single_element_statistics(
        log,
        net,
        initial_marking,
        aligned_traces,
        variants_idx,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        ht_perf_method=ht_perf_method)

    aggregated_statistics = performance_map.aggregate_statistics(
        element_statistics,
        measure=measure,
        aggregation_measure=aggregation_measure,
        stat_locale=stat_locale)

    return aggregated_statistics
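A usage sketch: discover a net with pm4py's top-level API (assumed from the 2.x API; path hypothetical) and inspect the frequency decorations:

import pm4py

log = pm4py.convert_to_event_log(pm4py.read_xes("example.xes"))  # hypothetical path
net, im, fm = pm4py.discover_petri_net_inductive(log)
decorations = get_decorations(log, net, im, fm, measure="frequency")
for element, deco in decorations.items():
    print(element, deco)  # per-element annotations (e.g. label entries)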
Example #6
def __insert_start_from_previous_event(
    df: pd.DataFrame,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> pd.DataFrame:
    """
    Inserts, for each event, a start timestamp equal to the completion timestamp
    of the previous event in the same case

    Parameters
    ---------------
    df
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    df
        Dataframe with the start timestamp for each event
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    resource_key = exec_utils.get_param_value(
        Parameters.RESOURCE_KEY, parameters,
        xes_constants.DEFAULT_RESOURCE_KEY)
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters,
                                             constants.CASE_CONCEPT_NAME)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_START_TIMESTAMP_KEY)

    from pm4py.util import pandas_utils

    df = df[[timestamp_key, resource_key, case_id_key, activity_key]]

    df = pandas_utils.insert_index(df)
    df = df.sort_values(
        [case_id_key, timestamp_key, constants.DEFAULT_INDEX_KEY])

    shifted_df = df[[case_id_key, timestamp_key]].shift(1)
    shifted_df.columns = [x + "_2" for x in shifted_df.columns]

    concat_df = pd.concat([df, shifted_df], axis=1)
    concat_df = concat_df[concat_df[case_id_key] == concat_df[
        case_id_key +
        "_2"]][[constants.DEFAULT_INDEX_KEY, timestamp_key + "_2"]]

    del shifted_df
    concat_df = concat_df.to_dict("records")
    concat_df = {
        x[constants.DEFAULT_INDEX_KEY]: x[timestamp_key + "_2"]
        for x in concat_df
    }

    df[start_timestamp_key] = df[constants.DEFAULT_INDEX_KEY].map(concat_df)
    df[start_timestamp_key] = df[start_timestamp_key].fillna(df[timestamp_key])
    df = df.sort_values(
        [start_timestamp_key, timestamp_key, constants.DEFAULT_INDEX_KEY])

    return df
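The shift/self-join above is equivalent to a group-wise shift of the completion timestamp within each case. A distilled, self-contained illustration of that idiom (column names hypothetical):

import pandas as pd

events = pd.DataFrame({
    "case": ["c1", "c1", "c1", "c2"],
    "complete": pd.to_datetime([
        "2024-01-01 09:00", "2024-01-01 10:00",
        "2024-01-01 11:30", "2024-01-01 09:15"]),
})
events = events.sort_values(["case", "complete"])
# start = completion of the previous event in the same case;
# the first event of a case falls back to its own completion time
events["start"] = events.groupby("case")["complete"].shift(1)
events["start"] = events["start"].fillna(events["complete"])
print(events)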
Example #7
def interaction_two_resources(
        df: pd.DataFrame,
        t1: Union[datetime, str],
        t2: Union[datetime, str],
        r1: str,
        r2: str,
        parameters: Optional[Dict[Union[str, Parameters],
                                  Any]] = None) -> float:
    """
    The number of cases completed during a given time slot in which two given resources were involved.

    Metric RBI 5.1 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs." ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    Parameters
    -----------------
    df
        Dataframe
    t1
        Left endpoint of the time interval
    t2
        Right endpoint of the time interval
    r1
        Resource 1
    r2
        Resource 2
    parameters
        Parameters of the algorithm

    Returns
    ----------------
    metric
        Value of the metric
    """
    if parameters is None:
        parameters = {}

    t1 = get_dt_from_string(t1)
    t2 = get_dt_from_string(t2)

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    resource_key = exec_utils.get_param_value(
        Parameters.RESOURCE_KEY, parameters,
        xes_constants.DEFAULT_RESOURCE_KEY)
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters,
                                             constants.CASE_CONCEPT_NAME)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)

    df = df[[timestamp_key, resource_key, case_id_key, activity_key]]

    from pm4py.algo.filtering.pandas.attributes import attributes_filter
    parameters_filter = {
        attributes_filter.Parameters.ATTRIBUTE_KEY: resource_key
    }
    df = attributes_filter.apply(df, [r1], parameters=parameters_filter)
    df = attributes_filter.apply(df, [r2], parameters=parameters_filter)
    last_df = df.groupby(case_id_key).last().reset_index()
    last_df = last_df[last_df[timestamp_key] >= t1]
    last_df = last_df[last_df[timestamp_key] < t2]
    cases = set(last_df[case_id_key].unique())
    df = df[df[case_id_key].isin(cases)]

    return df[case_id_key].nunique()
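A usage sketch; the CSV path, resource names and time bounds are hypothetical, and the dataframe needs an org:resource column:

import pandas as pd
import pm4py

df = pd.read_csv("receipt.csv")  # hypothetical path
df = pm4py.format_dataframe(df, case_id="case", activity_key="activity",
                            timestamp_key="timestamp")
metric = interaction_two_resources(
    df, "2011-01-01 00:00:00", "2012-01-01 00:00:00", "Sara", "Mike")
print("cases completed in the slot involving both resources:", metric)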
Example #8
def __transform_model_to_mem_efficient_structure(net,
                                                 im,
                                                 fm,
                                                 trace,
                                                 parameters=None):
    """
    Transform the Petri net model to a memory efficient structure

    Parameters
    --------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    trace
        Trace
    parameters
        Parameters

    Returns
    --------------
    model_struct
        Model data structure, including:
            PLACES_DICT: associates each place to a number
            INV_TRANS_DICT: associates a number to each transition
            LABELS_DICT: labels dictionary (a label to a number)
            TRANS_LABELS_DICT: associates each transition to the number corresponding to its label
            TRANS_PRE_DICT: preset of a transition, expressed as in this data structure
            TRANS_POST_DICT: postset of a transition, expressed as in this data structure
            TRANSF_IM: transformed initial marking
            TRANSF_FM: transformed final marking
            TRANSF_MODEL_COST_FUNCTION: transformed model cost function
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, DEFAULT_NAME_KEY)
    labels = sorted(list(set(x[activity_key] for x in trace)))

    model_cost_function = exec_utils.get_param_value(
        Parameters.PARAM_MODEL_COST_FUNCTION, parameters, None)

    if model_cost_function is None:
        model_cost_function = {}
        for t in net.transitions:
            if t.label is not None:
                model_cost_function[t] = align_utils.STD_MODEL_LOG_MOVE_COST
            else:
                preset_t = Marking()
                for a in t.in_arcs:
                    preset_t[a.source] = a.weight
                # optimization 12/08/2020
                #
                # instead of indiscriminately giving weight 1 to
                # invisible transitions, assign weight 0 to the ones
                # for which no 'sync' transition is enabled in their
                # activation markings.
                #
                # this requires modifying the state of the alignment, keeping
                # track of the length of the alignment, to avoid loops.
                en_t = enabled_transitions(net, preset_t)
                vis_t_trace = [t for t in en_t if t.label in labels]
                if len(vis_t_trace) == 0:
                    model_cost_function[t] = 0
                else:
                    model_cost_function[t] = align_utils.STD_TAU_COST

    places_dict = {place: index for index, place in enumerate(net.places)}
    trans_dict = {trans: index for index, trans in enumerate(net.transitions)}

    labels = sorted(
        list(set(t.label for t in net.transitions if t.label is not None)))
    labels_dict = {labels[i]: i for i in range(len(labels))}

    trans_labels_dict = {}
    for t in net.transitions:
        trans_labels_dict[trans_dict[t]] = labels_dict[
            t.label] if t.label is not None else None

    trans_pre_dict = {
        trans_dict[t]: {places_dict[x.source]: x.weight
                        for x in t.in_arcs}
        for t in net.transitions
    }
    trans_post_dict = {
        trans_dict[t]: {places_dict[x.target]: x.weight
                        for x in t.out_arcs}
        for t in net.transitions
    }

    transf_im = {places_dict[p]: im[p] for p in im}
    transf_fm = {places_dict[p]: fm[p] for p in fm}

    transf_model_cost_function = {
        trans_dict[t]: model_cost_function[t]
        for t in net.transitions
    }

    inv_trans_dict = {y: x for x, y in trans_dict.items()}

    return {
        PLACES_DICT: places_dict,
        INV_TRANS_DICT: inv_trans_dict,
        LABELS_DICT: labels_dict,
        TRANS_LABELS_DICT: trans_labels_dict,
        TRANS_PRE_DICT: trans_pre_dict,
        TRANS_POST_DICT: trans_post_dict,
        TRANSF_IM: transf_im,
        TRANSF_FM: transf_fm,
        TRANSF_MODEL_COST_FUNCTION: transf_model_cost_function
    }
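An illustration of the returned structure on a one-transition net, callable from within the defining module (Petri net classes imported from pm4py; the module paths follow pm4py 2.x and are an assumption here):

from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.petri_net.utils import petri_utils

net = PetriNet("tiny")
p_src, p_snk = PetriNet.Place("source"), PetriNet.Place("sink")
net.places.update({p_src, p_snk})
t_a = PetriNet.Transition("t_a", "a")
net.transitions.add(t_a)
petri_utils.add_arc_from_to(p_src, t_a, net)
petri_utils.add_arc_from_to(t_a, p_snk, net)
im, fm = Marking({p_src: 1}), Marking({p_snk: 1})
trace = [{"concept:name": "a"}]

model_struct = __transform_model_to_mem_efficient_structure(net, im, fm, trace)
print(model_struct[TRANS_LABELS_DICT])  # {0: 0}: transition 0 carries label 0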
Example #9
def apply_tree(log, parameters=None):
    """
    Apply the IM algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    if pkgutil.find_loader("pandas"):
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get

        if type(log) is pd.DataFrame:
            vars = variants_get.get_variants_count(log, parameters=parameters)
            return apply_tree_variants(vars, parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # since basic IM considers each variant only once, it makes sense to keep one trace per variant
    log = filtering_utils.keep_one_trace_per_variant(log, parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters=parameters).items() if v > 0]
    c = Counts()
    activities = attributes_get.get_attribute_values(log, activity_key)
    start_activities = list(start_activities_get.get_start_activities(log, parameters=parameters).keys())
    end_activities = list(end_activities_get.get_end_activities(log, parameters=parameters).keys())
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    recursion_depth = 0
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth, 0.0, start_activities,
                            end_activities,
                            start_activities, end_activities, parameters)

    process_tree = get_tree_repr_implain.get_repr(sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency of the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a one-child XOR that is added when single-activity flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fall-throughs/filtering are applied)
    process_tree = util.fold(process_tree)

    return process_tree
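A usage sketch for the miner above (top-level pm4py calls assumed from the 2.x API; path hypothetical):

import pm4py

log = pm4py.convert_to_event_log(pm4py.read_xes("example.xes"))  # hypothetical path
tree = apply_tree(log)
print(tree)
net, im, fm = pm4py.convert_to_petri_net(tree)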
def align_fake_log_stop_marking(fake_log,
                                net,
                                marking,
                                final_marking,
                                parameters=None):
    """
    Align the 'fake' log with all the prefixes in order to get the markings in which
    the alignment stops

    Parameters
    -------------
    fake_log
        Fake log
    net
        Petri net
    marking
        Marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm

    Returns
    -------------
    alignment
        For each trace in the log, return the marking in which the alignment stops (expressed as place name with count)
    """
    if parameters is None:
        parameters = {}
    show_progress_bar = exec_utils.get_param_value(
        Parameters.SHOW_PROGRESS_BAR, parameters, True)
    multiprocessing = exec_utils.get_param_value(Parameters.MULTIPROCESSING,
                                                 parameters, False)

    progress = None
    if pkgutil.find_loader("tqdm") and show_progress_bar and len(fake_log) > 1:
        from tqdm.auto import tqdm
        progress = tqdm(
            total=len(fake_log),
            desc="computing precision with alignments, completed variants :: ")

    if multiprocessing:
        align_intermediate_result = __align_log_with_multiprocessing_stop_marking(
            fake_log,
            net,
            marking,
            final_marking,
            progress,
            parameters=parameters)
    else:
        align_intermediate_result = __align_log_wo_multiprocessing_stop_marking(
            fake_log,
            net,
            marking,
            final_marking,
            progress,
            parameters=parameters)

    align_result = []
    for i in range(len(align_intermediate_result)):
        res = align_intermediate_result[i]
        if res is not None:
            align_result.append([])
            for mark in res:
                res2 = {}
                for pl in mark:
                    # transforms the markings for easier correspondence at the end
                    # (distributed engine friendly!)
                    res2[(pl.name[0], pl.name[1])] = mark[pl]

                align_result[-1].append(res2)
        else:
            # if there is no path from the initial marking
            # replaying the given prefix, then add None
            align_result.append(None)

    # gracefully close progress bar
    if progress is not None:
        progress.close()
    del progress

    return align_result
def apply(log, net, marking, final_marking, parameters=None):
    """
    Get Align-ET Conformance precision

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> Activity key

    Returns
    ----------
    precision
        Align-ETConformance precision value
    """

    if parameters is None:
        parameters = {}

    debug_level = parameters.get("debug_level", 0)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, log_lib.util.xes.DEFAULT_NAME_KEY)

    # default value for precision, when no activated transitions (not even by looking at the initial marking) are found
    precision = 1.0
    sum_ee = 0
    sum_at = 0
    unfit = 0

    if not check_soundness.check_easy_soundness_net_in_fin_marking(
            net, marking, final_marking):
        raise Exception(
            "trying to apply Align-ETConformance on a Petri net that is not a easy sound net!!"
        )

    prefixes, prefix_count = precision_utils.get_log_prefixes(
        log, activity_key=activity_key)
    prefixes_keys = list(prefixes.keys())
    fake_log = precision_utils.form_fake_log(prefixes_keys,
                                             activity_key=activity_key)

    align_stop_marking = align_fake_log_stop_marking(fake_log,
                                                     net,
                                                     marking,
                                                     final_marking,
                                                     parameters=parameters)
    all_markings = transform_markings_from_sync_to_original_net(
        align_stop_marking, net, parameters=parameters)

    for i in range(len(prefixes)):
        markings = all_markings[i]

        if markings is not None:
            log_transitions = set(prefixes[prefixes_keys[i]])
            activated_transitions_labels = set()
            for m in markings:
                # add to the set of activated transitions in the model the activated transitions
                # for each prefix
                activated_transitions_labels = activated_transitions_labels.union(
                    x.label for x in utils.
                    get_visible_transitions_eventually_enabled_by_marking(
                        net, m) if x.label is not None)
            escaping_edges = activated_transitions_labels.difference(
                log_transitions)

            sum_at += len(activated_transitions_labels) * prefix_count[
                prefixes_keys[i]]
            sum_ee += len(escaping_edges) * prefix_count[prefixes_keys[i]]

            if debug_level > 1:
                print("")
                print("prefix=", prefixes_keys[i])
                print("log_transitions=", log_transitions)
                print("activated_transitions=", activated_transitions_labels)
                print("escaping_edges=", escaping_edges)
        else:
            unfit += prefix_count[prefixes_keys[i]]

    if debug_level > 0:
        print("\n")
        print("overall unfit", unfit)
        print("overall activated transitions", sum_at)
        print("overall escaping edges", sum_ee)

    # fix: also the empty prefix should be counted!
    start_activities = set(get_start_activities(log, parameters=parameters))
    trans_en_ini_marking = set([
        x.label for x in get_visible_transitions_eventually_enabled_by_marking(
            net, marking)
    ])
    diff = trans_en_ini_marking.difference(start_activities)
    sum_at += len(log) * len(trans_en_ini_marking)
    sum_ee += len(log) * len(diff)
    # end fix

    if sum_at > 0:
        precision = 1 - float(sum_ee) / float(sum_at)

    return precision
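A usage sketch: precision of a model discovered from the same log (top-level pm4py calls assumed from the 2.x API; path hypothetical):

import pm4py

log = pm4py.convert_to_event_log(pm4py.read_xes("example.xes"))  # hypothetical path
net, im, fm = pm4py.discover_petri_net_inductive(log)
precision = apply(log, net, im, fm, parameters={"debug_level": 1})
print("align-etc precision:", precision)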
Example #12
def apply(tree, parameters=None):
    """
    Performs an extensive playout of the process tree

    Parameters
    -------------
    tree
        Process tree
    parameters
        Possible parameters, including:
        - Parameters.MIN_TRACE_LENGTH => minimum length of a trace (default: 1)
        - Parameters.MAX_TRACE_LENGTH => maximum length of a trace (default: min_allowed_trace_length)
        - Parameters.MAX_LOOP_OCC => maximum number of occurrences for a loop (default: MAX_TRACE_LENGTH / 2)
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.MAX_LIMIT_NUM_TRACES => maximum number of traces; the playout stops when this number is reached (default: 100000)
        - Parameters.RETURN_SET_STRINGS => if True, return the playout as a set of string tuples instead of an EventLog (default: False)

    Returns
    -------------
    log
        Event log
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    # to save memory in the returned log, allocate each activity Event only once;
    # the footprints module provides the list of activities of the process tree
    fp_tree = fp_discovery.apply(tree, parameters=parameters)
    activities = fp_tree["activities"]
    activities = {act: Event({activity_key: act}) for act in activities}

    min_allowed_trace_length = bottomup_discovery.get_min_trace_length(
        tree, parameters=parameters)
    min_trace_length = exec_utils.get_param_value(Parameters.MIN_TRACE_LENGTH,
                                                  parameters, 1)
    max_trace_length = exec_utils.get_param_value(Parameters.MAX_TRACE_LENGTH,
                                                  parameters,
                                                  min_allowed_trace_length)
    max_loop_occ = exec_utils.get_param_value(Parameters.MAX_LOOP_OCC,
                                              parameters,
                                              int(max_trace_length / 2))
    max_limit_num_traces = exec_utils.get_param_value(
        Parameters.MAX_LIMIT_NUM_TRACES, parameters, 100000)
    return_set_strings = exec_utils.get_param_value(
        Parameters.RETURN_SET_STRINGS, parameters, False)

    bottomup = bottomup_discovery.get_bottomup_nodes(tree,
                                                     parameters=parameters)
    min_rem_dict = bottomup_discovery.get_min_rem_dict(tree,
                                                       parameters=parameters)
    max_rem_dict = bottomup_discovery.get_max_rem_dict(tree,
                                                       parameters=parameters)

    playout_dictio = {}
    for i in range(len(bottomup)):
        get_playout(bottomup[i], playout_dictio, min_trace_length,
                    max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
                    max_limit_num_traces)
    tree_playout_traces = playout_dictio[tree][TRACES]

    if return_set_strings:
        return tree_playout_traces

    log = EventLog()
    for tr0 in tree_playout_traces:
        trace = Trace()
        for act in tr0:
            trace.append(activities[act])
        log.append(trace)

    return log
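A usage sketch with a small tree built via pm4py's process tree parser (assumed from the 2.x API):

import pm4py

tree = pm4py.parse_process_tree("->( 'a', X( 'b', 'c' ), 'd' )")
log = apply(tree, parameters={Parameters.MAX_TRACE_LENGTH: 5})
print(len(log), "distinct traces in the extensive playout")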
Example #13
def apply(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> List[Any]:
    """
    Calculates the Subcontracting metric

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm:
            Parameters.N -> the 'n' of the subcontracting algorithm, as proposed in the SNA paper by Wil van der Aalst

    Returns
    -----------
    list
        List containing the metric matrix, the resources list and a boolean flag (the metric is directed)
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY,
                                              parameters,
                                              xes.DEFAULT_RESOURCE_KEY)
    n = exec_utils.get_param_value(Parameters.N, parameters, 2)

    parameters_variants = {
        variants_filter.Parameters.ACTIVITY_KEY: resource_key,
        variants_filter.Parameters.ATTRIBUTE_KEY: resource_key
    }
    variants_occ = {
        x: len(y)
        for x, y in variants_filter.get_variants(
            log, parameters=parameters_variants).items()
    }
    variants_resources = list(variants_occ.keys())
    resources = [
        variants_util.get_activities_from_variant(y)
        for y in variants_resources
    ]

    flat_list = sorted(
        list(set([item for sublist in resources for item in sublist])))

    metric_matrix = numpy.zeros((len(flat_list), len(flat_list)))

    sum_i_to_j = {}
    dividend = 0

    for idx, rv in enumerate(resources):
        rvj = variants_resources[idx]
        dividend += variants_occ[rvj]

        for i in range(len(rv) - n):
            res_i = flat_list.index(rv[i])
            res_i_n = flat_list.index(rv[i + n])
            if res_i == res_i_n:
                if res_i not in sum_i_to_j:
                    sum_i_to_j[res_i] = {}
                    for j in range(i + 1, i + n):
                        res_j = flat_list.index(rv[j])
                        if res_j not in sum_i_to_j[res_i]:
                            sum_i_to_j[res_i][res_j] = 0
                        sum_i_to_j[res_i][res_j] += variants_occ[rvj]

    for key1 in sum_i_to_j:
        for key2 in sum_i_to_j[key1]:
            metric_matrix[key1][key2] = sum_i_to_j[key1][key2] / dividend

    return [metric_matrix, flat_list, True]
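Usage mirrors the pandas variant in Example #4, but on an EventLog (XES reading via pm4py assumed; path hypothetical):

import pm4py

log = pm4py.convert_to_event_log(pm4py.read_xes("example.xes"))  # hypothetical path
metric_matrix, resources, is_directed = apply(log)
print(resources)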
Example #14
    def apply_fall_through_infrequent(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall-throughs; they default to True (enabled)
        use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                           ) or parameters[Parameters.EMPTY_TRACE_KEY]
        use_act_once_per_trace = (
            Parameters.ONCE_PER_TRACE_KEY
            not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
        use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                              ) or parameters[Parameters.CONCURRENT_KEY]
        use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                               ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
        use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                        ) or parameters[Parameters.TAU_LOOP_KEY]

        if use_empty_trace:
            empty_traces_present, enough_traces, new_log = fall_through_infrequent.empty_trace_filtering(
                self.log, self.f)
            self.log = new_log
        else:
            empty_traces_present = False
            enough_traces = False
        # if an empty trace is found, the empty trace fallthrough applies
        if empty_traces_present and enough_traces:
            logging.debug("empty_trace_if")
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=self.parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=parameters).keys())
            self.children.append(
                SubtreeInfrequent(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    self.f,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        elif empty_traces_present and not enough_traces:
            # no node is added to the PT; instead, we just recurse on the log without the empty traces
            self.detect_cut_if(parameters=parameters)
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
            else:
                activity_once = False
            if activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs as we need them to append to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=parameters).keys())
                # append the chosen activity as leaf:
                self.children.append(
                    SubtreeInfrequent(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log
                self.children.append(
                    SubtreeInfrequent(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, key = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                else:
                    activity_concurrent = False
                if activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs to append to self.children later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_filter.get_attribute_values(
                        small_log, activity_key)
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=parameters).keys())
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreeInfrequent(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log:
                    self.children.append(
                        SubtreeInfrequent(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                    else:
                        strict_tau_loop = False
                    if strict_tau_loop:
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                new_log,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                        else:
                            tau_loop = False
                        if tau_loop:
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    new_log, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    new_log, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower_if")
                            self.detected_cut = 'flower'
def apply(dfg, parameters=None):
    """
    Applies the DFG mining on a given object (if it is a Pandas dataframe or a log, the DFG is calculated)

    Parameters
    -------------
    dfg
        Object (DFG) (if it is a Pandas dataframe or a log, the DFG is calculated)
    parameters
        Parameters
    """
    if parameters is None:
        parameters = {}

    dfg = dfg
    start_activities = exec_utils.get_param_value(
        Parameters.START_ACTIVITIES, parameters,
        dfg_utils.infer_start_activities(dfg))
    end_activities = exec_utils.get_param_value(
        Parameters.END_ACTIVITIES, parameters,
        dfg_utils.infer_end_activities(dfg))
    activities = dfg_utils.get_activities_from_dfg(dfg)

    net = PetriNet("")
    im = Marking()
    fm = Marking()

    source = PetriNet.Place("source")
    net.places.add(source)
    im[source] = 1
    sink = PetriNet.Place("sink")
    net.places.add(sink)
    fm[sink] = 1

    places_corr = {}
    index = 0

    for act in activities:
        places_corr[act] = PetriNet.Place(act)
        net.places.add(places_corr[act])

    for act in start_activities:
        if act in places_corr:
            index = index + 1
            trans = PetriNet.Transition(act + "_" + str(index), act)
            net.transitions.add(trans)
            pn_util.add_arc_from_to(source, trans, net)
            pn_util.add_arc_from_to(trans, places_corr[act], net)

    for act in end_activities:
        if act in places_corr:
            index = index + 1
            inv_trans = PetriNet.Transition(act + "_" + str(index), None)
            net.transitions.add(inv_trans)
            pn_util.add_arc_from_to(places_corr[act], inv_trans, net)
            pn_util.add_arc_from_to(inv_trans, sink, net)

    for el in dfg.keys():
        act1 = el[0]
        act2 = el[1]

        index = index + 1
        trans = PetriNet.Transition(act2 + "_" + str(index), act2)
        net.transitions.add(trans)

        pn_util.add_arc_from_to(places_corr[act1], trans, net)
        pn_util.add_arc_from_to(trans, places_corr[act2], net)

    return net, im, fm
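
A minimal usage sketch for the converter above, assuming the surrounding module's imports; the DFG dictionary is made up for illustration:

# hypothetical DFG: (source activity, target activity) -> frequency
dfg = {("register", "check"): 10, ("check", "decide"): 10,
       ("decide", "notify"): 7, ("decide", "check"): 3}
net, im, fm = apply(dfg)  # start/end activities are inferred from the DFG
print(len(net.places), len(net.transitions))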
Example #16
0
def preprocessing(log, parameters=None):
    """
    Preprocessing step for the Alpha+ algorithm: removes from the log all activities that are part of a loop of length one.

    Parameters
    ------------
    log
        Event log
    parameters
        Parameters of the algorithm

    Returns
    -------------
    filtered_log
        Log without the loop-length-one activities
    loop_one_list
        Loop one list
    A_filtered
        Dictionary: activity before the loop-length-one activity
    B_filtered
        Dictionary: activity after the loop-length-one activity
    loops_in_first_place
        Loops in source place
    loops_in_last_place
        Loops in sink place
    """
    loops_in_first_place = set()
    loops_in_last_place = set()

    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_util.DEFAULT_NAME_KEY)

    # List for values that have a loop of length one
    loop_one_list = []
    # Log without activities that have a loop of length one
    filtered_log = EventLog()
    # dictionary A: activity before the loop-length-one activity
    A = {}
    # dictionary B: activity after the loop-length-one activity
    B = {}
    A_filtered = {}
    B_filtered = {}
    # inserting artificial start and end activity, since it is not allowed to have a loop at the source place
    # (according to paper)
    for trace in log:
        trace.insert(0, {activity_key: 'artificial_start'})
        trace.append({activity_key: 'artificial_end'})
    for trace in log:
        i = 0
        while i < len(trace) - 1:
            current = trace[i][activity_key]
            successor = trace[i + 1][activity_key]
            if current == successor:
                if current not in loop_one_list:
                    loop_one_list.append(current)
            i += 1
    for trace in log:
        i = 0
        filtered_trace = Trace()
        while i < len(trace) - 1:
            current = trace[i][activity_key]
            successor = trace[i + 1][activity_key]
            if current not in loop_one_list:
                filtered_trace.append(current)
            if successor in loop_one_list:
                if current not in loop_one_list:
                    if successor in A:
                        A[successor].append(current)
                    else:
                        A[successor] = [current]
            if current in loop_one_list:
                if successor not in loop_one_list:
                    if current in B:
                        B[current].append(successor)
                    else:
                        B[current] = [successor]
            if i == len(trace) - 2:
                if successor not in loop_one_list:
                    filtered_trace.append(successor)
            i += 1
        filtered_log.append(filtered_trace)
    # Making sets instead of lists
    for key, value in A.items():
        A_filtered[key] = set(value)
    # Making sets instead of lists
    for key, value in B.items():
        B_filtered[key] = set(value)
    for trace in log:
        if trace[0][activity_key] in loop_one_list:
            loops_in_first_place.add(trace[0][activity_key])
        if trace[-1][activity_key] in loop_one_list:
            loops_in_last_place.add(trace[-1][activity_key])
    loops_in_first_place = list(loops_in_first_place)
    loops_in_last_place = list(loops_in_last_place)

    return (filtered_log, loop_one_list, A_filtered, B_filtered,
            loops_in_first_place, loops_in_last_place)
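
A hedged usage sketch for the preprocessing step; the tiny log is synthetic, and the import path of the log objects may differ across pm4py versions:

from pm4py.objects.log.log import EventLog, Trace, Event

# synthetic two-trace log; "b" directly follows itself (loop of length one)
log = EventLog()
for acts in (["a", "b", "b", "c"], ["a", "b", "c"]):
    trace = Trace()
    for act in acts:
        trace.append(Event({"concept:name": act}))
    log.append(trace)

filtered_log, loop_one_list, A_f, B_f, first, last = preprocessing(log)
print(loop_one_list)  # expected: ['b']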
Example #17
0
def activity_frequency(
        df: pd.DataFrame,
        t1: Union[datetime, str],
        t2: Union[datetime, str],
        r: str,
        a: str,
        parameters: Optional[Dict[Union[str, Parameters],
                                  Any]] = None) -> float:
    """
    Fraction of completions of a given activity a, by a given resource r, during a given time slot, [t1, t2),
    with respect to the total number of activity completions by resource r during [t1, t2)

    Metric RBI 1.3 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs." ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    Parameters
    -----------------
    df
        Dataframe
    t1
        Left interval
    t2
        Right interval
    r
        Resource
    a
        Activity

    Returns
    ----------------
    metric
        Value of the metric
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    resource_key = exec_utils.get_param_value(
        Parameters.RESOURCE_KEY, parameters,
        xes_constants.DEFAULT_RESOURCE_KEY)

    t1 = get_dt_from_string(t1)
    t2 = get_dt_from_string(t2)

    df = df[[activity_key, timestamp_key, resource_key]]
    df = df[df[resource_key] == r]
    df = df[df[timestamp_key] >= t1]
    df = df[df[timestamp_key] < t2]

    total = len(df)

    df = df[df[activity_key] == a]

    activity_a = len(df)

    return float(activity_a) / float(total) if total > 0 else 0.0
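
A small worked example for the metric above, assuming the module's imports and the default XES column names; the dataframe is synthetic:

import pandas as pd

df = pd.DataFrame({
    "concept:name": ["A", "B", "A"],
    "time:timestamp": pd.to_datetime(
        ["2021-01-01", "2021-01-02", "2021-01-03"]),
    "org:resource": ["r1", "r1", "r1"],
})
# 2 of the 3 completions by r1 in January 2021 are instances of "A"
print(activity_frequency(df, "2021-01-01 00:00:00",
                         "2021-02-01 00:00:00", "r1", "A"))  # 0.666...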
Example #18
0
def apply(log, net, im, fm, parameters=None):
    """
    Performs a Monte Carlo simulation of an accepting Petri net without duplicate transitions and where the preset is always
    distinct from the postset (FIFO variant; the semaphores pile up if waiting is needed, and the first in is the first to win
    the semaphore)

    Parameters
    -------------
    log
        Event log
    net
        Accepting Petri net without duplicate transitions and where the preset is always distinct from the postset
    im
        Initial marking
    fm
        Final marking
    parameters
        Parameters of the algorithm:
            PARAM_NUM_SIMULATIONS => (default: 100)
            PARAM_FORCE_DISTRIBUTION => Force a particular stochastic distribution (e.g. normal) when the stochastic map
            is discovered from the log (default: None; no distribution is forced)
            PARAM_ENABLE_DIAGNOSTICS => Enable the printing of diagnostics (default: True)
            PARAM_DIAGN_INTERVAL => Interval of time in which diagnostics of the simulation are printed (default: 32)
            PARAM_CASE_ARRIVAL_RATIO => Case arrival of new cases (default: None; inferred from the log)
            PARAM_PROVIDED_SMAP => Stochastic map that is used in the simulation (default: None; inferred from the log)
            PARAM_MAP_RESOURCES_PER_PLACE => Specification of the number of resources available per place
            (default: None; each place gets the default number of resources)
            PARAM_DEFAULT_NUM_RESOURCES_PER_PLACE => Default number of resources per place when not specified
            (default: 1; each place gets 1 resource and has to wait for the resource to finish)
            PARAM_SMALL_SCALE_FACTOR => Scale factor for the sleeping time of the actual simulation
            (default: 864000.0, i.e. 10 days)
            PARAM_MAX_THREAD_EXECUTION_TIME => Maximum execution time per thread (default: 60.0, 1 minute)

    Returns
    ------------
    simulated_log
        Simulated event log
    simulation_result
        Result of the simulation:
            Outputs.OUTPUT_PLACES_INTERVAL_TREES => interval trees that associate to each place the times in which it was occupied.
            Outputs.OUTPUT_TRANSITIONS_INTERVAL_TREES => interval trees that associate to each transition the intervals of time
            in which it could not fire because some token was in the output place.
            Outputs.OUTPUT_CASES_EX_TIME => Throughput time of the cases included in the simulated log
            Outputs.OUTPUT_MEDIAN_CASES_EX_TIME => Median of the throughput times
            Outputs.OUTPUT_CASE_ARRIVAL_RATIO => Case arrival ratio that was specified in the simulation
            Outputs.OUTPUT_TOTAL_CASES_TIME => Total time occupied by cases of the simulated log
    """
    if parameters is None:
        parameters = {}

    from intervaltree import IntervalTree

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    no_simulations = exec_utils.get_param_value(
        Parameters.PARAM_NUM_SIMULATIONS, parameters, 100)
    force_distribution = exec_utils.get_param_value(
        Parameters.PARAM_FORCE_DISTRIBUTION, parameters, None)
    enable_diagnostics = exec_utils.get_param_value(
        Parameters.PARAM_ENABLE_DIAGNOSTICS, parameters, True)
    diagn_interval = exec_utils.get_param_value(
        Parameters.PARAM_DIAGN_INTERVAL, parameters, 32.0)
    case_arrival_ratio = exec_utils.get_param_value(
        Parameters.PARAM_CASE_ARRIVAL_RATIO, parameters, None)
    smap = exec_utils.get_param_value(Parameters.PARAM_PROVIDED_SMAP,
                                      parameters, None)
    resources_per_places = exec_utils.get_param_value(
        Parameters.PARAM_MAP_RESOURCES_PER_PLACE, parameters, None)
    default_num_resources_per_places = exec_utils.get_param_value(
        Parameters.PARAM_DEFAULT_NUM_RESOURCES_PER_PLACE, parameters, 1)
    small_scale_factor = exec_utils.get_param_value(
        Parameters.PARAM_SMALL_SCALE_FACTOR, parameters, 864000)
    max_thread_exec_time = exec_utils.get_param_value(
        Parameters.PARAM_MAX_THREAD_EXECUTION_TIME, parameters, 60.0)

    if case_arrival_ratio is None:
        case_arrival_ratio = case_arrival.get_case_arrival_avg(
            log, parameters=parameters)
    if resources_per_places is None:
        resources_per_places = {}

    logging.basicConfig()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    places_interval_trees = {}
    transitions_interval_trees = {}
    cases_ex_time = []
    list_cases = {}

    for place in net.places:
        # assign a semaphore to each place.
        if place in resources_per_places:
            place.semaphore = Semaphore(resources_per_places[place])
        else:
            # if the user does not specify the number of resources per place,
            # the default number is used
            place.semaphore = Semaphore(default_num_resources_per_places)
        place.assigned_time = []
        places_interval_trees[place] = IntervalTree()
    for trans in net.transitions:
        transitions_interval_trees[trans] = IntervalTree()

    # when the user does not specify any map from transitions to random variables,
    # a replay operation is performed
    if smap is None:
        if enable_diagnostics:
            logger.info(str(time()) + " started the replay operation.")
        if force_distribution is not None:
            smap = replay.get_map_from_log_and_net(
                log,
                net,
                im,
                fm,
                force_distribution=force_distribution,
                parameters=parameters)
        else:
            smap = replay.get_map_from_log_and_net(log,
                                                   net,
                                                   im,
                                                   fm,
                                                   parameters=parameters)
        if enable_diagnostics:
            logger.info(str(time()) + " ended the replay operation.")

    # the start timestamp is set to 1000000 instead of 0 to avoid problems with 32 bit machines
    start_time = 1000000
    threads = []
    for i in range(no_simulations):
        list_cases[i] = Trace()
        t = SimulationThread(i, net, im, fm, smap, start_time,
                             places_interval_trees, transitions_interval_trees,
                             cases_ex_time, list_cases, enable_diagnostics,
                             diagn_interval, small_scale_factor,
                             max_thread_exec_time)
        t.start()
        threads.append(t)
        start_time = start_time + case_arrival_ratio
        # wait a factor before opening a thread and the next one
        sleep(case_arrival_ratio / small_scale_factor)

    for t in threads:
        t.join()

    i = 0
    while i < len(threads):
        if threads[i].terminated_correctly is False:
            del list_cases[threads[i].id]
            del threads[i]
            del cases_ex_time[i]
            continue
        i = i + 1

    if enable_diagnostics:
        logger.info(str(time()) + " ended the Monte Carlo simulation.")

    log = EventLog(list(list_cases.values()))
    min_timestamp = log[0][0][timestamp_key].timestamp()
    max_timestamp = max(y[timestamp_key].timestamp() for x in log for y in x)

    transitions_interval_trees = {
        t.name: y
        for t, y in transitions_interval_trees.items()
    }

    return log, {
        Outputs.OUTPUT_PLACES_INTERVAL_TREES.value: places_interval_trees,
        Outputs.OUTPUT_TRANSITIONS_INTERVAL_TREES.value:
        transitions_interval_trees,
        Outputs.OUTPUT_CASES_EX_TIME.value: cases_ex_time,
        Outputs.OUTPUT_MEDIAN_CASES_EX_TIME.value: median(cases_ex_time),
        Outputs.OUTPUT_CASE_ARRIVAL_RATIO.value: case_arrival_ratio,
        Outputs.OUTPUT_TOTAL_CASES_TIME.value: max_timestamp - min_timestamp
    }
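
A hedged end-to-end sketch for the simulation entry point; the XES path is hypothetical, and the import/discovery APIs may differ across pm4py versions:

from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.alpha import algorithm as alpha_miner

log = xes_importer.apply("running-example.xes")  # hypothetical path
net, im, fm = alpha_miner.apply(log)
simulated_log, res = apply(log, net, im, fm,
                           parameters={Parameters.PARAM_NUM_SIMULATIONS: 10})
print(res[Outputs.OUTPUT_MEDIAN_CASES_EX_TIME.value])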
Example #19
0
def average_duration_activity(
        df: pd.DataFrame,
        t1: Union[datetime, str],
        t2: Union[datetime, str],
        r: str,
        a: str,
        parameters: Optional[Dict[Union[str, Parameters],
                                  Any]] = None) -> float:
    """
    The average duration of instances of a given activity completed during a given time slot by a given resource.

    Metric RBI 4.3 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs." ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    Parameters
    -----------------
    df
        Dataframe
    t1
        Left interval
    t2
        Right interval
    r
        Resource
    a
        Activity

    Returns
    ----------------
    metric
        Value of the metric
    """
    if parameters is None:
        parameters = {}

    t1 = get_dt_from_string(t1)
    t2 = get_dt_from_string(t2)

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    resource_key = exec_utils.get_param_value(
        Parameters.RESOURCE_KEY, parameters,
        xes_constants.DEFAULT_RESOURCE_KEY)
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters,
                                             constants.CASE_CONCEPT_NAME)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters, None)
    if start_timestamp_key is None:
        df = __insert_start_from_previous_event(df, parameters=parameters)
        start_timestamp_key = xes_constants.DEFAULT_START_TIMESTAMP_KEY

    df = df[[
        timestamp_key, resource_key, case_id_key, activity_key,
        start_timestamp_key
    ]]
    df = df[df[resource_key] == r]
    df = df[df[activity_key] == a]
    df = df[df[timestamp_key] >= t1]
    df = df[df[timestamp_key] < t2]

    return float((df[timestamp_key] -
                  df[start_timestamp_key]).astype('timedelta64[s]').mean())
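
A brief call sketch; when the dataframe already carries a start timestamp per event, passing its column name (a hypothetical one below) skips the insertion step:

# "start_timestamp" is a hypothetical column name
parameters = {Parameters.START_TIMESTAMP_KEY: "start_timestamp"}
avg = average_duration_activity(df, "2021-01-01 00:00:00",
                                "2021-02-01 00:00:00", "r1", "A",
                                parameters=parameters)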
Example #20
0
def get_map_from_log_and_net(log, net, initial_marking, final_marking, force_distribution=None, parameters=None):
    """
    Get transition stochastic distribution map given the log and the Petri net

    Parameters
    -----------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking of the Petri net
    final_marking
        Final marking of the Petri net
    force_distribution
        If provided, distribution to force usage (e.g. EXPONENTIAL)
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> activity name
            Parameters.TIMESTAMP_KEY -> timestamp key

    Returns
    -----------
    stochastic_map
        Map that to each transition associates a random variable
    """
    stochastic_map = {}

    if parameters is None:
        parameters = {}

    token_replay_variant = exec_utils.get_param_value(Parameters.TOKEN_REPLAY_VARIANT, parameters,
                                                      executor.Variants.TOKEN_REPLAY)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)

    parameters_variants = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}
    variants_idx = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters_variants)
    variants = variants_module.convert_variants_trace_idx_to_trace_obj(log, variants_idx)

    parameters_tr = {token_replay.Parameters.ACTIVITY_KEY: activity_key, token_replay.Parameters.VARIANTS: variants}

    # do the replay
    aligned_traces = executor.apply(log, net, initial_marking, final_marking, variant=token_replay_variant,
                                    parameters=parameters_tr)

    element_statistics = performance_map.single_element_statistics(log, net, initial_marking,
                                                                   aligned_traces, variants_idx,
                                                                   activity_key=activity_key,
                                                                   timestamp_key=timestamp_key,
                                                                   parameters={"business_hours": True})

    for el in element_statistics:
        if type(el) is PetriNet.Transition and "performance" in element_statistics[el]:
            values = element_statistics[el]["performance"]

            rand = RandomVariable()
            rand.calculate_parameters(values, force_distribution=force_distribution)

            no_of_times_enabled = element_statistics[el]['no_of_times_enabled']
            no_of_times_activated = element_statistics[el]['no_of_times_activated']

            if no_of_times_enabled > 0:
                rand.set_weight(float(no_of_times_activated) / float(no_of_times_enabled))
            else:
                rand.set_weight(0.0)

            stochastic_map[el] = rand

    return stochastic_map
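
A hedged usage sketch: discover a net, then learn one random variable (and weight) per transition; the XES path is hypothetical and discovery APIs may differ across pm4py versions:

from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner

log = xes_importer.apply("running-example.xes")  # hypothetical path
net, im, fm = inductive_miner.apply(log)
smap = get_map_from_log_and_net(log, net, im, fm)
for trans, rv in smap.items():
    print(trans.name, rv)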
Example #21
0
def apply(c, Aub, bub, Aeq, beq, parameters=None):
    """
    Gets the overall solution of the problem

    Parameters
    ------------
    c
        c parameter of the algorithm
    Aub
        A_ub parameter of the algorithm
    bub
        b_ub parameter of the algorithm
    Aeq
        A_eq parameter of the algorithm
    beq
        b_eq parameter of the algorithm
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    sol
        Solution of the LP problem by the given algorithm
    """
    if parameters is None:
        parameters = {}

    require_ilp = exec_utils.get_param_value(Parameters.REQUIRE_ILP,
                                             parameters, False)

    solver = pywraplp.Solver('LinearProgrammingExample',
                             pywraplp.Solver.GLOP_LINEAR_PROGRAMMING)
    solver.Clear()
    solver.SuppressOutput()

    x_list = []
    for i in range(Aub.shape[1]):
        if require_ilp:
            x = solver.IntVar(-solver.infinity(), solver.infinity(),
                              "x_" + str(i))
        else:
            x = solver.NumVar(-solver.infinity(), solver.infinity(),
                              "x_" + str(i))
        x_list.append(x)

    objective = solver.Objective()
    for j in range(len(c)):
        if abs(c[j]) > MIN_THRESHOLD:
            objective.SetCoefficient(x_list[j], c[j])

    for i in range(Aub.shape[0]):
        ok = False
        for j in range(Aub.shape[1]):
            if abs(Aub[i, j]) > MIN_THRESHOLD:
                ok = True
                break
        if ok:
            constraint = solver.Constraint(-solver.infinity(), bub[i])
            for j in range(Aub.shape[1]):
                if abs(Aub[i, j]) > MIN_THRESHOLD:
                    constraint.SetCoefficient(x_list[j], Aub[i, j])

    if Aeq is not None and beq is not None:
        for i in range(Aeq.shape[0]):
            ok = False
            for j in range(Aeq.shape[1]):
                if abs(Aeq[i, j]) > MIN_THRESHOLD:
                    ok = True
                    break
            if ok:
                constraint = solver.Constraint(beq[i], beq[i])
                for j in range(Aeq.shape[1]):
                    if abs(Aeq[i, j]) > MIN_THRESHOLD:
                        constraint.SetCoefficient(x_list[j], Aeq[i, j])

    objective.SetMinimization()

    status = solver.Solve()

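    # a status of 0 corresponds to pywraplp.Solver.OPTIMAL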
    if status == 0:
        sol_value = 0.0
        for j in range(len(c)):
            if abs(c[j]) > MIN_THRESHOLD:
                sol_value = sol_value + c[j] * x_list[j].solution_value()
        points = [x.solution_value() for x in x_list]
    else:
        return None

    return {"c": c, "x_list": x_list, "sol_value": sol_value, "points": points}
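
A small self-contained instance for the solver wrapper above (minimize x0 + x1 subject to x0 + x1 >= 1 and x0, x1 >= 0, encoded as A_ub x <= b_ub):

import numpy as np

c = [1.0, 1.0]
Aub = np.array([[-1.0, -1.0], [-1.0, 0.0], [0.0, -1.0]])
bub = np.array([-1.0, 0.0, 0.0])
sol = apply(c, Aub, bub, None, None)
if sol is not None:
    print(sol["sol_value"], sol["points"])  # expected optimum: 1.0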
Example #22
0
def apply(dataframe, list_activities, sample_size, parameters=None):
    """
    Finds the performance spectrum provided a dataframe
    and a list of activities

    Parameters
    -------------
    dataframe
        Dataframe
    list_activities
        List of activities interesting for the performance spectrum (at least two)
    sample_size
        Size of the sample
    parameters
        Parameters of the algorithm, including:
            - Parameters.ACTIVITY_KEY
            - Parameters.TIMESTAMP_KEY
            - Parameters.CASE_ID_KEY

    Returns
    -------------
    points
        Points of the performance spectrum
    """
    if parameters is None:
        parameters = {}

    import pandas as pd
    import numpy as np

    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                              parameters, CASE_CONCEPT_NAME)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               xes.DEFAULT_TIMESTAMP_KEY)

    dataframe = dataframe[[case_id_glue, activity_key, timestamp_key]]
    dataframe = dataframe[dataframe[activity_key].isin(list_activities)]
    dataframe = pandas_utils.insert_index(dataframe,
                                          constants.DEFAULT_EVENT_INDEX_KEY)
    dataframe = dataframe.sort_values(
        [case_id_glue, timestamp_key, constants.DEFAULT_EVENT_INDEX_KEY])
    dataframe[timestamp_key] = dataframe[timestamp_key].astype(
        np.int64) / 10**9
    list_replicas = []
    activity_names = []
    filt_col_names = []
    for i in range(len(list_activities)):
        if i > 0:
            dataframe = dataframe.shift(-1)
            activity_names.append("+'@@'+")
        ren = {x: x + "_" + str(i) for x in dataframe.columns}
        list_replicas.append(dataframe.rename(columns=ren))
        filt_col_names.append(timestamp_key + "_" + str(i))

        activity_names.append("dataframe[activity_key+'_" + str(i) + "']")

    dataframe = pd.concat(list_replicas, axis=1)
    for i in range(len(list_activities) - 1):
        dataframe = dataframe[dataframe[case_id_glue + "_" +
                                        str(i)] == dataframe[case_id_glue +
                                                             "_" + str(i + 1)]]
    dataframe["@@merged_activity"] = eval("".join(activity_names))
    desidered_act = "@@".join(list_activities)
    dataframe = dataframe[dataframe["@@merged_activity"] == desidered_act]
    dataframe = dataframe[filt_col_names]

    if len(dataframe) > sample_size:
        dataframe = dataframe.sample(n=sample_size)

    points = pandas_utils.to_dict_records(dataframe)
    points = [[p[tk] for tk in filt_col_names] for p in points]
    points = sorted(points, key=lambda x: x[0])

    return points
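
A synthetic dataframe exercising the function above; the default pm4py column names are assumed:

import pandas as pd

df = pd.DataFrame({
    "case:concept:name": ["c1", "c1", "c2", "c2"],
    "concept:name": ["A", "B", "A", "B"],
    "time:timestamp": pd.to_datetime(
        ["2021-01-01", "2021-01-02", "2021-01-03", "2021-01-05"]),
})
points = apply(df, ["A", "B"], 1000, {})
print(points)  # one [t_A, t_B] pair per case, in seconds since the epoch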
Example #23
0
def apply_trace(trace, list_nets, parameters=None):
    """
    Align a trace against a decomposition

    Parameters
    --------------
    trace
        Trace
    list_nets
        List of Petri nets (decomposed)
    parameters
        Parameters of the algorithm

    Returns
    --------------
    alignment
        Alignment of the trace
    """
    if parameters is None:
        parameters = {}

    max_align_time_trace = exec_utils.get_param_value(
        Parameters.PARAM_MAX_ALIGN_TIME_TRACE, parameters, sys.maxsize)
    threshold_border_agreement = exec_utils.get_param_value(
        Parameters.PARAM_THRESHOLD_BORDER_AGREEMENT, parameters, 100000000)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, DEFAULT_NAME_KEY)
    icache = exec_utils.get_param_value(Parameters.ICACHE, parameters, dict())
    mcache = exec_utils.get_param_value(Parameters.MCACHE, parameters, dict())
    cons_nets = copy(list_nets)
    acache = get_acache(cons_nets)
    cons_nets_result = []
    cons_nets_alres = []
    cons_nets_costs = []
    max_val_alres = 0
    start_time = time.time()
    i = 0
    while i < len(cons_nets):
        this_time = time.time()
        if this_time - start_time > max_align_time_trace:
            # the alignment did not terminate in the provided time
            return None
        net, im, fm = cons_nets[i]
        proj = Trace([x for x in trace if x[activity_key] in net.lvis_labels])
        if len(proj) > 0:
            acti = tuple(x[activity_key] for x in proj)
            tup = (cons_nets[i], acti)
            if tup not in icache:
                al, cf = align(proj, net, im, fm, parameters=parameters)
                alres = get_alres(al)
                icache[tup] = (al, cf, alres)
            al, cf, alres = icache[tup]
            cons_nets_result.append(al)
            cons_nets_alres.append(alres)
            cons_nets_costs.append(cf)
            if this_time - start_time > max_align_time_trace:
                # the alignment did not terminate in the provided time
                return None
            max_val_alres = max(
                max_val_alres,
                max(z for y in alres.values() for z in y) if alres else 0)
            border_disagreements = 0
            if max_val_alres > 0:
                comp_to_merge = set()
                for act in [
                        x[activity_key] for x in trace
                        if x[activity_key] in net.lvis_labels
                ]:
                    for ind in acache[act]:
                        if ind >= i:
                            break
                        if cons_nets_alres[ind] is None or cons_nets_alres[i] is None:
                            # the alignment did not terminate in the provided time
                            return None
                        if cons_nets_alres[ind][act] != cons_nets_alres[i][act]:
                            for ind2 in acache[act]:
                                comp_to_merge.add(ind2)
                if comp_to_merge:
                    comp_to_merge = sorted(list(comp_to_merge), reverse=True)
                    border_disagreements += len(comp_to_merge)
                    # if the number of border disagreements exceeds the specified threshold,
                    # then stop iterating on the trace
                    if border_disagreements > threshold_border_agreement:
                        return None
                    comp_to_merge_ids = tuple(
                        list(cons_nets[j][0].t_tuple for j in comp_to_merge))
                    if comp_to_merge_ids not in mcache:
                        mcache[
                            comp_to_merge_ids] = decomp_utils.merge_sublist_nets(
                                [cons_nets[zz] for zz in comp_to_merge])
                    new_comp = mcache[comp_to_merge_ids]
                    cons_nets.append(new_comp)
                    j = 0
                    while j < len(comp_to_merge):
                        z = comp_to_merge[j]
                        if z < i:
                            i = i - 1
                        if z <= i:
                            del cons_nets_result[z]
                            del cons_nets_alres[z]
                            del cons_nets_costs[z]
                        del cons_nets[z]
                        j = j + 1
                    acache = get_acache(cons_nets)
                    continue
        else:
            cons_nets_result.append(None)
            cons_nets_alres.append(None)
            cons_nets_costs.append(None)
        i = i + 1
    if this_time - start_time > max_align_time_trace:
        # the alignment did not terminate in the provided time
        return None
    alignment = recompose_alignment(
        cons_nets,
        cons_nets_result,
    )
    overall_cost_dict = {}
    for cf in cons_nets_costs:
        if cf is not None:
            for el in cf:
                overall_cost_dict[el] = cf[el]
    cost = 0
    for el in alignment:
        cost = cost + overall_cost_dict[el]
    alignment = [x[1] for x in alignment]
    if this_time - start_time > max_align_time_trace:
        # the alignment did not terminate in the provided time
        return None
    res = {"cost": cost, "alignment": alignment}
    best_worst_cost = exec_utils.get_param_value(Parameters.BEST_WORST_COST,
                                                 parameters, None)
    if best_worst_cost is not None and len(trace) > 0:
        cost1 = cost // utils.STD_MODEL_LOG_MOVE_COST
        fitness = 1.0 - cost1 / (best_worst_cost + len(trace))
        res["fitness"] = fitness
    return res
def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
    """
    Perform root cause analysis starting from transition fitness knowledge

    Parameters
    -------------
    log
        Trace log object
    trans_fitness
        Transition fitness object
    parameters
        Possible parameters of the algorithm, including:
            string_attributes -> List of string event attributes to consider
                in building the decision tree
            numeric_attributes -> List of numeric event attributes to consider
                in building the decision tree

    Returns
    -----------
    diagnostics
        For each problematic transition:
            - a decision tree comparing fit and unfit executions
            - feature names
            - classes
    """
    from sklearn import tree

    if parameters is None:
        parameters = {}

    diagnostics = {}
    string_attributes = exec_utils.get_param_value(
        Parameters.STRING_ATTRIBUTES, parameters, [])
    numeric_attributes = exec_utils.get_param_value(
        Parameters.NUMERIC_ATTRIBUTES, parameters, [])
    enable_multiplier = exec_utils.get_param_value(
        Parameters.ENABLE_MULTIPLIER, parameters, False)

    for trans in trans_fitness:
        if len(trans_fitness[trans]["underfed_traces"]) > 0:
            fit_cases_repr = []
            underfed_cases_repr = []

            for trace in log:
                if trace in trans_fitness[trans]["underfed_traces"]:
                    underfed_cases_repr.append(
                        trans_fitness[trans]["underfed_traces"][trace][0])
                elif trace in trans_fitness[trans]["fit_traces"]:
                    fit_cases_repr.append(
                        trans_fitness[trans]["fit_traces"][trace][0])

            if fit_cases_repr and underfed_cases_repr:
                data, feature_names = form_representation_from_dictio_couple(
                    fit_cases_repr,
                    underfed_cases_repr,
                    string_attributes,
                    numeric_attributes,
                    enable_multiplier=enable_multiplier)
                target = []
                classes = []

                if enable_multiplier:
                    multiplier_first = int(
                        max(
                            float(len(underfed_cases_repr)) /
                            float(len(fit_cases_repr)), 1))
                    multiplier_second = int(
                        max(
                            float(len(fit_cases_repr)) /
                            float(len(underfed_cases_repr)), 1))
                else:
                    multiplier_first = 1
                    multiplier_second = 1

                for j in range(multiplier_first):
                    for i in range(len(fit_cases_repr)):
                        target.append(0)
                classes.append("fit")

                for j in range(multiplier_second):
                    for i in range(len(underfed_cases_repr)):
                        target.append(1)
                classes.append("underfed")

                target = np.asarray(target)
                clf = tree.DecisionTreeClassifier(max_depth=7)
                clf.fit(data, target)
                diagn_dict = {
                    "clf": clf,
                    "data": data,
                    "feature_names": feature_names,
                    "target": target,
                    "classes": classes
                }

                diagnostics[trans] = diagn_dict

    return diagnostics
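
A hedged call sketch; trans_fitness is typically produced by token-based replay with transition fitness enabled, and maps each transition to its fit and underfed traces:

# hypothetical: log and trans_fitness come from a previous token-based replay
diagnostics = diagnose_from_trans_fitness(log, trans_fitness)
for trans, diagn in diagnostics.items():
    print(trans, diagn["feature_names"], diagn["classes"])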
Example #25
0
def apply(c, Aub, bub, Aeq, beq, parameters=None):
    """
    Gets the overall solution of the problem

    Parameters
    ------------
    c
        c parameter of the algorithm
    Aub
        A_ub parameter of the algorithm
    bub
        b_ub parameter of the algorithm
    Aeq
        A_eq parameter of the algorithm
    beq
        b_eq parameter of the algorithm
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    sol
        Solution of the LP problem by the given algorithm
    """
    if parameters is None:
        parameters = {}

    require_ilp = exec_utils.get_param_value(Parameters.REQUIRE_ILP,
                                             parameters, False)

    prob = LpProblem("", LpMinimize)

    x_list = []
    for i in range(Aub.shape[1]):
        if require_ilp:
            x_list.append(
                LpVariable("x_" + get_terminal_part_name_num(i),
                           cat='Integer'))
        else:
            x_list.append(LpVariable("x_" + get_terminal_part_name_num(i)))

    eval_str = ""
    expr_count = 0
    for j in range(len(c)):
        if abs(c[j]) > MIN_THRESHOLD:
            if expr_count > 0:
                eval_str = eval_str + " + "
            eval_str = eval_str + str(c[j]) + "*x_list[" + str(j) + "]"
            expr_count = expr_count + 1
    eval_str = eval_str + ", \"objective\""
    prob += eval(eval_str)

    for i in range(Aub.shape[0]):
        expr_count = 0
        eval_str = ""
        for j in range(Aub.shape[1]):
            if abs(Aub[i, j]) > MIN_THRESHOLD:
                if expr_count > 0:
                    eval_str = eval_str + " + "
                eval_str = eval_str + str(Aub[i,
                                              j]) + "*x_list[" + str(j) + "]"
                expr_count = expr_count + 1
        if eval_str:
            eval_str = eval_str + "<=" + str(
                bub[i]) + ", \"vinc_" + get_terminal_part_name_num(i) + "\""

            prob += eval(eval_str)

    if Aeq is not None and beq is not None:
        for i in range(Aeq.shape[0]):
            expr_count = 0
            eval_str = ""
            for j in range(Aeq.shape[1]):
                if abs(Aeq[i, j]) > MIN_THRESHOLD:
                    if expr_count > 0:
                        eval_str = eval_str + " + "
                    eval_str = eval_str + str(
                        Aeq[i, j]) + "*x_list[" + str(j) + "]"
                    expr_count = expr_count + 1
            if eval_str:
                eval_str = eval_str + "==" + str(
                    beq[i]) + ", \"vinceq_" + get_terminal_part_name_num(
                        i + 1 + Aub.shape[0]) + "\""

                prob += eval(eval_str)

    filename = tempfile.NamedTemporaryFile(suffix='.lp').name
    prob.writeLP(filename)
    solver(prob)

    return prob
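
Unlike the Example #21 variant, this one returns the pulp problem itself; a hedged sketch of reading the solution back, reusing an instance like the one sketched there:

from pulp import value

prob = apply(c, Aub, bub, None, None)
print(value(prob.objective))
for v in prob.variables():
    print(v.name, v.varValue)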
def diagnose_from_notexisting_activities(log,
                                         notexisting_activities_in_model,
                                         parameters=None):
    """
    Perform root cause analysis related to activities that are not present in the model

    Parameters
    -------------
    log
        Trace log object
    notexisting_activities_in_model
        Not existing activities in the model
    parameters
        Possible parameters of the algorithm, including:
            string_attributes -> List of string event attributes to consider
                in building the decision tree
            numeric_attributes -> List of numeric event attributes to consider
                in building the decision tree

    Returns
    -----------
    diagnostics
        For each problematic transition:
            - a decision tree comparing fit and unfit executions
            - feature names
            - classes
    """
    from sklearn import tree

    if parameters is None:
        parameters = {}

    diagnostics = {}
    string_attributes = exec_utils.get_param_value(
        Parameters.STRING_ATTRIBUTES, parameters, [])
    numeric_attributes = exec_utils.get_param_value(
        Parameters.NUMERIC_ATTRIBUTES, parameters, [])
    enable_multiplier = exec_utils.get_param_value(
        Parameters.ENABLE_MULTIPLIER, parameters, False)

    parameters_filtering = deepcopy(parameters)
    parameters_filtering["positive"] = False
    values = list(notexisting_activities_in_model.keys())

    filtered_log = basic_filter.filter_log_traces_attr(
        log, values, parameters=parameters_filtering)

    for act in notexisting_activities_in_model:
        fit_cases_repr = []
        containing_cases_repr = []
        for trace in log:
            if trace in notexisting_activities_in_model[act]:
                containing_cases_repr.append(
                    notexisting_activities_in_model[act][trace])
            elif trace in filtered_log:
                fit_cases_repr.append(dict(trace[-1]))

        if fit_cases_repr and containing_cases_repr:
            data, feature_names = form_representation_from_dictio_couple(
                fit_cases_repr,
                containing_cases_repr,
                string_attributes,
                numeric_attributes,
                enable_multiplier=enable_multiplier)

            target = []
            classes = []

            if enable_multiplier:
                multiplier_first = int(
                    max(
                        float(len(containing_cases_repr)) /
                        float(len(fit_cases_repr)), 1))
                multiplier_second = int(
                    max(
                        float(len(fit_cases_repr)) /
                        float(len(containing_cases_repr)), 1))
            else:
                multiplier_first = 1
                multiplier_second = 1

            for j in range(multiplier_first):
                for i in range(len(fit_cases_repr)):
                    target.append(0)
            classes.append("fit")

            for j in range(multiplier_second):
                for i in range(len(containing_cases_repr)):
                    target.append(1)
            classes.append("containing")

            target = np.asarray(target)
            clf = tree.DecisionTreeClassifier(max_depth=7)
            clf.fit(data, target)
            diagn_dict = {
                "clf": clf,
                "data": data,
                "feature_names": feature_names,
                "target": target,
                "classes": classes
            }

            diagnostics[act] = diagn_dict

    return diagnostics
Example #27
0
def get_cases_description(log, parameters=None):
    """
    Get a description of traces present in the log

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
        Parameters.CASE_ID_KEY -> Trace attribute in which the case ID is contained
        Parameters.TIMESTAMP_KEY -> Column that identifies the timestamp
        Parameters.ENABLE_SORT -> Enable sorting of traces
        Parameters.SORT_BY_INDEX -> Sort the traces using this index:
            0 -> case ID
            1 -> start time
            2 -> end time
            3 -> difference
        Parameters.SORT_ASCENDING -> Set the sort direction (boolean; if true, the sort direction is ascending, otherwise
        descending)
        Parameters.MAX_RET_CASES -> Set the maximum number of returned traces

    Returns
    -----------
    ret
        Dictionary of traces associated to their start timestamp, their end timestamp and their duration
    """

    if parameters is None:
        parameters = {}

    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters, DEFAULT_TRACEID_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               DEFAULT_TIMESTAMP_KEY)
    enable_sort = exec_utils.get_param_value(Parameters.ENABLE_SORT,
                                             parameters, True)
    sort_by_index = exec_utils.get_param_value(Parameters.SORT_BY_INDEX,
                                               parameters, 0)
    sort_ascending = exec_utils.get_param_value(Parameters.SORT_ASCENDING,
                                                parameters, True)
    max_ret_cases = exec_utils.get_param_value(Parameters.MAX_RET_CASES,
                                               parameters, None)
    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])

    statistics_list = []

    for index, trace in enumerate(log):
        if trace:
            ci = trace.attributes[
                case_id_key] if case_id_key in trace.attributes else "EMPTY" + str(
                    index)
            st = trace[0][timestamp_key]
            et = trace[-1][timestamp_key]
            if business_hours:
                bh = BusinessHours(st.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                diff = bh.getseconds()
            else:
                diff = et.timestamp() - st.timestamp()
            st = st.timestamp()
            et = et.timestamp()
            statistics_list.append([ci, st, et, diff])

    if enable_sort:
        statistics_list = sorted(statistics_list,
                                 key=lambda x: x[sort_by_index],
                                 reverse=not sort_ascending)

    if max_ret_cases is not None:
        statistics_list = statistics_list[:min(len(statistics_list), max_ret_cases)]

    statistics_dict = {}

    for el in statistics_list:
        statistics_dict[str(el[0])] = {
            "startTime": el[1],
            "endTime": el[2],
            "caseDuration": el[3]
        }

    return statistics_dict
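
A hedged usage sketch sorting cases by duration, descending; the XES path is hypothetical:

from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply("running-example.xes")  # hypothetical path
cases = get_cases_description(
    log, parameters={Parameters.SORT_BY_INDEX: 3,
                     Parameters.SORT_ASCENDING: False})
for case_id, stats in list(cases.items())[:3]:
    print(case_id, stats["caseDuration"])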
Example #28
0
def apply_log(log, petri_net, initial_marking, final_marking, parameters=None, variant=DEFAULT_VARIANT):
    """
    apply alignments to a log
    Parameters
    -----------
    log
        object of the form :class:`pm4py.log.log.EventLog` event log
    petri_net
        :class:`pm4py.objects.petri.petrinet.PetriNet` the model to use for the alignment
    initial_marking
        :class:`pm4py.objects.petri.petrinet.Marking` initial marking of the net
    final_marking
        :class:`pm4py.objects.petri.petrinet.Marking` final marking of the net
    variant
        selected variant of the algorithm, possible values: {Variants.VERSION_STATE_EQUATION_A_STAR, Variants.VERSION_DIJKSTRA_NO_HEURISTICS}
    parameters
        :class:`dict` parameters of the algorithm,

    Returns
    -----------
    alignment
        :class:`list` of :class:`dict` with keys **alignment**, **cost**, **visited_states**, **queued_states** and
        **traversed_arcs**
        The alignment is a sequence of labels of the form (a,t), (a,>>), or (>>,t)
        representing synchronous/log/model-moves.
    """
    if parameters is None:
        parameters = dict()

    if not check_soundness.check_relaxed_soundness_net_in_fin_marking(petri_net, initial_marking, final_marking):
        raise Exception("trying to apply alignments on a Petri net that is not a relaxed sound net!!")

    start_time = time.time()
    max_align_time = exec_utils.get_param_value(Parameters.PARAM_MAX_ALIGN_TIME, parameters,
                                                sys.maxsize)
    max_align_time_case = exec_utils.get_param_value(Parameters.PARAM_MAX_ALIGN_TIME_TRACE, parameters,
                                                     sys.maxsize)

    parameters_best_worst = copy(parameters)

    best_worst_cost = exec_utils.get_variant(variant).get_best_worst_cost(petri_net, initial_marking, final_marking,
                                                                          parameters=parameters_best_worst)

    variants_idxs = exec_utils.get_param_value(Parameters.VARIANTS_IDX, parameters, None)
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)

    one_tr_per_var = []
    variants_list = []
    for index_variant, var in enumerate(variants_idxs):
        variants_list.append(var)

    for var in variants_list:
        one_tr_per_var.append(log[variants_idxs[var][0]])

    all_alignments = []
    for trace in one_tr_per_var:
        this_max_align_time = min(max_align_time_case, (max_align_time - (time.time() - start_time)) * 0.5)
        parameters[Parameters.PARAM_MAX_ALIGN_TIME_TRACE] = this_max_align_time
        all_alignments.append(apply_trace(trace, petri_net, initial_marking, final_marking, parameters=copy(parameters),
                                          variant=variant))

    al_idx = {}
    for index_variant, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[index_variant]

    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])

    # assign fitness to traces
    for index, align in enumerate(alignments):
        if align is not None:
            unfitness_upper_part = align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
            if unfitness_upper_part == 0:
                align['fitness'] = 1
            elif (len(log[index]) + best_worst_cost) > 0:
                align['fitness'] = 1 - (
                        (align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) / (len(log[index]) + best_worst_cost))
            else:
                align['fitness'] = 0
    return alignments
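
A hedged end-to-end sketch; the net must pass the relaxed-soundness check, the XES path is hypothetical, and the APIs may differ across pm4py versions:

from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner

log = xes_importer.apply("running-example.xes")  # hypothetical path
net, im, fm = inductive_miner.apply(log)
alignments = apply_log(log, net, im, fm)
print(alignments[0]["alignment"], alignments[0]["fitness"])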
Example #29
0
def A_eventually_B_eventually_C(df0, A, B, C, parameters=None):
    """
    Applies the A eventually B eventually C rule

    Parameters
    ------------
    df0
        Dataframe
    A
        A Attribute value
    B
        B Attribute value
    C
        C Attribute value
    parameters
        Parameters of the algorithm, including the attribute key and the positive parameter:
        - If True, returns all the cases containing A, B and C and in which A was eventually followed by B and B was eventually followed by C
        - If False, returns all the cases not containing A or B or C, or in which an instance of A was not eventually
        followed by an instance of B or an instance of B was not eventually followed by C

    Returns
    ------------
    filtered_df
        Filtered dataframe
    """
    if parameters is None:
        parameters = {}

    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    enable_timestamp = exec_utils.get_param_value(Parameters.ENABLE_TIMESTAMP, parameters, False)
    timestamp_diff_boundaries = exec_utils.get_param_value(Parameters.TIMESTAMP_DIFF_BOUNDARIES, parameters, [])

    colset = [case_id_glue, attribute_key]
    if enable_timestamp:
        colset.append(timestamp_key)

    df = df0.copy()
    df = df[colset]
    df = pandas_utils.insert_index(df)
    df_A = df[df[attribute_key] == A].copy()
    df_B = df[df[attribute_key] == B].copy()
    df_C = df[df[attribute_key] == C].copy()
    df_B["@@conceptname"] = df_B[case_id_glue]
    df_B = df_B.groupby(case_id_glue).last().set_index("@@conceptname")
    df_C["@@conceptname"] = df_C[case_id_glue]
    df_C = df_C.groupby(case_id_glue).last().set_index("@@conceptname")

    df_join = df_A.join(df_B, on=case_id_glue, rsuffix="_2").dropna()
    df_join["@@diffindex"] = df_join[constants.DEFAULT_INDEX_KEY+"_2"] - df_join[constants.DEFAULT_INDEX_KEY]
    df_join = df_join[df_join["@@diffindex"] > 0]
    df_join = df_join.join(df_C, on=case_id_glue, rsuffix="_3").dropna()
    df_join["@@diffindex2"] = df_join[constants.DEFAULT_INDEX_KEY+"_3"] - df_join[constants.DEFAULT_INDEX_KEY+"_2"]
    df_join = df_join[df_join["@@diffindex2"] > 0]

    if enable_timestamp:
        df_join["@@difftimestamp"] = (df_join[timestamp_key + "_2"] - df_join[timestamp_key]).astype('timedelta64[s]')
        df_join["@@difftimestamp2"] = (df_join[timestamp_key + "_3"] - df_join[timestamp_key + "_2"]).astype(
            'timedelta64[s]')
        if timestamp_diff_boundaries:
            df_join = df_join[df_join["@@difftimestamp"] >= timestamp_diff_boundaries[0][0]]
            df_join = df_join[df_join["@@difftimestamp"] <= timestamp_diff_boundaries[0][1]]
            df_join = df_join[df_join["@@difftimestamp2"] >= timestamp_diff_boundaries[1][0]]
            df_join = df_join[df_join["@@difftimestamp2"] <= timestamp_diff_boundaries[1][1]]

    i1 = df.set_index(case_id_glue).index
    i2 = df_join.set_index(case_id_glue).index

    if positive:
        return df0[i1.isin(i2)]
    else:
        return df0[~i1.isin(i2)]
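
A synthetic dataframe for the rule above; only case c1 contains A, then B, then C in that order:

import pandas as pd

df = pd.DataFrame({
    "case:concept:name": ["c1", "c1", "c1", "c2", "c2"],
    "concept:name": ["A", "B", "C", "A", "C"],
    "time:timestamp": pd.to_datetime(
        ["2021-01-01", "2021-01-02", "2021-01-03",
         "2021-01-04", "2021-01-05"]),
})
filtered = A_eventually_B_eventually_C(df, "A", "B", "C")
print(filtered["case:concept:name"].unique())  # expected: ['c1']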
Example #30
0
def apply_actlist(trace, model, parameters=None):
    """
    Apply log-skeleton based conformance checking given the list of activities of a trace
    and a log-skeleton model

    Parameters
    --------------
    trace
        List of activities of a trace
    model
        Log-skeleton model
    parameters
        Parameters of the algorithm, including:
        - the activity key (pm4py:param:activity_key)
        - the list of considered constraints (considered_constraints) among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq

    Returns
    --------------
    aligned_trace
        Containing:
        - is_fit => boolean that tells if the trace is perfectly fit according to the model
        - dev_fitness => deviation-based fitness (between 0 and 1; the closer the value is to 1, the more fit the trace)
        - deviations => list of deviations in the model
    """
    if parameters is None:
        parameters = {}

    consid_constraints = exec_utils.get_param_value(
        Parameters.CONSIDERED_CONSTRAINTS, parameters,
        Parameters.DEFAULT_CONSIDERED_CONSTRAINTS.value)
    trace_info = trace_skel.get_trace_info(trace)

    ret = {}
    ret[Outputs.DEVIATIONS.value] = []
    dev_total = 0
    conf_total = 0

    default_considered_constraints = Parameters.DEFAULT_CONSIDERED_CONSTRAINTS.value

    i = 0
    while i < len(default_considered_constraints):
        if default_considered_constraints[i] in consid_constraints:
            if default_considered_constraints[
                    i] == DiscoveryOutputs.ACTIV_FREQ.value:
                this_constraints = {
                    x: y
                    for x, y in model[
                        default_considered_constraints[i]].items()
                }
                conf_total += len(
                    list(act for act in trace_info[i]
                         if act in this_constraints)) + len(
                             list(act for act in trace_info[i]
                                  if act not in this_constraints)) + len(
                                      list(act for act in this_constraints
                                           if min(this_constraints[act]) > 0
                                           and not act in trace))
                for act in trace_info[i]:
                    if act in this_constraints:
                        if trace_info[i][act] not in this_constraints[act]:
                            dev_total += 1
                            ret[Outputs.DEVIATIONS.value].append(
                                (default_considered_constraints[i],
                                 (act, trace_info[i][act])))
                    else:
                        dev_total += 1
                        ret[Outputs.DEVIATIONS.value].append(
                            (default_considered_constraints[i], (act, 0)))
                for act in this_constraints:
                    if min(this_constraints[act]) > 0 and not act in trace:
                        dev_total += 1
                        ret[Outputs.DEVIATIONS.value].append(
                            (default_considered_constraints[i], (act, 0)))
            elif default_considered_constraints[
                    i] == DiscoveryOutputs.NEVER_TOGETHER.value:
                this_constraints = {
                    x
                    for x in model[default_considered_constraints[i]]
                    if x[0] in trace
                }
                conf_total += len(this_constraints)
                setinte = this_constraints.intersection(trace_info[i])
                dev_total += len(setinte)
                if len(setinte) > 0:
                    ret[Outputs.DEVIATIONS.value].append(
                        (default_considered_constraints[i], tuple(setinte)))
            else:
                this_constraints = {
                    x
                    for x in model[default_considered_constraints[i]]
                    if x[0] in trace
                }
                conf_total += len(this_constraints)
                setdiff = this_constraints.difference(trace_info[i])
                dev_total += len(setdiff)
                if len(setdiff) > 0:
                    ret[Outputs.DEVIATIONS.value].append(
                        (default_considered_constraints[i], tuple(setdiff)))
        i = i + 1
    ret[Outputs.NO_DEV_TOTAL.value] = dev_total
    ret[Outputs.NO_CONSTR_TOTAL.value] = conf_total
    ret[Outputs.DEV_FITNESS.value] = 1.0 - float(dev_total) / float(
        conf_total) if conf_total > 0 else 1.0
    ret[Outputs.DEVIATIONS.value] = sorted(ret[Outputs.DEVIATIONS.value],
                                           key=lambda x: (x[0], x[1]))
    ret[Outputs.IS_FIT.value] = len(ret[Outputs.DEVIATIONS.value]) == 0
    return ret
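
A hedged usage sketch; the log-skeleton model is discovered first, and the import paths may differ across pm4py versions:

from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.log_skeleton import algorithm as lsk_discovery

log = xes_importer.apply("running-example.xes")  # hypothetical path
model = lsk_discovery.apply(log)
trace_activities = [ev["concept:name"] for ev in log[0]]
result = apply_actlist(trace_activities, model)
print(result[Outputs.IS_FIT.value], result[Outputs.DEV_FITNESS.value])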