def get_decorations(log, net, initial_marking, final_marking, parameters=None, measure="frequency",
                    ht_perf_method="last"):
    """
    Calculate decorations in order to annotate the Petri net

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated to the algorithm
    measure
        Measure to represent on the process model (frequency/performance)
    ht_perf_method
        Method to use to annotate hidden transitions (the performance value can be put on the last
        possible point (last) or on the first possible point (first))

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}

    aggregation_measure = exec_utils.get_param_value(Parameters.AGGREGATION_MEASURE, parameters, None)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)

    variants_idx = variants_get.get_variants_from_log_trace_idx(log, parameters=parameters)
    variants = variants_get.convert_variants_trace_idx_to_trace_obj(log, variants_idx)

    parameters_tr = {token_replay.Variants.TOKEN_REPLAY.value.Parameters.ACTIVITY_KEY: activity_key,
                     token_replay.Variants.TOKEN_REPLAY.value.Parameters.VARIANTS: variants}

    # do the replay
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters_tr)

    # apply petri_reduction technique in order to simplify the Petri net
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})

    element_statistics = performance_map.single_element_statistics(log, net, initial_marking,
                                                                   aligned_traces, variants_idx,
                                                                   activity_key=activity_key,
                                                                   timestamp_key=timestamp_key,
                                                                   ht_perf_method=ht_perf_method)

    aggregated_statistics = performance_map.aggregate_statistics(element_statistics, measure=measure,
                                                                 aggregation_measure=aggregation_measure)

    return aggregated_statistics
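A minimal usage sketch (assumptions: `log`, `net`, `im` and `fm` were obtained elsewhere, e.g. by reading a XES file and discovering a model; `Parameters.AGGREGATION_MEASURE` is the same key read at the top of the function):

# Hypothetical usage sketch -- log, net, im, fm are assumed inputs
freq_decorations = get_decorations(log, net, im, fm, measure="frequency")
perf_decorations = get_decorations(log, net, im, fm, measure="performance",
                                   ht_perf_method="last",
                                   parameters={Parameters.AGGREGATION_MEASURE: "mean"})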
Example 2
def apply_log(log, list_nets, parameters=None):
    """
    Apply the recomposition alignment approach
    to a log and a decomposed Petri net

    Parameters
    --------------
    log
        Log
    list_nets
        Decomposition
    parameters
        Parameters of the algorithm

    Returns
    --------------
    aligned_traces
        For each trace, return its alignment
    """
    if parameters is None:
        parameters = {}
    icache = exec_utils.get_param_value(Parameters.ICACHE, parameters, dict())
    mcache = exec_utils.get_param_value(Parameters.MCACHE, parameters, dict())

    parameters[Parameters.ICACHE] = icache
    parameters[Parameters.MCACHE] = mcache

    variants_idxs = variants_module.get_variants_from_log_trace_idx(
        log, parameters=parameters)
    one_tr_per_var = []
    variants_list = []
    for index_variant, variant in enumerate(variants_idxs):
        variants_list.append(variant)
    for variant in variants_list:
        one_tr_per_var.append(log[variants_idxs[variant][0]])
    all_alignments = []
    max_align_time = exec_utils.get_param_value(
        Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize)
    start_time = time.time()
    for index, trace in enumerate(one_tr_per_var):
        this_time = time.time()
        if this_time - start_time <= max_align_time:
            alignment = apply_trace(trace, list_nets, parameters=parameters)
        else:
            alignment = None
        all_alignments.append(alignment)
    al_idx = {}
    for index_variant, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[index_variant]
    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])
    return alignments
Example 3
def __get_variants_structure(log, parameters):
    variants_idxs = exec_utils.get_param_value(Parameters.VARIANTS_IDX, parameters, None)
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)

    one_tr_per_var = []
    variants_list = []
    for index_variant, var in enumerate(variants_idxs):
        variants_list.append(var)

    for var in variants_list:
        one_tr_per_var.append(log[variants_idxs[var][0]])

    return variants_idxs, one_tr_per_var
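The shape of these structures is easy to illustrate without pm4py: the variants dictionary maps each variant to the indices of the traces exhibiting it, and one_tr_per_var keeps one representative trace per variant. A self-contained toy sketch:

# Toy illustration (hypothetical data): 5 traces, 2 variants
log = [["a", "b", "c"], ["a", "c"], ["a", "b", "c"], ["a", "c"], ["a", "b", "c"]]
variants_idxs = {"a,b,c": [0, 2, 4], "a,c": [1, 3]}

# pick the first trace of each variant, as the helper above does
one_tr_per_var = [log[idxs[0]] for idxs in variants_idxs.values()]
print(one_tr_per_var)  # [['a', 'b', 'c'], ['a', 'c']]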
Example 4
def get_transition_performance_with_token_replay(log, net, im, fm):
    """
    Gets the transition performance through the usage of token-based replay

    Parameters
    -------------
    log
        Event log
    net
        Petri net
    im
        Initial marking
    fm
        Final marking

    Returns
    --------------
    transition_performance
        Dictionary where each transition label is associated to performance measures
    """
    from pm4py.algo.conformance.tokenreplay import factory as token_replay
    variants_idx = variants_get.get_variants_from_log_trace_idx(log)
    aligned_traces = token_replay.apply(log, net, im, fm)
    element_statistics = single_element_statistics(log, net, im,
                                                   aligned_traces, variants_idx)

    transition_performance = {}
    for el in element_statistics:
        if type(el) is PetriNet.Transition and el.label is not None:
            if "log_idx" in element_statistics[el] and "performance" in element_statistics[el]:
                if len(element_statistics[el]["performance"]) > 0:
                    transition_performance[str(el)] = {"all_values": [], "case_association": {}, "mean": 0.0,
                                                       "median": 0.0}
                    for i in range(len(element_statistics[el]["log_idx"])):
                        if element_statistics[el]["log_idx"][i] not in \
                                transition_performance[str(el)]["case_association"]:
                            transition_performance[str(el)]["case_association"][
                                element_statistics[el]["log_idx"][i]] = []
                        transition_performance[str(el)]["case_association"][
                            element_statistics[el]["log_idx"][i]].append(
                            element_statistics[el]["performance"][i])
                        transition_performance[str(el)]["all_values"].append(element_statistics[el]["performance"][i])
                    transition_performance[str(el)]["all_values"] = sorted(
                        transition_performance[str(el)]["all_values"])
                    if transition_performance[str(el)]["all_values"]:
                        transition_performance[str(el)]["mean"] = mean(transition_performance[str(el)]["all_values"])
                        transition_performance[str(el)]["median"] = median(
                            transition_performance[str(el)]["all_values"])
    return transition_performance
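A minimal usage sketch (assumptions: `log`, `net`, `im`, `fm` are an event log and an accepting Petri net obtained elsewhere):

# Hypothetical usage sketch -- inputs assumed to exist
perf = get_transition_performance_with_token_replay(log, net, im, fm)
for label, stats in perf.items():
    # mean/median are aggregated over the recorded performance values
    print(label, stats["mean"], stats["median"], len(stats["all_values"]))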
Example 5
def __approximate_alignments_for_log(log: EventLog,
                                     pt: ProcessTree,
                                     max_tl: int,
                                     max_th: int,
                                     parameters=None):
    if parameters is None:
        parameters = {}

    a_sets, sa_sets, ea_sets, tau_sets = initialize_a_sa_ea_tau_sets(pt)
    variants = get_variants_from_log_trace_idx(log, parameters=parameters)
    inv_corr = {}

    max_align_time = exec_utils.get_param_value(
        Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize)
    log_alignment_start_time = time.time()

    for i, var in enumerate(variants):
        this_time = time.time()

        if this_time - log_alignment_start_time <= max_align_time:
            parameters["trace_alignment_start_time"] = this_time
            alignment = __approximate_alignment_for_trace(
                pt,
                a_sets,
                sa_sets,
                ea_sets,
                tau_sets,
                log[variants[var][0]],
                max_tl,
                max_th,
                parameters=parameters)
            alignment = add_fitness_and_cost_info_to_alignments(
                alignment, pt, log[variants[var][0]], parameters=parameters)
        else:
            alignment = None

        for idx in variants[var]:
            inv_corr[idx] = alignment
    alignments = []
    for i in range(len(log)):
        alignments.append(inv_corr[i])
    return alignments
Example 6
def apply_multiprocessing(log,
                          net,
                          initial_marking,
                          final_marking,
                          parameters=None,
                          variant=TOKEN_REPLAY):
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY not in parameters:
        parameters[
            pmutil.constants.
            PARAMETER_CONSTANT_TIMESTAMP_KEY] = xes_util.DEFAULT_TIMESTAMP_KEY
    if pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY not in parameters:
        parameters[
            pmutil.constants.
            PARAMETER_CONSTANT_CASEID_KEY] = pmutil.constants.CASE_ATTRIBUTE_GLUE

    variants_idxs = parameters[
        VARIANTS_IDX] if VARIANTS_IDX in parameters else None
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(
            log, parameters=parameters)
    variants_list = [[x, len(y)] for x, y in variants_idxs.items()]

    no_cores = mp.cpu_count()

    petri_net_string = petri_exporter.export_petri_as_string(
        net, initial_marking, final_marking)

    n = math.ceil(len(variants_list) / no_cores)

    variants_list_split = list(chunks(variants_list, n))

    # Define an output queue
    output = mp.Queue()

    # pass the callable and its arguments separately, so the replay actually
    # runs inside the child processes (calling the function here would execute
    # it in the parent and hand mp.Process its return value as target)
    processes = [
        mp.Process(target=VERSIONS_MULTIPROCESSING[variant],
                   args=(output, x, petri_net_string),
                   kwargs={"parameters": parameters})
        for x in variants_list_split
    ]

    # Run processes
    for p in processes:
        p.start()

    results = []
    for p in processes:
        result = output.get()
        results.append(result)

    al_idx = {}
    for index, el in enumerate(variants_list_split):
        for index2, var_item in enumerate(el):
            variant = var_item[0]
            for trace_idx in variants_idxs[variant]:
                al_idx[trace_idx] = results[index][index2]

    replayed_cases = []
    for i in range(len(log)):
        replayed_cases.append(al_idx[i])

    return replayed_cases
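The snippet imports a `chunks` helper that is not shown; a common implementation, given here as an assumption about what the import provides, splits a list into consecutive slices of at most n elements:

# Sketch of the assumed `chunks` helper (not part of the snippet above)
def chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

print(list(chunks([1, 2, 3, 4, 5], 2)))  # [[1, 2], [3, 4], [5]]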
Example 7
def apply_log(log,
              petri_net,
              initial_marking,
              final_marking,
              parameters=None,
              variant=DEFAULT_VARIANT):
    """
    Apply alignments to a log

    Parameters
    -----------
    log
        object of the form :class:`pm4py.log.log.EventLog` event log
    petri_net
        :class:`pm4py.objects.petri.petrinet.PetriNet` the model to use for the alignment
    initial_marking
        :class:`pm4py.objects.petri.petrinet.Marking` initial marking of the net
    final_marking
        :class:`pm4py.objects.petri.petrinet.Marking` final marking of the net
    variant
        selected variant of the algorithm, possible values: Variants.VERSION_STATE_EQUATION_A_STAR,
        Variants.VERSION_DIJKSTRA_NO_HEURISTICS
    parameters
        :class:`dict` parameters of the algorithm

    Returns
    -----------
    alignment
        :class:`list` of :class:`dict` with keys **alignment**, **cost**, **visited_states**, **queued_states** and
        **traversed_arcs**
        The alignment is a sequence of labels of the form (a,t), (a,>>), or (>>,t)
        representing synchronous/log/model-moves.
    """
    if parameters is None:
        parameters = dict()

    if not check_soundness.check_easy_soundness_net_in_fin_marking(
            petri_net, initial_marking, final_marking):
        raise Exception(
            "trying to apply alignments on a Petri net that is not an easy sound net!"
        )

    start_time = time.time()
    max_align_time = exec_utils.get_param_value(
        Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize)
    max_align_time_case = exec_utils.get_param_value(
        Parameters.PARAM_MAX_ALIGN_TIME_TRACE, parameters, sys.maxsize)

    parameters_best_worst = copy(parameters)

    best_worst_cost = exec_utils.get_variant(variant).get_best_worst_cost(
        petri_net,
        initial_marking,
        final_marking,
        parameters=parameters_best_worst)

    variants_idxs = exec_utils.get_param_value(Parameters.VARIANTS_IDX,
                                               parameters, None)
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(
            log, parameters=parameters)

    one_tr_per_var = []
    variants_list = []
    for index_variant, var in enumerate(variants_idxs):
        variants_list.append(var)

    for var in variants_list:
        one_tr_per_var.append(log[variants_idxs[var][0]])

    all_alignments = []
    for trace in one_tr_per_var:
        this_max_align_time = min(max_align_time_case,
                                  (max_align_time -
                                   (time.time() - start_time)) * 0.5)
        parameters[Parameters.PARAM_MAX_ALIGN_TIME_TRACE] = this_max_align_time
        all_alignments.append(
            apply_trace(trace,
                        petri_net,
                        initial_marking,
                        final_marking,
                        parameters=copy(parameters),
                        variant=variant))

    al_idx = {}
    for index_variant, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[index_variant]

    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])

    # assign fitness to traces
    for index, align in enumerate(alignments):
        if align is not None:
            unfitness_upper_part = align[
                'cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
            if unfitness_upper_part == 0:
                align['fitness'] = 1
            elif (len(log[index]) + best_worst_cost) > 0:
                align['fitness'] = 1 - (
                    (align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) /
                    (len(log[index]) + best_worst_cost))
            else:
                align['fitness'] = 0
    return alignments
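The fitness assignment at the end reads as 1 minus the ratio between the number of non-synchronous moves in the alignment (cost divided by the standard move cost) and the worst possible number of such moves (one per event, plus the cost of aligning the empty trace). A numeric sketch with assumed values:

# Numeric sketch of the fitness formula, with assumed values
STD_MODEL_LOG_MOVE_COST = 10000      # assumed standard cost of a log/model move
cost = 20000                         # alignment cost: two non-synchronous moves
trace_length = 5                     # number of events in the trace
best_worst_cost = 4                  # assumed cost of aligning the empty trace (in moves)

fitness = 1 - (cost // STD_MODEL_LOG_MOVE_COST) / (trace_length + best_worst_cost)
print(fitness)  # 1 - 2/9 = 0.777...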
Example 8
def apply(log, aligned_traces, parameters=None):
    """
    Gets the alignment table visualization from the alignments output

    Parameters
    -------------
    log
        Event log
    aligned_traces
        Aligned traces
    parameters
        Parameters of the algorithm

    Returns
    -------------
    gviz
        Graphviz object
    """
    if parameters is None:
        parameters = {}

    variants_idx_dict = variants_get.get_variants_from_log_trace_idx(
        log, parameters=parameters)

    variants_idx_list = []
    for variant in variants_idx_dict:
        variants_idx_list.append((variant, variants_idx_dict[variant]))
    variants_idx_list = sorted(variants_idx_list,
                               key=lambda x: len(x[1]),
                               reverse=True)

    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters,
                                              "png")

    table_alignments_list = [
        "digraph {\n", "tbl [\n", "shape=plaintext\n", "label=<\n"
    ]
    table_alignments_list.append(
        "<table border='0' cellborder='1' color='blue' cellspacing='0'>\n")

    table_alignments_list.append(
        "<tr><td>Variant</td><td>Alignment</td></tr>\n")

    for index, variant in enumerate(variants_idx_list):
        al_tr = aligned_traces[variant[1][0]]
        table_alignments_list.append("<tr>")
        table_alignments_list.append("<td><font point-size='9'>Variant " +
                                     str(index + 1) + " (" +
                                     str(len(variant[1])) +
                                     " occurrences)</font></td>")
        table_alignments_list.append(
            "<td><font point-size='6'><table border='0'><tr>")
        for move in al_tr['alignment']:
            move_descr = str(move[1]).replace(">", "&gt;")
            # synchronous move: neither the log nor the model part is ">>"
            if not move[0][0] == ">>" and not move[0][1] == ">>":
                table_alignments_list.append("<td bgcolor=\"green\">" +
                                             move_descr + "</td>")
            elif move[0][1] == ">>":
                table_alignments_list.append("<td bgcolor=\"violet\">" +
                                             move_descr + "</td>")
            elif move[0][0] == ">>":
                table_alignments_list.append("<td bgcolor=\"gray\">" +
                                             move_descr + "</td>")
        table_alignments_list.append("</tr></table></font></td>")
        table_alignments_list.append("</tr>")

    table_alignments_list.append("</table>\n")
    table_alignments_list.append(">];\n")
    table_alignments_list.append("}\n")

    table_alignments = "".join(table_alignments_list)

    filename = tempfile.NamedTemporaryFile(suffix='.gv')

    gviz = Source(table_alignments, filename=filename.name)
    gviz.format = image_format

    return gviz
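A minimal usage sketch (assumptions: `aligned_traces` comes from an alignment computation on the same `log`; `Parameters.FORMAT` is the key read inside the function):

# Hypothetical usage sketch -- inputs assumed to exist
gviz = apply(log, aligned_traces, parameters={Parameters.FORMAT: "svg"})
gviz.render("alignment_table")  # graphviz.Source.render writes alignment_table.svg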
Example 9
def apply_log(log, list_nets, parameters=None):
    """
    Apply the recomposition alignment approach
    to a log and a decomposed Petri net

    Parameters
    --------------
    log
        Log
    list_nets
        Decomposition
    parameters
        Parameters of the algorithm

    Returns
    --------------
    aligned_traces
        For each trace, return its alignment
    """
    if parameters is None:
        parameters = {}

    show_progress_bar = exec_utils.get_param_value(
        Parameters.SHOW_PROGRESS_BAR, parameters, True)
    icache = exec_utils.get_param_value(Parameters.ICACHE, parameters, dict())
    mcache = exec_utils.get_param_value(Parameters.MCACHE, parameters, dict())

    parameters[Parameters.ICACHE] = icache
    parameters[Parameters.MCACHE] = mcache

    variants_idxs = variants_module.get_variants_from_log_trace_idx(
        log, parameters=parameters)

    progress = None
    if pkgutil.find_loader("tqdm") and show_progress_bar:
        from tqdm.auto import tqdm
        progress = tqdm(
            total=len(variants_idxs),
            desc="aligning log with decomposition/recomposition, completed variants :: "
        )

    one_tr_per_var = []
    variants_list = []
    for index_variant, variant in enumerate(variants_idxs):
        variants_list.append(variant)
    for variant in variants_list:
        one_tr_per_var.append(log[variants_idxs[variant][0]])
    all_alignments = []
    max_align_time = exec_utils.get_param_value(
        Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize)
    start_time = time.time()
    for index, trace in enumerate(one_tr_per_var):
        this_time = time.time()
        if this_time - start_time <= max_align_time:
            alignment = apply_trace(trace, list_nets, parameters=parameters)
        else:
            alignment = None
        if progress is not None:
            progress.update()
        all_alignments.append(alignment)
    al_idx = {}
    for index_variant, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[index_variant]
    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])
    # gracefully close progress bar
    if progress is not None:
        progress.close()
    del progress
    return alignments
Example 10
def apply_log_multiprocessing(log, petri_net, initial_marking, final_marking, parameters=None, version=DEFAULT_VARIANT):
    warnings.warn('factory methods are deprecated, use algorithm entrypoint instead', DeprecationWarning)
    if parameters is None:
        parameters = dict()

    if not check_soundness.check_easy_soundness_net_in_fin_marking(petri_net, initial_marking, final_marking):
        raise Exception("trying to apply alignments on a Petri net that is not a relaxed sound net!!")

    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY
    model_cost_function = parameters[
        PARAM_MODEL_COST_FUNCTION] if PARAM_MODEL_COST_FUNCTION in parameters else None
    sync_cost_function = parameters[
        PARAM_SYNC_COST_FUNCTION] if PARAM_SYNC_COST_FUNCTION in parameters else None
    if model_cost_function is None or sync_cost_function is None:
        # reset variables value
        model_cost_function = dict()
        sync_cost_function = dict()
        for t in petri_net.transitions:
            if t.label is not None:
                model_cost_function[t] = align_utils.STD_MODEL_LOG_MOVE_COST
                sync_cost_function[t] = 0
            else:
                model_cost_function[t] = 1

    parameters[pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
    parameters[
        PARAM_MODEL_COST_FUNCTION] = model_cost_function
    parameters[
        PARAM_SYNC_COST_FUNCTION] = sync_cost_function
    parameters_best_worst = copy(parameters)
    if PARAM_MAX_ALIGN_TIME_TRACE in parameters_best_worst:
        del parameters_best_worst[PARAM_MAX_ALIGN_TIME_TRACE]

    best_worst_cost = VERSIONS_COST[version](petri_net, initial_marking, final_marking,
                                             parameters=parameters_best_worst)

    variants_idxs = parameters[VARIANTS_IDX] if VARIANTS_IDX in parameters else None
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)
    variants_list = [[x, len(y)] for x, y in variants_idxs.items()]

    no_cores = mp.cpu_count()

    petri_net_string = petri_exporter.export_petri_as_string(petri_net, initial_marking, final_marking)

    n = math.ceil(len(variants_list) / no_cores)

    variants_list_split = list(chunks(variants_list, n))

    # Define an output queue
    output = mp.Queue()

    # pass the callable and its arguments separately, so the replay actually runs
    # inside the child processes (calling it here would execute it in the parent)
    processes = [mp.Process(target=VERSIONS_VARIANTS_LIST_MPROCESSING[version],
                            args=(output, x, petri_net_string),
                            kwargs={"parameters": parameters})
                 for x in variants_list_split]

    # Run processes
    for p in processes:
        p.start()

    results = []
    for p in processes:
        result = output.get()
        results.append(result)

    al_idx = {}
    for index, el in enumerate(variants_list_split):
        for index2, var_item in enumerate(el):
            variant = var_item[0]
            for trace_idx in variants_idxs[variant]:
                al_idx[trace_idx] = results[index][variant]

    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])

    # assign fitness to traces
    for index, align in enumerate(alignments):
        if align is not None:
            unfitness_upper_part = align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
            if unfitness_upper_part == 0:
                align['fitness'] = 1
            elif (len(log[index]) + best_worst_cost) > 0:
                align['fitness'] = 1 - (
                        (align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) / (len(log[index]) + best_worst_cost))
            else:
                align['fitness'] = 0

    return alignments
Example 11
def apply_log(log, petri_net, initial_marking, final_marking, parameters=None, version=DEFAULT_VARIANT):
    """
    Apply alignments to a log

    Parameters
    -----------
    log
        object of the form :class:`pm4py.log.log.EventLog` event log
    petri_net
        :class:`pm4py.objects.petri.petrinet.PetriNet` the model to use for the alignment
    initial_marking
        :class:`pm4py.objects.petri.petrinet.Marking` initial marking of the net
    final_marking
        :class:`pm4py.objects.petri.petrinet.Marking` final marking of the net
    version
        :class:`str` selected variant of the algorithm, possible values: 'state_equation_a_star'
    parameters
        :class:`dict` parameters of the algorithm,
        for key 'state_equation_a_star':
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute in the log that contains the activity
            pm4py.algo.conformance.alignments.variants.state_equation_a_star.PARAM_MODEL_COST_FUNCTION ->
            mapping of each transition in the model to its corresponding model-move cost
            pm4py.algo.conformance.alignments.variants.state_equation_a_star.PARAM_SYNC_COST_FUNCTION ->
            mapping of each transition in the model to its corresponding synchronous-move cost
            pm4py.algo.conformance.alignments.variants.state_equation_a_star.PARAM_TRACE_COST_FUNCTION ->
            mapping of each index of the trace to a positive cost value
    Returns
    -----------
    alignment
        :class:`list` of :class:`dict` with keys **alignment**, **cost**, **visited_states**, **queued_states** and
        **traversed_arcs**
        The alignment is a sequence of labels of the form (a,t), (a,>>), or (>>,t)
        representing synchronous/log/model-moves.
    """
    warnings.warn('factory methods are deprecated, use algorithm entrypoint instead', DeprecationWarning)
    if parameters is None:
        parameters = dict()

    if not check_soundness.check_easy_soundness_net_in_fin_marking(petri_net, initial_marking, final_marking):
        raise Exception("trying to apply alignments on a Petri net that is not a easy sound net!!")

    start_time = time.time()
    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY
    model_cost_function = parameters[
        PARAM_MODEL_COST_FUNCTION] if PARAM_MODEL_COST_FUNCTION in parameters else None
    sync_cost_function = parameters[
        PARAM_SYNC_COST_FUNCTION] if PARAM_SYNC_COST_FUNCTION in parameters else None
    max_align_time = parameters[PARAM_MAX_ALIGN_TIME] if PARAM_MAX_ALIGN_TIME in parameters else DEFAULT_MAX_ALIGN_TIME
    max_align_time_case = parameters[
        PARAM_MAX_ALIGN_TIME_TRACE] if PARAM_MAX_ALIGN_TIME_TRACE in parameters else DEFAULT_MAX_ALIGN_TIME_TRACE

    if model_cost_function is None or sync_cost_function is None:
        # reset variables value
        model_cost_function = dict()
        sync_cost_function = dict()
        for t in petri_net.transitions:
            if t.label is not None:
                model_cost_function[t] = align_utils.STD_MODEL_LOG_MOVE_COST
                sync_cost_function[t] = 0
            else:
                model_cost_function[t] = 1

    parameters[pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
    parameters[
        PARAM_MODEL_COST_FUNCTION] = model_cost_function
    parameters[
        PARAM_SYNC_COST_FUNCTION] = sync_cost_function
    parameters_best_worst = copy(parameters)
    if PARAM_MAX_ALIGN_TIME_TRACE in parameters_best_worst:
        del parameters_best_worst[PARAM_MAX_ALIGN_TIME_TRACE]

    best_worst_cost = VERSIONS_COST[version](petri_net, initial_marking, final_marking,
                                             parameters=parameters_best_worst)

    variants_idxs = parameters[VARIANTS_IDX] if VARIANTS_IDX in parameters else None
    if variants_idxs is None:
        variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)

    one_tr_per_var = []
    variants_list = []
    for index_variant, variant in enumerate(variants_idxs):
        variants_list.append(variant)

    for variant in variants_list:
        one_tr_per_var.append(log[variants_idxs[variant][0]])

    all_alignments = []
    for trace in one_tr_per_var:
        this_max_align_time = min(max_align_time_case, (max_align_time - (time.time() - start_time)) * 0.5)
        parameters[PARAM_MAX_ALIGN_TIME_TRACE] = this_max_align_time
        all_alignments.append(apply_trace(trace, petri_net, initial_marking, final_marking, parameters=copy(parameters),
                                          version=version))

    al_idx = {}
    for index_variant, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = all_alignments[index_variant]

    alignments = []
    for i in range(len(log)):
        alignments.append(al_idx[i])

    # assign fitness to traces
    for index, align in enumerate(alignments):
        if align is not None:
            unfitness_upper_part = align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
            if unfitness_upper_part == 0:
                align['fitness'] = 1
            elif (len(log[index]) + best_worst_cost) > 0:
                align['fitness'] = 1 - (
                        (align['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) / (len(log[index]) + best_worst_cost))
            else:
                align['fitness'] = 0
    return alignments
Example 12
def apply(log, net, initial_marking, final_marking, parameters=None):
    """
    Method to apply token-based replay

    Parameters
    -----------
    log
        Log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm

    Returns
    -----------
    results
        Token-based replay result for each trace of the log
    """
    if parameters is None:
        parameters = {}

    for t in net.transitions:
        ma = Marking()
        for a in t.out_arcs:
            p = a.target
            ma[p] = a.weight
        t.out_marking = ma

    for t in net.transitions:
        ma = Marking()
        for a in t.in_arcs:
            p = a.source
            ma[p] = a.weight
        t.in_marking = ma

    variants_idxs = variants_filter.get_variants_from_log_trace_idx(
        log, parameters=parameters)
    results = []

    tmap = {}
    bmap = {}
    for t in net.transitions:
        if t.label is not None:
            if t.label not in tmap:
                tmap[t.label] = []
            tmap[t.label].append(t)

    for variant in variants_idxs:
        vlist = variants_util.get_activities_from_variant(variant)
        result = tr_vlist(vlist,
                          net,
                          initial_marking,
                          final_marking,
                          tmap,
                          bmap,
                          parameters=parameters)
        results.append(result)

    al_idx = {}
    for index_variant, variant in enumerate(variants_idxs):
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = results[index_variant]

    ret = []
    for i in range(len(log)):
        ret.append(al_idx[i])

    return ret
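A minimal usage sketch; the result keys read below ("trace_fitness", "activated_transitions") are the ones the decision-mining example later on this page accesses on a token-based replay result, so treat them as an assumption for this variant:

# Hypothetical usage sketch -- log, net, im, fm are assumed inputs
replay_results = apply(log, net, im, fm)
for res in replay_results:
    print(res["trace_fitness"], [t.label for t in res["activated_transitions"]])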
Example 13
def get_map_from_log_and_net(log,
                             net,
                             initial_marking,
                             final_marking,
                             force_distribution=None,
                             parameters=None):
    """
    Get transition stochastic distribution map given the log and the Petri net

    Parameters
    -----------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking of the Petri net
    final_marking
        Final marking of the Petri net
    force_distribution
        If provided, distribution to force usage (e.g. EXPONENTIAL)
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> activity name
            Parameters.TIMESTAMP_KEY -> timestamp key

    Returns
    -----------
    stochastic_map
        Map that to each transition associates a random variable
    """
    stochastic_map = {}

    if parameters is None:
        parameters = {}

    token_replay_variant = exec_utils.get_param_value(
        Parameters.TOKEN_REPLAY_VARIANT, parameters,
        executor.Variants.TOKEN_REPLAY)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)

    parameters_variants = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
    }
    variants_idx = variants_module.get_variants_from_log_trace_idx(
        log, parameters=parameters_variants)
    variants = variants_module.convert_variants_trace_idx_to_trace_obj(
        log, variants_idx)

    parameters_tr = {
        token_replay.Parameters.ACTIVITY_KEY: activity_key,
        token_replay.Parameters.VARIANTS: variants
    }

    # do the replay
    aligned_traces = executor.apply(log,
                                    net,
                                    initial_marking,
                                    final_marking,
                                    variant=token_replay_variant,
                                    parameters=parameters_tr)

    element_statistics = performance_map.single_element_statistics(
        log,
        net,
        initial_marking,
        aligned_traces,
        variants_idx,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        parameters={"business_hours": True})

    for el in element_statistics:
        if type(el) is PetriNet.Transition and "performance" in element_statistics[el]:
            values = element_statistics[el]["performance"]

            rand = RandomVariable()
            rand.calculate_parameters(values,
                                      force_distribution=force_distribution)

            no_of_times_enabled = element_statistics[el]['no_of_times_enabled']
            no_of_times_activated = element_statistics[el][
                'no_of_times_activated']

            if no_of_times_enabled > 0:
                rand.set_weight(
                    float(no_of_times_activated) / float(no_of_times_enabled))
            else:
                rand.set_weight(0.0)

            stochastic_map[el] = rand

    return stochastic_map
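A minimal usage sketch (assumptions: `log`, `net`, `im`, `fm` obtained elsewhere); each transition ends up associated with a fitted random variable that also carries a firing weight:

# Hypothetical usage sketch -- inputs assumed to exist
smap = get_map_from_log_and_net(log, net, im, fm)
for trans, rand_var in smap.items():
    print(trans.name, trans.label, rand_var)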
Example 14
def get_attributes(log,
                   decision_points,
                   attributes,
                   use_trace_attributes,
                   trace_attributes,
                   k,
                   net,
                   initial_marking,
                   final_marking,
                   decision_points_names,
                   parameters=None):
    """
    This method aims to construct for each decision place a table where for each decision place a list if given with the
     label of the later decision and as value the given attributes
    :param log: Log on which the method is applied
    :param alignments: Computed alignments for a log and a model
    :param decision_points: Places that have multiple outgoing arcs
    :param attributes: Attributes that are considered
    :param use_trace_attributes: If trace attributes have to be considered or not
    :param trace_attributes: List of trace attributes that are considered
    :param k: Taking k last activities into account
    :return: Dictionary that has as keys the decision places. The value for this key is a list.
    The content of these lists are tuples. The first element of these tuples is information regrading the attributes,
    the second element of these tuples is the transition which chosen in a decision.
    """
    if parameters is None:
        parameters = {}
    I = {}
    for key in decision_points:
        I[key] = []
    A = {}
    for attri in attributes:
        A[attri] = None
    i = 0
    # first, take a look at the variants
    variants_idxs = variants_module.get_variants_from_log_trace_idx(
        log, parameters=parameters)
    one_variant = []
    for variant in variants_idxs:
        one_variant.append(variant)
        # TODO: equip the token-based replay code with a parameter to only take the variants into account
    replay_result = token_replay.apply(log,
                                       net,
                                       initial_marking,
                                       final_marking,
                                       parameters=parameters)
    replay_result = simplify_token_replay(replay_result)
    count = 0
    for variant in replay_result:
        if variant['trace_fitness'] == 1.0:
            for trace_index in variants_idxs[one_variant[count]]:
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                # j is a pointer which points to the current event inside a trace
                for transition in variant['activated_transitions']:
                    for key, value in decision_points_names.items():
                        if transition.label in value:
                            for element in last_k_list:
                                if element is not None:
                                    if transition.label is not None:
                                        I[key].append(
                                            (element.copy(), transition.label))
                                    else:
                                        I[key].append(
                                            (element.copy(), transition.name))
                    for attri in attributes:
                        # print(variant, transition.label, j)
                        if attri in trace[j]:
                            # only add the attribute information if it is present in the event
                            A[attri] = trace[j][attri]
                    # add A to last_k_list. Using modulo to access correct entry
                    last_k_list[j % k] = A.copy()
                    if transition.label is not None:
                        if not j + 1 >= len(trace):
                            # Problem otherwise: If there are tau-transition after the last event related transition,
                            # the pointer j which points to the current event in a trace, gets out of range
                            j += 1
        else:
            example_trace = log[variants_idxs[one_variant[count]][0]]
            align_parameters = copy(parameters)
            align_parameters[star.Parameters.
                             PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE] = True
            alignment = ali.apply(example_trace,
                                  net,
                                  initial_marking,
                                  final_marking,
                                  parameters=align_parameters)['alignment']
            for trace_index in variants_idxs[one_variant[count]]:
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                for el in alignment:
                    if el[1][1] != '>>':
                        # If move in model
                        for key, value in decision_points.items():
                            if el[0][1] in value:
                                for element in last_k_list:
                                    if element is not None:
                                        # only add those entries where information is provided
                                        if el[1][1] is None:
                                            # for some dt algorithms, the entry None might be a problem, since it is left out later
                                            I[key].append(
                                                (element.copy(), el[0][1]))
                                        else:
                                            I[key].append(
                                                (element.copy(), el[1][1]))
                    if el[1][0] != '>>' and el[1][1] != '>>':
                        # If there is a move in log and model
                        for attri in attributes:
                            if attri in trace[j]:
                                # only add the attribute information if it is present in the event
                                A[attri] = trace[j][attri]
                        # add A to last_k_list. Using modulo to access correct entry
                        last_k_list[j % k] = A.copy()
                    if el[1][0] != '>>':
                        # only go to next event in trace if the current event has been aligned
                        # TODO: Discuss if this is correct or can lead to problems
                        j += 1
        count += 1
    return I
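The structure of the returned dictionary I is worth spelling out; a toy illustration with hypothetical attribute data:

# Toy illustration (hypothetical values): each decision place maps to
# (attribute-snapshot, chosen-transition) pairs, ready for a decision-tree learner
I = {
    "p_decision": [
        ({"amount": 500, "customer": "A"}, "approve"),
        ({"amount": 9000, "customer": "B"}, "reject"),
    ]
}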
Example 15
def apply(df, discovery_algorithm=discover_inductive, parameters=None):
    if parameters is None:
        parameters = {}

    allowed_activities = parameters[
        "allowed_activities"] if "allowed_activities" in parameters else None
    debug = parameters["debug"] if "debug" in parameters else True

    try:
        if df.type == "succint":
            df = succint_mdl_to_exploded_mdl.apply(df)
            df.type = "exploded"
    except Exception:
        # the dataframe might not carry a "type" attribute at all
        pass

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    min_node_freq = parameters[
        "min_node_freq"] if "min_node_freq" in parameters else 0
    min_edge_freq = parameters[
        "min_edge_freq"] if "min_edge_freq" in parameters else 0

    df = clean_frequency.apply(df, min_node_freq)
    df = clean_arc_frequency.apply(df, min_edge_freq)

    if len(df) == 0:
        df = pd.DataFrame({"event_id": [], "event_activity": []})

    persps = [x for x in df.columns if not x.startswith("event_")]

    ret = {}
    ret["nets"] = {}
    ret["act_count"] = {}
    ret["replay"] = {}
    ret["group_size_hist"] = {}
    ret["act_count_replay"] = {}
    ret["group_size_hist_replay"] = {}
    ret["aligned_traces"] = {}
    ret["place_fitness_per_trace"] = {}
    ret["aggregated_statistics_frequency"] = {}
    ret["aggregated_statistics_performance_min"] = {}
    ret["aggregated_statistics_performance_max"] = {}
    ret["aggregated_statistics_performance_median"] = {}
    ret["aggregated_statistics_performance_mean"] = {}

    diff_log = 0
    diff_model = 0
    diff_token_replay = 0
    diff_performance_annotation = 0
    diff_basic_stats = 0

    for persp in persps:
        aa = time.time()
        if debug:
            print(persp, "getting log")
        log = algorithm.apply(df, persp, parameters=parameters)
        if debug:
            print(len(log))

        if allowed_activities is not None:
            if persp not in allowed_activities:
                continue
            filtered_log = attributes_filter.apply_events(
                log, allowed_activities[persp])
        else:
            filtered_log = log
        bb = time.time()

        diff_log += (bb - aa)

        # filtered_log = variants_filter.apply_auto_filter(deepcopy(filtered_log), parameters={"decreasingFactor": 0.5})

        if debug:
            print(len(log))
            print(persp, "got log")

        cc = time.time()
        #net, im, fm = inductive_miner.apply(filtered_log)
        net, im, fm = discovery_algorithm(filtered_log)
        """if persp == "items":
            trans_map = {t.label:t for t in net.transitions}
            source_place_it = list(trans_map["item out of stock"].in_arcs)[0].source
            target_place_re = list(trans_map["reorder item"].out_arcs)[0].target
            skip_trans_1 = PetriNet.Transition(str(uuid.uuid4()), None)
            net.transitions.add(skip_trans_1)
            add_arc_from_to(source_place_it, skip_trans_1, net)
            add_arc_from_to(skip_trans_1, target_place_re, net)"""

        #net = reduce_petri_net(net)
        dd = time.time()

        diff_model += (dd - cc)

        # net, im, fm = alpha_miner.apply(filtered_log)
        if debug:
            print(persp, "got model")

        xx1 = time.time()
        activ_count = algorithm.apply(df,
                                      persp,
                                      variant="activity_occurrence",
                                      parameters=parameters)
        if debug:
            print(persp, "got activ_count")
        xx2 = time.time()

        ee = time.time()
        variants_idx = variants_module.get_variants_from_log_trace_idx(log)
        # variants = variants_module.convert_variants_trace_idx_to_trace_obj(log, variants_idx)
        # parameters_tr = {PARAM_ACTIVITY_KEY: "concept:name", "variants": variants}

        if debug:
            print(persp, "got variants")

        aligned_traces, place_fitness_per_trace, transition_fitness_per_trace, notexisting_activities_in_model = tr_factory.apply(
            log,
            net,
            im,
            fm,
            parameters={
                "enable_pltr_fitness": True,
                "disable_variants": True
            })

        if debug:
            print(persp, "done tbr")

        element_statistics = performance_map.single_element_statistics(
            log, net, im, aligned_traces, variants_idx)

        if debug:
            print(persp, "done element_statistics")
        ff = time.time()

        diff_token_replay += (ff - ee)

        aggregated_statistics = performance_map.aggregate_statistics(
            element_statistics)

        if debug:
            print(persp, "done aggregated_statistics")

        element_statistics_performance = performance_map.single_element_statistics(
            log, net, im, aligned_traces, variants_idx)

        if debug:
            print(persp, "done element_statistics_performance")

        gg = time.time()

        aggregated_statistics_performance_min = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="min")
        aggregated_statistics_performance_max = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="max")
        aggregated_statistics_performance_median = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="median")
        aggregated_statistics_performance_mean = performance_map.aggregate_statistics(
            element_statistics_performance,
            measure="performance",
            aggregation_measure="mean")

        hh = time.time()

        diff_performance_annotation += (hh - ee)

        if debug:
            print(persp, "done aggregated_statistics_performance")

        group_size_hist = algorithm.apply(df,
                                          persp,
                                          variant="group_size_hist",
                                          parameters=parameters)

        if debug:
            print(persp, "done group_size_hist")

        occurrences = {}
        for trans in transition_fitness_per_trace:
            occurrences[trans.label] = set()
            for trace in transition_fitness_per_trace[trans]["fit_traces"]:
                if trace not in transition_fitness_per_trace[trans]["underfed_traces"]:
                    case_id = trace.attributes["concept:name"]
                    for event in trace:
                        if event["concept:name"] == trans.label:
                            occurrences[trans.label].add(
                                (case_id, event["event_id"]))
            # print(transition_fitness_per_trace[trans])

        len_different_ids = {}
        for act in occurrences:
            len_different_ids[act] = len(set(x[1] for x in occurrences[act]))

        eid_acti_count = {}
        for act in occurrences:
            eid_acti_count[act] = {}
            for x in occurrences[act]:
                # count, per case id, how many events of this activity occurred
                if x[0] not in eid_acti_count[act]:
                    eid_acti_count[act][x[0]] = 0
                eid_acti_count[act][x[0]] = eid_acti_count[act][x[0]] + 1
            eid_acti_count[act] = sorted(list(eid_acti_count[act].values()))

        ii = time.time()

        diff_basic_stats += (ii - hh) + (xx2 - xx1)

        ret["nets"][persp] = [net, im, fm]
        ret["act_count"][persp] = activ_count
        ret["aligned_traces"][persp] = aligned_traces
        ret["place_fitness_per_trace"][persp] = place_fitness_per_trace
        ret["aggregated_statistics_frequency"][persp] = aggregated_statistics
        ret["aggregated_statistics_performance_min"][
            persp] = aggregated_statistics_performance_min
        ret["aggregated_statistics_performance_max"][
            persp] = aggregated_statistics_performance_max
        ret["aggregated_statistics_performance_median"][
            persp] = aggregated_statistics_performance_median
        ret["aggregated_statistics_performance_mean"][
            persp] = aggregated_statistics_performance_mean

        ret["replay"][persp] = aggregated_statistics
        ret["group_size_hist"][persp] = group_size_hist
        ret["act_count_replay"][persp] = len_different_ids
        ret["group_size_hist_replay"][persp] = eid_acti_count

    ret["computation_statistics"] = {
        "diff_log": diff_log,
        "diff_model": diff_model,
        "diff_token_replay": diff_token_replay,
        "diff_performance_annotation": diff_performance_annotation,
        "diff_basic_stats": diff_basic_stats
    }

    return ret
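A minimal usage sketch (assumptions: `df` is an exploded object-centric dataframe; "order" is a made-up perspective name, i.e. one of the non "event_" columns):

# Hypothetical usage sketch -- df is an assumed input, "order" a made-up perspective
model = apply(df, parameters={"min_node_freq": 5, "debug": False})
net, im, fm = model["nets"]["order"]
frequency_annotations = model["aggregated_statistics_frequency"]["order"]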
Example 16
def get_decorations(log, net, initial_marking, final_marking, parameters=None, measure="frequency",
                    ht_perf_method="last"):
    """
    Calculate decorations in order to annotate the Petri net

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters associated to the algorithm
    measure
        Measure to represent on the process model (frequency/performance)
    ht_perf_method
        Method to use to annotate hidden transitions (the performance value can be put on the last
        possible point (last) or on the first possible point (first))

    Returns
    ------------
    decorations
        Decorations to put on the process model
    """
    if parameters is None:
        parameters = {}

    aggregation_measure = None
    if "aggregationMeasure" in parameters:
        aggregation_measure = parameters["aggregationMeasure"]

    activity_key = parameters[
        PARAM_ACTIVITY_KEY] if PARAM_ACTIVITY_KEY in parameters else log_lib.util.xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[PARAM_TIMESTAMP_KEY] if PARAM_TIMESTAMP_KEY in parameters else "time:timestamp"

    parameters_variants = {PARAM_ACTIVITY_KEY: activity_key}
    variants_idx = variants_get.get_variants_from_log_trace_idx(log, parameters=parameters_variants)
    variants = variants_get.convert_variants_trace_idx_to_trace_obj(log, variants_idx)

    parameters_tr = {PARAM_ACTIVITY_KEY: activity_key, "variants": variants}

    # do the replay
    aligned_traces = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters_tr)

    # apply petri_reduction technique in order to simplify the Petri net
    # net = reduction.apply(net, parameters={"aligned_traces": aligned_traces})

    element_statistics = performance_map.single_element_statistics(log, net, initial_marking,
                                                                   aligned_traces, variants_idx,
                                                                   activity_key=activity_key,
                                                                   timestamp_key=timestamp_key,
                                                                   ht_perf_method=ht_perf_method)

    aggregated_statistics = performance_map.aggregate_statistics(element_statistics, measure=measure,
                                                                 aggregation_measure=aggregation_measure)

    return aggregated_statistics