Example #1
def apply_auto_filter(trace_log, parameters=None):
    """
    Apply a battery of filters to the log in order to obtain a simplified log
    
    Parameters
    ----------
    trace_log
        Trace log
    parameters
        Optional parameters applied to the algorithms:
            decreasingFactor -> Decreasing factor (provided to all algorithms)
            activity_key -> Activity key (must be specified if different from concept:name)
    
    Returns
    ---------
    filtered_log
        Filtered log
    """

    # the following filters are applied:
    # - activity filter (keep only attributes with a reasonable number of occurrences)
    # - paths filter (keep only paths with a reasonable number of occurrences)
    # - variant filter (keep only variants with a reasonable number of occurrences)
    # - start activities filter (keep only variants that start with a plausible start activity)
    # - end activities filter (keep only variants that end with a plausible end activity)

    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    decreasing_factor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else filtering_constants.DECREASING_FACTOR

    parameters_child = {"decreasingFactor": decreasing_factor, constants.PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key}

    variants = variants_module.get_variants(trace_log, parameters=parameters_child)

    filtered_log1 = attributes_filter.apply_auto_filter(trace_log, variants=variants, parameters=parameters_child)
    del trace_log
    variants = variants_module.get_variants(filtered_log1, parameters=parameters_child)
    filtered_log2 = paths_filter.apply_auto_filter(filtered_log1, variants=variants, parameters=parameters_child)
    del filtered_log1
    variants = variants_module.get_variants(filtered_log2, parameters=parameters_child)
    filtered_log3 = variants_module.apply_auto_filter(filtered_log2, variants=variants, parameters=parameters_child)
    variants = variants_module.get_variants(filtered_log3, parameters=parameters_child)
    del filtered_log2
    filtered_log4 = start_activities_filter.apply_auto_filter(filtered_log3, variants=variants,
                                                              parameters=parameters_child)
    del filtered_log3
    filtered_log5 = end_activities_filter.apply_auto_filter(filtered_log4, variants=variants,
                                                            parameters=parameters_child)
    del filtered_log4

    return filtered_log5
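
A minimal usage sketch for the filter chain above; it assumes trace_log has already been imported elsewhere and that the constants referenced in the function body are in scope. The decreasing factor value is illustrative only.

# Hypothetical usage: trace_log is an already-imported trace log.
simplified_log = apply_auto_filter(
    trace_log,
    parameters={"decreasingFactor": 0.6})  # omit to fall back to filtering_constants.DECREASING_FACTOR
print(len(simplified_log), "traces kept after auto-filtering")
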
Example #2
def calculate_prefix_alignments_multiprocessing(petri_net_filename, log,
                                                path_to_files):
    results = []

    pnml_file_path = os.path.join(path_to_files, petri_net_filename)
    net, im, fm = petri.importer.pnml.import_net(pnml_file_path)

    variants = variants_filter.get_variants(log)
    pool = Pool()
    processes = []
    global number_traces
    number_traces = len(variants)
    for v in variants:
        trace = variants[v][0]
        variant_count = len(variants[v])
        p = pool.apply_async(calculate_prefix_alignment_for_trace,
                             args=(
                                 trace,
                                 net,
                                 im,
                                 fm,
                                 variant_count,
                             ),
                             callback=print_progress_on_console)
        processes.append(p)
    pool.close()
    pool.join()
    for p in processes:
        results.append(p.get())
    results_path = os.path.join(
        path_to_files,
        "RESULTS_" + petri_net_filename + '_' + str(date.today()) + ".pickle")
    with open(results_path, 'wb') as handle:
        pickle.dump(results, handle)
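
A usage sketch for the multiprocessing driver above; the Petri net file name and the directory are placeholders, and log is assumed to be an already-loaded event log.

# Hypothetical call: file name and directory are placeholders.
calculate_prefix_alignments_multiprocessing("model.pnml", log, "/path/to/files")
# A pickle named RESULTS_model.pnml_<today>.pickle is written into the same directory.
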
Example #3
def get_variants_from_log(log, activity_key, disable_variants=False):
    """
    Gets the variants from the log (variant grouping can be disabled by assigning each trace its own variant)

    Parameters
    -------------
    log
        Trace log
    activity_key
        Attribute that is the activity
    disable_variants
        Boolean value that disables variant grouping

    Returns
    -------------
    variants
        Variants contained in the log
    """
    if disable_variants:
        variants = {}
        for trace in log:
            variants[str(hash(trace))] = [trace]
        return variants
    parameters_variants = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
    }
    variants = variants_module.get_variants(log,
                                            parameters=parameters_variants)
    return variants
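
A short sketch showing both modes of the helper above, assuming log is an already-loaded trace log.

# Group traces by control-flow variant (the usual case) ...
variants = get_variants_from_log(log, "concept:name")
# ... or force one variant per trace by disabling variant grouping.
per_trace = get_variants_from_log(log, "concept:name", disable_variants=True)
print(len(variants), "variants,", len(per_trace), "traces")
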
Example #4
def apply_auto_filter(trace_log, variants=None, parameters=None):
    """
    Apply a start activities filter, automatically detecting the percentage threshold
    
    Parameters
    ----------
    trace_log
        Trace log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value
    parameters
        Parameters of the algorithm, including:
            decreasingFactor -> Decreasing factor (stops the algorithm when the next activity by occurrence is below
            this factor in comparison to previous)
            attribute_key -> Attribute key (must be specified if different from concept:name)
    
    Returns
    ---------
    filtered_log
        Filtered log    
    """
    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY
    decreasing_factor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else DECREASING_FACTOR

    parameters_variants = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key
    }

    if variants is None:
        variants = variants_filter.get_variants(trace_log,
                                                parameters=parameters_variants)
    vc = variants_filter.get_variants_sorted_by_count(variants)
    start_activities = get_start_activities(trace_log,
                                            parameters=parameters_variants)
    salist = start_activities_common.get_sorted_start_activities_list(
        start_activities)
    sathreshold = start_activities_common.get_start_activities_threshold(
        salist, decreasing_factor)
    filtered_log = filter_log_by_start_activities(start_activities, variants,
                                                  vc, sathreshold,
                                                  attribute_key)
    return filtered_log
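
A minimal sketch of the auto-filter above; the precomputed variants dictionary is optional, and the decreasing factor value is illustrative.

# Reuse a precomputed variants dictionary to avoid recomputation (optional).
variants = variants_filter.get_variants(trace_log)
filtered = apply_auto_filter(trace_log,
                             variants=variants,
                             parameters={"decreasingFactor": 0.6})
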
Example #5
def get_variant_statistics(trace_log, parameters=None):
    """
    Gets a list of variants along with the number of traces
    that share each variant

    Parameters
    ----------
    trace_log
        Trace log
    parameters
        Parameters of the algorithm, including:
            activity_key -> Attribute identifying the activity in the log
            max_variants_to_return -> Maximum number of variants to return
            variants -> If provided, avoid recalculation of the variants

    Returns
    ----------
    variants_list
        List of variants along with their statistics
    """

    if parameters is None:
        parameters = {}
    max_variants_to_return = parameters[
        "max_variants_to_return"] if "max_variants_to_return" in parameters else None
    varnt = parameters[
        "variants"] if "variants" in parameters else variants_filter.get_variants(
            trace_log, parameters=parameters)
    variants_list = []
    for var in varnt:
        variants_list.append({"variant": var, "count": len(varnt[var])})
    variants_list = sorted(variants_list,
                           key=lambda x: x["count"],
                           reverse=True)
    if max_variants_to_return:
        variants_list = variants_list[:min(len(variants_list
                                               ), max_variants_to_return)]
    return variants_list
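
A usage sketch that prints the most frequent variants; max_variants_to_return is the parameter documented above, and the value 5 is illustrative.

stats = get_variant_statistics(trace_log,
                               parameters={"max_variants_to_return": 5})
for entry in stats:
    print(entry["variant"], "->", entry["count"], "traces")
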
Example #6
def apply(log, parameters=None):
    """
    Returns a log from which a sound workflow net can be extracted, taking into account
    a discovery algorithm that returns models with only visible transitions

    Parameters
    ------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm, including:
            discovery_algorithm -> Discovery algorithm to consider, possible choices: alphaclassic
            max_no_variants -> Maximum number of variants to consider to return a Petri net

    Returns
    ------------
    filtered_log
        Filtered trace log
    """
    if parameters is None:
        parameters = {}
    discovery_algorithm = parameters[
        "discovery_algorithm"] if "discovery_algorithm" in parameters else "alphaclassic"
    max_no_variants = parameters[
        "max_no_variants"] if "max_no_variants" in parameters else 20
    all_variants_dictio = variants_filter.get_variants(log,
                                                       parameters=parameters)
    all_variants_list = []
    for var in all_variants_dictio:
        all_variants_list.append([var, len(all_variants_dictio[var])])
    all_variants_list = sorted(all_variants_list,
                               key=lambda x: (x[1], x[0]),
                               reverse=True)
    considered_variants = []
    considered_traces = []

    i = 0
    while i < min(len(all_variants_list), max_no_variants):
        variant = all_variants_list[i][0]

        considered_variants.append(variant)
        considered_traces.append(all_variants_dictio[variant][0])
        filtered_log = TraceLog(considered_traces)
        net = None
        initial_marking = None
        final_marking = None
        if discovery_algorithm == "alphaclassic" or discovery_algorithm == "alpha":
            net, initial_marking, final_marking = alpha_miner.apply(
                filtered_log, parameters=parameters)
        is_sound = check_soundness.check_petri_wfnet_and_soundness(net)
        if not is_sound:
            del considered_variants[-1]
            del considered_traces[-1]
        else:
            try:
                alignments = alignment_factory.apply(filtered_log, net,
                                                     initial_marking,
                                                     final_marking)
                del alignments
                fitness = replay_fitness_factory.apply(filtered_log,
                                                       net,
                                                       initial_marking,
                                                       final_marking,
                                                       parameters=parameters)
                if fitness["log_fitness"] < 0.99999:
                    del considered_variants[-1]
                    del considered_traces[-1]
            except TypeError:
                del considered_variants[-1]
                del considered_traces[-1]
        i = i + 1

    sound_log = TraceLog()
    if considered_variants:
        sound_log = variants_filter.apply(log,
                                          considered_variants,
                                          parameters=parameters)

    return sound_log
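
A sketch of how the function above might be invoked; the parameter values are illustrative only.

sound_log = apply(log, parameters={"discovery_algorithm": "alphaclassic",
                                   "max_no_variants": 10})
# sound_log keeps only the variants whose cumulative sublog still yields a
# sound, perfectly fitting workflow net under the Alpha Miner.
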
Example #7
def apply_log(log, net, initial_marking, final_marking, enable_place_fitness=False, consider_remaining_in_fitness=False,
              activity_key="concept:name", reach_mark_through_hidden=True, stop_immediately_unfit=False,
              walk_through_hidden_trans=True, places_shortest_path_by_hidden=None,
              variants=None):
    """
    Apply token-based replay to a log

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    enable_place_fitness
        Enable fitness calculation at place level
    consider_remaining_in_fitness
        Boolean value telling if the remaining tokens should be considered in fitness evaluation
    activity_key
        Name of the attribute that contains the activity
    reach_mark_through_hidden
        Boolean value that decides if we shall try to reach the final marking through hidden transitions
    stop_immediately_unfit
        Boolean value that decides if we shall stop immediately when a non-conformance is detected
    walk_through_hidden_trans
        Boolean value that decides if we shall walk through hidden transitions in order to enable visible transitions
    places_shortest_path_by_hidden
        Shortest paths between places by hidden transitions
    variants
        List of variants contained in the event log
    """
    post_fix_cache = PostFixCaching()
    marking_to_activity_cache = MarkingToActivityCaching()
    if places_shortest_path_by_hidden is None:
        places_shortest_path_by_hidden = get_places_shortest_path_by_hidden(net)

    place_fitness_per_trace = {}

    aligned_traces = []

    if enable_place_fitness:
        for place in net.places:
            place_fitness_per_trace[place] = {"underfed_traces": set(), "overfed_traces": set()}
    trans_map = {}
    for t in net.transitions:
        trans_map[t.label] = t
    if len(log) > 0:
        if len(log[0]) > 0:
            if activity_key in log[0][0]:
                if variants is None:
                    parameters_variants = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}
                    variants = variants_module.get_variants(log, parameters=parameters_variants)
                vc = variants_module.get_variants_sorted_by_count(variants)
                threads = {}
                threads_results = {}

                for i in range(len(vc)):
                    variant = vc[i][0]
                    threads_keys = list(threads.keys())
                    if len(threads_keys) > MAX_NO_THREADS:
                        for j in range(len(threads_keys)):
                            t = threads[threads_keys[j]]
                            t.join()
                            threads_results[threads_keys[j]] = {"trace_is_fit": copy(t.t_fit),
                                                                "trace_fitness": copy(t.t_value),
                                                                "activated_transitions": copy(t.act_trans),
                                                                "reached_marking": copy(t.reached_marking),
                                                                "enabled_transitions_in_marking": copy(
                                                                    t.enabled_trans_in_mark),
                                                                "transitions_with_problems": copy(
                                                                    t.trans_probl)}
                            del threads[threads_keys[j]]
                        del threads_keys
                    threads[variant] = ApplyTraceTokenReplay(variants[variant][0], net, initial_marking, final_marking,
                                                             trans_map, enable_place_fitness, place_fitness_per_trace,
                                                             places_shortest_path_by_hidden,
                                                             consider_remaining_in_fitness,
                                                             activity_key=activity_key,
                                                             reach_mark_through_hidden=reach_mark_through_hidden,
                                                             stop_immediately_when_unfit=stop_immediately_unfit,
                                                             walk_through_hidden_trans=walk_through_hidden_trans,
                                                             post_fix_caching=post_fix_cache,
                                                             marking_to_activity_caching=marking_to_activity_cache)
                    threads[variant].start()
                threads_keys = list(threads.keys())
                for j in range(len(threads_keys)):
                    t = threads[threads_keys[j]]
                    t.join()
                    threads_results[threads_keys[j]] = {"trace_is_fit": copy(t.t_fit), "trace_fitness": copy(t.t_value),
                                                        "activated_transitions": copy(t.act_trans),
                                                        "reached_marking": copy(t.reached_marking),
                                                        "enabled_transitions_in_marking": copy(
                                                            t.enabled_trans_in_mark),
                                                        "transitions_with_problems": copy(t.trans_probl)}
                    del threads[threads_keys[j]]
                for trace in log:
                    trace_variant = ",".join([x[activity_key] for x in trace])
                    t = threads_results[trace_variant]

                    aligned_traces.append(t)
            else:
                raise NoConceptNameException("at least one event is without " + activity_key)

    if enable_place_fitness:
        return aligned_traces, place_fitness_per_trace
    else:
        return aligned_traces
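
A sketch of a token-based replay call with place-level diagnostics enabled; log, net, im and fm are assumed to come from earlier import and discovery steps.

replay_results, place_fitness = apply_log(log, net, im, fm,
                                          enable_place_fitness=True)
for result in replay_results:
    print(result["trace_is_fit"], result["trace_fitness"])
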