Beispiel #1
0
def aggregate_statistics(statistics, measure="frequency", aggregation_measure=None):
    """
    Builds the decoration (label plus pen width or color) for the elements of a Petri net

    Parameters
    ----------
    statistics
        Individual element statistics (including unaggregated performances),
        keyed by Petri net element
    measure
        Desired view on data (frequency or performance)
    aggregation_measure
        Aggregation measure (e.g. mean, min) to use

    Returns
    ----------
    aggregated_statistics
        Dictionary associating each decorated arc or transition to its
        decoration; places never get an entry
    """
    trans_freq_low, trans_freq_high = find_min_max_trans_frequency(statistics)
    arc_freq_low, arc_freq_high = find_min_max_arc_frequency(statistics)
    arc_perf_low, arc_perf_high = find_min_max_arc_performance(statistics, aggregation_measure)

    frequency_view = measure == "frequency"
    performance_view = measure == "performance"

    decorations = {}
    for element, element_stats in statistics.items():
        element_type = type(element)

        if element_type is PetriNet.Arc:
            if frequency_view:
                occurrences = element_stats["count"]
                width = get_arc_penwidth(occurrences, arc_freq_low, arc_freq_high)
                decorations[element] = {"label": str(occurrences), "penwidth": str(width)}
            elif performance_view and element_stats["performance"]:
                # arcs without any recorded performance value stay undecorated
                aggregated = aggregate_stats(statistics, element, aggregation_measure)
                readable = human_readable_stat(aggregated)
                width = get_arc_penwidth(aggregated, arc_perf_low, arc_perf_high)
                decorations[element] = {"label": readable, "penwidth": str(width)}
        elif element_type is PetriNet.Transition:
            # only visible (labeled) transitions are decorated, and only in the frequency view
            if frequency_view and element.label is not None:
                occurrences = element_stats["count"]
                fill = get_trans_freq_color(occurrences, trans_freq_low, trans_freq_high)
                caption = "".join((element.label, " (", str(occurrences), ")"))
                decorations[element] = {"label": caption, "color": fill}
        # any other element (e.g. places) is intentionally left without decoration

    return decorations
Beispiel #2
0
def execute_script():
    """
    Example script: footprints-based conformance checking on the receipt log.

    Steps performed:
    - imports the receipt log and discovers a process tree (inductive miner)
      on an auto-filtered copy of the log;
    - computes the footprints of the full log and of the tree, and their
      conformance (the set of log paths not allowed by the tree);
    - prints the (up to) 10 most frequent deviating paths along with the
      median throughput time of the whole log and of the cases containing
      the path;
    - prints the (up to) 10 most frequent variants along with whether the
      variant fits the tree footprints and its median throughput time;
    - shows the process tree colored according to the conformance results.
    """
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    throughput_time = case_statistics.get_median_caseduration(log)
    # formatted once: the same value is shown in every row of both tables
    throughput_time_hr = human_readable_stat(throughput_time)
    variants, _ = variants_filter.get_variants_along_with_case_durations(log)
    dfg = dfg_discovery.apply(log)
    # the filter works on a copy so that the original log stays untouched
    filtered_log = variants_filter.apply_auto_filter(deepcopy(log))
    tree = inductive_miner.apply_tree(filtered_log)
    fp_log = fp_discovery.apply(
        log, variant=fp_discovery.Variants.ENTIRE_EVENT_LOG)
    fp_model = fp_discovery.apply(tree)
    conf = fp_conformance.apply(fp_log, fp_model)

    # deviating paths, most frequent first (ties broken by activity names)
    conf_occ = sorted(((path, dfg[path]) for path in conf),
                      key=lambda y: (y[1], y[0][0], y[0][1]),
                      reverse=True)
    print(
        "source activity\t\ttarget activity\t\toccurrences\t\tthroughput time log\t\tthroughput time traces with path"
    )
    for path, occ in conf_occ[:10]:
        red_log = paths_filter.apply(log, [path])
        red_throughput_time = case_statistics.get_median_caseduration(red_log)
        print("%s\t\t%s\t\t%d\t\t%s\t\t%s" %
              (path[0], path[1], occ, throughput_time_hr,
               human_readable_stat(red_throughput_time)))

    # variants, most frequent first (ties broken by the variant itself)
    variants_length = sorted(((var, len(traces))
                              for var, traces in variants.items()),
                             key=lambda y: (y[1], y[0]),
                             reverse=True)
    # the header lists one title per printed column (including "is fit")
    print(
        "\nvariant\t\toccurrences\t\tis fit\t\tthroughput time log\t\tthroughput time variant"
    )
    for var, occ in variants_length[:10]:
        # the variant name is truncated to keep the table readable
        vark = str(var)[:10]
        fp_log_var = fp_discovery.apply(
            variants[var], variant=fp_discovery.Variants.ENTIRE_EVENT_LOG)
        conf_var = fp_conformance.apply(fp_log_var, fp_model)
        # a variant fits when none of its footprints deviates from the model
        is_fit = str(len(conf_var) == 0)
        var_throughput = case_statistics.get_median_caseduration(variants[var])
        print("%s\t\t%d\t\t%s\t\t%s\t\t%s" %
              (vark, occ, is_fit, throughput_time_hr,
               human_readable_stat(var_throughput)))

    conf_colors = tree_visualization.apply(tree, conf)
    wo_decoration_params = pt_visualizer.Variants.WO_DECORATION.value.Parameters
    gviz = pt_visualizer.apply(
        tree,
        parameters={
            "format": "svg",
            wo_decoration_params.COLOR_MAP: conf_colors,
            wo_decoration_params.ENABLE_DEEPCOPY: False,
        })
    pt_visualizer.view(gviz)
Beispiel #3
0
def apply(network_analysis_edges0: Dict[Tuple[str, str], Dict[str, Any]],
          parameters: Optional[Dict[Any, Any]] = None) -> Digraph:
    """
    Creates a visualization of the network analysis (performance view)

    Parameters
    -----------------
    network_analysis_edges0
        Edges of the network analysis. Each (source, target) pair is associated
        to a dictionary whose keys are the edge labels and whose values are the
        collections of performance observations for that label (their length
        is used as the number of occurrences).
    parameters
        Parameters of the algorithm, including:
        - Parameters.FORMAT => the format of the visualization
        - Parameters.BGCOLOR => the background color
        - Parameters.ACTIVITY_THRESHOLD => the minimum number of occurrences for an activity to be included (default: 1)
        - Parameters.EDGE_THRESHOLD => the minimum number of occurrences for an edge to be included (default: 1)
        - Parameters.AGGREGATION_MEASURE => the aggregation measure (default: mean)

    Returns
    ------------------
    digraph
        Graphviz graph
    """
    if parameters is None:
        parameters = {}

    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters,
                                              "png")
    bgcolor = exec_utils.get_param_value(Parameters.BGCOLOR, parameters,
                                         "transparent")
    activity_threshold = exec_utils.get_param_value(
        Parameters.ACTIVITY_THRESHOLD, parameters, 1)
    edge_threshold = exec_utils.get_param_value(Parameters.EDGE_THRESHOLD,
                                                parameters, 1)
    aggregation_measure = exec_utils.get_param_value(
        Parameters.AGGREGATION_MEASURE, parameters, "mean")

    # any unrecognized measure name falls back to the mean
    aggregation_f = {
        "median": median,
        "min": min,
        "max": max,
        "stdev": stdev,
        "sum": sum,
    }.get(aggregation_measure, mean)

    # only a unique path is needed here: the handle is released immediately
    # instead of being kept open until garbage collection
    filename = tempfile.NamedTemporaryFile(suffix='.gv')
    filename.close()
    viz = Digraph("pt",
                  filename=filename.name,
                  engine='dot',
                  graph_attr={'bgcolor': bgcolor})
    viz.attr('node', shape='ellipse', fixedsize='false')

    # per edge and label: number of observations and aggregated performance
    network_analysis_edges = {}
    network_analysis_edges_agg_performance = {}
    for edge, observations in network_analysis_edges0.items():
        network_analysis_edges[edge] = {
            label: len(values)
            for label, values in observations.items()
        }
        network_analysis_edges_agg_performance[edge] = {
            label: aggregation_f(values)
            for label, values in observations.items()
        }

    nodes = {edge[0] for edge in network_analysis_edges
             } | {edge[1] for edge in network_analysis_edges}
    nodes_in_degree = {x: 0 for x in nodes}
    nodes_out_degree = {x: 0 for x in nodes}
    # only the edges reaching the threshold contribute to the node degrees
    for edge, counts in network_analysis_edges.items():
        for count in counts.values():
            if count >= edge_threshold:
                nodes_in_degree[edge[1]] += count
                nodes_out_degree[edge[0]] += count
    nodes_max_degree = {
        x: max(nodes_in_degree[x], nodes_out_degree[x])
        for x in nodes
    }

    # the color scale bounds must be known BEFORE the nodes are drawn, so
    # they are computed in a separate pass over the retained nodes
    retained_nodes = {
        node: degree
        for node, degree in nodes_max_degree.items()
        if degree >= activity_threshold
    }
    min_node_value = min(retained_nodes.values(), default=0)
    max_node_value = max(retained_nodes.values(), default=0)

    nodes_dict = {}
    for node, degree in retained_nodes.items():
        nodes_dict[node] = str(uuid.uuid4())
        viz.node(nodes_dict[node],
                 node + "\n(in=" + str(nodes_in_degree[node]) + "; out=" +
                 str(nodes_out_degree[node]) + ")",
                 style="filled",
                 fillcolor=vis_utils.get_trans_freq_color(
                     degree, min_node_value, max_node_value))

    # (edge, label, count) triples for the edges connecting two retained nodes
    candidate_edges = [(edge, label, count)
                       for edge, counts in network_analysis_edges.items()
                       if edge[0] in nodes_dict and edge[1] in nodes_dict
                       for label, count in counts.items()]
    # min and max are computed independently, so that a single value (or a
    # non-decreasing sequence of values) still updates both bounds
    min_edge_value = min((count for _, _, count in candidate_edges),
                         default=0)
    max_edge_value = max((count for _, _, count in candidate_edges),
                         default=0)

    for edge, label, count in candidate_edges:
        if count >= edge_threshold:
            viz.edge(nodes_dict[edge[0]],
                     nodes_dict[edge[1]],
                     label=label + "\n" + vis_utils.human_readable_stat(
                         network_analysis_edges_agg_performance[edge][label]),
                     penwidth=str(
                         vis_utils.get_arc_penwidth(count, min_edge_value,
                                                    max_edge_value)))

    viz.format = image_format

    return viz