def aggregate_statistics(statistics, measure="frequency", aggregation_measure=None):
    """
    Aggregate per-element statistics into visual decorations for a Petri net.

    Parameters
    ----------
    statistics
        Per-element statistics (including unaggregated performance values)
    measure
        View on the data: "frequency" or "performance"
    aggregation_measure
        Aggregation (e.g. mean, min) applied to arc performance values

    Returns
    ----------
    aggregated_statistics
        Map from arcs/transitions to their decoration (label plus penwidth/color)
    """
    trans_fr_min, trans_fr_max = find_min_max_trans_frequency(statistics)
    arc_fr_min, arc_fr_max = find_min_max_arc_frequency(statistics)
    arc_perf_min, arc_perf_max = find_min_max_arc_performance(statistics, aggregation_measure)

    decorations = {}
    for obj in statistics.keys():
        stat = statistics[obj]
        if type(obj) is PetriNet.Arc:
            if measure == "frequency":
                count = stat["count"]
                width = get_arc_penwidth(count, arc_fr_min, arc_fr_max)
                decorations[obj] = {"label": str(count), "penwidth": str(width)}
            elif measure == "performance" and stat["performance"]:
                # aggregate the raw performance values, render them human-readable,
                # and scale the arc thickness on the aggregated value
                aggr = aggregate_stats(statistics, obj, aggregation_measure)
                width = get_arc_penwidth(aggr, arc_perf_min, arc_perf_max)
                decorations[obj] = {"label": human_readable_stat(aggr),
                                    "penwidth": str(width)}
        elif type(obj) is PetriNet.Transition:
            # invisible transitions (label None) get no decoration
            if measure == "frequency" and obj.label is not None:
                count = stat["count"]
                color = get_trans_freq_color(count, trans_fr_min, trans_fr_max)
                decorations[obj] = {"label": obj.label + " (" + str(count) + ")",
                                    "color": color}
        elif type(obj) is PetriNet.Place:
            # places are intentionally left undecorated
            pass
    return decorations
def execute_script():
    """
    Example script: discovers a process tree on the auto-filtered receipt log,
    checks footprint conformance against the full log, reports the deviating
    paths and the variants together with their throughput times, and finally
    visualizes the tree with the deviations colored.
    """
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    # median case duration of the complete log (baseline for comparison)
    throughput_time = case_statistics.get_median_caseduration(log)
    variants, variants_times = variants_filter.get_variants_along_with_case_durations(
        log)
    dfg = dfg_discovery.apply(log)
    # discover the model only on the most frequent behavior
    filtered_log = variants_filter.apply_auto_filter(deepcopy(log))
    tree = inductive_miner.apply_tree(filtered_log)
    fp_log = fp_discovery.apply(log, variant=fp_discovery.Variants.ENTIRE_EVENT_LOG)
    fp_model = fp_discovery.apply(tree)
    # footprint conformance: (source, target) paths of the log not allowed by the model
    conf = fp_conformance.apply(fp_log, fp_model)
    # rank the deviating paths by number of occurrences in the DFG
    conf_occ = sorted([(x, dfg[x]) for x in conf],
                      key=lambda y: (y[1], y[0][0], y[0][1]),
                      reverse=True)
    print(
        "source activity\t\ttarget activity\t\toccurrences\t\tthroughput time log\t\tthroughput time traces with path"
    )
    for i in range(min(10, len(conf_occ))):
        path = conf_occ[i][0]
        occ = conf_occ[i][1]
        red_log = paths_filter.apply(log, [path])
        red_throughput_time = case_statistics.get_median_caseduration(red_log)
        print("%s\t\t%s\t\t%d\t\t%s\t\t%s" %
              (path[0], path[1], occ, human_readable_stat(throughput_time),
               human_readable_stat(red_throughput_time)))
    variants_length = sorted([(x, len(variants[x])) for x in variants.keys()],
                             key=lambda y: (y[1], y[0]),
                             reverse=True)
    # BUG FIX: the header previously omitted the "is fit" column even though the
    # row printed five values
    print(
        "\nvariant\t\toccurrences\t\tis fit\t\tthroughput time log\t\tthroughput time traces with path"
    )
    for i in range(min(10, len(variants_length))):
        var = variants_length[i][0]
        vark = str(var)
        if len(vark) > 10:
            vark = vark[:10]
        occ = variants_length[i][1]
        fp_log_var = fp_discovery.apply(
            variants[var], variant=fp_discovery.Variants.ENTIRE_EVENT_LOG)
        conf_var = fp_conformance.apply(fp_log_var, fp_model)
        is_fit = str(len(conf_var) == 0)
        var_throughput = case_statistics.get_median_caseduration(variants[var])
        # BUG FIX: the log throughput time was printed as a raw number; format it
        # like the other throughput-time columns
        print("%s\t\t%d\t\t%s\t\t%s\t\t%s" %
              (vark, occ, is_fit, human_readable_stat(throughput_time),
               human_readable_stat(var_throughput)))
    conf_colors = tree_visualization.apply(tree, conf)
    gviz = pt_visualizer.apply(
        tree,
        parameters={
            "format": "svg",
            pt_visualizer.Variants.WO_DECORATION.value.Parameters.COLOR_MAP: conf_colors,
            pt_visualizer.Variants.WO_DECORATION.value.Parameters.ENABLE_DEEPCOPY: False
        })
    pt_visualizer.view(gviz)
def apply(network_analysis_edges0: Dict[Tuple[str, str], Dict[str, Any]],
          parameters: Optional[Dict[Any, Any]] = None) -> Digraph:
    """
    Creates a visualization of the network analysis (performance view)

    Parameters
    -----------------
    network_analysis_edges0
        Edges of the network analysis
    parameters
        Parameters of the algorithm, including:
        - Parameters.FORMAT => the format of the visualization
        - Parameters.BGCOLOR => the background color
        - Parameters.ACTIVITY_THRESHOLD => the minimum number of occurrences for an activity to be included (default: 1)
        - Parameters.EDGE_THRESHOLD => the minimum number of occurrences for an edge to be included (default: 1)
        - Parameters.AGGREGATION_MEASURE => the aggregation measure (default: mean)

    Returns
    ------------------
    digraph
        Graphviz graph
    """
    if parameters is None:
        parameters = {}

    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    bgcolor = exec_utils.get_param_value(Parameters.BGCOLOR, parameters, "transparent")
    activity_threshold = exec_utils.get_param_value(
        Parameters.ACTIVITY_THRESHOLD, parameters, 1)
    edge_threshold = exec_utils.get_param_value(Parameters.EDGE_THRESHOLD, parameters, 1)
    aggregation_measure = exec_utils.get_param_value(
        Parameters.AGGREGATION_MEASURE, parameters, "mean")

    # map the textual aggregation measure onto the corresponding function
    aggregation_f = mean
    if aggregation_measure == "median":
        aggregation_f = median
    elif aggregation_measure == "min":
        aggregation_f = min
    elif aggregation_measure == "max":
        aggregation_f = max
    elif aggregation_measure == "stdev":
        aggregation_f = stdev
    elif aggregation_measure == "sum":
        aggregation_f = sum

    filename = tempfile.NamedTemporaryFile(suffix='.gv')
    # BUG FIX: release the OS handle — only the temporary name is needed by
    # graphviz; keeping it open leaked the handle (and blocks writes on Windows)
    filename.close()

    viz = Digraph("pt", filename=filename.name, engine='dot',
                  graph_attr={'bgcolor': bgcolor})
    viz.attr('node', shape='ellipse', fixedsize='false')

    # per-edge: number of observed values, and their aggregated performance
    network_analysis_edges = {}
    network_analysis_edges_agg_performance = {}
    for x in network_analysis_edges0:
        network_analysis_edges[x] = {}
        network_analysis_edges_agg_performance[x] = {}
        for y in network_analysis_edges0[x]:
            network_analysis_edges[x][y] = len(network_analysis_edges0[x][y])
            network_analysis_edges_agg_performance[x][y] = aggregation_f(
                network_analysis_edges0[x][y])

    nodes = set(x[0] for x in network_analysis_edges).union(
        set(x[1] for x in network_analysis_edges))
    nodes_in_degree = {x: 0 for x in nodes}
    nodes_out_degree = {x: 0 for x in nodes}
    for edge in network_analysis_edges:
        for edge_value in network_analysis_edges[edge]:
            if network_analysis_edges[edge][edge_value] >= edge_threshold:
                nodes_in_degree[edge[1]] += network_analysis_edges[edge][edge_value]
                nodes_out_degree[edge[0]] += network_analysis_edges[edge][edge_value]
    nodes_max_degree = {
        x: max(nodes_in_degree[x], nodes_out_degree[x])
        for x in nodes
    }

    # BUG FIX: the node-degree bounds were initialized inverted
    # (max=sys.maxsize, min=-sys.maxsize), updated only AFTER the node had
    # already been drawn, and get_trans_freq_color received (max, max) — so
    # every node color was computed against sys.maxsize. Compute the bounds
    # over the included nodes first, then draw with (min, max).
    included_nodes = [node for node in nodes_max_degree
                      if nodes_max_degree[node] >= activity_threshold]
    max_node_value = max((nodes_max_degree[node] for node in included_nodes),
                         default=-sys.maxsize)
    min_node_value = min((nodes_max_degree[node] for node in included_nodes),
                         default=sys.maxsize)

    nodes_dict = {}
    for node in included_nodes:
        nodes_dict[node] = str(uuid.uuid4())
        viz.node(nodes_dict[node],
                 node + "\n(in=" + str(nodes_in_degree[node]) + "; out=" +
                 str(nodes_out_degree[node]) + ")",
                 style="filled",
                 fillcolor=vis_utils.get_trans_freq_color(
                     nodes_max_degree[node], min_node_value, max_node_value))

    # bounds of the edge counts among edges connecting included nodes
    min_edge_value = sys.maxsize
    max_edge_value = -sys.maxsize
    for edge in network_analysis_edges:
        if edge[0] in nodes_dict and edge[1] in nodes_dict:
            for edge_value in network_analysis_edges[edge]:
                count = network_analysis_edges[edge][edge_value]
                # BUG FIX: independent ifs — the original 'elif' skipped the
                # min update whenever the count was also a new max, leaving
                # min_edge_value at sys.maxsize when all counts are equal
                if count > max_edge_value:
                    max_edge_value = count
                if count < min_edge_value:
                    min_edge_value = count

    for edge in network_analysis_edges:
        if edge[0] in nodes_dict and edge[1] in nodes_dict:
            for edge_value in network_analysis_edges[edge]:
                if network_analysis_edges[edge][edge_value] >= edge_threshold:
                    viz.edge(nodes_dict[edge[0]],
                             nodes_dict[edge[1]],
                             label=edge_value + "\n" +
                             vis_utils.human_readable_stat(
                                 network_analysis_edges_agg_performance[edge]
                                 [edge_value]) + "",
                             penwidth=str(
                                 vis_utils.get_arc_penwidth(
                                     network_analysis_edges[edge][edge_value],
                                     min_edge_value, max_edge_value)))

    viz.format = image_format

    return viz