def make_scatter_plot(slice_idx_to_data, slice_idx_lower, slice_idx_upper,
                      stats):
    # Select slice indices
    all_slice_indices = sorted(slice_idx_to_data.keys())
    if slice_idx_lower is not None and slice_idx_upper is not None:
        slice_indices = all_slice_indices[slice_idx_lower:slice_idx_upper + 1]
    else:
        slice_indices = all_slice_indices

    kernel = ('wlst', 'logical_time', 5)
    # Restrict to the selected slice indices rather than all available slices
    idx_to_distances = {
        idx: flatten_distance_matrix(slice_idx_to_data[idx]["kernel_distance"][kernel])
        for idx in slice_indices
    }

    x_vals = []
    y_vals = []
    for slice_idx, distances in idx_to_distances.items():
        base_x_val = slice_idx
        for d in distances:
            x_val = base_x_val + np.random.uniform(-0.25, 0.25)
            y_val = d
            x_vals.append(x_val)
            y_vals.append(y_val)

    fig, ax = plt.subplots()
    ax.scatter(x_vals, y_vals)

    x_axis_label = "% Messages Non-Deterministic"
    x_tick_labels = [
        "0", "20", "30", "40", "50", "60", "70", "80", "90", "100"
    ]
    x_ticks = list(range(len(x_tick_labels)))
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_tick_labels, rotation=45)
    ax.set_xlabel(x_axis_label)

    # Y-axis stuff
    y_axis_label = "Kernel Distance (Higher == Runs Less Similar)"
    ax.set_ylabel(y_axis_label)

    # Plot title
    plot_title = "Fraction of Messages Non-Deterministic vs. Kernel Distance"
    plt.title(plot_title)

    plt.show()
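
Every snippet here relies on a flatten_distance_matrix helper that is not shown. A minimal sketch, assuming the input is a symmetric pairwise distance matrix and only its strictly upper-triangular entries are wanted, might look like the following (the body is an assumption, not the original helper):

import numpy as np

def flatten_distance_matrix(distance_mat):
    # Sketch only: return the strictly upper-triangular entries of a symmetric
    # pairwise distance matrix as a flat list; the real helper may differ.
    dm = np.asarray(distance_mat)
    rows, cols = np.triu_indices(dm.shape[0], k=1)
    return list(dm[rows, cols])
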
Code example #2
def get_distances_seq( slice_idx_to_data, slice_indices, kernel ):
    distance_mat_seq = [ ]
    for idx in slice_indices:
        distance_mat_seq.append( slice_idx_to_data[ idx ][ "kernel_distance" ][ kernel ] )
    distances_seq = [ flatten_distance_matrix(dm) for dm in distance_mat_seq ]
    return distances_seq 
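
A brief usage sketch for the helper above, assuming slice_idx_to_data has been loaded from a kdts pickle as in the surrounding snippets and reusing the kernel key from code example #1:

kernel = ('wlst', 'logical_time', 5)
slice_indices = sorted(slice_idx_to_data.keys())
distances_seq = get_distances_seq(slice_idx_to_data, slice_indices, kernel)
# distances_seq[i] holds the flattened distance distribution for slice_indices[i]
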
Code example #3
def main( kdts_data_path, kernel_file_path, block_traffic_data_path=None, flagged_slices=None, kdts_ymax=None, mre_ymax=None, output="mini_amr_kdts.png"):

    # Read in kernel distance time series data
    with open( kdts_data_path, "rb" ) as infile:
        slice_idx_to_data = pkl.load( infile )

    # Read in kernel definition file
    with open( kernel_file_path, "r" ) as infile:
        kernel = json.load(infile)
   

    # Unpack kernel distance time series data
    slice_indices = sorted( slice_idx_to_data.keys() )
    kernel_key = kernel_json_to_key( kernel )
    kernel_matrices = [ slice_idx_to_data[i]["kernel_distance"][kernel_key] for i in slice_indices ]
    kernel_distances = [ flatten_distance_matrix(km) for km in kernel_matrices ]

    # Get x-axis positions for kernel distance distribution boxes
    kdts_box_positions = slice_indices
    
    # Get boxplot data 
    kdts_box_data = kernel_distances

    # Configure figure
    base_figure_size = (16, 9)
    figure_scale = 1.5
    figure_size = [ dim * figure_scale for dim in base_figure_size ]
    
    # Make figure and axis for kernel distance time series boxplot
    fig, kdts_ax = plt.subplots( figsize = figure_size )

    # Configure boxplot appearance
    box_width = 0.5
    box_props = { "alpha" : 0.5 }
    flier_props = { "marker" : "+", "markersize" : 4 }
        
    if flagged_slices is not None:
        with open( flagged_slices, "rb" ) as infile:
            #flagged_indices = pkl.load( infile )["increasing_median"]  # TODO undo hardcode
            flagged_indices = pkl.load( infile )["kolmogorov_smirnov"]  # TODO undo hardcode

        non_flagged_box_positions = sorted(set(kdts_box_positions) - set(flagged_indices))
        flagged_box_positions = sorted(flagged_indices)

        non_flagged_box_data = [ kdts_box_data[i] for i in non_flagged_box_positions ]
        flagged_box_data = [ kdts_box_data[i] for i in flagged_box_positions ]
        
        non_flagged_box_props = box_props
        flagged_box_props = { "alpha" : 0.5, "facecolor" : "r" }

        non_flagged_kdts_boxes = kdts_ax.boxplot( non_flagged_box_data,
                                      widths = box_width,
                                      positions = non_flagged_box_positions,
                                      patch_artist = True,
                                      showfliers = True,
                                      boxprops = non_flagged_box_props,
                                      flierprops = flier_props )
        
        flagged_kdts_boxes = kdts_ax.boxplot( flagged_box_data,
                                              widths = box_width,
                                              positions = flagged_box_positions,
                                              patch_artist = True,
                                              showfliers = True,
                                              boxprops = flagged_box_props,
                                              flierprops = flier_props )

    else:
        # Create base kernel distance boxplot
        kdts_boxes = kdts_ax.boxplot( kdts_box_data,
                                      widths = box_width,
                                      positions = kdts_box_positions,
                                      patch_artist = True,
                                      showfliers = True,
                                      boxprops = box_props,
                                      flierprops = flier_props )
    


    # Read in mesh refinement block traffic data and plot, if available
    if block_traffic_data_path is not None:
        with open( block_traffic_data_path, "rb" ) as infile:
            block_traffic_data = pkl.load( infile )
        # Unpack
        mesh_refinement_rate = block_traffic_data["mesh_refinement_rate"]
        mre_to_block_traffic = block_traffic_data["mre_to_block_traffic"]
        # Copy axis
        mre_ax  = kdts_ax.twinx()
        # Get x-axis positions for block traffic data
        mre_data_positions = [ (x*mesh_refinement_rate)+x-1 for x in range( len( mre_to_block_traffic ) ) ][1:]
        # Get boxplot data
        mre_box_data = mre_to_block_traffic[1:]
        mre_data = [ np.mean(x) for x in mre_to_block_traffic ][1:]
        # Configure boxplot appearance
        mre_box_width = 0.5
        mre_box_props = { "alpha" : 0.5, "facecolor" : "r" }
        mre_flier_props = { "marker" : "*", "markersize" : 4 }
        # Create MRE block traffic line plot
        mre_plot_handle = mre_ax.plot( mre_data_positions, 
                                       mre_data,
                                       color="r",
                                       marker="o",
                                       linestyle="dashed",
                                       linewidth=2,
                                       markersize=12,
                                       label="Mesh Refinement Blocks Traffic"
                                     )
        # Configure MRE y-axis appearance
        mre_ax.set_ylabel("Number of Blocks Transferred During Mesh Refinement")
        if mre_ymax is not None:
            mre_ax.set_ylim(0, mre_ymax)
        # Compute correlation coefficients between block traffic and kernel distance
        kernel_distance_seq = []
        block_traffic_seq = []
        for i in range(len(mre_data_positions)):
            distance_data = kdts_box_data[ mre_data_positions[i] ]
            slice_block_traffic = mre_box_data[i]  # avoid shadowing the block_traffic_data dict loaded above
            kernel_distance_seq.append( np.var( distance_data ) )
            block_traffic_seq.append( np.median( slice_block_traffic ) )
            #for dist,traffic in zip(distance_data, slice_block_traffic):
            #    kernel_distance_seq.append(dist)
            #    block_traffic_seq.append(traffic)
        pearson_r, pearson_p = pearsonr( block_traffic_seq, kernel_distance_seq )
        spearman_r, spearman_p = spearmanr( block_traffic_seq, kernel_distance_seq )
        pearson_correlation_txt = "Pearson's r = {}, p = {}\n".format(np.round(pearson_r, 2), pearson_p)
        spearman_correlation_txt = "Spearman's ρ = {}, p = {}\n".format(np.round(spearman_r, 2), spearman_p)
        print( pearson_correlation_txt )
        print( spearman_correlation_txt )


    
     
    # Configure axes text appearance
    tick_label_fontdict = { "fontsize" : 12 } 

    # Configure x-axis appearance
    x_ticks = slice_indices
    if block_traffic_data_path is None:
        mesh_refinement_rate = 5
    x_tick_labels = [ str(x+1) if (x+1) % mesh_refinement_rate == 0 else '' for x in x_ticks ]
    kdts_ax.set_xticks( x_ticks )
    kdts_ax.set_xticklabels( x_tick_labels, rotation=0, fontdict=tick_label_fontdict )
    x_axis_padding = 5
    kdts_ax.set_xlim( -1*x_axis_padding, len(kdts_box_positions) + x_axis_padding )
    kdts_ax.set_xlabel("Slice Index")

    # Configure kernel distance time series y-axis appearance
    kdts_ax.set_ylabel("Kernel Distance (Higher == Runs Less Similar)")
    if kdts_ymax is not None:
        kdts_ax.set_ylim(0, kdts_ymax)

    # Configure title appearance
    # TODO

    # Annotate 
    # TODO

    # Configure legend appearance; only reference plot handles that were actually created
    boxes_handle = ( non_flagged_kdts_boxes if flagged_slices is not None else kdts_boxes )["boxes"][0]
    legend_handles = [ boxes_handle ]
    legend_labels = [ "Kernel Distance Distributions" ]
    if block_traffic_data_path is not None:
        legend_handles.append( mre_plot_handle[0] )
        legend_labels.append( "Mesh Refinement Block Traffic" )
    kdts_ax.legend( legend_handles, legend_labels, loc="upper left" )

    # Save figure
    plt.savefig( output,
                 bbox_inches = "tight",
                 pad_inches = 0.25 )
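
A minimal invocation sketch for the entry point above; the file names are placeholders, not paths from the original:

if __name__ == "__main__":
    # Base kernel distance time series plot without the optional block-traffic
    # overlay or flagged-slice highlighting
    main("kdts.pkl", "kernel.json",
         block_traffic_data_path=None,
         flagged_slices=None,
         output="mini_amr_kdts.png")
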
Code example #4
def main(kdts_path, kernel_path, pattern, ymax):

    # Load kernel distance time series
    with open(kdts_path, "rb") as infile:
        slice_idx_to_data = pkl.load(infile)

    # Load kernel definition
    with open(kernel_path, "r") as infile:
        kernel = json.load(infile)

    # Unpack kernel distance time series data
    slice_indices = sorted(slice_idx_to_data.keys())
    kernel_key = kernel_json_to_key(kernel)
    kernel_matrices = [
        slice_idx_to_data[i]["kernel_distance"][kernel_key]
        for i in slice_indices
    ]
    kernel_distances = [flatten_distance_matrix(km) for km in kernel_matrices]

    # Get scatter plot points
    scatter_x_vals, scatter_y_vals = get_scatter_plot_points(kernel_distances)

    # Package data for box plots
    bp_positions = []
    bp_data = []
    for i in range(len(kernel_distances)):
        bp_positions.append(i)
        bp_data.append(kernel_distances[i])

    # Specify appearance of boxes
    box_width = 0.5
    flierprops = {"marker": "+", "markersize": 4}
    boxprops = {"alpha": 1.0, "linewidth": 3, "color": "black"}

    # Specify appearance of scatter plot markers
    marker_size = 1
    marker_color = "lightblue"

    aspect_ratio = "widescreen"
    figure_scale = 1.5
    if aspect_ratio == "widescreen":
        base_figure_size = (16, 9)
    else:
        base_figure_size = (4, 3)

    figure_size = (figure_scale * base_figure_size[0],
                   figure_scale * base_figure_size[1])

    fig, ax = plt.subplots(figsize=figure_size)

    # Create box plots
    bp = ax.boxplot(bp_data,
                    widths=box_width,
                    positions=bp_positions,
                    patch_artist=True,
                    showfliers=False,
                    boxprops=boxprops,
                    flierprops=flierprops)

    # Overlay actual data points on same axis
    ax.scatter(scatter_x_vals, scatter_y_vals, s=marker_size, c=marker_color)

    # Plot annotation ( correlation coefficients )
    nd_fractions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    nd_fraction_seq = []
    dist_seq = []
    for i in range(len(nd_fractions)):
        for d in kernel_distances[i]:
            nd_fraction_seq.append(nd_fractions[i])
            dist_seq.append(d)
    pearson_r, pearson_p = pearsonr(nd_fraction_seq, dist_seq)
    spearman_r, spearman_p = spearmanr(nd_fraction_seq, dist_seq)
    pearson_correlation_txt = "Pearson's r = {}, p = {}\n".format(
        np.round(pearson_r, 2), pearson_p)
    spearman_correlation_txt = "Spearman's rho = {}, p = {}\n".format(
        np.round(spearman_r, 2), spearman_p)
    print(pearson_correlation_txt)
    print(spearman_correlation_txt)

    annotation_lines = [
        "Correlation Coefficients\n", pearson_correlation_txt,
        spearman_correlation_txt
    ]

    annotation_txt = "".join(annotation_lines)
    annotation_font_size = 18
    ax.annotate(annotation_txt,
                xy=(0.45, 0.25),
                xycoords='axes fraction',
                fontsize=annotation_font_size,
                bbox=dict(boxstyle="square, pad=1", fc="w"))

    # Shared axis properties
    tick_label_fontdict = {"fontsize": 18}

    # X-axis properties
    x_tick_labels = [
        "0", "10", "20", "30", "40", "50", "60", "70", "80", "90", "100"
    ]
    x_tick_labels = [x + "%" for x in x_tick_labels]
    x_ticks = list(range(len(x_tick_labels)))
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_tick_labels, rotation=0, fontdict=tick_label_fontdict)

    # Y-axis properties
    y_ticks = [0, 10, 20, 30, 40, 50, 60, 70]
    y_tick_labels = [str(y) for y in y_ticks]
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(y_tick_labels, rotation=0, fontdict=tick_label_fontdict)
    if ymax is not None:
        ax.set_ylim(0, ymax)

    # Axis labels
    x_axis_label = "Percentage of Wildcard Receives (i.e., using MPI_ANY_SOURCE)"
    y_axis_label = "Kernel Distance (Higher == Runs Less Similar)"
    axis_label_fontdict = {"fontsize": 18}
    ax.set_xlabel(x_axis_label, fontdict=axis_label_fontdict)
    ax.set_ylabel(y_axis_label, fontdict=axis_label_fontdict)

    # Plot title
    pattern_to_nice_name = {
        "message_race": "Message Race",
        "amg2013": "AMG2013",
        "mini_mcb_grid": "Mini-MCB Grid",
        "unstructured_mesh": "Unstructured Mesh"
    }
    if pattern is not None:
        plot_title = "Percentage of Wildcard Receives vs. Kernel Distance - Communication Pattern: {}".format(
            pattern_to_nice_name[pattern])
    else:
        plot_title = "Percentage of Wildcard Receives vs. Kernel Distance"
    title_fontdict = {"fontsize": 20}
    plt.title(plot_title, fontdict=title_fontdict)

    if pattern is not None:
        save_path = "nd_fraction_vs_kernel_distance_{}.png".format(pattern)
    else:
        save_path = "nd_fraction_vs_kernel_distance.png"
    plt.savefig(save_path, bbox_inches="tight", pad_inches=0.25, dpi=600)
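
get_scatter_plot_points is another helper that is not shown. A minimal sketch consistent with the jittered scatter built inline in code example #1 (the jitter width and the dict/list handling are assumptions):

import numpy as np

def get_scatter_plot_points(idx_to_distances, jitter=0.25):
    # Sketch only: place each distance at its slice index on the x-axis with a
    # small uniform jitter so overlapping points stay distinguishable.
    items = (sorted(idx_to_distances.items()) if isinstance(idx_to_distances, dict)
             else enumerate(idx_to_distances))
    x_vals, y_vals = [], []
    for idx, distances in items:
        for d in distances:
            x_vals.append(idx + np.random.uniform(-jitter, jitter))
            y_vals.append(d)
    return x_vals, y_vals
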
Code example #5
def main(kdts_path, nd_neighbor_fraction):
    # Read in kdts data
    with open(kdts_path, "rb") as infile:
        slice_idx_to_data = pkl.load(infile)

    kernel = ('wlst', 'logical_time', 5)
    idx_to_distances = {
        k: flatten_distance_matrix(v["kernel_distance"][kernel])
        for k, v in slice_idx_to_data.items()
    }

    # Package data for scatter plot
    scatter_x_vals, scatter_y_vals = get_scatter_plot_points(idx_to_distances)

    # Package data for box-plots
    bp_positions = []
    bp_data = []
    for idx, distances in sorted(idx_to_distances.items()):
        bp_positions.append(idx)
        bp_data.append(distances)

    # Specify appearance of boxes
    box_width = 0.5
    flierprops = {"marker": "+", "markersize": 4}
    boxprops = {"alpha": 0.25}

    # Specify appearance of scatter plot markers
    marker_size = 6

    aspect_ratio = "widescreen"
    figure_scale = 1.5
    if aspect_ratio == "widescreen":
        base_figure_size = (16, 9)
    else:
        base_figure_size = (4, 3)

    figure_size = (figure_scale * base_figure_size[0],
                   figure_scale * base_figure_size[1])

    fig, ax = plt.subplots(figsize=figure_size)

    # Create box plots
    bp = ax.boxplot(bp_data,
                    widths=box_width,
                    positions=bp_positions,
                    patch_artist=True,
                    showfliers=False,
                    boxprops=boxprops,
                    flierprops=flierprops)

    # Overlay actual data points on same axis
    ax.scatter(scatter_x_vals, scatter_y_vals, s=marker_size)

    # Plot annotation ( correlation coefficients )
    nd_fractions = [0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    nd_fraction_seq = []
    dist_seq = []
    for i in range(len(nd_fractions)):
        for d in idx_to_distances[i]:
            nd_fraction_seq.append(nd_fractions[i])
            dist_seq.append(d)
    pearson_r, pearson_p = pearsonr(nd_fraction_seq, dist_seq)
    spearman_r, spearman_p = spearmanr(nd_fraction_seq, dist_seq)
    #pearson_correlation_txt = "Kernel distance vs. % ND → Pearson-R = {}, p = {}".format(np.round(pearson_r, 2), pearson_p)
    #spearman_correlation_txt = "Kernel distance vs. % ND → Spearman-R = {}, p = {}".format(np.round(spearman_r, 2), spearman_p)

    pearson_correlation_txt = "Pearson's r = {}, p = {}\n".format(
        np.round(pearson_r, 2), pearson_p)
    spearman_correlation_txt = "Spearman's rho = {}, p = {}\n".format(
        np.round(spearman_r, 2), spearman_p)
    print(pearson_correlation_txt)
    print(spearman_correlation_txt)

    annotation_lines = [
        "Kernel Distance vs. % Wildcard Receives: Correlation Coefficients\n",
        #"=================================================================\n",
        pearson_correlation_txt,
        spearman_correlation_txt
    ]

    annotation_txt = "".join(annotation_lines)
    annotation_font_size = 18
    #ax.annotate( annotation_txt,
    #             xy=(0.55, 0.25),
    #             xycoords='axes fraction',
    #             fontsize=annotation_font_size,
    #             bbox=dict(boxstyle="square, pad=1", fc="w")
    #           )

    # Tick labels
    tick_label_fontdict = {"fontsize": 12}
    x_tick_labels = [
        "0", "20", "30", "40", "50", "60", "70", "80", "90", "100"
    ]
    x_ticks = list(range(len(x_tick_labels)))
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_tick_labels, rotation=0, fontdict=tick_label_fontdict)
    #y_ticks = [ 0, 5, 10, 15, 20, 25, 30, 35, 40 ]
    #y_tick_labels = [ str(y) for y in y_ticks ]
    #ax.set_yticks( y_ticks )
    #ax.set_yticklabels( y_tick_labels, rotation=0, fontdict=tick_label_fontdict )
    ax.set_ylim(0, 175)

    # Axis labels
    x_axis_label = "Percentage of Wildcard Receives (i.e., using MPI_ANY_SOURCE)"
    y_axis_label = "Kernel Distance (Higher == Runs Less Similar)"
    axis_label_fontdict = {"fontsize": 18}
    ax.set_xlabel(x_axis_label, fontdict=axis_label_fontdict)
    ax.set_ylabel(y_axis_label, fontdict=axis_label_fontdict)

    # Plot Title
    plot_title = "Percentage of Wildcard Receives vs. Kernel Distance - Communication Pattern: Unstructured Mesh ({}% neighbors non-deterministically chosen )".format(
        int(nd_neighbor_fraction * 100))
    title_fontdict = {"fontsize": 18}
    plt.title(plot_title, fontdict=title_fontdict)

    #plt.show()
    plt.savefig("unstructured_mesh_example.png",
                bbox_inches="tight",
                pad_inches=0.25)
Code example #6
def detect_anomalies( kernel_distance_seq, policy ):
    # Unpack policy
    policy_name = policy["name"]
    policy_params = policy["params"]

    # Do a truly naive anomaly detection policy where we just define the slice 
    # containing the max kernel distance as anomalous and all others as not
    # anomalous. This is not really "anomaly detection" in any meaningful sense
    # But it suffices for testing the basic workflow
    if policy_name == "naive_max":
        max_dist_slice_idx = 0
        max_dist = 0
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            slice_max = max( distances )
            if slice_max > max_dist:
                max_dist = slice_max
                max_dist_slice_idx = slice_idx
        return [ max_dist_slice_idx ]

    # Detect anomalies based on whether the median kernel distance increases
    # from slice to slice or not
    elif policy_name == "increasing_median":
        threshold = policy_params["threshold"]
        flagged_slice_indices = []
        prev_median_distance = 0
        curr_median_distance = 0
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            curr_median_distance = np.median( distances )
            #if curr_median_distance > prev_median_distance:
            if curr_median_distance - prev_median_distance > threshold:
                flagged_slice_indices.append( slice_idx )
            prev_median_distance = curr_median_distance
        return flagged_slice_indices
    
    elif policy_name == "kolmogorov_smirnov":
        flagged_slice_indices = []
        # Compare each interior slice's distance distribution to both neighbors
        for slice_idx in range( 1, len(kernel_distance_seq) - 1 ):
            prev_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx - 1 ])
            curr_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx  ])
            next_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx + 1 ])
            ks2_stat_prev, p_val_prev = ks_2samp( prev_dist, curr_dist )
            ks2_stat_next, p_val_next = ks_2samp( next_dist, curr_dist )
            thresh = 0.0001
            if p_val_prev < thresh and p_val_next < thresh:
                flagged_slice_indices.append( slice_idx )
        return flagged_slice_indices

    # Flag slices if the median kernel distance exceeds a user-supplied 
    # threshold
    elif policy_name == "median_exceeds_threshold":
        threshold = policy_params[ "threshold" ]
        flagged_slice_indices = []
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            median_distance = np.median( distances )
            if median_distance > threshold:
                flagged_slice_indices.append( slice_idx )
        return flagged_slice_indices
        
    # Randomly choose slices. This isn't really an anomaly detection policy, but
    # we use it to check whether the distribution of callstacks from a random
    # sample of slices looks different than the distribution of callstacks from
    # the flagged slices
    elif policy_name == "random":
        n_samples = policy_params["n_samples"]
        n_slices = len(kernel_distance_seq)
        n_generated = 0
        flagged_slice_indices = set()
        while n_generated < n_samples:
            # generate uniform random number between 0 and n_slices-1
            rand_slice_idx = np.random.randint( 0, n_slices, size=1 )[0]
            if rand_slice_idx not in flagged_slice_indices:
                flagged_slice_indices.add( rand_slice_idx )
                n_generated += 1
        return list( flagged_slice_indices )

    elif policy_name == "all":
        n_slices = len(kernel_distance_seq)
        return list( range( n_slices ) )
    

    elif policy_name == "ruptures_binary_segmentation":
        # Unpack policy
        model = policy_params[ "model" ]
        #width = policy_params[ "width" ]
        n_change_points = policy_params[ "n_change_points" ]
        penalty = policy_params[ "penalty" ]
        epsilon = policy_params[ "epsilon" ]

        # Get list of distance distributions
        distance_distribution_seq = []
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            distance_distribution_seq.append( distances )

        # Get some properties about the distances needed by Ruptures
        n_distributions = len( distance_distribution_seq )
        dim = len( distance_distribution_seq[0] )  # all slices are assumed to yield equally many pairwise distances
        all_distances = []
        for d in distance_distribution_seq:
            all_distances += d
        sigma = np.std( all_distances )

        # Make into ndarray for ruptures
        signal = np.array( [ np.array(d) for d in distance_distribution_seq ] )

        # Set up model
        algo = rpt.Binseg( model=model ).fit( signal )

        # Find change-points
        if n_change_points == "unknown":
            if penalty == True and epsilon == False:
                penalty_value = np.log( n_distributions ) * dim * sigma**2 
                change_points = algo.predict( pen=penalty_value )
            elif penalty == False and epsilon == True:
                threshold = 3 * n_distributions * sigma**2
                change_points = algo.predict( epsilon=threshold )
            else:
                raise ValueError("Invalid policy for window-based change-point detection: {}".format(policy_params))
        else:
            change_points = algo.predict( n_bkps=n_change_points )
        
        flagged_slice_indices = [ cp-1 for cp in change_points ]
        return flagged_slice_indices


    elif policy_name == "ruptures_window_based":
        # Unpack policy
        model = policy_params[ "model" ]
        width = policy_params[ "width" ]
        n_change_points = policy_params[ "n_change_points" ]
        penalty = policy_params[ "penalty" ]
        epsilon = policy_params[ "epsilon" ]

        # Get list of distance distributions
        distance_distribution_seq = []
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            distance_distribution_seq.append( distances )

        # Get some properties about the distances needed by Ruptures
        n_distributions = len( distance_distribution_seq )
        dim = len( distance_distribution_seq[0] )  # all slices are assumed to yield equally many pairwise distances
        all_distances = []
        for d in distance_distribution_seq:
            all_distances += d
        sigma = np.std( all_distances )

        # Make into ndarray for ruptures
        signal = np.array( [ np.array(d) for d in distance_distribution_seq ] )

        # Set up model
        algo = rpt.Window( width=width, model=model ).fit( signal )

        # Find change-points
        if n_change_points == "unknown":
            if penalty == True and epsilon == False:
                penalty_value = np.log( n_distributions ) * dim * sigma**2 
                change_points = algo.predict( pen=penalty_value )
            elif penalty == False and epsilon == True:
                threshold = 3 * n_distributions * sigma**2
                change_points = algo.predict( epsilon=threshold )
            else:
                raise ValueError("Invalid policy for window-based change-point detection: {}".format(policy_params))
        else:
            change_points = algo.predict( n_bkps=n_change_points )
        
        flagged_slice_indices = [ cp-1 for cp in change_points ]
        return flagged_slice_indices
    


    else:
        raise NotImplementedError("Anomaly detection policy: {} is not implemented".format(policy_name))
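
The policy argument is a dict with "name" and "params" keys, as unpacked at the top of the function. A usage sketch with illustrative parameter values, given a kernel_distance_seq holding one distance matrix per slice:

# Flag slices whose median kernel distance grows by more than 0.5 (illustrative threshold)
policy = { "name" : "increasing_median", "params" : { "threshold" : 0.5 } }
flagged = detect_anomalies( kernel_distance_seq, policy )

# Window-based change-point detection via ruptures with a known number of change points
policy = { "name" : "ruptures_window_based",
           "params" : { "model" : "l2", "width" : 10,
                        "n_change_points" : 2, "penalty" : False, "epsilon" : False } }
flagged = detect_anomalies( kernel_distance_seq, policy )
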
Code example #7
def main( kdts_path, pattern, output, kernel_path, nd_start, nd_iter, nd_end, nd_frac ):
    # Read in kdts data
    with open( kdts_path, "rb" ) as infile:
        slice_idx_to_data = pkl.load( infile )
    
    with open(kernel_path, "r" ) as infile:
        kernel = json.load(infile)
    
    # Unpack kernel distance time series data
    slice_indices = sorted( slice_idx_to_data.keys() )
    kernel_key = kernel_json_to_key( kernel )
    kernel_matrices = [ slice_idx_to_data[i]["kernel_distance"][kernel_key] for i in slice_indices ]
    idx_to_distances = [ flatten_distance_matrix(km) for km in kernel_matrices ]

    # Package data for scatter plot
    scatter_x_vals, scatter_y_vals = get_scatter_plot_points( idx_to_distances )

    # Package data for box-plots
    bp_positions = []
    bp_data = []
    for i in range( len(idx_to_distances) ):
        bp_positions.append( i )
        bp_data.append( idx_to_distances[i] )
    
    # Specify appearance of boxes
    box_width = 0.8
    flierprops = { "marker" : "+",
                   "markersize" : 4
                 }
    boxprops = { "alpha" : 0.5,
            "facecolor" : "tab:brown"
               } 
    whiskerprops = { "linewidth" : 3
            }
    
    # Specify appearance of scatter plot markers
    marker_size = 6
    marker_color = "b"
    alpha_value = 0.5
    
    aspect_ratio = "widescreen"
    figure_scale = 1.5
    if aspect_ratio == "widescreen":
        base_figure_size = (16, 9)
    else:
        base_figure_size = (4, 3)

    figure_size = (figure_scale*base_figure_size[0], figure_scale*base_figure_size[1] )

    fig,ax = plt.subplots( figsize=figure_size )

    # Create box plots 
    #bp = ax.boxplot( bp_data,
    #                 widths=box_width,
    #                 positions=bp_positions,
    #                 patch_artist=True,
    #                 showfliers=False,
    #                 boxprops=boxprops,
    #                 whiskerprops=whiskerprops,
    #                 flierprops=flierprops )

    #bp_quantiles = [[0.25, 0.5, 0.75] for i in range(len(bp_positions))]

    bp = ax.violinplot( bp_data, widths=box_width, positions=bp_positions, showmedians=True, showextrema=True )

    for sprops in bp['bodies']:
        #sprops.set_facecolor('#D43F3A')
        sprops.set_facecolor('tab:olive')
        sprops.set_edgecolor('black')
        sprops.set_alpha(1)

    #bp['cquantiles'].set_edgecolors('black')
    #bp['cquantiles'].set_linewidths(2.5)
    bp['cbars'].set_linewidth(2.5)
    bp['cbars'].set_edgecolor('black')
    bp['cmins'].set_linewidth(2.5)
    bp['cmins'].set_edgecolor('black')
    bp['cmaxes'].set_linewidth(2.5)
    bp['cmaxes'].set_edgecolor('black')
    bp['cmedians'].set_linewidth(3.5)
    bp['cmedians'].set_edgecolor('black')

    # Overlay actual data points on same axis
    #ax.scatter( scatter_x_vals, 
    #            scatter_y_vals,
    #            s=marker_size,
    #            c=marker_color,
    #            alpha=alpha_value)

    quartile1, medians, quartile3 = np.percentile(bp_data, [25, 50, 75], axis=1)
    #whiskers = np.array([
    #    adjacent_values(sorted_array, q1, q3)
    #    for sorted_array, q1, q3 in zip(bp_data, quartile1, quartile3)])
    #whiskers_min, whiskers_max = whiskers[:, 0], whiskers[:, 1]

    inds = np.arange(1, len(medians) + 1)
    #ax.scatter(inds, medians, marker='o', color='white', s=30, zorder=3)
    #ax.vlines(inds, quartile1, quartile3, color='k', linestyle='-', lw=5)
    #ax.vlines(inds, whiskers_min, whiskers_max, color='k', linestyle='-', lw=1)

    plt.ylim(bottom=0)

    # Plot annotation ( correlation coefficients )
    if nd_iter == 0:
        step_count = 0
    else:
        step_count = int( (nd_end - nd_start) / nd_iter )
    nd_fractions = [round(nd_start + (nd_iter * step_num), 2) for step_num in range(step_count + 1)]
    #nd_fractions = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
    nd_fraction_seq = []
    dist_seq = []
    for i in range( len( nd_fractions ) ):
        for d in idx_to_distances[i]:
            nd_fraction_seq.append( nd_fractions[i] )
            dist_seq.append( d )

    if ( len(nd_fraction_seq) > 1 ):
        pearson_r, pearson_p = pearsonr( nd_fraction_seq, dist_seq )
        spearman_r, spearman_p = spearmanr( nd_fraction_seq, dist_seq )
    #pearson_correlation_txt = "Kernel distance vs. % ND → Pearson-R = {}, p = {}".format(np.round(pearson_r, 2), pearson_p)
    #spearman_correlation_txt = "Kernel distance vs. % ND → Spearman-R = {}, p = {}".format(np.round(spearman_r, 2), spearman_p)

        pearson_correlation_txt = "Your Pearson's r value     = {}\n".format(np.round(pearson_r, 2))
        pearson_p_txt = "It's corresponding p value = {}\n".format(pearson_p)
        spearman_correlation_txt = "Your Spearman's ρ value    = {}\n".format(np.round(spearman_r, 2))
        spearman_p_txt = "It's corresponding p value = {}\n".format(spearman_p)
        print( pearson_correlation_txt )
        print( pearson_p_txt)
        print( "\n" )
        print( spearman_correlation_txt )
        print( spearman_p_txt)

        annotation_lines = [ "Kernel Distance vs. % Non-Deterministic Receives: Correlation Coefficients\n",
                         #"=================================================================\n",
                         pearson_correlation_txt,
                         spearman_correlation_txt
                       ]
    
        annotation_txt = "".join(annotation_lines)
        annotation_font_size = 18
    #ax.annotate( annotation_txt, 
    #             xy=(0.55, 0.25), 
    #             xycoords='axes fraction',
    #             fontsize=annotation_font_size,
    #             bbox=dict(boxstyle="square, pad=1", fc="w")
    #           )

    # Tick labels
    tick_label_fontdict = {"fontsize" : 16}
    x_tick_labels = [ str(int(100 * nd_fractions[index])) for index in range(step_count + 1)]
    #x_tick_labels = [ "0", "10", "20", "30", "40", "50", "60", "70", "80", "90", "100" ]
    x_ticks = list(range(len(x_tick_labels)))
    ax.set_xticks( x_ticks )
    ax.set_xticklabels( x_tick_labels, rotation=0, fontdict=tick_label_fontdict )
    y_ticks = list(range(0,int(max(scatter_y_vals))+11,10))
    y_tick_labels = [ str(y) for y in y_ticks ]
    ax.set_yticks( y_ticks )
    ax.set_yticklabels( y_tick_labels, rotation=0, fontdict=tick_label_fontdict )

    # Axis labels
    x_axis_label = "Percentage of Message Non-Determinism in Application"
    y_axis_label = "Kernel Distance (Higher == Runs Less Similar)"
    axis_label_fontdict = {"fontsize" : 20}
    ax.set_xlabel( x_axis_label, fontdict=axis_label_fontdict )
    ax.set_ylabel( y_axis_label, fontdict=axis_label_fontdict )

    # Plot Title
    name_dict = {
            "message_race" : "Message Race",
            "amg2013" : "AMG2013",
            "unstructured_mesh" : "Unstructured Mesh"
            }
    #if pattern == "unstructured_mesh":
        #plot_title = "Percentage of Message Non-Determinism vs. Kernel Distance - Communication Pattern: {} ({}% neighbors non-deterministically chosen )".format(name_dict[pattern], nd_frac)
    #else:
        #plot_title = "Percentage of Message Non-Determinism vs. Kernel Distance - Communication Pattern: {}".format(name_dict[pattern])
    #title_fontdict = {"fontsize" : 22}
    #plt.title( plot_title, fontdict=title_fontdict )

    #plt.show()
    plt.savefig( "{}.png".format(output),
                 bbox_inches="tight",
                 pad_inches=0.25
               )
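
For reference, a worked example of the non-determinism sweep computed above; the start, step, and end values are illustrative:

nd_start, nd_iter, nd_end = 0.0, 0.1, 1.0
step_count = int( (nd_end - nd_start) / nd_iter )   # 10
nd_fractions = [ round(nd_start + (nd_iter * s), 2) for s in range(step_count + 1) ]
# nd_fractions == [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
# and str(int(100 * f)) turns these into the x-tick labels "0", "10", ..., "100"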