def startProcessing(self):
    """
    This function will create plots using python's `matplotlib <http://matplotlib.org/index.html>`_.  Currently, it will make:

    1. A `violin plot <https://en.wikipedia.org/wiki/Violin_plot>`_ of the distribution of download times for each site given in :ref:`sitesData <sitesData-label>`.

    A violin plot example:

    .. image:: images/matploblib-violinplot.png
        :width: 300pt

    For every site in ``self.sitesData`` the ``duration`` of each recorded
    transfer is turned into a speed with the ``raw_testsize`` option
    (``testsize * 8 / 1024**2 / duration``).  The figure is written to
    ``violinplot.png`` and the current pyplot figure is cleared afterwards.
    Nothing is drawn when ``self.sitesData`` is empty.
    """
    logging.debug("Starting processing with matplotlib...")

    if not self.sitesData:
        # violinplot() cannot draw an empty dataset, so bail out early.
        logging.warning("No site data available, skipping the violin plot")
        return

    # The test size does not depend on the site, so it is read only once.
    # float() is applied before multiplying so that an option delivered as
    # text is converted rather than repeated eight times.
    # NOTE(review): the unit of raw_testsize is not visible here - confirm.
    scaled_testsize = (float(get_option("raw_testsize")) * 8) / (1024 * 1024)

    # Make a violin plot
    downloadTimes = {}
    for site in self.sitesData:
        durations = [float(time['duration']) for time in self.sitesData[site]]
        downloadTimes[site] = scaled_testsize / numpy.array(durations)

    # Materialise the sites once so violins and tick labels share one order.
    sites = list(downloadTimes)
    plt.violinplot([downloadTimes[site] for site in sites])
    plt.xticks(range(1, len(sites) + 1), sites)
    plt.ylabel("Mb per second")
    plt.xlabel("Site")
    plt.title("Violin Plot of StashCache Transfer Speeds per Site")
    plt.savefig("violinplot.png")
    plt.clf()
Ejemplo n.º 2
0
        def _violin_plotting(self, data, xticklabels = None, showmeans = True, xlabel = '', ylabel = '', ylim = None):
            """
            Draw one violin per category of a two-dimensional data set.

            Parameters
            ----------
            data: 2D array (M*N); the size of the last axis is used as the
                number of categories
            xticklabels: labels for the category ticks, skipped when None
            showmeans: forwarded to ``plt.violinplot``
            xlabel: label of the x axis
            ylabel: label of the y axis
            ylim: y-axis limits, left untouched when None

            The figure is shown only when ``self._isshow`` is exactly ``True``.
            """
            assert data.ndim == 2, 'A two-dimension data should be inputted'
            n_categories = data.shape[-1]
            axes = plt.subplot()
            # Violins and ticks share the same 1-based positions.
            positions = np.arange(1, n_categories + 1)
            plt.violinplot(data, positions, showmeans=showmeans)
            axes.set_xticks(positions)
            if xticklabels is not None:
                axes.set_xticklabels(xticklabels, fontsize=self._labelsize)

            plt.xlabel(xlabel)
            plt.ylabel(ylabel)
            if ylim is not None:
                plt.ylim(ylim)

            if self._isshow is True:
                plt.show()
Ejemplo n.º 3
0
def plot_gat_boxplots(data, labels, title="", showviolin=False):
    """Draw a box plot of ``data`` with median confidence-interval error bars.

    :param data: the groups to plot; a falsy value prints a notice and returns
    :param labels: box labels, forwarded to ``plt.boxplot``
    :param title: plot title
    :param showviolin: when true, overlay a violin plot and green error bars
        built from ``mean_confidence_intervals``
    :return: None
    """
    if not data:  # nothing to draw
        print("No data provided to plotutils.plot_gat_boxplots")
        return

    medians, median_ci_low, median_ci_high = median_confidence_intervals(data)

    fig, ax = plt.subplots()
    plt.boxplot(data, notch=False, labels=labels)

    # Error bars are drawn without a connecting line, one per box position.
    positions = np.arange(1, len(data) + 1)
    plt.errorbar(positions, medians, yerr=(median_ci_low, median_ci_high),
                 fmt='none', linewidth=3)

    if showviolin:
        means, mean_ci_low, mean_ci_high = mean_confidence_intervals(data)
        plt.violinplot(data, showmeans=True, showmedians=True)
        plt.errorbar(positions, means, yerr=(mean_ci_low, mean_ci_high),
                     fmt='none', linewidth=3, color='g')

    plt.title(title)
    plt.ylabel("Goal Achievement Time (ms)")
    plt.xlabel("Algorithms")
    fig.autofmt_xdate()

    ax.autoscale(tight=True)
    plt.gcf().tight_layout()

    return None
    def plotViolin(self,data,labels_to_plot='all',series='all',transformations=None,save=True):
        """Draw one violin figure per label, with one violin per series.

        :param data: mapping ``series name -> {label -> sequence of values}``;
            ``None`` entries in a sequence are dropped before plotting
        :param labels_to_plot: iterable of labels, or ``'all'`` to use every
            label of the first series
        :param series: iterable of series names, or ``'all'`` for every key
            of ``data``
        :param transformations: list of ``(function, label)`` pairs applied to
            the values of every series; defaults to the identity with an
            empty label
        :param save: when true, each figure is saved as a PNG inside a
            timestamped sub-folder of ``self._default_figure_folder`` and then
            closed; otherwise it is shown with ``plt.show()``
        :raises ValueError: if a requested label is missing from a series
        """
        if transformations is None:
            transformations = [(lambda x: x,'')]
        timestamp = utils.s_timestamp()
        figure_folder = os.path.join(self._default_figure_folder,timestamp)
        if save and not os.path.exists(figure_folder):
            # create a separate folder for this run
            os.makedirs(figure_folder)

        # Materialise as lists: dict views cannot be indexed on Python 3.
        if series == 'all':
            series = list(data.keys())
        else:
            series = list(series)
        if labels_to_plot == 'all':
            labels_to_plot = list(data[series[0]].keys())

        for label_to_plot in labels_to_plot:
            figure = plt.figure(figsize=(10,15))
            data_to_plot = []
            tick_labels = []
            for transformation_fun,transformation_label in transformations:
                for serie_name in series:
                    series_data = data[serie_name]
                    if label_to_plot not in series_data:
                        raise ValueError('Label "{0}" not in series "{1}"'.format(label_to_plot,serie_name))
                    raw_data = np.array([val for val in series_data[label_to_plot] if val is not None])
                    data_to_plot.append(transformation_fun(raw_data))
                    # One tick per violin; the transformation label is only
                    # appended when it is non-empty, so the default call keeps
                    # the plain series names.
                    if transformation_label:
                        tick_labels.append('{0} {1}'.format(serie_name,transformation_label))
                    else:
                        tick_labels.append(serie_name)
            plt.ylabel(label_to_plot)
            plt.title(label_to_plot)
            plt.violinplot(data_to_plot,showmedians=True)
            plt.xticks(range(1,1+len(data_to_plot)),tick_labels)
            # Pad the y range by 3 units around the plotted values.
            all_values = [value for violin in data_to_plot for value in violin]
            plt.ylim([min(all_values)-3,max(all_values)+3])
            if save:
                plt.savefig(os.path.join(figure_folder,self.getDescr(label_to_plot))+'.png')
                # Release the figure so saved figures do not pile up in memory.
                plt.close(figure)
            else:
                plt.show()
def main(args):
    """Draw a violin plot of Dice coefficients read from pickled data frames.

    :param args: object with ``input`` (list of pickle file names) and
        ``label`` (list of tick labels, one per input file)
    :raises RuntimeError: if the number of inputs and labels differ

    The figure is saved to ``violin_plot.png``.  For each file a text box with
    the number of rows per status (0 counted as PASS, 99 as FAIL, 1 as
    CRASH_RUN, 2 as CRASH_INTEGRITY) is written next to its violin.
    """
    import io
    import sct_utils as sct  # unused here; kept in case the import matters
    import pickle
    import numpy as np
    import matplotlib.pyplot as plt

    # make sure number of inputs and labels are the same
    if len(args.input) != len(args.label):
        raise RuntimeError("Mismatch between # of files and labels")

    nb_plots = len(args.input)

    list_data = []
    text_results = []  # numerical results to display inside the figure
    for fname_pickle in args.input:
        # NOTE: pickle.load can execute arbitrary code; only feed this
        # function files from a trusted source.
        with io.open(fname_pickle, "rb") as pickle_file:
            df = pickle.load(pickle_file)
        # filter lines based on status. For status definition, see sct_pipeline
        # Note: the > 0 test is to filter out NaN
        df_dice = df.query("(status != 200) & (status != 201) & (dice > 0 )")["dice"]
        # .values works on every pandas version (get_values() was removed).
        list_data.append(df_dice.values)
        # compute statistics
        count_passed = df.status[df.status == 0].count()
        count_failed = df.status[df.status == 99].count()
        count_crashed_run = df.status[df.status == 1].count()
        count_crashed_integrity = df.status[df.status == 2].count()
        count_total = count_passed + count_failed + count_crashed_run + count_crashed_integrity
        text_results.append('\n'.join(["PASS: {}/{}".format(count_passed, count_total),
                                       "FAIL: {}".format(count_failed),
                                       "CRASH_RUN: " + str(count_crashed_run),
                                       "CRASH_INTEGRITY: " + str(count_crashed_integrity)]))

    pos = np.arange(nb_plots)

    # plot fig
    fig, ax = plt.subplots(1)

    plt.violinplot(list_data, pos, points=100, widths=0.8, showmeans=True, showextrema=True, showmedians=True,
                   bw_method=0.5)
    plt.grid(axis='y')
    plt.ylabel('Dice coefficient')
    plt.xticks(pos, args.label)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ylim = ax.get_ylim()
    # Put each summary just right of its violin, at the bottom of the axes.
    for i, summary in enumerate(text_results):
        plt.text(i + 0.02, ylim[0] + 0.01, summary, horizontalalignment='left', verticalalignment='bottom')
    plt.savefig('violin_plot.png')
Ejemplo n.º 6
0
def plot_gat_boxplots(data, labels, title="", showviolin=False):
    """Box plot of ``data`` with median confidence intervals drawn on top.

    ``data`` is expected to be a list of lists, one inner list per label.

    :param data: the data to plot; a falsy value prints a notice and returns
    :param labels: the labels of the data
    :param title: the title of the plot
    :param showviolin: whether to overlay a violin plot (with mean confidence
        intervals in green) showing the distribution of the data
    :return: None
    """
    if not data:  # empty
        print("No data provided to plotutils.plot_gat_boxplots")
        return None

    # Statistics for the error bars
    medians, median_ci_low, median_ci_high = median_confidence_intervals(data)

    # Strip the top/right borders and keep ticks on the bottom/left only
    fig, ax = plt.subplots()
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.tick_params(direction='out')
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Boxes first, then error bars (no connecting line) at the box positions
    plt.boxplot(data, notch=False, labels=labels)
    positions = np.arange(1, len(data) + 1)
    plt.errorbar(positions, medians, yerr=(median_ci_low, median_ci_high),
                 fmt='none', linewidth=3)

    # Optional violin overlay
    if showviolin:
        means, mean_ci_low, mean_ci_high = mean_confidence_intervals(data)
        plt.violinplot(data, showmeans=True, showmedians=True)
        plt.errorbar(positions, means, yerr=(mean_ci_low, mean_ci_high),
                     fmt='none', linewidth=3, color='g')

    # Titles and axis labels
    plt.title(title)
    plt.ylabel("Goal Achievement Time (ms)")
    plt.xlabel("Algorithms")
    fig.autofmt_xdate()  # rotate x labels when needed

    # Trim surrounding whitespace
    ax.autoscale(tight=True)
    plt.gcf().tight_layout()

    return None
Ejemplo n.º 7
0
def add_models_bests_scores_to_plot(hyp_dict, plt, change_color, ch_c2):
    """Add one violin of best scores per entry of ``hyp_dict["all_models"]``.

    Each entry contributes the ``"best_score"`` of every item in its
    ``"models"`` list; violins are placed at positions 1, 2, ...

    :param hyp_dict: mapping with an ``"all_models"`` list
    :param plt: object providing ``violinplot`` (e.g. ``matplotlib.pyplot``)
    :param change_color: when truthy, colour every violin body blue
    :param ch_c2: when truthy, colour every violin body red (applied after
        the blue pass, so it wins when both flags are set)
    """
    from copy import deepcopy

    positions = []
    score_groups = []
    for position, cv_dict in enumerate(hyp_dict["all_models"], start=1):
        scores = [model_dict["best_score"] for model_dict in cv_dict["models"]]
        positions.append(position)
        score_groups.append(deepcopy(scores))

    violins = plt.violinplot(dataset=tuple(score_groups), positions=positions,
                             widths=1, showextrema=True, showmeans=True)

    for enabled, colour in ((change_color, "blue"), (ch_c2, "red")):
        if not enabled:
            continue
        for body in violins["bodies"]:
            body.set_facecolor(colour)
            body.set_color(colour)
            body.set_edgecolor(colour)
Ejemplo n.º 8
0
def violin_plot_data(data, x_key, y_key, color, name
                     , violin_widths=.35
                     , violin_showextrema=False
                     , violin_showmedians=False
                     , violin_showmeans=False
                     , reg_poly=0
                     , reg_color="red"
                     , reg_linstyle="-"):
    """Draw a violin plot and, optionally, a polynomial regression line.

    The violins are positioned at the x values returned by
    ``sort_for_violin_plot(data, key=x_key, value=y_key)`` and styled through
    ``set_violin_color_label(vplt, color, name)``.  When ``reg_poly`` is
    non-zero, a polynomial of that degree is fitted to
    ``data[x_key]`` / ``data[y_key]`` and plotted as well.

    Returns the result of ``set_violin_color_label`` when ``reg_poly`` is 0,
    otherwise a tuple of that result and a ``mlines.Line2D`` proxy.

    NOTE(review): this function reads several names that are neither
    parameters nor locals (``tdata_times``, ``loop_data``, ``reg_linestyle``,
    ``reg_label``, ``reg_text``, ``cdata_lplt_eq``, ``cdata_lplt_fake``).
    Unless they exist at module level (not visible here) the function raises
    NameError.  See the inline notes; confirm the intended names.
    """
    x_vals, y_vals = sort_for_violin_plot(data , key=x_key , value=y_key)
    # NOTE(review): `y_vals` is never used and `tdata_times` is not defined in
    # this function - presumably `y_vals` was intended; confirm.
    vplt = plt.violinplot(tdata_times
                          , positions=x_vals
                          , widths=violin_widths
                          , showextrema=violin_showextrema
                          , showmedians=violin_showmedians
                          , showmeans=violin_showmeans)
    vplt_fake = set_violin_color_label(vplt, color, name)
    if 0 == reg_poly:
        # No regression requested: only the violin handle is returned.
        return vplt_fake
    else:
        # Least-squares polynomial fit of degree `reg_poly`.
        reg  = np.polyfit(data[x_key], data[y_key], reg_poly)
        # NOTE(review): `loop_data` is not defined here (presumably `data`),
        # and `reg_linestyle` does not match the `reg_linstyle` parameter,
        # which is otherwise unused - confirm.
        rplt = plt.plot(data[x_key]
                        , np.polyval(reg, loop_data[x_key])
                        , linestyle=reg_linestyle
                        , color=reg_color)
        # Build a "slope*A +/- intercept" text from the first two
        # coefficients; the sign test looks at the last coefficient.
        # NOTE(review): `eq` is computed but never used below.
        if reg[-1] < 0 :
            eq = "%.2f$A$ - %.2f\t" % (reg[0], (-1.0 * reg[1]))
        else:
            eq = "%.2f$A$ + %.2f\t" % (reg[0], reg[1])
        # Empty proxy line carrying colour and label.
        # NOTE(review): `reg_label` is not defined in this function.
        reg_fake = mlines.Line2D([],[],color=reg_color, label=reg_label)
        # NOTE(review): `reg_text`, `cdata_lplt_eq` and `cdata_lplt_fake` are
        # not defined in this function - confirm where they come from.
        if reg_text:
            reg_eq_txt  = plt.text(reg_label, 8 , cdata_lplt_eq, color=cdata_lplt_fake.get_color())
        return vplt_fake, reg_fake
Ejemplo n.º 9
0
def draw_violin(dataset, posns, alpha=1, color='royalblue', meanmarker="*"):
    """
        Add violins for ``dataset`` at ``posns`` to the current plot.

        On top of the violins this draws every data point as a red ``+``,
        a white ``meanmarker`` at each mean, and a magenta error bar taken
        from ``util.stats_of_row``.  The built-in mean line is hidden.
        (Shared helper for `violin_plot` and `double_violin`)
    """
    parts = plt.violinplot(dataset, positions=posns, showmeans=True, showextrema=True, showmedians=True)

    # Violin bodies: black outline, caller-chosen fill and opacity
    for body in parts['bodies']:
        body.set_edgecolors('k')
        body.set_facecolors(color)
        body.set_alpha(alpha)

    # The stock mean line is hidden; a marker is drawn in its place below
    parts['cmeans'].set_alpha(0)

    indices = range(len(dataset))

    # Raw observations, stacked vertically at each violin position
    for idx in indices:
        plt.plot([posns[idx]] * len(dataset[idx]), dataset[idx], "r+")

    # Median / extrema lines in black
    for key in ('cmaxes', 'cmins', 'cbars', 'cmedians'):
        parts[key].set_color('k')

    # Mean markers
    for idx in indices:
        plt.plot(posns[idx], [np.average(dataset[idx])], color='w', marker=meanmarker, markeredgecolor='k')

    # Confidence intervals (should be optional)
    for idx in indices:
        row_stats = util.stats_of_row(dataset[idx])
        half_width = row_stats["ci"][1] - row_stats["mean"]
        plt.errorbar(posns[idx], row_stats["mean"], yerr=half_width, ecolor="magenta", capthick=4)
    return
Ejemplo n.º 10
0
    def plot_helper(self, ax, data, tit, typ='hi'):
        """Draw one panel on the current axes according to ``typ``.

        :param ax: axes passed on to ``self.set_tick_labels``
        :param data: panel data; its layout depends on ``typ``:
            ``'hi'`` - mapping with ``'xs'`` and ``'pdfs'`` keyed by subject,
            each curve is plotted against ``log(x + 1)``;
            ``'se'`` - mapping subject -> sequence, plotted against 1..len;
            ``'sc'`` - mapping whose values are drawn as a single scatter
            point (at most one value) or a violin
        :param tit: panel title; ``'Spectrum'`` selects the eigenvalue label
            for ``typ='se'``
        :param typ: one of ``'hi'``, ``'se'``, ``'sc'``
        """
        maxy = float('-inf')
        miny = float('inf')
        if typ == 'hi':
            # Fade individual curves as the number of subjects grows.
            alpha = np.min((1, 2.0/len(data['pdfs'])))
            for subj in data['pdfs']:
                x = data['xs'][subj]
                y = data['pdfs'][subj]
                maxy = max(maxy, np.max(y))
                miny = min(miny, np.min(y))
                plt.plot(np.log(x+1), y, color=self.color, alpha=alpha)
            modif = self.set_tick_labels(ax, miny, maxy)
            plt.ylabel('Density'+modif)
        elif typ == 'se':
            alpha = np.min((1, 2.0/len(data)))
            for subj in data:
                x = np.linspace(1, len(data[subj]), len(data[subj]))
                y = data[subj]
                maxy = max(maxy, np.max(y))
                miny = min(miny, np.min(y))
                plt.plot(x, y, color=self.color, alpha=alpha)
            modif = self.set_tick_labels(ax, miny, maxy)
            # Compare by value: identity of string literals is not guaranteed.
            if tit == 'Spectrum':
                plt.ylabel('Eigenvalue'+modif)
            else:
                plt.ylabel('Portion of Total Variance')
        elif typ == 'sc':
            # A list is needed: NumPy cannot reduce a dict view on Python 3.
            y = list(data.values())
            if len(y) <= 1:
                plt.scatter(0, y, color=self.color)
                plt.xlim([-0.5, 0.5])
            else:
                voil = plt.violinplot(y)
                voil['bodies'][0].set_color(self.color)
            plt.ylabel('Count')

        if typ == 'sc':
            plt.ylim([np.min(y)*0.9, np.max(y)*1.1])
            plt.xticks([])
            plt.yticks([np.min(y), np.max(y)])
        else:
            # `x` is the x data of the last subject plotted above.
            if typ == 'se':
                plt.xlim([np.min(x), np.max(x)])
                plt.xticks([np.min(x),  np.max(x)])
            else:
                log_x = np.log(x+1)
                plt.xlim([np.min(log_x), np.max(log_x)])
                plt.xticks([np.min(log_x),  np.max(log_x)])
            plt.ylim([miny, maxy])
            # NOTE(review): the middle tick is half the range, not the
            # midpoint of [miny, maxy] - confirm this is intended.
            plt.yticks([miny, ((maxy - miny)/2), maxy])

        plt.title(tit, y=1.04)
Ejemplo n.º 11
0
 def _violinplot(self, data, names, title=None):
     """Build a violin-plot figure with one violin per player.

     Returns None when ``self.matplotlib_installed`` is falsy, otherwise the
     created figure.  The figure width grows with
     ``self.result_set.nplayers`` (never below 12) and the height is half of
     the width.
     """
     if not self.matplotlib_installed:
         return None

     player_count = self.result_set.nplayers
     fig_width = max(player_count / 3, 12)
     figure = plt.figure(figsize=(fig_width, fig_width / 2))

     # Violins sit `gap` units apart and are half a gap wide.
     gap = 4
     tick_positions = gap * arange(1, player_count + 1, 1)
     plt.violinplot(data, positions=tick_positions, widths=gap / 2,
                    showmedians=True, showextrema=False)

     plt.xticks(tick_positions, names, rotation=90)
     plt.xlim(0, gap * (player_count + 1))
     plt.tick_params(axis='both', which='both', labelsize=8)
     if title:
         plt.title(title)
     return figure
Ejemplo n.º 12
0
def violinPloter2(data,x,title ,xlabel,ylabel ):
    """Show a violin plot of ``data`` on a white figure.

    data: the datasets to draw, one violin each (passed to ``plt.violinplot``)
    x: tick labels, one per violin, placed at positions 1..len(x)
    title: plot title
    xlabel: label of the x axis
    ylabel: label of the y axis
    """
    plt.rc("figure",facecolor="#ffffff") # make the background white
    plt.figure() # new figure that picks up the white background
    # Plot the `data` argument (the previous code read an undefined `ynew`).
    plt.violinplot(data, showmeans=False, showmedians=True)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Violins are drawn at 1..N by default, so pin one tick per violin;
    # labels set without tick positions end up on the automatic ticks.
    plt.xticks(range(1, len(x) + 1), x)
    plt.title(title)
    plt.autoscale(True)
    plt.show()
Ejemplo n.º 13
0
def main(*args):
    """Plot one violin per input file and save the figure as ``all-runtimes``.

    Each positional argument is a file name; its data points come from
    ``grouping.every_point_in`` and its tick label from ``get_label``.
    """
    data = list(map(grouping.every_point_in, args))

    fig, ax1 = plt.subplots()
    # Violins are spaced 1.5 units apart, starting at 1.5
    posns = [1.5 * rank for rank in range(1, len(data) + 1)]
    vp = plt.violinplot(data, positions=posns, showmeans=True, showextrema=True, showmedians=True)

    # Bodies: opaque royal blue with a black outline
    for body in vp['bodies']:
        body.set_edgecolors('k')
        body.set_facecolors('royalblue')
        body.set_alpha(1)

    # Median, min and max lines in black
    for key in ('cmaxes', 'cmins', 'cbars', 'cmedians'):
        vp[key].set_color('k')

    # The mean line takes the body colour; a star marks the mean instead
    vp['cmeans'].set_color('royalblue')
    vp['cmeans'].set_alpha(1)
    for position, points in zip(posns, data):
        plt.plot([position], [np.average(points)], color='w', marker='*', markeredgecolor='k')

    # Light horizontal grid behind the data
    ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)

    # Titles and axis labels
    new_name = "all-runtimes"
    ax1.set_axisbelow(True)
    ax1.set_title("All Runtimes")
    ax1.set_xlabel("Program")
    ax1.set_ylabel("Runtime (ms)")

    # Extend the lower y limit by 20
    ymin, ymax = plt.ylim()
    plt.ylim(ymin - 20, ymax)
    plt.xticks(posns, [get_label(fname) for fname in args], rotation=0, size='x-small')

    # Legend entry for the mean marker
    plt.figtext(0.80, 0.01, '*', color='white', backgroundcolor='royalblue',weight='roman', size='medium')
    plt.figtext(0.82, 0.01, ' Average Value', color='black', weight='roman', size='x-small')

    # Save & clear
    plt.savefig(new_name)
    plt.clf()
    print("Saved figure to %s" % new_name)
    return
Ejemplo n.º 14
0
def draw_violin(data, alpha=1, color='royalblue', meanmarker="*", positions=None):
    """Add violins for ``data`` to the current plot and mark each mean.

    data: sequence of datasets, one violin each
    alpha: opacity applied to the violin bodies and the mean lines
    color: fill colour of the bodies, also used for the mean lines
    meanmarker: marker drawn at the mean of every dataset
    positions: x positions of the violins; ``None`` or an empty sequence
        means 1..len(data)
    """
    # `positions or ...` would raise for NumPy arrays (ambiguous truth
    # value), so test for "not given / empty" explicitly.
    if positions is None or len(positions) == 0:
        posns = list(range(1, 1 + len(data)))
    else:
        posns = positions
    vp = plt.violinplot(data, positions=posns, showmeans=True, showextrema=True, showmedians=True)
    ## Re-color bodies
    for v in vp['bodies']:
        v.set_edgecolors('k')
        v.set_facecolors(color)
        v.set_alpha(alpha)
    ## Re-color median, min, max lines to be black
    for field in ['cmaxes', 'cmins', 'cbars', 'cmedians']:
        vp[field].set_color('k')
    ## Mean lines take the body colour and opacity
    vp['cmeans'].set_color(color)
    vp['cmeans'].set_alpha(alpha)
    ## Draw a marker at each mean
    for position, values in zip(posns, data):
        plt.plot(position, [np.average(values)], color='w', marker=meanmarker, markeredgecolor='k')
    return
Ejemplo n.º 15
0
def violin():
    """Save a violin plot of precipitation for La Nina, all and El Nino years.

    Precipitation comes from ``get_lcrb_prcp()`` and the phase indices from
    the fourth value returned by ``get_climate_data()``.  The figure is saved
    under ``EV['HOME'] + '/Desktop/Feb20Response/images/violin'``.
    """
    from data_load import get_lcrb_prcp, get_climate_data
    prcp = get_lcrb_prcp()
    _,_,_, phaseind = get_climate_data()
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap is no longer
    # available in current Matplotlib releases.
    cmap = plt.get_cmap('viridis')

    data = [prcp[phaseind['lanina']], prcp, prcp[phaseind['elnino']]]
    violin_parts = plt.violinplot(data, showmeans = True)
    plt.ylim(0,650)
    plt.xticks(range(1,4),['La Nina', 'All Years', 'El Nino'], fontsize = 18)
    plt.title('MAMJ Precipitation in LCRB by Phase', fontsize = 22,
                fontweight = 'bold')

    # Bars, extrema and means in black; one viridis shade per violin body.
    for feature in ['cbars', 'cmins', 'cmeans', 'cmaxes']:
        violin_parts[feature].set_color('black')
    for pc, color in zip(violin_parts['bodies'], [cmap(0), cmap(0.75), cmap(0.4)]):
        pc.set_facecolor(color)
    plt.ylabel('Precipitation, mm', fontweight='bold')
    plt.savefig(EV['HOME'] + '/Desktop/Feb20Response/images/violin')
    return
Ejemplo n.º 16
0
def violinplot(xs, ys, rotation='horizontal', points=100, x_tick_offset=0,
               facecolour='lightgreen', edgecolour='green'):
    """Plots one violin per entry of ``ys``, labelled with ``xs``.

    xs: List of x labels.
    ys: List of lists of points, one list per violin.
    rotation: x label rotation. Default 'horizontal'.
    points: Number of points to use in the density estimate.
    x_tick_offset: How far to offset the x tick labels. Default 0.
    facecolour: Colour of the violin bodies. Default light green.
    edgecolour: Colour of the violin outlines and mean lines. Default green.
    """
    parts = plt.violinplot(ys, showmeans=True, showextrema=False, points=points)

    # Colours are applied to the returned artists after the call.
    # http://stackoverflow.com/a/26291582/1105803
    for body in parts['bodies']:
        body.set_facecolor(facecolour)
        body.set_edgecolor(edgecolour)
    parts['cmeans'].set_color(edgecolour)

    n_labels = len(xs)
    tick_positions = [rank + x_tick_offset for rank in range(1, n_labels + 1)]
    plt.xticks(tick_positions, xs, rotation=rotation)
    plt.xlim((0.5, n_labels + 0.5))  # Half a unit of padding on each side.
Ejemplo n.º 17
0
def violinplot(data, labels, colors, positions, points = True, maincolor= 'k'):
    """custom violin plots with an overlaid box plot.
            Args:
               data (array, (m x n)): n categories, m data points.
               labels array (n): x-tick label for each violin.
               colors array (n): colour of each violin body.
               positions array(n): location of violins.
               points (bool): also scatter the data points with random
                   horizontal jitter.
               maincolor: colour of the median lines and scattered points.

            Returns:
                the dictionary of artists returned by ``plt.violinplot``.
    """
    w = 0.8#np.diff(positions)[0]*0.8

    # The `hold` keyword is not passed any more: Matplotlib 3.0 removed it
    # and rejects it with a TypeError.
    bp = plt.violinplot(data, positions= positions, vert=True, widths=w,
                        showmeans=False, showextrema=False, showmedians=True,
                        points=100, bw_method=0.25)
    plt.boxplot(data,notch=1,positions=positions,vert=1)
    for ind, box in enumerate(bp['bodies']):
        box.set(color=colors[ind], edgecolor=colors[ind],linewidth=1.5, alpha=1)
    bp['cmedians'].set(color= maincolor,linewidth=1.5)

    plt.xticks(positions,labels, rotation =20)

    #### hackish!
    if points:
        for index, d in enumerate(data):
            # A row with one value per position (or a data/positions length
            # mismatch) is spread across all positions; otherwise the row
            # belongs to the single violin at positions[index].
            if len(d)==len(positions) or len(data) != len(positions):
                centre = positions
            else:
                centre = positions[index]
            jitter = centre + (1-2*np.random.rand(len(d)))*w/4.
            plt.scatter(jitter, d, color = maincolor, s = 8)

    return bp
def seqnumber(seqname):
    """Return the integer that follows the last underscore of *seqname*."""
    suffix = seqname.rsplit('_', 1)[-1]
    return int(suffix)


def _fasta_ids(handle):
    """Return the first word of every line of *handle* that contains '>'."""
    ids = []
    for raw in handle:
        if '>' not in raw:
            continue
        ids.append(raw.strip().replace('>', '').split(' ')[0])
    return ids


# Identifiers of all cleaned contigs and of all predicted ORFs
with open('all_crassphage_clean.fna', 'r') as f:
    all_clean_contigs = _fasta_ids(f)

with open('all_crassphage_clean.pred.fna', 'r') as f:
    all_orfs = _fasta_ids(f)

# Group the ORFs under the contig they belong to
contig_orfs = dict((contig, []) for contig in all_clean_contigs)
for orf in all_orfs:
    contig_orfs[contigname(orf)].append(orf)

contig_lens = [len(orfs) for orfs in contig_orfs.values()]

with open('good_contigs_ids', 'r') as f:
    good_contigs = [line.strip() for line in f]

good_contig_lens = [len(contig_orfs[contig]) for contig in good_contigs]

# Horizontal violins: ORF counts of the listed contigs versus all contigs
plt.violinplot([good_contig_lens, contig_lens], vert=False, showmedians=True)
plt.yticks((1, 2), ("viable contigs", "all contigs"))
plt.xlabel("n° of ORFs")
Ejemplo n.º 19
0
def savefig(filename):
    """Announce and save the pyplot figure as ``<filename>.pgf`` and ``<filename>.pdf``."""
    print('saving {}'.format(filename))
    for template in ('{}.pgf', '{}.pdf'):
        plt.savefig(template.format(filename))


# Violin plot with ten positions; the data buckets are created empty here,
# before parseNanoDict is called on the first command-line argument.
fig, ax = newfig(0.45)

pos = list(range(1, 11))
data = [[] for _ in pos]

parseNanoDict(argv[1])

violin_parts = plt.violinplot(
    dataset=data,
    positions=pos,
    points=100,
    widths=0.9,
    showmeans=True,
    showextrema=False,
    showmedians=False,
)

# Body outline width and opacity
for pc in violin_parts['bodies']:
    pc.set_linewidths(defaultLineWidth)
    pc.set_alpha(0.8)

plt.grid(True, zorder=5, axis='y')
plt.xlabel("number of unwinds")
plt.ylabel("runtime [us]")

Ejemplo n.º 20
0
def plot_shift(x, y, paired=False, n_boot=1000,
               percentiles=np.arange(10, 100, 10),
               ci=.95, seed=None, show_median=True, violin=True):
    """Shift plot.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations.
    paired : bool
        Specify whether ``x`` and ``y`` are related (i.e. repeated
        measures) or independent.

        .. versionadded:: 0.3.0
    n_boot : int
        Number of bootstrap iterations. The higher, the better, the slower.
    percentiles: array_like
        Sequence of percentiles to compute, which must be between 0 and 100
        inclusive. Default set to [10, 20, 30, 40, 50, 60, 70, 80, 90].
    ci: float
        Confidence level (0.95 = 95%). It is passed to the bootstrap
        confidence-interval routine as ``alpha = 1 - ci``.
    seed : int or None
        Random seed for generating bootstrap samples, can be integer or
        None for no seed (default).
    show_median: boolean
        If True (default), show the median with black lines.
    violin: boolean
        If True (default), plot the density of X and Y distributions.
        Default set to True.

    Returns
    -------
    fig : matplotlib Figure instance
        Matplotlib Figure. To get the individual axes, use fig.axes.

    See also
    --------
    harrelldavis

    Notes
    -----
    The shift plot is described in [1]_.
    It computes a shift function [2]_ for two (in)dependent groups using the
    robust Harrell-Davis quantile estimator in conjunction with bias-corrected
    bootstrap confidence intervals.

    References
    ----------
    .. [1] Rousselet, G. A., Pernet, C. R. and Wilcox, R. R. (2017). Beyond
           differences in means: robust graphical methods to compare two groups
           in neuroscience. Eur J Neurosci, 46: 1738-1748.
           doi:10.1111/ejn.13610

    .. [2] https://garstats.wordpress.com/2016/07/12/shift-function/

    Examples
    --------
    Default shift plot

    .. plot::

        >>> import numpy as np
        >>> import pingouin as pg
        >>> np.random.seed(42)
        >>> x = np.random.normal(5.5, 2, 50)
        >>> y = np.random.normal(6, 1.5, 50)
        >>> fig = pg.plot_shift(x, y)

    With different options

    .. plot::

        >>> import numpy as np
        >>> import pingouin as pg
        >>> np.random.seed(42)
        >>> x = np.random.normal(5.5, 2, 30)
        >>> y = np.random.normal(6, 1.5, 30)
        >>> fig = pg.plot_shift(x, y, paired=True, n_boot=2000,
        ...                     percentiles=[25, 50, 75],
        ...                     show_median=False, seed=456, violin=False)
    """
    from pingouin.regression import _bca
    from pingouin.nonparametric import harrelldavis as hd

    # Safety check
    x = np.asarray(x)
    y = np.asarray(y)
    percentiles = np.asarray(percentiles) / 100  # Convert to 0 - 1 range
    assert x.ndim == 1, 'x must be 1D.'
    assert y.ndim == 1, 'y must be 1D.'
    nx, ny = x.size, y.size
    assert not np.isnan(x).any(), 'Missing values are not allowed.'
    assert not np.isnan(y).any(), 'Missing values are not allowed.'
    assert nx >= 10, 'x must have at least 10 samples.'
    assert ny >= 10, 'y must have at least 10 samples.'
    assert 0 < ci < 1, 'ci must be between 0 and 1.'
    if paired:
        assert nx == ny, 'x and y must have the same size when paired=True.'

    # Robust percentile
    x_per = hd(x, percentiles)
    y_per = hd(y, percentiles)
    delta = y_per - x_per

    # Compute bootstrap distribution of differences
    rng = np.random.RandomState(seed)
    if paired:
        # Resample indices so that the x/y pairing is preserved
        bootsam = rng.choice(np.arange(nx), size=(nx, n_boot), replace=True)
        bootstat = (hd(y[bootsam], percentiles, axis=0) -
                    hd(x[bootsam], percentiles, axis=0))
    else:
        x_list = rng.choice(x, size=(nx, n_boot), replace=True)
        y_list = rng.choice(y, size=(ny, n_boot), replace=True)
        bootstat = (hd(y_list, percentiles, axis=0) -
                    hd(x_list, percentiles, axis=0))

    # Find upper and lower confidence interval for each quantiles
    # Bias-corrected confidence interval. The requested confidence level is
    # forwarded explicitly (it used to be ignored), and the interval gets its
    # own name so that it no longer overwrites the `ci` argument.
    alpha = 1 - ci
    lower, median_per, upper = [], [], []
    for i, d in enumerate(delta):
        ci_bounds = _bca(bootstat[i, :], d, n_boot, alpha=alpha)
        median_per.append(_bca(bootstat[i, :], d, n_boot, alpha=1)[0])
        lower.append(ci_bounds[0])
        upper.append(ci_bounds[1])

    lower = np.asarray(lower)
    median_per = np.asarray(median_per)
    upper = np.asarray(upper)

    # Create long-format dataFrame for use with Seaborn
    data = pd.DataFrame({'value': np.concatenate([x, y]),
                         'variable': ['X'] * nx + ['Y'] * ny})

    #############################
    # Plots X and Y distributions
    #############################
    fig = plt.figure(figsize=(8, 5))
    ax1 = plt.subplot2grid((3, 3), (0, 0), rowspan=2, colspan=3)

    # Boxplot X & Y
    def adjacent_values(vals, q1, q3):
        # Whisker ends: 1.5 * IQR beyond the quartiles, clipped to the data
        # range (`vals` must be sorted).
        upper_adjacent_value = q3 + (q3 - q1) * 1.5
        upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1])

        lower_adjacent_value = q1 - (q3 - q1) * 1.5
        lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1)
        return lower_adjacent_value, upper_adjacent_value

    for dis, pos in zip([x, y], [1.2, -0.2]):
        qrt1, medians, qrt3 = np.percentile(dis, [25, 50, 75])
        whiskers = adjacent_values(np.sort(dis), qrt1, qrt3)
        ax1.plot(medians, pos, marker='o', color='white', zorder=10)
        ax1.hlines(pos, qrt1, qrt3, color='k',
                   linestyle='-', lw=7, zorder=9)
        ax1.hlines(pos, whiskers[0], whiskers[1],
                   color='k', linestyle='-', lw=2, zorder=9)

    ax1 = sns.stripplot(data=data, x='value', y='variable',
                        orient='h', order=['Y', 'X'],
                        palette=['#88bedc', '#cfcfcf'])

    if violin:
        vl = plt.violinplot([y, x], showextrema=False, vert=False, widths=1)

        # Upper plot: keep one half of the violin and shift it vertically
        paths = vl['bodies'][0].get_paths()[0]
        paths.vertices[:, 1][paths.vertices[:, 1] >= 1] = 1
        paths.vertices[:, 1] = paths.vertices[:, 1] - 1.2
        vl['bodies'][0].set_edgecolor('k')
        vl['bodies'][0].set_facecolor('#88bedc')
        vl['bodies'][0].set_alpha(0.8)

        # Lower plot: same treatment for the other half
        paths = vl['bodies'][1].get_paths()[0]
        paths.vertices[:, 1][paths.vertices[:, 1] <= 2] = 2
        paths.vertices[:, 1] = paths.vertices[:, 1] - 0.8
        vl['bodies'][1].set_edgecolor('k')
        vl['bodies'][1].set_facecolor('#cfcfcf')
        vl['bodies'][1].set_alpha(0.8)

        # Rescale ylim
        ax1.set_ylim(2, -1)

    for i in range(len(percentiles)):
        # Connection between quantiles, coloured by the sign of the interval
        if upper[i] < 0:
            col = '#4c72b0'
        elif lower[i] > 0:
            col = '#c34e52'
        else:
            col = 'darkgray'
        plt.plot([y_per[i], x_per[i]], [0.2, 0.8],
                 marker='o', color=col, zorder=10)
        # X quantiles
        plt.plot([x_per[i], x_per[i]], [0.8, 1.2], 'k--', zorder=9)
        # Y quantiles
        plt.plot([y_per[i], y_per[i]], [-0.2, 0.2], 'k--', zorder=9)

    if show_median:
        x_med, y_med = np.median(x), np.median(y)
        plt.plot([x_med, x_med], [0.8, 1.2], 'k-')
        plt.plot([y_med, y_med], [-0.2, 0.2], 'k-')

    plt.xlabel('Scores (a.u.)', size=15)
    ax1.set_yticklabels(['Y', 'X'], size=15)
    ax1.set_ylabel('')

    #######################
    # Plots quantiles shift
    #######################
    ax2 = plt.subplot2grid((3, 3), (2, 0), rowspan=1, colspan=3)
    for i, per in enumerate(x_per):
        if upper[i] < 0:
            col = '#4c72b0'
        elif lower[i] > 0:
            col = '#c34e52'
        else:
            col = 'darkgray'
        plt.plot([per, per], [upper[i], lower[i]], lw=3, color=col, zorder=10)
        plt.plot(per, median_per[i], marker='o', ms=10, color=col, zorder=10)

    plt.axhline(y=0, ls='--', lw=2, color='gray')

    ax2.set_xlabel('X quantiles', size=15)
    ax2.set_ylabel('Y - X quantiles \n differences (a.u.)', size=10)
    sns.despine()
    plt.tight_layout()

    return fig
Ejemplo n.º 21
0
def plotly_to_matplotlib(pygsti_fig,
                         save_to=None,
                         fontsize=12,
                         prec='compacthp',
                         boxLabels_fontsize=6):
    """
    Convert a pygsti (plotly) figure to a matplotlib figure.

    The plotly layout (title, axis labels, ticks, limits, log scaling) is
    copied onto a freshly created matplotlib axes, and then every trace in
    the plotly figure is redrawn according to its ``type``: ``heatmap``,
    ``scatter``, ``scattergl``, ``bar``, ``histogram`` or ``box`` (boxes are
    rendered together as a single violin plot).  Traces of any other type
    are silently ignored.

    Parameters
    ----------
    pygsti_fig : ReportFigure
        A pyGSTi figure.  Its ``plotlyfig``, ``metadata`` and (for heatmap
        and scattergl traces) ``colormap`` attributes are read.  Note that
        ``metadata['mpl_fig_size']`` is written as a side effect when the
        plotly layout specifies both a width and a height.

    save_to : str
        Output filename.  Extension determines type.  If None, then the
        matplotlib figure is returned instead of saved.

    fontsize : int, optional
        Base fontsize to use for converted figure.

    prec : int or {"compact","compacth"}
        Digits of precision to include in labels.  Passed unchanged to
        `_eformat` when writing heatmap box labels.

    boxLabels_fontsize : int, optional
        The size for labels on the boxes. If 0 then no labels are
        put on the boxes

    Returns
    -------
    matplotlib.Figure
        Matplotlib figure, unless save_to is not None, in which case
        the figure is closed and None is returned.

    Raises
    ------
    ValueError
        If ``metadata['special']`` holds an unknown label, if a scatter
        trace has an unknown ``mode``, or if (after saving) more matplotlib
        figures are open than when the function was entered.
    """
    numMPLFigs = len(_plt.get_fignums())
    fig = pygsti_fig.plotlyfig
    data_trace_list = fig['data']

    if 'special' in pygsti_fig.metadata:
        if pygsti_fig.metadata['special'] == "keyplot":
            return special_keyplot(pygsti_fig, save_to, fontsize)
        else:
            raise ValueError("Invalid `special` label: %s" %
                             pygsti_fig.metadata['special'])

    #if axes is None:
    mpl_fig, axes = _plt.subplots()  # create a new figure if no axes are given

    layout = fig['layout']
    h, w = layout['height'], layout['width']
    # todo: get margins and subtract from h,w

    if mpl_fig is not None and w is not None and h is not None:
        mpl_size = w / 100.0, h / 100.0  # heuristic
        mpl_fig.set_size_inches(*mpl_size)  # was 12,8 for "super" color plot
        pygsti_fig.metadata[
            'mpl_fig_size'] = mpl_size  # record for later use by rendering commands

    def get(obj, x, default):
        """ Needed b/c in plotly v3 layout no longer is a dict """
        try:
            ret = obj[x]
        except KeyError:
            return default
        # A present-but-None entry is treated the same as a missing one.
        return ret if (ret is not None) else default

    xaxis, yaxis = layout['xaxis'], layout['yaxis']
    #annotations = get(layout,'annotations',[])
    title = get(layout, 'title', None)
    shapes = get(layout, 'shapes', [])  # assume only shapes are grid lines
    bargap = get(layout, 'bargap', 0)

    xlabel = get(xaxis, 'title', None)
    ylabel = get(yaxis, 'title', None)
    xlabels = get(xaxis, 'ticktext', None)
    ylabels = get(yaxis, 'ticktext', None)
    xtickvals = get(xaxis, 'tickvals', None)
    ytickvals = get(yaxis, 'tickvals', None)
    xaxistype = get(xaxis, 'type', None)
    yaxistype = get(yaxis, 'type', None)
    xaxisside = get(xaxis, 'side', 'bottom')
    yaxisside = get(yaxis, 'side', 'left')
    xtickangle = get(xaxis, 'tickangle', 0)
    xlim = get(xaxis, 'range', None)
    ylim = get(yaxis, 'range', None)

    if xaxisside == "top":
        axes.xaxis.set_label_position('top')
        axes.xaxis.tick_top()
        #axes.yaxis.set_ticks_position('both')

    if yaxisside == "right":
        axes.yaxis.set_label_position('right')
        axes.yaxis.tick_right()
        #axes.yaxis.set_ticks_position('both')

    if title is not None:
        if xaxisside == "top":
            axes.set_title(mpl_process_lbl(title), fontsize=fontsize,
                           y=2.5)  # push title up higher
        else:
            # Must be exclusive with the branch above: a second set_title
            # call without `y` would undo the raised position.
            axes.set_title(mpl_process_lbl(title), fontsize=fontsize)

    if xlabel is not None:
        axes.set_xlabel(mpl_process_lbl(xlabel), fontsize=fontsize)

    if ylabel is not None:
        axes.set_ylabel(mpl_process_lbl(ylabel), fontsize=fontsize)

    if xtickvals is not None:
        axes.set_xticks(xtickvals, minor=False)

    if ytickvals is not None:
        axes.set_yticks(ytickvals, minor=False)

    if xlabels is not None:
        axes.set_xticklabels(mpl_process_lbls(xlabels),
                             rotation=0,
                             fontsize=(fontsize - 2))

    if ylabels is not None:
        axes.set_yticklabels(mpl_process_lbls(ylabels),
                             fontsize=(fontsize - 2))

    if xtickangle != 0:
        _plt.xticks(
            rotation=-xtickangle
        )  # minus b/c ploty & matplotlib have different sign conventions

    if xaxistype == 'log':
        axes.set_xscale("log")
    if yaxistype == 'log':
        axes.set_yscale("log")

    if xlim is not None:
        if xaxistype == 'log':  # plotly's limits are already log10'd in this case
            xlim = 10.0**xlim[0], 10.0**xlim[1]  # but matplotlib's aren't
        axes.set_xlim(xlim)

    if ylim is not None:
        if yaxistype == 'log':  # plotly's limits are already log10'd in this case
            ylim = 10.0**ylim[0], 10.0**ylim[1]  # but matplotlib's aren't
        axes.set_ylim(ylim)

    #figure out barwidth and offsets for bar plots
    num_bars = sum(get(d, 'type', '') == 'bar' for d in data_trace_list)
    currentBarOffset = 0
    barWidth = (1.0 - bargap) / num_bars if num_bars > 0 else 1.0

    #process traces
    handles = []
    labels = []  # for the legend
    boxes = []  # for violins
    for traceDict in data_trace_list:
        typ = get(traceDict, 'type', 'unknown')

        name = get(traceDict, 'name', None)
        showlegend = get(traceDict, 'showlegend', True)

        if typ == "heatmap":
            #colorscale = get(traceDict,'colorscale','unknown')
            # traceDict['z'] is *normalized* already - maybe would work here but not for box value labels
            plt_data = pygsti_fig.metadata['plt_data']
            show_colorscale = get(traceDict, 'showscale', True)

            # Half an inch per cell; overrides any size taken from the layout.
            mpl_size = (plt_data.shape[1] * 0.5, plt_data.shape[0] * 0.5)
            mpl_fig.set_size_inches(*mpl_size)
            #pygsti_fig.metadata['mpl_fig_size'] = mpl_size #record for later use by rendering commands

            colormap = pygsti_fig.colormap
            assert (colormap
                    is not None), 'Must separately specify a colormap...'
            norm, cmap = colormap.get_matplotlib_norm_and_cmap()

            masked_data = _np.ma.array(plt_data, mask=_np.isnan(plt_data))
            heatmap = axes.pcolormesh(masked_data, cmap=cmap, norm=norm)

            axes.set_xlim(0, plt_data.shape[1])
            axes.set_ylim(0, plt_data.shape[0])

            # Major ticks sit at cell centers (hence the +0.5).  When plotly
            # supplies no tick values we still compute the cell centers so
            # the grid code below has something to work with, but we do not
            # install them as major ticks.
            if xtickvals is not None:
                xtics = _np.array(xtickvals) + 0.5
                axes.set_xticks(xtics, minor=False)
            else:
                xtics = _np.arange(plt_data.shape[1]) + 0.5

            if ytickvals is not None:
                ytics = _np.array(ytickvals) + 0.5
                axes.set_yticks(ytics, minor=False)
            else:
                ytics = _np.arange(plt_data.shape[0]) + 0.5

            grid = bool(len(shapes) > 1)
            if grid:

                def _get_minor_tics(t):
                    # Midpoints between consecutive major ticks.
                    return [(t[i] + t[i + 1]) / 2.0 for i in range(len(t) - 1)]

                axes.set_xticks(_get_minor_tics(xtics), minor=True)
                axes.set_yticks(_get_minor_tics(ytics), minor=True)
                axes.grid(which='minor',
                          axis='both',
                          linestyle='-',
                          linewidth=2)

            off = False  # Matplotlib used to allow 'off', but now False should be used
            if xlabels is None and ylabels is None:
                axes.tick_params(labelcolor='w',
                                 top=off,
                                 bottom=off,
                                 left=off,
                                 right=off)  # white tics
            else:
                axes.tick_params(top=off, bottom=off, left=off, right=off)

            #print("DB ann = ", len(annotations))
            #boxLabels = bool( len(annotations) >= 1 ) #TODO: why not plt_data.size instead of 1?
            #boxLabels = True  # maybe should always be true?
            if boxLabels_fontsize > 0:
                # Write values on colored squares
                for y in range(plt_data.shape[0]):
                    for x in range(plt_data.shape[1]):
                        if _np.isnan(plt_data[y, x]): continue
                        assert (_np.isfinite(plt_data[y, x])
                                ), "%s is not finite!" % str(plt_data[y, x])
                        axes.text(
                            x + 0.5,
                            y + 0.5,
                            mpl_process_lbl(_eformat(plt_data[y, x], prec),
                                            math=True),
                            horizontalalignment='center',
                            verticalalignment='center',
                            color=mpl_besttxtcolor(plt_data[y, x], cmap, norm),
                            fontsize=boxLabels_fontsize)

            if show_colorscale:
                cbar = _plt.colorbar(heatmap)
                cbar.ax.tick_params(labelsize=(fontsize - 2))

        elif typ == "scatter":
            mode = get(traceDict, 'mode', 'lines')
            marker = get(traceDict, 'marker', None)
            line = get(traceDict, 'line', None)
            if marker and (line is None):
                line = get(marker, 'line', None)  # 2nd attempt to get line props

            # Color precedence: marker color, then line color, then black.
            # Start from None so a color left over from a previous trace is
            # never reused when this trace has no marker.
            color = None
            if marker:
                color = get(marker, 'color', None)
            if line and (color is None):
                color = get(line, 'color', None)
            if color is None:
                color = 'rgb(0,0,0)'
            color = mpl_color(color)

            linewidth = float(line['width']) if (
                line and get(line, 'width', None) is not None) else 1.0

            x = y = None
            if 'x' in traceDict and 'y' in traceDict:
                x = traceDict['x']
                y = traceDict['y']
            elif 'r' in traceDict and 't' in traceDict:
                x = traceDict['r']
                y = traceDict['t']

            assert (x is not None and y
                    is not None), "x and y both None in trace: %s" % traceDict
            lines = _plt.plot(x, y)
            if mode == 'lines':
                ls = '-'
                ms = 'None'
            elif mode == 'markers':
                ls = 'None'
                ms = "."
            elif mode == 'lines+markers':
                ls = '-'
                ms = "."
            else:
                raise ValueError("Unknown mode: %s" % mode)
            _plt.setp(lines,
                      linestyle=ls,
                      marker=ms,
                      color=color,
                      linewidth=linewidth)

            if showlegend and name:
                handles.append(lines[0])
                labels.append(name)

        elif typ == "scattergl":  # currently used only for colored points...
            x = traceDict['x']
            y = traceDict['y']
            assert (x is not None and y
                    is not None), "x and y both None in trace: %s" % traceDict

            # Points are colored by their y value.
            colormap = pygsti_fig.colormap
            if colormap:
                norm, cmap = colormap.get_matplotlib_norm_and_cmap()
                s = _plt.scatter(x, y, c=y, s=50, cmap=cmap, norm=norm)
            else:
                s = _plt.scatter(x, y, c=y, s=50, cmap='gray')

            if showlegend and name:
                handles.append(s)
                labels.append(name)

        elif typ == "bar":
            xlabels = [str(xl) for xl in traceDict['x']
                       ]  # x "values" are actually bar labels in plotly

            #always grey=pos, red=neg type of bar plot for now (since that's all pygsti uses)
            y = _np.asarray(traceDict['y'])
            if 'plt_yerr' in pygsti_fig.metadata:
                yerr = pygsti_fig.metadata['plt_yerr']
            else:
                yerr = None

            # actual x values are just the integers + offset
            x = _np.arange(y.size) + currentBarOffset
            currentBarOffset += barWidth  # so next bar trace will be offset correctly

            # Default to gray so `color` is always defined, even when the
            # trace carries no marker or the marker has no usable color.
            color = "gray"
            marker = get(traceDict, 'marker', None)
            if marker and ('color' in marker):
                if isinstance(marker['color'], str):
                    color = mpl_color(marker['color'])
                elif isinstance(marker['color'], list):
                    color = [mpl_color(c) for c in marker['color']
                             ]  # b/c axes.bar can take a list of colors

            if yerr is None:
                axes.bar(x, y, barWidth, color=color)
            else:
                axes.bar(x, y, barWidth, color=color, yerr=yerr.flatten().real)

            if xtickvals is not None:
                xtics = _np.array(
                    xtickvals) + 0.5  # _np.arange(plt_data.shape[1])+0.5
            else:
                xtics = x
            axes.set_xticks(xtics, minor=False)
            axes.set_xticklabels(mpl_process_lbls(xlabels),
                                 rotation=0,
                                 fontsize=(fontsize - 4))

        elif typ == "histogram":
            #histnorm = get(traceDict,'histnorm',None)
            marker = get(traceDict, 'marker', None)
            # Look the color up once, tolerating a marker without 'color'.
            marker_color = get(marker, 'color', None) if marker else None
            color = mpl_color(
                marker_color if isinstance(marker_color, str) else "gray")
            xbins = traceDict['xbins']
            histdata = traceDict['x']

            if abs(xbins['size']) < 1e-6:
                histBins = 1
            else:
                histBins = int(
                    round((xbins['end'] - xbins['start']) / xbins['size']))

            histdata_finite = _np.take(
                histdata, _np.where(_np.isfinite(histdata)))[
                    0]  # take gives back (1,N) shaped array (why?)
            if yaxistype == 'log':
                if len(histdata_finite) == 0:
                    axes.set_yscale(
                        "linear"
                    )  # no data, and will get an error with log-scale, so switch to linear

            #histMin = min( histdata_finite ) if cmapFactory.vmin is None else cmapFactory.vmin
            #histMax = max( histdata_finite ) if cmapFactory.vmax is None else cmapFactory.vmax
            #_plt.hist(_np.clip(histdata_finite,histMin,histMax), histBins,
            #          range=[histMin, histMax], facecolor='gray', align='mid')
            _, _, patches = _plt.hist(histdata_finite,
                                      histBins,
                                      facecolor=color,
                                      align='mid')

            #If we've been given an array of colors
            if isinstance(marker_color, list):
                for p, c in zip(patches, marker_color):
                    _plt.setp(p, 'facecolor', mpl_color(c))

        elif typ == "box":
            # Boxes are collected and drawn together after the loop.
            boxes.append(traceDict)

    if len(boxes) > 0:
        _plt.violinplot([box['y'] for box in boxes],
                        [box['x0'] for box in boxes],
                        points=10,
                        widths=1.,
                        showmeans=False,
                        showextrema=False,
                        showmedians=False)
        # above kwargs taken from Tim's original RB plot - we could set some of
        # these from boxes[0]'s properties like 'boxmean' (a boolean) FUTURE?

    extraartists = [axes]
    if len(handles) > 0:
        lgd = _plt.legend(handles,
                          labels,
                          bbox_to_anchor=(1.01, 1.0),
                          borderaxespad=0.,
                          loc="upper left")
        extraartists.append(lgd)

    if save_to:
        _gc.collect(
        )  # too many open files (b/c matplotlib doesn't close everything) can cause the below to fail
        _plt.savefig(save_to,
                     bbox_extra_artists=extraartists,
                     bbox_inches='tight')  # need extra artists otherwise
        #axis labels get clipped
        _plt.cla()
        _plt.close(mpl_fig)
        del mpl_fig
        _gc.collect()  # again, to be safe...
        if len(_plt.get_fignums()) != numMPLFigs:
            raise ValueError(
                "WARNING: MORE FIGURES OPEN NOW (%d) THAN WHEN WE STARTED %d)!!"
                % (len(_plt.get_fignums()), numMPLFigs))
        return None  # figure is closed!
    else:
        return mpl_fig
Ejemplo n.º 22
0
        print "Success rate: ", sum(success)/len(success)
        
    # store values
    success_values[el][a] = sum(success)/len(success)
    temp_len = len(t_min[t_min<num_timesteps-1])
    
    temp = t_min[t_min<num_timesteps-1]
 
    t_avg_values[el][a] = np.mean(t_sorted)            
    t_med_values[el][a] = np.median(t_sorted)            
    t_std_values[el][a] = np.std(t_sorted)

    data.append(temp)
      
# Draw one violin per (alpha, beta) setting at integer x positions 0..N-1.
# NOTE(review): `data`, `range_alpha`, `range_beta`, `lab`, `lap` and `fig1`
# are defined outside this part of the script; `fig1` is presumably the
# figure selected by plt.figure(1) -- confirm.
plt.figure(1)
x = range(len(range_alpha))
plt.violinplot(data, x, showmedians=True)

# Build one "alpha / beta" tick label per violin.
for a in range(len(range_alpha)):
    alpha = range_alpha[a]
    beta = range_beta[a]
    lab.append('%.2f / %.2f' % (alpha, beta))

plt.title('lap %.2f' % lap)

# Apply the tick positions and labels BEFORE writing the file; saving first
# would leave the custom x labels out of the exported image.
ax = plt.gca()
ax.set_xticks(range(len(range_alpha)))
ax.set_xticklabels(lab)

fig1.savefig('violin_lap = %.2f.eps' % lap)

Ejemplo n.º 23
0
            A.append(GetTransform(msg_[1]).get_pos().get_array())
        for msg_ in bag_.read_messages(topics='B1'):
            B1.append(GetTransform(msg_[1]).get_pos().get_array())
        for msg_ in bag_.read_messages(topics='B2'):
            B2.append(GetTransform(msg_[1]).get_pos().get_array())

        bag_.close()
    else:
        print("The provided path is not a .bag file")


if __name__ == "__main__":
    # Exactly one command-line argument (the path handed to ReadBag) is
    # expected; anything else is reported and nothing is plotted.
    if len(sys.argv) != 2:
        print("Path to .bag file was not provided")
    else:
        ReadBag(sys.argv[1])

        # Row-wise Euclidean norms of the reference samples, shared by both
        # error computations.
        # NOTE(review): A, B1 and B2 are presumably the module-level lists
        # filled by ReadBag -- confirm.
        reference_norm = np.linalg.norm(A, axis=1)
        error1 = np.linalg.norm(B1, axis=1) - reference_norm
        error2 = np.linalg.norm(B2, axis=1) - reference_norm

        # Optional diagnostics, currently disabled:
        # print(np.mean(error1) )
        # print(np.std(error1) )
        # print(np.mean(error2) )
        # print(np.std(error2) )

        # Distribution of the absolute norm error of B1 against A.
        plt.violinplot(np.abs(error1))
        plt.grid()
        plt.show()

        # Same plot for B2, currently disabled:
        # plt.violinplot(np.abs(error2) )
        # plt.grid()
        # plt.show()
Ejemplo n.º 24
0
from nltk import tokenize
import dataset
import matplotlib.pyplot as plt

# Load every row of the `reviews` table from the local SQLite database.
db = dataset.connect('sqlite:///reviews.db')
reviews = db['reviews'].all()

analyzer = SentimentIntensityAnalyzer()

# One bucket per star rating: index 0 collects rating 1, index 4 rating 5.
sentiment_by_stars = [[] for _ in range(5)]

for review in reviews:
    # Score the title and body together, sentence by sentence.
    full_review = review['title'] + '. ' + review['review']
    sentences = tokenize.sent_tokenize(full_review)

    compound_total = 0.0
    for sentence in sentences:
        compound_total += analyzer.polarity_scores(sentence)["compound"]

    # A review's score is the mean "compound" value over its sentences.
    mean_compound = compound_total / len(sentences)
    star_index = int(review['rating']) - 1
    sentiment_by_stars[star_index].append(mean_compound)

# Horizontal violins, one per star rating, placed at positions 1..5.
violin_options = dict(vert=False,
                      widths=0.9,
                      showmeans=False,
                      showextrema=True,
                      showmedians=True,
                      bw_method='silverman')
plt.violinplot(sentiment_by_stars, range(1, 6), **violin_options)
# Reference line at zero on the sentiment axis.
plt.axvline(x=0, linewidth=1, color='black')
plt.show()
Ejemplo n.º 25
0
def SelectionViolinPlot(plotfile, ylabel, models, yvalues, symmetrizey, hlines=None, points=None, pointmarkercolor='or', usetex=True, legends=False, fixymin=None, fixymax=None, modelgroups=None):
    """Creates violin plot showing distribution of selection and significant sites.

    The figure is written to *plotfile* and then closed; nothing is returned.
    Argument checks are done with ``assert``, so invalid arguments raise
    ``AssertionError``.

    Calling arguments:

    *plotfile* : name of PDF plot to create. Must end with the
    extension ``.pdf`` (case-insensitive).

    *ylabel* : ylabel for the plot.

    *models* : list of models for which we create violin plots.

    *yvalues* : list of the same length as *models*, each entry is a list
    of the Y-values (such as P-values).

    *symmetrizey* : make y-axis symmetric around zero?

    *hlines* : if not *None*, list of the same length as *models* with each entry
    a list giving y-value for where we draw horizontal lines for that *model*. 
    Alternatively, can be a single number -- in that case, we draw a horizontal
    line across the whole plot at that number.

    *points* : if not *None*, list of the same length as *models* with each entry
    a list giving the y-value for points to be placed for that *model*.

    *pointmarkercolor* : specifies marker and color of points in *points*. 
    Should be either a length-two string giving marker and color for all points
    (such as *or* for circles, red) or a list of lists of the same length
    as *points* with each entry specifying the marker and color for that point.

    *usetex* : use LaTex formatting of strings?

    *legends* : Create legend(s) with names of points specified by *pointmarkercolor*?
    If it is not *False* or *None*, then *legends* should be a list. Each entry
    should be a 3-tuple *(markercolors, names, title)*. In this tuple,
    *markercolors* and *names* are lists of the same
    length, with *markercolors* being a list of marker / color (e.g. *or*
    for circles, red) and *names* being a list of the string corresponding
    to each marker / color; *title* is the title for the legend.

    *fixymin* : if not *None*, the y-minimum is fixed to this value.

    *fixymax* : if not *None*, the y-maximum is fixed to this value.

    *modelgroups* : do we "group" models on the x-axis? If so, set this to a list
    of the same length as *models* with each entry being the group to which
    that model is assigned. For instance, if *models* is 
    *['ExpCM', 'YNGKP', 'ExpCM', 'YNGKP']*,
    then *modelgroups* might be *['HA', 'HA', 'NP', 'NP']*. In this case,
    the two groups are indicated with a line and a label on the x-axis.
    Models in the same group must be consecutive. If any entry is *None*,
    the corresponding model is not assigned a group.
    """
    alpha = 0.55 # transparency for points
    markersize = 25 # size of points
    markerlw = 0.6 # line width for markers
    assert os.path.splitext(plotfile)[1].lower() == '.pdf', "plotfile %s does not end with extension '.pdf'" % plotfile
    assert len(models) == len(yvalues) >= 1
    if modelgroups:
        assert len(modelgroups) == len(models), "modelgroups is not the same length as models"
        # make sure models in the same group are consecutive: the number of
        # runs of equal neighbors must equal the number of distinct groups
        ngroups = 1
        previousgroup = modelgroups[0]
        for group in modelgroups[1 : ]:
            if group != previousgroup:
                ngroups += 1
                previousgroup = group
        assert ngroups == len(set(modelgroups)), "models in the same group must be consecutive in modelgroups. This is not the case:\n%s" % str(modelgroups)
    plt.rc('font', size=12)
    plt.rc('text', usetex=usetex)

    # Figure geometry: margins and per-violin width are used to size the
    # figure and are converted to figure fractions for plt.axes below.
    lmargin = 0.7
    tmargin = 0.1
    if modelgroups:
        bmargin = 0.6 # extra room for the group bracket and label
    else:
        bmargin = 0.4
    if legends:
        perlegendwidth = 0.9
        rmargin = perlegendwidth * len(legends) + 0.03
    else:
        rmargin = 0.1
    (height, widthper) = (2.5, 1.5)
    violinwidth = 0.7
    totwidth = lmargin + rmargin

    # x positions of the violins: unit spacing by default, tighter spacing
    # between consecutive models that share a (non-None) group.
    if modelgroups:
        firstmodel = True
        withingroupspacing = violinwidth + 0.3 * (1.0 - violinwidth)
        xs = []
        for (imodel, igroup) in zip(models, modelgroups): 
            if not firstmodel and igroup == lastgroup and lastgroup is not None:
                xs.append(xs[-1] + withingroupspacing)
                totwidth += withingroupspacing * widthper
            elif firstmodel:
                firstmodel = False
                xs.append(0)
                totwidth += widthper
            else:
                xs.append(xs[-1] + 1)
                totwidth += widthper
            lastgroup = igroup
    else:
        xs = list(range(len(models)))
        totwidth += widthper * len(models)
    totheight = height + tmargin + bmargin
    plt.figure(figsize=(totwidth, totheight))
    plt.axes([lmargin / totwidth, bmargin / totheight, 1.0 - (lmargin + rmargin) / totwidth, 1.0 - (tmargin + bmargin) / totheight])
    plt.ylabel(ylabel, fontsize=15)
    plt.violinplot(yvalues, xs, widths=violinwidth, showextrema=False)
    xmargin = 0.2 * violinwidth / 2.0
    xmin = xs[0] - violinwidth / 2.0 - xmargin
    xmax = xs[-1] + violinwidth / 2.0 + xmargin
    plt.xlim(xmin, xmax)

    # Horizontal reference lines: one number spans the whole plot, a list
    # gives per-model lines that span only that model's violin.
    if isinstance(hlines, (int, float)):
        plt.hlines(hlines, xmin, xmax, colors='b', linewidths=1, linestyles='dotted')
    elif hlines:
        assert len(hlines) == len(models)
        line_ys = []
        line_xmins = []
        line_xmaxs = []
        for (i, ix) in enumerate(xs):
            line_ys += hlines[i]
            line_xmins += [ix - violinwidth / 2.0] * len(hlines[i])
            line_xmaxs += [ix + violinwidth / 2.0] * len(hlines[i])
        plt.hlines(line_ys, line_xmins, line_xmaxs, colors='b', linewidths=1, linestyles='dotted')

    # y limits are read back from the plot; optionally made symmetric.
    if symmetrizey:
        (ymin, ymax) = plt.ylim()
        ymax = 1.05 * max(abs(ymin), abs(ymax))
        ymin = -ymax
    else:
        (ymin, ymax) = plt.ylim()

    # Overlay the individual points, jittered around each violin's x.
    if points:
        assert len(points) == len(models)
        if isinstance(pointmarkercolor, str):
            assert len(pointmarkercolor) == 2
            color = pointmarkercolor[1]
            marker = pointmarkercolor[0]
        else:
            color = []
            marker = []
            assert len(pointmarkercolor) == len(points), "len(pointmarkercolor) = %d; len(points) = %d" % (len(pointmarkercolor), len(points))
        point_xs = []
        point_ys = []
        for (i, ix) in enumerate(xs):
            (model_xs, model_ys) = SmartJitter(points[i], yspace=(ymax - ymin) / 25., xspace=0.08, xcenter=ix)
            point_xs += model_xs
            point_ys += model_ys
            if not isinstance(pointmarkercolor, str):
                imarkercolor = pointmarkercolor[i]
                assert len(imarkercolor) == len(points[i]), "pointmarkercolor and points have length mismatch for %d" % i
                color += [x[1] for x in imarkercolor]
                marker += [x[0] for x in imarkercolor]
        if isinstance(pointmarkercolor, str):
            plt.scatter(point_xs, point_ys, s=markersize, c=color, marker=marker, alpha=alpha, lw=markerlw)
        else:
            # markers differ per point, so each point is its own scatter call
            assert len(color) == len(marker) == len(point_xs)
            for (x, y, c, m) in zip(point_xs, point_ys, color, marker):
                plt.scatter(x, y, s=markersize, c=c, marker=m, alpha=alpha, lw=markerlw)
    if fixymin not in [None, False]:
        ymin = fixymin
    if fixymax not in [None, False]:
        ymax = fixymax
    assert ymin < ymax
    plt.ylim(ymin, ymax)
    plt.xticks(xs, models, fontsize=15)

    # Legends are stacked left-to-right in the right margin, positioned in
    # figure coordinates.
    if legends:
        legendx = 1.0 - rmargin / float(totwidth)
        legendfracwidth = perlegendwidth / float(totwidth)
        legendtop = 1.0 - tmargin / float(totheight)
        for (markercolors, legendnames, legendtitle) in legends:
            assert len(markercolors) == len(legendnames)
            handles = [matplotlib.lines.Line2D([0], [0], marker=marker, color=color, markersize=markersize, alpha=alpha, lw=markerlw, linestyle='None') for (marker, color) in markercolors]
            # put in natural sort order
            assert len(set(legendnames)) == len(legendnames), "Duplicate legendnames entry"
            sortedlegendnames = list(legendnames)
            dms_tools.utils.NaturalSort(sortedlegendnames)
            sortedhandles = [None] * len(sortedlegendnames)
            for (handle, name) in zip(handles, legendnames):
                sortedhandles[sortedlegendnames.index(name)] = handle
            assert None not in sortedhandles
            if legendtitle:
                legendtitle = ('\\bf{%s\nsites}' % SplitText(legendtitle, maxchars=6)).replace('\n', '}\n\\bf{')
            else:
                legendtitle = '\\bf{sites}'
            legend = plt.legend(sortedhandles, sortedlegendnames, bbox_to_anchor=(legendx, 0, legendfracwidth, legendtop), bbox_transform=plt.gcf().transFigure, fontsize=13, numpoints=1, title=legendtitle, markerscale=0.25, handlelength=0.7, handletextpad=0.25, borderaxespad=0, labelspacing=0.2)
            # add_artist keeps this legend when the next plt.legend call is made
            plt.gca().add_artist(legend)
            legendx += legendfracwidth
            plt.setp(legend.get_title(), fontsize=13)

    # Group brackets: a capped line under each group's violins plus a label.
    if modelgroups:
        for group in set(modelgroups):
            if not group:
                continue
            start_i = min([i for (i, g) in enumerate(modelgroups) if g == group])
            end_i = max([i for (i, g) in enumerate(modelgroups) if g == group])
            start_x = (xmargin + xs[start_i]) / (xmax - xmin) # axes coordinates
            end_x = (xmargin + xs[end_i] + violinwidth) / (xmax - xmin) # axes coordinates
            line_y = -0.12 # in axes coordinates
            cap_height = 0.03
            line = plt.Line2D([start_x, end_x], [line_y, line_y], transform=plt.gca().transAxes, color='black', linewidth=1.5, solid_capstyle='butt')
            line.set_clip_on(False)
            plt.gca().add_line(line)
            for x in [start_x, end_x]: # caps on end of lines
                line = plt.Line2D([x, x], [line_y + cap_height, line_y - cap_height], transform=plt.gca().transAxes, color='black', linewidth=1.5, solid_capstyle='butt')
                line.set_clip_on(False)
                plt.gca().add_line(line)
            plt.text((start_x + end_x) / 2.0, line_y - 0.04, group, transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='top', fontsize=15)
    plt.savefig(plotfile)
    plt.clf()
    plt.close()
Ejemplo n.º 26
0
    "WDDFF(MODWT-SVR)\n(9-month ahead)",
    ]
# Violin positions on the x axis: 0..24.
x = list(range(25))
ylabels = [r"$NSE$", r"$NRMSE$", r"$PPTS(5)(\%)$"]
# Data-coordinate anchor of the index text drawn in each panel.
x_s = [-1.1, -1.1, -1.1]
y_s = [0.93, 1.8, 78]

plt.figure(figsize=(7.48, 5.54))
panel_count = len(all_datas)
for i in range(panel_count):
    # One stacked panel per entry of all_datas (3 rows, 1 column).
    ax1 = plt.subplot(3, 1, i + 1)
    ax1.yaxis.grid(True)
    ax1.text(x_s[i], y_s[i], fig_index[i], fontsize=7)
    vplot1 = plt.violinplot(dataset=all_datas[i], positions=x, showmeans=True)

    # Overlay one dashed blue line per run of five consecutive positions
    # (0-4, 5-9, 10-14, 15-19, 20-24), drawn from lines[i][0..4].
    for group in range(5):
        first = 5 * group
        ax1.plot(list(range(first, first + 5)), lines[i][group],
                 '--', lw=0.5, color='blue')

    print(type(vplot1["cmeans"]))
    plt.ylabel(ylabels[i])

    # Only the bottom panel carries x tick labels.
    is_bottom_panel = (i == panel_count - 1)
    if not is_bottom_panel:
        plt.xticks([])
    else:
        plt.xticks(x, labels, rotation=45)

    for body in vplot1['bodies']:
        body.set_facecolor('#D43F3A')
0
for i in range(len(genes)):
    _num = ind[i]
    _name = genes[i]
    expr_li = [
        X2[fg == 0, _num], X2[fg == 1, _num], X2[fg == 2, _num],
        X2[fg == 3, _num], X2[fg == 4, _num], X2[fg == 5, _num],
        X2[fg == 6, _num], X2[fg == 7, _num], X2[fg == 8, _num], X2[fg == 9,
                                                                    _num]
    ]

    plt.figure()
    plt.title(_name, fontsize=18)
    plt.xticks(range(1, 11))
    plt.tick_params(labelsize=18)
    parts = plt.violinplot(expr_li,
                           showmeans=False,
                           showmedians=False,
                           showextrema=False)

    cnt = 0
    for pc in parts['bodies']:
        data = expr_li[cnt]
        pc.set_facecolor(c[cnt])
        pc.set_edgecolor('black')
        pc.set_alpha(0.8)
        q1, med, q3 = np.percentile(data, [25, 50, 75])
        mean = np.mean(data)

        plt.vlines(cnt + 1, q1, q3, color='k', linestyle='-', lw=3)
        plt.scatter(cnt + 1,
                    mean,
                    marker='o',
Ejemplo n.º 28
0
# Overall value range across all series, widened so it always contains 0.
series_minima = [min(v) for v in values]
series_maxima = [max(v) for v in values]
vmin = min(min(series_minima), 0)
vmax = max(max(series_maxima), 0)

if dim == 1:
    plot.ylabel('time')
    # Presence of the variable (not its value) is what switches a feature on.
    show_mean = 'SHOW_MEAN' in os.environ

    if len(values[0]) == 1:
        # One sample per series: a plain bar chart, labelled by the bars
        # themselves, so the shared labelling step below is skipped.
        bar_heights = [vs[0] for vs in values]
        plot.bar(range(len(values)),
                 bar_heights,
                 align='center',
                 orientation='vertical',
                 tick_label=labels)
        labels = None
    elif 'VIOLIN_STYLE' in os.environ:
        # Shows means when SHOW_MEAN is set, medians otherwise.
        plot.violinplot(values,
                        vert=True,
                        showmeans=show_mean,
                        showmedians=not show_mean,
                        showextrema=True)
    else:
        plot.boxplot(values,
                     vert=True,
                     notch='NOTCH_STYLE' in os.environ,
                     patch_artist='PATCH_ARTIST' in os.environ)
        if show_mean:
            # Short horizontal segment at each series' mean; series i is
            # centered at x = i + 1.
            for i, series in enumerate(values):
                mean = sum(series) / len(series)
                plot.plot([i + 0.75, i + 1.25], [mean, mean])

    if labels is not None:
        plot.setp(fig.axes,
                  xticks=[x + 1 for x in range(len(values))],
                  xticklabels=labels)
Ejemplo n.º 29
0
def main():
    """Plot foreground-SDR differences against the perturbation coefficient.

    Every sub-folder of ``../pickles`` is visited in sorted order and its
    ``<folder>__sdrs.pick`` file is loaded.  The folder name is split on
    ``_``: the last field is parsed as the perturbation coefficient and
    fields 0 and 2 are joined into the grouping key ``cur_name``.

    A folder whose coefficient equals ``0.0`` is the unperturbed reference:
    it (re)sets ``perfect_sdr`` and makes sure ``cur_name`` has a result list
    for each perturbation type.  Every other folder appends
    ``sdr - perfect_sdr`` to the list of the perturbation type found in its
    name (``noise_time``, ``speed``, otherwise ``stft_swap``).

    For each perturbation type the per-name lists are transposed (so violin
    *i* holds the *i*-th entry of every name) and a violin plot is saved to
    ``../analysis_output/``.

    NOTE: this relies on the reference folder of a name sorting before its
    perturbed folders; otherwise ``perfect_sdr`` or the per-name list is
    missing and a ``NameError`` / ``KeyError`` is raised.
    """
    out = '../analysis_output/'
    pickle_folder = '../pickles'
    pickle_folders_to_load = sorted(
        f for f in os.listdir(pickle_folder)
        if os.path.isdir(join(pickle_folder, f)))

    noise_time = 'noise_time'
    speed = 'speed'
    stft_swap = 'stft_swap'

    sdr_by_noise_type = {noise_time: {}, speed: {}, stft_swap: {}}
    coefs_by_noise_type = {noise_time: set(), speed: set(), stft_swap: set()}

    sdr_type = 'foreground'
    perfect_coef = 0.0

    for folder in pickle_folders_to_load:
        sdrs_name = join(pickle_folder, folder, folder + '__sdrs.pick')
        name_parts = folder.split('_')

        coef = float(name_parts[-1])
        # Close the file deterministically instead of leaking the handle.
        # NOTE: pickle must only be used on trusted, locally produced files.
        with open(sdrs_name, 'rb') as sdr_file:
            sdr_vals = pickle.load(sdr_file)
        cur_name = name_parts[0] + '__' + name_parts[2]

        if coef == perfect_coef:
            # Reference (unperturbed) run: remember its SDR and make sure a
            # result list exists for this name under every perturbation type.
            for per_name in sdr_by_noise_type.values():
                per_name.setdefault(cur_name, [])
            perfect_sdr = sdr_vals[sdr_type][0]
            continue

        if noise_time in folder:
            noise_type = noise_time
        elif speed in folder:
            noise_type = speed
        else:
            noise_type = stft_swap

        sdr_by_noise_type[noise_type][cur_name].append(
            sdr_vals[sdr_type][0] - perfect_sdr)
        coefs_by_noise_type[noise_type].add(coef)

    # ``items()`` exists on both Python 2 and 3 (``iteritems`` is 2-only).
    for noise_type, diffs_by_name in sdr_by_noise_type.items():
        # Transpose: one tuple per coefficient slot, one entry per name.
        # ``zip`` is lazy on Python 3, so materialise it before ``len()``.
        sdr_values = list(zip(*diffs_by_name.values()))
        # Leading '' labels tick 0, which has no violin (violins start at 1).
        sdr_coefs = [''] + sorted(coefs_by_noise_type[noise_type])

        plt.close('all')
        plt.violinplot(sdr_values, showmeans=False, showmedians=True)

        plt.xticks(range(len(sdr_values) + 1), sdr_coefs, rotation='vertical')
        plt.title('repetition perturbation coef vs. foreground SDR {}'.format(noise_type))
        plt.xlabel('perturbation coefficient')
        plt.ylabel('SDR (dB)')
        plt.subplots_adjust(bottom=0.15)
        plt.savefig(out + 'bigger_dataset_sdr_foreground_diff_{}.png'.format(noise_type))
def set_violin_color_label(vplot, color, label):
    """Colour all violin bodies and build a matching legend handle.

    ``vplot`` is indexed with ``'bodies'`` (as in the dict returned by
    ``plt.violinplot``); each body gets ``set_color(color)``.  The returned
    ``Patch`` has the same colour and the given ``label`` so it can stand in
    for the violins in a legend.
    """
    bodies = vplot['bodies']
    for body in bodies:
        body.set_color(color)
    legend_handle = mpatch.Patch(color=color, label=label)
    return legend_handle

# Function application analysis on casts: read the results table and keep
# only the `casts`, `titer` and `citer` columns.
fn_app_casts_data = np.genfromtxt(
    'partially-typed-function-app-results-coercions=15.txt',
    dtype=(int, int, int, float, float, float, float),
    names="casts, types, coercions, trun, titer, crun, citer",
    usecols=("casts, titer, citer"),
)

# Both series are drawn as bare violin bodies: no extrema, median or mean
# lines.
bare_violin = dict(showextrema=False, showmedians=False, showmeans=False)

# Twosomes function application analysis by casts
tdata_pos, tdata_times = sort_for_violin_plot(
    fn_app_casts_data, key="casts", value="titer")
tdata_vplt = plt.violinplot(
    tdata_times, positions=tdata_pos, widths=0.5, **bare_violin)
tdata_vplt_fake = set_violin_color_label(
    tdata_vplt, "blue", "Type-Based Casts")

# Coercions cast introduction analysis
cdata_pos, cdata_times = sort_for_violin_plot(
    fn_app_casts_data, key='casts', value='citer')
cdata_vplt = plt.violinplot(
    cdata_times, positions=cdata_pos, widths=0.6, **bare_violin)
cdata_vplt_fake = set_violin_color_label(cdata_vplt, "green", "Coercions")

# Fixed viewport: one unit of padding around the cast positions on x,
# -10..1600 on y.
axis_limits = [-1, max(cdata_pos) + 1, -10, 1600]
plt.axis(axis_limits)
Ejemplo n.º 31
0
# One (initially empty) bucket per class for the true values.
# DATA, the matching list of buckets for predictions, is created outside this
# excerpt.
ACTUALDATA = [[] for _ in range(NumberOfValues)]

# Sort every (prediction, truth) pair into the bucket selected by
# MICDIC[test], i.e. the bucket index looked up from the true value.
for ii in range(len(Y_test)):
    predic = Y_predict[ii]
    test = Y_test[ii]

    DATA[MICDIC[test]].append(predic)
    ACTUALDATA[MICDIC[test]].append(test)

# Per-bucket accuracy (via accuracy_dilute) and per-bucket sample count.
ACCclass = []
NumberDataClass = []
for ii in range(NumberOfValues):
    ACCclass.append(accuracy_dilute(DATA[ii], ACTUALDATA[ii]))
    NumberDataClass.append(len(DATA[ii]))

# One violin of predictions per bucket, placed at the x positions in
# ActualValues, plus the identity line y = x in green.
plt.violinplot(DATA, showmeans=True, showextrema=True, positions=ActualValues)
plt.plot(x, x, color='g')

# Degree-1 least-squares fit of predictions against true values, drawn in
# yellow.
Z1 = np.polyfit(Y_test, Y_predict, 1)
P1 = np.poly1d(Z1)
# NOTE(review): if `metrics` is sklearn.metrics, r2_score expects
# (y_true, y_pred); here the predictions are passed first and the fitted
# line second -- confirm this argument order is intended.
R2 = metrics.r2_score(Y_predict, P1(Y_test))
plt.plot(Y_test, P1(Y_test), color='y')

# Red guide lines one unit below and above the identity line.
plt.plot(x, x - 1, color='r')
plt.plot(x, x + 1, color='r')
plt.xlabel('Actual value (log2 scale)')
plt.ylabel('Predicted value (log2 scale)')
# Title reports the overall accuracy_dilute score rounded to 4 decimals.
plt.title('. Predicted vs actual values. Accuracy:'+\
          str(np.round(accuracy_dilute(Y_predict,Y_test),4)))
plt.legend([
    'Perfect prediction',
Ejemplo n.º 32
0
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=18, loc='upper right')
plt.savefig("../../plots/uubAllBinCenterCalibCh"+pmtId+".png", dpi=100)
#plt.ylim(-15,55)
#plt.savefig("../../plots/uubAllBinCenterCalibZoom"+pmtId+".png", dpi=100)
plt.clf()
print("uubAllBinCenterCalib OK")
'''

# =====================
# *** For Area/Peak ***

# =============================
# *** For Chis distribution ***
'''
# *** HBase ***
xbl = []
ybl = []
distChis = []
for st in range(0, 19):
    tmp = 0
    ybl = []
    for evt in range(0, histos.GetEntries()):
        histos.GetEntry(evt)
        tmp = histos.chisHbasePk.GetBinContent(st)
        if tmp > 0:
            ybl.append( tmp )
    if len(ybl) > 0:
        distChis.append( ybl )
Ejemplo n.º 33
0
def figure_2_1():
    """Draw the example reward distributions and save them as an image.

    Draws a 200x10 standard-normal sample, shifts each of the 10 columns by
    its own standard-normal offset, and plots one violin per column.  The
    figure is written to ``../images/figure_2_1.png`` and then closed.
    """
    samples = np.random.randn(200, 10)
    column_offsets = np.random.randn(10)
    rewards = samples + column_offsets

    plt.violinplot(dataset=rewards)
    plt.xlabel("Action")
    plt.ylabel("Reward distribution")
    plt.savefig('../images/figure_2_1.png')
    plt.close()
Ejemplo n.º 34
0
# Colour per data type; also reused for the legend patches below.
cdict = {'full': '#003366', 'res': '#900000'}
# x positions of the three violins of each data type.  The two types are
# offset by 0.5 so they sit side by side around the tick centres
# 0.75 / 2.25 / 3.75 used further down.
violin_positions = {
    'full': np.array([0.5, 2.0, 3.5]),
    'res': np.array([1.0, 2.5, 4.0]),
}

for ttype in ['full', 'res']:
    # One array of `r_value`s (one entry per cell) for each of folder[1..3].
    v_means = []
    for fol in range(1, 4):
        vals = []
        for cell in cells:
            pkl_path = folder[fol] + 'rgrExpr_' + ttype + '_' + cell + '_1.0.pkl'
            # Use a context manager so the file handle is closed right away
            # (the bare open() previously leaked one handle per file).
            # NOTE: pickle must only be used on trusted, locally produced files.
            with open(pkl_path, 'rb') as pkl_file:
                data = pickle.load(pkl_file)
            vals.append(data['r_value'])

        v_means.append(np.array(vals))

    pos = violin_positions[ttype]
    violin = plt.violinplot(v_means, pos, widths=0.3, showmeans=True, showextrema=False)
    plt.setp(violin['bodies'], facecolor=cdict[ttype], edgecolor=cdict[ttype])
    # Line width is numeric (it was passed as the string '3' before).
    plt.setp(violin['cmeans'], color=cdict[ttype], linewidth=3, alpha=0.5)

# Proxy patches: violin bodies do not produce legend entries on their own.
blue_patch = mpatches.Patch(color=cdict['full'], label='Full data')
red_patch = mpatches.Patch(color=cdict['res'], label='Residuals')
plt.legend(handles=[blue_patch, red_patch])
plt.xticks([0.75, 2.25, 3.75], ['Binary', 'ChromStates', 'Signals'], fontsize=16, color='k')
plt.yticks(np.arange(3, 11)/10.0, fontsize=16, color='k')
plt.ylabel('Pearson R', fontsize=18, color='k')
plt.title('Expression', fontsize=18)
plt.savefig('figures/r2Expr.png', bbox_inches='tight')


# Start a fresh figure for whatever is plotted next.
plt.figure()
Ejemplo n.º 35
0
def main():
    """Reconstruct showers from simulation files and plot the result quality.

    Steps, in order:

    1. Parse the command line and build the list of input files.
    2. Set up image cleaning, the Hillas-based shower reconstructor and the
       event preparer.
    3. Loop over files and events; compare the fitted direction and core
       position with the MC truth (``event.mc``) and store the per-event
       results in a table row.
    4. Print summary percentiles and the event/image cut-flows.
    5. If ``args.plot`` is set, draw histograms and violin plots of the
       angular error ``xi`` and the core-position error ``DeltaR`` versus the
       number of telescopes and versus energy.
    """

    # your favourite units here
    energy_unit = u.TeV
    angle_unit = u.deg
    dist_unit = u.m

    # Base parser comes from make_argparser(); only the options specific to
    # this script are added here.
    parser = make_argparser()
    parser.add_argument('-o', '--outfile', type=str,
                        help="if given, write output file with reconstruction results")
    parser.add_argument('--plot_c', action='store_true',
                        help="plot camera-wise displays")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--proton', action='store_true',
                       help="do protons instead of gammas")
    group.add_argument('--electron', action='store_true',
                       help="do electrons instead of gammas")

    args = parser.parse_args()

    # Build the input file list: an explicit list wins, otherwise glob the
    # sub-directory of the selected particle type.
    # NOTE(review): `channel` is not assigned in the `infile_list` branch but
    # is used later in plt.title(channel) -- confirm that combination cannot
    # occur, or it raises NameError.
    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
    elif args.proton:
        filenamelist = glob("{}/proton/*gz".format(args.indir))
        channel = "proton"
    elif args.electron:
        filenamelist = glob("{}/electron/*gz".format(args.indir))
        channel = "electron"
    elif args.gamma:
        filenamelist = glob("{}/gamma/*gz".format(args.indir))
        channel = "gamma"
    else:
        raise ValueError("don't know which input to use...")
    filenamelist.sort()

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)
    else:
        print("found {} files".format(len(filenamelist)))

    tel_phi = {}
    tel_theta = {}

    # keeping track of events and where they were rejected
    Eventcutflow = CutFlow("EventCutFlow")
    Imagecutflow = CutFlow("ImageCutFlow")

    # takes care of image cleaning
    cleaner = ImageCleaner(mode=args.mode, cutflow=Imagecutflow,
                           wavelet_options=args.raw,
                           skip_edge_events=args.skip_edge_events, island_cleaning=True)

    # the class that does the shower reconstruction
    shower_reco = HillasReconstructor()

    shower_max_estimator = ShowerMaxEstimator("paranal")

    preper = EventPreparer(cleaner=cleaner,
                           hillas_parameters=hillas_parameters, shower_reco=shower_reco,
                           event_cutflow=Eventcutflow, image_cutflow=Imagecutflow,
                           # event/image cuts:
                           allowed_cam_ids=[],  # means: all
                           min_ntel=3,
                           min_charge=args.min_charge,
                           min_pixel=3)

    # a signal handler to abort the event loop but still do the post-processing
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    # Set up the output table.  When args.store is false the extra driver
    # arguments are passed so that nothing is written to disk.
    # NOTE(review): the bare `except` below also hides errors unrelated to a
    # missing PyTables; if the class definition itself fails, `RecoEvent` is
    # unbound inside the handler.  `reco_table` is also used unconditionally
    # after the event loop, so the fallback path looks incomplete -- confirm.
    try:
        # this class defines the reconstruction parameters to keep track of
        class RecoEvent(tb.IsDescription):
            NTels_trigg = tb.Int16Col(dflt=1, pos=0)
            NTels_clean = tb.Int16Col(dflt=1, pos=1)
            EnMC = tb.Float32Col(dflt=1, pos=2)
            xi = tb.Float32Col(dflt=1, pos=3)
            DeltaR = tb.Float32Col(dflt=1, pos=4)
            ErrEstPos = tb.Float32Col(dflt=1, pos=5)
            ErrEstDir = tb.Float32Col(dflt=1, pos=6)
            h_max = tb.Float32Col(dflt=1, pos=7)

        reco_outfile = tb.open_file(
            args.outfile, mode="w",
            # if we don't want to write the event list to disk, need to add more arguments
            **({} if args.store else {"driver": "H5FD_CORE",
                                      "driver_core_backing_store": False}))
        reco_table = reco_outfile.create_table("/", "reco_event", RecoEvent)
        reco_event = reco_table.row
    except:
        reco_event = RecoEvent()
        print("no pytables installed?")

    # ##        #######   #######  ########
    # ##       ##     ## ##     ## ##     ##
    # ##       ##     ## ##     ## ##     ##
    # ##       ##     ## ##     ## ########
    # ##       ##     ## ##     ## ##
    # ##       ##     ## ##     ## ##
    # ########  #######   #######  ##

    cam_id_map = {}

    # define here which telescopes to loop over
    allowed_tels = None
    # allowed_tels = prod3b_tel_ids("L+F+D")
    for i, filename in enumerate(filenamelist[:args.last]):
        print("file: {i} filename = {filename}".format(i=i, filename=filename))

        source = hessio_event_source(filename,
                                     allowed_tels=allowed_tels,
                                     max_events=args.max_events)

        # loop that cleans and parametrises the images and performs the reconstruction
        for (event, hillas_dict, n_tels,
             tot_signal, max_signal, pos_fit, dir_fit, h_max,
             err_est_pos, err_est_dir) in preper.prepare_event(source):

            shower = event.mc

            # True (MC) direction in degrees, with the azimuth wrapped to
            # at most +180 deg.
            org_alt = u.Quantity(shower.alt).to(u.deg)
            org_az = u.Quantity(shower.az).to(u.deg)
            if org_az > 180 * u.deg:
                org_az -= 360 * u.deg

            # Convert to theta/phi and wrap phi into [-180, 180] deg.
            org_the = alt_to_theta(org_alt)
            org_phi = az_to_phi(org_az)
            if org_phi > 180 * u.deg:
                org_phi -= 360 * u.deg
            if org_phi < -180 * u.deg:
                org_phi += 360 * u.deg

            shower_org = linalg.set_phi_theta(org_phi, org_the)
            shower_core = convert_astropy_array([shower.core_x, shower.core_y])

            # xi: angle between fitted and true direction.
            # diff: distance between fitted and true core position (x, y only).
            xi = linalg.angle(dir_fit, shower_org).to(angle_unit)
            diff = linalg.length(pos_fit[:2] - shower_core)

            # print some performance
            print()
            print("xi = {:4.3f}".format(xi))
            print("pos = {:4.3f}".format(diff))
            print("h_max reco: {:4.3f}".format(h_max.to(u.km)))
            print("err_est_dir: {:4.3f}".format(err_est_dir.to(angle_unit)))
            print("err_est_pos: {:4.3f}".format(err_est_pos))

            # NoPyTables is defined outside this function; presumably it is
            # raised by the fallback row object when PyTables is missing --
            # confirm.
            try:
                # store the reconstruction data in the PyTable
                # (quantities are divided by their unit before being stored)
                reco_event["NTels_trigg"] = n_tels["tot"]
                reco_event["NTels_clean"] = len(shower_reco.circles)
                reco_event["EnMC"] = event.mc.energy / energy_unit
                reco_event["xi"] = xi / angle_unit
                reco_event["DeltaR"] = diff / dist_unit
                reco_event["ErrEstPos"] = err_est_pos / dist_unit
                reco_event["ErrEstDir"] = err_est_dir / angle_unit
                reco_event["h_max"] = h_max / dist_unit
                reco_event.append()
                # flushed after every event so the running percentiles below
                # see the new row
                reco_table.flush()

                print()
                print("xi res (68-percentile) = {:4.3f} {}"
                      .format(np.percentile(reco_table.cols.xi, 68), angle_unit))
                print("core res (68-percentile) = {:4.3f} {}"
                      .format(np.percentile(reco_table.cols.DeltaR, 68), dist_unit))
                print("h_max (median) = {:4.3f} {}"
                      .format(np.percentile(reco_table.cols.h_max, 50), dist_unit))

            except NoPyTables:
                pass

            if args.plot_c:
                # 3D view: one line per entry of shower_reco.circles, starting
                # at c.pos and running along c.a, with line width and marker
                # size scaled by sqrt(c.weight).
                from mpl_toolkits.mplot3d import Axes3D
                fig = plt.figure()
                ax = fig.gca(projection='3d')
                for c in shower_reco.circles.values():
                    points = [c.pos + t * c.a * u.km for t in np.linspace(0, 15, 3)]
                    ax.plot(*np.array(points).T, linewidth=np.sqrt(c.weight) / 10)
                    ax.scatter(*c.pos[:, None].value, s=np.sqrt(c.weight))
                plt.xlabel("x")
                plt.ylabel("y")
                plt.pause(.1)

                # this plots
                # • the MC shower core
                # • the reconstructed shower core
                # • the used telescopes
                # • and the trace of the Hillas plane on the ground
                plt.figure()
                for tel_id, c in shower_reco.circles.items():
                    plt.scatter(c.pos[0], c.pos[1], s=np.sqrt(c.weight))
                    plt.gca().annotate(tel_id, (c.pos[0].value, c.pos[1].value))
                    plt.plot([c.pos[0].value-500*c.norm[1], c.pos[0].value+500*c.norm[1]],
                             [c.pos[1].value+500*c.norm[0], c.pos[1].value-500*c.norm[0]],
                             linewidth=np.sqrt(c.weight)/10)
                plt.scatter(*pos_fit[:2], c="black", marker="*", label="fitted")
                plt.scatter(*shower_core[:2], c="black", marker="P", label="MC")
                plt.legend()
                plt.xlabel("x")
                plt.ylabel("y")
                plt.xlim(-1400, 1400)
                plt.ylim(-1400, 1400)
                plt.show()

            # leave both loops once the signal handler has flagged a stop
            if signal_handler.stop: break
        if signal_handler.stop: break

    # Final summary over all stored events.
    print("\n" + "="*35 + "\n")
    print("xi res (68-percentile) = {:4.3f} {}"
          .format(np.percentile(reco_table.cols.xi, 68), angle_unit))
    print("core res (68-percentile) = {:4.3f} {}"
          .format(np.percentile(reco_table.cols.DeltaR, 68), dist_unit))
    print("h_max (median) = {:4.3f} {}"
          .format(np.percentile(reco_table.cols.h_max, 50), dist_unit))

    # print the cutflows for telescopes and camera images
    print("\n\n")
    Eventcutflow("min2Tels trig")
    print()
    Imagecutflow(sort_column=1)

    # if we don't want to plot anything, we can exit now
    if not args.plot:
        return

    # ########  ##        #######  ########  ######
    # ##     ## ##       ##     ##    ##    ##    ##
    # ##     ## ##       ##     ##    ##    ##
    # ########  ##       ##     ##    ##     ######
    # ##        ##       ##     ##    ##          ##
    # ##        ##       ##     ##    ##    ##    ##
    # ##        ########  #######     ##     ######

    # Histogram of the stored h_max values: 50 bins from 0 to 15000.
    plt.figure()
    plt.hist(reco_table.cols.h_max, bins=np.linspace(000, 15000, 51, True))
    plt.title(channel)
    plt.xlabel("h_max reco")
    plt.pause(.1)

    # Histogram of the angular error xi on a logarithmic count axis.
    figure = plt.figure()
    xi_edges = np.linspace(0, 5, 20)
    plt.hist(reco_table.cols.xi, bins=xi_edges, log=True)
    plt.xlabel(r"$\xi$ / deg")
    if args.write:
        save_fig('{}/reco_xi_{}'.format(args.plots_dir, args.mode))
    plt.pause(.1)

    # Histogram of the estimated direction error (labelled "beta").
    plt.figure()
    plt.hist(reco_table.cols.ErrEstDir[:],
             bins=np.linspace(0, 20, 50))
    plt.title(channel)
    plt.xlabel("beta")
    plt.pause(.1)

    # Histogram of log10(actual / estimated direction error).
    plt.figure()
    plt.hist(np.log10(reco_table.cols.xi[:] / reco_table.cols.ErrEstDir[:]), bins=50)
    plt.title(channel)
    plt.xlabel("log_10(xi / beta)")
    plt.pause(.1)

    # convert the xi-list into a dict with the number of used telescopes as keys
    xi_vs_tel = {}
    for xi, ntel in zip(reco_table.cols.xi, reco_table.cols.NTels_clean):
        if ntel not in xi_vs_tel:
            xi_vs_tel[ntel] = [xi]
        else:
            xi_vs_tel[ntel].append(xi)

    print(args.mode)
    for ntel, xis in sorted(xi_vs_tel.items()):
        print("NTel: {} -- median xi: {}".format(ntel, np.median(xis)))
        # print("histogram:", np.histogram(xis, bins=xi_edges))

    # create a list of energy bin-edges and -centres for violin plots
    # (13 edges from 2 to 8 in log10(energy), i.e. 12 bins of width 0.5)
    Energy_edges = np.linspace(2, 8, 13)
    Energy_centres = (Energy_edges[1:]+Energy_edges[:-1])/2.

    # convert the xi-list in to an energy-binned dict with the bin centre as keys
    # NOTE(review): np.digitize(...) - 1 yields -1 below the first edge (which
    # silently indexes the last centre) and 12 at/above the last edge (which
    # raises IndexError) -- confirm energies always fall inside the edges.
    # NOTE(review): EnMC is stored divided by u.TeV, while the axis label
    # below says GeV -- confirm which unit the edges are meant for.
    xi_vs_energy = {}
    for en, xi in zip(reco_table.cols.EnMC, reco_table.cols.xi):

        # get the bin number this event belongs into
        sbin = np.digitize(np.log10(en), Energy_edges)-1

        # the central value of the bin is the key for the dictionary
        if Energy_centres[sbin] not in xi_vs_energy:
            xi_vs_energy[Energy_centres[sbin]]  = [xi]
        else:
            xi_vs_energy[Energy_centres[sbin]] += [xi]

    # plotting the angular error as violin plots with binning in
    # number of telescopes and shower energy
    # (dict values supply the datasets, dict keys the violin positions)
    figure = plt.figure()
    plt.subplot(211)
    plt.violinplot([np.log10(a) for a in xi_vs_tel.values()],
                   [a for a in xi_vs_tel.keys()],
                   points=60, widths=.75, showextrema=False, showmedians=True)
    plt.xlabel("Number of Telescopes")
    plt.ylabel(r"log($\xi$ / deg)")
    plt.ylim(-3, 2)
    plt.grid()

    # violin width is two thirds of one energy bin
    plt.subplot(212)
    plt.violinplot([np.log10(a) for a in xi_vs_energy.values()],
                   [a for a in xi_vs_energy.keys()],
                   points=60, widths=(Energy_edges[1] - Energy_edges[0]) / 1.5,
                   showextrema=False, showmedians=True)
    plt.xlabel(r"log(Energy / GeV)")
    plt.ylabel(r"log($\xi$ / deg)")
    plt.ylim(-3, 2)
    plt.grid()

    plt.tight_layout()
    if args.write:
        save_fig('{}/reco_xi_vs_E_NTel_{}'.format(args.plots_dir, args.mode))

    plt.pause(.1)

    # convert the diffs-list into a dict with the number of used telescopes as keys
    diff_vs_tel = {}
    for diff, ntel in zip(reco_table.cols.DeltaR, reco_table.cols.NTels_clean):
        if ntel not in diff_vs_tel:
            diff_vs_tel[ntel] = [diff]
        else:
            diff_vs_tel[ntel].append(diff)

    # convert the diffs-list in to an energy-binned dict with the bin centre as keys
    # (same binning, and the same out-of-range caveat, as for xi above)
    diff_vs_energy = {}
    for en, diff in zip(reco_table.cols.EnMC, reco_table.cols.DeltaR):

        # get the bin number this event belongs into
        sbin = np.digitize(np.log10(en), Energy_edges) - 1

        # the central value of the bin is the key for the dictionary
        if Energy_centres[sbin] not in diff_vs_energy:
            diff_vs_energy[Energy_centres[sbin]]  = [diff]
        else:
            diff_vs_energy[Energy_centres[sbin]] += [diff]

    # plotting the core position error as violin plots with binning in
    # number of telescopes and shower energy
    plt.figure()
    plt.subplot(211)
    plt.violinplot([np.log10(a) for a in diff_vs_tel.values()],
                   [a for a in diff_vs_tel.keys()],
                   points=60, widths=.75, showextrema=False, showmedians=True)
    plt.xlabel("Number of Telescopes")
    plt.ylabel(r"log($\Delta R$ / m)")
    plt.grid()

    plt.subplot(212)
    plt.violinplot([np.log10(a) for a in diff_vs_energy.values()],
                   [a for a in diff_vs_energy.keys()],
                   points=60, widths=(Energy_edges[1] - Energy_edges[0]) / 1.5,
                   showextrema=False, showmedians=True)
    plt.xlabel(r"log(Energy / GeV)")
    plt.ylabel(r"log($\Delta R$ / m)")
    plt.grid()

    plt.tight_layout()
    if args.write:
        save_fig('{}/reco_dist_vs_E_NTel_{}'.format(args.plots_dir, args.mode))
    plt.show()
Ejemplo n.º 36
0
    # Box plot of log10(MSE), one box per entry of MSE.
    plt.figure()
    plt.boxplot([np.log10(x) for x in MSE])
    ax = plt.gca()
    # Ticks sit at the integer log10 values -7..-1 but are labelled with the
    # corresponding linear values 10**k, so the axis reads as a log scale.
    ax.set_yticks(np.arange(-7, 0))
    ax.set_yticklabels(10.0**np.arange(-7, 0))
    ax.set_xticklabels(datasets)
    plt.grid()
    plt.xlabel("Dataset", fontsize=11)
    plt.xticks(rotation=45, fontsize=11)
    plt.ylabel("MSE", fontsize=11)
    plt.yticks(fontsize=11)
    plt.savefig('./../imgs/boxplot.pdf', format='pdf', bbox_inches='tight')

    # Same data as a violin plot with the median marked instead of the mean.
    plt.figure()
    plt.violinplot([np.log10(x) for x in MSE],
                   showmeans=False,
                   showmedians=True)
    ax = plt.gca()
    ax.set_yticks(np.arange(-7, 0))
    ax.set_yticklabels(10.0**np.arange(-7, 0))
    # Unlike the box plot above, the x tick positions are set explicitly
    # here: one tick per violin at 1..len(MSE), labelled with the dataset
    # names.
    plt.setp(ax,
             xticks=[idx + 1 for idx in range(len(MSE))],
             xticklabels=datasets)
    plt.grid()
    plt.xlabel("Dataset", fontsize=11)
    plt.xticks(rotation=45, fontsize=11)
    plt.ylabel("MSE", fontsize=11)
    plt.yticks(fontsize=11)
    plt.savefig('./../imgs/violinplot.pdf', format='pdf', bbox_inches='tight')

    # 5) plot the PSD
### Add text
#axHistx.text(5.05,0.05,r'*LOWESS Smoothing',fontsize=8)

plt.savefig(directoryfigure + 'scattertest2.png',dpi=800)

###########################################################################
###########################################################################
###########################################################################
###########################################################################

# New figure with a single axes in the middle slot of a 3-row grid.
fig = plt.figure()
axb = fig.add_subplot(312)

# The two masked series to compare, drawn as horizontal violins.
datai = [varx[mask],vary[mask]]

vp = plt.violinplot(datai,showmeans=True,showmedians=False,vert=False,widths=0.6)

# Strip the frame: hide all four spines.
for side in ('top', 'right', 'left', 'bottom'):
    axb.spines[side].set_color('none')
axb.xaxis.set_ticks_position('bottom')
# Hide the tick marks.  These must be real booleans: the strings 'off' were
# deprecated in Matplotlib 2.2 and are truthy, so current versions would
# leave the ticks switched on.
axb.tick_params(left=False, right=False, bottom=False)
plt.setp(axb,xticks=[])

axb.set_aspect(1.9)

# One y tick per violin (violins sit at 1, 2, ...), labelled by data source.
plt.setp(axb,yticks=[y+1 for y in range(len(datai))],
                     yticklabels=['PIOMAS','ICESat-J'])
                     
for i in vp['bodies']:
# Append columns 2..11 of `x` to Xv, each wrapped in its own one-element
# list.
for column_index in range(2, 12):
    column_name = x.columns[column_index]
    Xv.append([x[column_name]])

# Violin positions 1..12 on the x axis.
pos = list(range(1, 13))
violin_options = dict(
    points=40,
    widths=0.5,
    showmeans=True,
    showextrema=True,
    showmedians=True,
    bw_method='silverman',
)
plt.violinplot(Xv, pos, **violin_options)
plt.title('Violin plot of Timbre Average', fontsize=10)

# Summary statistics of `y`; the results are only displayed when this runs
# interactively.
y.describe()
y.mode()

# fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6, 6))
#
# axes[0, 1].violinplot(x[1:], pos, points=40, widths=0.5,
#                       showmeans=True, showextrema=True, showmedians=True,
#                       bw_method='silverman')
# axes[0, 1].set_title('Violin plot of Timbre average', fontsize=10)
#
Ejemplo n.º 39
0
def conf_int(data, confidence=0.95):
    """Return the half-width of the Student-t confidence interval of the mean.

    Parameters
    ----------
    data : sequence of numbers
        The sample.
    confidence : float, optional
        Two-sided confidence level, 0.95 by default.

    Returns
    -------
    float
        ``sem(data) * t.ppf((1 + confidence) / 2, n - 1)``, so the interval
        is ``mean +/- h``.  ``nan`` is returned when the interval is
        undefined or degenerate: fewer than two samples, or a standard error
        of exactly zero (all samples identical).
    """
    a = 1.0 * np.array(data)
    n = len(a)
    # Check the sample size first: the standard error of fewer than two
    # points is undefined and computing it only produces runtime warnings.
    if n < 2:
        return np.nan
    se = scipy.stats.sem(a)
    if se == 0:
        return np.nan
    return se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)


################################################################
## Line Plot RT-Prio (n, n) scaling
################################################################
# Load the duration measurements and prepare a single-axes figure.
d = pd.read_csv('../perf-data/dur.csv')
fig, axes = plt.subplots(1, 1)

print(d['time'])

# Order the rows by `start`, breaking ties by `pipe`.
d = d.sort_values(by=["start", "pipe"], axis=0, ascending=True)
print(d)

# Renumber the rows 0..n-1; the previous index is kept as a column.
d = d.reset_index()
print(d)

# Distribution of the `time` column as a single violin.
plt.violinplot(d['time'])
plt.show()

# fig.savefig('dur-violin.pdf')
Ejemplo n.º 40
0
            elif 'right' in file_names[i][j].lower() or 'rh' in file_names[i][j].lower():

                df = pd.read_csv(file_names[i][j])

                means = df.mean(axis = 0)

                #print (xticklabels[k])
                #print (violin_labels[2 * i - 1], means[k])

                # On a separate row and with a separate column iterator, fill in data
                single_plot[2 * i - 1].append(means[k])


    #Violin plots
    plt.figure(xticklabels[k]) 
    plt.violinplot(single_plot, showmeans=True)

    # Plot logistics
    plt.title(xticklabels[k])
    plt.xlabel('Group')
    plt.ylabel('Observed Values')

    # Label the x axis more specifically
    plt.xticks([i+1 for i in range(2 * max_group)], violin_labels)


# Print out all the plots
plt.show()


# Code to read in one csv file and isolate the different types of values
Ejemplo n.º 41
0
# Reorder the axes of the collected covariance array, flatten it to
# (2, 4, -1, 2, 2) and split along the first axis into two stacks of 2x2
# matrices, each of shape (4, -1, 2, 2).
# NOTE(review): the names suggest "theoretical" vs "numerical" covariances --
# confirm against where covSuperList is built.
covsTeo, covsNum = np.array(covSuperList).transpose((1, 2, 0, 3, 4, 5)).reshape((2, 4, -1, 2, 2))

# Matrix inverse of every 2x2 matrix in covsNum.
scaleNum = np.linalg.inv(covsNum)


# take the Frobenius norm of each matrix (norm over the last two axes)
covSuperFrob = np.linalg.norm(covSuperList, axis=(4, 5))

# %%
p = 2
matt = np.eye(p)

# Scratch evaluation: the value of this expression is not stored, so it is
# only visible when the cell is run interactively.  `N` is defined outside
# this excerpt.
np.exp(-p/2) / spe.gamma(N/2) / 2**(N*p/2) / np.linalg.det(matt)**(p/2+0.5)


# Wishart density at the identity with df = N - 1 and identity scale
# (result also discarded).
sts.wishart.pdf(matt, df=N-1, scale=matt)



# %%
# NOTE(review): the distribution is frozen without df/scale and pdf() is
# called without an argument; this looks like an unfinished experiment and
# will likely raise -- confirm before running this cell.
rv = sts.wishart()
rv.pdf()


# Ratio of the Frobenius norms at index 0 and index 1 of axis 1, rearranged
# into 4 rows.
frobQuotList = (covSuperFrob[:, 0] / covSuperFrob[:,1]).transpose((1, 0, 2)).reshape((4, -1))


# Three views of the ratios on the current axes: line plot, histogram of row
# 3, and one violin per row (the transpose makes each row a dataset).
plt.plot(frobQuotList)
plt.hist(frobQuotList[3])
plt.violinplot(frobQuotList.T, showmeans=True, showextrema=False)
Ejemplo n.º 42
0
def _load_result_column(model, metric):
    """Load ``results/eval_res/<model>_<metric>.npy`` as an ``(n, 1)`` column."""
    values = np.load('results/eval_res/{}_{}.npy'.format(model, metric))
    # Use the array's own length; raises ValueError if it is not a flat
    # vector of n elements.
    return values.reshape(values.shape[0], 1)


def _annotate_medians(med_vals, point_offset, text_offset):
    """Write each median next to its violin (violins sit at x = 1, 2, ...)."""
    for idx, med in enumerate(med_vals):
        plt.annotate(str(med),
                     xy=(idx + 1, med + point_offset),
                     xytext=(idx + 0.95, med + text_offset))


def boxplot_embedding_models():
    """Draw violin plots comparing the three embedding models.

    Two figures are produced from the ``.npy`` result vectors in
    ``results/eval_res/``:

    * SI-SDR (dB): plain, masking and latent SDR for each model (9 violins);
    * L1 error: plain and latent ADT for each model (6 violins).

    Each violin shows only the median, which is also printed next to it
    (rounded to 1 decimal for SI-SDR and 2 decimals for the L1 error).
    Despite the function name, the plots are violin plots.

    The models are the embedder with discriminator (``r-mcos-disc``), the
    embedder with additivity loss (``r-mcos-embd``) and the embedder that
    makes the singing voice louder (``r-mcos-embd-loud``).

    Every vector is reshaped with its own length.  Previously the
    ``r-mcos-embd-loud`` masking-SDR vector was reshaped with the length of
    the ``r-mcos-embd`` one, which failed whenever the two lengths differed.
    """
    sns.set_context("paper", font_scale=2.6)

    models = ('r-mcos-disc', 'r-mcos-embd', 'r-mcos-embd-loud')
    sdr_metrics = ('sdr', 'mask_sdr', 'lat_sdr')
    adt_metrics = ('adt', 'lat_adt')

    # Load everything up front so a missing file is reported before any
    # figure is drawn.  Columns are ordered model by model.
    si_sdr_res = np.hstack([_load_result_column(model, metric)
                            for model in models for metric in sdr_metrics])
    adt_res = np.hstack([_load_result_column(model, metric)
                         for model in models for metric in adt_metrics])

    # SDR plot (drawn on the current figure)
    plt.violinplot(si_sdr_res, showmeans=False, showextrema=False, showmedians=True)
    plt.xticks([1, 2, 3, 4, 5, 6, 7, 8, 9], ['nnMCS-Disc.', 'nnMCS-Disc:Mask', 'nnMCS-Disc:Lat',
                                             'nnMCS-Embd.', 'nnMCS-Embd:Mask', 'nnMCS-Embd:Lat',
                                             'nnMCS-Loud.', 'nnMCS-Loud:Mask', 'nnMCS-Loud:Lat'], fontsize=9)
    plt.ylabel('SI-SDR (dB)')
    plt.ylim(4, 37.3)

    med_vals = np.round(np.median(si_sdr_res, axis=0), 1)
    _annotate_medians(med_vals, point_offset=1, text_offset=0.3)

    # ADT plot (new figure)
    plt.figure()
    plt.violinplot(adt_res, showmeans=False, showextrema=False, showmedians=True)
    plt.xticks([1, 2, 3, 4, 5, 6], ['nnMCS-Disc.', 'nnMCS-Disc:Lat',
                                    'nnMCS-Embd.', 'nnMCS-Embd:Lat',
                                    'nnMCS-Loud.', 'nnMCS-Loud:Lat'], fontsize=9)
    plt.ylabel(r'$L_1$ Error')
    plt.ylim(0, 0.5)

    med_vals = np.round(np.median(adt_res, axis=0), 2)
    _annotate_medians(med_vals, point_offset=0.001, text_offset=0.005)
    plt.show()
Ejemplo n.º 43
0
def createPlots(c1, c2, data, label, colour,
                x_axis, y_axis, point_size, name, axis,
                grid_linestyle, mode, grid_colour=None):
    """Draw one data series on the current matplotlib figure and style its axes.

    Parameters
    ----------
    c1, c2 :
        Keys used to index ``data``; ``data[c1]`` supplies the x values and
        ``data[c2]`` the y values.
    data :
        Container indexable by ``c1`` and ``c2``
        (presumably a pandas DataFrame or a dict of sequences; confirm
        against the caller).
    label :
        Legend label for the series (not used in ``VLN`` mode).
    colour :
        Matplotlib colour of the series. In ``LN_MEDIAN`` mode it must be a
        pair ``(line_colour, per_point_colours)`` where the second item is
        indexable by point position.
    x_axis, y_axis :
        ``(start, stop, step)`` triples handed to ``np.arange`` to build the
        tick positions. The axis limits are the first and last tick.
    point_size :
        Indexable of marker areas; item 0 is used by ``PTN`` and item 1 by
        the line modes. The square root is passed as ``markersize``.
    name :
        Title of the graph.
    axis :
        Pair ``(x_label, y_label)``.
    grid_linestyle :
        Line style of the grid.
    mode :
        One of the module constants ``PTN``, ``LN_DISC``, ``LN_MEDIAN`` or
        ``VLN``.
    grid_colour :
        Optional grid colour. When ``None`` the grid reuses the series colour
        with ``alpha=0.3``.

    Returns
    -------
    ``-1`` (after printing an error) when ``mode`` is not recognised,
    otherwise ``None``.
    """
    # Load the data
    x = data[c1]
    y = data[c2]

    # Expand the (start, stop, step) triples into tick positions
    x_axis = np.arange(x_axis[0], x_axis[1], x_axis[2])
    y_axis = np.arange(y_axis[0], y_axis[1], y_axis[2])

    # The mode constants are compared by value: identity (`is`) only works
    # by accident when the constant happens to be the very same object.

    # Graph with points
    if mode == PTN:
        plt.plot(x, y, 'o', label=label,
                 markersize=np.sqrt(point_size[0]), color=colour)

    # Line graph with discontinuous (dotted) lines and a filled area
    elif mode == LN_DISC:
        plt.plot(x, y, label=label, marker='.',
                 markersize=np.sqrt(point_size[1]), color=colour,
                 linestyle=':')
        plt.fill_between(x, y, alpha=0.4, color=colour)

    # Dotted line plus dashed guide lines from every point to both axes
    elif mode == LN_MEDIAN:
        grid_col = colour[0]
        graph_col = colour[1]
        plt.plot(x, y, label=label, marker='.',
                 markersize=np.sqrt(point_size[1]), color=grid_col,
                 linestyle=':')
        plt.fill_between(x, y, alpha=0.2, color=grid_col)
        for idx, x_i in enumerate(x):
            y_i = y[idx]
            point_colour = graph_col[idx]

            # Vertical line
            plt.plot((x_i, x_i), (0, y_i), alpha=0.7,
                     color=point_colour, linestyle="dashed")

            # Horizontal line
            plt.plot((0, x_i), (y_i, y_i), alpha=0.7,
                     color=point_colour, linestyle="dashed")

            # Point
            plt.plot(x_i, y_i, 'o', markersize=3, alpha=0.7,
                     color=point_colour)

        # From here on `colour` is only used for the grid
        colour = grid_col

    # Single violin positioned at x[1]
    elif mode == VLN:
        violin_parts = plt.violinplot(y, [x[1]], points=100, widths=4,
                                      showmeans=True, showextrema=True,
                                      showmedians=True, bw_method=0.5)

        for part in ('cbars', 'cmins', 'cmaxes', 'cmeans', 'cmedians'):
            vp = violin_parts[part]
            vp.set_color(colour)
            vp.set_linewidth(1)

        for body in violin_parts['bodies']:
            body.set_color(colour)
            body.set_alpha(0.3)

    # Incorrect plot mode
    else:
        print("ERROR: Wrong plot mode")
        return -1

    # Set the ranges and ticks of both axes
    plt.xlim([x_axis[0], x_axis[-1]])
    plt.ylim([y_axis[0], y_axis[-1]])

    plt.xticks(x_axis)
    plt.yticks(y_axis)

    # Set the name of the graph and the names of the axes
    plt.title(name)
    plt.xlabel(axis[0])
    plt.ylabel(axis[1])

    # Set the colour of the grid
    if grid_colour is None:
        plt.grid(True, color=colour, alpha=0.3, linestyle=grid_linestyle)
    else:
        plt.grid(True, color=grid_colour, linestyle=grid_linestyle)
Ejemplo n.º 44
0
def _chi2_pvalue_of_inner_samples(samples, c, bins):
    """Return the chi-squared independence p-value of the inner samples.

    Only rows whose first two coordinates both lie strictly inside
    ``(c, 1 - c)`` are kept. They are binned into a 2D histogram over
    ``bins``; the two outermost bins on every side are dropped before the
    contingency test is applied.
    """
    samples = np.asarray(samples, dtype=float)
    if samples.size == 0:
        inner = np.empty((0, 2))
    else:
        first = samples[:, 0]
        second = samples[:, 1]
        inside = ((c < first) & (first < 1 - c) &
                  (c < second) & (second < 1 - c))
        # Flatten and re-pair exactly like the element-wise append did.
        flat = samples[inside].ravel()
        inner = flat.reshape(flat.size // 2, 2)

    counts = np.histogram2d(inner[:, 0], inner[:, 1],
                            bins=bins)[0][2:-2, 2:-2]
    return chi2_contingency(counts)[1]


def plotpvalueofChi2IndependenceTest(densities,
                                     functionName,
                                     c=0.0,
                                     out=False):
    """Violin-plot the chi-squared independence p-values per density setting.

    For each of the settings ``kde_gaussian``, ``kde_epanechnikov``,
    ``sgde_zero`` and ``sgde_boundaries`` and for every run stored under it,
    the p-value of a chi-squared contingency test is computed on the
    "shuffled" and on the "not_shuffled" ``uniform_validation`` samples.
    Each setting contributes two violins (shuffled / not shuffled).

    Parameters
    ----------
    densities : dict
        Maps a setting name to a dict of runs. Every run must provide
        ``run["samples"][kind]["uniform_validation"]`` for ``kind`` in
        ``"shuffled"`` and ``"not_shuffled"``. All four settings listed
        above must be present.
    functionName : str
        Used in the output file name.
    c : float
        Margin; only samples with both coordinates in ``(c, 1 - c)`` enter
        the test.
    out : bool
        If true the figure is written below ``plots/`` through ``savefig``
        and closed; otherwise it is shown.
    """
    numDensities = len(densities)
    numIterations = max([0] + [len(stats) for stats in densities.values()])

    data = np.zeros((numIterations, 2 * numDensities))
    names = [None] * data.shape[1]

    # (setting key, label prefix) in plotting order
    settings = (
        ("kde_gaussian", "KDE \n Gaussian"),
        ("kde_epanechnikov", "KDE \n Epan."),
        ("sgde_zero", "SGDE \n set-to-zero"),
        ("sgde_boundaries", "SGDE \n interp. bound."),
    )

    # 9 equally wide bins on [0, 1]; the helper trims them to the inner 5x5
    bins = np.linspace(0, 1, 10)

    for i, (setting, prefix) in enumerate(settings):
        stats = densities[setting]
        names[2 * i] = prefix + " \n shuffled"
        names[2 * i + 1] = prefix + " \n not shuffled"

        for j, values in enumerate(stats.values()):
            samples = values["samples"]
            data[j, 2 * i] = _chi2_pvalue_of_inner_samples(
                samples["shuffled"]["uniform_validation"], c, bins)
            data[j, 2 * i + 1] = _chi2_pvalue_of_inner_samples(
                samples["not_shuffled"]["uniform_validation"], c, bins)

    pos = np.arange(0, len(names))
    xlim = (np.min(pos) - 0.5, np.max(pos) + 0.5)
    fig = plt.figure(figsize=(17, 5))
    plt.violinplot(data,
                   pos,
                   points=60,
                   widths=0.7,
                   showmeans=True,
                   showextrema=True,
                   showmedians=True,
                   bw_method=0.5)
    plt.xticks(pos, names)
    plt.ylabel("$p$-value")
    # Mark the usual 5% significance level
    plt.hlines(0.05, xlim[0], xlim[1], linestyle="--")
    plt.xlim(xlim)

    # Raw string: "\c" is not a valid escape sequence in a normal literal.
    plt.title(r"$\chi^2$ test", fontproperties=load_font_properties())

    if out:
        savefig(fig,
                os.path.join(
                    "plots",
                    "chi_squared_%s_c%i" % (functionName, np.round(c * 100))),
                tikz=True)
        plt.close(fig)
    else:
        plt.show()
Ejemplo n.º 45
0
            """            y5 = []
            x5 = []

            start_index = 0
            for i in range(len(x_d)):

                y_add, start_index = (norm_KDE(x_d[i], ordered_spectra, res,start_index))
                if y_add > 0:
                    y5.append(y_add)
                    x5.append(x_d[i])

            max2 = max(y5)"""
            #y5 = [y5[x] * hist_max / max2 for x in range(len(y5))]
            #plt.plot(x5, y5, "black")
            #plt.vlines(np.mean(mzrange),(hist_max/2)-hist_max*0.05,(hist_max/2)+hist_max*0.05)
            #plt.vlines([np.mean(mzrange)+theo_width,np.mean(mzrange)-theo_width], (hist_max/2)-hist_max*0.025,(hist_max/2)+hist_max*0.025,color="r",alpha=0.25)
            #plt.hlines((hist_max/2),np.mean(mzrange)-theo_width,np.mean(mzrange)+theo_width,color="r",alpha=0.25)

        except:
            ""

    #plt.scatter(raw_data,[-0.2]*len(raw_data),marker="+",color="blue",label="raw data")
    #customlegend = [Line2D([0], [0], color='black', lw=4),Line2D([0], [0], color='blue', lw=4),Line2D([0], [0], color='gray', lw=4),Line2D([0], [0], color='blue', marker="+", lw=0,label='Scatter')]
    #plt.legend(customlegend, ["Lorentzian KDE bandwidth={0}".format(bandwidth_L_kde),"Sinc KDE bandwidth={0}".format(SinC_bandwidth), "norm KDE bandwidth={0}".format(bandwidth_norm_kde),"Raw Data"])
    #customlegend = [Line2D([0], [0], color='black', lw=4),Line2D([0], [0], color='blue', marker="+", lw=0,label='Scatter')]
    #plt.legend(customlegend, ["norm KDE bandwidth={0}".format(bandwidth_norm_kde),"Raw Data"])

    plt.violinplot(data)

    plt.show()
    ratios_nosvase = {}
    svase_datasets = {}
    nosvase_datasets = {}
    for featuretype in ['reg', 'tfbs', 'dnase', 'zld', 'kr', 'med', 'D', 'da',
                        'twi' ]:
        svASE_snpcounts, nosvASE_snpcounts = get_datafiles(featuretype)
        svase_datasets[featuretype] = svASE_snpcounts
        nosvase_datasets[featuretype] = nosvASE_snpcounts


        ratios_svase[featuretype] = svASE_snpcounts.rate
        ratios_nosvase[featuretype] = nosvASE_snpcounts.rate

        figure()
        violinplot([svASE_snpcounts.num_snps.dropna(),
                    nosvASE_snpcounts.num_snps.dropna()], showmedians=True,
                   showextrema=False)
        boxplot([svASE_snpcounts.rate, nosvASE_snpcounts.rate])
        '''
        if featuretype != 'dnase':
            scatter(1 + .05 * randn(len(svASE_snpcounts)),
                    svASE_snpcounts.rate)
            scatter(2 + .05 * randn(len(nosvASE_snpcounts)),
                    nosvASE_snpcounts.rate)
                    '''
        xticks([1, 2], ['svASE', 'no svASE'])
        ymin, ymax = ylim()
        ylim(-.05 * ymax, ymax)
        title(featuretype)
        savefig('analysis/results/snp_rate_{}.png'.format(featuretype))
Ejemplo n.º 47
0
def summary_plot(shap_values,
                 features=None,
                 feature_names=None,
                 max_display=None,
                 plot_type="dot",
                 color=None,
                 axis_color="#333333",
                 title=None,
                 alpha=1,
                 show=True,
                 sort=True,
                 color_bar=True,
                 auto_size_plot=True,
                 layered_violin_max_num_bins=20,
                 class_names=None,
                 export_path=None,
                 width=8):
    """Create a SHAP summary plot, colored by feature values when they are provided.

    Parameters
    ----------
    shap_values : numpy.array or list of numpy.array
        Matrix of SHAP values (# samples x # features). A list of matrices is
        treated as multi-class output and forces ``plot_type`` to "bar". A
        3-dimensional array is treated as SHAP interaction values and drawn
        as a row of subplots.

    features : numpy.array or pandas.DataFrame or list
        Matrix of feature values (# samples x # features) or a feature_names list as shorthand

    feature_names : list
        Names of the features (length # features)

    max_display : int
        How many top features to include in the plot (default is 20, or 7 for interaction plots)

    plot_type : "dot" (default), "violin", "layered_violin" or "bar"
        What type of summary plot to produce

    color : color, colormap name or callable
        Color of the plot elements. Defaults to "coolwarm" for layered
        violins, a per-class color function for multi-class bars, and
        ``colors.blue_rgb`` otherwise.

    axis_color : color
        Color of the axis ticks and tick labels.

    title : str
        Currently unused by this function.

    alpha : float
        Transparency of the plotted points / violin bodies.

    show : bool
        Whether to call ``pl.show()`` at the end.

    sort : bool
        Order the features by the sum of their absolute SHAP values.

    color_bar : bool
        Whether to draw the feature-value color bar.

    auto_size_plot : bool
        Whether to resize the current figure to fit the displayed features.

    layered_violin_max_num_bins : int
        Maximum number of layers per feature in a layered violin plot.

    class_names : list
        Legend labels for multi-class bar plots (default "Class <i>").

    export_path : str
        If given, the figure is saved to this path.

    width : float
        Figure width in inches used when ``auto_size_plot`` is true.
    """

    multi_class = False
    if isinstance(shap_values, list):
        multi_class = True
        plot_type = "bar"  # only type supported for now
    else:
        assert len(
            shap_values.shape
        ) != 1, "Summary plots need a matrix of shap_values, not a vector."

    # default color:
    if color is None:
        if plot_type == 'layered_violin':
            color = "coolwarm"
        elif multi_class:
            color = lambda i: colors.red_blue_circle(i / len(shap_values))
        else:
            color = colors.blue_rgb

    # convert from a DataFrame or other types
    if str(type(features)) == "<class 'pandas.core.frame.DataFrame'>":
        if feature_names is None:
            feature_names = features.columns
        features = features.values
    elif isinstance(features, list):
        if feature_names is None:
            feature_names = features
        features = None
    elif (features is not None) and len(
            features.shape) == 1 and feature_names is None:
        feature_names = features
        features = None

    num_features = (shap_values[0].shape[1]
                    if multi_class else shap_values.shape[1])

    if feature_names is None:
        feature_names = np.array(
            [labels['FEATURE'] % str(i) for i in range(num_features)])

    # plotting SHAP interaction values
    if not multi_class and len(shap_values.shape) == 3:
        if max_display is None:
            max_display = 7
        else:
            max_display = min(len(feature_names), max_display)

        sort_inds = np.argsort(-np.abs(shap_values.sum(1)).sum(0))

        # get plotting limits (symmetric around zero)
        delta = 1.0 / (shap_values.shape[1]**2)
        slow = np.nanpercentile(shap_values, delta)
        shigh = np.nanpercentile(shap_values, 100 - delta)
        v = max(abs(slow), abs(shigh))
        slow = -v
        shigh = v

        pl.figure(figsize=(1.5 * max_display + 1, 0.8 * max_display + 1))
        pl.subplot(1, max_display, 1)
        proj_shap_values = shap_values[:, sort_inds[0], sort_inds]
        # because off diag effects are split in half
        proj_shap_values[:, 1:] *= 2
        summary_plot(proj_shap_values,
                     features[:, sort_inds] if features is not None else None,
                     feature_names=feature_names[sort_inds],
                     sort=False,
                     show=False,
                     color_bar=False,
                     auto_size_plot=False,
                     max_display=max_display)
        pl.xlim((slow, shigh))
        pl.xlabel("")
        title_length_limit = 11
        pl.title(shorten_text(feature_names[sort_inds[0]], title_length_limit))
        for i in range(1, min(len(sort_inds), max_display)):
            ind = sort_inds[i]
            pl.subplot(1, max_display, i + 1)
            proj_shap_values = shap_values[:, ind, sort_inds]
            proj_shap_values *= 2
            # because only off diag effects are split in half
            proj_shap_values[:, i] /= 2
            summary_plot(proj_shap_values,
                         features[:,
                                  sort_inds] if features is not None else None,
                         sort=False,
                         # list multiplication avoids a comprehension
                         # variable that could clobber `i` on Python 2
                         feature_names=[""] * len(feature_names),
                         show=False,
                         color_bar=False,
                         auto_size_plot=False,
                         max_display=max_display)
            pl.xlim((slow, shigh))
            pl.xlabel("")
            if i == min(len(sort_inds), max_display) // 2:
                pl.xlabel(labels['INTERACTION_VALUE'])
            pl.title(shorten_text(feature_names[ind], title_length_limit))
        pl.tight_layout(pad=0, w_pad=0, h_pad=0.0)
        pl.subplots_adjust(hspace=0, wspace=0.1)
        if export_path:
            pl.savefig(export_path)
        if show:
            pl.show()
        return

    if max_display is None:
        max_display = 20

    if sort:
        # order features by the sum of their effect magnitudes
        if multi_class:
            feature_order = np.argsort(
                np.sum(np.mean(np.abs(shap_values), axis=0), axis=0))
        else:
            feature_order = np.argsort(np.sum(np.abs(shap_values), axis=0))
        feature_order = feature_order[-min(max_display, len(feature_order)):]
    else:
        feature_order = np.flip(np.arange(min(max_display, num_features)), 0)

    row_height = 0.4
    if auto_size_plot:
        pl.gcf().set_size_inches(width, len(feature_order) * row_height + 1.5)
    pl.axvline(x=0, color="#999999", zorder=-1)

    if plot_type == "dot":
        for pos, i in enumerate(feature_order):
            pl.axhline(y=pos,
                       color="#cccccc",
                       lw=0.5,
                       dashes=(1, 5),
                       zorder=-1)
            shaps = shap_values[:, i]
            values = None if features is None else features[:, i]
            # shuffle so that the drawing order does not depend on row order
            inds = np.arange(len(shaps))
            np.random.shuffle(inds)
            if values is not None:
                values = values[inds]
            shaps = shaps[inds]
            colored_feature = True
            try:
                values = np.array(
                    values, dtype=np.float64)  # make sure this can be numeric
            except (ValueError, TypeError):
                # non-numeric feature: draw it in a single grey color
                colored_feature = False
            N = len(shaps)
            # bin the SHAP values and stack the points of each bin
            # alternately above and below the row's center line
            nbins = 100
            quant = np.round(nbins * (shaps - np.min(shaps)) /
                             (np.max(shaps) - np.min(shaps) + 1e-8))
            inds = np.argsort(quant + np.random.randn(N) * 1e-6)
            layer = 0
            last_bin = -1
            ys = np.zeros(N)
            for ind in inds:
                if quant[ind] != last_bin:
                    layer = 0
                ys[ind] = np.ceil(layer / 2) * ((layer % 2) * 2 - 1)
                layer += 1
                last_bin = quant[ind]
            ys *= 0.9 * (row_height / np.max(ys + 1))

            if features is not None and colored_feature:
                # trim the color range, but prevent the color range from collapsing
                vmin = np.nanpercentile(values, 5)
                vmax = np.nanpercentile(values, 95)
                if vmin == vmax:
                    vmin = np.nanpercentile(values, 1)
                    vmax = np.nanpercentile(values, 99)
                    if vmin == vmax:
                        vmin = np.min(values)
                        vmax = np.max(values)

                assert features.shape[0] == len(
                    shaps
                ), "Feature and SHAP matrices must have the same number of rows!"

                # plot the nan values in the interaction feature as grey
                nan_mask = np.isnan(values)
                pl.scatter(shaps[nan_mask],
                           pos + ys[nan_mask],
                           color="#777777",
                           vmin=vmin,
                           vmax=vmax,
                           s=16,
                           alpha=alpha,
                           linewidth=0,
                           zorder=3,
                           rasterized=len(shaps) > 500)

                # plot the non-nan values colored by the trimmed feature value
                cvals = values[np.invert(nan_mask)].astype(np.float64)
                cvals_imp = cvals.copy()
                cvals_imp[np.isnan(cvals)] = (vmin + vmax) / 2.0
                cvals[cvals_imp > vmax] = vmax
                cvals[cvals_imp < vmin] = vmin
                pl.scatter(shaps[np.invert(nan_mask)],
                           pos + ys[np.invert(nan_mask)],
                           cmap=colors.red_blue,
                           vmin=vmin,
                           vmax=vmax,
                           s=16,
                           c=cvals,
                           alpha=alpha,
                           linewidth=0,
                           zorder=3,
                           rasterized=len(shaps) > 500)
            else:

                pl.scatter(shaps,
                           pos + ys,
                           s=16,
                           alpha=alpha,
                           linewidth=0,
                           zorder=3,
                           color=color if colored_feature else "#777777",
                           rasterized=len(shaps) > 500)

    elif plot_type == "violin":
        for pos, i in enumerate(feature_order):
            pl.axhline(y=pos,
                       color="#cccccc",
                       lw=0.5,
                       dashes=(1, 5),
                       zorder=-1)

        if features is not None:
            global_low = np.nanpercentile(
                shap_values[:, :len(feature_names)].flatten(), 1)
            global_high = np.nanpercentile(
                shap_values[:, :len(feature_names)].flatten(), 99)
            for pos, i in enumerate(feature_order):
                shaps = shap_values[:, i]
                shap_min, shap_max = np.min(shaps), np.max(shaps)
                rng = shap_max - shap_min
                xs = np.linspace(
                    np.min(shaps) - rng * 0.2,
                    np.max(shaps) + rng * 0.2, 100)
                if np.std(shaps) < (global_high - global_low) / 100:
                    # nearly constant values: add noise before the KDE
                    ds = gaussian_kde(shaps + np.random.randn(len(shaps)) *
                                      (global_high - global_low) / 100)(xs)
                else:
                    ds = gaussian_kde(shaps)(xs)
                ds /= np.max(ds) * 3

                # Smooth the feature values along the SHAP axis with a
                # trailing window over the SHAP-sorted samples.
                # NOTE(review): the window length is hard-coded to 20 samples.
                values = features[:, i]
                smooth_values = np.zeros(len(xs) - 1)
                sort_inds = np.argsort(shaps)
                trailing_pos = 0
                leading_pos = 0
                running_sum = 0
                back_fill = 0
                for j in range(len(xs) - 1):

                    while leading_pos < len(shaps) and xs[j] >= shaps[
                            sort_inds[leading_pos]]:
                        running_sum += values[sort_inds[leading_pos]]
                        leading_pos += 1
                        if leading_pos - trailing_pos > 20:
                            running_sum -= values[sort_inds[trailing_pos]]
                            trailing_pos += 1
                    if leading_pos - trailing_pos > 0:
                        smooth_values[j] = running_sum / (leading_pos -
                                                          trailing_pos)
                        for k in range(back_fill):
                            smooth_values[j - k - 1] = smooth_values[j]
                        # The leading empty cells are filled now. Without
                        # this reset every later step would overwrite the
                        # previously computed values again.
                        back_fill = 0
                    else:
                        back_fill += 1

                vmin = np.nanpercentile(values, 5)
                vmax = np.nanpercentile(values, 95)
                if vmin == vmax:
                    vmin = np.nanpercentile(values, 1)
                    vmax = np.nanpercentile(values, 99)
                    if vmin == vmax:
                        vmin = np.min(values)
                        vmax = np.max(values)
                pl.scatter(shaps,
                           np.ones(shap_values.shape[0]) * pos,
                           s=9,
                           cmap=colors.red_blue,
                           vmin=vmin,
                           vmax=vmax,
                           c=values,
                           alpha=alpha,
                           linewidth=0,
                           zorder=1)
                # map the smoothed values onto the [vmin, vmax] color range
                smooth_values -= vmin
                if vmax - vmin > 0:
                    smooth_values /= vmax - vmin
                # draw the violin segment by segment, skipping thin tails
                for seg in range(len(xs) - 1):
                    if ds[seg] > 0.05 or ds[seg + 1] > 0.05:
                        pl.fill_between(
                            [xs[seg], xs[seg + 1]],
                            [pos + ds[seg], pos + ds[seg + 1]],
                            [pos - ds[seg], pos - ds[seg + 1]],
                            color=colors.red_blue(smooth_values[seg]),
                            zorder=2)

        else:
            parts = pl.violinplot(shap_values[:, feature_order],
                                  range(len(feature_order)),
                                  points=200,
                                  vert=False,
                                  widths=0.7,
                                  showmeans=False,
                                  showextrema=False,
                                  showmedians=False)

            for pc in parts['bodies']:
                pc.set_facecolor(color)
                pc.set_edgecolor('none')
                pc.set_alpha(alpha)

    elif plot_type == "layered_violin":  # courtesy of @kodonnell
        num_x_points = 200
        bins = np.linspace(
            0, features.shape[0], layered_violin_max_num_bins + 1
        ).round(0).astype(
            'int')  # the indices of the feature data corresponding to each bin
        shap_min, shap_max = np.min(shap_values), np.max(shap_values)
        x_points = np.linspace(shap_min, shap_max, num_x_points)

        # loop through each feature and plot:
        for pos, ind in enumerate(feature_order):
            # decide how to handle: if #unique < layered_violin_max_num_bins then split by unique value, otherwise use bins/percentiles.
            # to keep simpler code, in the case of uniques, we just adjust the bins to align with the unique counts.
            feature = features[:, ind]
            unique, counts = np.unique(feature, return_counts=True)
            if unique.shape[0] <= layered_violin_max_num_bins:
                order = np.argsort(unique)
                thesebins = np.cumsum(counts[order])
                thesebins = np.insert(thesebins, 0, 0)
            else:
                thesebins = bins
            nbins = thesebins.shape[0] - 1
            # order the feature data so we can apply percentiling
            order = np.argsort(feature)
            # calculate kdes:
            ys = np.zeros((nbins, num_x_points))
            for i in range(nbins):
                # get shap values in this bin:
                shaps = shap_values[order[thesebins[i]:thesebins[i + 1]], ind]
                # if there's only one element, then we can't
                if shaps.shape[0] == 1:
                    warnings.warn(
                        "not enough data in bin #%d for feature %s, so it'll be ignored. Try increasing the number of records to plot."
                        % (i, feature_names[ind]))
                    # to ignore it, just set it to the previous y-values (so the area between them will be zero). Note ys is already 0, so there's
                    # nothing to do if i == 0
                    if i > 0:
                        ys[i, :] = ys[i - 1, :]
                    continue
                # save kde of them: note that we add a tiny bit of gaussian noise to avoid singular matrix errors
                ys[i, :] = gaussian_kde(shaps + np.random.normal(
                    loc=0, scale=0.001, size=shaps.shape[0]))(x_points)
                # scale it up so that the 'size' of each y represents the size of the bin. For continuous data this will
                # do nothing, but when we've gone with the unique option, this will matter - e.g. if 99% are male and 1%
                # female, we want the 1% to appear a lot smaller.
                size = thesebins[i + 1] - thesebins[i]
                bin_size_if_even = features.shape[0] / nbins
                relative_bin_size = size / bin_size_if_even
                ys[i, :] *= relative_bin_size
            # now plot 'em. We don't plot the individual strips, as this can leave whitespace between them.
            # instead, we plot the full kde, then remove outer strip and plot over it, etc., to ensure no
            # whitespace
            ys = np.cumsum(ys, axis=0)
            # named so it does not shadow the `width` parameter
            violin_width = 0.8
            # 2 is here as we plot both sides of x axis
            scale = ys.max() * 2 / violin_width
            for i in range(nbins - 1, -1, -1):
                y = ys[i, :] / scale
                # if color is a cmap, use it, otherwise use a color
                if color in pl.cm.datad:
                    # a single bin has no range to spread over; avoid 0 / 0
                    fraction = i / (nbins - 1) if nbins > 1 else 0.0
                    c = pl.get_cmap(color)(fraction)
                else:
                    c = color
                pl.fill_between(x_points, pos - y, pos + y, facecolor=c)
        pl.xlim(shap_min, shap_max)

    elif not multi_class and plot_type == "bar":
        feature_inds = feature_order[:max_display]
        y_pos = np.arange(len(feature_inds))
        global_shap_values = np.abs(shap_values).mean(0)
        pl.barh(y_pos,
                global_shap_values[feature_inds],
                0.7,
                align='center',
                color=color)
        pl.yticks(y_pos, fontsize=13)
        pl.gca().set_yticklabels([feature_names[i] for i in feature_inds])

    elif multi_class and plot_type == "bar":
        if class_names is None:
            class_names = ["Class " + str(i) for i in range(len(shap_values))]
        feature_inds = feature_order[:max_display]
        y_pos = np.arange(len(feature_inds))
        left_pos = np.zeros(len(feature_inds))

        # stack one bar segment per class, largest mean |SHAP| first
        class_inds = np.argsort(
            [-np.abs(shap_values[i]).mean() for i in range(len(shap_values))])
        for i, ind in enumerate(class_inds):
            global_shap_values = np.abs(shap_values[ind]).mean(0)
            pl.barh(y_pos,
                    global_shap_values[feature_inds],
                    0.7,
                    left=left_pos,
                    align='center',
                    color=color(i),
                    label=class_names[ind])
            left_pos += global_shap_values[feature_inds]
        pl.yticks(y_pos, fontsize=13)
        pl.gca().set_yticklabels([feature_names[i] for i in feature_inds])
        pl.legend(frameon=False, fontsize=12)

    # draw the color bar
    if color_bar and features is not None and plot_type != "bar" and \
            (plot_type != "layered_violin" or color in pl.cm.datad):
        import matplotlib.cm as cm
        m = cm.ScalarMappable(
            cmap=colors.red_blue if plot_type != "layered_violin" else pl.
            get_cmap(color))
        m.set_array([0, 1])
        cb = pl.colorbar(m, ticks=[0, 1], aspect=1000)
        cb.set_ticklabels(
            [labels['FEATURE_VALUE_LOW'], labels['FEATURE_VALUE_HIGH']])
        cb.set_label(labels['FEATURE_VALUE'], size=12, labelpad=0)
        cb.ax.tick_params(labelsize=11, length=0)
        cb.set_alpha(1)
        cb.outline.set_visible(False)
        bbox = cb.ax.get_window_extent().transformed(
            pl.gcf().dpi_scale_trans.inverted())
        cb.ax.set_aspect((bbox.height - 0.9) * 20)

    # common axis styling
    pl.gca().xaxis.set_ticks_position('bottom')
    pl.gca().yaxis.set_ticks_position('none')
    pl.gca().spines['right'].set_visible(False)
    pl.gca().spines['top'].set_visible(False)
    pl.gca().spines['left'].set_visible(False)
    pl.gca().tick_params(color=axis_color, labelcolor=axis_color)
    pl.yticks(range(len(feature_order)),
              [feature_names[i] for i in feature_order],
              fontsize=13)
    if plot_type != "bar":
        pl.gca().tick_params('y', length=20, width=0.5, which='major')
    pl.gca().tick_params('x', labelsize=11)
    pl.ylim(-1, len(feature_order))
    if plot_type == "bar":
        pl.xlabel(labels['GLOBAL_VALUE'], fontsize=13)
    else:
        pl.xlabel(labels['VALUE'], fontsize=13)
    if export_path:
        pl.savefig(export_path)
    if show:
        pl.show()
Ejemplo n.º 48
0
def PlotDataFromFile(insurance_id):
    """Plot term-occurrence statistics for one insurance account.

    The account label is resolved with ``dbm.GetAccountLabel`` and the table
    ``<label>/<label>.csv`` is loaded.  For every row, all columns after the
    first one are summed to obtain the total number of occurrences of the
    term named in the ``'Unnamed: 0'`` column.  The quartiles of these totals
    are printed and three figures (bar chart, box plot, violin plot) are
    saved inside the ``<label>`` directory.  Finally the third quartile, the
    unsorted ``(term, total)`` pairs, the dataframe and the label are handed
    to ``CutFile``.

    Parameters
    ----------
    insurance_id
        Identifier passed to ``dbm.GetAccountLabel``.
    """
    import matplotlib.pyplot as plt

    insurance_label = dbm.GetAccountLabel(insurance_id)

    # Reads the dataframe
    df = pd.read_csv('%s/%s.csv' % (insurance_label, insurance_label))

    # Drops the columns that hold nothing but NaNs
    df = df.dropna(axis='columns', how='all')

    # Calculates the occurrence frequency of each term over all documents
    # (every column except the first one is treated as a count column)
    count_columns = df.columns[1:]
    terms = []
    terms_freq = []
    for row in df.index:
        terms_freq.append(sum(df[column][row] for column in count_columns))
        terms.append(df['Unnamed: 0'][row])

    # Pairs each term with its frequency.  Materialised as a list so the
    # pairs survive the sort below and can still be consumed by CutFile
    # (a bare zip() is a one-shot iterator on Python 3).
    zipped = list(zip(terms, terms_freq))

    # Sorts the pairs by decreasing frequency
    zipped_sorted = sorted(zipped, key=lambda pair: pair[1], reverse=True)

    # Data array of the sorted frequencies
    data = np.array([freq for _, freq in zipped_sorted])

    # Finds the quartiles and median
    q1, median, q3 = np.percentile(data, [25, 50, 75])

    # Single-argument print calls produce the same text on Python 2 and 3
    print('1st Quartile %s' % (q1,))
    print('Median  %s' % (median,))
    print('3rd Quartile %s' % (q3,))

    # Draws the bar chart: one bar per term, ordered by decreasing frequency
    plt.figure(1)
    plt.bar(np.arange(len(data)), data, align='center', alpha=0.5)

    plt.xlabel('Terms')
    plt.ylabel('Occurences')
    plt.title('Occurrence of terms in documents of %s' % insurance_label)
    plt.savefig('%s/%s_ocorrenceTerms.png'
                % (insurance_label, insurance_label))
    plt.close()

    # Draws the boxplot
    plt.figure(2)
    plt.title('Boxplot of %s' % insurance_label)
    plt.boxplot(data)
    plt.savefig('%s/%s_boxplot.png'
                % (insurance_label, insurance_label))
    plt.close()

    # Draws the violin
    plt.figure(3)
    plt.title('Density and occurrence of terms in docs from %s' %
              insurance_label)
    plt.xlabel('Density')
    plt.ylabel('Occurrences')
    plt.violinplot(data, showmeans=False, showmedians=True)
    plt.savefig('%s/%s_violinplot.png' % (insurance_label, insurance_label))

    # The violin figure is still the current figure while CutFile runs
    CutFile(q3, zipped, df, insurance_label)

    plt.close()
Ejemplo n.º 49
0
def plotpvalueofKolmogorovSmirnovTest(densities, functionName, out=False):
    """Violin-plot the mean Kolmogorov-Smirnov p-values per density setting.

    For each supported setting found in ``densities`` two violins are drawn:
    one for the "shuffled" and one for the "not_shuffled" samples.  Each
    violin is built from one value per iteration, namely the mean over all
    dimensions of ``values["samples"][...]["kstests"][idim][1]``.  A dashed
    horizontal line marks ``p = 0.05``.

    Parameters
    ----------
    densities : dict
        Maps a setting name to a dict of iteration results.  Only the
        settings ``kde_gaussian``, ``kde_epanechnikov``, ``sgde_zero`` and
        ``sgde_boundaries`` are plotted, in that order; settings that are
        absent are skipped and any other keys are ignored.
    functionName : str
        Used to build the output file name when ``out`` is true.
    out : bool
        If true the figure is written with ``savefig`` to
        ``plots/kolmogorov_smirnov_<functionName>`` and closed, otherwise it
        is shown interactively.

    Raises
    ------
    KeyError
        If ``densities`` contains none of the supported settings.
    """
    known_settings = (
        ("kde_gaussian", "KDE \n Gaussian"),
        ("kde_epanechnikov", "KDE \n Epan."),
        ("sgde_zero", "SGDE \n set-to-zero"),
        ("sgde_boundaries", "SGDE \n interp. bound."),
    )
    # Size everything from the settings that are actually plotted, so the
    # data matrix and the tick labels always agree.
    settings = [(setting, label) for setting, label in known_settings
                if setting in densities]
    if not settings:
        raise KeyError("densities contains none of the supported settings: %s"
                       % ", ".join(setting for setting, _ in known_settings))

    numIterations = max(len(densities[setting]) for setting, _ in settings)

    # NOTE: the matrix is zero-initialised, so a setting with fewer
    # iterations than `numIterations` keeps zeros in its remaining rows and
    # those zeros take part in its violin.
    data = np.zeros((numIterations, 2 * len(settings)))
    names = []
    for i, (setting, label) in enumerate(settings):
        names.append(label + " \n shuffled")
        names.append(label + " \n not shuffled")
        for j, values in enumerate(densities[setting].values()):
            numDims = values["config"]["numDims"]
            samples = values["samples"]
            for column, kind in ((2 * i, "shuffled"),
                                 (2 * i + 1, "not_shuffled")):
                kstests = samples[kind]["kstests"]
                # entry [1] of each per-dimension test result is the p-value
                pvalues = np.array(
                    [kstests[idim][1] for idim in range(numDims)],
                    dtype=float)
                data[j, column] = pvalues.mean()

    pos = np.arange(0, len(names))
    xlim = (np.min(pos) - 0.5, np.max(pos) + 0.5)
    fig = plt.figure(figsize=(17, 5))
    plt.violinplot(data,
                   pos,
                   points=60,
                   widths=0.7,
                   showmeans=True,
                   showextrema=True,
                   showmedians=True,
                   bw_method=0.5)
    plt.xticks(pos, names)
    plt.ylabel("$p$-value")
    plt.hlines(0.05, xlim[0], xlim[1], linestyle="--")
    plt.xlim(xlim)

    plt.title("Kolmogorov-Smirnov test",
              fontproperties=load_font_properties())

    if out:
        savefig(fig,
                os.path.join("plots", "kolmogorov_smirnov_%s" % functionName),
                tikz=True)
        plt.close(fig)
    else:
        plt.show()
Ejemplo n.º 50
0
                    np.loadtxt(path_entity + 'susc.csv', delimiter=','))

                c, r = correlation_function(simulated_matrix, J)

                index_ct = find_nearest(ts, critical_temperature)
                dimensionality = dim(c, r, index_ct)
                if not np.isinf(r[-1]):
                    dimensionality_sim.append(dimensionality)
        dimensionality_exp.append(dimensionality_sim)

# One figure with a violin per entry of `dimensionality_exp`, each placed at
# the matching value of `sizes_` on the x axis.
fig, ax = plt.subplots(figsize=(10, 7))

colors = ['blue', 'green', 'red', 'black', 'cyan']

parts = plt.violinplot(
    dimensionality_exp,
    positions=np.array(sizes_),
    showmeans=True,
    showmedians=False,
)

# Paint the i-th violin body with the i-th colour of the palette.
for cont, pc in enumerate(parts['bodies']):
    pc.set_facecolor(colors[cont])

# Only the first two colours get a legend entry; the remaining palette
# colours ('red', 'black', 'cyan') have no patch.
blue_patch = mpatches.Patch(color='blue', label='Graph dimensionality = 1')
green_patch = mpatches.Patch(color='green', label='Graph dimensionality = 2')
legend_handles = [blue_patch, green_patch]

plt.legend(handles=legend_handles)
Ejemplo n.º 51
0
    plt.scatter(mean_r2, tau_diff)

# Label and display the current figure.
plt.xlabel('mean r$^2$')
plt.ylabel('mean(fake taus) - real tau')
plt.title('Amygdala \n 1000 iterations')
plt.show()

#%% Individual violin plots

# One violin per row of `filtered_real`, built from the 'tau' values of the
# rows of `filtered_fake` whose 'unit' equals that row's 'unit_id'.
n_units = len(filtered_real)
for unit in range(n_units):
    this_unit = filtered_real.iloc[unit]
    this_id = this_unit['unit_id']

    unit_mask = filtered_fake['unit'] == this_id
    fake_units = filtered_fake[unit_mask]
    taus = fake_units['tau']

    try:
        plt.violinplot(taus, showmeans=True)
        plt.title('%i' % this_id)
        plt.show()
    except ValueError:
        # best effort: a unit whose violin cannot be drawn is skipped
        continue
Ejemplo n.º 52
0
def main():
    """Command-line driver: reconstruct events from input files and plot results.

    Steps, in order:

    1. extend the parser returned by ``make_argparser()`` with ``--outfile``,
       ``--plot_c`` and the mutually exclusive ``--proton`` / ``--electron``
       flags and parse the command line;
    2. build the sorted list of input files;
    3. set up the image cleaner, the Hillas reconstructor, the event preparer
       and a SIGINT handler that lets the event loop stop early;
    4. open a PyTables table and, for every event yielded by
       ``preper.prepare_event``, store the angular offset ``xi``, the core
       offset ``DeltaR``, the error estimates and ``h_max``;
    5. print the 68-percentile resolutions and the cut flows;
    6. if ``args.plot`` is set, draw histograms and violin plots of the
       stored quantities.

    NOTE(review): all ``args`` attributes other than the three options added
    here are presumably defined by ``make_argparser()`` -- confirm there.
    """

    # your favourite units here
    energy_unit = u.TeV
    angle_unit = u.deg
    dist_unit = u.m

    parser = make_argparser()
    parser.add_argument(
        '-o',
        '--outfile',
        type=str,
        help="if given, write output file with reconstruction results")
    parser.add_argument('--plot_c',
                        action='store_true',
                        help="plot camera-wise displays")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--proton',
                       action='store_true',
                       help="do protons instead of gammas")
    group.add_argument('--electron',
                       action='store_true',
                       help="do electrons instead of gammas")

    args = parser.parse_args()

    # Select the input files: an explicit list takes precedence, otherwise
    # the per-channel sub-directory of `args.indir` is globbed.
    # NOTE(review): `channel` is not assigned in the `infile_list` branch,
    # yet it is read by the `plt.title(channel)` calls in the plot section.
    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
    elif args.proton:
        filenamelist = glob("{}/proton/*gz".format(args.indir))
        channel = "proton"
    elif args.electron:
        filenamelist = glob("{}/electron/*gz".format(args.indir))
        channel = "electron"
    elif args.gamma:
        filenamelist = glob("{}/gamma/*gz".format(args.indir))
        channel = "gamma"
    else:
        raise ValueError("don't know which input to use...")
    filenamelist.sort()

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)
    else:
        print("found {} files".format(len(filenamelist)))

    # not referenced again within this function
    tel_phi = {}
    tel_theta = {}

    # keeping track of events and where they were rejected
    Eventcutflow = CutFlow("EventCutFlow")
    Imagecutflow = CutFlow("ImageCutFlow")

    # takes care of image cleaning
    cleaner = ImageCleaner(mode=args.mode,
                           cutflow=Imagecutflow,
                           wavelet_options=args.raw,
                           skip_edge_events=args.skip_edge_events,
                           island_cleaning=True)

    # the class that does the shower reconstruction
    shower_reco = HillasReconstructor()

    # not referenced again within this function
    shower_max_estimator = ShowerMaxEstimator("paranal")

    preper = EventPreparer(
        cleaner=cleaner,
        hillas_parameters=hillas_parameters,
        shower_reco=shower_reco,
        event_cutflow=Eventcutflow,
        image_cutflow=Imagecutflow,
        # event/image cuts:
        allowed_cam_ids=[],  # means: all
        min_ntel=3,
        min_charge=args.min_charge,
        min_pixel=3)

    # a signal handler to abort the event loop but still do the post-processing
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    # NOTE(review): the bare `except` below catches every failure of this
    # block, not only a missing PyTables.  `RecoEvent` and `reco_table` are
    # bound only if the corresponding statements inside the `try` succeed,
    # while the fallback instantiates `RecoEvent` and later code reads
    # `reco_table` unconditionally.
    try:
        # this class defines the reconstruction parameters to keep track of
        class RecoEvent(tb.IsDescription):
            NTels_trigg = tb.Int16Col(dflt=1, pos=0)
            NTels_clean = tb.Int16Col(dflt=1, pos=1)
            EnMC = tb.Float32Col(dflt=1, pos=2)
            xi = tb.Float32Col(dflt=1, pos=3)
            DeltaR = tb.Float32Col(dflt=1, pos=4)
            ErrEstPos = tb.Float32Col(dflt=1, pos=5)
            ErrEstDir = tb.Float32Col(dflt=1, pos=6)
            h_max = tb.Float32Col(dflt=1, pos=7)

        reco_outfile = tb.open_file(
            args.outfile,
            mode="w",
            # if we don't want to write the event list to disk, need to add more arguments
            **({} if args.store else {
                "driver": "H5FD_CORE",
                "driver_core_backing_store": False
            }))
        reco_table = reco_outfile.create_table("/", "reco_event", RecoEvent)
        reco_event = reco_table.row
    except:
        reco_event = RecoEvent()
        print("no pytables installed?")

    # ##        #######   #######  ########
    # ##       ##     ## ##     ## ##     ##
    # ##       ##     ## ##     ## ##     ##
    # ##       ##     ## ##     ## ########
    # ##       ##     ## ##     ## ##
    # ##       ##     ## ##     ## ##
    # ########  #######   #######  ##

    # not referenced again within this function
    cam_id_map = {}

    # define here which telescopes to loop over
    allowed_tels = None
    # allowed_tels = prod3b_tel_ids("L+F+D")
    for i, filename in enumerate(filenamelist[:args.last]):
        print("file: {i} filename = {filename}".format(i=i, filename=filename))

        source = hessio_event_source(filename,
                                     allowed_tels=allowed_tels,
                                     max_events=args.max_events)

        # loop that cleans and parametrises the images and performs the reconstruction
        for (event, hillas_dict, n_tels, tot_signal, max_signal, pos_fit,
             dir_fit, h_max, err_est_pos,
             err_est_dir) in preper.prepare_event(source):

            shower = event.mc

            # simulated direction in degrees; azimuth is wrapped so that it
            # does not exceed 180 deg
            org_alt = u.Quantity(shower.alt).to(u.deg)
            org_az = u.Quantity(shower.az).to(u.deg)
            if org_az > 180 * u.deg:
                org_az -= 360 * u.deg

            # same direction as (phi, theta), phi wrapped into [-180, 180] deg
            org_the = alt_to_theta(org_alt)
            org_phi = az_to_phi(org_az)
            if org_phi > 180 * u.deg:
                org_phi -= 360 * u.deg
            if org_phi < -180 * u.deg:
                org_phi += 360 * u.deg

            shower_org = linalg.set_phi_theta(org_phi, org_the)
            shower_core = convert_astropy_array([shower.core_x, shower.core_y])

            # xi: angle between fitted and simulated direction;
            # diff: distance between fitted and simulated core (x, y only)
            xi = linalg.angle(dir_fit, shower_org).to(angle_unit)
            diff = linalg.length(pos_fit[:2] - shower_core)

            # print some performance
            print()
            print("xi = {:4.3f}".format(xi))
            print("pos = {:4.3f}".format(diff))
            print("h_max reco: {:4.3f}".format(h_max.to(u.km)))
            print("err_est_dir: {:4.3f}".format(err_est_dir.to(angle_unit)))
            print("err_est_pos: {:4.3f}".format(err_est_pos))

            try:
                # store the reconstruction data in the PyTable
                reco_event["NTels_trigg"] = n_tels["tot"]
                reco_event["NTels_clean"] = len(shower_reco.circles)
                reco_event["EnMC"] = event.mc.energy / energy_unit
                reco_event["xi"] = xi / angle_unit
                reco_event["DeltaR"] = diff / dist_unit
                reco_event["ErrEstPos"] = err_est_pos / dist_unit
                reco_event["ErrEstDir"] = err_est_dir / angle_unit
                reco_event["h_max"] = h_max / dist_unit
                reco_event.append()
                reco_table.flush()

                # running resolution over everything stored so far
                print()
                print("xi res (68-percentile) = {:4.3f} {}".format(
                    np.percentile(reco_table.cols.xi, 68), angle_unit))
                print("core res (68-percentile) = {:4.3f} {}".format(
                    np.percentile(reco_table.cols.DeltaR, 68), dist_unit))
                print("h_max (median) = {:4.3f} {}".format(
                    np.percentile(reco_table.cols.h_max, 50), dist_unit))

            except NoPyTables:
                pass

            if args.plot_c:
                # 3D view: one line per entry of `shower_reco.circles`,
                # starting at `c.pos` and running along `c.a`
                from mpl_toolkits.mplot3d import Axes3D
                fig = plt.figure()
                ax = fig.gca(projection='3d')
                for c in shower_reco.circles.values():
                    points = [
                        c.pos + t * c.a * u.km for t in np.linspace(0, 15, 3)
                    ]
                    ax.plot(*np.array(points).T,
                            linewidth=np.sqrt(c.weight) / 10)
                    ax.scatter(*c.pos[:, None].value, s=np.sqrt(c.weight))
                plt.xlabel("x")
                plt.ylabel("y")
                plt.pause(.1)

                # this plots
                # • the MC shower core
                # • the reconstructed shower core
                # • the used telescopes
                # • and the trace of the Hillas plane on the ground
                plt.figure()
                for tel_id, c in shower_reco.circles.items():
                    plt.scatter(c.pos[0], c.pos[1], s=np.sqrt(c.weight))
                    plt.gca().annotate(tel_id,
                                       (c.pos[0].value, c.pos[1].value))
                    plt.plot([
                        c.pos[0].value - 500 * c.norm[1],
                        c.pos[0].value + 500 * c.norm[1]
                    ], [
                        c.pos[1].value + 500 * c.norm[0],
                        c.pos[1].value - 500 * c.norm[0]
                    ],
                             linewidth=np.sqrt(c.weight) / 10)
                plt.scatter(*pos_fit[:2],
                            c="black",
                            marker="*",
                            label="fitted")
                plt.scatter(*shower_core[:2],
                            c="black",
                            marker="P",
                            label="MC")
                plt.legend()
                plt.xlabel("x")
                plt.ylabel("y")
                plt.xlim(-1400, 1400)
                plt.ylim(-1400, 1400)
                plt.show()

            # leave both loops once the signal handler has been triggered
            if signal_handler.stop: break
        if signal_handler.stop: break

    # final resolution summary over all stored events
    print("\n" + "=" * 35 + "\n")
    print("xi res (68-percentile) = {:4.3f} {}".format(
        np.percentile(reco_table.cols.xi, 68), angle_unit))
    print("core res (68-percentile) = {:4.3f} {}".format(
        np.percentile(reco_table.cols.DeltaR, 68), dist_unit))
    print("h_max (median) = {:4.3f} {}".format(
        np.percentile(reco_table.cols.h_max, 50), dist_unit))

    # print the cutflows for telescopes and camera images
    print("\n\n")
    Eventcutflow("min2Tels trig")
    print()
    Imagecutflow(sort_column=1)

    # if we don't want to plot anything, we can exit now
    if not args.plot:
        return

    # ########  ##        #######  ########  ######
    # ##     ## ##       ##     ##    ##    ##    ##
    # ##     ## ##       ##     ##    ##    ##
    # ########  ##       ##     ##    ##     ######
    # ##        ##       ##     ##    ##          ##
    # ##        ##       ##     ##    ##    ##    ##
    # ##        ########  #######     ##     ######

    # histogram of the reconstructed h_max
    plt.figure()
    plt.hist(reco_table.cols.h_max, bins=np.linspace(000, 15000, 51, True))
    plt.title(channel)
    plt.xlabel("h_max reco")
    plt.pause(.1)

    # histogram of the angular offset xi (log-scaled counts)
    figure = plt.figure()
    xi_edges = np.linspace(0, 5, 20)
    plt.hist(reco_table.cols.xi, bins=xi_edges, log=True)
    plt.xlabel(r"$\xi$ / deg")
    if args.write:
        save_fig('{}/reco_xi_{}'.format(args.plots_dir, args.mode))
    plt.pause(.1)

    # histogram of the estimated direction error
    plt.figure()
    plt.hist(reco_table.cols.ErrEstDir[:], bins=np.linspace(0, 20, 50))
    plt.title(channel)
    plt.xlabel("beta")
    plt.pause(.1)

    # histogram of log10(actual offset / estimated direction error)
    plt.figure()
    plt.hist(np.log10(reco_table.cols.xi[:] / reco_table.cols.ErrEstDir[:]),
             bins=50)
    plt.title(channel)
    plt.xlabel("log_10(xi / beta)")
    plt.pause(.1)

    # convert the xi-list into a dict with the number of used telescopes as keys
    xi_vs_tel = {}
    for xi, ntel in zip(reco_table.cols.xi, reco_table.cols.NTels_clean):
        if ntel not in xi_vs_tel:
            xi_vs_tel[ntel] = [xi]
        else:
            xi_vs_tel[ntel].append(xi)

    print(args.mode)
    for ntel, xis in sorted(xi_vs_tel.items()):
        print("NTel: {} -- median xi: {}".format(ntel, np.median(xis)))
        # print("histogram:", np.histogram(xis, bins=xi_edges))

    # create a list of energy bin-edges and -centres for violin plots
    # NOTE(review): EnMC is stored above as energy / TeV, while these edges
    # (2..8 in log10) and the axis label below read as log10(E / GeV) --
    # confirm that the units line up.
    Energy_edges = np.linspace(2, 8, 13)
    Energy_centres = (Energy_edges[1:] + Energy_edges[:-1]) / 2.

    # convert the xi-list in to an energy-binned dict with the bin centre as keys
    xi_vs_energy = {}
    for en, xi in zip(reco_table.cols.EnMC, reco_table.cols.xi):

        # get the bin number this event belongs into
        # NOTE(review): the index is not clipped -- a value below the first
        # edge gives -1 (i.e. the last centre) and a value at or above the
        # last edge indexes past the end of `Energy_centres`.
        sbin = np.digitize(np.log10(en), Energy_edges) - 1

        # the central value of the bin is the key for the dictionary
        if Energy_centres[sbin] not in xi_vs_energy:
            xi_vs_energy[Energy_centres[sbin]] = [xi]
        else:
            xi_vs_energy[Energy_centres[sbin]] += [xi]

    # plotting the angular error as violin plots with binning in
    # number of telescopes and shower energy
    figure = plt.figure()
    plt.subplot(211)
    plt.violinplot([np.log10(a) for a in xi_vs_tel.values()],
                   [a for a in xi_vs_tel.keys()],
                   points=60,
                   widths=.75,
                   showextrema=False,
                   showmedians=True)
    plt.xlabel("Number of Telescopes")
    plt.ylabel(r"log($\xi$ / deg)")
    plt.ylim(-3, 2)
    plt.grid()

    plt.subplot(212)
    plt.violinplot([np.log10(a) for a in xi_vs_energy.values()],
                   [a for a in xi_vs_energy.keys()],
                   points=60,
                   widths=(Energy_edges[1] - Energy_edges[0]) / 1.5,
                   showextrema=False,
                   showmedians=True)
    plt.xlabel(r"log(Energy / GeV)")
    plt.ylabel(r"log($\xi$ / deg)")
    plt.ylim(-3, 2)
    plt.grid()

    plt.tight_layout()
    if args.write:
        save_fig('{}/reco_xi_vs_E_NTel_{}'.format(args.plots_dir, args.mode))

    plt.pause(.1)

    # convert the diffs-list into a dict with the number of used telescopes as keys
    diff_vs_tel = {}
    for diff, ntel in zip(reco_table.cols.DeltaR, reco_table.cols.NTels_clean):
        if ntel not in diff_vs_tel:
            diff_vs_tel[ntel] = [diff]
        else:
            diff_vs_tel[ntel].append(diff)

    # convert the diffs-list in to an energy-binned dict with the bin centre as keys
    diff_vs_energy = {}
    for en, diff in zip(reco_table.cols.EnMC, reco_table.cols.DeltaR):

        # get the bin number this event belongs into
        # (same unclipped binning as for xi_vs_energy above)
        sbin = np.digitize(np.log10(en), Energy_edges) - 1

        # the central value of the bin is the key for the dictionary
        if Energy_centres[sbin] not in diff_vs_energy:
            diff_vs_energy[Energy_centres[sbin]] = [diff]
        else:
            diff_vs_energy[Energy_centres[sbin]] += [diff]

    # plotting the core position error as violin plots with binning in
    # number of telescopes and shower energy
    plt.figure()
    plt.subplot(211)
    plt.violinplot([np.log10(a) for a in diff_vs_tel.values()],
                   [a for a in diff_vs_tel.keys()],
                   points=60,
                   widths=.75,
                   showextrema=False,
                   showmedians=True)
    plt.xlabel("Number of Telescopes")
    plt.ylabel(r"log($\Delta R$ / m)")
    plt.grid()

    plt.subplot(212)
    plt.violinplot([np.log10(a) for a in diff_vs_energy.values()],
                   [a for a in diff_vs_energy.keys()],
                   points=60,
                   widths=(Energy_edges[1] - Energy_edges[0]) / 1.5,
                   showextrema=False,
                   showmedians=True)
    plt.xlabel(r"log(Energy / GeV)")
    plt.ylabel(r"log($\Delta R$ / m)")
    plt.grid()

    plt.tight_layout()
    if args.write:
        save_fig('{}/reco_dist_vs_E_NTel_{}'.format(args.plots_dir, args.mode))
    plt.show()
Ejemplo n.º 53
0
def plot_hex_and_violin(abscissa, ordinate, bin_edges, extent=None,
                        xlabel="", ylabel="", zlabel="", do_hex=True, do_violin=True,
                        cm=plt.cm.inferno, axis=None, v_padding=.015, **kwargs):

    """
    Draw a 2D hexbin plot and/or a binned violin plot of `ordinate` against
    `abscissa`.

    Parameters
    ----------
    abscissa, ordinate : arrays
        the coordinates of the data to plot
    bin_edges : array
        bin edges along the abscissa; every violin collects the ordinate
        values of one bin and is drawn at the centre of that bin
    extent : 4-tuple of floats (default: None)
        extension of the abscissa, ordinate; given as is to plt.hexbin and
        also used for the axis limits
    xlabel, ylabel : strings (defaults: "")
        labels for the two axes of either plot
    zlabel : string (default: "")
        label for the colorbar of the hexbin plot
    do_hex, do_violin : bools (defaults: True)
        whether or not to do the respective plots
    cm : colour map (default: plt.cm.inferno)
        colour map to be used for the hexbin plot
    axis : matplotlib axes (default: None)
        if given and only one plot type is drawn, it is made the current
        axes; if both are drawn, it is switched off and its area is split
        into two new axes (hexbin on top, violins below); without it and
        with both plot types, plt.subplot(211) / plt.subplot(212) are used
    v_padding : float (default: .015)
        fraction of the height of `axis` that each of the two new axes
        stays away from the vertical middle
    kwargs : args dictionary
        more arguments to be passed to plt.hexbin
    """

    both_plots = do_hex and do_violin

    if axis:
        if both_plots:
            # the given axis only lends its area to two freshly made axes
            from matplotlib.axes import Axes
            from matplotlib.transforms import Bbox
            axis_bbox = axis.get_position()
            axis.axis("off")
        else:
            plt.sca(axis)

    # the regular 2D hexbin plot
    if do_hex:

        if both_plots:
            if axis:
                # upper part of the area: raise the lower edge
                hex_corners = axis_bbox.get_points().copy()  # [[x0, y0], [x1, y1]]
                full_height = np.diff(hex_corners, axis=0)[0, 1]
                hex_corners[0, 1] += full_height*(.5+v_padding)
                figure = plt.gcf()
                ax_hex = Axes(figure, Bbox.from_extents(hex_corners))
                figure.add_axes(ax_hex)
                plt.sca(ax_hex)
                ax_hex.set_xticklabels([])
            else:
                plt.subplot(211)

        plt.hexbin(abscissa, ordinate,
                   gridsize=40, extent=extent, cmap=cm, **kwargs)
        colour_bar = plt.colorbar()
        colour_bar.set_label(zlabel)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        if extent:
            plt.xlim(extent[:2])
            plt.ylim(extent[2:])

    # the violin plot: one violin per populated bin
    if do_violin:

        if both_plots:
            if axis:
                # lower part of the area: lower the upper edge
                vio_corners = axis_bbox.get_points().copy()  # [[x0, y0], [x1, y1]]
                full_height = np.diff(vio_corners, axis=0)[0, 1]
                vio_corners[1, 1] -= full_height*(.5+v_padding)
                figure = plt.gcf()
                ax_vio = Axes(figure, Bbox.from_extents(vio_corners))
                figure.add_axes(ax_vio)
                plt.sca(ax_vio)
            else:
                plt.subplot(212)

        bin_centres = (bin_edges[1:]+bin_edges[:-1])/2.
        last_bin = len(bin_centres)-1

        # group the ordinate values by the centre of the bin their abscissa
        # falls into; outliers end up in the first or last bin
        val_vs_dep = {}
        for dep, val in zip(abscissa, ordinate):
            ibin = np.clip(np.digitize(dep, bin_edges)-1, 0, last_bin)
            val_vs_dep.setdefault(bin_centres[ibin], []).append(val)

        keys = sorted(val_vs_dep)
        vals = [val_vs_dep[centre] for centre in keys]

        # every violin is 90 % as wide as the bin it represents
        bin_widths = bin_edges[1:]-bin_edges[:-1]
        widths = [wid*.9
                  for cen, wid in zip(bin_centres, bin_widths)
                  if cen in keys]

        plt.violinplot(vals, keys,
                       points=60, widths=widths,
                       showextrema=False, showmedians=True)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)

        if extent:
            # adding a colour bar to the hexbin plot reduces its width by 1/5
            # so the violin plot gets a wider x-range to stay in sync with it
            if do_hex:
                right_edge = (5.*extent[1] - extent[0])/4.
            else:
                right_edge = extent[1]
            plt.xlim([extent[0], right_edge])
            # for good measure also sync the vertical extent
            plt.ylim(extent[2:])

        plt.grid()
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Compare the glucose mass yield of three simulation variants.

        The variants found under ``inputDir`` are sorted into three slots by
        the second value returned from ``get_disabled_constraints``:
        ``None`` (slot 0), an empty collection (slot 1), or exactly
        ``ADDITIONAL_DISABLED_CONSTRAINTS`` (slot 2).  For each slot the
        per-timestep yield (growth divided by the negated glucose mass flux)
        of all its cells is collected.  Welch t-test p-values are printed for
        every pair of slots and the three yield distributions are drawn as
        violins next to a dashed line at ``VALIDATION_YIELD``.

        Parameters
        ----------
        inputDir : str
            Directory with the variant simulation output; must exist.
        plotOutDir : str
            Directory the figure is exported to (created if necessary).
        plotOutFileName : str
            File name handed to ``exportFigure``.
        simDataFile, validationDataFile
            Not used by this method.
        metadata
            Handed to ``exportFigure``.

        Raises
        ------
        Exception
            If ``inputDir`` is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            # call syntax is valid on both Python 2 and Python 3
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        all_variants = ap.get_variants()

        # -1 marks a slot for which no matching variant has been found
        variants = -np.ones(N_VARIANTS)
        for variant in all_variants:
            _, additional_disabled = get_disabled_constraints(variant)
            if additional_disabled is None:
                variants[0] = variant
            elif len(additional_disabled) == 0:
                variants[1] = variant
            elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
                variants[2] = variant

        if np.any(variants < 0):
            print('Not enough variants to analyze')
            return

        # NOTE: unpickling executes arbitrary code; only point this at
        # simulation output that is trusted.
        with open(
                os.path.join(inputDir, 'kb',
                             constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
            sim_data = cPickle.load(f)

        all_yields = []
        for variant in variants:
            yields = []

            for sim_dir in ap.get_cells(variant=[variant]):
                sim_out_dir = os.path.join(sim_dir, 'simOut')

                # Listeners used
                fba_reader = TableReader(
                    os.path.join(sim_out_dir, 'FBAResults'))
                main_reader = TableReader(os.path.join(sim_out_dir, 'Main'))
                mass_reader = TableReader(os.path.join(sim_out_dir, 'Mass'))

                # Load data
                time_step_sec = main_reader.readColumn('timeStepSec')

                external_fluxes = fba_reader.readColumn(
                    'externalExchangeFluxes')
                external_molecules = fba_reader.readAttribute(
                    'externalMoleculeIDs')

                dry_mass = MASS_UNITS * mass_reader.readColumn('dryMass')
                growth = GROWTH_UNITS * mass_reader.readColumn(
                    'growth') / time_step_sec

                # Calculate growth yield on glucose
                glc_idx = external_molecules.index(GLUCOSE_ID)
                glc_flux = FLUX_UNITS * external_fluxes[:, glc_idx]
                glc_mw = sim_data.getter.getMass([GLUCOSE_ID])[0]
                glc_mass_flux = glc_flux * glc_mw * dry_mass
                glc_mass_yield = growth / -glc_mass_flux

                # the first entry of every cell is left out
                yields.extend(glc_mass_yield[1:].asNumber())

            all_yields.append(yields)

        # Welch's t-test (unequal variances) for every pair of variants
        for i, v1 in enumerate(variants):
            for j, v2 in enumerate(variants[i + 1:]):
                _, p = stats.ttest_ind(all_yields[i],
                                       all_yields[i + j + 1],
                                       equal_var=False)
                print('p={:.2e} for variant {} vs variant {}'.format(
                    p, v1, v2))

        plt.figure(figsize=(4, 4))
        xticks = range(N_VARIANTS)

        # Plot data
        plt.violinplot(all_yields, xticks, showmeans=False, showextrema=False)
        plt.axhline(VALIDATION_YIELD, linestyle='--', color='#eb7037')

        # Format axes
        ax = plt.gca()
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.xticks(xticks, VARIANT_LABELS)
        plt.ylabel('Glucose Yield\n(g cell / g glucose)')

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
Ejemplo n.º 55
0
def do_analyse(feature1, feature2, feature3):
	"""Compare GaussianNB, SVC and KNN on all features and on the chosen ones.

	1) Cross-validate the three untuned classifiers (5 folds)
		- on all the features of ``dataset`` (normalised with ``axis=0``)
		- on the features selected through ``set_data_analyse``
	2) Save a violin plot of those cross-validation scores, grouped by
	   algorithm, to ``results_<dataset_name>/Comparison_NOT_optimized.png``
	3) Repeat the evaluation with hand-picked hyper-parameters (10 folds) on
	   both feature sets, printing for each classifier the macro F1 score on a
	   25 % hold-out split, the mean cross-validation score and the confusion
	   matrix
	4) When ``feature3`` is falsy, call ``plot_results_2D`` with each tuned
	   classifier fitted on the chosen features
	5) Draw a violin plot of the tuned cross-validation scores, grouped by
	   algorithm

	Parameters
	----------
	feature1, feature2, feature3
		Forwarded unchanged to ``set_data_analyse``, which returns the reduced
		``X``, ``y`` and a feature description that is handed on to
		``plot_results_2D``.
	"""
	folder = "results_{0}".format(dataset_name)
	if not os.path.exists(folder):
		os.makedirs(folder)

	scoring = 'accuracy'
	separator = '/////////////////////////////////////////////'

	def cross_validate(clf, X, y, n_splits):
		"""Return the cross-validation scores of ``clf`` over unshuffled folds."""
		# No random_state here: KFold only uses it when shuffle=True, and
		# scikit-learn >= 0.24 raises ValueError if it is set without
		# shuffling. Older releases ignored it, so the folds are unchanged.
		kfold = model_selection.KFold(n_splits=n_splits)
		return model_selection.cross_val_score(clf, X, y, cv=kfold, scoring=scoring)

	def report_holdout(clf, X, y, score_label, cvs, plot_name=None, features=None):
		"""Fit ``clf`` on a 75/25 split and print its hold-out metrics."""
		X_train, X_test, y_train, y_test = model_selection.train_test_split(
			X, y, test_size=0.25, random_state=0)
		clf.fit(X_train, y_train)
		# The 2-D visualisation is only requested for the reduced feature set
		# and only when no third feature was given.
		if plot_name is not None and not feature3:
			plot_results_2D(X_test, y_test, features, plot_name, clf, np.mean(cvs))
		y_pred = clf.predict(X_test)
		print(score_label, metrics.f1_score(y_test, y_pred, average="macro"))
		print('cross_val_score mean: ', np.mean(cvs))
		print(metrics.confusion_matrix(y_test, y_pred))

	def draw_comparison(all_scores, chosen_scores, labels):
		"""Open a figure with paired violins (all vs. chosen features)."""
		plt.figure(figsize=(7, 6))
		# The two groups are offset by -0.4 / +0.4 around the even tick
		# positions so that each algorithm shows a pair of violins.
		plt.violinplot(all_scores, positions=np.arange(len(all_scores)) * 2.0 - 0.4,
			showmeans=True, widths=0.6)
		plt.violinplot(chosen_scores, positions=np.arange(len(chosen_scores)) * 2.0 + 0.4,
			showmeans=True, widths=0.6)
		plt.xticks(range(0, len(labels) * 2, 2), labels)
		plt.xlim(-2, len(labels) * 2)

	# ------------------------------------------------------------------
	# Untuned models on all features and on the chosen features
	# ------------------------------------------------------------------
	models = [
		('NB', GaussianNB()),
		('SVM', SVC(gamma='auto')),
		('KNN', KNeighborsClassifier()),
	]

	# Full feature set; it is reused by every "all the features" run below.
	X_all = normalize(dataset.data, axis=0)
	y_all = dataset.target
	# Reduced feature set.
	X2, y2, features = set_data_analyse(feature1, feature2, feature3)

	results1 = []  # scores on all features
	results2 = []  # scores on the chosen features
	names = []
	for name, model in models:
		results1.append(cross_validate(model, X_all, y_all, 5))
		results2.append(cross_validate(model, X2, y2, 5))
		names.append(name)

	# Comparison plot of NOT tuned algorithms
	draw_comparison(results1, results2, names)
	plt.ylim(0.3, 1)
	plt.tight_layout()
	# NOTE(review): the legend entries come from empty line plots with fixed
	# colours, while the violins keep matplotlib's default colours - confirm
	# that the legend colours match what is drawn.
	plt.plot([], c='#2C7BB6', label='30 features')
	plt.plot([], c='#D7191C', label='2 features')
	plt.legend()
	plt.title('Comparison of untuned algorithms on 30 an 2 features')
	plt.savefig(("./{0}/Comparison_NOT_optimized.png".format(folder)), bbox_inches='tight')
	plt.close('all')

	# ------------------------------------------------------------------
	# Models with hand-picked hyper-parameters
	# ------------------------------------------------------------------
	results1 = []
	results2 = []
	names = []

	# GaussianNB on all the features
	print(separator)
	print('Performing GaussianNB on all the features\n')
	clf = GaussianNB()
	cvs = cross_validate(clf, X_all, y_all, 10)
	results1.append(cvs)
	names.append('NB')
	report_holdout(clf, X_all, y_all, 'GaussianNB score: ', cvs)

	# GaussianNB on the chosen features
	print(separator)
	print('Performing Gaussian on features:\n', feature1, '\n', feature2, '\n', feature3)
	X, y, features = set_data_analyse(feature1, feature2, feature3)
	clf = GaussianNB()
	cvs = cross_validate(clf, X, y, 10)
	results2.append(cvs)
	report_holdout(clf, X, y, 'GaussianNB on 2 features score: ', cvs,
		plot_name='GaussianNB', features=features)

	# SVC on all the features
	print(separator)
	print('Performing SVC on all the features\n')
	clf = SVC(C=100, kernel='rbf', gamma='scale')
	cvs = cross_validate(clf, X_all, y_all, 10)
	results1.append(cvs)
	names.append('SVC')
	report_holdout(clf, X_all, y_all, 'SVC score: ', cvs)

	# SVC on the chosen features
	print(separator)
	print('Performing SVC on features:\n', feature1, '\n', feature2, '\n', feature3)
	X, y, features = set_data_analyse(feature1, feature2, feature3)
	clf = SVC(C=100, kernel='rbf', gamma='scale', random_state=None)
	cvs = cross_validate(clf, X, y, 10)
	results2.append(cvs)
	report_holdout(clf, X, y, 'SVC on 2 features score: ', cvs,
		plot_name='SVC', features=features)

	# KNeighborsClassifier on all the features
	print(separator)
	print('Performing KNeighborsClassifier on all the features\n')
	clf = KNeighborsClassifier(n_neighbors=1, weights='uniform')
	cvs = cross_validate(clf, X_all, y_all, 10)
	print('mean cvs: ', np.mean(cvs))
	results1.append(cvs)
	names.append('KNN')
	report_holdout(clf, X_all, y_all, 'KNeighborsClassifier score: ', cvs)

	# KNeighborsClassifier on the chosen features
	print(separator)
	print('Performing KNN on features:\n', feature1, '\n', feature2, '\n', feature3)
	X, y, features = set_data_analyse(feature1, feature2, feature3)
	clf = KNeighborsClassifier(n_neighbors=5, weights='uniform')
	cvs = cross_validate(clf, X, y, 10)
	results2.append(cvs)
	report_holdout(clf, X, y, 'KNeighborsClassifier score: ', cvs,
		plot_name='KN', features=features)

	# Comparison plot of tuned algorithms
	# NOTE(review): this figure is neither saved nor closed here - confirm
	# that this is intended.
	draw_comparison(results1, results2, names)
Ejemplo n.º 56
0
                       grid.get_obs(),
                       target_policy,
                       behaviour_policy,
                       double=False,
                       step_size=0.1)
    dq_agent = GeneralQ(grid._layout.size,
                        4,
                        grid.get_obs(),
                        target_policy,
                        behaviour_policy,
                        double=True,
                        step_size=0.1)
    mean_reward_q_learning.append(run_experiment(grid, q_agent, int(2e5)))
    mean_reward_double_q_learning.append(
        run_experiment(grid, dq_agent, int(2e5)))
# Distribution of the mean rewards collected above, one violin per algorithm.
plt.violinplot([mean_reward_q_learning, mean_reward_double_q_learning])
plt.xticks([1, 2], ["Q-learning", "Double Q-learning"], rotation=60, size=12)
plt.ylabel("average reward during learning", size=12)
ax = plt.gca()
# Axes.set_axis_bgcolor was deprecated in Matplotlib 2.0 and removed in 2.2;
# set_facecolor is its replacement.
ax.set_facecolor('white')
ax.grid(False)

# Action values of the agents bound to q_agent / dq_agent at this point,
# reshaped to the grid layout with one trailing axis of size 4.
q = q_agent.q_values.reshape(grid._layout.shape + (4, ))
plot_action_values(q, vmin=-5)

q = dq_agent.q_values.reshape(grid._layout.shape + (4, ))
plot_action_values(q, vmin=-5)
"""The plots above show 1) the distributions of average rewards (over all learning steps) over the 20 experiments per algorithm, 2) the action values for Q-learning, and 3) the action values for Double Q-learning.

**[10 pts]** Explain why Double Q-learning has a higher average reward. Use at most four sentences, and discuss at least a) the dynamics of the algorithm, b) how this affects behaviour, and c) why the resulting behaviour yields higher rewards for Double Q-learning than for Q-learning.
Ejemplo n.º 57
0
    n_lines = len(self.lines)
    self.param_names = self.lines[0].strip().split(' ')
    self.params = []
    for idx in range(1,n_lines):
      self.params.append([float(num) for num in self.lines[idx].split()])
     
# Build a reader for the parameter file named by fname_out.
param_reader = ParameterFileReader(fname_param_in=fname_out)

#------------------------------------------------------------------------------
#%% make density plots

#norm_pareto_set = pareto_set
#norm_pareto_set[:,0] = norm_pareto_set[:,0]/0.02
#norm_pareto_set[:,1] = norm_pareto_set[:,1]/27.2
#norm_pareto_set[:,2] = norm_pareto_set[:,2]/39.5
#norm_pareto_set[:,3] = norm_pareto_set[:,3]/7.2
#norm_pareto_set[:,4] = norm_pareto_set[:,4]
#norm_pareto_set[:,5] = norm_pareto_set[:,5]

# One violin per column 0..5 of the Pareto set, all drawn on the current axes
# with medians shown instead of means.
for _column in range(6):
    plt.violinplot(sim_results.pareto_set[:, _column],
                   showmeans=False,
                   showmedians=True)

# 1x6 row that is zero everywhere except columns 1, 2 and 3, which are one.
min_error = np.zeros((1, 6))
min_error[:, 1:4] = 1
Ejemplo n.º 58
0
# Second result set; data1 is loaded before this point.
with open("max_lifetime_" + suffix + ".dat") as fp:
    data2 = json.load(fp)

lifetime, lifetime2 = data1["lifetime"], data2["lifetime"]
consumption1, consumption2 = data1["consumption"], data2["consumption"]

# The last lifetime entry is left out of the minimum and of the violins.
# NOTE(review): presumably the last entry is a special node - confirm.
for _series in (lifetime, lifetime2):
    print("Minimum lifetime: ", min(_series[:-1]))
for _series in (consumption1, consumption2):
    print("Global consumption: ", 1000 * sum(_series))

# Two violins at x = 1 and x = 3 showing mean, median and extrema.
plt.violinplot([lifetime[:-1], lifetime2[:-1]],
               positions=[1, 3],
               widths=0.7,
               showmeans=True,
               showextrema=True,
               showmedians=True)
plt.show()

# Scatter of the first data set: every point but the last as small dots,
# the last one as a larger square.
xs = data1["xs"]
ys = data1["ys"]
plt.scatter(xs[:-1], ys[:-1], s=20)
plt.scatter(xs[-1], ys[-1], s=50, marker="s")

# Six individual points highlighted, each with its own marker shape.
for _index, _marker in ((11, "p"), (28, "h"), (39, "*"),
                        (50, "^"), (61, "<"), (95, ">")):
    plt.scatter(xs[_index], ys[_index], s=100, marker=_marker)
Ejemplo n.º 59
0
def plot_correction_effect(dataframes: dict):
    """Draw two violins comparing uncorrected and corrected ET.

    For every entry of ``dataframes`` the mean of its ``E`` attribute and the
    mean of its ``E_cor`` attribute are each multiplied by 365. The first
    violin shows the ``E`` values, the second the ``E_cor`` values.

    NOTE(review): the factor 365 presumably converts a daily mean into a
    yearly total - confirm the units of ``E`` / ``E_cor``.
    """
    frames = [dataframes[key] for key in dataframes]
    uncorrected = [frame.E.mean() * 365 for frame in frames]
    corrected = [frame.E_cor.mean() * 365 for frame in frames]
    plt.violinplot([uncorrected, corrected])
Ejemplo n.º 60
0
# Colour assigned to each cell type.
cdict = dict(zip(cells, colors))
# Tick labels: each label class reduced to its upper-case letters.
yclabels = [''.join([l for l in yc if l.isupper()]) for yc in yclass]


plt.figure(figsize=(len(yclass), 4))
patches = []
for cell in cells:
	# One list of 'C' values per label class for this cell type.
	C = []
	for yc in yclass:
		fname = 'train/mSelect'+yc+cell+'.pkl'
		# NOTE: pickle.load can execute arbitrary code; these files must come
		# from a trusted source. The context manager closes the handle, which
		# the previous bare open() call left to the garbage collector.
		with open(fname, 'rb') as fh:
			metrics = pickle.load(fh)
		this_C = metrics['C']
		# If every value is identical, nudge the first one down by 1 %.
		# NOTE(review): presumably this keeps the violin's density estimate
		# from degenerating on constant data - confirm.
		if np.array_equal(this_C, np.array([this_C[0]]*len(this_C))):
			this_C[0] -= this_C[0]*0.01
		C.append(this_C)
	violin = plt.violinplot(C, widths=0.7, showmeans=True, showextrema=True)
	color = cdict[cell]
	patches.append(mpatches.Patch(color=color, alpha=0.5, label=cell))
	plt.setp(violin['bodies'], facecolor=color, edgecolor=color)
	for key in ['cbars', 'cmins', 'cmeans', 'cmaxes']:
		# linewidth is a number in matplotlib's API; it used to be passed as
		# the string '1'.
		plt.setp(violin[key], color=color, linewidth=1, alpha=0.5)

# Legend outside the axes on the right, one patch per cell type.
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., prop={'size':8})
# violinplot places the violins at x = 1..len(yclass) by default.
plt.xticks(np.arange(len(yclass))+1, yclabels, fontsize=10, color='k')
plt.yscale('log')
plt.yticks(fontsize=10, color='k')
plt.xlabel('Label Class', fontsize=12, color='k')
plt.ylabel('Cost', fontsize=12, color='k')

plt.savefig('figures/CSelect'+ttype+'.svg', bbox_inches='tight')