예제 #1
0
def answerbox(responses):
    """Show a box plot of the normalized response matrix.

    ``responses`` is turned into a matrix by ``metrics.responsematrix``,
    normalized by ``metrics.normalize_responses``, converted to an ndarray
    with singleton dimensions squeezed out, and handed to ``pl.boxplot``.
    The figure is displayed immediately with ``pl.show()``; nothing is
    returned.
    """
    raw_matrix = metrics.responsematrix(responses)
    normalized = metrics.normalize_responses(raw_matrix)
    plot_values = np.squeeze(np.asarray(normalized))

    pl.boxplot(plot_values)
    pl.title("Responses")
    pl.xlabel("Question number")
    pl.ylabel("Numeric representation of question answers")
    pl.show()
def boxplot_poi(data, var_name):
    """
    Makes a box plot of variable "var_name", split into POI and non-POI
    records, and overlays the individual values as jittered red dots.

    Values equal to the string "NaN" are plotted as 0.

    :param data: data dict with enron data; every value is a record that is
        indexed with ``var_name`` and with ``"poi"``
    :param var_name: name of variable to plot
    :return: None -- the plot is drawn on the current pyplot figure
    """
    poi_v = []
    no_poi_v = []
    # .values() works on both Python 2 and Python 3 dicts, whereas
    # .itervalues() only exists on Python 2.
    for p in data.values():
        value = p[var_name]
        if value == "NaN":
            value = 0
        if p["poi"] == 1:
            poi_v.append(value)
        else:
            no_poi_v.append(value)
    plt.xlabel("POI")
    plt.ylabel(var_name)
    plt.boxplot([poi_v, no_poi_v])
    plt.xticks([1, 2], ["POI", "Not a POI"])
    # Scatter the raw points around each box position (x = 1 and x = 2) with a
    # small random horizontal jitter so overlapping values stay visible.
    # http://stackoverflow.com/a/29780292/1952996
    for i, v in enumerate([poi_v, no_poi_v]):
        y = v
        x = np.random.normal(i + 1, 0.04, size=len(y))
        plt.plot(x, y, "r.", alpha=0.2)
예제 #3
0
def plot_box_plot(col, title=None, verbose=True):
    """Draw a box plot of a single feature on a new figure.

    Parameters
    ----------
    col : np.array
        Values to plot; passed through ``utils.check_col`` before plotting.
    title : str or None
        Title of the plot; no title is set when this is falsy.
    verbose : boolean
        If True, display the graph with ``plt.show()``.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the plot

    """
    checked_col = utils.check_col(col)

    figure = plt.figure()
    boxplot(checked_col)

    if title:
        plt.title(title)
    if verbose:
        plt.show()

    return figure
예제 #4
0
파일: roi.py 프로젝트: cindeem/nipy
 def plot_feature(self, fid):
     """
     Box-plot the values of feature `fid` within this ROI on a new figure.

     The values come from ``self.get_feature(fid)``; the title names the
     feature and ``self.id``. Nothing is returned.
     """
     feature_values = self.get_feature(fid)
     import matplotlib.pylab as pylab_api
     pylab_api.figure()
     pylab_api.boxplot(feature_values)
     pylab_api.title('Distribution of %s within %s' % (fid, self.id))
예제 #5
0
def create_and_save_boxplot(data, title, x_titles, metric, folder_to_save_to, display=False):
    """Box-plot `data`, save the figure as a PNG and optionally display it.

    The x ticks 1..len(x_titles) are labelled with `x_titles`, the y axis
    with `metric`. The output path is `folder_to_save_to` concatenated
    directly with the title (spaces replaced by underscores) and ``.png``;
    no path separator is inserted between folder and file name. The figure
    is closed before returning.
    """
    figure = plt.figure()
    pylab.boxplot(data)

    tick_positions = range(1, len(x_titles) + 1)
    pylab.xticks(tick_positions, x_titles)
    plt.ylabel(metric)
    plt.title(title)

    output_name = title.replace(" ", "_")
    figure.savefig('%s%s.png' % (folder_to_save_to, output_name))

    if display:
        plt.show()
    plt.close(figure)
예제 #6
0
def ca_box_plot_features(index):
    '''
    Relationship between a training feature and extraversion: box-plots the
    same feature column from the "+1" and the "-1" split-class files side by
    side (means shown, outliers hidden) and displays the figure.
    :param index: training feature number (the numbering is kept in Evernote)
    :return: None
    '''
    # The feature is read from column index + 1 of both files.
    column = index + 1

    plus_class = pd.read_csv('data/split_class/large_IGNORE_404_NOR_+1.txt', sep=' ', header=None)
    minus_class = pd.read_csv('data/split_class/large_IGNORE_404_NOR_-1.txt', sep=' ', header=None)

    samples = [plus_class[column], minus_class[column]]
    plt.boxplot(samples, showmeans=True, showfliers=False)
    plt.show()
예제 #7
0
def _save_score_boxplot(kind, scores, labels, size_tag):
    """Box-plot `scores` (one box per result file) and save it as '<kind><size_tag>.pdf'."""
    pl.figure(figsize=(1366 / 96, 768 / 96), dpi=100)
    pl.title(kind + ' ' + size_tag + ' points')
    pl.boxplot(scores, labels=labels)
    pl.savefig(kind + size_tag + '.pdf')
    pl.close()


def load():
    """Box-plot the 'tri_score' and 'auto_score' values of the S500 result files.

    Every file in the ``results`` directory whose name contains ``S500`` is
    read line by line; each line is evaluated as a Python expression and its
    ``'tri_score'`` and ``'auto_score'`` entries are collected. One box per
    file is drawn, labelled with the file name from its fifth character on,
    and the two figures are saved as ``tri<tag>.pdf`` and ``auto<tag>.pdf``
    in the current directory, where ``<tag>`` is characters 1-3 of the last
    file name.

    Returns None. When no file matches, nothing is plotted (previously this
    case crashed because the file-name variable was never bound).
    """
    result_names = sorted(
        name for name in os.listdir('results') if re.search("S500", name)
    )
    print("files:", result_names)

    if not result_names:
        # Nothing to plot; the figure titles below need at least one file name.
        return

    res_tri = []
    res_auto = []
    labels = []
    for f_name in result_names:
        temp_tri = []
        temp_auto = []
        with open('results/' + f_name, 'r') as f:
            for line in f:
                # NOTE(review): eval() executes arbitrary code from the result
                # file. If the lines are plain dict literals,
                # ast.literal_eval would be a safer drop-in -- confirm the
                # file format before switching.
                record = eval(line)
                temp_tri.append(record['tri_score'])
                temp_auto.append(record['auto_score'])
        res_tri.append(temp_tri)
        res_auto.append(temp_auto)
        labels.append(f_name[4:])

    # As before, the tag used in titles and output names comes from the last
    # file processed.
    size_tag = f_name[1:4]
    _save_score_boxplot('tri', res_tri, labels, size_tag)
    _save_score_boxplot('auto', res_auto, labels, size_tag)
def plot_box(plot_data, top_key):
    """
    Draw the groups of plot_data[top_key] as side-by-side box plots and save
    the figure to '<top_key>/box_plot.png'.

    Each key of plot_data[top_key] becomes an x tick label and each value
    the data of one box. ``create_folder(top_key)`` is called before saving.
    """
    groups = plot_data[top_key]
    series = list(groups.values())
    tick_positions = list(range(1, len(series) + 1))
    tick_labels = ['%s' % name for name in groups.keys()]

    plt.title('Spam emails per week by ' + top_key, fontsize=20)
    plt.boxplot(series)
    plt.xticks(tick_positions, tick_labels, rotation=80)
    plt.tight_layout()

    create_folder(top_key)
    figure = plt.gcf()
    figure.set_size_inches(20, 14)
    plt.savefig(top_key + '/box_plot.png', format='png', dpi=100)
예제 #9
0
파일: plot.py 프로젝트: darshansi/actg
def makejitter(y1list,y2list,pointannotation,ylabel,xlabel,title,imagefilename):
    """Box-plot two samples with their points overlaid and save the image.

    `y1list` is drawn at x position 1 with blue dots, `y2list` at position 2
    with red dots; each dot gets a uniform random x offset within the box
    width. Every ``(label, x, y)`` in `pointannotation` is annotated with a
    yellow call-out box. The axes are fixed to x in [0.2, 3.0] and y in
    [0.0, 1.0], the x axis is hidden, and the figure is written to
    `imagefilename`.
    """
    def jittered_positions(left_edge, count):
        # `count` x positions drawn uniformly from [left_edge, left_edge + 0.4).
        draws = np.random.rand(count, 1).tolist()
        return [left_edge + 0.4 * draw[0] for draw in draws]

    plt.figure(figsize=(8, 8))
    first_x = jittered_positions(0.8, len(y1list))
    second_x = jittered_positions(1.8, len(y2list))

    plt.boxplot([y1list, y2list], widths=0.4)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.plot(first_x, y1list, 'b.')
    plt.plot(second_x, y2list, 'r.')

    box_style = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5)
    arrow_style = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0')
    for text, point_x, point_y in pointannotation:
        plt.annotate(text, xy=(point_x, point_y), xytext=(80, -40),
                     textcoords='offset points', ha='right', va='bottom',
                     bbox=box_style, arrowprops=arrow_style)

    plt.axis([0.2, 3.0, 0.0, 1.0])
    current_axes = plt.gca()
    current_axes.axes.get_xaxis().set_visible(False)
    plt.savefig(imagefilename)
예제 #10
0
def plotStoreDailyTrends(trainSet,storeData,storeID,savepath = None):
    """Plot the sales distribution per day of week for one store.

    Rows of `trainSet` with ``Store == storeID`` and ``Open == 1`` are
    grouped by ``DayOfWeek``; their ``Sales`` are drawn on a new figure as a
    violin plot (with means) overlaid by a notched box plot.

    `savepath` is not used by the visible code, and the final branch on the
    store's ``CompetitionOpenSinceYear`` has no effect -- the function looks
    unfinished.
    """
    store_rows = trainSet[trainSet['Store'] == storeID]
    open_rows = store_rows[store_rows['Open'] == 1]

    plt.figure()
    days = set(open_rows['DayOfWeek'])
    sales_by_day = [open_rows[open_rows['DayOfWeek'] == day]['Sales'] for day in days]
    plt.violinplot(sales_by_day, showmeans=True)
    plt.boxplot(sales_by_day, notch=1)
    plt.xlabel('day of week')
    plt.ylabel('sales')

    competition_year = storeData[storeData['Store']==storeID]['CompetitionOpenSinceYear'].values
    storeCompetitionFlag = ~np.isnan(competition_year)

    # The truth test is kept even though the branch body does nothing.
    if storeCompetitionFlag:
        pass
def violin_plot(ax, values_list, measure_name, group_names, fontsize, color='blue',  ttest=False):
    '''
    Draw a statsmodels violin plot of `values_list` on `ax`, overlay a black
    box plot, and tidy up the axes.

    The violins are labelled with `group_names`, filled with `color` and
    cut off (absolute cutoff) at the largest value in `values_list`. The
    y axis is labelled `measure_name` and switches to scientific notation
    outside the 1e-3..1e3 range. `fontsize` sets the global font size, the
    tick label size and (divided by 10) the box plot line width.
    `ttest` is accepted but not used. Returns `ax`.
    '''
    import matplotlib.pylab as plt
    import statsmodels.api as sm
    import numpy as np

    plt.sca(ax)

    # Global font size for everything drawn from here on
    plt.rc('font', size=fontsize)

    largest = np.max(np.concatenate(values_list))

    # statsmodels' own box plot is switched off; a plain matplotlib one is
    # overlaid below instead.
    violin_options = {'violin_fc': color,
                      'cutoff': True,
                      'cutoff_val': largest,
                      'cutoff_type': 'abs'}
    sm.graphics.violinplot(values_list,
                           ax=ax,
                           labels=group_names,
                           show_boxplot=False,
                           plot_opts=violin_options)

    box_artists = plt.boxplot(values_list, sym='x')
    for artists in box_artists.values():
        plt.setp(artists, color='black', lw=fontsize/10)

    # Scientific notation on the y axis outside the power limits
    plt.ticklabel_format(style='sci', axis='y')
    ax.yaxis.major.formatter.set_powerlimits((-3, 3))
    plt.tick_params(axis='both', which='major', labelsize=fontsize)

    plt.ylabel(measure_name, fontsize=fontsize)

    # Hide the major tick marks on both sides of the y axis
    for tick in ax.yaxis.get_major_ticks():
        tick.tick1On = False
        tick.tick2On = False

    return ax
예제 #12
0
def plot_box_plot(col, col_name=None, verbose=True):
    """Draw a box plot of a single feature on a new figure.

    Parameters
    ----------
    col : np.array
        Values to plot.
    col_name : str or None
        Used as the plot title; no title is set when this is falsy.
    verbose : boolean
        If True, display the graph with ``plt.show()``.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the plot

    """
    figure = plt.figure()
    boxplot(col)

    if col_name:
        plt.title(col_name)
    if verbose:
        plt.show()

    return figure
예제 #13
0
    def boxplot_feature(self, pid, fids):
        """
        Make a boxplot of the distribution of the given features
        in a given parcel across subjects.

        Parameters
        ----------
        pid: int, parcel identifier; rejected when negative or greater
             than self.k
        fids: list of feature identifiers; each one must occur in self.fids

        Raises
        ------
        ValueError: if pid is out of range or one of fids is unknown
        """
        # 1. test that pid is correct
        # NOTE(review): pid == self.k is accepted here; confirm whether the
        # valid range is [0..k] or [0..k-1].
        if pid < 0:
            raise ValueError("Negative parcel id")
        if pid > self.k:
            raise ValueError("Wrong parcel id")

        # 2. test that the feature(s) exist, collecting for each requested
        # feature the position(s) at which it occurs in self.fids
        idx = []
        for fid in fids:
            i = np.array([fid == f for f in self.fids])
            i = np.nonzero(i)
            i = np.reshape(i, np.size(i))
            if np.size(i) == 0:
                raise ValueError("The feature does not exist yet")
            idx.append(i)

        # 3. get the data and make the figure
        dataplot = []
        for j in idx:
            dataplot.append(np.transpose(self.features[j][:, pid]))

        dataplot = np.transpose(np.concatenate(dataplot))
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print(np.shape(dataplot))
        import matplotlib.pylab as mp

        mp.figure()
        mp.boxplot(dataplot)
 def plot_error_bar(self,MT_results,average_case):
     '''
     Plot the total annual releases: the average case as a line, plus one
     box per year summarising that year's value across all entries of
     MT_results. Only the years from index 40 onwards are shown, and the
     figure is displayed with plt.show().
     '''
     n_rounds = len(MT_results)
     n_years = len(MT_results[0])
     plt.figure()

     # Regroup MT_results[round][year] into one list of round values per year.
     per_year = [[MT_results[run][year] for run in range(n_rounds)]
                 for year in range(n_years)]

     shown_years = self.years[40::]
     plt.plot(shown_years, average_case[40::], label='Average Release Case')
     plt.boxplot(per_year[40::], positions=shown_years, labels=shown_years, widths=0.2)
     plt.xlabel('Year')
     plt.ylabel('Total Releases From Coating Products (Ton)')
     plt.legend()

     plt.show()
예제 #15
0
            lr_ae_supp = np.dot(mat_V1, mat_W0)
            lr_ae_supp_z = zscore(lr_ae_supp, axis=1)

            for i in np.arange(18):
                r1, p1 = pearsonr(lr_ae_supp_z[i, :], mean_supp_z[i, :])
                print('r/lrae: %.4f' % r1)
                corr_means_lr_ae[ilamb, i] = r1

                r2, p2 = pearsonr(lr_supp_z[i, :], mean_supp_z[i, :])
                print('r/lr: %.4f' % r2)
                corr_means_lr[ilamb, i] = r2

    # boxplot
    plt.figure()
    corrs = np.vstack((corr_means_lr[0, :], corr_means_lr_ae))
    plt.boxplot(corrs.T)
    plt.ylabel('correlation r')
    plt.title('Support Recovery: normal versus low-rank logistic regression\n'
              '%i components' % n_comp)
    tick_strs = [u'normal'] + [u'low-rank lambda=%.2f' % val for val in lambs]
    plt.xticks(np.arange(6) + 1, tick_strs, rotation=320)
    plt.ylim(0, 1.0)
    plt.yticks(np.linspace(0, 1., 11), np.linspace(0, 1., 11))
    plt.tight_layout()
    
    out_path = op.join(WRITE_DIR, 'supp_recov_comp=%i.png' % n_comp)
    plt.savefig(out_path)

    # barplot
    plt.figure()
    ind = np.arange(5)
예제 #16
0
파일: boxplot.py 프로젝트: adamuas/coevondm
  0.15433815,  0.2244887  , 0.27699527 , 0.14800338],

    '$Vertebral3C$':[ 0.11300019,  0.06446632 , 0.08426391 , 0.06303025,  0.08434364,  0.09018135,
  0.09996641 , 0.07146446,  0.09008181 , 0.08094153],

    '$Cancer$': [ 0.03437089,  0.06196577,  0.04087629 , 0.06653635 , 0.06513942 , 0.03501766,
  0.03562236,  0.11285626 , 0.0667102 ,  0.18861101],

    '$Diabetes$': [ 0.25870847 , 0.27619998 , 0.24180374,  0.26761697,  0.23557992,  0.22850167,
  0.26946416,  0.24228854 , 0.2434516 ,  0.25255786],

    '$Card$': [ 0.31023136 , 0.3207549,   0.28652662,  0.24727198,  0.26294096,  0.30268156,
  0.29924656,  0.2828805,   0.2497709 ,  0.27087237],

    '$Heart$': [ 0.26676818,  0.26147714 , 0.22306759,  0.3132863,   0.28896025 , 0.29202858,
  0.2508732,   0.25777515 , 0.28629689,  0.2796618 ],
    
};

# Box plot of the per-dataset error lists in `results`, one box per key.
fig, ax1 = plt.subplots(figsize=(15, 8))
# Materialise the dict views so boxplot receives plain lists on Python 3.
plt.boxplot(list(results.values()), labels=list(results.keys()))
# Raw string: '\ ' is not a valid Python escape sequence. The runtime text
# is unchanged -- the backslash-space is mathtext for an explicit space.
plt.title(r'$Big\ Benchmark$')
plt.ylabel('$Error(MSE)$')
plt.ylim(0.0, 0.37)
plt.xticks(rotation=25)
# Light horizontal grid lines behind the boxes
ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
               alpha=0.45)
plt.savefig('boxplot_bigbench.png', dpi=100)
plt.show()

예제 #17
0
    regmodel.xi *= forg_factor
    regmodel.nu *= forg_factor
    
    # update
    regmodel.update(yt, xt)
    regmodel.log()

#%%
# Turn the values logged on the model into an array; it is indexed below as
# [row, column], one panel per column.
Ebeta_log = np.array(regmodel.Ebeta_log)

# Figure 1: e2 as a line with the predictions yt_pred overlaid as '+' markers.
plt.figure(1, figsize=(15, 5))
plt.plot(e2)
plt.plot(yt_pred, '+')

# Figure 2: the first two columns of Ebeta_log in a 3-row subplot grid
# (the third panel is commented out).
plt.figure(2, figsize=(15, 5))
plt.subplot(3, 1, 1)
plt.plot(Ebeta_log[:,0])
plt.subplot(3, 1, 2)
plt.plot(Ebeta_log[:,1])
#plt.subplot(3, 1, 3)
#plt.plot(Ebeta_log[:,2])
#%%
# Figure 3: histogram of the prediction errors, skipping the first three
# samples, followed by their root-mean-square value.
errors = e2[3:] - yt_pred[3:]
plt.figure(3)
plt.hist(errors, bins=100)
print('RMSE: ', np.sqrt(np.mean((e2[3:] - yt_pred[3:])**2)))

#%%
# Figure 4: box plot of the same errors with outlier points hidden.
plt.figure(4)
plt.boxplot(errors, showfliers=False)
                           csize=csize).mean()/ngroups
        sent.append(seni)
    sens.append(np.array(sent))
    kap.append(np.array(kappa))
    clt.append(np.array(cls))
    pk.append(np.array(peaks))
    
################################################################################
# Visualize the results
import scipy.stats as st
# Standard-normal survival function evaluated at each threshold; `aux` is not
# used in the lines visible here.
aux = st.norm.sf(thresholds)

import matplotlib.pylab as mp
# One figure with three side-by-side panels; each panel shows box plots of
# one reproducibility measure, with the x ticks relabelled by threshold.
a = mp.figure()
mp.subplot(1, 3, 1)
mp.boxplot(kap)
mp.title('voxel-level reproducibility', fontsize=12)
mp.xticks(range(1,1+len(thresholds)),thresholds)
mp.xlabel('threshold')
mp.subplot(1, 3, 2)
mp.boxplot(clt)
mp.title('cluster-level reproducibility', fontsize=12)
mp.xticks(range(1,1+len(thresholds)),thresholds)
mp.xlabel('threshold')
mp.subplot(1,3,3)
# Only the peak-level panel uses notched boxes.
mp.boxplot(pk,notch=1)
mp.title('peak-level reproducibility', fontsize=12)
mp.xticks(range(1,1+len(thresholds)),thresholds)
mp.xlabel('threshold')
# The width set here is overridden by set_size_inches on the next line.
a.set_figwidth(10.)
a.set_size_inches(12, 5)
예제 #19
0
def plot_exon_coverage(filename, exons=None, exons_per_gene = None, target_folder = None, whitelist=None):
    """Plot per-gene exon coverage of one sample and return the per-base table.

    The coverage file is tab-separated without a header row and is read with
    the columns chr, start, stop, amplicon, na, strand, amplicon_pos, dp.
    Rows are collapsed to one entry per chromosome position. Two PNG figures
    (300 dpi), each with one subplot row per gene, are written:

    * ``<sample>_raw_exon_coverage.png`` -- exon rectangles plus a scatter of
      the depth (capped at 200) of every position with depth above 10;
    * ``<sample>_summarized_exon_coverage.png`` -- one depth box plot per
      exon number.

    Parameters
    ----------
    filename : str
        Path of the coverage file. The sample name used in output file names
        is the file name up to its first dot; the one used in log messages is
        the file name without its last 13 characters.
    exons : pandas.DataFrame
        Exon table; the columns gene_upper, exon_start, exon_stop and exon_no
        are read. Required in practice despite the None default.
    exons_per_gene : dict or None
        Maps an upper-case gene name to a list of (start, stop, exon_no)
        tuples. Positions that match no region, or whose gene is missing
        here, get exon number 0.
    target_folder : str or None
        Prefix prepended verbatim to the output file names (no path separator
        is added). None is treated as an empty prefix.
    whitelist : object or None
        When given, ``whitelist.get_variants_per_exon(gene, exon_no)`` is
        logged and shown in the x tick labels of the summary figure.

    Returns
    -------
    pandas.DataFrame
        One row per chromosome position with the columns chrompos, chrom,
        pos, gene, start, stop, minus_dp, plus_dp, dp and exon_no.
    """
    sample = filename.split("/")[-1][:-13]
    header = ["chr", "start", "stop", "amplicon", "na", "strand",  "amplicon_pos", "dp"]

    rawdf = pd.read_csv(filename, sep="\t", names=header)
    rawdf["pos"] = rawdf.start + rawdf.amplicon_pos
    rawdf["chrompos"] = rawdf.apply(lambda x: "_".join([str(x["chr"]), str(x["pos"])]), axis=1)
    rawdf["name"] = rawdf.amplicon
    # The gene name is the part of the amplicon name before the first "_".
    rawdf["gene"] = rawdf.apply(lambda x: x["amplicon"].split("_")[0].upper(), axis=1)

    # A missing target folder means "no prefix"; concatenating None with the
    # sample name used to raise TypeError.
    output_prefix = target_folder or ""

    #######################################################################################
    # df: per base -- depth is summed over all rows of a position, while the
    # per-strand depths keep the maximum seen on that strand.

    df = dict(chrom = rawdf.chr.groupby(rawdf.chrompos).min(),
              pos = rawdf.pos.groupby(rawdf.chrompos).min(),
              gene = rawdf.gene.groupby(rawdf.chrompos).min(),
              start = rawdf.start.groupby(rawdf.chrompos).min(),
              stop = rawdf.stop.groupby(rawdf.chrompos).min(),
              minus_dp = rawdf[rawdf.strand == "-"].dp.groupby(rawdf.chrompos).max(),
              plus_dp = rawdf[rawdf.strand == "+"].dp.groupby(rawdf.chrompos).max(),
              dp = rawdf.dp.groupby(rawdf.chrompos).sum(),
    )

    df = pd.DataFrame(df).reset_index()
    df["gene"] = df.gene.str.upper()

    #######################################################################################

    def sort_by_pos(frame):
        # DataFrame.sort(columns=...) no longer exists in current pandas;
        # prefer sort_values and fall back to the old spelling on old versions.
        if hasattr(frame, "sort_values"):
            return frame.sort_values(by=["pos"])
        return frame.sort(columns=["pos"])

    def find_matching_exon_from_dict(row):
        # Exon number of the region strictly containing the position, else 0.
        gene = row["gene"]
        pos = row["pos"]

        exon_no = 0
        try:
            for region in exons_per_gene[gene]:
                if region[0] < pos < region[1]:
                    exon_no = region[2]
                    break
        except Exception:
            # Best effort: an unknown gene or a missing exon dictionary
            # leaves the position unassigned (exon 0).
            pass

        return exon_no

    df["exon_no"] = df.apply(find_matching_exon_from_dict, axis = 1)

    #######################################################################################

    def plot_exon(row):
        # Draw one exon as a red rectangle below the zero line.
        start = int(row["exon_start"])
        stop = int(row["exon_stop"])
        size = stop - start

        rectangle = plt.Rectangle((start, -20), size, 10, fc='red')
        plt.gca().add_patch(rectangle)

    #######################################################################################
    # plot individual bases vs chromosome locations

    all_genes = df.gene.unique()
    all_genes.sort()

    plot_no = 0

    figure_length = len(all_genes)
    plt.figure(figsize=(15, figure_length))

    upper = np.percentile(df[df.dp > 0].dp, 90)
    lower = -np.percentile(df[df.dp > 0].dp, 40)
    plt.ylim(lower, upper)
    for gene in all_genes:
        plot_no += 1
        plt.subplot(len(all_genes), 1, plot_no)

        try:
            plt.ylabel(gene)
            gene = gene.upper()

            ######################################################################

            # Fixed: this filter used to read `gene_exons` before it was
            # assigned, so every gene failed with UnboundLocalError (swallowed
            # by the except below) and nothing was ever drawn in this figure.
            gene_exons = exons[(exons.gene_upper == gene)]

            gene_exons.apply(plot_exon, axis=1)

            # NOTE(review): the right limit uses the last exon's *start*;
            # exon_stop may have been intended -- confirm.
            x_start = int(gene_exons.head(1).exon_start) - 1000
            x_stop = int(gene_exons.tail(1).exon_start) + 1000

            plt.xlim(x_start, x_stop)
            plt.ylim(-20, 210)

            ######################################################################

            # .copy() so the new column is added to an independent frame
            # instead of a slice of df.
            gene_df = df[(df.gene.str.upper() == gene)].copy()
            gene_df["dp_capped"] = gene_df.apply(lambda x: 200 if x["dp"] > 200 else x["dp"], axis=1)

            pdf = sort_by_pos(gene_df[gene_df.dp > 10])
            y = pdf.dp_capped
            x = pdf.pos

            plt.scatter(x, y, c="black", s=10)

            plt.axhline(0)

            ######################################################################
            # create x labels: the exon number at the midpoint of each exon

            locs = []
            labels = []

            for i in range(1, int(gene_exons.exon_no.max()) + 1):
                if len(gene_exons[gene_exons.exon_no == i]) > 0: # exons have matching amplicons
                    start = gene_exons[gene_exons.exon_no == i].exon_start.min()
                    stop = gene_exons[gene_exons.exon_no == i].exon_stop.max()

                    locs.append(np.mean([start, stop]))
                    labels.append(str(i))

            plt.xticks(locs, labels)

        except Exception:
            # Deliberately best effort: a gene that cannot be drawn is
            # reported and its subplot is left as it is.
            logging.warning( "Gene %s in sample %s is not plotted due to an error. Is it in the exon list?" % (gene, sample))

    plt.tight_layout()

    sample = filename.split("/")[-1].split(".")[0]
    title = output_prefix + sample + " raw exon coverage"
    plt.savefig(title.replace(" ", "_")+".png", dpi=300)
    plt.close()

    #######################################################################################
    # plot boxplots vs exon counts

    all_genes = df.gene.unique()
    all_genes.sort()

    plot_no = 0

    figure_length = len(all_genes)
    plt.figure(figsize=(15, figure_length))

    upper = np.percentile(df[df.dp > 0].dp, 90)
    lower = -np.percentile(df[df.dp > 0].dp, 40)

    for gene in all_genes:
        plot_no += 1
        plt.subplot(len(all_genes), 1, plot_no)

        plt.ylabel(gene)
        plt.axhline(0)
        plt.ylim(lower, upper)

        ######################################################################

        gene = gene.upper()
        gene_exons = exons[(exons.gene_upper == gene)]

        pdf = sort_by_pos(df[(df.gene.str.upper() == gene)])

        xs = []
        ys = []

        logging.debug( "-" * 150 )
        logging.debug( gene )

        if len(gene_exons.exon_no.values) > 0:
            for i in range(1, int(gene_exons.exon_no.max()) + 1):
                if len(gene_exons[gene_exons.exon_no == i]) > 0: # exons have matching amplicons

                    if whitelist:
                        logging.debug( "Exon %s, %s variants" % (i, whitelist.get_variants_per_exon(gene, i)))

                    data = list(pdf[pdf.exon_no == i].dp)
                    mean_x = pdf[pdf.exon_no == i].pos.mean()

                    if data and mean_x:
                        ys.append(data)
                        xs.append(i)
                    else:
                        # No depth values for this exon: draw a flat box at 0.
                        ys.append([0, 0, 0])
                        xs.append(i)

                else: # empty exon that can't have coverage
                    midlevel = lower + (upper - lower) / 2
                    plt.text(i, midlevel, "X", fontsize=16)

                    ys.append([0, 0, 0])
                    xs.append(i)

        if len(xs) > 0:

            plt.boxplot(ys, positions=xs)

            if whitelist:

                ######################################################################
                # create x labels: "<running exon count>.\n(<variant count>)"

                locs = []
                labels = []

                covered_exon_count = 0
                for i in range(1, int(gene_exons.exon_no.max()) + 1):
                    covered_exon_count += 1
                    locs.append( i )

                    var_count = whitelist.get_variants_per_exon(gene, i)
                    label = "%s.\n(%s)" % (covered_exon_count, var_count)
                    labels.append(label)

                plt.xticks(locs, labels)

    plt.tight_layout()

    sample = filename.split("/")[-1].split(".")[0]
    title = output_prefix + sample + " summarized exon coverage"
    plt.savefig(title.replace(" ", "_")+".png", dpi=300)
    plt.close()

    return df
                                  method, swap, verbose, **kwargs)
        kappa.append(k)
        cld = cluster_reproducibility(func, var, xyz, ngroups, coord, sigma,
                                      method, swap, verbose, **kwargs)
        cls.append(cld)
        
    kap.append(np.array(kappa))
    clt.append(np.array(cls))
    
################################################################################
# Visualize the results

import matplotlib.pylab as mp
# One figure with two side-by-side panels: box plots of `kap` (left) and
# `clt` (right), with the x ticks relabelled by the threshold values.
mp.figure()
mp.subplot(1,2,1)
mp.boxplot(kap)
mp.title('voxel-level reproducibility')
mp.xticks(range(1,1+len(thresholds)),thresholds)
mp.xlabel('threshold')
mp.subplot(1,2,2)
mp.boxplot(clt)
mp.title('cluster-level reproducibility')
mp.xticks(range(1,1+len(thresholds)),thresholds)
mp.xlabel('threshold')


mp.figure()
q = 1 
for threshold in thresholds:
    mp.subplot(3, len(thresholds)/3, q)
    rmap = map_reproducibility(func, var, xyz, ngroups,
        stds[nmm, nc]=tempStds

# Read the four 'meanUB'/'meanLB'/'sdUB'/'sdLB' values of the CI dataset at
# index (ny+70, nj, ni); presumably upper/lower confidence bounds of the mean
# and of the standard deviation -- confirm against the CI file.
CIsMeanUB=CI.variables['meanUB'][ny+70,nj,ni]
CIsMeanLB=CI.variables['meanLB'][ny+70,nj,ni]
CIsStdsUB=CI.variables['sdUB'][ny+70,nj,ni]
CIsStdsLB=CI.variables['sdLB'][ny+70,nj,ni]

# Values shown in the figure titles as "members needed".
numMean=minNum_nSIF_mean_85[ny, nj, ni]
numStd=minNum_nSIF_std_85[ny, nj, ni]
 

# Figure 1: box plots of the columns of means.T, with horizontal lines at the
# two mean bounds and at the first element of the last row of `means`.
plt.figure()    
plt.hlines(CIsMeanUB, 0, 31)   
plt.hlines(CIsMeanLB, 0, 31) 
plt.hlines(means[-1][0], 0, 31)   
plt.boxplot(means.T)
plt.title('Example mean distributions ('+str(int(numMean))+' members needed), YEAR=' + str(yr)+ ' NI='+str(ni)+ 'NJ='+str(nj))
plt.ylabel('Number of open water days')
plt.xlabel('Number of subsampled ensemble members')
plt.savefig('SI_FigXx_numNeeded_Mean.'+rcpName[ittR]+'.'+nsk+'.pdf', format='pdf')
#plt.show()

# Figure 2: the same layout for the standard deviations in `stds`.
plt.figure()    
plt.hlines(CIsStdsUB, 0, 31)   
plt.hlines(CIsStdsLB, 0, 31) 
plt.hlines(stds[-1][0], 0, 31)   
plt.boxplot(stds.T)
plt.title('Example standard deviation distributions('+str(int(numStd))+' members needed), YEAR=' + str(yr)+ ' NI='+str(ni)+ 'NJ='+str(nj))
plt.ylabel('Number of open water days')
plt.xlabel('Number of subsampled ensemble members')
plt.savefig('SI_FigXx_numNeeded_STD.'+rcpName[ittR]+'.'+nsk+'.pdf', format='pdf')
예제 #22
0
def plot_cluster_expression(out,data1,data2,donor,gene, image):
    """Box-plot per-probe expression inside vs. outside clusters and save it.

    For every probe (row) a pair of boxes is drawn: `data1` in blue (legend
    'Inside') and `data2` in red (legend 'Outside'). The p-value of an
    independent two-sample t-test between the two rows is written next to
    the plot for each probe. The figure is saved to ``<out>/<donor>_<gene>.png``;
    if that fails, an SVG with the same base name is written instead.

    Parameters
    ----------
    out : str
        Output directory.
    data1, data2 : 2-D arrays indexed as [probe, sample]
        Must have the same number of rows; ``data1.shape[0]`` gives the
        number of probes.
    donor, gene, image : str
        Used in the title; `donor` and `gene` also form the file name.
    """

    # function for setting the colors of the box plots pairs
    def setBoxColors(bp):
        # The first half of each artist list belongs to the first (blue) box,
        # the second half to the second (red) box. Splitting by length rather
        # than hard-coding indices works whether Matplotlib returns one or
        # two flier lines per box.
        for part in ('boxes', 'caps', 'whiskers', 'fliers', 'medians'):
            artists = bp[part]
            per_box = len(artists) // 2
            for position, artist in enumerate(artists):
                setp(artist, color='blue' if position < per_box else 'red')

    N_probes=data1.shape[0]

    fig = figure()
    ax = axes()
    try:
        hold(True)
    except NameError:
        # hold() was removed from pylab in Matplotlib 3.0; drawing onto the
        # existing axes is the only behaviour there.
        pass

    # s and f are the x positions of the current blue/red pair; pairs are
    # spaced three units apart.
    s=1
    f=2
    p_value=[]
    t_stat=[]
    ticks=[]
    for i in range(N_probes):
        t,p=stats.ttest_ind(data1[i,:], data2[i,:])
        bp = boxplot([data1[i,:],data2[i,:]], positions = [s, f], widths = 0.6)
        setBoxColors(bp)
        # Tick in the middle of the pair
        ticks.append( (s+f)/2. )
        s+=3
        f+=3
        p_value.append(p)
        t_stat.append(t)

    # Temporary lines that only provide the legend handles
    hB, = plot([1,1],'b-')
    hR, = plot([1,1],'r-')

    xlim(0,f+2)
    ylim(2,20)
    legend((hB, hR),('Inside', 'Outside'))

    for i in range(N_probes):
        text(f+3,10-i,'Probe #{}: p-value={}'.format(i+1,np.round(p_value[i],3) ) )
    ax.set_xticklabels(['probe #{}'.format(j) for j in range(1,N_probes+1)])
    ax.set_xticks(ticks)
    title('Donor {}, Allen Brain expression of gene {} inside/outside clusters formed in {} image'.format(donor, gene,image))

    hB.set_visible(False)
    hR.set_visible(False)
    try:
        savefig(os.path.join(out,donor+"_"+gene+".png") )
    except Exception:
        # Fall back to SVG when the PNG cannot be written; KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        savefig(os.path.join(out,donor+"_"+gene+".svg") )
def boxplot_dti_movement(subs_df, figure_name):
    '''
    Create a boxplot showing the different ways of calculating
    displacement for dti scans. Label the outliers with their subid.

    Every column of subs_df whose name contains 'mean_rms' becomes one box.
    Each outlier is annotated with the subid of the row holding that value;
    the annotation color is assigned per subject, so the same subject gets
    the same color in every box.

    Parameters
    ----------
    subs_df : pandas.DataFrame
        Needs a 'subid' column and the 'mean_rms' columns. A 'color' column
        is added to (or reset in) this frame in place.
    figure_name : str
        File name the figure is saved to (dpi=100).

    Returns
    -------
    pandas.DataFrame
        subs_df, including the 'color' column.
    '''
    #===============================================================
    # IMPORTS
    #---------------------------------------------------------------
    import numpy as np
    import matplotlib.pylab as plt
    import pandas as pd
    import matplotlib as mpl

    #===============================================================
    # Define some measures we need
    #---------------------------------------------------------------

    # First: the columns we're going to plot
    cols = [ name for name in subs_df.columns if 'mean_rms' in name ]

    # The total number of subjects
    n = subs_df.subid.count()

    # Define the colorbar that you want to use
    cmap = mpl.cm.gist_ncar
    norm = mpl.colors.Normalize(vmin=0, vmax=1)
    color_map = mpl.cm.ScalarMappable( norm, cmap)

    # Start the color counter
    color_counter = 1.0

    # Make sure everyone is originally set with a color of 0
    # (0 means "no color assigned yet")
    subs_df['color'] = 0.0

    #===============================================================
    # Make the figure
    #---------------------------------------------------------------
    fig, ax = plt.subplots()

    # Make a box plot of the different measures of movement
    box = plt.boxplot(subs_df[cols].values)

    # One of the pieces of information contained in the box variable
    # are the locations of the fliers (the outliers)
    for f in box['fliers']:

        # x_list: list of x positions, fliers_list: list of y positions
        x_list, fliers_list = f.get_data()

        # Sort the fliers_list so that they're in order smallest to largest.
        # The x list needs no sorting because all its values are the same.
        fliers_list.sort()

        # Loop through all the x, y pairs with a counter (c)
        for c, (x, y) in enumerate(zip(x_list, fliers_list)):

            # Find the subID for each of the outliers by looking up the
            # y value in the appropriate column (indexed as x-1 because
            # box positions start counting at 1). The builtin int replaces
            # np.int, which newer NumPy releases no longer provide.
            sub_id = subs_df.subid[subs_df[cols[int(x-1)]]==y].values[0]

            # Give each individual the same color across plots; skip this
            # if the person already has a color. .loc writes into subs_df
            # itself rather than into a possible temporary copy.
            is_subject = subs_df.subid == sub_id
            if subs_df.loc[is_subject, 'color'].values[0] == 0:
                subs_df.loc[is_subject, 'color'] = color_counter
                color_counter+=1

            # Get the number that's been filled in for this participant and
            # define the color that will be used in the annotation
            sub_color_id = subs_df.loc[is_subject, 'color']
            color = color_map.to_rgba(10.0*sub_color_id.values[0]/n)

            # Alternate the labels left and right of the box as we go
            # through the outliers (even counter: right, odd counter: left)
            offset_x = -0.5 * float(c%2) + 0.25 + x
            offset_y = 0.25 + y

            # Annotate all the outliers with a box that contains their subid
            # and has a personalized color
            ax.annotate(sub_id, xy=(x, y), xytext=(offset_x, offset_y),
                textcoords='data', ha='center', va='center',
                bbox=dict(boxstyle='round,pad=0.2', fc=color, alpha=0.5),
                arrowprops=dict(arrowstyle='->',
                                color='black'))

    # Make the plot look nicer:
    # Lets make sure the labels all fit onto the x axis
    plt.xticks(range(1,len(cols)+1), cols, rotation=45)
    # And label the yaxis
    ax.set_ylabel('Displacement (mm)')
    # And set the y axis to being a little higher than the max so the labels fit!
    ylims = ax.get_ylim()
    ax.set_ylim(ylims[0], ylims[1]+0.5)
    plt.tight_layout()
    # Name the figure and save it
    fig.savefig(figure_name, bbox_inches=0, dpi=100)

    return subs_df
예제 #24
0
# Plot a pie chart of the car prices, one wedge per model.
plt.cla()
plt.pie(carDf.PRICE, labels=carDf.MODEL, shadow=True, autopct='%1.1f')

# Plot a histogram
plt.cla()
plt.hist(data3, color='g')
plt.title("Demo Histogram")
plt.xlabel("Sin weights")
plt.ylabel("Frequency")

# Plot a box plot of the car weights, one box per make.
plt.cla()
# Pass a list holding ONE 1-D dataset per box.  Each dataset used to be
# wrapped in an extra list, which makes every item two-dimensional; recent
# matplotlib releases reject that.  ``.values`` hands over plain arrays so
# the filtered Series index plays no role.
plt.boxplot([carDf.WEIGHT[carDf.MAKE == 'Toyota'].values,
             carDf.WEIGHT[carDf.MAKE == 'Ford'].values],
            labels=('Toyota', 'Ford'))

#----------------------------------------------------------------------------
#                   Data Acquisition
#----------------------------------------------------------------------------

import os

# Change the working directory first so the relative CSV path below resolves
# against this folder.
os.chdir("C:/Personal/V2Maestros/Modules/Python - Pandas")

#File
# Load the CSV file into a DataFrame.
irisData = pd.read_csv("iris.csv")
# The next two lines are bare expressions: they only display something in an
# interactive session and have no effect when the file runs as a script.
irisData
irisData.describe()
# Add a column named 'dummy' that holds the constant 1 in every row.
irisData['dummy'] = 1
예제 #25
0
# Plot scatter
plt.cla()
plt.scatter(carDf.PRICE, carDf.WEIGHT, color='r')

# Plot bar charts (vertical, then horizontal with the model names as ticks)
plt.cla()
plt.bar(carDf.ID, carDf.PRICE)
plt.cla()
plt.barh(carDf.ID, carDf.WEIGHT)
plt.yticks(carDf.ID, carDf.MODEL)

# Plot pie chart
plt.cla()
plt.pie(carDf.PRICE, labels=carDf.MODEL, shadow=True, autopct='%1.1f')

# Plot a histogram
plt.cla()
plt.hist(data3, color='g')
plt.title('Demo Histogram')
plt.xlabel('Sin Weights')
plt.ylabel('Frequency')

# Plot a boxplot: show how weights change by make of car.
plt.cla()
# Pass a list holding ONE 1-D dataset per box.  Each dataset used to be
# wrapped in an extra list, which makes every item two-dimensional; recent
# matplotlib releases reject that.
plt.boxplot([carDf.WEIGHT[carDf.MAKE=='Toyota'].values,
             carDf.WEIGHT[carDf.MAKE=='Ford'].values],
            labels=('Toyota','Ford'))




    
iris_data.head()

# No cleansing is required.

#Exploratory Data Analysis

plt.scatter(iris_data['Petal.Length'],iris_data['Petal.Width'])
plt.cla()
plt.scatter(iris_data['Sepal.Length'],iris_data['Sepal.Width'])

plt.cla()

# Box plots of one feature at a time, split by species.  Every box gets a
# single 1-D array; the datasets used to be wrapped in an extra list, which
# makes each item two-dimensional and is rejected by recent matplotlib.
plt.boxplot([iris_data['Petal.Length'][iris_data.Species=='setosa'].values,
             iris_data['Petal.Length'][iris_data.Species=='versicolor'].values,
             iris_data['Petal.Length'][iris_data.Species=='virginica'].values],
            labels=('setosa','versicolor','virginica'))
plt.cla()

plt.boxplot([iris_data['Petal.Width'][iris_data.Species=='setosa'].values,
             iris_data['Petal.Width'][iris_data.Species=='versicolor'].values,
             iris_data['Petal.Width'][iris_data.Species=='virginica'].values],
            labels=('setosa','versicolor','virginica'))

plt.cla()
plt.boxplot([iris_data['Sepal.Length'][iris_data.Species=='setosa'].values,
             iris_data['Sepal.Length'][iris_data.Species=='versicolor'].values,
             iris_data['Sepal.Length'][iris_data.Species=='virginica'].values],
            labels=('setosa','versicolor','virginica'))

# NOTE(review): no Sepal.Width box plot is drawn above, so the remark below
# may refer to a plot that is not part of this script - confirm.
#Note that sepal width is all over the place, a lot of overlap. Not a great predictor.
def feature_by_age_boxplot(age2vals, age2label, outfn, title='', xlabel='Age',
                           ylabel='Value', scale_x = False, output_png=False, 
                           methods_str=''):
    """Draw one box per age, mark each mean, and save the figure.

    Parameters
    ----------
    age2vals : dict
        Maps an age to the sequence of values observed for that age.
    age2label : dict
        Maps an age to its tick-label text.  Its sorted keys decide which
        ages get a slot on the x axis; an age without an entry in
        ``age2vals`` gets an empty box.
    outfn : str
        Path handed to ``plt.savefig``.
    title, xlabel, ylabel : str
        Figure title and axis captions.
    scale_x : bool
        When true the boxes are positioned at the age value itself,
        otherwise at consecutive indices 0, 1, 2, ...
    output_png : bool
        When true the figure is saved a second time under ``outfn`` with
        ``'.pdf'`` replaced by ``'.png'``.
    methods_str : str
        Text embedded in the ``[DB: ...]`` footer of the figure.

    Returns
    -------
    None
    """
    fig = plt.figure()

    # Enlarge the canvas as soon as more than one age carries data.
    if len(age2vals) > 1:
        fig_width = fig.get_figwidth()
        fig_height = fig.get_figheight()
        fig.set_figwidth(fig_width * 1.7)
        fig.set_figheight(fig_height * 1.2)

    ax1 = fig.add_subplot(111)
    plt.subplots_adjust(bottom=0.26)

    ages = sorted(age2label.keys())

    # Every labelled age owns a slot; ages without values keep an empty box so
    # the slots stay aligned with the tick labels built further down.
    box_pos = [age if scale_x else i for i, age in enumerate(ages)]
    box_data = [age2vals[age] if age in age2vals else [] for age in ages]

    bp = plt.boxplot(box_data, widths=.6, sym='', patch_artist=True,
                     positions=box_pos)

    plt.setp(bp['boxes'], color='#99CCFF', edgecolor="black", lw=1)
    plt.setp(bp['whiskers'], color='black', lw=1)
    plt.setp(bp['medians'], color='black', lw=1.5)
    plt.setp(bp['caps'], color='black', lw=1)

    # Overlay a red cross at the mean of every age that has values.
    for x_pos, age in zip(box_pos, ages):
        if age in age2vals:
            ax1.plot(x_pos, numpy.average(age2vals[age]), 'x', color='red',
                     markersize=6, markeredgewidth=1.5)

    if age2label:
        # Negative ages (and ages without a label) get a blank tick label.
        labels = []
        for age in ages:
            if age < 0 or age not in age2label:
                labels.append('')
            else:
                labels.append('%s (%d)' % (age2label[age], age))

        xtickNames = plt.setp(ax1, xticklabels = labels)
        plt.setp(xtickNames, fontsize=10, rotation=45,
                 horizontalalignment='right')

    # Pad both axes by 2% of their current extent.
    ymin = ax1.viewLim.ymin
    ymax = ax1.viewLim.ymax
    y_range = ymax - ymin
    ax1.set_ylim(ymin - (.02 * y_range), ymax + (.02 * y_range))

    xmin = ax1.viewLim.xmin
    xmax = ax1.viewLim.xmax
    x_range = xmax - xmin
    pad = .02 * x_range
    ax1.set_xlim(xmin - pad, xmax + pad)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    plt.title(title)

    plt.figtext(0.1, 0.01, '[DB: '+methods_str+']', size=10)

    plt.savefig(outfn)
    if output_png:
        plt.savefig(outfn.replace('.pdf', '.png'))
예제 #28
0
def _fill_box(ax, box, median, facecolor):
    """Fill one box outline with *facecolor* and redraw its median on top."""
    # As before, only the first five outline vertices are used for the patch.
    corners = list(zip(box.get_xdata()[:5], box.get_ydata()[:5]))
    ax.add_patch(plt.Polygon(corners, facecolor=facecolor))
    # Draw the median line back over what was just filled in.
    ax.plot(median.get_xdata()[:2], median.get_ydata()[:2], 'k')


def plot_boxplot(data_without_orig, data_with_orig, labels, filename):
    """Save a paired box plot comparing two families of measurement series.

    For every group index the series from ``data_without_orig`` is drawn as
    a white box left of the tick and the series from ``data_with_orig`` as a
    grey box right of it.  The medians of both series and their relative
    difference are printed to stdout.

    Parameters
    ----------
    data_without_orig : sequence of numeric sequences
        One series per group; the figure legend calls these
        'Uninstrumented'.
    data_with_orig : sequence of numeric sequences
        One series per group ('With Instrumentation').  Its length decides
        how many groups are processed.
    labels : sequence
        One entry per group; each is converted with ``int()`` and divided
        by 1024 to form the x tick text.
    filename : str
        Output path; the figure is written in EPS format.

    Notes
    -----
    Every value is divided by 1000 before plotting.  The scaled copies are
    new lists: the input sequences are no longer overwritten, so calling the
    function again with the same data does not rescale it a second time.
    """
    data_without = []
    data_with = []

    print("Medians (%s):" % filename)
    big_y = False
    for idx, with_series in enumerate(data_with_orig):
        without_series = data_without_orig[idx]

        median_without = numpy.median(without_series)
        median_with = numpy.median(with_series)

        # Large medians shift the legend boxes slightly to the right (below).
        if median_without/1000 > 1000:
            big_y = True
        print(" * %d: With: %f, Without: %f" % (idx, median_with, median_without))
        prct_change = (median_without-median_with)/median_without
        print(" * %d: Percent Change: %f" % (idx, prct_change))

        # Scale by 1/1000 (the original comment describes this as a
        # conversion to megabytes/sec).
        data_without.append([x/1000 for x in without_series])
        data_with.append([x/1000 for x in with_series])

    fig, ax1 = plt.subplots(figsize=(10,6))

    index = numpy.arange(len(data_without))+1
    bar_width = .1
    widths = numpy.ones(len(data_without))*bar_width*2
    bp = pylab.boxplot(data_without,
                       positions=index-bar_width,
                       widths=widths,
                       sym='')
    bp2 = pylab.boxplot(data_with,
                        positions=index+bar_width,
                        widths=widths,
                        sym='')

    for artists in (bp, bp2):
        plt.setp(artists['boxes'], color='black')
        plt.setp(artists['whiskers'], color='black')
        plt.setp(artists['fliers'], color='grey', marker='+')

    boxColors = ['white','grey']

    # White fill for the first family, grey for the second, group by group.
    for box, med, box2, med2 in zip(bp['boxes'], bp['medians'],
                                    bp2['boxes'], bp2['medians']):
        _fill_box(ax1, box, med, boxColors[0])
        _fill_box(ax1, box2, med2, boxColors[1])

    plt.grid(True)
    plt.xlim(0,len(labels)+1)
    # Divide the labels by 1024; the axis caption below presents the result
    # as megabytes (presumably the labels arrive in kilobytes - confirm).
    labels = [int(x)/1024 for x in labels]
    plt.xticks(index, labels)
    plt.xlabel("File Size (MB)", fontsize=20)
    plt.ylabel("Disk Throughput (MB/sec)", fontsize=20)

    # ``label1`` is the primary tick label (what ``tick.label`` aliased).
    for tick in ax1.xaxis.get_major_ticks():
        tick.label1.set_fontsize(15)
    for tick in ax1.yaxis.get_major_ticks():
        tick.label1.set_fontsize(15)

    # Legend boxes; only their horizontal position depends on big_y.
    if big_y:
        uninstrumented_x, instrumented_x = 0.16, 0.38
    else:
        uninstrumented_x, instrumented_x = 0.13, 0.35
    plt.figtext(uninstrumented_x, 0.18, 'Uninstrumented',
                backgroundcolor=boxColors[0], color='black', weight='roman',
                size=15,
                bbox=dict(facecolor=boxColors[0],
                          edgecolor='black',
                          boxstyle='round,pad=1'))
    plt.figtext(instrumented_x, 0.18, 'With Instrumentation',
                backgroundcolor=boxColors[1],
                color='white', weight='roman', size=15,
                bbox=dict(facecolor=boxColors[1],
                          edgecolor='black',
                          boxstyle='round,pad=1'))

    plt.tight_layout()
    plt.savefig(filename, format='eps', dpi=1000)
예제 #29
0
    else:

        # Just in case, replace Missing Values with zero:

        data[column].fillna(0, inplace=True)

        print 'Missing values replaced with zeros.'
        print ' '

        Col = preprocessing.scale(data[column])

        skness = skew(Col)
        xlabel = str(skness)
        figure = plt.figure()
        print 'Skewness =', skness
        figure.add_subplot(121)
        plt.hist(Col, facecolor='lightblue', alpha=0.75)
        plt.xlabel(
            " Skewness greater than zero shows large skewed distribution --> ")
        plt.title(column)
        plt.text(2, 100000, "Skewness: {0:.2f}".format(skness))

        figure.add_subplot(122)
        plt.boxplot(Col)
        plt.title("Skewed Distribution")
        plt.xlabel(xlabel)
        plt.show()

print '\nHasta la vista, human.\n'
                 for i in range(n_controls) if i!=n]
        test = control_covs[n]
        control_model.fit(train)
        control_fit_cv.append(control_model.log_lik(test))
        patient_fit_cv += np.array([control_model.log_lik(p) 
                                    for p in patient_covs])

    patient_fit_cv /= n_controls
    
    import matplotlib.pylab as pl
    pl.rcParams['text.usetex'] = True
    pl.rcParams['text.latex.preamble'] = r'\usepackage{amsfonts}'
    pl.figure(1, figsize=(1, 3))
    pl.clf()
    ax = pl.axes([.2, .2, .5, .7])
    pl.boxplot([control_fit_cv, patient_fit_cv], widths=.25)
    pl.plot(1.26*np.ones(len(control_fit_cv)), control_fit_cv, '+k',
            markeredgewidth=1)
    pl.plot(2.26*np.ones(len(patient_fits)),
            patient_fit_cv, '+k',
            markeredgewidth=1)
    pl.xticks((1.13, 2.13), ('controls', 'patients'), size=13)
    if WHITEN:
        title = 'Tangent\nspace'
    else:
        title = r'$\mathbb{R}^{n\times n}$'
    pl.text(.1, .1, title,
            transform=ax.transAxes,
            horizontalalignment='left',
            verticalalignment='bottom',
            size=12)
예제 #31
0
파일: util.py 프로젝트: bmcorser/Azimuth
def plot_all_metrics(metrics, gene_names, all_learn_options, save, plots=None, bottom=0.19):
    """Draw bar charts and box plots comparing methods on every metric.

    Parameters
    ----------
    metrics : dict
        ``metrics[method][metric]`` holds the values of one metric for one
        method.  The metric names are taken from the first method.  Metrics
        whose name contains ``'global'`` are drawn as one bar per method;
        all others are drawn per gene and additionally summarised in a box
        plot (presumably one value per entry of ``gene_names`` - confirm
        against the caller).
    gene_names : sequence
        Tick labels for the per-gene bar charts.
    all_learn_options : dict
        Its first key names the output files; the whole dict is pickled
        next to the figures when ``save`` is enabled.
    save : bool
        When equal to ``True``, pickle the inputs and save the figures
        below ``../results``.
    plots : container of str or None
        ``None`` enables everything; otherwise ``'gene level'`` enables the
        bar charts and ``'boxplots'`` the box plots.
    bottom : float
        Bottom margin handed to ``plt.subplots_adjust`` for the box plots.

    Raises
    ------
    AssertionError
        If the median of a non-global metric is NaN.
    """
    method_names = list(metrics.keys())
    num_methods = len(method_names)
    metrics_names = list(metrics[method_names[0]].keys())
    num_genes = len(gene_names)
    width = 0.9/num_methods
    ind = np.arange(num_genes)

    gene_level = plots is None or 'gene level' in plots
    # Deliberately an equality test: as before, only True (or a value that
    # compares equal to it) switches saving on.
    saving = save == True  # noqa: E712

    if saving:
        first_key = list(all_learn_options.keys())[0]
        # Built with the platform separator.  The former hard-coded
        # backslashes yield the same path on Windows but leave dirname()
        # empty on POSIX, where os.makedirs('') then fails.
        results_dir = os.path.join("..", "results")
        basefile = results_dir + os.sep + ("%s" % first_key)

        d = os.path.dirname(basefile)
        if not os.path.exists(d):
            os.makedirs(d)
        with open(basefile + ".plot.pickle", "wb") as f:
            pickle.dump([metrics, all_learn_options, gene_names], f)

    # Create the named figures up front so later plt.figure(metric) calls
    # reuse them with the intended size.
    for metric in metrics_names:
        if 'global' not in metric:
            plt.figure(metric, figsize=(20, 8))
        elif gene_level:
            plt.figure(metric, figsize=(12, 12))

    boxplot_labels = []
    boxplot_arrays = {}
    boxplot_median = {}

    for i, method in enumerate(method_names):
        boxplot_labels.append(method)
        color = plt.cm.Paired(1.*i/num_methods)
        for metric in metrics[method].keys():
            values = metrics[method][metric]

            if 'global' in metric:
                plt.figure(metric)
                plt.bar([i], values, 0.9, color=color, label=method)
                continue

            if gene_level:
                plt.figure(metric)
                plt.bar(ind+(i*width), values, width, color=color, label=method)

            median_metric = np.median(values)
            print("%s %s %s" % (method, metric, median_metric))
            assert not np.isnan(median_metric), "found nan for %s, %s" % (method, metric)

            # One column per method, in method order.
            column = np.array(values)[:, None]
            if metric not in boxplot_arrays:
                boxplot_arrays[metric] = column
                boxplot_median[metric] = [median_metric]
            else:
                boxplot_arrays[metric] = np.concatenate((boxplot_arrays[metric], column), axis=1)
                boxplot_median[metric].append(median_metric)

    for metric in metrics_names:
        if gene_level:
            plt.figure(metric)
            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            plt.ylabel(metric)

            if 'global' in metric:
                plt.xticks(range(num_methods), method_names, rotation=70)
            else:
                plt.xticks(ind+width, gene_names)
            plt.grid(True, which='both')
            plt.subplots_adjust(left = 0.05, right = 0.8)
        if saving:
            # NOTE(review): when the gene-level plots are disabled nothing
            # above selects this metric's figure, so the calls below act on
            # whichever figure is current - confirm this is intended.
            plt.xticks(ind+0.5, gene_names)
            if metric=='AUC':
                plt.ylim([0.5, 1.0])
            plt.savefig(basefile + "_" + metric + "_bar" + ".png")

        if (plots is None or "boxplots" in plots) and 'global' not in metric:
            plt.figure('Boxplot %s' % metric)

            # Order the boxes by decreasing median.
            sorted_boxplot = np.argsort(boxplot_median[metric])[::-1]

            plt.boxplot(boxplot_arrays[metric][:, sorted_boxplot])
            plt.ylabel(metric)
            plt.xticks(range(1, num_methods+1), np.array(boxplot_labels)[sorted_boxplot], rotation=70)
            plt.subplots_adjust(top = 0.97, bottom = bottom)

            if metric == 'RMSE':
                plt.ylim((1.0, 2.0))

        if saving:
            # Saves the current figure: the box plot when one was drawn in
            # this iteration, otherwise the figure that was already active.
            plt.savefig(basefile + "_" + metric + ".png")
예제 #32
0
def plot(et_name, iv_measure, single_eyetracker_results):
    """Save a box plot with one box per entry of *single_eyetracker_results*.

    Parameters
    ----------
    et_name : str
        Used in the window title, the plot title and the output file name.
    iv_measure : str
        Name of the plotted measure; used as the y-axis caption and in the
        titles and output file name.
    single_eyetracker_results : dict
        Its keys become the x tick labels and its values the datasets that
        are handed to ``plt.boxplot``.

    The figure is written to ``'<et_name>_<iv_measure>.png'`` and closed.
    Each box's median is printed above it (rounded to three decimals) and
    its mean is marked with a green star.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    # Materialise keys and values once so they can be indexed and stay
    # aligned with each other on both Python 2 and Python 3.
    distNames = list(single_eyetracker_results.keys())
    data = list(single_eyetracker_results.values())
    numDists = len(distNames)

    print('distNames: %s' % (distNames,))

    ###########################################
    fig = plt.figure(figsize=(10,6))
    # Set the title through the figure manager; the canvas-level method is
    # deprecated in newer matplotlib releases.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(et_name+' : '+iv_measure)
    ax1 = fig.add_subplot(111)
    plt.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)

    bp = plt.boxplot(data, notch=0, sym='', vert=1, whis=1.5)
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='blue')
    plt.setp(bp['fliers'], color='red', marker='+')

    # Add a horizontal grid to the plot, but make it very light in color
    # so we can use it for reading data values but not be distracting
    ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
                   alpha=0.5)

    # Hide the grid behind plot objects
    ax1.set_axisbelow(True)
    ax1.set_title('Comparison of Sample Selection Window Algorithms\n'+et_name+' : '+iv_measure)
    ax1.set_xlabel('Window Type')
    ax1.set_ylabel(iv_measure)

    label_color = 'darkkhaki'
    numBoxes = numDists

    # Redraw every median in black, remember its value for the labels
    # along the top, and mark the mean of the dataset with a green star.
    medians = []
    for med, values in zip(bp['medians'], data):
        median_x = med.get_xdata()[:2]
        median_y = med.get_ydata()[:2]
        plt.plot(median_x, median_y, 'k')
        medians.append(median_y[0])
        plt.plot([np.average(med.get_xdata())], [np.average(values)],
                 color='g', marker='*', markeredgecolor='k')

    # Set the axes ranges and axes labels
    ax1.set_xlim(0.5, numBoxes+0.5)
    bottom, top = ax1.get_ylim()
    top = top+0.1
    top = min(top, 10.0)  # the upper y limit is capped at 10
    bottom = bottom-0.1
    ax1.set_ylim(bottom, top)
    xtickNames = plt.setp(ax1, xticklabels=distNames)
    plt.setp(xtickNames, rotation=45, fontsize=8)

    # Due to the Y-axis scale being different across samples, it can be
    # hard to compare differences in medians across the samples. Add upper
    # X-axis tick labels with the sample medians to aid in comparison
    # (three decimal places of precision)
    pos = np.arange(numBoxes)+1
    upperLabels = [str(np.round(s, 3)) for s in medians]
    for x_pos, text in zip(pos, upperLabels):
        ax1.text(x_pos, top-(top*0.05), text,
                 horizontalalignment='center', size='x-small', weight='bold',
                 color=label_color)

    plt.savefig('%s_%s.png'%(et_name,iv_measure), bbox_inches='tight')
    plt.close()
예제 #33
0
# Notebook export of a ``%time`` line magic: inside IPython it times the
# statement that builds ``predicted_tags`` - for every test title, column 0
# of the array built from ``tagrank.get_ranking(title)``.
# NOTE(review): requires an IPython session; ``get_ipython`` is not defined
# under a plain Python interpreter.
get_ipython().magic(u'time predicted_tags = [np.array(tagrank.get_ranking(title))[:,0] for title in product_log_test.title_tokens]')
def calculate_nhits(pred_tags,true_tags):
    """
    Count the distinct predicted tags that also occur among the true tags
    """
    matched = set(pred_tags)
    # Keep only the predictions that are present in true_tags.
    matched.intersection_update(true_tags)
    return len(matched)
# Number of hits for every test item.  A list comprehension replaces the
# tuple-unpacking lambda (a syntax error on Python 3) and always yields a
# real list, which the box plot and the average further down rely on.
n_hits = [calculate_nhits(p, t)
          for p, t in zip(predicted_tags, product_log_test.query_tokens.values)]




import matplotlib.pylab as plt
import seaborn as sns
get_ipython().magic(u'matplotlib inline')

# Distribution of the per-item hit counts.
plt.boxplot(n_hits)
plt.title("Number of hits at top 5 tags")


# Single pre-formatted argument so the line prints identically on Python 2
# and Python 3 (the doubled space reproduces the former output).
print("average number of hits at Top 5:  %s" % np.average(n_hits))


# ##### Print some results for test data



def print_ranking(i):
    """Print a blank line followed by the product name of test row *i*.

    Reads the module-level ``test_data`` frame; *i* is a positional row
    index (it is passed to ``.iloc``).
    """
    # Single-argument calls behave the same on Python 2 and Python 3.
    print('')
    print(test_data.product_name.iloc[i])
예제 #34
0
for v, dist in zip(mse_t2_avg, mse_avg):
    m_ = np.count_nonzero(v >= dist)
    p2_.append(m_)
    
p2_ = np.array(p2_)/2000.

p3_ = []
mse_avg = mse_.mean(1)
mse_t3_avg = mse_t3.mean(1)
for v, dist in zip(mse_t3_avg, mse_avg):
    m_ = np.count_nonzero(v >= dist)
    p3_.append(m_)
    
p3_ = np.array(p3_)/2000.

pl.boxplot(mse_avg.T, showmeans=True, showfliers=False)
pl.scatter(np.arange(1,79), mse_t1_avg, c='b')
pl.scatter(np.arange(1,79)[p1_<=0.05], mse_t1_avg[p1_<=0.05], c='b', s=45)
pl.scatter(np.arange(1,79), mse_t2_avg, c='g')
pl.scatter(np.arange(1,79)[p2_<=0.05], mse_t2_avg[p2_<=0.05], c='g', s=45)
pl.scatter(np.arange(1,79), mse_t3_avg, c='r')
pl.scatter(np.arange(1,79)[p3_<=0.05], mse_t3_avg[p3_<=0.05], c='r', s=45)


############### Controls ###################
gm_can = np.genfromtxt('/home/robbis/Share/CAN_NET_GMperc.csv', skip_header=1, delimiter=',')
can_labels = ['MCC','R_aINS','L_pINS','L_AMY']
repetitions = 200
n_permutation = 2000
arg_ = np.argsort(np.abs(corr))[::-1]
mse_can = np.zeros((arg_.shape[0], len(algorithms_), repetitions, gm_can.shape[1]))