Example #1
 def __init__(self, path, filename):
     # Load the image and cache its size and raw pixel data
     new = Image.open(path + '/' + filename)
     self.size = new.size
     self.data = np.reshape(new.getdata(), self.size)
     # Clip the display range to the 5th-95th percentile of the pixel values
     self.pmax = prctile(new.getdata(), p=95)
     self.pmin = prctile(new.getdata(), p=5)
     self.title = 'New range = %i-%i' % (self.pmin, self.pmax)
     self.label = 'UT: %s\nExp. Time: %.1f sec\nTemp: %.1fC' % (
         new.info['UniversalTime'].strftime('%m-%d-%y %H:%M:%S'),
         new.info['ExposureTime'],
         new.info['CCDTemperature'])
Example #2
 def bootstrapMedian(data, N=5000):
     # determine 95% confidence intervals of the median
     M = len(data)
     percentile = [2.5,97.5]
     estimate = np.zeros(N)
     for n in range(N):
         bsIndex = np.random.random_integers(0,M-1,M)
         bsData = data[bsIndex]
         estimate[n] = mlab.prctile(bsData, 50)
     CI = mlab.prctile(estimate, percentile)
     return CI
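
A minimal usage sketch for the helper above; note that `mlab.prctile` was removed in matplotlib 3.x, where `np.percentile` is the modern equivalent:

import numpy as np
import matplotlib.mlab as mlab

data = np.random.normal(loc=10.0, scale=2.0, size=200)
lo, hi = bootstrapMedian(data, N=2000)  # 95% CI on the median
print 'median CI: %.2f - %.2f' % (lo, hi)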
Example #3
 def execute(self, seed=0):
     """Test the difference in means with bootstrapping.
     
     Data is drawn randomly from group1 and group2, with resampling.
     From these bootstraps, estimates with confidence intervals are 
     calculated for the mean of each group and the difference in means.
     
     The estimated difference is positive if group2 > group1.
     
     Sets: means1, CI_1, means2, CI_2, diffs, CI_diff, p_from_dist
     
     p_from_dist is the p-value estimated from the distribution of
     bootstrapped differences.
     """
     if len(self.data1) < self.min_bucket or len(self.data2) < self.min_bucket:
         #~ raise BootstrapError(
             #~ 'insufficient data in bucket in bootstrap_two_groups')
         raise ValueError(
             'insufficient data in bucket in bootstrap_two_groups')
     
     if seed is not None:
         np.random.seed(seed)
     
     # Generate random samples, shape (n_boots, len(group))
     self.idxs1 = np.random.randint(0, len(self.data1), 
         (self.n_boots, len(self.data1)))
     self.idxs2 = np.random.randint(0, len(self.data2), 
         (self.n_boots, len(self.data2)))
     
     # Draw from the data
     self.draws1 = self.data1[self.idxs1]
     self.draws2 = self.data2[self.idxs2]
     
     # Bootstrapped means of each group
     self.means1 = self.draws1.mean(axis=1)
     self.means2 = self.draws2.mean(axis=1)
     
     # CIs on group means
     self.CI_1 = mlab.prctile(self.means1, (2.5, 97.5))
     self.CI_2 = mlab.prctile(self.means2, (2.5, 97.5))
     
     # Bootstrapped difference between the groups
     self.diffs = self.means2 - self.means1
     self.CI_diff = mlab.prctile(self.diffs, (2.5, 97.5))
     
     # p-value
     self.p_from_dist = pvalue_of_distribution(self.diffs, 0)
     
     # save memory
     del self.idxs1
     del self.idxs2
     del self.draws1
     del self.draws2
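
The core resampling pattern here, a single vectorized `randint` draw of shape (n_boots, len(data)) per group, can be sketched standalone (a hypothetical, self-contained example):

import numpy as np
import matplotlib.mlab as mlab

data = np.random.normal(size=50)
n_boots = 1000
idxs = np.random.randint(0, len(data), (n_boots, len(data)))
means = data[idxs].mean(axis=1)        # one bootstrapped mean per row
CI = mlab.prctile(means, (2.5, 97.5))  # 95% CI on the mean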
Example #4
    def createAsaInfo(self):
        'Return True on error.'
        if self.showMessages:
            nTdebug( "Fetching WHATIF per-atom surface accessibility info..." )

        fileNames = glob.glob(os.path.join(self.whatIfDataDir, "wsvacc*.log"))

        self.allWhatIfInfo = {'chains': {}}
        for fileName in fileNames:
            if self.readWhatIfAsaInfoFile(fileName): # fills self.allWhatIfInfo
                nTerror("Failed %s when reading file." % (getCallerName()))
                return True
        # end for
        
        #
        # Now determine the median ASA for each atom
        #
        # whatIfInfo is used in the superclass whereas allWhatIfInfo was filled before.
        self.whatIfInfo = self.allWhatIfInfo
        d = self.whatIfInfo['chains']
        for chainCode in d.keys():
            for seqKey in d[chainCode].keys():
                for atomName in d[chainCode][seqKey]['atoms'].keys():
                    asaList = d[chainCode][seqKey]['atoms'][atomName]
                    asaList.sort()
                    # Reset the list to hold only the median ASA value
                    ml = mlab.prctile(asaList, [50])
                    d[chainCode][seqKey]['atoms'][atomName] = [ml[0]]
Example #5
def comp_histo(a, **kwargs):
    """Return plot-ready histogram (h,l), with Freedman-Diaconis' choice for
    optimal bin width if not fixed.

    See http://en.wikipedia.org/wiki/Histogram"""

    if 'bins' in kwargs:
        try:
            nbins = len(kwargs['bins'])
        except TypeError:               # 'bins' is a count, not a sequence
            nbins = kwargs['bins']
        print "Using default numpy histogram: nbins=%d" % nbins
        h,l = N.histogram(a, **kwargs)
    else:                               # Define optimal binning
        if 'range' in kwargs:
            vmin,vmax = kwargs['range']
        else:
            vmin,vmax = a.min(),a.max()
        # Freedman-Diaconis' choice for optimal bin width: 2*IQR/n**(1/3)
        q1,q3 = prctile(a, p=(25.,75.))
        bw = 2 * (q3-q1) / len(a)**(1./3.)
        nbins = int(round( (vmax-vmin)/bw ))
        print "Freedman-Diaconis optimal bin width: nbins=%d" % nbins
        h,l = N.histogram(a, bins=nbins, **kwargs)

    h = N.concatenate((h,[h[-1]]))  # Complete h
    #l = N.concatenate((l,[l[-1]+l[1]-l[0]])) # Not needed w/ new=True

    return h,l
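
As a sanity check of the Freedman-Diaconis rule used above: for n = 1000 points with an interquartile range of 1.0, the bin width is 2 * 1.0 / 1000**(1./3.), about 0.2. A usage sketch (assuming numpy imported as `N` and `prctile` in scope, as in the snippet):

import numpy as N
from matplotlib.mlab import prctile

a = N.random.standard_normal(1000)
h, l = comp_histo(a)             # Freedman-Diaconis binning
h2, l2 = comp_histo(a, bins=20)  # fixed number of bins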
Example #6
File: plot.py Project: cxrodgers/my
def harmonize_clim_in_subplots(fig=None, axa=None, clim=(None, None), 
    center_clim=False, trim=1):
    """Set clim to be the same in all subplots in figur
    
    f : Figure to grab all axes from, or None
    axa : the list of subplots (if f is None)
    clim : tuple of desired c-limits. If either or both values are
        unspecified, they are derived from the data.
    center_clim : if True, the mean of the new clim is always zero
        May overrule specified `clim`
    trim : does nothing if 1 or None
        otherwise, sets the clim to truncate extreme values
        for example, if .99, uses the 1% and 99% values of the data
    """
    # Which axes to operate on
    if axa is None:
        axa = fig.get_axes()
    axa = np.asarray(axa)

    # Two ways of getting new clim
    if trim is None or trim == 1:
        # Get all the clim
        all_clim = []        
        for ax in axa.flatten():
            for im in ax.get_images():
                all_clim.append(np.asarray(im.get_clim()))
        
        # Find covering clim and optionally center
        all_clim_a = np.array(all_clim)
        new_clim = [np.min(all_clim_a[:, 0]), np.max(all_clim_a[:, 1])]
    else:
        # Trim to specified prctile of the image data
        data_l = []
        for ax in axa.flatten():
            for im in ax.get_images():
                data_l.append(np.asarray(im.get_array()).flatten())
        data_a = np.concatenate(data_l)
        
        # New clim
        new_clim = list(mlab.prctile(data_a, (100.*(1-trim), 100.*trim)))
    
    # Take into account specified clim
    try:
        if clim[0] is not None:
            new_clim[0] = clim[0]
        if clim[1] is not None:
            new_clim[1] = clim[1]
    except IndexError:
        print "warning: problem with provided clim"
    
    # Optionally center
    if center_clim:
        new_clim = np.max(np.abs(new_clim)) * np.array([-1, 1])
    
    # Set to new value
    for ax in axa.flatten():
        for im in ax.get_images():
            im.set_clim(new_clim)
    
    return new_clim
Example #7
File: util.py Project: jingzbu/WAJAD
def HoeffdingRuleMarkov(beta, G, H, U, FlowNum):
    """
    Estimate the K-L divergence and the threshold by use of weak convergence
    ----------------
    beta: the false alarm rate
    G: the gradient
    H: the Hessian
    U: a sample path of the Gaussian empirical measure
    FlowNum: the number of flows
    ----------------
    """
    _, SampNum, _ = U.shape

    # Estimate the K-L divergence using a 2nd-order Taylor expansion:
    # for each sample path u, KL ~= G.u / sqrt(n) + u'Hu / (2n), n = FlowNum
    KL = []
    for j in range(0, SampNum):
        t = (1.0 / sqrt(FlowNum)) * np.dot(G, U[0, j, :]) + \
                (1.0 / 2) * (1.0 / FlowNum) * \
                    np.dot(np.dot(U[0, j, :], H), U[0, j, :])
        KL.append(np.array(t.real)[0])
    # The threshold is the empirical (1 - beta) quantile of the estimates
    eta = prctile(KL, 100 * (1 - beta))
    return eta
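
The threshold step is simply an empirical quantile: eta is the value the estimated divergence stays below with probability 1 - beta under the null. A minimal, hypothetical illustration:

import numpy as np
from matplotlib.mlab import prctile

beta = 0.05
KL = np.random.exponential(scale=0.01, size=1000)  # stand-in divergence estimates
eta = prctile(KL, 100 * (1 - beta))                # alarm threshold: 95th percentile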
Example #8
def bootstrapped_intercluster_mahalanobis(cluster1, cluster2, n_boots=1000,
    fix_covariances=True):
    """Bootstrap the intercluster distance.
    
    Returns:
        m - The mean distance
        CI - 95% confidence interval on the distance
        distances - an array of the distances measured on each boot
    """
    d_l = []
    
    # Determine the covariance matrices, or recalculate each time
    if fix_covariances:
        icov1 = np.linalg.inv(np.cov(cluster1, rowvar=0))
        icov2 = np.linalg.inv(np.cov(cluster2, rowvar=0))
    else:
        icov1, icov2 = None, None
    
    # Bootstrap
    for n_boot in range(n_boots):
        # Draw
        idxs1 = np.random.randint(0, len(cluster1), len(cluster1))
        idxs2 = np.random.randint(0, len(cluster2), len(cluster2))
        
        # Calculate and store
        d_l.append(intercluster_mahalanobis(
            cluster1[idxs1], cluster2[idxs2], icov1, icov2))
    
    # Statistics
    d_a = np.asarray(d_l)
    m = np.mean(d_a)
    CI = mlab.prctile(d_a, (2.5, 97.5))
    return m, CI, d_a
Example #9
def print_stats(name, x=None):
    "Prints simple stats"
    if type(name) is not StringType:
        x = name
        name = 'mean,stdv,rms,min,25%,median,75%,max: '
    if name == '__header__':
        print ''
        n = (80 - len(x)) / 2
        print n * ' ' + x
        print n * ' ' + len(x) * '-'
        print ''
        print '   Name       mean      stdv      rms      min     25%    median     75%      max'
        print ' ---------  -------  -------  -------  -------  -------  -------  -------  -------'
    elif name == '__sep__':
        print ' ---------  -------  -------  -------  -------  -------  -------  -------  -------'
    elif name == '__footer__':
        print ' ---------  -------  -------  -------  -------  -------  -------  -------  -------'
        print ''
    else:
        ave = x.mean()
        std = x.std()
        rms = sqrt(ave * ave + std * std)
        prc = prctile(x)
        print '%10s  %7.2f  %7.2f  %7.2f  %7.2f  %7.2f  %7.2f  %7.2f  %7.2f  '%\
            (name,ave,std,rms,prc[0],prc[1],prc[2],prc[3],prc[4])
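
A hypothetical call sequence for the table printer above (it assumes `sqrt` and `prctile` are imported at module level, e.g. from pylab, and `StringType` from the `types` module):

import numpy as np

print_stats('__header__', 'Example stats')
print_stats('x', np.random.normal(size=1000))
print_stats('__footer__')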
Example #10
File: util.py Project: jingzbu/WAJAD
def HoeffdingRuleMarkovRobust_(beta, G_list, H_list, U_list, FlowNum):
    """
    Estimate the K-L divergence and the threshold by use of weak convergence
    ----------------
    beta: the false alarm rate
    G_list: the gradients, one per candidate model
    H_list: the Hessians, one per candidate model
    U_list: sample paths of the Gaussian empirical measure, one per model
    FlowNum: the number of flows
    ----------------
    """
    _, SampNum, _ = U_list[0].shape

    # Estimate K-L divergence using 2nd-order Taylor expansion
    KL = []
    for j in range(0, SampNum):
        KL_est_list = []
        for G, H, U in zip(G_list, H_list, U_list):
            KL_est = (1.0 / sqrt(FlowNum)) * np.dot(G, U[0, j, :]) + \
                     (1.0 / 2) * (1.0 / FlowNum) * \
                      np.dot(np.dot(U[0, j, :], H), U[0, j, :])
            KL_est = np.array(KL_est.real)[0]
            KL_est_list.append(KL_est)
        KL.append(min(KL_est_list))
    eta = prctile(KL, 100 * (1 - beta))

    return eta
Example #11
    def compute(self):
        """Detect RFI
        """
        median_size = (self.median_size_time, self.median_size_freq)

        # Remove the smooth background with a median filter
        data = self.data - sp_dip.median_filter(self.data, size=median_size)
        data1 = np_median(data, 0)
        # Sliding 90th percentile over 10-channel windows
        thl = []
        for ii in xrange(data1.shape[0] - 9):
            thl.append(prctile(data1[ii:ii + 10], p=90))
        th = self.th_k * np_median(thl)
        for ii in xrange(data1.shape[0]):
            if data1[ii] > th:
                z, p_value = sp_normaltest(data[:, ii])
                if p_value < self.p_th:
                    if self.is_out_selected('Not_normal'):
                        self.flag_results['Not_normal'].flag_data[:, ii] = 1
                else:
                    if self.is_out_selected('Normal'):
                        self.flag_results['Normal'].flag_data[:, ii] = 1
        return self.flag_results
Example #12
def calc_kde1d(Xin, N=256, range=None, Verbose=False, name=None):
    """
    Calculates 1D KDE. On input,

       Xin   ---  input data array
       N     ---  number of bins to evaluate KDE
       range ---  range of Xin values to work with

    Example:

       bins, P = calc_kde1d(obs,range=(-2,2))
       
    """

    try:
        X = Xin.data[Xin.mask == False].ravel()  # drop masked values
    except AttributeError:
        X = Xin
    if range is None:
        prc = prctile(X)
        range = [prc[0], prc[4]]  # default to the full data range
    bins = linspace(range[0], range[1], N)
    if Verbose:
        if name is not None:
            print name
        print 'Evaluating 1D kernel with %d observations' % len(X)
    kernel = stats.kde.gaussian_kde(X)
    if Verbose:
        print 'Evaluating 1D KDE with %d bins' % N
    P = kernel(bins)
    return (bins, P)
Example #13
def difference_CI_bootstrap_wrapper(data, **boot_kwargs):
    """Given parsed data from single ulabel, return difference CIs.
    
    data : same format as bootstrap_main_effect expects
    
    Will calculate the following statistics:
        means : mean of each condition, across draws
        CIs : confidence intervals on each condition
        mean_difference : mean difference between conditions
        difference_CI : confidence interval on difference between conditions
        p : two-tailed p-value of 'no difference'
    
    Returns:
        dict of those statistics
    """
    # Yields a 1000 x 2 x N_trials matrix:
    # 1000 draws from the original data, under both conditions.
    bh = bootstrap_main_effect(data, meth=keep, **boot_kwargs)

    # Find the distribution of means of each draw, across trials
    # This is 1000 x 2, one for each condition
    # hist(means_of_all_draws) shows the comparison across conditions
    means_of_all_draws = bh.mean(axis=2)

    # Confidence intervals across the draw means for each condition
    condition_CIs = np.array([
        mlab.prctile(dist, (2.5, 97.5)) for dist in means_of_all_draws.T])

    # Means of each ulabel (centers of the CIs, basically)
    condition_means = means_of_all_draws.mean(axis=0)

    # Now the CI on the *difference between conditions*
    difference_of_conditions = np.diff(means_of_all_draws).flatten()
    difference_CI = mlab.prctile(difference_of_conditions, (2.5, 97.5)) 

    # p-value of 0. in the difference distribution
    cdf_at_value = np.sum(difference_of_conditions < 0.) / \
        float(len(difference_of_conditions))
    p_at_value = 2 * np.min([cdf_at_value, 1 - cdf_at_value])
    
    # Should probably floor the p-value at 1/n_boots

    return {'p' : p_at_value, 
        'means' : condition_means, 'CIs': condition_CIs,
        'mean_difference': difference_of_conditions.mean(), 
        'difference_CI' : difference_CI}
Example #14
    def get_sample_percentiles(self, percents):
        'It returns the percentiles given a percent list'
        if not self._sample:
            raise ValueError('No data to calculate percentiles')

        vect = numpy.ravel(self.sample)
        percentiles = mlab.prctile(vect, percents)
        return list(percentiles)
Example #15
def percentile_box_plot(ax, data, indexer=None, box_top=75,
                        box_bottom=25, whisker_top=98, whisker_bottom=2):
    if indexer is None:
        indexed_data = zip(range(1,len(data)+1), data)
    else:
        indexed_data = [(indexer(datum), datum) for datum in data]

    for index, x in indexed_data:
        if whisker_top is not None and whisker_bottom is not None:
            bp = boxplotter(*prctile(x, (50, box_top, box_bottom, whisker_top, whisker_bottom)))
            bp.draw_on(ax, index, data=x)
        elif whisker_top is None and whisker_bottom is None:
            bp = boxplotter(*prctile(x, (50, box_top, box_bottom)))
            bp.draw_on(ax, index)
        else:
            raise Exception("Just one whisker? That's silly.")
Example #16
File: outliers.py Project: RONNCC/bumps
def identify_outliers(test, chains, x):
    """
    Determine which chains have converged on a local maximum much lower than
    the maximum likelihood.

    *test* is the name of the test to use (one of IQR, Grubbs, Mahal or none).
    *chains* is a set of log likelihood values of shape (chain len, num chains)
    *x* is the current population of shape (num vars, num chains)

    See :module:`outliers` for details.
    """
    # Determine the mean log density of the active chains
    v = mean(chains, axis=0)

    # Check whether any of these active chains are outlier chains
    test = test.lower()
    if test == 'iqr':
        # Derive the upper and lower quartile of the chain averages
        Q1,Q3 = prctile(v,[25,75])
        # Derive the Inter Quartile Range (IQR)
        IQR = Q3 - Q1
        # See whether there are any outlier chains
        outliers = where(v < Q1 - 2*IQR)[0]

    elif test == 'grubbs':
        # Compute zscore for chain averages
        zscore = (mean(v) - v) / std(v, ddof=1)
        # Determine t-value of one-sided interval
        N = len(v)
        t2 = tinv(1 - 0.01/N, N-2)**2  # 95% interval
        # Determine the critical value
        Gcrit = ((N - 1)/sqrt(N)) * sqrt(t2/(N-2 + t2))
        # Then check against this
        outliers = where(zscore > Gcrit)[0]

    elif test == 'mahal':
        # Use the Mahalanobis distance to find outliers in the population
        alpha = 0.01
        Npop, Nvar = x.shape
        Gcrit = ACR(Nvar,Npop-1,alpha)
        #print "alpha",alpha,"Nvar",Nvar,"Npop",Npop,"Gcrit",Gcrit
        # Find which chain has minimum log_density
        minidx = argmin(v)
        # Then check the Mahalanobis distance of the current point to other chains
        d1 = mahalanobis(x[minidx,:], x[minidx!=arange(Npop),:])
        #print "d1",d1,"minidx",minidx
        # and see if it is an outlier
        outliers = [minidx] if d1 > Gcrit else []

    elif test == 'none':
        outliers = []

    else:
        raise ValueError("Unknown outlier test "+test)

    return outliers
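
A hedged sketch of just the IQR branch, with a planted outlier chain (hypothetical data; `prctile` from matplotlib.mlab as in the source):

import numpy as np
from matplotlib.mlab import prctile

v = np.random.normal(size=20)    # mean log-density per chain
v[3] = -50.0                     # one chain stuck far below the rest
Q1, Q3 = prctile(v, [25, 75])
outliers = np.where(v < Q1 - 2 * (Q3 - Q1))[0]  # flags chain 3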
Example #17
def do_kde(X, range=None, N=256):
    # Default the evaluation range to the data's min and max
    # (prctile's default percentiles are 0, 25, 50, 75, 100)
    if range is None:
        prc = prctile(X.ravel())
        a = prc[0]
        b = prc[4]
    else:
        a, b = range
    bins = linspace(a, b, N)
    kernel = kde.gaussian_kde(X.ravel())
    return bins, kernel(bins)
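
A minimal usage sketch (the snippet assumes pylab-style imports, i.e. `linspace` and `scipy.stats.kde` in scope):

import numpy as np
bins, pdf = do_kde(np.random.normal(size=500))  # evaluate the KDE on 256 bins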
Example #18
File: util.py Project: jingzbu/WAJAD
 def ThresCal(self):
     SampNum = 1000
     KL = []
     for i in range(0, SampNum):
         x = chain(self.mu_0, self.P, self.n)
         mu = np.reshape(self.mu, (self.N, self.N))
         # Get the actual relative entropy (K-L divergence)
         KL.append(KL_est(x, mu))
     eta = prctile(KL, 100 * (1 - self.beta))
     return eta
Example #19
File: util.py Project: jingzbu/ROCHM
 def ThresCal(self):
     SampNum = 1000
     self.KL = []
     for i in range(0, SampNum):
         x = chain(self.mu_0, self.P, self.n)
         mu = np.reshape(self.mu, (self.N, self.N))
         self.KL.append(KL_est(x, mu))  # the actual relative entropy (K-L divergence)
     self.eta = prctile(self.KL, 100 * (1 - self.beta))
     return self.KL, self.eta
Example #20
File: stats.py Project: mcleonard/memory
 def prctile(self, p=(2.5, 97.5)):
     ''' Returns the given percentiles of the bootstrapped statistic.
     
     Arguments
     ---------
     p :
         A sequence of percentile values or a scalar
     '''
     
     from matplotlib.mlab import prctile
     return prctile(self.dist, p = p)
Example #21
File: lomb.py Project: martindurant/misc
def modeifyer(times,fluxes,window=500,p=20,minpoints=10):
    """Uses percentile p of points around each datapoint "flux",
    being within time window to detrend fluxes. Returns
    corrected fluxes. For now done with a slow loop..."""
    detrend = fluxes.copy()
    for i in range(len(times)):
        near_fluxes = fluxes[where((times<times[i]+window/2)*(
            times>times[i]-window/2))]
        trend = prctile(near_fluxes,p)
        detrend[i] = fluxes[i] - trend
    return detrend
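
A hypothetical usage sketch: detrend a slow linear drift from a noisy light curve (assumes pylab-style imports for `where` and `prctile`, as in the source module):

import numpy as np

times = np.arange(0.0, 5000.0, 10.0)
fluxes = 0.001 * times + np.random.normal(size=len(times))
flat = modeifyer(times, fluxes, window=500, p=20)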
Example #22
def _calculate_percentiles(numbers, percents):
    'It calculates the percentiles for some numbers'
    #we need a numpy array
    if 'any' not in dir(numbers):
        numbers = numpy.ravel(numbers)
    if not numbers.any():
        raise ValueError('No data to calculate percentiles')

    mlab = sys.modules['matplotlib.mlab']

    percentiles = mlab.prctile(numbers, percents)
    return list(percentiles)
Example #23
def plot_kde(X, a=None, b=None, N=256, Title=None, Label=None):
    # If either bound is missing, fall back to the data's min and max
    if a is None or b is None:
        prc = prctile(X.ravel())
        a = prc[0]
        b = prc[4]
    if Title is None:
        Title = 'Kernel Density Function'
    bins = linspace(a, b, N)
    kernel = kde.gaussian_kde(X.ravel())
    plot(bins, kernel(bins))
    ylabel('PDF')
    title(Title)
Example #24
File: stats.py Project: cxrodgers/my
def bootstrap_regress(x, y, n_boot=1000):
    from matplotlib import mlab

    x = np.asarray(x)
    y = np.asarray(y)

    m_l, b_l = [], []
    for n in range(n_boot):
        msk = np.random.randint(0, len(x), size=len(x))
        m, b, rval, pval, stderr = scipy.stats.stats.linregress(x[msk], y[msk])
        m_l.append(m)
        b_l.append(b)

    res = {
        "slope_m": np.mean(m_l),
        "slope_l": mlab.prctile(m_l, p=2.5),
        "slope_h": mlab.prctile(m_l, p=97.5),
        "intercept_m": np.mean(b_l),
        "intercept_l": mlab.prctile(b_l, p=2.5),
        "intercept_h": mlab.prctile(b_l, p=97.5),
    }
    return res
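
A quick usage sketch (hypothetical data; `scipy` must be importable since the function calls `scipy.stats.stats.linregress`):

import numpy as np
import scipy.stats

x = np.arange(50.0)
y = 3.0 * x + 1.0 + np.random.normal(size=50)
fit = bootstrap_regress(x, y, n_boot=500)
print "slope %.2f (%.2f-%.2f)" % (fit["slope_m"], fit["slope_l"], fit["slope_h"])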
Example #25
def test_prctile():
    # test odd lengths
    x=[1,2,3]
    assert mlab.prctile(x,50)==np.median(x)

    # test even lengths
    x=[1,2,3,4]
    assert mlab.prctile(x,50)==np.median(x)

    # derived from email sent by jason-sage to MPL-user on 20090914
    ob1=[1,1,2,2,1,2,4,3,2,2,2,3,4,5,6,7,8,9,7,6,4,5,5]
    p        = [0,   75, 100]
    expected = [1,  5.5,   9]

    # test vectorized
    actual = mlab.prctile(ob1,p)
    assert np.allclose( expected, actual )

    # test scalar
    for pi, expectedi in zip(p,expected):
        actuali = mlab.prctile(ob1,pi)
        assert np.allclose( expectedi, actuali )
Example #26
def main_chain(img_name, template_name, blursize=5, cliplim=3.0, gridsize=8):

    # FIXME what if foscam moves, then blind offset-from-template method will not work robustly, will it?
    # apply blurring and CLAHE to small (skinny garage door) roi
    img, final, xywh_template, topleft_sgd, botright_sgd = blurred_histogram_equalization(
        img_name, template_name, blursize=blursize, cliplim=cliplim, gridsize=gridsize)

    # histogram of skinny garage door subset after image processing
    colors = ('b', )
    fig, ax = plt.subplots(figsize=(12, 8))

    # ith-channel of skinny garage door (roi1) after image processing
    i = 0
    c = colors[i]
    sgd = final[topleft_sgd[1]:botright_sgd[1],
                topleft_sgd[0]:botright_sgd[0]][:, :, i]  # i = 0 for Luminance

    intensity_bins = range(0, 256)
    n, bins, patches = ax.hist([sgd],
                               intensity_bins,
                               normed=1,
                               color=c,
                               histtype='step',
                               cumulative=True,
                               label='Color: ' + c)

    # FIXME we may not always want histogram plot (maybe just during "gather")

    # TODO with each image file, always ratchet up a running sum type of histogram OR db each for future summing

    # TODO always put percentiles (10th, 20th, 30th, ... 90th) into db table!?

    # percentiles
    percs = mlab.prctile([sgd], p=np.arange(10.0, 100.0, 10.0))
    print percs

    # tidy up the figure
    ax.grid(True)
    #ax.legend(loc='right')
    #ax.set_title('Cumulative Step Histograms')
    ax.set_title('Cumulative Step Histogram, Blue Channel, %s' % img_name)
    ax.set_xlabel('Pixel [intensity?]')
    ax.set_ylabel('Likelihood of Occurrence')
    plt.xlim([0, 256])

    # save cumulative histogram figure as _chist.jpg
    outname = img_name.replace('.jpg', '_chist.jpg')
    plt.savefig(outname)
    print 'open -a Firefox file://%s' % outname

    return img, final, xywh_template
Example #27
def simple_bootstrap(data, n_boots=1000, min_bucket=20):
    if len(data) < min_bucket:
        raise BootstrapError("too few samples")
    
    res = []
    data = np.asarray(data)
    for boot in range(n_boots):
        idxs = np.random.randint(0, len(data), len(data))
        draw = data[idxs]
        res.append(np.mean(draw))
    res = np.asarray(res)
    CI = mlab.prctile(res, (2.5, 97.5))
    
    return res, res.mean(), CI
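
A minimal usage sketch (BootstrapError is defined elsewhere in the source module; here we only need enough samples to avoid it):

import numpy as np

data = np.random.exponential(scale=2.0, size=100)
res, boot_mean, CI = simple_bootstrap(data)
print 'mean %.2f, 95%% CI %.2f-%.2f' % (boot_mean, CI[0], CI[1])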
Example #28
File: util.py Project: jingzbu/TAHTMA
def HoeffdingRuleMarkov(beta, rho, G, H, W, Chi, FlowNum):
    """
    Estimate the K-L divergence and the threshold by use of weak convergence
    ----------------
    beta: the false alarm rate
    rho: scaling constant used only by the commented-out simplified formula
    G: the gradient
    H: the Hessian
    W: a sample path of the Gaussian empirical measure
    Chi: a sample path of the "Chi-Square" estimation
    FlowNum: the number of flows
    ----------------
    """
    _, SampNum, N = W.shape  # Here, N equals the number of states in the new chain Z

    # Estimate K-L divergence using 2nd-order Taylor expansion
    KL_1 = []
    for j in range(0, SampNum):
        t = (1.0 / sqrt(FlowNum)) * np.dot(G, W[0, j, :]) + \
                (1.0 / 2) * (1.0 / FlowNum) * \
                    np.dot(np.dot(W[0, j, :], H), W[0, j, :])
        KL_1.append(np.array(t.real)[0])
    # Get the thresholds as empirical (1 - beta) quantiles
    eta1 = prctile(KL_1, 100 * (1 - beta))
    KL_2 = [Chi[idx] / (2 * FlowNum) for idx in xrange(len(Chi))]
    # Alternatively, the simplified formula:
    # eta2 = 1.0 / (2 * FlowNum) * rho * chi2.ppf(1 - beta, N)
    eta2 = prctile(KL_2, 100 * (1 - beta))
    return KL_1, KL_2, eta1, eta2
Example #29
def bootstrapMedian(data, N=5000):
    '''Bootstrapper to refine the estimate of the median from data.
    N = number of bootstrap iterations
    M = number of data points
    Returns (med, CI, estimate): the bootstrap estimate of the median,
    its 95% confidence interval, and the full distribution of estimates.
    '''
    import numpy as np
    import matplotlib.mlab as mlab

    M = len(data)
    percentile = 50

    estimate = np.array([])
    for k in range(N):
        bsIndex = np.random.random_integers(0,M-1,M)
        bsData = data[bsIndex]
        tmp = mlab.prctile(bsData, percentile)
        estimate = np.hstack((estimate, tmp))


    CI = mlab.prctile(estimate, [2.5,97.5])
    med = np.mean(estimate)

    return med, CI, estimate
Example #30
def calc_kde2d(x_values,
               y_values,
               x_range=None,
               y_range=None,
               Nx=256,
               Ny=256,
               npz=None,
               Verbose=True,
               name=None):

    if Verbose:
        if name is not None:
            print "[] ", name
        print "Starting the 2D kernel density estimation with %d data points..."\
              %len(x_values)

    kernel = stats.kde.gaussian_kde(_cat(x_values, y_values))

    if x_range is None:
        prc = prctile(x_values)
        x_range = [prc[0], prc[4]]

    if y_range is None:
        y_range = x_range

    x_bins = linspace(x_range[0], x_range[1], Nx)
    y_bins = linspace(y_range[0], y_range[1], Ny)

    if Verbose:
        print "Evaluating 2D kernel on grid with (Nx,Ny)=(%d,%d) ..." % (Nx,
                                                                         Ny)

    X, Y = meshgrid(x_bins, y_bins)  # each has shape (Ny,Nx)
    P = kernel(_cat(X, Y))  # shape is (Ny*Nx)
    P = reshape(P, X.shape)

    if Verbose:
        print "X, Y, P shapes: ", X.shape, Y.shape, P.shape

    #   Save to file
    #   ------------
    if npz is not None:
        print "Saving 2D KDE to file <" + npz + "> ..."
        savez(npz, pdf=P, x_bins=x_bins, y_bins=y_bins)

    return (x_bins, y_bins, P)
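
A hypothetical call (the module is assumed to provide pylab-style imports plus the `_cat` helper that stacks x and y for `gaussian_kde`):

import numpy as np

x = np.random.normal(size=300)
y = 0.5 * x + np.random.normal(scale=0.5, size=300)
x_bins, y_bins, P = calc_kde2d(x, y, Nx=64, Ny=64)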
Example #31
    def test_Median(self):
        'test median'
        # Wiki: if there is an even number of observations, then there is no
        # single middle value; the median is then usually defined to be the
        # mean of the two middle values. [1][2]
        lol = [
#              [], # fails
               [1.2],
               [1.0, 2.0],  # gives 1.5 (matplotlib 1.0.1) or 2.0 (matplotlib 0.99.3)
               [1.0, 2.0, 4.0],
               ]
        expectedMedianList              = [1.2, 1.5, 2.0]  # matplotlib 1.0.1
        expectedMedianListOldMatplotlib = [1.2, 2.0, 2.0]  # matplotlib 0.99.3
        for i, floatList in enumerate(lol):
            ml = mlab.prctile(floatList, [50])
            nTdebug("Found: %s and expected (by new matplotlib): %s" % (ml[0], expectedMedianList[i]))
            if ml[0] != expectedMedianList[i]:
                self.assertEqual(ml[0], expectedMedianListOldMatplotlib[i])
Example #32
File: demo_box.py Project: dplass/etframes
def boxpoints(d, outlier_distance=1.5):
    # implementation pretty much the same as matplotlib axes.boxplot

    # get median and quartiles
    q1, med, q3 = mlab.prctile(d, [25, 50, 75])

    # points beyond outlier_distance * IQR from the quartiles are outliers
    iq = q3 - q1
    hi_val = q3 + outlier_distance*iq
    lo_val = q1 - outlier_distance*iq

    print iq, q1, q3, '---', hi_val, lo_val
    outliers = r_[d[d > hi_val], d[d < lo_val]]
    inliers = list(set(d) - set(outliers))
    min_without_outliers = min(inliers)
    max_without_outliers = max(inliers)

    return outliers, min_without_outliers, q1, med, q3, max_without_outliers
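
A small usage sketch with two planted outliers (hypothetical data; `mlab` and `r_` come from the module's matplotlib/numpy imports):

import numpy as np

d = np.r_[np.random.normal(size=100), 9.0, -9.0]
outliers, lo, q1, med, q3, hi = boxpoints(d)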
Example #33
def plot_transparent_histogram(arr,
                               ax,
                               frame_width,
                               frame_height,
                               upper_prctile_clim=95,
                               cmap=plt.cm.gray_r):
    """Imshow a histogram with zero values transparent
    
    arr : single 2d array to plot. Should be non-negative
    frame_width, frame_height : passed to imshow to get the data limits right
    
    All zero bins will be transparent. The image color limits will be 
    set to the `upper_prctile_clim` percentile of non-zero values (95th by default).
    """
    # Determine the transparent threshold and upper clim
    vals = arr.flatten()
    vals = vals[vals > 0]
    transparent_threshold = vals.min()
    clim_upper = prctile(vals, upper_prctile_clim)

    # Mask the data to make zero bins transparent
    # We use .99 to avoid floating point comparison problems
    masked_data = np.ma.masked_where(arr < transparent_threshold * .99, arr)

    # Plot
    im = my.plot.imshow(
        masked_data,
        ax=ax,
        xd_range=(0, frame_width),
        yd_range=(0, frame_height),
        axis_call='image',
        cmap=cmap,
        skip_coerce=True,
    )

    # Set the clim to go from 0 to upper
    im.set_clim((0, clim_upper))

    return im
Example #34
File: util.py Project: jingzbu/WAJAD
def HoeffdingRuleMarkovRobust(beta, G_1, H_1, U_1, G_2, H_2, U_2, G_3, H_3,
                              U_3, FlowNum):
    """
    Estimate the K-L divergence and the threshold by use of weak convergence
    ----------------
    beta: the false alarm rate
    G_1, G_2, G_3: the gradients, one per candidate model
    H_1, H_2, H_3: the Hessians, one per candidate model
    U_1, U_2, U_3: sample paths of the Gaussian empirical measure
    FlowNum: the number of flows
    ----------------
    """
    _, SampNum, _ = U_1.shape

    # Estimate K-L divergence using 2nd-order Taylor expansion
    KL = []
    for j in range(0, SampNum):
        t_1 = (1.0 / sqrt(FlowNum)) * np.dot(G_1, U_1[0, j, :]) + \
                (1.0 / 2) * (1.0 / FlowNum) * \
                    np.dot(np.dot(U_1[0, j, :], H_1), U_1[0, j, :])
        t_2 = (1.0 / sqrt(FlowNum)) * np.dot(G_2, U_2[0, j, :]) + \
                (1.0 / 2) * (1.0 / FlowNum) * \
                    np.dot(np.dot(U_2[0, j, :], H_2), U_2[0, j, :])
        t_3 = (1.0 / sqrt(FlowNum)) * np.dot(G_3, U_3[0, j, :]) + \
                (1.0 / 2) * (1.0 / FlowNum) * \
                    np.dot(np.dot(U_3[0, j, :], H_3), U_3[0, j, :])
        t1 = np.array(t_1.real)[0]
        t2 = np.array(t_2.real)[0]
        t3 = np.array(t_3.real)[0]
        # Robust estimate: keep the minimum divergence across the three models
        KL.append(min([t1, t2, t3]))
    eta = prctile(KL, 100 * (1 - beta))
    return eta
Example #35
        datas = [np.recfromtxt(f, names = True, case_sensitive = True) for f in files]
        data = np.ma.concatenate(datas)
        desired_unit = dict(O3 = 'ppb', GMAO_TEMP = 'K', PRESS = 'hPa', TEMP = 'K')
        unit_factor = {'ppt': 1e12, 'ppb': 1e9}
        pfile.createDimension('time', data.shape[0])
        for ki, key in enumerate(data.dtype.names):
            typecode = data[key].dtype.char
            if typecode not in ('c', 'S'):
                unit = desired_unit.get(key, 'ppt')
                factor = unit_factor.get(unit, 1)
                values = np.ma.masked_values(data[key], -1000) * factor
            else:
                unit = 'unknown'
                values = data[key]
            pfile.createVariable(key, typecode, dimensions = ('time',), units = unit, values = values)


if __name__ == '__main__':
    import sys
    bfile1 = flightlogs(sys.argv[1:])
    from matplotlib.mlab import prctile
    for label, key in [('O3', 'O3[:]'), ('NO2', 'NO2[:]')]:
        bvar = eval(key, None, bfile1.variables)
        b2var = eval(key, None, bfile1.variables)
        assert((bvar == b2var).all())
        print('\n%s (BASE: %6.2f)' % (label, bvar.mean()), file = sys.stdout)
        print('\n      BASE:', sep = '', file = sys.stdout)
        prctile(bvar, np.ma.arange(.1, 1., .1)* 100).tofile(sys.stdout, sep = ', ', format = '%6.2f')
    print('', file = sys.stdout)
    
Example #36
def read_state_trajectories():
    global state_array, NUM_DIM, NUM_STATES, TRAJ_LEN
    
    state_trajs = []
    to_put = []
    rewards = []

    trajs = open("state_trajectories.dat", 'r')
    if trajs:
        lines = trajs.readlines()
        for l in lines:
            s = l.split('\t')
            
            num_steps = len(s) - 1
            if len(s) > 3:
                to_put = [int(s[x]) for x in range(num_steps)]
                state_trajs.append(to_put)
                
                if len(to_put) > TRAJ_LEN:
                    TRAJ_LEN = len(to_put)
            else:
                rewards.append(float(s[1]))

    trajs.close()
    
    num_traj = len(state_trajs)
    for ct in range(num_traj):
        last = state_trajs[ct][-1]
        curr_len = len(state_trajs[ct])
        for ti in np.linspace(curr_len, TRAJ_LEN-1, TRAJ_LEN-curr_len):
            state_trajs[ct].append(last)

    state_trajs = np.array(state_trajs)
    traj_len = len(state_trajs[0])
    print "num_traj: ", num_traj , " traj_len: ", traj_len, " reward: ", np.average(rewards)
    
    """
    fig = figure(3)
    fig.add_subplot(111, aspect='equal')
    # create a hexbin map now for each trajectory
    for i in range(len(state_trajs)):
        curr_traj = np.array([ [state_array[x,0], state_array[x,1]] for x in state_trajs[i] ] )
        clf()
        scatter( curr_traj[:,0], curr_traj[:,1], marker='o', c='y', s= 25, alpha=0.7)
        #hexbin(curr_traj[:,0], curr_traj[:,1], gridsize=10, cmap=cm.get_cmap('Jet'), alpha=0.9, mincnt=1)
        fig.savefig("movie/"+str(i)+".png")
    """
    
    """
    if NUM_DIM==2:
        fig = figure(1)
        ax = fig.add_subplot(111, aspect='equal')
        for i in range(len(state_trajs)):
            curr_traj = np.array([ [state_array[x,i] for i in range(NUM_DIM)] for x in state_trajs[i] ] )

            plot(curr_traj[:,0], curr_traj[:,1], 'b-', lw=0.5, alpha=0.2)

            circle = Circle( (curr_traj[0,0], curr_traj[0,1]), 0.01, fc='red', alpha = 0.4)
            ax.add_patch(circle)
            circle = Circle( (curr_traj[traj_len-1,0], curr_traj[traj_len-1,1]), 0.01, fc='green', alpha = 0.4)
            ax.add_patch(circle)
    """

    fig = figure(2)
    state_traj_x = []
    state_traj_y = []

    for i in range(num_traj):
        curr_traj = np.array([ [state_array[x,j] for j in range(NUM_DIM)] for x in state_trajs[i] ] )
        
        tmp = np.array([state_array[x,0] for x in state_trajs[i]])
        state_traj_x.append(tmp)
        
        if NUM_DIM == 2:
            tmp = np.array([state_array[x,1] for x in state_trajs[i]])
            state_traj_y.append(tmp)

        #subplot(111)
        #plot(curr_traj[:,0], 'b-', lw=0.5, alpha=0.10)
        #subplot(212)
        #plot(curr_traj[:,1], 'ro', lw=0.5, alpha=0.05)
    
    state_traj_x = np.array(state_traj_x)
    state_traj_y = np.array(state_traj_y)
    
    print state_traj_x.shape, state_traj_y.shape

    state_traj_x_percentile_10 = np.array([mlab.prctile(state_traj_x[:,i],p=10) for i in range(TRAJ_LEN)])
    state_traj_x_percentile_50 = np.array([mlab.prctile(state_traj_x[:,i],p=50) for i in range(TRAJ_LEN)])
    state_traj_x_percentile_90 = np.array([mlab.prctile(state_traj_x[:,i],p=90) for i in range(TRAJ_LEN)])
    state_traj_x_percentile = np.array([state_traj_x_percentile_10, state_traj_x_percentile_90])

    if NUM_DIM == 2:
        state_traj_y_percentile_10 = np.array([mlab.prctile(state_traj_y[:,i],p=10) for i in range(TRAJ_LEN)])
        state_traj_y_percentile_90 = np.array([mlab.prctile(state_traj_y[:,i],p=90) for i in range(TRAJ_LEN)])
        state_traj_y_percentile = np.array([state_traj_y_percentile_10, state_traj_y_percentile_90])
        
        subplot(211)
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), np.average(state_traj_x, axis=0), 'b-', label='mean')
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), state_traj_x_percentile_10, 'b--', label='10/90 percentile')
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), state_traj_x_percentile_90, 'b--')
        legend()
        grid()
        ylabel('x (t)')
        xlabel('t [s]')
        axis('tight')
    
        subplot(212)
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), np.average(state_traj_y, axis=0), 'b-', label='mean')
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), state_traj_y_percentile_10, 'b--', label='10/90 percentile')
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), state_traj_y_percentile_90, 'b--')
        legend()
        grid()
        ylabel('y (t)')
        xlabel('t [s]')
        axis('tight')

    elif NUM_DIM==1:
        subplot(111)
        #plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), np.average(state_traj_x, axis=0), 'b-', label='mean')
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), state_traj_x_percentile_10, 'b--', label='10/50/90 percentile')
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), state_traj_x_percentile_90, 'b--')
        plot( holding_time*np.linspace(0,TRAJ_LEN,num=TRAJ_LEN), state_traj_x_percentile_50, 'b--')
        legend()
        grid()
        xlabel('t [s]')
        axis('tight')
Example #37
def imshow(arr, x=None, ax=None, vmin=None, vmax=None, percentile=True,
           strip=False, features=None, conf=0.95, line_kwargs=None,
           sort_by=None, fill_kwargs=None, figsize=(5, 12),
           width_ratios=(4, 1), height_ratios=(4, 1),
           subplot_params=dict(wspace=0.1, hspace=0.1), imshow_kwargs=None):
    """
    Parameters
    ----------
    arr : array-like

    x : 1D array
        X values to use.  If None, use range(arr.shape[1])

    ax : matplotlib.Axes
        If not None, then only plot the array on the provided axes.  This will
        ignore any additional arguments provided that apply to figure-level
        configuration or to the average line plot.  For example, `figsize`,
        `width_ratios`, `height_ratios`, `subplot_params`, `line_kwargs`, and
        `fill_kwargs` will be ignored.

    vmin, vmax : float

    percentile : bool
        If True, then treat values for `vmin` and `vmax` as percentiles rather
        than absolute values.

    strip : bool
        Include a strip plot alongside the array

    features : pybedtools.BedTool or string filename
        Features used to construct the array

    sort_by : array-like
        Use the provided array to sort the array (e.g., expression).  This
        array is argsorted to get the proper order.

    line_kwargs, fill_kwargs : dict
        Passed directly to `ci_plot`.

    figsize : tuple
        (Width, height) of the figure to create.
    """
    if ax is None:
        fig = new_shell(
            figsize=figsize,
            strip=strip,
            subplot_params=subplot_params,
            width_ratios=width_ratios,
            height_ratios=height_ratios)

    if x is None:
        x = np.arange(arr.shape[1])

    if percentile:
        if vmin is None:
            vmin = arr.min()
        else:
            vmin = mlab.prctile(arr.ravel(), vmin)
        if vmax is None:
            vmax = arr.max()
        else:
            vmax = mlab.prctile(arr.ravel(), vmax)
    else:
        if vmin is None:
            vmin = arr.min()
        if vmax is None:
            vmax = arr.max()

    if imshow_kwargs is None:
        imshow_kwargs = {}

    cmap = colormap_adjust.smart_colormap(vmin, vmax)
    if sort_by is not None:
        ind = np.argsort(sort_by)
    else:
        ind = np.arange(arr.shape[0])

    if ax is None:
        array_ax = fig.array_axes
    else:
        array_ax = ax

    mappable = array_ax.imshow(
        arr[ind, :],
        aspect='auto',
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
        origin='lower',
        extent=(x.min(), x.max(), 0, arr.shape[0]),
        **imshow_kwargs
    )
    if ax is None:
        plt.colorbar(mappable, fig.cax)
        ci_plot(
            x,
            arr,
            ax=fig.line_axes,
            line_kwargs=line_kwargs,
            fill_kwargs=fill_kwargs,
        )

        return fig
    else:
        return ax.figure
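
The percentile-based color-limit logic above can be isolated into a short sketch (hypothetical array; `mlab.prctile` as elsewhere on this page):

import numpy as np
from matplotlib import mlab

arr = np.random.normal(size=(100, 50))
vmin = mlab.prctile(arr.ravel(), 5.0)   # 5th-percentile lower limit
vmax = mlab.prctile(arr.ravel(), 95.0)  # 95th-percentile upper limit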
Example #38
def input_ip_plots(iparr, inputarr, diffed, x, sort_ind,
                   prefix=None, limits1=(None, None), limits2=(None, None),
                   hlines=None, vlines=None):

    """
    All-in-one plotting function to make a 5-panel figure.

    Panels are IP, input, and diffed; plus 2 line plots showing averages.

    :param iparr, inputarr: NumPy arrays constructed by a genomic_signal object
    :param diffed: Difference of `iparr` and `inputarr`, but can be some other
                   transformation.
    :param x: Extent to use -- for TSSs, maybe something like
        np.linspace(-1000, 1000, bins), or for just bin IDs, something like
        `np.arange(bins)`.

    :param sort_ind: row order for each of the 3 panels -- usually interesting
        to use `clustered_sortind` or `tip_zscores`

    :param prefix: Used to prefix plot titles with '%(prefix)s IP", etc
    :param limits1: Tuple passed to the Normalize function for IP and input.
    :param limits2: Tuple passed to the Normalize function for the diffed array
    :param hlines: List of (position, kwarg) tuples for plotting horizontal
        lines.  Kwargs are passed directly to axhline. Useful for delimiting
        clusters, if you used `clustered_sortind` and have both `row_order` and
        `breaks`.
    :param vlines: List of (position, kwargs) tuples.  A vertical line will be
        plotted at each position using kwargs.
    """

    # global min and max
    gmin = min(iparr.min(), inputarr.min())
    gmax = max(iparr.max(), inputarr.max())

    fig = plt.figure(figsize=(10, 10))

    # 3 arrays, 2 line plots, a gene strip, and 2 colorbars.  Plots share the
    # axes that make sense
    #
    # 3 arrays
    ax1 = plt.subplot2grid(
        (9, 9), (0, 0), colspan=3, rowspan=6)
    ax2 = plt.subplot2grid(
        (9, 9), (0, 3), colspan=3, rowspan=6, sharex=ax1, sharey=ax1)
    ax3 = plt.subplot2grid(
        (9, 9), (0, 6), colspan=3, rowspan=6, sharex=ax1, sharey=ax1)

    # 2 line plots
    ax4 = plt.subplot2grid((9, 9), (6, 3), colspan=3, rowspan=3, sharex=ax1)
    ax5 = plt.subplot2grid((9, 9), (6, 6), colspan=3, rowspan=3, sharex=ax1)

    # 2 colorbars
    cax1 = plt.Axes(fig, rect=(0.05, 0.25, 0.25, 0.025))
    cax2 = plt.Axes(fig, rect=(0.05, 0.15, 0.25, 0.025))

    # For nice imshow axes
    extent = (min(x), max(x), 0, diffed.shape[0])

    cm = matplotlib.cm.gist_gray
    cm.set_bad('k')
    cm.set_over('r')
    cm.set_under('b')

    limits1 = list(limits1)
    limits2 = list(limits2)

    all_base = np.column_stack((iparr.ravel(), inputarr.ravel())).ravel()

    if limits1[0] is None:
        limits1[0] = mlab.prctile(
            all_base, 1. / all_base.size)
    if limits1[1] is None:
        limits1[1] = mlab.prctile(
            all_base, 100 - 1. / all_base.size)
    if limits2[0] is None:
        limits2[0] = mlab.prctile(
            diffed.ravel(), 1. / all_base.size)
    if limits2[1] is None:
        limits2[1] = mlab.prctile(
            diffed.ravel(), 100 - 1. / all_base.size)

    del all_base

    imshow_kwargs = dict(
        interpolation='nearest',
        aspect='auto',
        cmap=cm,
        norm=matplotlib.colors.Normalize(*limits1),
        extent=extent,
        origin='lower')

    # modify kwargs for diffed (by changing the normalization)
    diffed_kwargs = imshow_kwargs.copy()
    diffed_kwargs['norm'] = matplotlib.colors.Normalize(*limits2)

    # IP
    mappable1 = ax1.imshow(iparr[sort_ind, :], **imshow_kwargs)

    # input
    mappable2 = ax2.imshow(inputarr[sort_ind, :], **imshow_kwargs)

    # diffed
    mappable3 = ax3.imshow((diffed)[sort_ind, :], **diffed_kwargs)

    # IP and input line plot with vertical line
    ax4.plot(x, inputarr.mean(axis=0), color='k', linestyle='--',
             label='input')
    ax4.plot(x, iparr.mean(axis=0), color='k', label='ip')
    ax4.axvline(0, color='k', linestyle=':')

    # Diffed line plot with vertical line
    ax5.plot(x, diffed.mean(axis=0), 'k', label='enrichment')
    ax5.axvline(0, color='k', linestyle=':')

    # Colorbars
    cbar1 = fig.colorbar(mappable1, cax1, orientation='horizontal')
    cbar2 = fig.colorbar(mappable3, cax2, orientation='horizontal')
    fig.add_axes(cax1)
    fig.add_axes(cax2)

    # labeling...
    ax1.set_ylabel('features')
    plt.setp(ax2.get_yticklabels(), visible=False)
    plt.setp(ax3.get_yticklabels(), visible=False)
    ax4.set_xlabel('bp')
    ax4.set_ylabel('mean reads per million mapped reads')
    ax5.set_xlabel('bp')
    cax1.set_xlabel('Reads per million mapped reads')
    cax2.set_xlabel('Enrichment (RPMMR)')

    if prefix is None:
        prefix = ""
    ax1.set_title('%s IP' % prefix)
    ax2.set_title('%s input' % prefix)
    ax3.set_title('Difference')

    # diffed line plot should have y ax on right
    ax5.yaxis.set_ticks_position('right')
    ax5.yaxis.set_label_position('right')
    ax5.set_ylabel('enriched reads per million mapped reads')

    # Legends
    ax4.legend(loc='best', frameon=False)
    ax5.legend(loc='best', frameon=False)

    # Make sure everybody snaps to xmin/xmax
    for ax in [ax1, ax2, ax3, ax4, ax5]:
        ax.axis(xmin=extent[0], xmax=extent[1])

    if not hlines:
        hlines = []
    if not vlines:
        vlines = []

    for ax in [ax1, ax2, ax3]:
        for pos, kwargs in hlines:
            ax.axhline(pos, **kwargs)
        for pos, kwargs in vlines:
            ax.axvline(pos, **kwargs)

    fig.subplots_adjust(bottom=0.05, top=0.95, hspace=0.75, wspace=0.9)

    return fig
Example #39
def boxplot(x, notch=0, sym='b+', positions=None, widths=None):
    """Makes a box and whisker plot.

    Adapted from matplotlib.axes 0.98.5.2
    Modified such that the caps are set to the 10th and 90th
    percentiles, and to have some control on the colors.

    call signature::

      boxplot(x, notch=0, sym='+', positions=None, widths=None)

    Make a box and whisker plot for each column of *x* or each
    vector in sequence *x*.  The box extends from the lower to
    upper quartile values of the data, with a line at the median.
    The whiskers extend from the box to show the range of the
    data.  Flier points are those past the end of the whiskers.

    - *notch* = 0 (default) produces a rectangular box plot.
    - *notch* = 1 will produce a notched box plot

    *sym* (default 'b+') is the default symbol for flier points.
    Enter an empty string ('') if you don't want to show fliers.

    Unlike the original, the whisker caps are fixed at the 10th and
    90th percentiles of the data, so there is no *whis* parameter.

    *positions* (default 1,2,...,n) sets the horizontal positions of
    the boxes. The ticks and limits are automatically set to match
    the positions.

    *widths* is either a scalar or a vector and sets the width of
    each box. The default is 0.5, or ``0.15*(distance between extreme
    positions)`` if that is smaller.

    *x* is an array or a sequence of vectors.

    Returns a dictionary mapping each component of the boxplot
    to a list of the :class:`matplotlib.lines.Line2D`
    instances created.
    
    Copyright (c) 2002-2009 John D. Hunter; All Rights Reserved
    """
    whiskers, caps, boxes, medians, fliers = [], [], [], [], []

    # convert x to a list of vectors
    if hasattr(x, 'shape'):
        if len(x.shape) == 1:
            if hasattr(x[0], 'shape'):
                x = list(x)
            else:
                x = [
                    x,
                ]
        elif len(x.shape) == 2:
            nr, nc = x.shape
            if nr == 1:
                x = [x]
            elif nc == 1:
                x = [x.ravel()]
            else:
                x = [x[:, i] for i in xrange(nc)]
        else:
            raise ValueError, "input x can have no more than 2 dimensions"
    if not hasattr(x[0], '__len__'):
        x = [x]
    col = len(x)

    # get some plot info
    if positions is None:
        positions = range(1, col + 1)
    if widths is None:
        distance = max(positions) - min(positions)
        widths = min(0.15 * max(distance, 1.0), 0.5)
    if isinstance(widths, float) or isinstance(widths, int):
        widths = numpy.ones((col, ), float) * widths

    # loop through columns, adding each to plot
    for i, pos in enumerate(positions):
        d = numpy.ravel(x[i])
        row = len(d)
        # get median and quartiles
        wisk_lo, q1, med, q3, wisk_hi = mlab.prctile(d, [10, 25, 50, 75, 90])
        # get high extreme
        #iq = q3 - q1
        #hi_val = q3 + whis*iq
        #wisk_hi = numpy.compress( d <= hi_val , d )
        #if len(wisk_hi) == 0:
        #wisk_hi = q3
        #else:
        #wisk_hi = max(wisk_hi)
        ## get low extreme
        #lo_val = q1 - whis*iq
        #wisk_lo = numpy.compress( d >= lo_val, d )
        #if len(wisk_lo) == 0:
        #wisk_lo = q1
        #else:
        #wisk_lo = min(wisk_lo)
        # get fliers - if we are showing them
        flier_hi = []
        flier_lo = []
        flier_hi_x = []
        flier_lo_x = []
        if len(sym) != 0:
            flier_hi = numpy.compress(d > wisk_hi, d)
            flier_lo = numpy.compress(d < wisk_lo, d)
            flier_hi_x = numpy.ones(flier_hi.shape[0]) * pos
            flier_lo_x = numpy.ones(flier_lo.shape[0]) * pos

        # get x locations for fliers, whisker, whisker cap and box sides
        box_x_min = pos - widths[i] * 0.5
        box_x_max = pos + widths[i] * 0.5

        wisk_x = numpy.ones(2) * pos

        cap_x_min = pos - widths[i] * 0.25
        cap_x_max = pos + widths[i] * 0.25
        cap_x = [cap_x_min, cap_x_max]

        # get y location for median
        med_y = [med, med]

        # calculate 'regular' plot
        if notch == 0:
            # make our box vectors
            box_x = [box_x_min, box_x_max, box_x_max, box_x_min, box_x_min]
            box_y = [q1, q1, q3, q3, q1]
            # make our median line vectors
            med_x = [box_x_min, box_x_max]
        # calculate 'notch' plot
        else:
            raise NotImplementedError
            notch_max = med  #+ 1.57*iq/numpy.sqrt(row)
            notch_min = med  #- 1.57*iq/numpy.sqrt(row)
            if notch_max > q3:
                notch_max = q3
            if notch_min < q1:
                notch_min = q1
            # make our notched box vectors
            box_x = [
                box_x_min, box_x_max, box_x_max, cap_x_max, box_x_max,
                box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min,
                box_x_min
            ]
            box_y = [
                q1, q1, notch_min, med, notch_max, q3, q3, notch_max, med,
                notch_min, q1
            ]
            # make our median line vectors
            med_x = [cap_x_min, cap_x_max]
            med_y = [med, med]

        # whiskerscolor, capscolor, boxescolor, and medianscolor are expected
        # to be defined at module level before this function is called.
        doplot = plt.plot
        whiskers.extend(
            doplot(wisk_x, [q1, wisk_lo], color=whiskerscolor, linestyle='--'))
        whiskers.extend(
            doplot(wisk_x, [q3, wisk_hi], color=whiskerscolor, linestyle='--'))
        caps.extend(
            doplot(cap_x, [wisk_hi, wisk_hi], color=capscolor, linestyle='-'))
        caps.extend(
            doplot(cap_x, [wisk_lo, wisk_lo], color=capscolor, linestyle='-'))
        boxes.extend(doplot(box_x, box_y, color=boxescolor, linestyle='-'))
        medians.extend(doplot(med_x, med_y, color=medianscolor, linestyle='-'))
        fliers.extend(
            doplot(flier_hi_x, flier_hi, sym, flier_lo_x, flier_lo, sym))

    # fix our axes/ticks up a little
    newlimits = min(positions) - 0.5, max(positions) + 0.5
    plt.gca().set_xlim(newlimits)
    plt.gca().set_xticks(positions)

    return dict(whiskers=whiskers,
                caps=caps,
                boxes=boxes,
                medians=medians,
                fliers=fliers)
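
Since this adaptation pins the caps to the 10th and 90th percentiles, the five
anchor values per column are easy to preview. A minimal, self-contained sketch
with made-up data, using np.percentile as a stand-in for the long-deprecated
mlab.prctile:

import numpy as np

data = np.random.lognormal(mean=0.0, sigma=0.5, size=200)

# The same five-number summary the function computes per column: caps at the
# 10th/90th percentiles, box edges at the quartiles, median line in between.
wisk_lo, q1, med, q3, wisk_hi = np.percentile(data, [10, 25, 50, 75, 90])
print(wisk_lo, q1, med, q3, wisk_hi)

# Fliers are the points beyond the caps.
fliers = data[(data < wisk_lo) | (data > wisk_hi)]
print(len(fliers), 'fliers')
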
Example #47
def calculate_limits(array_dict, method='global', percentiles=None, limit=()):
    """
    Calculate limits for a group of arrays in a flexible manner.

    Returns a dictionary of calculated (vmin, vmax), with the same keys as
    `array_dict`.

    Useful for plotting heatmaps of multiple datasets, and the vmin/vmax values
    of the colormaps need to be matched across all (or a subset) of heatmaps.

    Parameters
    ----------
    array_dict : dict of np.arrays

    method : {'global', 'independent', callable}
        If method="global", then use the global min/max values across all
        arrays in array_dict.  If method="independent", then each array will
        have its own min/max calculated.  If a callable, then it will be used to
        group the keys of `array_dict`, and each group will have its own
        group-wise min/max calculated.

    limit: tuple, optional
        Tuple of 2 scalars passed directly to matplotlib.mlab.prctile to
        limit the calculation of the percentile.

    percentiles : None or list
        If not None, a list of (lower, upper) percentiles in the range [0,100].
    """
    if percentiles is not None:
        for percentile in percentiles:
            if not 0 <= percentile <= 100:
                raise ValueError("percentile (%s) not between [0, 100]")

    if method == 'global':
        all_arrays = np.concatenate(
            [i.ravel() for i in array_dict.values()]
        )
        if percentiles:
            vmin = mlab.prctile(
                all_arrays, percentiles[0], limit=limit)
            vmax = mlab.prctile(
                all_arrays, percentiles[1], limit=limit)

        else:
            vmin = all_arrays.min()
            vmax = all_arrays.max()
        d = {key: (vmin, vmax) for key in array_dict}

    elif method == 'independent':
        d = {}
        for k, v in array_dict.items():
            d[k] = (v.min(), v.max())

    elif callable(method):
        d = {}
        sorted_keys = sorted(array_dict.keys(), key=method)
        for group, keys in groupby(sorted_keys, method):
            keys = list(keys)
            all_arrays = np.concatenate([array_dict[i] for i in keys])
            if percentiles:
                vmin = mlab.prctile(
                    all_arrays, percentiles[0], limit=limit)
                vmax = mlab.prctile(
                    all_arrays, percentiles[1], limit=limit)
            else:
                vmin = all_arrays.min()
                vmax = all_arrays.max()
            for key in keys:
                d[key] = (vmin, vmax)
    else:
        raise ValueError("method must be 'global', 'independent', or a callable")
    return d
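
A usage sketch for the callable form of `method`, assuming `calculate_limits`
(with numpy and itertools.groupby imported in its module) is in scope; the
keys and the grouping rule here are made up for illustration:

import numpy as np

array_dict = {
    'H3K4me3_rep1': np.random.rand(100, 50),
    'H3K4me3_rep2': np.random.rand(100, 50),
    'input_rep1': np.random.rand(100, 50),
}

# Group replicates of the same mark so they share one (vmin, vmax).
group_key = lambda k: k.split('_')[0]

limits = calculate_limits(array_dict, method=group_key)
for k, (vmin, vmax) in sorted(limits.items()):
    print(k, vmin, vmax)
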
Example #48
File: pplogloss.py  Project: NDManh/numbbo
def boxplot(x, notch=0, sym='b+', positions=None, widths=None):
    """Makes a box and whisker plot.

    Adapted from matplotlib.axes 0.98.5.2
    Modified such that the caps are set to the 10th and 90th
    percentiles, and to have some control on the colors.

    call signature::

      boxplot(x, notch=0, sym='+', positions=None, widths=None)

    Make a box and whisker plot for each column of *x* or each
    vector in sequence *x*.  The box extends from the lower to
    upper quartile values of the data, with a line at the median.
    The whiskers extend from the box to show the range of the
    data.  Flier points are those past the end of the whiskers.

    - *notch* = 0 (default) produces a rectangular box plot.
    - *notch* = 1 would produce a notched box plot, but notched plots are
      not implemented in this adaptation and raise ``NotImplementedError``.

    *sym* (default 'b+') is the default symbol for flier points.
    Enter an empty string ('') if you don't want to show fliers.

    Unlike the original, this version has no *whis* parameter: the whiskers
    always extend to the 10th and 90th percentiles, and flier points are
    those beyond them.

    *positions* (default 1,2,...,n) sets the horizontal positions of
    the boxes. The ticks and limits are automatically set to match
    the positions.

    *widths* is either a scalar or a vector and sets the width of
    each box. The default is 0.5, or ``0.15*(distance between extreme
    positions)`` if that is smaller.

    *x* is an array or a sequence of vectors.

    Returns a dictionary mapping each component of the boxplot
    to a list of the :class:`matplotlib.lines.Line2D`
    instances created.
    
    Copyright (c) 2002-2009 John D. Hunter; All Rights Reserved
    """
    whiskers, caps, boxes, medians, fliers = [], [], [], [], []

    # convert x to a list of vectors
    if hasattr(x, 'shape'):
        if len(x.shape) == 1:
            if hasattr(x[0], 'shape'):
                x = list(x)
            else:
                x = [x,]
        elif len(x.shape) == 2:
            nr, nc = x.shape
            if nr == 1:
                x = [x]
            elif nc == 1:
                x = [x.ravel()]
            else:
                x = [x[:,i] for i in range(nc)]
        else:
            raise ValueError("input x can have no more than 2 dimensions")
    if not hasattr(x[0], '__len__'):
        x = [x]
    col = len(x)

    # get some plot info
    if positions is None:
        positions = range(1, col + 1)
    if widths is None:
        distance = max(positions) - min(positions)
        widths = min(0.15*max(distance,1.0), 0.5)
    if isinstance(widths, float) or isinstance(widths, int):
        widths = np.ones((col,), float) * widths

    # loop through columns, adding each to plot
    for i,pos in enumerate(positions):
        d = np.ravel(x[i])
        row = len(d)
        # get median and quartiles
        wisk_lo, q1, med, q3, wisk_hi = mlab.prctile(d,[10,25,50,75,90])
        # get high extreme
        #iq = q3 - q1
        #hi_val = q3 + whis*iq
        #wisk_hi = np.compress( d <= hi_val , d )
        #if len(wisk_hi) == 0:
            #wisk_hi = q3
        #else:
            #wisk_hi = max(wisk_hi)
        ## get low extreme
        #lo_val = q1 - whis*iq
        #wisk_lo = np.compress( d >= lo_val, d )
        #if len(wisk_lo) == 0:
            #wisk_lo = q1
        #else:
            #wisk_lo = min(wisk_lo)
        # get fliers - if we are showing them
        flier_hi = []
        flier_lo = []
        flier_hi_x = []
        flier_lo_x = []
        if len(sym) != 0:
            flier_hi = np.compress( d > wisk_hi, d )
            flier_lo = np.compress( d < wisk_lo, d )
            flier_hi_x = np.ones(flier_hi.shape[0]) * pos
            flier_lo_x = np.ones(flier_lo.shape[0]) * pos

        # get x locations for fliers, whisker, whisker cap and box sides
        box_x_min = pos - widths[i] * 0.5
        box_x_max = pos + widths[i] * 0.5

        wisk_x = np.ones(2) * pos

        cap_x_min = pos - widths[i] * 0.25
        cap_x_max = pos + widths[i] * 0.25
        cap_x = [cap_x_min, cap_x_max]

        # get y location for median
        med_y = [med, med]

        # calculate 'regular' plot
        if notch == 0:
            # make our box vectors
            box_x = [box_x_min, box_x_max, box_x_max, box_x_min, box_x_min ]
            box_y = [q1, q1, q3, q3, q1 ]
            # make our median line vectors
            med_x = [box_x_min, box_x_max]
        # calculate 'notch' plot
        else:
            raise NotImplementedError
            notch_max = med #+ 1.57*iq/np.sqrt(row)
            notch_min = med #- 1.57*iq/np.sqrt(row)
            if notch_max > q3:
                notch_max = q3
            if notch_min < q1:
                notch_min = q1
            # make our notched box vectors
            box_x = [box_x_min, box_x_max, box_x_max, cap_x_max, box_x_max,
                     box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min,
                     box_x_min ]
            box_y = [q1, q1, notch_min, med, notch_max, q3, q3, notch_max,
                     med, notch_min, q1]
            # make our median line vectors
            med_x = [cap_x_min, cap_x_max]
            med_y = [med, med]

        doplot = plt.plot
        whiskers.extend(doplot(wisk_x, [q1, wisk_lo], color=whiskerscolor, linestyle='--'))
        whiskers.extend(doplot(wisk_x, [q3, wisk_hi], color=whiskerscolor, linestyle='--'))
        caps.extend(doplot(cap_x, [wisk_hi, wisk_hi], color=capscolor, linestyle='-'))
        caps.extend(doplot(cap_x, [wisk_lo, wisk_lo], color=capscolor, linestyle='-'))
        boxes.extend(doplot(box_x, box_y, color=boxescolor, linestyle='-'))
        medians.extend(doplot(med_x, med_y, color=medianscolor, linestyle='-'))
        fliers.extend(doplot(flier_hi_x, flier_hi, sym,
                             flier_lo_x, flier_lo, sym))

    # fix our axes/ticks up a little
    newlimits = min(positions)-0.5, max(positions)+0.5
    plt.gca().set_xlim(newlimits)
    plt.gca().set_xticks(positions)

    return dict(whiskers=whiskers, caps=caps, boxes=boxes,
                medians=medians, fliers=fliers)
Example #49
def ssi_scatter(timelock, iters=100):
    
    from myutils import bootstrap
    from matplotlib.mlab import prctile, find
    
    fig = plt.figure()
    ax = fig.add_subplot(111)
    storeSSI = {'PG':[],'FG':[]}
    mem = {'PG response':'PG', 'response':'FG'}
    
    units = timelock.units
    
    # For each unit, compute the SSI for PG and FG
    for unit in units:
        
        data = timelock.get(unit)
        
        # For now, I only want to look at hit-hit trials
        select = (data['PG outcome']==consts['HIT']) & \
                (data['outcome']==consts['HIT'])
        trials = data[select]
        
        inter = interval(unit,trials,'PG out','onset')
        counts = inter[unit.id].map(len)
        rates = counts/(inter['onset']-inter['PG out'])
        for goal in mem.keys():
            input_df = DataFrame({'rates': rates, 'stimulus': trials[goal]})
            
            storeSSI[mem[goal]].append(bootstrap(input_df, ssi, iters=iters))
    
    meanSSI = dict.fromkeys(storeSSI.keys())
    intervSSI = dict.fromkeys(storeSSI.keys())
    
    for key, ssis in storeSSI.items():
        # Calculate the means of the bootstrapped SSIs
        meanSSI[key] = [ np.mean(unitSSI) for unitSSI in ssis ]
        # Calculate the 95% confidence intervals of the bootstrapped SSIs
        intervSSI[key] = [ prctile(unitSSI,p=(2.5,97.5)) for unitSSI in ssis ]
    
    # Now let's check for significance
    sig = dict.fromkeys(meanSSI.keys())
    def check_between(check, between):
        is_it = (between[0] <= check) & (between[1] >= check)
        return is_it
    for key, iSSIs in intervSSI.iteritems():
        sig[key] = np.array([ not check_between(0,issi) for issi in iSSIs ])
    
    not_sig = [ not (pg | fg) for pg,fg in zip(sig['PG'],sig['FG']) ]
    not_sig = np.array(not_sig)
    
    sig_colors = {'PG':'r','FG':'b'}
    xpnts = np.array(meanSSI['PG'])
    ypnts = np.array(meanSSI['FG'])
    xbars = np.abs(np.array(intervSSI['PG']).T - xpnts)
    ybars = np.abs(np.array(intervSSI['FG']).T - ypnts)
    
    # First, plot the not significant units
    ax.errorbar(xpnts[not_sig],ypnts[not_sig],
                yerr=ybars[:,not_sig],xerr=xbars[:,not_sig],
                fmt='o', color = 'grey')
    
    # Then plot things that are significant for PG and FG
    for key in sig:
        if sig[key].any():
            ax.errorbar(xpnts[sig[key]],ypnts[sig[key]],
                yerr=ybars[:,sig[key]],xerr=xbars[:,sig[key]],
                fmt='o', color = sig_colors[key])
    
    xs = ax.get_xlim()
    ys = ax.get_ylim()
    ax.plot(xs,[0,0],'-k')
    ax.plot([0,0],ys,'-k')
    ax.plot([-10,10],[-10,10],'--',color='grey')
    ax.set_xlabel('PG SSI')
    ax.set_ylabel('FG SSI')
    ax.set_xlim(xs)
    ax.set_ylim(ys)
    ax.set_aspect('equal')
    
    #fig.show()
    
    return sig, not_sig
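
The significance rule above reduces to: a unit is significant when its
bootstrapped 95% interval excludes zero. A self-contained sketch of that test
with made-up data, using np.percentile in place of prctile:

import numpy as np

rng = np.random.RandomState(0)
data = rng.normal(loc=0.3, scale=1.0, size=50)   # made-up unit statistic

# Bootstrap the mean and take the 2.5th/97.5th percentiles as a 95% CI.
boots = np.array([rng.choice(data, size=len(data), replace=True).mean()
                  for _ in range(1000)])
lo, hi = np.percentile(boots, [2.5, 97.5])

# Same check as ssi_scatter: significant if the CI excludes zero.
significant = not (lo <= 0 <= hi)
print(lo, hi, significant)
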
Example #50
def make_tss_plot(bam_file, tss, prefix, chromsizes, read_len, bins=400, bp_edge=2000,
                  processes=8, greenleaf_norm=True):
    '''
    Take bootstraps, generate tss plots, and get a mean and
    standard deviation on the plot. Produces 2 plots. One is the
    aggregation plot alone, while the other also shows the signal
    at each TSS ordered by strength.
    '''
    logging.info('Generating tss plot...')
    tss_plot_file = '{0}_tss-enrich.png'.format(prefix)
    tss_plot_data_file = '{0}_tss-enrich.txt'.format(prefix)
    tss_plot_large_file = '{0}_large_tss-enrich.png'.format(prefix)

    # Load the TSS file
    tss = pybedtools.BedTool(tss)
    tss_ext = tss.slop(b=bp_edge, g=chromsizes)

    # Load the bam file
    # Need to shift reads and just get ends, just load bed file?
    bam = metaseq.genomic_signal(bam_file, 'bam')
    bam_array = bam.array(tss_ext, bins=bins, shift_width=-read_len // 2,  # Shift to center the read on the cut site
                          processes=processes, stranded=True)

    # Actually first build an "ends" file
    #get_ends = '''zcat {0} | awk -F '\t' 'BEGIN {{OFS="\t"}} {{if ($6 == "-") {{$2=$3-1; print}} else {{$3=$2+1; print}} }}' | gzip -c > {1}_ends.bed.gz'''.format(bed_file, prefix)
    # print(get_ends)
    # os.system(get_ends)

    #bed_reads = metaseq.genomic_signal('{0}_ends.bed.gz'.format(prefix), 'bed')
    # bam_array = bed_reads.array(tss_ext, bins=bins,
    #                      processes=processes, stranded=True)

    # Normalization (Greenleaf style): Find the avg height
    # at the end bins and take fold change over that
    if greenleaf_norm:
        # Use enough bins to cover 100 bp on either end
        num_edge_bins = int(100 / (2 * bp_edge / bins))
        bin_means = bam_array.mean(axis=0)
        avg_noise = (sum(bin_means[:num_edge_bins]) +
                     sum(bin_means[-num_edge_bins:])) / (2 * num_edge_bins)
        bam_array /= avg_noise
    else:
        bam_array /= bam.mapped_read_count() / 1e6

    # Generate a line plot
    fig = plt.figure()
    ax = fig.add_subplot(111)
    x = np.linspace(-bp_edge, bp_edge, bins)

    ax.plot(x, bam_array.mean(axis=0), color='r', label='Mean')
    ax.axvline(0, linestyle=':', color='k')

    # Note the middle high point (TSS)
    tss_point_val = max(bam_array.mean(axis=0))

    ax.set_xlabel('Distance from TSS (bp)')
    ax.set_ylabel('Average read coverage (per million mapped reads)')
    ax.legend(loc='best')

    fig.savefig(tss_plot_file)

    # Print a more complicated plot with lots of info

    # write the plot data; numpy object
    np.savetxt(tss_plot_data_file, bam_array.mean(axis=0), delimiter=",")

    # Find a safe upper percentile - we can't use X if the Xth percentile is 0
    upper_prct = 99
    if mlab.prctile(bam_array.ravel(), upper_prct) == 0.0:
        upper_prct = 100.0

    plt.rcParams['font.size'] = 8
    fig = metaseq.plotutils.imshow(bam_array,
                                   x=x,
                                   figsize=(5, 10),
                                   vmin=5, vmax=upper_prct, percentile=True,
                                   line_kwargs=dict(color='k', label='All'),
                                   fill_kwargs=dict(color='k', alpha=0.3),
                                   sort_by=bam_array.mean(axis=1))

    # And save the file
    fig.savefig(tss_plot_large_file)

    return tss_plot_file, tss_plot_large_file, tss_point_val
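
The Greenleaf-style normalization reduces to a fold change over the average
signal in the outermost bins. A self-contained sketch with a made-up signal
matrix:

import numpy as np

bins, bp_edge = 400, 2000
signal = np.random.rand(1000, bins)              # made-up TSS matrix

# Bins covering ~100 bp at each edge serve as the background estimate.
num_edge_bins = int(100 / (2 * bp_edge / bins))  # = 10 here
bin_means = signal.mean(axis=0)
avg_noise = (bin_means[:num_edge_bins].sum() +
             bin_means[-num_edge_bins:].sum()) / (2 * num_edge_bins)

enrichment = signal / avg_noise                  # fold change over background
print(enrichment.mean(axis=0).max())             # the TSS point value
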
Example #51
def imshow(arr, x=None, ax=None, vmin=None, vmax=None, percentile=True,
           strip=False, features=None, conf=0.95, sort_by=None,
           line_kwargs=None, fill_kwargs=None, imshow_kwargs=None, figsize=(5, 12),
           width_ratios=(4, 1), height_ratios=(4, 1),
           subplot_params=dict(wspace=0.1, hspace=0.1),
           subset_by=None, subset_order=None,):
    """
    Do-it-all function to help with plotting heatmaps

    Parameters
    ----------
    arr : array-like

    x : 1D array
        X values to use.  If None, use range(arr.shape[1])

    ax : matplotlib.Axes
        If not None, then only plot the array on the provided axes.  This will
        ignore any additional arguments provided that apply to figure-level
        configuration or to the average line plot.  For example, `figsize`,
        `width_ratios`, `height_ratios`, `subplot_params`, `line_kwargs`, and
        `fill_kwargs` will all be ignored.

    vmin, vmax : float

    percentile : bool
        If True, then treat values for `vmin` and `vmax` as percentiles rather
        than absolute values.

    strip : bool
        Include a strip plot alongside the array

    features : pybedtools.BedTool or string filename
        Features used to construct the array

    conf : float
        Confidence interval to use in line plot.

    sort_by : array-like
        Use the provided array to sort the array (e.g., an array of expression
        values).  This array will be argsorted to get the proper order.

    line_kwargs, fill_kwargs : dict
        Passed directly to `ci_plot`.

    figsize : tuple
        (Width, height) of the figure to create.

    imshow_kwargs : dict
        Passed directly to matplotlib.pyplot.imshow.  By default, arguments
        used are `origin='lower'`, `aspect="auto"` and a colormap from
        colormap_adjust.smart_colormap generated using the provided `vmin` and
        `vmax`.

    width_ratios, height_ratios: tuple
        These tuples are passed to the `new_shell` function.  The default
        values set up a 2x2 configuration of panels for heatmap, line plot,
        colorbar axes, and optional strip plot.  Modifying `width_ratios` or
        `height_ratios` creates more or fewer panels.

    subplot_params : dict
        Passed to Figure.subplots_adjust

    subset_by : array
        An array of any type (but usually int or str) that contains a class
        label for each row in the heatmap array.  For example, to subset by
        expression, an array the values of "up", "down", or "unchanged" at each
        of the positions could be provided.

        Note that the heatmap array is first sorted by `sort_by` and then split
        into groups according to `subset_by`, so each subset remains sorted by
        `sort_by`.

    subset_order : list-like
        This provides the order in which the subsets are plotted.  Since the
        default imshow arguments contain `origin="lower"`, these will be
        plotted in order starting at the bottom of the heatmap.

    """
    if ax is None:
        fig = new_shell(
            figsize=figsize,
            strip=strip,
            subplot_params=subplot_params,
            width_ratios=width_ratios,
            height_ratios=height_ratios)

    if x is None:
        x = np.arange(arr.shape[1] + 1)

    if percentile:
        if vmin is None:
            vmin = arr.min()
        else:
            vmin = mlab.prctile(arr.ravel(), vmin)
        if vmax is None:
            vmax = arr.max()
        else:
            vmax = mlab.prctile(arr.ravel(), vmax)
    else:
        if vmin is None:
            vmin = arr.min()
        if vmax is None:
            vmax = arr.max()

    cmap = colormap_adjust.smart_colormap(vmin, vmax)
    _imshow_kwargs = dict(origin='lower', cmap=cmap, vmin=vmin, vmax=vmax,
                          aspect='auto')
    if imshow_kwargs is not None:
        _imshow_kwargs.update(imshow_kwargs)

    # previously we did an argsort first; with subsetting we don't want to do
    # that yet....
    #if sort_by is not None:
    #    ind = np.argsort(sort_by)
    #else:
    #    ind = np.arange(arr.shape[0])

    if sort_by is None:
        sort_by = np.arange(arr.shape[0])

    if ax is None:
        array_ax = fig.array_axes
    else:
        array_ax = ax

    # If not provided, assume all in the same subset.
    if subset_by is None:
        subset_by = np.zeros(arr.shape[0])

    # Ensure always array, since we're doing indexing tricks
    if not isinstance(subset_by, np.ndarray):
        subset_by = np.array(subset_by)

    # If not provided, use sorted order
    if subset_order is None:
        subset_order = sorted(np.unique(subset_by))

    inds = []
    for cls in subset_order:
        subset_ind = np.nonzero(subset_by == cls)[0]
        subset_sort_by = sort_by[subset_ind]
        subset_argsort_by = np.argsort(subset_sort_by)
        inds.append(subset_ind[subset_argsort_by])
    ind = np.concatenate(inds)

    mappable = array_ax.imshow(
        arr[ind, :],
        extent=(x.min(), x.max(), 0, arr.shape[0]),
        **_imshow_kwargs
    )

    if line_kwargs is None:
        line_kwargs = {}
    if fill_kwargs is None:
        fill_kwargs = {}

    if isinstance(line_kwargs, dict):
        line_kwargs = [line_kwargs]
    if isinstance(fill_kwargs, dict):
        fill_kwargs = [fill_kwargs]

    _line_kwargs = itertools.cycle(line_kwargs)
    _fill_kwargs = itertools.cycle(fill_kwargs)

    if ax is None:
        plt.colorbar(mappable, fig.cax)
        for subset_ind, label, _lkw, _fkw in zip(inds, subset_order, _line_kwargs, _fill_kwargs):
            ci_plot(
                x,
                arr[subset_ind],
                ax=fig.line_axes,
                line_kwargs=_lkw,
                fill_kwargs=_fkw,
            )
        return fig
    else:
        return ax.figure
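
The subset handling above sorts rows within each class and then stacks the
classes in `subset_order`. A small sketch of just that index construction,
with made-up labels:

import numpy as np

sort_by = np.array([5.0, 1.0, 3.0, 2.0, 6.0, 4.0])
subset_by = np.array(['up', 'down', 'up', 'down', 'up', 'down'])
subset_order = ['down', 'up']

inds = []
for cls in subset_order:
    subset_ind = np.nonzero(subset_by == cls)[0]  # rows in this class
    inds.append(subset_ind[np.argsort(sort_by[subset_ind])])
ind = np.concatenate(inds)
print(ind)  # 'down' rows first, each class sorted by sort_by
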
Example #52
#initializing function constant variables
N = Nmarkets * Nproducts
tolerance = 0.001
nogradient = 0

thetaspost = hlp.HMCMC(
    lambda theta: hlp.computeGMMobjective(
        theta, simshare, simoutshare, cdindex, weights, price, X, IV, vdraws,
        Nproducts, N, tolerance, nogradient), theta0, B)

posteriormeanpost = np.mean(thetaspost[t - 1:, :], axis=0)
posteriormedianpost = np.median(thetaspost[t - 1:, :], axis=0)
thetasdemedianed = np.abs(thetaspost[t - 1:, :] - (np.ones(
    (B - t + 1, 1)) @ posteriormedianpost[:, None].T))
criticalvaluesymmetricpost = matlab.prctile(thetasdemedianed,
                                            100 * (1 - alpha))

posteriorquantilealpha2post = matlab.prctile(thetaspost[t - 1:, :],
                                             100 * alpha / 2)
posteriorquantileoneminusalpha2post = matlab.prctile(thetaspost[t - 1:, :],
                                                     100 * (1 - alpha / 2))

### STANDARD ERRORS ###
betahat = np.zeros((1, dimX + 1))
for e in range(dimX + 1):
    betanew = posteriormeanpost[e]
    betahat[0, e] = betanew
betahat = betahat.conj().transpose()

theta2hat = np.zeros((1, dimX))
for c in range(dimX):
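
The snippet above is truncated, but the symmetric credible band it computes is
straightforward: demedian the posterior draws, then take the 100*(1 - alpha)
percentile of the absolute deviations as the half-width. A self-contained
sketch with made-up draws, np.percentile standing in for matlab.prctile:

import numpy as np

alpha = 0.05
draws = np.random.standard_t(df=5, size=5000)    # made-up posterior draws

med = np.median(draws)
half_width = np.percentile(np.abs(draws - med), 100 * (1 - alpha))
print('symmetric %d%% band: [%.3f, %.3f]'
      % (100 * (1 - alpha), med - half_width, med + half_width))

# Equal-tailed alternative, as also computed above:
lo, hi = np.percentile(draws, [100 * alpha / 2, 100 * (1 - alpha / 2)])
print('equal-tailed: [%.3f, %.3f]' % (lo, hi))
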
Example #53
        s.join(
            "lista", 'lista_qtde_' +
            datetime.now().strftime("%d%m%Y_%H_%M_%S") + '.csv'), 'w', 'utf-8')
    for k, v in assuntos.items():
        if 'valor' in v:
            values.append(v['valor'])
            line = "%s;%s\n" % (k, str(v['valor']))
            print(line)
            handle.write(line)
    handle.close()
    plt.figure()
    #d = np.sort(np.random.randint(0, 1000, 1000)).cumsum()
    d = sorted(values)
    print(d)

    # Percentile values
    p = np.array([0.0, 25.0, 50.0, 75.0, 100.0])

    perc = mlab.prctile(d, p=p)

    plt.plot(d)
    # Place red dots on the percentiles
    plt.plot((len(d) - 1) * p / 100., perc, 'ro')

    # Set tick locations and labels
    plt.xticks((len(d) - 1) * p / 100., [str(v) for v in p])

    plt.savefig(
        s.join(
            "figuras", 'resultado_perc_' +
            datetime.now().strftime("%d%m%Y_%H_%M_%S") + '.png'))
Example #54
def ssa(X, M=None, K=0):
    r"""Performs Singular Spectrum Analysis on time series X with the method of
    Vautard and Ghil, Phys. D. 1989.

    Parameters
    ----------
    X : 1D array
        Vector of evenly spaced observations.
    M : int
        Window length.  Default value is M = len(X) / 10
    K : int
        Number of EOFs used for reconstruction (AICC choice by default k=0).
        if K = 0, corrected Akaike Information Criterion (AICC) is used
        if K = 'mcssa', the Monte Carlo spectral significance estimation of
        Allen & Smith (J Clim, 1996) is used.

    Returns
    -------
    spec : array_like
           Eigenvalue spectrum, in % variance.
    eig_vec : array_like
              Eigenvector matrix ("temporal EOFs").
    PC : array_like
         Matrix of principal components.
    RC : array_like
         Matrix of RCs (N*M, K) (only if K > 0).
    RCp : array_like
          Reconstructed time-series, involving only the modes retained, and
          rescaled to original mean and variance.

    Examples
    --------
    spec, eig_vec, PC, RC, RCp = ssa(X,[M, K])

    Notes
    -----
    Original file hepta_ssa.m from Hepta Technologies, 2004, written in MATLAB;
    last updated 03/14/2012 to include automated choice for K (AICC).

    Julien Emile-Geay, Lamont Doherty Earth Observatory.
    Dec 2004 last updated 03/14/2012
    """

    X = np.atleast_1d(X)
    if X.ndim > 1:
        raise ValueError("Input vector `X` has more than 1 dimension.")

    N = len(X)

    # Center the series.
    Xr, mu, sigma = standardize(X)  # NOTE: Original calls standardize.m.

    # Set default value for M.
    if not M:
        M = N // 10
    if K == 'mcssa':
        mcssa = True
        MC = 1000
    else:
        mcssa = False
        signif = np.arange(0, K)  # FIXME: 0, K

    Np = N - M + 1

    gam, lags = xcorr(Xr, maxlags=M - 1, matlab_compat='unbiased')

    # Fill in Covariance matrix.  Take positive half of auto-correlation
    # diagram, hence M to 2M - 1.
    C = toeplitz(gam[M - 1:2 * M])

    # Solve eigenvalue problem.
    eig_vec, eig_val = eigd(C)  # FIXME: Matlab eig_vec have reversed signs.
    spec = eig_val / np.sum(eig_val)

    # Determine significant eigenvalues.
    if mcssa:
        # NOTE: Got this at from: http://www.gps.caltech.edu/~tapio/arfit/
        # But this is commented out in the original code.
        #w, A, C, SBC, FPE, th = arfit(Xr, 1, 1)  # fit AR(1) model.
        # NOTE: The original code uses ar1.m.
        # What is the difference between ar1.m and arfit.m?
        a, var, _ = ar1(Xr)
        s = np.sqrt(var)
        noise = np.zeros((N, MC))
        noise[0, :] = Xr[0]
        for jt in range(1, N):
            noise[jt, :] = a * noise[jt - 1, :] + s * np.random.randn(1, MC)

        noise, _, _ = standardize(noise)
        Lambda_R = np.zeros((M, MC))  # one column of noise eigenvalues per draw
        for m in range(0, MC):
            Gn, ln = xcorr(noise[:, m], M - 1, 'unbiased')
            Cn = toeplitz(Gn[M: 2 * M - 1])
            # Noise "eigenvalues".
            tmp = np.dot(eig_vec, Cn)
            Lambda_R[:, m] = np.diag(np.dot(tmp, eig_vec))

        q95 = prctile(Lambda_R, 100 * 0.95)  # FIXME
        # Index of modes rising above the background.
        signif = np.where(eig_val > q95)[0]
        print('MCSSA modes retained: %s' % signif)

        fig, ax = plt.subplots()
        ax.set_title('MCSSA')
        v = np.arange(1, M + 1)
        ligr = [0.7000, 0.7000, 0.7000]
        lmin = Lambda_R.min(axis=1)
        lmax = Lambda_R.max(axis=1)
        ax.fill_between(v, lmin, lmax, color=ligr, alpha=0.3)
        ax.plot(v, eig_val, 'kx', linewidth=2.0)
        ax.plot(v, q95, 'r-', linewidth=2.0)
    elif K == 0:
        trunc = range(0, len(spec))
        # The pca_truncation_criteria.m original call:
        # [MDL, NE08, AIC, AICC] =
        # pca_truncation_criteria(eig_val, 1, trunc, N, 1)
        WK85, NE08 = pca_truncation_criteria(eig_val, 1, trunc, N, 1)
        imin = (np.real(NE08['aicc'])).argmin()
        K = trunc[imin]
        print('AICC truncation choice, K = %s' % K)
        signif = np.arange(0, K)

    # Compute PCs.
    decal = np.zeros((Np, M))

    for t in range(0, N - M + 1):
        decal[t, :] = Xr[t:M + t]

    # The columns of this matrix are Ak(t), k=1 to M.
    PC = np.dot(decal, eig_vec)

    # Compute reconstructed timeseries if K > 0.
    if len(signif) > 0:
        RC = np.zeros((N, len(signif)))
        # First M - 1 terms.  (The original MATLAB loops are one-based, hence
        # the t + 1 adjustments after converting to zero-based indexing.)
        for t in range(M - 1):
            Av = np.flipud(PC[0:t + 1, signif])
            eig_vec_red = eig_vec[0:t + 1, signif]
            RC[t, :] = 1.0 / (t + 1) * np.sum(Av * eig_vec_red, axis=0)

        # Middle of timeseries.
        for t in range(M - 1, Np):
            Av = np.flipud(PC[t - M + 1:t + 1, signif])
            eig_vec_red = eig_vec[0:M, signif]
            RC[t, :] = 1.0 / M * np.sum(Av * eig_vec_red, axis=0)

        # Last M - 1 terms.
        for t in range(Np, N):
            Av = np.flipud(PC[t - M + 1:Np, signif])
            eig_vec_red = eig_vec[t - N + M:M, signif]
            RC[t, :] = 1.0 / (N - t) * np.sum(Av * eig_vec_red, axis=0)

        # Sum and restore the mean and variance.
        RCp = sigma * np.sum(RC, axis=1) + mu
    else:
        RC, RCp = None, None

    return spec, eig_vec, PC, RC, RCp, signif
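
The core of the method, the lag-covariance matrix, its temporal EOFs, and the
principal components, can be sketched compactly. A minimal, self-contained
version with a made-up series (np.linalg.eigh replaces the eigd helper, so
eigenvector signs are arbitrary, as the FIXME above already notes):

import numpy as np
from scipy.linalg import toeplitz

rng = np.random.RandomState(0)
N, M = 500, 50
X = np.sin(2 * np.pi * np.arange(N) / 23.0) + 0.5 * rng.randn(N)
Xr = (X - X.mean()) / X.std()

# Unbiased autocovariance at lags 0..M-1, then the Toeplitz covariance matrix.
gam = np.array([np.dot(Xr[:N - k], Xr[k:]) / (N - k) for k in range(M)])
C = toeplitz(gam)

eig_val, eig_vec = np.linalg.eigh(C)                 # temporal EOFs
eig_val, eig_vec = eig_val[::-1], eig_vec[:, ::-1]   # descending order
spec = eig_val / eig_val.sum()                       # variance fractions

# Principal components: project the lagged embedding onto the EOFs.
Np = N - M + 1
decal = np.array([Xr[t:t + M] for t in range(Np)])
PC = decal @ eig_vec
print(spec[:5])
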
Example #55
            np.log10(faked_rdiffs))
        std_over_shuffles = np.log10(faked_rdiffs).std(axis=0)

        # floored 1-tailed p-value of actual std vs faked distr
        std_n_more_extreme = np.sum(std_over_shuffles > real_std) + \
            np.sum(~np.isfinite(std_over_shuffles))
        mad_n_more_extreme = np.sum(mad_over_shuffles > real_mad)
        assert np.sum(~np.isfinite(mad_over_shuffles)) == 0
        std_pval = (std_n_more_extreme + 1) / float(faked_rdiffs.shape[1])
        mad_pval = (mad_n_more_extreme + 1) / float(faked_rdiffs.shape[1])

        # Text summary
        sdump.append(region)
        sdump.append("MAD, mean %0.3f, distr over shuffles: %s" % \
            (mad_over_shuffles.mean(),
            ' '.join(['%0.3f' % v for v in mlab.prctile(
            mad_over_shuffles, (25, 50, 95, 97.5))])))
        sdump.append("MAD, nP.E. actual=%0.3f, P.E. actual=%0.3f, pval=%0.6f" %
                     (real_npe_mad, real_mad, mad_pval))
        sdump.append("STDEV, nanmean %0.3f, distr over shuffles: %s" % \
            (np.nanmean(std_over_shuffles),
            ' '.join(['%0.3f' % v for v in mlab.prctile(
            std_over_shuffles, (25, 50, 95, 97.5))])))
        sdump.append("STDEV, nP.E. actual=%0.3f, P.E. actual=%0.3f, p=%0.6f" %
                     (real_npe_std, real_std, std_pval))
        sdump.append('')

    # Pretty
    axa[0, 0].set_yticks((0, 1, 2, 3, 4))
    axa[0, 1].set_yticks((0, 2, 4, 6))
    axa[1, 0].set_yticks((0, 1, 2, 3, 4))
    axa[1, 1].set_yticks((0, 2, 4, 6))
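
The floored one-tailed p-value used above adds one pseudo-count so the
estimate can never be exactly zero. A self-contained sketch with a made-up
null distribution:

import numpy as np

rng = np.random.RandomState(0)
n_shuffles = 1000
shuffled_stats = rng.randn(n_shuffles)  # made-up null distribution
actual = 2.5                            # made-up observed statistic

# Floored one-tailed p-value: the +1 keeps p strictly above zero.
n_more_extreme = np.sum(shuffled_stats > actual)
pval = (n_more_extreme + 1) / float(n_shuffles)
print(pval)

# Summary of the null distribution, as in the text dump above:
print(np.percentile(shuffled_stats, [25, 50, 95, 97.5]))
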
Example #56
    def sea(self, **kwargs):
        """Method called to perform superposed epoch analysis on data in object.

        Uses object attributes obj.data, obj.times, obj.epochs, obj.delta,
        obj.window, all of which must be available on instantiation.

        Other Parameters
        ================
        storedata : boolean
            saves matrix of epoch windows as obj.datacube (default = True)
        quartiles : boolean
            if True (default), use the quartiles as the upper and lower bounds
        ci : float or boolean
            if set, use bootstrapped confidence intervals of ci_quan for the
            bounds, at the ci percent level (default level = 95)
        mad : boolean
            if True, use +/- the median absolute deviation for the bounds
        ci_quan : string or callable
            quantity for the confidence interval, 'median' (default) or 'mean'

        Notes
        =====
        A basic plot can be raised with :meth:`plot`
        """
        #check this hasn't already been done
        #TODO: find out why doing two .sea() calls back-to-back fails 2nd time
        if hasattr(self, 'semedian') or hasattr(self, 'semean'):
            return None

        #check defaults
        defaults = {
            'storedata': True,
            'quartiles': True,
            'ci': False,
            'mad': False,
            'ci_quan': 'median'
        }
        for default in defaults:
            if default not in kwargs:
                kwargs[default] = defaults[default]

        #ensure all input is np array
        delt = float(self.delta)
        if isinstance(self.data, np.ndarray):
            y = self.data
        else:
            y = np.asarray(self.data, dtype=float)

        if kwargs['ci']:
            kwargs['quartiles'], kwargs['mad'] = False, False
        if kwargs['mad']:
            kwargs['quartiles'], kwargs['ci'] = False, False

        time, t_epoch = self._timeepoch(delt)

        #build SEA matrix and perform analysis
        wind = int(self.window)
        m = int(2 * wind + 1)
        n = len(t_epoch)
        y_sea = np.zeros((n, m), dtype=float)
        blankslice = np.zeros([m], dtype=float)
        for i in range(n):
            dif = np.abs(time - t_epoch[i])
            j = np.where(dif == np.min(dif))
            stpt = j[0][0] - wind
            enpt = j[0][0] + wind + 1
            sea_slice = blankslice.copy()
            if stpt < 0:  #fix for bad epochs not correctly moved to badepochs attr #TODO: make badepochs robust or do all checking here
                sea_slice[0:abs(stpt)] = np.NaN
                sea_slice[abs(stpt):] = y[0:enpt]
            elif enpt >= len(y):
                tmpslice = y[stpt:]
                sea_slice[:len(tmpslice)] = tmpslice
                sea_slice[len(tmpslice):] = np.NaN
            else:
                sea_slice = y[stpt:enpt]

            y_sea[i, 0:] = sea_slice

        #find SEA mean, median and percentiles - exclude NaNs (or badval)
        try:
            badval = kwargs['badval']
        except KeyError:
            badval = np.nan
            y_sea_m = ma.masked_where(np.isnan(y_sea), y_sea)
        else:
            y_sea_m = ma.masked_values(y_sea, badval)
        self.semean = [np.mean(y_sea_m[:, i].compressed()) for i in range(m)]
        self.semedian = [
            np.median(y_sea_m[:, i].compressed()) for i in range(m)
        ]
        self.semean, self.semedian = np.array(self.semean), np.array(
            self.semedian)
        self.bound_low = np.zeros((m, 1))
        self.bound_high = np.zeros((m, 1))

        if kwargs['quartiles']:
            from matplotlib.mlab import prctile
            for i in range(m):
                dum = np.sort(y_sea_m[:, i].compressed())
                qul = prctile(dum, p=(25, 75))
                self.bound_low[i], self.bound_high[i] = qul[0], qul[1]
                self.bound_type = 'quartiles'
        elif kwargs['ci']:  #bootstrapped confidence intervals (95%)
            funcdict = {'mean': np.mean, 'median': np.median}
            try:
                if isinstance(kwargs['ci'], bool):
                    raise ValueError  #fall through to default case
                else:
                    ci_level = float(kwargs['ci'])
            except ValueError:
                ci_level = 95
            from spacepy.poppy import boots_ci
            if hasattr(kwargs['ci_quan'], "__call__"):  #ci_quan is a function
                ci_func = kwargs['ci_quan']
            else:
                ci_func = funcdict[kwargs['ci_quan']]
            for i in range(m):
                dum = np.sort(y_sea_m[:, i].compressed())
                self.bound_low[i], self.bound_high[i] = \
                     boots_ci(dum, 800, ci_level, ci_func)
                self.bound_type = 'ci'
        elif kwargs['mad']:  #median absolute deviation
            for i in range(m):
                dum = np.sort(y_sea_m[:, i].compressed())
                spread_mad = tb.medAbsDev(dum)
                self.bound_low[i] = self.semedian[i] - spread_mad
                self.bound_high[i] = self.semedian[i] + spread_mad
                self.bound_type = 'mad'

        self.x = np.linspace(-1. * self.window * self.delta,
                             self.window * self.delta, len(self.semedian))
        if kwargs['storedata']:
            self.datacube = y_sea_m
            if self.verbose:
                print('sea(): datacube added as new attribute')

        if self.verbose:
            print('Superposed epoch analysis complete')
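
The quartile branch above reduces to column-wise 25th/75th percentiles over
the non-masked entries of the epoch matrix. A self-contained sketch with
made-up data, np.percentile in place of prctile:

import numpy as np
import numpy.ma as ma

rng = np.random.RandomState(0)
y_sea = rng.randn(40, 21)                 # made-up epoch-window matrix
y_sea[rng.rand(40, 21) < 0.05] = np.nan   # sprinkle in bad values

y_sea_m = ma.masked_where(np.isnan(y_sea), y_sea)
m = y_sea.shape[1]
bound_low, bound_high = np.zeros(m), np.zeros(m)
for i in range(m):
    col = y_sea_m[:, i].compressed()      # drop the masked entries
    bound_low[i], bound_high[i] = np.percentile(col, [25, 75])
print(bound_low[:3], bound_high[:3])
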