Code Example #1
import numpy as np
import pandas as pd
from scipy.stats import lognorm


def get_lognormal_effluent():
    df = pd.read_excel('./model_WWT/SSD_effluent.xlsx',
                       parse_dates=['Date'],
                       index_col='Date')
    df = df.dropna()
    df = df[(df.iloc[:, 0] < 115)]
    mask = (df != 0).any(axis=1)
    df = df.loc[mask]
    df_min, df_max = min(df.iloc[:, 0]), max(df.iloc[:, 0])
    df['month'] = df.index.month
    # df['year'] = df.index.year
    # eff_np = np.zeros((12,2020-1990))
    # for i in range (2020-1990):
    #     for j in range(12):
    #         eff_np[j,i] = df[(df.year==1990+i) & (df.month==1+j)].iloc[:,0].mean()

    sigma = []
    mu = []
    for i in range(12):
        data = df[(df.month == i + 1)].iloc[:, 0]
        parm = lognorm.fit(data, floc=0)
        sigma.append(parm[0])
        mu.append(np.log(parm[2]))
#
#        mean = np.exp(mu + 1/2*(sigma**2))
#        mean_data = data.mean()
#        median = np.exp(mu)
#        cv = np.sqrt(np.exp(sigma**2) - 1)
#        sd = mean*np.sqrt(np.exp(sigma**2) - 1)
    data = df.iloc[:, 0]
    parm = lognorm.fit(data, floc=0)
    sigma.append(parm[0])
    mu.append(np.log(parm[2]))

    return mu, sigma
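A note on the parameterization used throughout these examples: for scipy.stats.lognorm, the shape parameter s is the log-space standard deviation sigma, and when the location is fixed at zero (floc=0) the scale equals e**mu, so mu = np.log(scale). A minimal sketch with synthetic data (the values and variable names here are hypothetical, not from any of the projects below):

import numpy as np
from scipy.stats import lognorm

mu_true, sigma_true = 1.5, 0.4  # hypothetical log-space parameters
x = lognorm.rvs(s=sigma_true, scale=np.exp(mu_true), size=10000, random_state=0)

# with floc=0, fit returns (shape, loc, scale) = (sigma, 0, e**mu)
sigma_hat, loc_hat, scale_hat = lognorm.fit(x, floc=0)
mu_hat = np.log(scale_hat)
print(sigma_hat, mu_hat)  # close to 0.4 and 1.5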
Code Example #2
File: test_qdm.py Project: leochencipher/SWHAQ
class TestQDM(NumpyTestCase.NumpyTestCase):
    badinput = 0.5
    nanarray = np.array([1, 2, 3, 4, np.nan])

    obsdist = lognorm.rvs(0.57, size=100)
    obsp = lognorm.fit(obsdist)
    refdist = lognorm.rvs(0.45, size=100)
    refp = lognorm.fit(refdist)
    futdist = lognorm.rvs(0.55, size=100)
    futp = lognorm.fit(futdist)
    x = np.linspace(0, 1, 101)
    qobs = np.quantile(obsdist, x)
    qref = np.quantile(refdist, x)
    qfut = np.quantile(futdist, x)

    def testQDMInput(self):
        """Test input is array-like"""
        self.assertRaises(TypeError, qdm, 0.5, 0.5, 0.5)

    def testQDMNanInput(self):
        """Test input array has no nan values"""
        self.assertRaises(ValueError, qdm, self.nanarray, self.nanarray,
                          self.nanarray)

    def testRefInput(self):
        """Test using reference data as future returns obs dist params"""
        testqfut = qdm(self.obsdist, self.refdist, self.refdist)
        testp = lognorm.fit(testqfut)
        self.assertAlmostEqual(self.obsp[0], testp[0], places=2)
        self.assertAlmostEqual(self.obsp[1], testp[1], places=2)
        self.assertAlmostEqual(self.obsp[2], testp[2], places=2)
Code Example #3
    def dataHistogram(self):
        #Code for plotting the histogram of number of pickups
        plt.hist(self.dftaxi.num_pickups, density=True, bins=5)
        plt.ylabel('Frequency')
        plt.title("Unscaled - Number of Pickups")
        plt.show()
        # define the figure with 2 subplots
        fig,ax = plt.subplots(1,2,figsize = (15,4))
        print("the unscaled graph is not representative and hence we go for scaling ")
        #if data is skewed negative binomial will perform better than poisson
        # histogram of the number of pickups
        self.dftaxi.num_pickups.hist(bins=30,ax=ax[0])
        ax[0].set_xlabel('Num of Pickups')
        ax[0].set_ylabel('Count')
        ax[0].set_yscale('log')
        ax[0].set_title('Histogram of Pickups - Normal Scale')

        # create a vector to hold num of pickups
        v = self.dftaxi.num_pickups 

        # plot the histogram with 30 bins
        v[~((v-v.median()).abs()>3*v.std())].hist(bins=30,ax=ax[1]) 
        ax[1].set_xlabel('Num of pickups')
        ax[1].set_ylabel('Count')
        ax[1].set_title('Histogram of Num of pickups - Scaled')
        print("A scaled graph is being plotted instead...!")
        print("\n")
        # apply a lognormal fit, using the mean of the pickup counts as the initial scale guess
        scatter,loc,mean = lognorm.fit(self.dftaxi.num_pickups.values,scale=self.dftaxi.num_pickups.mean(),loc=0)
        pdf_fitted = lognorm.pdf(np.arange(0,12,.1),scatter,loc,mean)
        ax[1].plot(np.arange(0,12,.1),600000*pdf_fitted,'r') 
        ax[1].legend(['data','lognormal fit'])
        plt.show()
Code Example #5
    def test_fa(self):
        T = 10
        q = generic.fa(self.da, T, 'lognorm')

        p0 = lognorm.fit(self.da.values[:, 0, 0])
        q0 = lognorm.ppf(1 - 1. / T, *p0)
        np.testing.assert_array_equal(q[0, 0, 0], q0)
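Here lognorm.ppf(1 - 1. / T, *p0) evaluates the inverse CDF at probability 1 - 1/T, i.e. the level expected to be exceeded on average once every T periods (the T-period return level).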
Code Example #6
File: plots.py Project: gaurav000144/polylx
def logdistplot(d, **kwargs):
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from scipy.stats import lognorm
    ax = sns.distplot(d, fit=lognorm, **kwargs)
    shape, loc, scale = lognorm.fit(d)
    ax.set_title('Fit mode: {}'.format(loc + np.exp(np.log(scale) - shape**2)))
    plt.show()
Code Example #7
    def returnDistData(cls, self):
        gammaParam = gamma.fit(10**(self.data / 10))
        gammaDist = gamma.pdf(self.data, *gammaParam)

        rayleighParam = rayleigh.fit(self.data)
        rayleighDist = rayleigh.pdf(self.data, *rayleighParam)

        normParam = norm.fit(self.data)
        normDist = norm.pdf(self.data, *normParam)

        logNormParam = lognorm.fit(self.data)
        lognormDist = lognorm.pdf(self.data, *logNormParam)

        nakagamiParam = nakagami.fit(self.data)
        nakagamiDist = nakagami.pdf(self.data, *nakagamiParam)

        exponParam = expon.fit(self.data)
        exponDist = expon.pdf(self.data, *exponParam)

        exponweibParam = exponweib.fit(self.data)
        weibDist = exponweib.pdf(self.data, *exponweibParam)

        distDF = pd.DataFrame(np.column_stack([
            gammaDist, rayleighDist, normDist, lognormDist, nakagamiDist,
            exponDist, weibDist
        ]),
                              columns=[
                                  'gammaDist', 'rayleighDist', 'normDist',
                                  'lognormDist', 'nakagamiDist', 'exponDist',
                                  'weibDist'
                              ])
        self.distDF = distDF
Code Example #8
File: test.py Project: bjanesh/uchvc-tools
def distfit(n,dists,title,width,height,fwhm,dm,samples=1000):
    import numpy as np
    from scipy import ndimage
    from scipy.stats import lognorm

    bins_h = int(height * 60. / 8.)
    bins_w = int(width * 60. / 8.)
    sig = ((bins_w/width)*fwhm)/2.355
    valsLP = []
    for i in range(samples) :
        random_ra = width*np.random.random_sample((n,))
        random_dec = height*np.random.random_sample((n,))
        random_xy = zip(random_ra,random_dec)
        grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins_h,bins_w], range=[[0,height],[0,width]])
        hist_points_r = zip(xedges_r,yedges_r)
        grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0)
        S_r = np.array(grid_gaus_r*0)

        grid_mean_r = np.mean(grid_gaus_r)
        grid_sigma_r = np.std(grid_gaus_r)
        S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r

        x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
        valsLP.append(S_r[x_cent_r][y_cent_r])

    x = np.linspace(2, 22, 4000)

    bins, edges = np.histogram(valsLP, bins=400, range=[2,22], density=True)
    centers = (edges[:-1] + edges[1:])/2.

    al,loc,beta=lognorm.fit(valsLP)
    pct = 100.0*lognorm.cdf(dists, al, loc=loc, scale=beta)
    print('Significance of detection:', '{0:6.3f}%'.format(pct))
Code Example #9
File: test_generic.py Project: ellesmith88/xclim
 def test_fa(self):
     T = 10
     q = generic.fa(self.da, T, "lognorm")
     assert "return_period" in q.coords
     p0 = lognorm.fit(self.da.values[:, 0, 0])
     q0 = lognorm.ppf(1 - 1.0 / T, *p0)
     np.testing.assert_array_equal(q[0, 0, 0], q0)
Code Example #10
def logNormal(dset,a,b,c,d,e,f,g):
    global shape,loc,scale,distro,pdf
    try:
        plt.xscale(f)
    except ValueError:
        print "Specify the type of scale for the x axis"
        return
    
    if g == "linspace":
        distro = np.linspace(a,b,c)
    elif g == "logspace":
        distro = np.logspace(a,b,c)
        for x in range (0,len(distro)):
            #distro[x] = distro[x] + 40
            #adding 40 really does not matter for the overall distribution, it literally just shifts it right
            print ""
    else:
        print "That didn't work."
        return

    shape,loc,scale = lognorm.fit(dset)
    
    
    pdf = lognorm.pdf(distro, shape, loc, scale)
    
    plt.plot(distro, pdf,color=e) #formerly ax.plot
    plt.title(d + " PDF with data")
Code Example #11
def response_time_dist(filename, column):
    """
    Returns the lognormal distribution fit of travel times.
    """
    dt = pandas.read_csv(filename)
    response = lognorm.fit(dt[column])
    click.echo(response)
    return response
Code Example #13
 def fit(self, data, s=1, loc=1, scale=1):
     (s, loc, scale) = lognorm.fit(data)
     self.s, self.loc, self.scale = (s, loc, scale)
     self.mu = np.log(self.scale)
     self.sigma = self.s
     self.params = {'s': self.s, 'loc': self.loc, 'scale': self.scale}
     self.setParams(self.params)
     return (self.s, self.loc, self.scale)
Code Example #14
def fn_photonflux_hist(file_name, folder, mean_photons_per_sec):
    """
    Plots histogram of total number of and fits to lognormal distribution
    Inputs: data, filename and foldername which should be defined in the script
    
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import lognorm
    from pylab import text

    n_molecules = len(mean_photons_per_sec)

    #Plot photon flux
    figure_name = file_name + '_photonsPerSecond'
    ax = plt.subplot(111)
    num_bins = np.linspace(int(min(mean_photons_per_sec)),
                           int(max(mean_photons_per_sec)),
                           int(np.sqrt(len(mean_photons_per_sec)) * 4))
    ax.hist(mean_photons_per_sec,
            bins=num_bins,
            density=True,
            color='darkorange',
            edgecolor='black')

    #Fit curve
    sigma, loc, mean = lognorm.fit(mean_photons_per_sec, floc=0)
    # sigma = shape, mu = np.log(scale)
    pdf = lognorm.pdf(num_bins, sigma, loc, mean)
    ax.plot(num_bins, pdf, 'k', linestyle='--')

    #Edit plot
    plt.xlabel('Photon flux ($s^{-1}$)', fontname='Arial', fontsize=12)
    plt.ylabel('Probability density', fontname='Arial', fontsize=12)
    plt.xticks(fontname='Arial', fontsize=12)
    plt.yticks(fontname='Arial', fontsize=12)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    text(0.75,
         0.95,
         'μ=' + str(round(mean, 2)) + ' photons $s^{-1}$',
         horizontalalignment='center',
         verticalalignment='center',
         transform=ax.transAxes,
         fontname='Arial',
         fontsize=12)
    text(0.40,
         0.95,
         'N=' + str(n_molecules),
         horizontalalignment='center',
         verticalalignment='center',
         transform=ax.transAxes,
         fontname='Arial',
         fontsize=12)
    plt.savefig(folder + '/Figures/PDFs' + '/' + figure_name + '.pdf', dpi=500)
    plt.savefig(folder + '/Figures/PNGs' + '/' + figure_name + '.png', dpi=500)

    return (plt.show())
Code Example #15
def createHisto(histoData1, histoData2, imageName1, imageName2):
    """
    Creates a diagram showing two histograms.
    """
    
    fig = plt.figure()
    plt.subplot(111)
    data1 = histoData1['data']
    data2 = histoData2['data']
    n, bins, patches = plt.hist(data1, _NUMBER_OF_HISTO_BARS, range=(0, data1.max()), \
            weights=np.zeros_like(data1)+1./data1.size, facecolor=_COLOR_FIRST_DATA[0], alpha=0.4, label=imageName1)
    n2, bins2, patches = plt.hist(data2, _NUMBER_OF_HISTO_BARS, range=(0, data2.max()), \
            weights=np.zeros_like(data2)+1./data2.size, facecolor=_COLOR_SECOND_DATA[0], alpha=0.4, label=imageName2)
    
    # 'best fit' line
    shape, loc, scale = lognorm.fit(data1, floc=0) # Fit a curve to the variates
    maximum = data1.max() if data1.max()>data2.max() else data2.max()
    x = np.linspace(0, 1.2 * maximum, num=500)
    # scaling
    binlength = bins[1] - bins[0]
    alpha = factorize(n, binlength)
    
    shape2, loc2, scale2 = lognorm.fit(data2, floc=0) # Fit a curve to the variates
    # scaling
    binlength2 = bins2[1] - bins2[0]
    alpha2 = factorize(n2, binlength2)
    
    # plot functions
    simplefilter("ignore", RuntimeWarning)  # avoid warning in this method
#    plt.plot(bins[1:], n, 'b^', alpha=0.5)
    plt.plot(x, alpha * (lognorm.pdf(x, shape, loc=0, scale=scale)), _COLOR_FIRST_DATA[1]+'--')
#    plt.plot(bins2[1:], n2, 'g^', alpha=0.5)
    plt.plot(x, alpha2 * (lognorm.pdf(x, shape2, loc=0, scale=scale2)), _COLOR_SECOND_DATA[1]+'--')
    axe = plt.axis()
    newaxe =(axe[0], 1.2 * maximum, axe[2], axe[3])
    plt.axis(newaxe)
    plt.title(histoData1['title'])
    plt.ylabel(u'Relative frequency ' + r'$\left[\mathrm{\mathsf{ \frac{N}{\Sigma N} }}\right]$')
    plt.xlabel(histoData1['xlabel'])
    simplefilter("default", RuntimeWarning)
    
    # position the legend
    plt.legend(loc=0, frameon=0)
    plt.minorticks_on()
    return fig
Code Example #16
 def __init__(self, mode=0, elem=None, sample=None):
     if mode == 0:
         self.s = elem[0]
         self.mu = elem[1]
         self.sigma = elem[2]
     else:
         self.s, self.mu, self.sigma = lognorm.fit(sample)
     self.math_average = lognorm.mean(self.s, loc=self.mu, scale=self.sigma)
     self.dispersion = lognorm.var(self.s, loc=self.mu, scale=self.sigma)
Code Example #17
def lognormal(x, y):
    s, loc, scale = lognorm.fit(x)
    xmin = x.min()
    xmax = x.max()
    x = np.linspace(xmin, xmax, len(x))
    pdf = lognorm.pdf(x, s, scale=scale)
    yres = pdf
    print("Sum of squared difference (log-normal): ", np.sum((y-yres)**2))
    return yres
Code Example #18
File: ssd.py Project: ShaoyiZhang/QSAR_SSD_Toolbox
    def _boots(self, df, newx, shape, scale, dist=lognorm):
        xr = lognorm.rvs(size=len(df['Prediction']),
                         s=shape,
                         loc=0,
                         scale=scale)
        this_shape, this_loc, this_scale = lognorm.fit(xr, floc=0)
        this_fit = dist.cdf(newx, s=this_shape, loc=0, scale=this_scale)

        return list(this_fit)
Code Example #19
def plot_gh_distribution(N=100, dp=2.0, sds=(1, 2, 3)):
    fig, ax = plt.subplots(
        figsize=[2 * plotdl.latex_width_inch, plotdl.latex_height_inch])
    cbandwidth = 20  #N #20
    tga = np.zeros([len(sds), cbandwidth])
    gg = np.zeros([len(sds)])
    for n, (sd, tg) in enumerate(zip(sds, tga)):
        m = models.Model_Anderson_DD_1d(number_of_points=N,
                                        dis_param=dp,
                                        periodic=False,
                                        bandwidth=1,
                                        prng=np.random.RandomState(sd))
        ## TAKE ONLY CENTER OF BAND:
        crange = np.arange(N // 2 - cbandwidth // 2, N // 2 + cbandwidth // 2)
        #crange = (abs(m.eig_vals).copy().argsort())[:4]
        #debug((crange, np.arange(N//2 -cbandwidth//2, N//2 + cbandwidth//2)))
        ga = N * phys_functions.ga(m.eig_matrix[:, [crange]])
        tg[:] = ga
        gg[n] = abs(phys_functions.A_matrix_inv(m.rate_matrix, 1, 1.57))**2
        #p = s_cummulative_plot(ax, ga)

    mi, ma, theo = phys_functions.ga_cdf(lyap_gamma(1, dp), N)
    ghs = np.logspace(np.log10(mi), np.log10(ma) + 1, 1000)
    logg = logavg(tga, axis=0)
    s_cummulative_plot(ax, logg)
    debug((logg.shape, tga.shape))
    p = s_cummulative_plot(ax, tga)
    sh, lo, sc = lognorm.fit(tga.flat)
    mu = np.log(sc)
    stdv = sh
    debug((sh, lo, sc))
    #lg = lognorm()
    xspace = np.logspace(np.log10(tga.min()), np.log10(tga.max()))
    ax.plot(xspace,
            norm.cdf(np.log(xspace), [stdv], loc=mu, scale=stdv),
            '-.',
            color='cyan')
    ax.plot(ghs, theo(ghs), color='black')
    #return ghs, theo
    #tga_sum = np.nansum(tga,axis=1)
    tga_avg = np.average(tga, axis=1)
    ax.axvline(logavg(gg))
    ax.axvline(np.average(gg), ls='--')
    ax.axvline(logavg(tga_avg), ls='-', color='red')
    ax.axvline(np.average(tga), ls='-', color='magenta')
    ax.axvline(logavg(tga[tga > 1e-100]), ls=':', color='red')
    ax.axvline(np.exp(-2 * N * lyap_gamma(1, dp)), ls='--', color='green')
    #ax.axvline(4*(lyap_gamma(1,dp)**2)*N*np.exp(-2*N*lyap_gamma(1,dp)), ls='--', color='green')
    print(2 * N * lyap_gamma(1, dp))
    ax.set_xscale('log')
    #ax.set_xlim(1e-10, 10)
    ax.set_ylim(1e-20, 1)
    ax.set_title("W = {}, $\gamma$ = {:.2}, N = {}".format(
        dp, lyap_gamma(1, dp), N))
    ax.xaxis.set_major_locator(LogLocator(numdecs=10))
    mkdir_and_savefig(fig, 'plots/pta_gh_dist.png')
Code Example #20
File: fits.py Project: mccbc/lya_analytic
def lognorm_fit(x, xdata=None):
    '''
    Fit a log normal distribution to the data. Uses actual input data and is 
    slower as a result.
    '''
    if xdata is None:
        xdata = x
    shape, loc, scale = lognorm.fit(xdata, loc=1)
    pdf = lognorm.pdf(x, shape, loc, scale)
    return pdf, np.log(scale), shape
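Note that loc=1 in the call above is only an initial guess for the optimizer; unlike floc=1, it does not fix the location parameter, so the fitted loc can land anywhere.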
Code Example #21
def plot_hist(indiv, datas):
    x = np.array(datas)
    unique, counts = np.unique(x, return_counts=True)
    shape, loc, scale = lognorm.fit(x, floc=0)
    x2 = np.linspace(min(unique), max(unique))
    p = lognorm.pdf(x2, shape, loc=loc, scale=scale)
    plt.clf()
    plt.hist(x, density=True, bins=50)
    plt.plot(x2, p, 'k')
    plt.savefig("hist_{}.pdf".format(indiv))
Code Example #22
File: analysis_tools_update2.py Project: TechAM/THOR
def plotIntensityHistogram(ax,
                           sources,
                           freq_mhz,
                           num_bins,
                           label,
                           color="#338768",
                           fit=False):
    intensity_attrib = intensityAttrib(freq_mhz)

    #if the frequency has not been recorded, then no such column will exist so return
    try:
        peak_intensity = sources[intensity_attrib]
    except KeyError:
        print("Intensities for frequency " + str(freq_mhz) +
              "MHz have not been recorded")
        return
    #filtering out negative intensities
    peak_intensity = peak_intensity[peak_intensity > 0]

    #creating the log-scaled bins
    bin_max = np.log10(peak_intensity.max())
    bin_min = np.log10(peak_intensity.min())
    bins = 10**np.linspace(bin_min, bin_max, num_bins)

    #set axis label and x scale
    ax.set_xlabel(createLabel(intensity_attrib))
    ax.set_ylabel("Frequency")
    ax.set_xscale("log")

    counts, bin_edges, ignored = ax.hist(peak_intensity,
                                         bins=bins,
                                         rwidth=1.0,
                                         color=color,
                                         label=str(freq_mhz) + "MHz " + label)
    #draw best fit logarithmic Gaussian curve
    if fit:
        shape, loc, scale = lognorm.fit(peak_intensity, floc=0)
        bins_log_len = np.r_[bin_edges[1:] - bin_edges[:-1], 0]
        # get pdf-values for same intervals as histogram
        samples_fit_log = lognorm.pdf(bins, shape, loc=loc, scale=scale)
        # plot the fit line
        ax.plot(bins,
                samples_fit_log * bins_log_len * counts.sum(),
                'k-',
                label="Fit line " + label,
                linewidth=2)

        #display mean and std dev in a textbox
        mean = round(scale, 2)
        std = round(np.log10(shape), 2)
        ax.legend((dummyObj(), dummyObj()),
                  ("Mean = " + str(mean), "SD = " + str(std)))

    ax.legend(loc="lower left", bbox_to_anchor=(0.1, 1.01))
    print("Plotted the histogram")
Code Example #23
File: app.py Project: anishkasi/Taskify
def index():
    req = request.json
    #print(req)
    data = req['times']
    shape, loc, scale = lognorm.fit(data, floc=0)
    fitted = lognorm(shape, loc, scale)

    res = fitted.ppf(0.05)

    #return req
    return str(res)
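lognorm(shape, loc, scale) builds a frozen distribution, so fitted.ppf(0.05) is the 5th percentile of the fitted lognormal of the submitted times, presumably used here as a conservative duration estimate.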
Code Example #24
    def test_fit(self):
        p = generic.fit(self.da, 'lognorm')

        assert p.dims[0] == 'dparams'
        assert p.get_axis_num('dparams') == 0
        p0 = lognorm.fit(self.da.values[:, 0, 0])
        np.testing.assert_array_equal(p[:, 0, 0], p0)

        # Check that we can reuse the parameters with scipy distributions
        cdf = lognorm.cdf(.99, *p.values)
        assert cdf.shape == (self.nx, self.ny)
Code Example #25
def continuous():
    """Fit distributions to symptoms' duration data."""
    # fetch data
    x = _symptoms_data()
    # fit distributions
    return {
        'x': x,
        'norm': norm.fit(x),
        'lognorm': lognorm.fit(x, floc=0),
        'gamma': gamma.fit(x, floc=0)
    }
Code Example #26
File: test_generic.py Project: ellesmith88/xclim
    def test_fit(self):
        p = generic.fit(self.da, "lognorm")

        assert p.dims[0] == "dparams"
        assert p.get_axis_num("dparams") == 0
        p0 = lognorm.fit(self.da.values[:, 0, 0])
        np.testing.assert_array_equal(p[:, 0, 0], p0)

        # Check that we can reuse the parameters with scipy distributions
        cdf = lognorm.cdf(0.99, *p.values)
        assert cdf.shape == (self.nx, self.ny)
        assert p.attrs["estimator"] == "Maximum likelihood"
Code Example #27
File: plot_escape.py Project: mccbc/spherical_escape
def plot_fit(tau):
    norm = 1. / tau * (tau + 2. / 3.)**2. * 3. / np.pi**2.
    print(tau)
    print(norm)

    # Load photon data, take care of float errors in r
    data = np.loadtxt(
        '../outputs/escape/escape_photons_nphot1e6/exit_photons_tau{}.dat'.
        format(int(tau)),
        skiprows=1)
    data[:, 0] = np.round(data[:, 0], 5)

    # Set up figure, make initial histogram, normalize x and y
    fig, ax = plt.subplots(1, 1, dpi=180)
    n, bins, patches = ax.hist(data[:, 6],
                               bins=50,
                               color='k',
                               histtype='step',
                               density=True,
                               range=(0, 15))
    bins = bins / norm
    n = n * norm

    # Calculate new bin positions, check normalization sum, clear old histogram
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    print(np.sum(n * np.diff(bins)))
    plt.cla()

    # Scatter plot of the new bin positions and normalized counts
    ax.scatter(bincenters, n, color='k', s=3)
    ax.set_xlabel('Distance')
    ax.set_ylabel('n (normalized)')
    ax.set_yscale('log')
    ax.set_title(r'Total Distance Traveled, $\tau = {}$, $n = 10^6$'.format(
        int(tau)))

    # Calculate probability density from Shane's series solution code
    prob = np.zeros(np.shape(bincenters))
    for i in range(len(bincenters)):
        prob[i] = prob_ct_tau(bincenters[i], tau)
    ax.plot(bincenters, prob, 'b--', label='Series Solution', alpha=0.5)

    # Fit a log normal distribution to the normalized data
    shape, loc, scale = lognorm.fit(data[:, 6] / norm, loc=1)
    pdf = lognorm.pdf(bincenters, shape, loc, scale)
    ax.plot(bincenters, pdf, 'r--', label='Log Normal', alpha=0.5)

    # Save or show the plot
    plt.legend()
    plt.savefig('../outputs/escape/fit_plots_nphot1e6/fit_tau{}.pdf'.format(
        int(tau)))
    plt.close()
    return np.log(scale), shape
Code Example #28
def lognormFit(series):
    '''Estimate lognormal distribution parameters for a series (pd.Series or np.array)'''
    # Relationship between scipy.stats lognorm fit parameters and the lognormal pdf parameters:
    # with floc=0 (i.e. loc fixed at 0), s = sigma and scale = e ** mu

    # mu = np.log(series).mean()
    # sigma = np.log(series).std(ddof=0)

    s, loc, scale = lognorm.fit(series, floc=0)
    sigma, mu = s, np.log(scale)

    return mu, sigma
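A quick usage sketch for lognormFit on synthetic data (the data and names below are hypothetical), showing how the returned (mu, sigma) map back to a frozen scipy distribution:

import numpy as np
from scipy.stats import lognorm

series = lognorm.rvs(s=0.5, scale=np.exp(2.0), size=5000, random_state=1)  # hypothetical data
mu, sigma = lognormFit(series)
dist = lognorm(s=sigma, scale=np.exp(mu))  # frozen distribution, loc = 0
print(dist.median(), np.exp(mu))  # both equal e**mu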
Code Example #29
def get_lognormal_para(name):
    name = 'Inflow'
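    # NOTE: the hard-coded assignment above means the name argument is effectively ignored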
    df = pd.read_excel('./model_WWT/SDD_N_P_2012-2019.xlsx',
                       parse_dates=['Date'],
                       index_col='Date',
                       sheet_name=1)
    start_date = '2012-08-01'
    end_date = '2019-8-15'
    mask = (df.index > start_date) & (df.index <= end_date)
    df = df.loc[mask]

    if name == 'NH3':
        data = df.iloc[:, 0]
        data = data.replace(0, np.nan)
        data = data.dropna()
    elif name == 'TP':
        data = df.iloc[:, 3]
        data = data.replace(0, np.nan)
        data = data.dropna()
    elif name == 'Inflow':
        data = df.iloc[:, 2]
        data = data.replace(0, np.nan)
        data = data.dropna()
        data = pd.DataFrame(data)
        data.iloc[:, 0].value_counts()
        # inflow data used is only sewage flow; the constant 30 MGD records need to be removed
        data = data[data.iloc[:, 0] != 30]
        data = np.array(data)
        data = np.array(data)
    else:
        print('wrong inputs')

    # parm[0] = sigma; parm[1] = location (fixed at 0); parm[2] = scale = median m
    parm = lognorm.fit(data, floc=0)
    sigma = parm[0]
    mu = np.log(parm[2])  # mu is not equal to arithmetic mean
    mean = np.exp(mu + 1 / 2 * (sigma**2))
    mean_data = data.mean()
    median = np.exp(mu)
    cv = np.sqrt(np.exp(sigma**2) - 1)
    sd = mean * np.sqrt(np.exp(sigma**2) - 1)

    return {
        'mu': mu,
        'sigma': sigma,
        'cv': cv,
        'median (scale, m)': parm[2],
        'mean (E[X])': mean,
        'mean_realdata': mean_data,
        'SD[X]': sd,
        'location': parm[1]
    }
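The quantities computed above are the standard lognormal moment identities; assuming $X \sim \mathrm{LogNormal}(\mu, \sigma^2)$ with loc = 0:

$$E[X] = e^{\mu + \sigma^2/2}, \qquad \mathrm{median}(X) = e^{\mu}, \qquad \mathrm{CV}[X] = \sqrt{e^{\sigma^2} - 1}, \qquad \mathrm{SD}[X] = E[X]\,\mathrm{CV}[X]$$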
Code Example #30
File: update.py Project: tbuffington7/firecares
def calculate_story_distribution(fd_id):
    """
    Using the department in combination with similar departments, calculate the story distribution of structures in
    owned census tracts.  Only medium and high risk structures are included in the calculations.
    """

    MAX_STORIES = 108

    try:
        fd = FireDepartment.objects.get(id=fd_id)
        cursor = connections['nfirs'].cursor()
    except (FireDepartment.DoesNotExist, ConnectionDoesNotExist):
        return

    geoms = list(fd.similar_departments.filter(owned_tracts_geom__isnull=False).values_list('owned_tracts_geom', flat=True))
    geoms.append(fd.owned_tracts_geom)

    FIND_STORY_COUNTS = """SELECT count(1), p.story_nbr
    FROM parcel_stories p
    JOIN "LUSE_swg" lu ON lu."Code" = p.land_use,
    (SELECT g.owned_tracts_geom FROM (VALUES {values}) AS g (owned_tracts_geom)) owned_tracts
    WHERE lu.include_in_floor_dist AND lu.risk_category = %(level)s
    AND ST_Intersects(owned_tracts.owned_tracts_geom, p.wkb_geometry)
    GROUP BY p.story_nbr
    ORDER BY count DESC, p.story_nbr;"""

    values = ','.join(['(ST_SetSRID(\'{}\'::geometry, 4326))'.format(geom.hex) for geom in geoms])
    mapping = {2: 'Medium', 4: 'High'}

    def expand(values, weights):
        ret = []
        for v in zip(values, weights):
            ret = ret + [v[0]] * v[1]
        return ret

    for nlevel, level in mapping.items():
        cursor.execute(FIND_STORY_COUNTS.format(values=values), {'level': level})
        res = cursor.fetchall()

        # Filter out `None` story counts and obnoxious values
        a = [x for x in res if x[1] is not None and x[1] <= MAX_STORIES]
        weights = [x[0] for x in a]
        vals = [x[1] for x in a]

        expanded = expand(vals, weights)
        samples = np.random.choice(expanded, size=1000)
        samp = lognorm.fit(samples)

        # Fit curve to story counts
        rm = fd.firedepartmentriskmodels_set.get(level=nlevel)
        rm.floor_count_coefficients = {'shape': samp[0], 'loc': samp[1], 'scale': samp[2]}
        rm.save()
Code Example #31
def createHisto(A, title='', xlabel='', unit=''):
    """
    Generates one histogram of the given data.
    """
    
    fig = plt.figure()
    ax = plt.subplot(111)
    n, bins, patches = plt.hist(A, _NUMBER_OF_HISTO_BARS, range=(0, A.max()), \
            weights=np.zeros_like(A)+1./A.size, facecolor='cyan', alpha=0.4, label=' ')
    
    # set min and max values to return
    values = {}
    values['min'] = A.min()
    values['minrf'] = n[np.nonzero(n)][0]
    values['max'] = A.max()
    values['maxrf'] = n[-1]
    numbers = title+"\nx: "+str(bins[1:])+"\ny: "+str(n)+"\n\n"
    # 'best fit' line
    shape, loc, scale = lognorm.fit(A, floc=0) # Fit a curve to the variates
    x = np.linspace(0, 1.2 * A.max(), num=500)
    # scaling
    binlength = bins[1] - bins[0]
    alpha = factorize(n, binlength)
    
    # plot functions
    simplefilter("ignore", RuntimeWarning)  # avoid warning in this method
    plt.plot(bins[1:], n, 'c^', alpha=0.5, label='Distribution')
    plt.plot(x, alpha * (lognorm.pdf(x, shape, loc=0, scale=scale)), 'c--', label='Fit')
    axe = plt.axis()
    newaxe =(axe[0], 1.2 * A.max(), axe[2], axe[3])
    plt.axis(newaxe)
    plt.title(title)
    plt.ylabel(u'Relative frequency ' + r'$\left[\mathrm{\mathsf{ \frac{N}{\Sigma N} }}\right]$')
    plt.xlabel(xlabel)
    simplefilter("default", RuntimeWarning)
    
    # position the legend
    handles, labels = ax.get_legend_handles_labels()
    indexL3 = labels.index(' ')
    labelsL3 = [labels[indexL3]]
    handlesL3 = [handles[indexL3]]
    del labels[indexL3]
    del handles[indexL3]
    l1 = plt.legend(handlesL3, labelsL3, prop={'size':12}, bbox_to_anchor=(0.72, 0.99), loc=2, frameon=0)
    plt.legend(handles, labels, prop={'size':12}, bbox_to_anchor=(0.72, 0.99), loc=2, frameon=0)
    plt.gca().add_artist(l1)
    currentaxis = fig.gca()
    legendText = r'$\mathrm{\mathsf{\mu =}}$ %4.2f '+unit+'\n'+r'$\mathrm{\mathsf{\sigma =}}$ %4.2f '+unit
    plt.text(0.96, 0.86, legendText % (scale, (shape * scale)), horizontalalignment='right', \
            verticalalignment='top', transform=currentaxis.transAxes)
    plt.minorticks_on()
    return fig, values, numbers
Code Example #32
def axplot(i_data,linkid,intervalindex,ax):
    ax.cla()
    ax.hist(i_data, numberofbin, inputrange, density=True, label='original distribution')
    # start to fit
    shape,location,scale=lognorm.fit(i_data,floc=0)
    rv=lognorm(shape,location,scale)
    x=np.linspace(0,140,100)
    ax.plot(x, rv.pdf(x),             
             'r-', lw=5, alpha=0.6, label='lognorm fit')
    ax.set_title("Link id "+ str(linkid)+" and time Interval:  " + formatinterval(intervalindex))
    ax.set_xlabel('Travel Time (s)')
    ax.set_ylabel('Probability Density') 
    ax.legend()    
Code Example #33
def FitPrice(data):
	priceData = data[:,6]
	priceData = priceData[~sp.isnan(priceData)]
	shape, loc, scale = lognorm.fit(priceData,loc = 0)
	x = np.linspace(0, 100, 100)
	p = lognorm.pdf(x, shape, loc, scale)
	maxIndex = 0
	for i in range(0, len(p)):
		if p[i] >= p[maxIndex]:
			maxIndex = i
		else:
			break; # if the plot goes down, stop searching.
	return x[maxIndex]
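Rather than scanning the sampled pdf for its peak, the mode of a lognormal has a closed form (the same identity Code Example #6 uses); a one-line alternative under FitPrice's variable names, assuming numpy is imported as np:

mode = loc + scale * np.exp(-shape**2)  # closed-form argmax of lognorm.pdf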
Code Example #35
File: traffic.py Project: michaelsouza/network
def hist(x, weights=None, bins=10, distname='normal', color='b', label='pdf', filename=None):
	# create full data using weights
	z = x
	if weights is not None:
		z = np.zeros(sum(weights))
		j = 0
		for i in range(weights.size):
			for k in range(j, j + weights[i]):
				z[j] = x[i]
				j += 1

	# histogram
	hist, bins = np.histogram(x, bins=bins, density=True, weights=weights)

	# fit distribution
	if distname == 'normal':
		(mu, sigma) = norm.fit(z)
		pdf = lambda x: norm.pdf(x, mu, sigma)
	elif distname == 'lognormal':
		sigma, loc, scale = lognorm.fit(z, floc=0)
		mu = np.log(scale)
		pdf = lambda x: lognorm.pdf(x, sigma, loc, scale=scale)
	elif distname is not None:
		raise Exception('Unsupported distribution name ' + distname)

	# plot distribution
	if (distname is not None):
		x = np.linspace(bins[0], bins[-1], 100)
		y = pdf(x)
		label = 'm=%2.1f, s=%2.1f [%s]' % (mu, sigma, label)
		plt.plot(x, y, linewidth=3, label=label, alpha=0.7, color=color)

	# plot histogram
	c = (bins[:-1] + bins[1:]) / 2;  # bins centers
	plt.plot(c, hist, marker='s', alpha=0.7, markersize=8, linestyle='None', color=color)

	# format plot	
	plt.xticks(fontsize=14)
	plt.yticks(fontsize=14)
	plt.ylabel('PDF', fontsize=16)

	if (filename is not None):
		print('Saving figure ' + filename)
		plt.savefig(filename, bbox_inches='tight')
Code Example #36
File: stats_play.py Project: tspr/pycgats
rnd_a += np.random.normal(0,noise_size,1000)
rnd_b += np.random.normal(0,noise_size,1000)
rnd_L += np.random.normal(0,noise_size,1000)

cntr_a = a_pchip(cmpos)
cntr_b = b_pchip(cmpos)
cntr_L = L_pchip(cmpos)

delta_a = cntr_a - rnd_a
delta_b = cntr_b - rnd_b
delta_L = cntr_L - rnd_L

delta_E = sqrt(square(delta_a) +square(delta_b) + square(delta_L))


# plot them green
rndplt=ax.scatter3D(rnd_a,rnd_b,rnd_L,marker='*',c='green',s=50,linewidth=1)

# histogramm delta E

plt.figure()

n, bins, patches = plt.hist(delta_E,bins=50,color='blue',density=True,histtype='bar')
lnrm_shape, lnrm_loc, lnrm_scale = lognorm.fit(delta_E)

x= np.linspace(0, delta_E.max(), num=400)
y = lognorm.pdf(x,lnrm_shape,loc=lnrm_loc,scale=lnrm_scale)

pdflne=plt.plot(x,y,'r--',linewidth=2)

Code Example #37
from scipy import stats
from scipy.stats import lognorm
rrr=lognorm.rvs(10,loc=0,scale=2,size=1000)
print(rrr[1:10])
print("log normal fit", lognorm.fit(rrr,5,loc=0,scale=3))
rrr[1:10]
from numpy import rint
from numpy import around
ppp = around(rrr)
print(ppp[1:10])
print(lognorm.fit(ppp,5,loc=0,scale=3))

Code Example #38
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.stats import lognorm
import math
from scipy.interpolate import UnivariateSpline
import sys

data = sp.genfromtxt(sys.argv[1], delimiter=",")

freq = {}

priceData = data[:, 4]

priceData = priceData[~sp.isnan(priceData)]

shape, loc, scale = lognorm.fit(priceData,loc = 0)

plt.hist(priceData, bins=100, density=True, alpha=0.6, color='g')
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = lognorm.pdf(x, shape, loc, scale)
print(p)
print(x)
maxIndex = 0
for i in range(0, len(p)):
	if p[i] >= p[maxIndex]:
		maxIndex = i
	else:
		break; # if the plot goes down, stop searching.
maxX = x[maxIndex]
plt.plot(x, p, 'k', linewidth=2)
Code Example #39
File: distfit.py Project: bjanesh/uchvc-tools
def distfit(n,dists,title,ra,dec,fwhm, dm):
	import numpy as np
	import matplotlib.pyplot as plt
	# from scipy.optimize import curve_fit
	from scipy.stats import lognorm
	from scipy import ndimage
	
	# n = 279
	bins = 165
	width = 22 
	# fwhm = 2.0
	sig = ((bins/width)*fwhm)/2.355
	valsLP = []
	for i in range(25000) :
		random_ra = ra*np.random.random_sample((n,))
		random_dec = dec*np.random.random_sample((n,))
		random_xy = zip(random_ra,random_dec)
		grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins,bins], range=[[0,width],[0,width]])
		hist_points_r = zip(xedges_r,yedges_r)
		grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0)
		S_r = np.array(grid_gaus_r*0)
		
		grid_mean_r = np.mean(grid_gaus_r)
		grid_sigma_r = np.std(grid_gaus_r)
		S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r
		
		x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
		valsLP.append(S_r[x_cent_r][y_cent_r])
	# valsLP = np.loadtxt('valuesLeoP.txt', usecols=(0,), unpack=True)
	# vals = np.loadtxt('values.txt', usecols=(0,), unpack=True)
	
	# bins, edges = np.histogram(vals, bins=400, range=[2,22], normed=True)
	# centers = (edges[:-1] + edges[1:])/2.
	# plt.scatter(centers, bins, edgecolors='none')
	
	x = np.linspace(2, 22, 4000)
	
	# al,loc,beta=lognorm.fit(vals)
	# print al, loc, beta
	# # plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=5, alpha=0.6, label='lognormal AGC198606')
	# print lognorm.cdf(dists, al, loc=loc, scale=beta)
	
	bins, edges = np.histogram(valsLP, bins=400, range=[2,22], density=True)
	centers = (edges[:-1] + edges[1:])/2.
	
	
	# x = np.linspace(2, 22, 4000)
	# dists = np.array([3.958,3.685,3.897,3.317])
	al,loc,beta=lognorm.fit(valsLP)
	# print al, loc, beta
	plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=2, alpha=0.6, label='lognormal distribution')
	print('Significance of detection:', '{0:6.3f}%'.format(100.0*lognorm.cdf(dists, al, loc=loc, scale=beta)))
	
	plt.scatter(centers, bins, edgecolors='none', label='histogram of $\sigma$ from 25000 \nuniform random samples')
	# print chisqg(bins, lognorm.pdf(centers, al, loc=loc, scale=beta))
	
	
	ax = plt.subplot(111)
	plt.plot([dists,dists],[-1.0,2.0],'k--', lw=2, alpha=1.0, label='best '+title+' detection') 
	# plt.plot([4.115,4.115],[-1.0,2.0],'k--', lw=2, alpha=1.0, label='Leo P detection at 1.74 Mpc')
	# plt.plot([3.897,3.897],[-1.0,2.0],'k-', lw=5, alpha=0.6, label='d=417 kpc')
	# plt.plot([3.317,3.317],[-1.0,2.0],'k-', lw=5, alpha=0.4, label='d=427 kpc')
	plt.ylim(0,1.1)
	plt.xlim(2,12)
	plt.xlabel('$\sigma$ above local mean')
	plt.ylabel('$P(\sigma = X)$')
	plt.legend(loc='best', frameon=True)
	ax.set_aspect(3)
	# plt.show()
	plt.savefig(title+'_'+repr(dm)+'_'+repr(fwhm)+'_dist.pdf')
Code Example #40
File: test.py Project: fransua/ecolopy
log_c = np.array ([np.log (i) for i in c])

log_a = (log_a-np.mean (log_a))/np.std (log_a)
log_b = (log_b-np.mean (log_b))/np.std (log_b)
log_c = (log_c-np.mean (log_c))/np.std (log_c)

print(kstest(log_a, 'norm'))
print(kstest(log_b, 'norm'))
print(kstest(log_c, 'norm'))

plb.hist (b)
plb.hist (log_b, bins=20)
plb.hist (a, bins=100)
plb.hist (log_a, bins=10)

shape, loc, scale = lognorm.fit(a)
rnd_a = lognorm.rvs(shape, scale=scale, loc=loc, size=len(a))
plb.hist(rnd_a, bins=20, alpha=0.5)
plb.hist(a, bins=20, color='r', alpha=0.5)

shape, loc, scale = lognorm.fit(c)
rnd_c = lognorm.rvs(shape, scale=scale, loc=loc, size=len(c))
plb.hist(rnd_c, bins=30, alpha=0.5)
plb.hist(c, bins=30, color='r', alpha=0.5)

shape, loc, scale = lognorm.fit(b)
rnd_b = lognorm.rvs(shape, scale=scale, loc=loc, size=len(b))
plb.hist(rnd_b, bins=20, alpha=0.5)
plb.hist(b, bins=20, color='r', alpha=0.5)

np.mean (b)
Code Example #41
def test_z(filename, uncorr_algo, distbn_to_fit):
    '''test case for pdz domain proteins'''
    algn = read_free(filename)

    # truncate alignments to sequence positions with
    # gap frequency no greater than 20% - to avoid over-representation of gaps
    alignments = truncate(algn, FRAC_ALPHA_CUTOFF)
    print(alignments.shape)
    pdb_res_list = read_pdb(PDZ_PDB_FILE, 'A')
    msa_algn = msa_search(pdb_res_list, alignments)
    print(msa_algn)
    sca_algn = sca(alignments)
    algn_shape = get_algn_shape(algn)
    no_pos = alignments.shape[1]
    no_seq = algn_shape.no_seq
    no_aa = algn_shape.no_aa
    print('Testing SCA module :')
    print('algn_3d_bin hash :' + str(np.sum(np.square(sca_algn.algn_3d_bin))))
    print('weighted_3d_algn hash :' +
          str(np.sum(np.square(sca_algn.weighted_3d_algn))))
    print('weight hash : ' + str(np.sum(np.square(sca_algn.weight))))
    print('pwX hash : ' + str(np.sum(np.square(sca_algn.pwX))))
    print('pm hash : ' + str(np.sum(np.square(sca_algn.pm))))
    print('Cp hash : ' + str(np.sum(np.square(sca_algn.Cp))))
    print('Cs hash : ' + str(np.sum(np.square(sca_algn.Cs))))
    spect = spectral_decomp(sca_algn, 100)
    print('spect lb hash : ' + str(np.sum(np.square(spect.pos_lbd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev))))
    print('spect lbd_rnd hash : ' + str(np.sum(np.square(spect.pos_lbd_rnd))))
    print('spect ev_rnd hash : ' + str(np.sum(np.square(spect.pos_ev_rnd))))

    svd_output = LA.svd(sca_algn.pwX)
    U = svd_output[0]
    sv = svd_output[1]
    V = svd_output[2]

    # calculate the matrix Pi = U*V'
    # this provides a mathematical mapping between
    # positional and sequence correlation

    n_min = min(no_seq, no_pos)
    print(U.shape)
    print(V.shape)
    print(n_min)
    Pi = dot(U[:, 0:n_min], transpose(V[:, 0:n_min]))
    U_p = dot(Pi, spect.pos_ev)

    distbn = get_distbn(distbn_to_fit)
    pd = lognorm.fit(spect.pos_ev[:, 0], floc=0)
    # floc = 0 holds location to 0 for fitting
    print(pd)

    p_cutoff = 0.8  # cutoff for the cdf
    xhist = arange(0, 0.4, 0.01)
    x_dist = arange(min(xhist), max(xhist), (max(xhist) - min(xhist))/100)
    cdf = lognorm.cdf(x_dist, pd[0], pd[1], pd[2])
    # Use case : lognorm.cdf(x, shape, loc, scale)

    jnk = min(abs(cdf - p_cutoff))
    x_dist_pos_right = np.argmin(abs(cdf-p_cutoff))
    cutoff_ev = x_dist[x_dist_pos_right]
    sector_def = np.array(np.where(spect.pos_ev[:, 0] > cutoff_ev)[0])[0]
    print('sector definition :')
    print(sector_def)
Code Example #42
File: test_pct.py Project: bjanesh/uchvc-tools
		random_ra = 20.0*np.random.random_sample((n,))
		random_dec = 20.0*np.random.random_sample((n,))
		random_xy = zip(random_ra,random_dec)
		grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins,bins], range=[[0,width],[0,width]])
		hist_points_r = zip(xedges_r,yedges_r)
		grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0)
		S_r = np.array(grid_gaus_r*0)
		
		grid_mean_r = np.mean(grid_gaus_r)
		grid_sigma_r = np.std(grid_gaus_r)
		S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r
		
		x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
		sig_values_r.append(S_r[x_cent_r][y_cent_r])
		# print >> f1, S_r[x_cent_r][y_cent_r]
	al,loc,beta=lognorm.fit(sig_values_r)
	alphas.append(al)
	betas.append(beta)
	locs.append(loc)
	# pct_calc = [sig_values_r[i] for i in range(len(sig_values_r)) if (sig_values_r[i] < S_th)]
	# percentile = (float(len(pct_calc))/1000.0)*100.0
	# print n, S_th, percentile

ax0 = plt.subplot(2,2,1)	
plt.scatter(ns,alphas,c='r', edgecolors='none')
# plt.ylim(0,1.1)
# plt.xlim(2,12)
plt.xlabel('sample size')
plt.ylabel('alpha')

ax1 = plt.subplot(2,2,2)