Code Example #1
File: Utils.py Project: Douglas2Code/GT-CNN
def Plot_Dist_Train_Extreme(r_err, GT_val,bin1=500,bin2=500,interval1 = 0.95,interval2=0.99):
    covMat = np.array(r_err["Err"], dtype=float)
    median = np.median(covMat)
    c, loc, scale = genextreme.fit(covMat, floc=median)
    # genextreme.interval returns equal-tail bounds: the 2.5th/97.5th
    # percentiles for interval1=0.95 and the 0.5th/99.5th for interval2=0.99.
    min_extreme1, max_extreme1 = genextreme.interval(interval1, c, loc, scale)
    min_extreme2, max_extreme2 = genextreme.interval(interval2, c, loc, scale)
    x = np.linspace(min(covMat),max(covMat),2000)
    fig,ax = plt.subplots(figsize = (30,10))
    plt.xlim(0,0.4)
    plt.plot(x, genextreme.pdf(x, c, loc, scale), linewidth=5)  # density of the constrained fit computed above
    plt.hist(covMat, bins=bin1, alpha=0.3, density=True, edgecolor='black', facecolor='gray', linewidth=3, histtype='stepfilled')
    plt.hist(np.asarray(GT_val["Err"], dtype=float), bins=bin2, alpha=0.3, density=True, edgecolor='red', facecolor='red', linewidth=3, histtype='stepfilled')
    plt.xlabel('Lengths Counts')
    plt.ylabel('Probability')
    plt.title(r'max_extreme1=%.3f,max_extreme2=%.3f' %(max_extreme1, max_extreme2))
    ax.tick_params(left = False, bottom = False)
    
    ax.axvline(min_extreme1, alpha = 0.9, ymax = 0.20, linestyle = ":",linewidth=3,color="red") #,
    ax.axvline(max_extreme1, alpha = 0.9, ymax = 0.20, linestyle = ":",linewidth=3,color="red") #,
    ax.text(min_extreme1, 8, "2.5th", size = 20, alpha = 0.8,color="red")
    ax.text(max_extreme1, 8, "97.5th", size = 20, alpha =.8,color="red")
    ax.axvline(min_extreme2, alpha = 0.9, ymax = 0.20, linestyle = ":",linewidth=3,color="red") #,
    ax.axvline(max_extreme2, alpha = 0.9, ymax = 0.20, linestyle = ":",linewidth=3,color="red") #,
    ax.text(min_extreme2, 8, "0.5th", size = 20, alpha = 0.8,color="red")
    ax.text(max_extreme2, 8, "99.5th", size = 20, alpha =.8,color="red")
    
    print("95% CI upper bound:",max_extreme1)
    print("99% CI upper bound:",max_extreme2)
    print("Median RE:",np.median(np.array(GT_val["Err"], dtype=float)))
    
    return c, loc, scale, fig,ax
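A minimal usage sketch (assumed, not from the source: both arguments only need an "Err" key, which is all the function reads):

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import genextreme

# Synthetic reconstruction errors standing in for real model output.
r_err = {"Err": genextreme.rvs(-0.1, loc=0.10, scale=0.02, size=5000, random_state=0)}
GT_val = {"Err": genextreme.rvs(-0.1, loc=0.12, scale=0.03, size=500, random_state=1)}
c, loc, scale, fig, ax = Plot_Dist_Train_Extreme(r_err, GT_val)
plt.show()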
Code Example #2
 def getZScoreDistExpFunction(y_data):
     from scipy.stats import genextreme as ge
     # scipy returns (shape, loc, scale); "mean" below is the GEV location
     # parameter, not the distribution mean.
     fit_params = ge.fit(y_data)
     shape = fit_params[0]
     mean = fit_params[1]
     sigma = fit_params[2]
     return lambda x: ge.pdf(x, shape, loc=mean, scale=sigma)
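A quick usage sketch (assumed input: any 1-D sample; here synthetic Gumbel draws):

import numpy as np

pdf = getZScoreDistExpFunction(np.random.default_rng(0).gumbel(size=1000))
print(pdf(0.0))  # density of the fitted GEV at x = 0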
Code Example #3
def plotPDF(x, gevfit, bins, xLabel, Title, fname=None):
    '''
    Plot the PDF of data x.
    ----------------------------------------------------------
    Input:
        x:        Pandas series
        gevfit:   Tuple with the three fitted GEV parameters
        bins:     Integer indicating number of bins or a numpy array with the bin edges
        xLabel:   Str label to use for x-axis
        Title:    Str chart title
        fname:    (Optional) Full path to filename to save the figure in *.png format
    '''
    fig, ax = plt.subplots(1, 1)
    h = ax.hist(x,
                bins,
                density=True,
                color=[0, 1, 1],
                edgecolor='k',
                linewidth=.5,
                facecolor=colors[0])  # `colors` is assumed defined at module level
    # Sort the data so the fitted PDF draws as a single clean line.
    xs = np.sort(np.asarray(x, dtype=float))
    p = ax.plot(xs,
                genextreme.pdf(xs, gevfit[0], gevfit[1], gevfit[2]),
                color='k')
    plt.xlabel(xLabel)
    plt.ylabel('Probability density [-]')
    plt.title(Title)
    if fname:
        plt.savefig(fname, dpi=600.)
    else:
        plt.show()
Code Example #4
File: gevmyh5.py Project: loicliang/postH5
def gevfit(sr):
    gev_fit = gev.fit(sr)
    c = gev_fit[0]
    mu = gev_fit[1]
    sigma = gev_fit[2]

    print("""
          GEV Fit Parameters:
          shape parameter c: %s
          location parameter mu: %s
          scale parameter sigma: %s
          """ % (c, sigma, mu))

    print("Median", gev.median(c, mu, sigma))
    print("Mean", gev.mean(c, mu, sigma))
    print("Std dev", gev.std(c, mu, sigma))
    print("95% interval: ", gev.interval(0.95, c, mu, sigma))

    # Heuristic plotting bound from the fitted support (only used by the
    # alternative x-range kept commented out below).
    if (c > 0):
        lBnd = mu - sigma / c
    else:
        lBnd = mu + sigma / c
    srmax = np.max(sr) * 1.1

    x = np.linspace(np.min(sr) - 5, np.max(sr) + 5, 500)
    #x=np.linspace(lBnd,srmax,500)
    gev_pdf = gev.pdf(x, c, mu, sigma)
    gev_cdf = gev.cdf(x, c, mu, sigma)

    plt.figure(figsize=(12, 6))

    ax1 = plt.subplot(1, 2, 1)
    plt.hist(sr, density=True, alpha=0.2, label='Raw Data', bins='auto')
    plt.plot(x, gev_pdf, 'r--', label='GEV Fit')
    plt.legend(loc='upper left')
    ax1.set_title('%s_Probability Density Function' % (sr.name))
    ax1.set_xlabel('Predicted Fatigue Limit (MPa)')
    ax1.set_ylabel('Probability')
    ax1.grid()

    ax2 = plt.subplot(1, 2, 2)
    plt.hist(sr,
             density=True,
             alpha=0.2,
             label='Raw Data',
             cumulative=True,
             bins='auto')
    plt.plot(x, gev_cdf, 'r--', label='GEV Fit')
    plt.legend(loc='upper left')
    ax2.set_title('%s_Cumulative Distribution Function' % (sr.name))
    ax2.set_xlabel('Predicted Fatigue Limit (MPa)')
    ax2.set_ylabel('Density')
    ax2.grid()

    plt.show()
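A minimal usage sketch (assumed: the input is a named pandas Series, since the plot titles read sr.name):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import genextreme as gev

sr = pd.Series(gev.rvs(-0.1, loc=300, scale=25, size=200, random_state=0), name='demo')
gevfit(sr)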
Code Example #5
 def test_with_scipy(self):
     if not SP:
         raise nose.SkipTest("SciPy not installed.")
     x = [1, 2, 3, 4]
     # scipy's genextreme uses shape c = -xi (the negative of the textbook
     # GEV shape), hence the sign flip relative to the flib.gev call below.
     scipy_y = log(genextreme.pdf(x, -.3, 4, 2))
     flib_y = []
     for i in x:
         flib_y.append(flib.gev(i, .3, 4, 2))
     assert_array_almost_equal(scipy_y, flib_y, 5)
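A self-check of that sign convention (not from the source): scipy's genextreme takes c = -xi, and c = 0 reduces to the Gumbel distribution:

import numpy as np
from scipy.stats import genextreme, gumbel_r

x = np.linspace(-2.0, 5.0, 8)
assert np.allclose(genextreme.pdf(x, 0), gumbel_r.pdf(x))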
Code Example #7
File: pdf_gen.py Project: wlsinaa/exfcst_windspd
 def density(self, x, data, distribution):
     # `data` holds the fitted parameters; an unrecognized distribution
     # name falls through and returns a density of 0.
     den = 0.0
     if distribution == 'gev':
         den = gev.pdf(x, data[0], loc=data[1], scale=data[2])
     elif distribution == 'LN':
         den = self.lognormal_pdf(x, data[0], data[1])
     elif distribution == 'TN':
         den = self.TN_pdf(x, data[0], data[1])
     return den
Code Example #8
def srednie(plik_in):
    listy = []
    domeny = []
    li = 0
    d1 = 0

    with open(plik_in, 'r+') as f:
        for line in f:
            w = line.split()
            listy.append(float(w[1]))   # chain length ("Długość łańcucha")
            domeny.append(float(w[2]))  # domain count ("Liczba domen")

        for x, el in enumerate(domeny):
            if el == 0.0:
                domeny[x] = 1.0
        for x in domeny:
            li += 1
            if x == 1.0:
                d1 += 1

    # -------------------------DANIO RERIO REVIEWED----------------------
    data4 = pd.read_csv(
        'Danio_reviewed_out.txt',
        sep='\t',
        names=['Nazwa białka', 'Długość łańcucha', 'Liczba domen'])

    # histogram of chain length
    dwiekolumny4 = data4[data4.columns[1:3]]
    np.seterr(divide='ignore', invalid='ignore')
    dwiekolumny4.hist(column='Długość łańcucha',
                      bins=100,
                      figsize=(10, 10),
                      color='mediumvioletred',
                      density=True)

    p = genextreme.fit(listy, -1)  # -1 is the starting guess for the shape c
    print(p)
    plt.plot(np.linspace(0, 3500),
             genextreme.pdf(np.linspace(0, 3500), p[0], p[1], p[2]),
             'b--',
             lw=3,
             label='Generalized extreme value distribution ')

    plt.title('Danio rerio reviewed - Histogram długości łańucha',
              color='black')
    plt.xlabel('Długość łańcucha')
    plt.ylabel('Liczebność')
    plt.legend(loc='upper right')
    plt.xlim([-10, 3500])
    plt.show()
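An aside on the fit call above (a sketch, not from the source): the second positional argument of genextreme.fit is only a starting guess for the shape, while the f-keywords pin a parameter exactly:

import numpy as np
from scipy.stats import genextreme

data = genextreme.rvs(-0.5, loc=300, scale=120, size=1000, random_state=0)
print(genextreme.fit(data, -1))          # -1 is just the optimizer's initial shape
print(genextreme.fit(data, floc=300.0))  # floc (or fscale, f0) fixes a parameter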
Code Example #9
File: gev_fit.py Project: tommylees112/esowc_notes
def plot_GEV_fit(series, shape, loc, scale):
    """Parameters follow `scipy` order: shape (c), loc, scale."""
    # Evaluate the fitted PDF on a grid starting just above the location.
    xx = np.linspace(loc + 0.00001, loc + 0.00001 + 35, num=71)
    yy = gev.pdf(xx, shape, loc, scale)

    fig, ax = plt.subplots()
    # plot histogram of observed data
    series.plot.hist(ax=ax)

    ax.plot(xx, yy, 'ro')
    plt.show()
Code Example #10
def plotgev(dados, ndivh, titulo):
    plt.figure()
    shape, loc, scale = gev.fit(dados)

    plt.hist(dados, bins=ndivh, density=True)
    xmin, xmax = plt.xlim()

    xx = np.linspace(xmin, xmax, num=100)
    yy = gev.pdf(xx, shape, loc, scale)

    plt.title(titulo + " | GEV")

    plt.xlabel("")
    plt.ylabel("")

    plt.plot(xx, yy, 'orange')
    plt.draw()
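A usage sketch with synthetic data (assumed: np, plt, and gev are bound at module level as the snippet expects):

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import genextreme as gev

data = gev.rvs(-0.1, loc=50, scale=8, size=500, random_state=0)
plotgev(data, 30, "synthetic annual maxima")
plt.show()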
Code Example #11
File: funcoesBasicas.py Project: clebsonpy/HidroComp
 def graDensidade(self, dados, forma, posicao, escala):
     dados.sort()  # sorts in place so the PDF plots as a monotone line
     '''
     dadosExt = []
     for i in range(1, 1001):
         dadosExt.append(self.ler.serieExtensa(i, 'Fluviometrico'))
     dadosExt.sort()
     yExt = gev.pdf(dadosExt, -0.168462, 6286.926278, 1819.961392)
     '''
     yd = gev.pdf(dados, forma, posicao, escala)
     plt.plot(dados,yd,'-r', label = 'Forma: %s\nPosicao: %s\nEscala: %s' % (forma, posicao, escala))
     #plt.plot(dadosExt, yExt,'-r')
     #plt.title('Série Extensa')
     plt.ylabel('Densidade')
     plt.xlabel('Vazão(m³/s)')
     plt.legend(numpoints = 1, loc = "best")
     plt.show()
Code Example #12
File: Extreme_Wind.py Project: duncombe/system-test
def plot_probability_density(annual_max, station_id):
    mle = genextreme.fit(sorted(annual_max), 0)
    mu = mle[1]
    sigma = mle[2]
    xi = mle[0]
    min_x = min(annual_max)-0.5
    max_x = max(annual_max)+0.5
    x = np.linspace(min_x, max_x, num=100)
    y = [genextreme.pdf(z, xi, loc=mu, scale=sigma) for z in x]

    fig = plt.figure(figsize=(12,6))
    axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    xlabel = (station_id + " - Annual Max Wind Speed (m/s)")
    axes.set_title("Probability Density & Normalized Histogram")
    axes.set_xlabel(xlabel)
    axes.plot(x, y, color='Red')
    axes.hist(annual_max, bins=np.arange(min_x, max_x, abs((max_x-min_x)/10)), density=True, color='Yellow')
Code Example #13
    def gev(self, x):  ##cost function
        value = 0.0
        for i in range(0, len(self.obs)):
            mu = ngr_mean(x, self.hres[i], self.ctrl[i], self.mean[i],
                          self.var[i])
            sigma = ngr_var(x, self.hres[i], self.ctrl[i], self.mean[i],
                            self.var[i])
            shape_para = x[4]

            #normalized_x = ( self.obs[i] - mu ) / sigma
            #y = gev.pdf(self.obs[i], shape_para, loc=mu, scale=sigma) + sys.float_info.epsilon
            value += np.log(
                gev.pdf(self.obs[i], shape_para, loc=mu, scale=sigma) +
                sys.float_info.epsilon)

        value = -1 * value / self.sample_size  # mean negative log-likelihood, a strictly proper scoring rule
        return value
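The cost function above is a negative log-likelihood to be minimized; a minimal self-contained sketch of the same idea (assumed, with a plain GEV in place of the NGR mean/variance model):

import numpy as np
from scipy.optimize import minimize
from scipy.stats import genextreme as gev

data = gev.rvs(-0.1, loc=10.0, scale=2.0, size=500, random_state=0)

def nll(theta):
    c, mu, sigma = theta
    if sigma <= 0:
        return np.inf
    # epsilon keeps log() finite where the pdf underflows to 0
    return -np.mean(np.log(gev.pdf(data, c, loc=mu, scale=sigma) + np.finfo(float).eps))

res = minimize(nll, x0=[0.0, data.mean(), data.std()], method='Nelder-Mead')
print(res.x)  # close to (-0.1, 10, 2)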
Code Example #14
    def CalIgnoranceScore(self,
                          data,
                          obs_list,
                          distribution=None,
                          discrete=False):

        sample_size = len(obs_list)
        IgnoranceList = [None] * sample_size
        if discrete == True:  #Dawid-Sebastiani
            ENSEMBLES = data
            OBS = obs_list
            for sample_index in range(sample_size):  #Dawid-Sebastiani
                ensemble = ENSEMBLES[sample_index]
                obs = OBS[sample_index]
                pdf = KernelDensity.kde_gaussian(obs, ensemble)
                IgnoranceList[sample_index] = -1.0 * np.log(
                    pdf + sys.float_info.epsilon)
        elif discrete == False:

            if distribution == 'gev':

                for i in range(sample_size):
                    shape = data[i][0]
                    mu = data[i][1]
                    sigma = data[i][2]
                    pdf = gev.pdf(obs_list[i], c=shape, loc=mu, scale=sigma)
                    IgnoranceList[i] = -1.0 * np.log(pdf +
                                                     sys.float_info.epsilon)
            elif distribution in ('TN', 'LN'):
                COEFF = data
                for sample_index in range(sample_size):
                    mu = COEFF[sample_index][0]
                    sigma = COEFF[sample_index][1]
                    if distribution == 'TN':
                        pdf = TN_PDF(obs_list[sample_index],
                                     mu,
                                     sigma,
                                     a=0.0,
                                     b=np.inf)

                    elif distribution == 'LN':
                        pdf = LN_PDF(obs_list[sample_index], mu, sigma)
                    IgnoranceList[sample_index] = -1.0 * np.log(
                        pdf + sys.float_info.epsilon)

        return IgnoranceList
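A minimal numeric check of the continuous 'gev' branch (assumed values, not from the source):

import sys
import numpy as np
from scipy.stats import genextreme as gev

obs = 3.2
shape, mu, sigma = -0.1, 2.5, 1.0
ign = -1.0 * np.log(gev.pdf(obs, c=shape, loc=mu, scale=sigma) + sys.float_info.epsilon)
print(ign)  # ignorance score for this single forecast/observation pair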
Code Example #15
def plot_histograma_e_gev(str_fam_sinal,
                          df_sinais,
                          c,
                          loc,
                          scale,
                          num_inicio,
                          num_final,
                          num_total,
                          nome_coluna='valor'):
    arr_valores_atuais = df_sinais[nome_coluna].to_numpy()
    histogram, bins_edge = np.histogram(arr_valores_atuais, bins=20)

    width = 0.7 * (bins_edge[1] - bins_edge[0])
    center = (bins_edge[:-1] + bins_edge[1:]) / 2

    # plot histograma
    # fig, ax = plt.subplots(1, 1)
    fig, ax1 = plt.subplots()
    color = 'tab:blue'
    plt.bar(center, histogram, align='center', width=width)
    plt.title('Histograma da Série {}'.format(str_fam_sinal))
    plt.xlabel("bin")
    plt.ylabel("Quantidade")
    ax1.tick_params(axis='y', labelcolor=color)

    # plot PDF
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
    color = 'tab:red'
    x = np.linspace(num_inicio, num_final, num_total)
    ax2.get_yaxis().set_ticks([])
    ax2.plot(x,
             genextreme.pdf(x, c, loc, scale),
             'r-',
             lw=5,
             alpha=0.6,
             label='genextreme pdf')

    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.savefig("./histograma_familia_{}.png".format(str_fam_sinal))
    plt.show()
    plt.close()
Code Example #17
def plotajuste_completo(dados, ndivh, titulo):
    plt.figure()

    # data and histogram of the fit
    plt.hist(dados, bins=ndivh, density=True, label='histogram')
    xmin, xmax = plt.xlim()

    xx = np.linspace(xmin, xmax, num=100)

    # Compute the GEV fit
    shape, loc, scale = gev.fit(dados)
    ygev = gev.pdf(xx, shape, loc, scale)
    plt.plot(xx, ygev, 'orange', label='GEV')

    # Compute the Gaussian fit
    mean, std = nrm.fit(dados)
    ygaus = nrm.pdf(xx, mean, std)
    plt.plot(xx, ygaus, 'green', label='Gaussian')

    plt.title(titulo + " | GEV (orange) - Gaussian (green)")

    plt.draw()
Code Example #18
File: Retrieval.py Project: YanlanLiu/VOD_hydraulics
 def f_p50_prior(p50): return np.log(gev.pdf(-p50, 1.08, -2.86, 2.92)+1e-20)
 p50_init = 2.86
Code Example #19
File: Retrieval.py Project: YanlanLiu/VOD_hydraulics
 def f_p50_prior(p50): return np.log(gev.pdf(-p50, 0.76, -3.64, 2.55)+1e-20)
 p50_init = 3.64
Code Example #20
File: extremes.py Project: teslakit/teslakit
def Plot_FitSim_GevFit(data_fit,
                       data_sim,
                       vn,
                       xds_GEV_Par,
                       kma_fit,
                       n_bins=30,
                       color_1='white',
                       color_2='skyblue',
                       alpha_1=0.7,
                       alpha_2=0.4,
                       label_1='Historical',
                       label_2='Simulation',
                       gs_1=1,
                       gs_2=1,
                       n_clusters=1,
                       vlim=1,
                       show=True):
    'Plots fit vs sim histograms and gev fit by clusters for variable "vn"'

    # plot figure
    fig = plt.figure(figsize=(_fsize * gs_2 / 2, _fsize * gs_1 / 2.3))

    # grid spec
    gs = gridspec.GridSpec(gs_1, gs_2)  #, wspace=0.0, hspace=0.0)

    # clusters
    for c in range(n_clusters):

        # select wt data
        wt = c + 1

        ph_wt = np.where(kma_fit.bmus == wt)[0]
        ps_wt = np.where(data_sim.DWT == wt)[0]

        dh = data_fit[vn].values[:][ph_wt]  #; dh = dh[~np.isnan(dh)]
        ds = data_sim[vn].values[:][ps_wt]  #; ds = ds[~np.isnan(ds)]

        # TODO: problem if gumbel?
        # select wt GEV parameters
        pars_GEV = xds_GEV_Par[vn]
        sha = pars_GEV.sel(parameter='shape').sel(n_cluster=wt).values
        sca = pars_GEV.sel(parameter='scale').sel(n_cluster=wt).values
        loc = pars_GEV.sel(parameter='location').sel(n_cluster=wt).values

        # compare histograms
        ax = fig.add_subplot(gs[c])
        axplot_compare_histograms(
            ax,
            dh,
            ds,
            ttl='WT: {0}'.format(wt),
            density=True,
            n_bins=n_bins,
            color_1=color_1,
            color_2=color_2,
            alpha_1=alpha_1,
            alpha_2=alpha_2,
            label_1=label_1,
            label_2=label_2,
        )

        # add gev fit
        x = np.linspace(genextreme.ppf(0.001, -1 * sha, loc, sca), vlim, 100)
        ax.plot(x, genextreme.pdf(x, -1 * sha, loc, sca), label='GEV fit')

        # customize axis
        ax.legend(prop={'size': 8})

    # fig suptitle
    #fig.suptitle('{0}'.format(vn), fontsize=14, fontweight = 'bold')

    # show and return figure
    if show: plt.show()
    return fig
Code Example #21
File: Retrieval.py Project: YanlanLiu/VOD_hydraulics
 def f_p50_prior(p50): return np.log(gev.pdf(-p50, 0.77, -1.86, 1.25)+1e-20)
 p50_init = 1.86
Code Example #22
File: ocean_kit.py Project: rdkit/OCEAN
def calc_ocean_parameter(FP_MANAGER, fp, datasource, recalc=False):
    """
    http://www.jamesphoughton.com/2013/08/making-gif-animations-with-matplotlib.html
    """
    print "calcOceanStatistics function start"
    db_ocean = DB_connector("default")  # chembl
    cursor = db_ocean.cursor

    ds = DataSources.objects.get(name=datasource.name)
    if recalc:
        print "delete rnd set items for fp",fp
        Rnd_set_comparison.objects.all().filter(fp=fp).filter(datasource=ds).delete()
        print "done"
    print "delete parameter entries for fp",fp
    FP_Parameter.objects.all().filter(fp_id=fp).filter(datasource=ds).delete()
    print "done"

    if not recalc and Rnd_set_comparison.objects.all().filter(fp=fp).filter(datasource=ds).count()==0:
        return "no entries for fp %d, try ?recalc=True" % fp

    repeats = settings.CALC_OCEAN_PARAMETER_REPEATS

    start = settings.CALC_OCEAN_PARAMETER_START
    end = settings.CALC_OCEAN_PARAMETER_END
    steps = settings.CALC_OCEAN_PARAMETER_STEPS

    thresh_start = settings.CALC_OCEAN_PARAMETER_THRESH_START
    thresh_end   = settings.CALC_OCEAN_PARAMETER_THRESH_END
    thresh_steps = settings.CALC_OCEAN_PARAMETER_THRESH_STEPS

    animatedGif = True

    try:
        from PIL import Image
        from images2gif import writeGif
    except:
        print >> sys.stderr, "Couldn't import Image from PIL or writeGif from images2gif, so plotting is deactivated now"
        animatedGif = False

    plotting = True
    try:
        import matplotlib.pyplot as plt
    except:
        plotting = False
        animatedGif = False

    processes = settings.PARALLEL_PROCESSES
    if recalc: walker = Pool(processes=processes)

    thresh_list = np.arange(thresh_start,thresh_end,thresh_steps)
    molecule_ids = np.asarray(FP_MANAGER[datasource][fp].keys())

    ds = DataSources.objects.get(name=datasource.name)
    for runde in range(repeats):
        if not recalc: continue

        print "runde %d" % runde
        result = {}
        rand_lists1 = createRandLists(start,end,steps,molecule_ids)
        rand_lists2 = createRandLists(start,end,steps,molecule_ids)

        tasks = [([FP_MANAGER[datasource][fp].get(x1) for x1 in rand_lists1[i]],[FP_MANAGER[datasource][fp].get(x2) for x2 in rand_lists2[i]]) for i in range(len(rand_lists2))]

        if processes>1:
            np.random.shuffle(tasks)
            result2 = {}
            for data_entry in walker.imap_unordered(get_tc_list_para,tasks,20):
                result2[data_entry[0]] = data_entry[1]
                print "addet %d of %d" % (len(result2),len(tasks))
        else:
            result2 = {}
            while (len(tasks)>0):
                task = tasks.pop()
                score = get_tc_list_para(task)
                result2[score[0]] = score[1]
                print "addet %d of %d" % (len(result2),len(tasks))

        print "create %d Result-Objects for DB-Table rnd_set_comparison" % (len(thresh_list) * len(result2))
        with transaction.atomic():
            buffer = []
            for threshold in thresh_list:
                for key,value in result2.iteritems():
                    raw_score = np.sum(value[value>=threshold])
                    item = (key**2,fp,threshold,raw_score)
                    buffer.append(item)
            print "created %d buffered items" % len(buffer)

            for w,x,y,z in buffer:
                obj = Rnd_set_comparison(setsize=w,fp=x,threshold=y,rawscore=z,datasource=ds)
                obj.save()

    figures = []

    data_cache = {}

    min_mean = None
    max_mean = None
    min_stddev = None
    max_stddev = None

    for threshold in thresh_list:
        if db_ocean.db_type=='postgre':
            query = "select setsize,threshold, round(stddev_pop(rawscore)::numeric,2) as stddev_pop,round(avg(rawscore)::numeric,2) as mean from ocean_rnd_set_comparison where fp=%d and threshold=%f and datasource_id=%d group by setsize,threshold order by setsize" % (fp,threshold,ds.id)
        else:
            query = "select setsize,threshold,round(stddev(rawscore),2) as stddev,round(avg(rawscore),2) as mean from ocean_rnd_set_comparison where fp=%d and threshold=%f and datasource_id=%d group by setsize,threshold order by setsize" % (
            fp, threshold, ds.id)
        cursor.execute(query)

        x_data = []
        stddev_data = []
        mean_data = []
        for result in cursor.fetchall():
            x_data.append(float(result[0]))
            mean_data.append(float(result[3]))
            stddev_data.append(float(result[2]))

        if min_mean is None:
            if len(mean_data) > 0:
                min_mean,max_mean = min(mean_data),max(mean_data)
            if len(stddev_data) > 0:
                min_stddev,max_stddev = min(stddev_data),max(stddev_data)
        else:
            if len(mean_data) > 0:
                min_mean, max_mean = min([min_mean,min(mean_data)]), max([max_mean,max(mean_data)])
            if len(stddev_data) > 0:
                min_stddev, max_stddev = min([min_stddev, min(stddev_data)]), max([max_stddev, max(stddev_data)])

        data_cache[threshold] = (x_data,mean_data,stddev_data)

    skip_3_to_6 = True

    for threshold in thresh_list:
        x_data,mean_data,stddev_data = data_cache[threshold]

        if len(x_data) == 0 or len(mean_data)==0 or len(stddev_data)==0:
            continue
        if plotting:
            plt.clf()

        if plotting:
            if skip_3_to_6:
                fig,(r0,r1,r2,r6) = plt.subplots(nrows=4,figsize=(12,14))
            else:
                fig,(r0,r1,r2,r3,r4,r5,r6) = plt.subplots(nrows=7,figsize=(6,14))

        raw_mean_func = Calculator.getRawScoreExpFunction(x_data,mean_data)
        print "\nmean function for threshold: %f is [%s]" % (threshold,raw_mean_func.func_name)

        exp_mean_data = [raw_mean_func(en) for en in x_data]
        if plotting:
            r0.plot(np.array(x_data),np.array(mean_data),linewidth=1.0)
            r0.plot(x_data,exp_mean_data,alpha=0.5,linewidth=2.5)
            r0.set_title("Mean, Threshold: %.2f" % threshold)

            r0.set_ylim((min_mean,max_mean))
            r1.set_ylim((min_stddev,max_stddev))
            r2.set_xlim((-1,1.5))
            r2.set_ylim((0,2.5))
        new_std_function = Calculator.getRawScoreStdDevExpFunction(x_data,stddev_data)

        print "stddev function for threshold: %f is [%s]" % (threshold,new_std_function.func_name)

        newdata2 = new_std_function(x_data)

        if plotting:
            r1.plot(x_data,stddev_data)
            r1.plot(x_data, newdata2, alpha=0.8, linewidth=2.0)
            r1.set_title("StdDev")

        z_Scores = Calculator.getZScores(x_data,mean_data,raw_mean_func,new_std_function)

        histo_bins = 50
        counts,bin_edges = np.histogram(z_Scores,histo_bins,normed=True)
        bin_centres = (bin_edges[:-1] + bin_edges[1:])/2.


        if plotting:
            n,bins,patches = r2.hist(z_Scores,bins=histo_bins,normed=True,alpha=0.5)
            r2.set_title("z-Scores")

        e_val_function = Calculator.getZScoreDistExpFunction(z_Scores)
        e_val_data_x = np.linspace(min(z_Scores),max(z_Scores),num=500)
        e_val_data = [e_val_function(entry) for entry in e_val_data_x]
        if plotting:
            if not skip_3_to_6: r3.plot(e_val_data_x,e_val_data,alpha=0.5)

        c=-0.1
        for c in [-0.05]:
            x_ls = np.linspace(ge.ppf(0.01,c),ge.ppf(0.99,c),100)
            if plotting:
                if not skip_3_to_6: r4.plot(x_ls,ge.pdf(x_ls,c),linewidth=1.6-c*4)

        (shape_evd,loc_evd,scale_evd) = ge.fit(z_Scores)

        loc_norm,scale_norm = norm.fit(z_Scores)
        x = ge.pdf(bin_centres,shape_evd,loc=loc_evd,scale=scale_evd)

        if plotting:
            evd_plot, = r2.plot(bin_centres,x,'b',color='black',label='Extreme Value Distribution')

        ndist = norm.pdf(bin_centres,loc=loc_norm,scale=scale_norm)
        if plotting:
            norm_plot, = r2.plot(bin_centres,ndist,'b',color="red",label='Normal Distribution')
            r2.legend([evd_plot,norm_plot],['Extreme Value Distribution','Normal Distribution'],loc=1)

        def getDecNpArray(value):
            return np.asarray(value).astype(float)

        expected_evd = getDecNpArray(x)
        expected_norm = getDecNpArray(ndist)
        observed = getDecNpArray(counts)

        def normalizedChisquare(observed,expected):
            if len(observed) != len(expected): raise Exception("len of observed and expected has to be the same")

            zipped = zip(observed,expected)
            fun = lambda input: ((input[0]-input[1])**2 / (input[0]+input[1]))
            result = sum(map(fun,zipped))

            return result

        chisq_mean = normalizedChisquare(observed,expected_norm)
        chisq_evd = normalizedChisquare(observed,expected_evd)

        print "chisquare_norm",chisq_mean
        print "chisquare_evd",chisq_evd

        #django doesn't like inf or -inf in float-fields of oracle database, so we change it..
        if isinf(chisq_mean) or isnan(chisq_mean):
            print "chisquare_norm seems to be inf or nan (%s), change to -1.0" % str(chisq_mean)
            chisq_mean = -1.0
        if isinf(chisq_evd) or isnan(chisq_evd):
            print "chisquare_evd seems to be inf or nan (%s), change to -1.0" % str(chisq_evd)
            chisq_evd = -1.0

        if plotting:
            if not skip_3_to_6: n,bins,patches = r5.hist(z_Scores,bins=histo_bins,normed=True,alpha=0.75)#,bins=20)

            if not skip_3_to_6:
                import matplotlib.mlab as mlab
                y = mlab.normpdf(bins,loc_evd,scale_evd)

        fp_parameter = FP_Parameter(fp_id=fp,
                                    threshold=threshold,
                                    formula_raw_mean=raw_mean_func.func_name,
                                    formula_raw_stddev=new_std_function.func_name,
                                    chisquare_mean=chisq_mean,
                                    chisquare_evd=chisq_evd,
                                    datasource=ds)
        fp_parameter.save()
        if plotting:
            if not skip_3_to_6: r5.plot(bins,y)

        if threshold==thresh_list[-1]:      #this is last round
            print "last round"

            query = "select threshold,chisquare_mean,chisquare_evd from ocean_fp_parameter where fp_id=%d and datasource_id=%d order by threshold" % (fp,ds.id)
            cursor.execute(query)
            data_chi2_mean = []
            data_chi2_evd = []
            x_chidata = []
            for val in cursor.fetchall():
                x_chidata.append(float(val[0]))
                data_chi2_mean.append(float(val[1]))
                data_chi2_evd.append(float(val[2]))

            print x_chidata,data_chi2_mean,data_chi2_evd

            if plotting:
                if not skip_3_to_6: r6.plot(x_chidata,data_chi2_mean,'o')
                if not skip_3_to_6: r6.plot(x_chidata,data_chi2_evd,'.')
                chi2_mean, = r6.plot(x_chidata,data_chi2_mean,'o')
                chi2_evd, = r6.plot(x_chidata,data_chi2_evd,'.')
                r6.legend([chi2_mean,chi2_evd],['ChiSquare Normal Distribution','ChiSquare Extreme Value Distribution'],loc=1)

        def fitfunc(p,x):
            if p[0]==0:
                return np.exp(-np.exp(-x))*np.exp(-x)
            else:
                print p[0],type(x)
                return np.exp(-(1-p[0]*x)**(1/p[0]))*(1-p[0]*x)**(1/p[0]-1)
        errfunc = lambda p,x,y: (y-fitfunc(p,x))

        init = [0.2]

        bins = bins[:-1]
        bins = np.array(bins)
        n = np.array(n)

        if plotting:
            plt.tight_layout()
            filename = "%f.png" % threshold
            plt.savefig(filename)
            figures.append(filename)

    if animatedGif:
        file_names = figures
        print "d",file_names
        images = [Image.open(fn) for fn in file_names]
        writeGif("animation_mean_stddev.gif",images,duration=0.5)
        for image in images:
            image.close()
Code Example #23
File: Extreme_Waves.py Project: Bobfrat/system-test
def gev_pdf(x):
    # xi, mu, sigma are module-level parameters (scipy's c, loc, scale),
    # presumably from an earlier genextreme.fit on the extreme-value sample.
    return genextreme.pdf(x, xi, loc=mu, scale=sigma)
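A sketch of how the free variables xi, mu, sigma are typically bound upstream (assumed; the notebook fits annual maxima before defining this helper):

import numpy as np
from scipy.stats import genextreme

annual_max = genextreme.rvs(-0.1, loc=10.0, scale=1.5, size=60, random_state=0)
xi, mu, sigma = genextreme.fit(annual_max)  # scipy returns (c, loc, scale)
print(gev_pdf(np.linspace(8, 16, 5)))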
Code Example #24
import numpy as np
from scipy.stats import genextreme
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

c = -0.1
mean, var, skew, kurt = genextreme.stats(c, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(genextreme.ppf(0.01, c), genextreme.ppf(0.99, c), 100)
ax.plot(x, genextreme.pdf(x, c), 'r-', lw=5, alpha=0.6, label='genextreme pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = genextreme(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = genextreme.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genextreme.cdf(vals, c))
# True

# Generate random numbers:

r = genextreme.rvs(c, size=1000)

# And compare the histogram:

ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
Code Example #25
File: extremal.py Project: gdfa-ugr/protocol
def extremal_distribution_fit(data,
                              var_name,
                              sample,
                              threshold,
                              fit_type,
                              x_min,
                              x_max,
                              n_points,
                              loc=None,
                              scale=None,
                              cumulative=True):
    # Initialization of the output variables
    param = None
    x = None
    y = None
    y_rp = None

    if fit_type == 'gpd':
        # Fit the exceedances over threshold to Generalized Pareto distribution
        param = generalized_pareto_distribution_fit(sample, threshold, loc,
                                                    scale)

        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)

        if cumulative:
            y = genpareto.cdf(x, param[0], param[1], param[2])

            # Calculate the number of extreme peaks per year
            n_peaks_year = len(sample) / len(
                data[var_name].index.year.unique())
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            y = genpareto.pdf(x, param[0], param[1], param[2])

    elif fit_type == 'coles':
        # Fit the exceedances over threshold to Generalized Pareto distribution
        param = generalized_pareto_distribution_fit(sample, threshold, loc,
                                                    scale)

        x = np.arange(1, 501)
        u = param[1]
        sigma = param[2]
        xi = param[0]

        # Mean number of data points in a year
        n_y = len(data[var_name]) / len(data[var_name].index.year.unique())
        # Fraction of the data points that are peaks over threshold
        z_u = len(sample) / len(data[var_name])
        # n_y * z_u is then the number of POT events per year
        y_rp = u + (sigma / xi) * (((x * n_y * z_u)**xi) - 1)

    elif fit_type == 'gev':
        param = generalized_extreme_value_distribution_fit(sample, loc, scale)

        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)

        if cumulative:
            y = genextreme.cdf(x, param[0], param[1], param[2])

            # Calculate the number of extreme peaks per year
            n_peaks_year = 1
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            y = genextreme.pdf(x, param[0], param[1], param[2])

    elif fit_type == 'poisson':
        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)

        # Fit the exceedances over threshold to Generalized Pareto distribution
        gpd_param = generalized_pareto_distribution_fit(
            sample, threshold, loc, scale)

        # Poisson parameter (number of extreme events per year)
        poisspareto_param = len(sample) / len(
            data[var_name].index.year.unique())
        # Poisson pareto parameters
        poisspareto_param = [
            poisspareto_param, gpd_param[0], gpd_param[2], gpd_param[1]
        ]
        # Equivalent gev parameters
        param = [0, 0, 0]
        param[0] = -poisspareto_param[1]
        param[1] = poisspareto_param[2] * (poisspareto_param[0]**
                                           poisspareto_param[1])
        param[2] = poisspareto_param[3] + (
            (poisspareto_param[2] / poisspareto_param[1]) *
            ((poisspareto_param[0]**poisspareto_param[1]) - 1))

        if cumulative:
            y = genextreme.cdf(x, param[0], param[2], param[1])

            # Calculate the number of extreme peaks per year
            n_peaks_year = 1
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            y = genextreme.pdf(x, param[0], param[2], param[1])

    return param, x, y, y_rp
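return_period_curve is a project helper not shown here; a minimal stand-in consistent with how it is used above (assumed), based on T = 1 / (n_peaks_year * (1 - F)):

import numpy as np

def return_period_curve(n_peaks_year, cdf_values):
    # Return period (years) for each non-exceedance probability F,
    # given the mean number of extreme peaks per year.
    F = np.asarray(cdf_values, dtype=float)
    return 1.0 / (n_peaks_year * (1.0 - F))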
Code Example #26
def sea_levels_gev_pdf(x):
    # xi, mu, sigma are module-level fit parameters (scipy's c, loc, scale).
    return genextreme.pdf(x, xi, loc=mu, scale=sigma)
Code Example #27
File: Retrieval.py Project: YanlanLiu/VOD_hydraulics
 def f_p50_prior(p50): return np.log(gev.pdf(-p50, 0.71, -2.23, 1.49)+1e-20)
 p50_init = 2.23
Code Example #28
File: histogram.py Project: joejoezz/climate
def plot_histogram(site,
                   data1,
                   data2,
                   label1='Data1',
                   label2='Data2',
                   subset_label=None,
                   variable=None):
    """
    Plot a normalized histogram of two temperature distributions
    Fit GEV curve to distribution
    :param site: site string
    :param data1: array of data from reference period
    :param data2: array of data from new (warmer climate) period
    :param label1: string label for data1
    :param label2: string label for data2
    :param subset_label: string label for the subset of data (e.g. month/season)
    :return: None (the figure is saved to config['PLOT_DIR'])
    """
    # print some parameters of data
    print('Ref data: {}'.format(len(data1)))
    print('New data: {}'.format(len(data2)))

    # get histogram parameters
    range_min = np.nanmin(np.hstack(
        (data1, data2))) - np.nanmin(np.hstack((data1, data2))) % 10
    range_max = np.nanmax(np.hstack(
        (data1, data2))) + (10 - np.nanmax(np.hstack((data1, data2))) % 10)
    bins = int(range_max - range_min)

    # compute histograms
    hist1, bin_edges = np.histogram(data1,
                                    bins=bins,
                                    range=(range_min, range_max),
                                    density=True)
    hist2, bin_edges = np.histogram(data2,
                                    bins=bins,
                                    range=(range_min, range_max),
                                    density=True)

    # gev fitting--use function to try a couple times to get a good fit
    shape1, loc1, scale1 = get_gev_fit(data1)
    shape2, loc2, scale2 = get_gev_fit(data2)

    x_gev = np.linspace(range_min, range_max, bins * 10 + 1)
    y1_gev = gev.pdf(x_gev, shape1, loc1, scale1)
    y2_gev = gev.pdf(x_gev, shape2, loc2, scale2)

    # compute POD and FAR of 2.5-sigma event (from reference climate)
    mean1 = gev.mean(shape1, loc=loc1, scale=scale1)
    mean2 = gev.mean(shape2, loc=loc2, scale=scale2)
    std1 = np.sqrt(gev.var(shape1, loc=loc1, scale=scale1))
    std2 = np.sqrt(gev.var(shape2, loc=loc2, scale=scale2))
    # calculate a, b, and c params from Durran 2019
    sig20_thres = np.where((x_gev > mean1 + 2.0 * std1))
    sig25_thres = np.where((x_gev > mean1 + 2.5 * std1))
    sig35_thres = np.where((x_gev > mean1 + 3.5 * std1))
    # pdf values summed over the shared grid act as unnormalized
    # probabilities; the grid spacing cancels in the POD/FAR ratios below.
    c_val = np.sum(y1_gev[sig25_thres])
    a_val = np.sum(y2_gev[sig25_thres]) - c_val
    b_val = np.sum(y2_gev[sig20_thres]) - np.sum(y1_gev[sig20_thres]) - a_val
    pod = a_val / (a_val + b_val)
    far = c_val / (a_val + c_val)
    print('POD = {}   FAR = {}'.format(pod, far))

    fig = plt.figure()
    fig.set_size_inches(6, 4)

    # stats of gev fit
    #mean1, var1, skew1, kurt1 = gev.stats(shape1, moments='mvsk')

    mu1 = np.mean(data1)
    sigma1 = np.std(data1)
    mu2 = np.mean(data2)
    sigma2 = np.std(data2)

    plt.bar(bin_edges[:-1],
            hist1,
            width=1,
            align='edge',
            color='blue',
            alpha=0.5,
            label=label1)
    plt.bar(bin_edges[:-1],
            hist2,
            width=1,
            align='edge',
            color='red',
            alpha=0.5,
            label=label2)
    plt.plot(x_gev, y1_gev, color='blue')
    plt.plot(x_gev, y2_gev, color='red')
    plt.plot([x_gev[sig20_thres[0][0]], x_gev[sig20_thres[0][0]]],
             [0, y2_gev[sig20_thres[0][0]]],
             color='k',
             lw=1.0)
    plt.plot([x_gev[sig25_thres[0][0]], x_gev[sig25_thres[0][0]]],
             [0, y2_gev[sig25_thres[0][0]]],
             color='k',
             lw=1.0)
    #plt.plot([x_gev[sig35_thres[0][0]], x_gev[sig35_thres[0][0]]], [0, y2_gev[sig35_thres[0][0]]], color='k', lw=1.0)
    plt.plot([mu1, mu1], [0, 1], color='blue', linestyle=':')
    plt.plot([mu2, mu2], [0, 1], color='red', linestyle=':')

    plt.ylabel('PDF')
    plt.xlabel('Temperature')
    plt.ylim(
        0,
        np.max(
            (np.max(hist1), np.max(hist2), np.max(y1_gev), np.max(y2_gev))) +
        0.02)

    plt.legend()
    plt.title('{} {}'.format(site, subset_label))

    plt.savefig('{}{}_{}{}.png'.format(config['PLOT_DIR'], site, subset_label,
                                       variable),
                bbox_inches='tight',
                dpi=200)
    print('Plotted histogram for {}'.format(site))

    return
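get_gev_fit is a project helper ("try a couple times to get a good fit") not shown here; a plausible minimal stand-in (assumed retry logic):

import numpy as np
from scipy.stats import genextreme as gev

def get_gev_fit(data, n_tries=3):
    # Retry the MLE from a few starting shapes and keep the best
    # log-likelihood, since genextreme.fit can stall on a poor start.
    data = np.asarray(data, dtype=float)
    data = data[~np.isnan(data)]
    best, best_ll = None, -np.inf
    for c0 in np.linspace(-0.5, 0.5, n_tries):
        params = gev.fit(data, c0)
        ll = np.sum(gev.logpdf(data, *params))
        if ll > best_ll:
            best, best_ll = params, ll
    return best  # (shape, loc, scale)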
Code Example #29
File: evs_plots.py Project: polyphant1/PyCosmo
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import rc
from scipy.stats import genextreme as gev

# set up fonts for plotting
rc('text', usetex=True)
rc('font', family='serif')
rc('font', size=12)

x = np.linspace(-4,7,100) # to evaluate GEVs over

fig = plt.figure(1, figsize=(7, 3.25))
fig.clf()

ax1 = fig.add_subplot(121)
ax1.plot(x, gev.pdf(x, 0), '-r', label='$\gamma=0$')
ax1.plot(x, gev.pdf(x, 0.28), '-g', label='$\gamma=0.28$')
ax1.plot(x, gev.pdf(x, 0.56), '-b', label='$\gamma=0.56$')
ax1.set_xlim([-4,7])
ax1.set_xlabel('$x$')
ax1.set_ylabel('$G_{\mathrm{GEV}}(x)$')
ax1.legend(loc='upper right', frameon=False, handletextpad=0)
ax1.yaxis.set_ticks([0,0.1,0.2,0.3,0.4])

ax2 = fig.add_subplot(122)
ax2.plot(x, gev.pdf(x, 0), '-r', label='$\gamma=0$')
ax2.plot(x, gev.pdf(x, -0.28), '-g', label='$\gamma=-0.28$')
ax2.plot(x, gev.pdf(x, -0.56), '-b', label='$\gamma=-0.56$')
ax2.set_xlim([-4,7])
ax2.set_xlabel('$x$')
ax2.legend(loc='upper right', frameon=False, handletextpad=0)
Code Example #30
File: Retrieval.py Project: YanlanLiu/VOD_hydraulics
 def f_p50_prior(p50): return np.log(gev.pdf(-p50, 0.65, -4.43, 1.94)+1e-20)
 p50_init = 4.43
Code Example #31
def plot_pod_vs_far(site, data1_hi, data1_lo, subset_label=None):
    """
    Compare the POD and FAR from 2-sigma to 4-sigma for high and low
    :param site: site string
    :param data1_hi: array of high temp data from reference period
    :param data1_lo: array of low temp data from reference period
    :param subset_label: string label for the subset of data (e.g. month/season)
    """

    # get histogram parameters
    range_min_hi = np.nanmin(np.hstack(
        (data1_hi))) - np.nanmin(np.hstack((data1_hi))) % 10
    range_max_hi = np.nanmax(np.hstack(
        (data1_hi))) + (10 - np.nanmax(np.hstack((data1_hi))) % 10 + 20)
    bins_hi = int(range_max_hi - range_min_hi)
    range_min_lo = np.nanmin(np.hstack(
        (data1_lo))) - np.nanmin(np.hstack((data1_lo))) % 10
    range_max_lo = np.nanmax(np.hstack(
        (data1_lo))) + (10 - np.nanmax(np.hstack((data1_lo))) % 10) + 10
    bins_lo = int(range_max_lo - range_min_lo)

    # gev fitting--use function to try a couple times to get a good fit
    shape1_hi, loc1_hi, scale1_hi = get_gev_fit(data1_hi)

    x_gev_hi = np.linspace(range_min_hi, range_max_hi, bins_hi * 10 + 1)
    y1_gev_hi = gev.pdf(x_gev_hi, shape1_hi, loc1_hi, scale1_hi)

    sigma_array = np.linspace(2, 5, 7)  # do 30 for longer one

    # compute POD and FAR of 2.5-sigma event (from reference climate)
    mean1_hi = gev.mean(shape1_hi, loc=loc1_hi, scale=scale1_hi)
    std1_hi = np.sqrt(gev.var(shape1_hi, loc=loc1_hi, scale=scale1_hi))

    # same for low
    shape1_lo, loc1_lo, scale1_lo = get_gev_fit(data1_lo)

    x_gev_lo = np.linspace(range_min_lo, range_max_lo, bins_lo * 10 + 1)
    y1_gev_lo = gev.pdf(x_gev_lo, shape1_lo, loc1_lo, scale1_lo)


    # compute POD and FAR of 2.5-sigma event (from reference climate)
    mean1_lo = gev.mean(shape1_lo, loc=loc1_lo, scale=scale1_lo)
    std1_lo = np.sqrt(gev.var(shape1_lo, loc=loc1_lo, scale=scale1_lo))

    #define dataframes of what we are pulling
    warming_levels = np.linspace(0.1, 1, 10)
    pod_hi = pd.DataFrame(index=sigma_array, columns=warming_levels)
    pod_lo = pd.DataFrame(index=sigma_array, columns=warming_levels)
    far_hi = pd.DataFrame(index=sigma_array, columns=warming_levels)
    far_lo = pd.DataFrame(index=sigma_array, columns=warming_levels)
    far_lo = far_lo.fillna(0)
    y_curves_hi = pd.DataFrame(index=warming_levels, columns=x_gev_hi)
    y_curves_lo = pd.DataFrame(index=warming_levels, columns=x_gev_lo)
    hi_locs = np.zeros(len(warming_levels))
    lo_locs = np.zeros(len(warming_levels))

    for i, level in enumerate(warming_levels):
        loc1_hi_new = loc1_hi + level * std1_hi
        hi_locs[i] = loc1_hi_new
        y2_gev_hi = gev.pdf(x_gev_hi, shape1_hi, loc1_hi_new, scale1_hi)
        y_curves_hi.loc[level] = y2_gev_hi
        for sigma in sigma_array:
            pod, far = get_pod_far_curve(x_gev_hi,
                                         y1_gev_hi,
                                         y2_gev_hi,
                                         mean1_hi,
                                         std1_hi,
                                         sigma,
                                         sig_thresh=2.0)
            pod_hi[level][sigma] = pod * 100.
            far_hi[level][sigma] = far * 100.

        loc1_lo_new = loc1_lo + level * std1_lo
        lo_locs[i] = loc1_lo_new
        y2_gev_lo = gev.pdf(x_gev_lo, shape1_lo, loc1_lo_new, scale1_lo)
        y_curves_lo.loc[level] = y2_gev_lo
        for sigma in sigma_array:
            pod, far = get_pod_far_curve(x_gev_lo,
                                         y1_gev_lo,
                                         y2_gev_lo,
                                         mean1_lo,
                                         std1_lo,
                                         sigma,
                                         sig_thresh=2.0)
            pod_lo[level][sigma] = pod * 100.
            far_lo[level][sigma] = far * 100.

    # POD vs FAR plot
    # labels
    labels = [
        '2.0$\sigma$', '2.5$\sigma$', '3.0$\sigma$', '3.5$\sigma$',
        '4.0$\sigma$', '4.5$\sigma$', '5.0$\sigma$'
    ]

    # another way of plotting POD vs FAR
    fig = plt.figure()
    fig.set_size_inches(6, 4)

    for i, level in enumerate(warming_levels):
        plt.plot(far_hi[level],
                 pod_hi[level],
                 color=plt.cm.Reds(level - 0.05),
                 marker='o',
                 lw=4,
                 label='$\mu$+{}$\sigma$'.format(np.around(level, 1)))

        for j, ind in enumerate(far_hi.index):
            if i == 2:
                plt.text(far_hi[level][ind] + 2,
                         pod_hi[level][ind] - 4,
                         labels[j],
                         color='black',
                         fontsize=8)

    plt.ylabel('POD (%)')
    plt.xlabel('FAR (%)')
    plt.ylim(0, 100)
    plt.xlim(0, 100)

    plt.legend(fontsize=5, loc='upper left')
    plt.title('POD vs FAR {} {} (2.0-$\sigma$ threshold)'.format(
        site, subset_label))

    plt.savefig('{}pod_vs_far_warming_hi_{}_{}.png'.format(
        config['PLOT_DIR'], site, subset_label),
                bbox_inches='tight',
                dpi=200)
    print('Plotted pod_vs_far for {}'.format(site))

    # same for low --------------------
    fig = plt.figure()
    fig.set_size_inches(6, 4)

    for i, level in enumerate(warming_levels):
        plt.plot(far_lo[level],
                 pod_lo[level],
                 color=plt.cm.Blues(level - 0.05),
                 marker='o',
                 lw=4,
                 label='$\mu$+{}$\sigma$'.format(np.around(level, 1)))

        for j, ind in enumerate(far_lo.index):
            if i == 2:
                plt.text(far_lo[level][ind] + 2,
                         pod_lo[level][ind] - 4,
                         labels[j],
                         color='black',
                         fontsize=8)

    plt.ylabel('POD (%)')
    plt.xlabel('FAR (%)')
    plt.ylim(0, 100)
    plt.xlim(0, 100)

    plt.legend(fontsize=5, loc='upper right')
    plt.title('POD vs FAR {} {} (2.0-$\sigma$ threshold)'.format(
        site, subset_label))

    plt.savefig('{}pod_vs_far_warming_lo_{}_{}.png'.format(
        config['PLOT_DIR'], site, subset_label),
                bbox_inches='tight',
                dpi=200)
    print('Plotted pod_vs_far for {}'.format(site))


    # plot the different temperature curves... --------------------------------
    fig = plt.figure()
    fig.set_size_inches(6, 4)

    #plot mean
    plt.plot(x_gev_hi, y1_gev_hi, color='black', label='1950-1979')

    for level in warming_levels:
        plt.plot(x_gev_hi,
                 y_curves_hi.loc[level].values,
                 color=plt.cm.Reds(level - 0.05),
                 label='$\mu$+{}$\sigma$'.format(np.around(level, 1)))

    plt.plot([mean1_hi, mean1_hi], [0, 1], color='red', linestyle=':')
    plt.plot([mean1_hi + std1_hi * 2, mean1_hi + std1_hi * 2], [0, 1],
             color='black',
             linestyle=':')
    plt.plot([mean1_hi + std1_hi * 2.5, mean1_hi + std1_hi * 2.5], [0, 1],
             color='black',
             linestyle=':')

    plt.ylabel('PDF')
    plt.xlabel('Temperature')
    plt.ylim(0, np.max(y_curves_hi.values) + 0.02)

    plt.legend(fontsize=5)
    plt.title('{} {}'.format(site, subset_label))

    plt.savefig('{}shift_mean_hi_{}_{}.png'.format(config['PLOT_DIR'], site,
                                                   subset_label),
                bbox_inches='tight',
                dpi=200)
    plt.close()

    # low temp
    fig = plt.figure()
    fig.set_size_inches(6, 4)

    plt.plot(x_gev_lo, y1_gev_lo, color='black', label='1950-1979')

    for level in warming_levels:
        plt.plot(x_gev_lo,
                 y_curves_lo.loc[level].values,
                 color=plt.cm.Blues(level - 0.05),
                 label='$\mu$+{}$\sigma$'.format(np.around(level, 1)))

    plt.plot([mean1_lo, mean1_lo], [0, 1], color='blue', linestyle=':')
    plt.plot([mean1_lo + std1_lo * 2, mean1_lo + std1_lo * 2], [0, 1],
             color='black',
             linestyle=':')
    plt.plot([mean1_lo + std1_lo * 2.5, mean1_lo + std1_lo * 2.5], [0, 1],
             color='black',
             linestyle=':')

    plt.ylabel('PDF')
    plt.xlabel('Temperature')
    plt.ylim(0, np.max(y_curves_lo.values) + 0.02)

    plt.legend(fontsize=5)
    plt.title('{} {}'.format(site, subset_label))

    plt.savefig('{}shift_mean_lo_{}_{}.png'.format(config['PLOT_DIR'], site,
                                                   subset_label),
                bbox_inches='tight',
                dpi=200)


    return
Code Example #32
File: dataAnalysis.py Project: vrbaj/CrystalData
    print(i)
    x[i, :] = data_series[i:i + n]

# creation of learning model (adaptive filter)
f = pa.filters.FilterNLMS(n, mu=1., w=np.ones(n))
y, e, w = f.run(d, x)
np.save('e_data', e)

cislo_vahy = 1

w_pokus = w[1:12000, cislo_vahy]
print('SELEKCE VAHY:', w_pokus.shape)
fit = genextreme.fit(w_pokus[1:9400])
print('FIT:', fit)

hpp = genextreme.pdf(w_pokus, fit[0], loc=fit[1], scale=fit[2]) * fit[2]
print('minimum:', min(hpp[0:12000]))
print('minimum index:', np.argmin(hpp[0:12000]))

dw = np.copy(w)
dw[1:] = np.abs(np.diff(dw, n=1, axis=0))
dw = dw[:, cislo_vahy]  # np.sum(dw, axis=1)
print(dw.shape)
fit2 = genextreme.fit(dw[10:13000])
print('FIT2:', fit2)
hpp2 = genextreme.pdf(dw[10:13000], fit2[0], loc=fit2[1],
                      scale=fit2[2]) * fit2[2]
print('odhad hpp2:')
print('minimum2:', min(hpp2))
print('minimum index2:', np.argmin(hpp2))
Code Example #33
File: asrOstrava.py Project: vrbaj/CrystalData
# creation of learning model (adaptive filter)
f = pa.filters.FilterNLMS(n, mu=1., w=np.ones(n))
y, e, w = f.run(d, x)

print(w.shape)
# process tap updates in gev_window sized window
dw = np.copy(w)
dw[1:] = np.abs(np.diff(dw, n=1, axis=0))
dw_count = int(dw.shape[0])
print(dw_count)
hpp = np.zeros((dw_count, n))
for i in range(gev_window, dw.shape[0]):
    print((str(datetime.now())), " processing: ", i)
    for j in range(n):
        fit = genextreme.fit(dw[i - gev_window:i, j])
        hpp[i - gev_window, j] = genextreme.pdf(
            dw[i, j], fit[0], loc=fit[1], scale=fit[2]) * fit[2]

np.save('hpp_data' + str(gev_window), hpp)
# cislo_vahy = 1
#
# w_pokus = w[1:12000, cislo_vahy]
# print('SELEKCE VAHY:', w_pokus.shape)
# fit = genextreme.fit(w_pokus[1:9400])
# print('FIT:', fit)
#
# hpp = genextreme.pdf(w_pokus, fit[0], loc=fit[1], scale=fit[2])*fit[2]
# print('minimum:', min(hpp[0:12000]))
# print('minimum index:', np.argmin(hpp[0:12000]))
#
#
#
Code Example #34
File: hminputs.py Project: nguyetlm/Hapi
    def StatisticalProperties(self,
                              PathNodes,
                              PathTS,
                              StartDate,
                              WarmUpPeriod,
                              SavePlots,
                              SavePath,
                              SeparateFiles=False,
                              Filter=False,
                              Distibution="GEV",
                              EstimateParameters=False,
                              Quartile=0,
                              RIMResults=False,
                              SignificanceLevel=0.1):
        """
        =============================================================================
          StatisticalProperties(PathNodes, PathTS, StartDate, WarmUpPeriod, SavePlots, SavePath,
                              SeparateFiles = False, Filter = False, RIMResults = False)
        =============================================================================

        StatisticalProperties method reads the SWIM output file (.dat file) that
        contains the time series of discharge for some computational nodes
        and calculates some statistical properties

        the code assumes that the time series are of a daily temporal resolution, and
        that the hydrological year is 1-Nov/31-Oct (Petrow and Merz, 2009, JoH).

        Parameters
        ----------
            1-PathNodes : [String]
                the name of the file which contains the ID of the computational
                nodes you want to do the statistical analysis for, the ObservedFile
                should contain the discharge time series of these nodes in order.
            2-PathTS : [String]
                the name of the SWIM result file (the .dat file).
            3-StartDate : [string]
                the beginning date of the time series.
            4-WarmUpPeriod : [integer]
                the number of days you want to neglect at the beginning of the
                simulation (warm up period).
            5-SavePlots : [Bool]
                DESCRIPTION.
            6-SavePath : [String]
                the path where you want to save the statistical properties.
            7-SeparateFiles: [Bool]
                if the discharge data are stored in separate files not all in one file
                SeparateFiles should be True, default [False].
            8-Filter: [Bool]
                for observed or RIMresult data it has gaps of times where the
                model did not run or gaps in the observed data if these gap days
                are filled with a specific value and you want to ignore it here
                give Filter = Value you want
            9-RIMResults: [Bool]
                If the files are results from RIM or observed, as the format
                differs between the two. default [False]

        Returns
        -------
            1-Statistical Properties.csv:
                file containing some statistical properties like mean, std, min, 5%, 25%,
                median, 75%, 95%, max, t_beg, t_end, nyr, q1.5, q2, q5, q10, q25, q50,
                q100, q200, q500, q1000.
        """

        ComputationalNodes = np.loadtxt(PathNodes, dtype=np.uint16)
        # hydrographs
        if SeparateFiles:
            TS = pd.DataFrame()
            if RIMResults:
                for i in range(len(ComputationalNodes)):
                    TS.loc[:, int(ComputationalNodes[i])] = self.ReadRIMResult(
                        PathTS + "/" + str(int(ComputationalNodes[i])) +
                        '.txt')
            else:
                for i in range(len(ComputationalNodes)):
                    TS.loc[:, int(ComputationalNodes[i])] = np.loadtxt(
                        PathTS + "/" + str(int(ComputationalNodes[i])) +
                        '.txt')  #,skiprows = 0

            StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
            EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
            ind = pd.date_range(StartDate, EndDate)
            TS.index = ind
        else:
            TS = pd.read_csv(PathTS, delimiter=r'\s+', header=None)
            StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
            EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
            TS.index = pd.date_range(StartDate, EndDate, freq="D")
            # delete the first two columns
            del TS[0], TS[1]
            TS.columns = ComputationalNodes

        # neglect the first year (warmup year) in the time series
        TS = TS.loc[StartDate + dt.timedelta(days=WarmUpPeriod):EndDate, :]

        # List of the table output, including some general data and the return periods.
        col_csv = [
            'mean', 'std', 'min', '5%', '25%', 'median', '75%', '95%', 'max',
            't_beg', 't_end', 'nyr'
        ]
        rp_name = [
            'q1.5', 'q2', 'q5', 'q10', 'q25', 'q50', 'q100', 'q200', 'q500',
            'q1000'
        ]
        col_csv = col_csv + rp_name

        # Declare a dataframe for the output file, with the gauge numbers as
        # index and all the output names as columns.
        StatisticalPr = pd.DataFrame(np.nan,
                                     index=ComputationalNodes,
                                     columns=col_csv)
        StatisticalPr.index.name = 'ID'
        DistributionPr = pd.DataFrame(np.nan,
                                      index=ComputationalNodes,
                                      columns=['loc', 'scale'])
        DistributionPr.index.name = 'ID'
        # required return periods (one per entry in rp_name)
        T = np.array([1.5, 2, 5, 10, 25, 50, 100, 200, 500, 1000])
        # the non-exceedance probabilities (F) of the chosen return periods:
        # F = 1 - (1/T)
        F = 1 - (1 / T)
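        # e.g. for T = 100 years: F = 1 - 1/100 = 0.99, so the 100-year
        # discharge is the 0.99 quantile (ppf) of the fitted distribution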
        # Iteration over all the gauge numbers.
        for i in ComputationalNodes:
            QTS = TS.loc[:, i]
            # The time series is resampled to the annual maxima, and turned into a
            # numpy array.
            # The hydrological year is 1-Nov/31-Oct (from Petrow and Merz, 2009, JoH).
            amax = QTS.resample('A-OCT').max().values
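            # e.g. a daily series from 1979-11-01 to 2009-10-31 gives one
            # maximum per Nov-Oct hydrological year, i.e. 30 annual maxima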

            if type(Filter) != bool:
                amax = amax[amax != Filter]
            if EstimateParameters:
                # estimate the parameters through an optimization
                # alpha = (np.sqrt(6) / np.pi) * amax.std()
                # beta = amax.mean() - 0.5772 * alpha
                # param_dist = [beta, alpha]
                threshold = np.quantile(amax, Quartile)
                if Distibution == "GEV":
                    print("Still to be finished later")
                else:
                    param = Gumbel.EstimateParameter(amax, Gumbel.ObjectiveFn,
                                                     threshold)
                    param_dist = [param[1], param[2]]

            else:
                # estimate the parameters through the maximum likelihood method
                if Distibution == "GEV":
                    param_dist = genextreme.fit(amax)
                else:
                    # A gumbel distribution is fitted to the annual maxima
                    param_dist = gumbel_r.fit(amax)

            if Distibution == "GEV":
                DistributionPr.loc[i, 'c'] = param_dist[0]
                DistributionPr.loc[i, 'loc'] = param_dist[1]
                DistributionPr.loc[i, 'scale'] = param_dist[2]
            else:
                DistributionPr.loc[i, 'loc'] = param_dist[0]
                DistributionPr.loc[i, 'scale'] = param_dist[1]

            # Return periods from the fitted distribution are stored.
            # get the discharge corresponding to the return periods
            if Distibution == "GEV":
                Qrp = genextreme.ppf(F,
                                     param_dist[0],
                                     loc=param_dist[1],
                                     scale=param_dist[2])
            else:
                Qrp = gumbel_r.ppf(F, loc=param_dist[0], scale=param_dist[1])
            # to get the non-exceedance probability for a specific value,
            # sort the annual maxima
            amax.sort()
            # calculate F (the non-exceedance probability based on the
            # Weibull plotting position)
            cdf_Weibul = ST.Weibul(amax)
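            # Weibull plotting position (assumed behaviour of ST.Weibul):
            # F_i = i / (n + 1) for the i-th smallest of n annual maxima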
            # the ProbapilityPlot method (of GEV or Gumbel) calculates the
            # theoretical values based on the fitted distribution parameters
            # and the empirical (Weibull) cdf, and calculates the confidence
            # interval
            if Distibution == "GEV":
                Qth, Qupper, Qlower = GEV.ProbapilityPlot(
                    param_dist, cdf_Weibul, amax, SignificanceLevel)
                # to calculate the theoretical F
                Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
                # note: loc is param_dist[1]; the original used param_dist[2]
                # here, which was a copy-paste bug (the cdf call below is correct)
                pdf_fitted = genextreme.pdf(Qx,
                                            param_dist[0],
                                            loc=param_dist[1],
                                            scale=param_dist[2])
                cdf_fitted = genextreme.cdf(Qx,
                                            param_dist[0],
                                            loc=param_dist[1],
                                            scale=param_dist[2])
            else:
                Qth, Qupper, Qlower = Gumbel.ProbapilityPlot(
                    param_dist, cdf_Weibul, amax, SignificanceLevel)
                # gumbel_r.interval(SignificanceLevel)
                # to calculate the theoretical F
                Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
                pdf_fitted = gumbel_r.pdf(Qx,
                                          loc=param_dist[0],
                                          scale=param_dist[1])
                cdf_fitted = gumbel_r.cdf(Qx,
                                          loc=param_dist[0],
                                          scale=param_dist[1])
            # then calculate T (the return period): T = 1/(1-F)
            if SavePlots:
                fig = plt.figure(60, figsize=(20, 10))
                gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig)
                # Plot the histogram and the fitted distribution, save it for each gauge.
                ax1 = fig.add_subplot(gs[0, 0])
                ax1.plot(Qx, pdf_fitted, 'r-')
                ax1.hist(amax, density=True)
                ax1.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
                ax1.set_ylabel('pdf', fontsize=15)

                ax2 = fig.add_subplot(gs[0, 1])
                ax2.plot(Qx, cdf_fitted, 'r-')
                ax2.plot(amax, cdf_Weibul, '.-')
                ax2.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
                ax2.set_ylabel('cdf', fontsize=15)

                plt.savefig(SavePath + "/" + "Figures/" + str(i) + '.png',
                            format='png')
                plt.close()

                fig = plt.figure(70, figsize=(10, 8))
                plt.plot(Qth,
                         amax,
                         'd',
                         color='#606060',
                         markersize=12,
                         label=Distibution + ' distribution')
                plt.plot(Qth,
                         Qth,
                         '^-.',
                         color="#3D59AB",
                         label="Weibull plotting position")
                if Distibution != "GEV":
                    plt.plot(Qth,
                             Qlower,
                             '*--',
                             color="#DC143C",
                             markersize=12,
                             label='Lower limit (' +
                             str(int(
                                 (1 - SignificanceLevel) * 100)) + " % CI)")
                    plt.plot(Qth,
                             Qupper,
                             '*--',
                             color="#DC143C",
                             markersize=12,
                             label='Upper limit (' +
                             str(int(
                                 (1 - SignificanceLevel) * 100)) + " % CI)")

                plt.legend(fontsize=15, framealpha=1)
                plt.xlabel('Theoretical Annual Discharge(m3/s)', fontsize=15)
                plt.ylabel('Annual Discharge(m3/s)', fontsize=15)
                plt.savefig(SavePath + "/" + "Figures/F-" + str(i) + '.png',
                            format='png')
                plt.close()

            StatisticalPr.loc[i, 'mean'] = QTS.mean()
            StatisticalPr.loc[i, 'std'] = QTS.std()
            StatisticalPr.loc[i, 'min'] = QTS.min()
            StatisticalPr.loc[i, '5%'] = QTS.quantile(0.05)
            StatisticalPr.loc[i, '25%'] = QTS.quantile(0.25)
            StatisticalPr.loc[i, 'median'] = QTS.quantile(0.50)
            StatisticalPr.loc[i, '75%'] = QTS.quantile(0.75)
            StatisticalPr.loc[i, '95%'] = QTS.quantile(0.95)
            StatisticalPr.loc[i, 'max'] = QTS.max()
            StatisticalPr.loc[i, 't_beg'] = QTS.index.min()
            StatisticalPr.loc[i, 't_end'] = QTS.index.max()
            StatisticalPr.loc[
                i, 'nyr'] = (StatisticalPr.loc[i, 't_end'] -
                             StatisticalPr.loc[i, 't_beg']).days / 365.25
            for irp, irp_name in zip(Qrp, rp_name):
                StatisticalPr.loc[i, irp_name] = irp

            # Print for prompt and check progress.
            print("Gauge", i, "done.")
        # Output files
        StatisticalPr.to_csv(SavePath + "/" + "Statistical Properties.csv")
        self.StatisticalPr = StatisticalPr
        DistributionPr.to_csv(SavePath + "/" + "DistributionProperties.csv")
        self.DistributionPr = DistributionPr
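
# A minimal, self-contained sketch (not from the source) of the core step
# above: fit a GEV to annual maxima and read the return-period discharges
# off the fitted quantile function (ppf).
import numpy as np
from scipy.stats import genextreme

rng = np.random.default_rng(0)
# synthetic annual maxima standing in for the resampled discharge series
amax = genextreme.rvs(-0.1, loc=100, scale=30, size=50, random_state=rng)
c, loc, scale = genextreme.fit(amax)
T = np.array([2, 10, 100])
F = 1 - 1 / T  # non-exceedance probabilities
Qrp = genextreme.ppf(F, c, loc=loc, scale=scale)
print(dict(zip(["q2", "q10", "q100"], np.round(Qrp, 1))))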
コード例 #35
from scipy.stats import genextreme

def gev_pdf(x):
    # xi, mu, sigma: fitted GEV shape/location/scale from the enclosing scope
    return genextreme.pdf(x, xi, loc=mu, scale=sigma)
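
# A hedged usage sketch (not from the source) showing where xi, mu and sigma
# could come from (assumes numpy imported as np):
# xi, mu, sigma = genextreme.fit(amax)   # amax: array of annual maxima
# pdf_values = gev_pdf(np.linspace(amax.min(), amax.max(), 200))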