Beispiel #1
0
def plotDiagnostics(data, mu, xi, sigma, figfile):
    """
    Create a 4-panel diagnostics plot of the fitted distribution.

    The four panels are a probability plot, a quantile plot, a return
    level plot and a density plot. The probability and quantile plots
    both compare the exceedances (values of `data` above `mu`) against a
    generalised Pareto distribution with shape `xi`, location `mu` and
    scale `sigma`.

    :param data: :class:`numpy.ndarray` of observed data values (in units
                 of metres/second).
    :param float mu: Selected threshold value.
    :param float xi: Fitted shape parameter.
    :param float sigma: Fitted scale parameter.
    :param str figfile: Path to store the file (includes image format)

    """
    LOG.info("Plotting diagnostics")
    fig, ax = plt.subplots(2, 2)
    axes = ax.flatten()

    # Sort once; the sorted record is reused for the empirical return
    # periods and the scatter points in the return level panel.
    sorteddata = np.sort(data)
    sortedmax = np.sort(data[data > mu])
    gpdf = fittedPDF(data, mu, xi, sigma)

    # A single ProbPlot parameterised with the fitted GPD drives both the
    # probability and quantile panels. Building the probability plot from
    # ProbPlot's default (normal) distribution would put normal CDF values
    # on the "Model" axis rather than the fitted model.
    prplot = sm.ProbPlot(sortedmax,
                         genpareto,
                         distargs=(xi, ),
                         loc=mu,
                         scale=sigma)
    prplot.ppplot(xlabel="Empirical", ylabel="Model", ax=axes[0], line='45')
    axes[0].set_title("Probability plot")

    prplot.qqplot(xlabel="Model", ylabel="Empirical", ax=axes[1], line='45')
    axes[1].set_title("Quantile plot")

    # Return level panel: fitted curve plus empirical estimates.
    ax2 = axes[2]
    rp = np.array(
        [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000])
    # Fraction of observations that exceed the threshold.
    rate = float(len(sortedmax)) / float(len(data))
    rval = returnLevels(rp, mu, xi, sigma, rate)

    emprp = empiricalReturnPeriod(sorteddata)
    # Only show empirical points with a return period above 1.
    shown = emprp > 1
    ax2.semilogx(rp, rval, label="Fitted RP curve", color='r')
    ax2.scatter(emprp[shown],
                sorteddata[shown],
                color='b',
                label="Empirical RP",
                s=100)
    ax2.legend(loc=2)
    ax2.set_xlabel("Return period")
    ax2.set_ylabel("Return level")
    ax2.set_title("Return level plot")
    ax2.grid(True)

    # Density panel: histogram of exceedances in bins of width 2 starting
    # at the threshold, ending at the next multiple of 4 above the maximum.
    maxbin = 4 * (np.floor(data.max() / 4) + 1)
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # kept here so the rendered panel does not change.
    sns.distplot(sortedmax,
                 bins=np.arange(mu, maxbin, 2),
                 hist=True,
                 axlabel='Wind speed (m/s)',
                 ax=axes[3])
    axes[3].plot(sortedmax, gpdf, color='r')
    axes[3].set_title("Density plot")

    fig.tight_layout()
    # Save and close the figure created above explicitly rather than
    # relying on pyplot's notion of the "current" figure.
    fig.savefig(figfile)
    plt.close(fig)
Beispiel #2
0
def plotFit(data, mu, xi, sigma, title, figfile):
    """
    Plot a fitted distribution, with approximate 95% confidence interval
    and empirical return period values.

    The interval is drawn at the fitted return level plus and minus 1.96
    times the value returned by :func:`returnPeriodUncertainty`; 1.96 is
    the two-sided 95% quantile of the standard normal distribution.

    :param data: :class:`numpy.ndarray` of observed data values.
    :param float mu: Selected threshold value.
    :param float xi: Fitted shape parameter.
    :param float sigma: Fitted scale parameter.
    :param str title: Title string for the plot.
    :param str figfile: Path to store the file (includes image format)

    """
    LOG.info("Plotting fitted return period curve")

    rp = np.array(
        [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000])
    # Fraction of observations that exceed the threshold.
    rate = float(len(data[data > mu])) / float(len(data))
    rval = returnLevels(rp, mu, xi, sigma, rate)
    err = returnPeriodUncertainty(data, mu, xi, sigma, rp)

    # The empirical return periods are plotted against the sorted data, so
    # they are computed from the sorted data as well (as plotDiagnostics
    # does), keeping the two arrays paired.
    sortedmax = np.sort(data)
    emprp = empiricalReturnPeriod(sortedmax)

    fig, ax1 = plt.subplots(1, 1, figsize=(12, 12))
    ax1.semilogx(rp, rval, label="Fitted RP curve")
    # NOTE(review): labelling this band as a 95% interval assumes `err` is
    # a standard error of the return level - confirm against
    # returnPeriodUncertainty.
    ax1.semilogx(rp,
                 rval + 1.96 * err,
                 label="95% CI",
                 linestyle='--',
                 color='0.5')
    ax1.semilogx(rp, rval - 1.96 * err, linestyle='--', color='0.5')

    # Only show empirical points with a return period above 1.
    shown = emprp > 1
    ax1.scatter(emprp[shown],
                sortedmax[shown],
                s=100,
                color='r',
                label="Empirical RP")

    title_str = (
        title + "\n" +
        r"$\mu$ = {0:.2f}, $\xi$ = {1:.5f}, $\sigma$ = {2:.4f}".format(
            mu, xi, sigma))
    ax1.set_title(title_str)
    ax1.legend(loc=2)
    ax1.set_ylim((0, 100))
    ax1.set_xlim((1, 10000))
    ax1.set_ylabel('Wind speed (m/s)')
    ax1.set_xlabel('Return period (years)')
    ax1.grid(which='major')
    ax1.grid(which='minor', linestyle='--', linewidth=1)

    # Save and close the figure created above explicitly rather than
    # relying on pyplot's notion of the "current" figure.
    fig.savefig(figfile)
    plt.close(fig)
Beispiel #3
0
def plotDiagnostics(data, mu, xi, sigma, figfile):
    """
    Create a 4-panel diagnostics plot of the fitted distribution.

    The four panels are a probability plot, a quantile plot, a return
    level plot and a density plot. The probability and quantile plots
    both compare the exceedances (values of `data` above `mu`) against a
    generalised Pareto distribution with shape `xi`, location `mu` and
    scale `sigma`.

    :param data: :class:`numpy.ndarray` of observed data values (in units
                 of metres/second).
    :param float mu: Selected threshold value.
    :param float xi: Fitted shape parameter.
    :param float sigma: Fitted scale parameter.
    :param str figfile: Path to store the file (includes image format)

    """
    LOG.info("Plotting diagnostics")
    fig, ax = plt.subplots(2, 2)
    axes = ax.flatten()

    # Sort once; the sorted record is reused for the empirical return
    # periods and the scatter points in the return level panel.
    sorteddata = np.sort(data)
    sortedmax = np.sort(data[data > mu])
    gpdf = fittedPDF(data, mu, xi, sigma)

    # A single ProbPlot parameterised with the fitted GPD drives both the
    # probability and quantile panels. Building the probability plot from
    # ProbPlot's default (normal) distribution would put normal CDF values
    # on the "Model" axis rather than the fitted model.
    prplot = sm.ProbPlot(sortedmax, genpareto, distargs=(xi,),
                         loc=mu, scale=sigma)
    prplot.ppplot(xlabel="Empirical", ylabel="Model", ax=axes[0], line='45')
    axes[0].set_title("Probability plot")

    prplot.qqplot(xlabel="Model", ylabel="Empirical", ax=axes[1], line='45')
    axes[1].set_title("Quantile plot")

    # Return level panel: fitted curve plus empirical estimates.
    ax2 = axes[2]
    rp = np.array([1, 2, 5, 10, 20, 50, 100, 200,
                   500, 1000, 2000, 5000, 10000])
    # Fraction of observations that exceed the threshold.
    rate = float(len(sortedmax)) / float(len(data))
    rval = returnLevels(rp, mu, xi, sigma, rate)

    emprp = empiricalReturnPeriod(sorteddata)
    # Only show empirical points with a return period above 1.
    shown = emprp > 1
    ax2.semilogx(rp, rval, label="Fitted RP curve", color='r')
    ax2.scatter(emprp[shown], sorteddata[shown],
                color='b', label="Empirical RP", s=100)
    ax2.legend(loc=2)
    ax2.set_xlabel("Return period")
    ax2.set_ylabel("Return level")
    ax2.set_title("Return level plot")
    ax2.grid(True)

    # Density panel: histogram of exceedances in bins of width 2 starting
    # at the threshold, ending at the next multiple of 4 above the maximum.
    maxbin = 4 * (np.floor(data.max() / 4) + 1)
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # kept here so the rendered panel does not change.
    sns.distplot(sortedmax, bins=np.arange(mu, maxbin, 2),
                 hist=True, axlabel='Wind speed (m/s)',
                 ax=axes[3])
    axes[3].plot(sortedmax, gpdf, color='r')
    axes[3].set_title("Density plot")

    fig.tight_layout()
    # Save and close the figure created above explicitly rather than
    # relying on pyplot's notion of the "current" figure.
    fig.savefig(figfile)
    plt.close(fig)
Beispiel #4
0
def plotFit(data, mu, xi, sigma, title, figfile):
    """
    Plot a fitted distribution, with approximate 95% confidence interval
    and empirical return period values.

    The interval is drawn at the fitted return level plus and minus 1.96
    times the value returned by :func:`returnPeriodUncertainty`; 1.96 is
    the two-sided 95% quantile of the standard normal distribution.

    :param data: :class:`numpy.ndarray` of observed data values.
    :param float mu: Selected threshold value.
    :param float xi: Fitted shape parameter.
    :param float sigma: Fitted scale parameter.
    :param str title: Title string for the plot.
    :param str figfile: Path to store the file (includes image format)

    """
    LOG.info("Plotting fitted return period curve")

    rp = np.array([1, 2, 5, 10, 20, 50, 100, 200,
                   500, 1000, 2000, 5000, 10000])
    # Fraction of observations that exceed the threshold.
    rate = float(len(data[data > mu])) / float(len(data))
    rval = returnLevels(rp, mu, xi, sigma, rate)
    err = returnPeriodUncertainty(data, mu, xi, sigma, rp)

    # The empirical return periods are plotted against the sorted data, so
    # they are computed from the sorted data as well (as plotDiagnostics
    # does), keeping the two arrays paired.
    sortedmax = np.sort(data)
    emprp = empiricalReturnPeriod(sortedmax)

    fig, ax1 = plt.subplots(1, 1, figsize=(12, 12))
    ax1.semilogx(rp, rval, label="Fitted RP curve")
    # NOTE(review): labelling this band as a 95% interval assumes `err` is
    # a standard error of the return level - confirm against
    # returnPeriodUncertainty.
    ax1.semilogx(rp, rval + 1.96 * err, label="95% CI",
                 linestyle='--', color='0.5')
    ax1.semilogx(rp, rval - 1.96 * err, linestyle='--', color='0.5')

    # Only show empirical points with a return period above 1.
    shown = emprp > 1
    ax1.scatter(emprp[shown], sortedmax[shown], s=100,
                color='r', label="Empirical RP")

    title_str = (title + "\n" +
                 r"$\mu$ = {0:.2f}, $\xi$ = {1:.5f}, $\sigma$ = {2:.4f}".
                 format(mu, xi, sigma))
    ax1.set_title(title_str)
    ax1.legend(loc=2)
    ax1.set_ylim((0, 100))
    ax1.set_xlim((1, 10000))
    ax1.set_ylabel('Wind speed (m/s)')
    ax1.set_xlabel('Return period (years)')
    ax1.grid(which='major')
    ax1.grid(which='minor', linestyle='--', linewidth=1)

    # Save and close the figure created above explicitly rather than
    # relying on pyplot's notion of the "current" figure.
    fig.savefig(figfile)
    plt.close(fig)
def selectThreshold(data, minexc=10):
    """
    Select an appropriate threshold for fitting a generalised pareto
    distribution.

    Candidate thresholds are scanned upwards from the median of `data` in
    steps of 0.002 until either the maximum of `data` is reached or fewer
    than `minexc` values exceed the candidate. A candidate is retained
    only if its fitted shape parameter is below -0.01 and both the 1000-
    and 10000-year return levels are finite numbers.

    The only constraint placed on the selection is that the shape
    parameter is negative (such that the distribution is bounded).

    :param data: :class:`numpy.ndarray` containing the observed values (with
                 missing values removed).
    :param int minexc: Minimum number of exceedances required.
    :returns: tuple of the shape, scale and threshold. All three belong to
              the same retained candidate. ``(0, 0, 0)`` is returned when
              `data` is empty or no candidate is retained.
    """

    nobs = len(data)
    if nobs == 0:
        # Nothing to fit; data.max() below would raise on empty input.
        log.warning("No suitable shape parameters identified")
        return 0, 0, 0

    sh = []
    sc = []
    t = []
    q1000list = []
    q10000list = []

    eps = -0.01
    step = 0.002
    datamax = data.max()
    mu = np.median(data)
    while mu < datamax:
        nexc = len(data[data > mu])
        if nexc < minexc:
            break
        # Explicit float division so the rate is not truncated to zero
        # when integer division applies.
        rate = float(nexc) / float(nobs)

        pp = calculateShape(mu, data)
        q1000, q10000 = returnLevels(np.array([1000, 10000]), mu, pp[0], pp[2],
                                     rate)
        usable = not (np.isnan(q1000) or np.isnan(q10000))
        if usable and pp[0] < eps:
            t.append(mu)
            sh.append(pp[0])
            sc.append(pp[2])
            q1000list.append(q1000)
            q10000list.append(q10000)
        # Always advance the threshold, including when the return levels
        # are NaN; skipping the increment would repeat the same candidate
        # forever.
        mu += step

    if len(t) == 0:
        log.warning("No suitable shape parameters identified")
        return 0, 0, 0

    # Target return levels: the mean over all retained candidates, inflated
    # by 5% and rounded up to a whole number.
    Av1000 = np.mean(np.array(q1000list))
    Av10000 = np.mean(np.array(q10000list))
    Av1000 = np.ceil(Av1000 + 0.05 * Av1000)
    Av10000 = np.ceil(Av10000 + 0.05 * Av10000)

    idx1000 = find_nearest_index(np.array(q1000list), Av1000)
    idx10000 = find_nearest_index(np.array(q10000list), Av10000)

    # Of the two candidates nearest the targets, keep the one with the
    # higher threshold (ties go to the 10000-year candidate) and return
    # its own threshold, so shape, scale and threshold always describe
    # one and the same fit.
    best = idx1000 if t[idx1000] > t[idx10000] else idx10000

    return sh[best], sc[best], t[best]
                                     stndf['DataEndYear'][i])
    stnName = stndf['stnName'][i].title().strip() + " " + dataRange
    fitname = pjoin(output_path, '{0}_gpdfit.png'.format(stnNum))
    diagname = pjoin(output_path, '{0}_gpddiag.png'.format(stnNum))
    if os.path.exists(filename):
        log.info("Processing {0}".format(stnName))
        df = readDataFile(filename)
        quality = df['QSpeed'].fillna("X").map(
            lambda x: x in ['Y', 'N', 'X', ' ', np.nan])
        dmax = df['Speed'][df['Speed'].notnull() & quality]
        if len(dmax) == 0:
            log.info("No valid data")
            continue
        xi, sigma, mu = selectThreshold(dmax, minexc=10)
        log.debug("Parameters: {0}, {1}, {2}".format(xi, sigma, mu))
        rate = float(len(dmax[dmax > mu])) / float(len(dmax))
        if xi == 0:
            continue
        plotFit(dmax, mu, xi, sigma, stnName, fitname)
        plotDiagnostics(dmax, mu, xi, sigma, diagname)

        gpdfile.write("{0}, {1}, {2:.6f}, {3:.6f}, {4:.3f}, {5:.4f}\n".format(
            stnNum, stnName, xi, sigma, mu, rate))
        rpvals = returnLevels(rp, mu, xi, sigma, rate)
        rpstr = ", ".join(['{:.3f}'] * len(rpvals)).format(*rpvals)
        rpfile.write("{0}, {1}, {2}\n".format(stnNum, stnName, rpstr))
    else:
        log.info("No data file for {0}".format(stnName))
gpdfile.close()
rpfile.close()
def selectThreshold(data, minexc=10):
    """
    Select an appropriate threshold for fitting a generalised pareto
    distribution.

    Candidate thresholds are scanned upwards from the median of `data` in
    steps of 0.002 until either the maximum of `data` is reached or fewer
    than `minexc` values exceed the candidate. A candidate is retained
    only if its fitted shape parameter is below -0.01 and both the 1000-
    and 10000-year return levels are finite numbers.

    The only constraint placed on the selection is that the shape
    parameter is negative (such that the distribution is bounded).

    :param data: :class:`numpy.ndarray` containing the observed values (with
                 missing values removed).
    :param int minexc: Minimum number of exceedances required.
    :returns: tuple of the shape, scale and threshold. All three belong to
              the same retained candidate. ``(0, 0, 0)`` is returned when
              `data` is empty or no candidate is retained.
    """

    nobs = len(data)
    if nobs == 0:
        # Nothing to fit; data.max() below would raise on empty input.
        log.warning("No suitable shape parameters identified")
        return 0, 0, 0

    sh = []
    sc = []
    t = []
    q1000list = []
    q10000list = []

    eps = -0.01
    step = 0.002
    datamax = data.max()
    mu = np.median(data)
    while mu < datamax:
        nexc = len(data[data > mu])
        if nexc < minexc:
            break
        # Explicit float division so the rate is not truncated to zero
        # when integer division applies.
        rate = float(nexc) / float(nobs)

        pp = calculateShape(mu, data)
        q1000, q10000 = returnLevels(np.array([1000, 10000]),
                                     mu, pp[0], pp[2], rate)
        usable = not (np.isnan(q1000) or np.isnan(q10000))
        if usable and pp[0] < eps:
            t.append(mu)
            sh.append(pp[0])
            sc.append(pp[2])
            q1000list.append(q1000)
            q10000list.append(q10000)
        # Always advance the threshold, including when the return levels
        # are NaN; skipping the increment would repeat the same candidate
        # forever.
        mu += step

    if len(t) == 0:
        log.warning("No suitable shape parameters identified")
        return 0, 0, 0

    # Target return levels: the mean over all retained candidates, inflated
    # by 5% and rounded up to a whole number.
    Av1000 = np.mean(np.array(q1000list))
    Av10000 = np.mean(np.array(q10000list))
    Av1000 = np.ceil(Av1000 + 0.05*Av1000)
    Av10000 = np.ceil(Av10000 + 0.05*Av10000)

    idx1000 = find_nearest_index(np.array(q1000list), Av1000)
    idx10000 = find_nearest_index(np.array(q10000list), Av10000)

    # Of the two candidates nearest the targets, keep the one with the
    # higher threshold (ties go to the 10000-year candidate) and return
    # its own threshold, so shape, scale and threshold always describe
    # one and the same fit.
    best = idx1000 if t[idx1000] > t[idx10000] else idx10000

    return sh[best], sc[best], t[best]
                                     stndf['DataEndYear'][i])
    stnName = stndf['stnName'][i].title().strip() + " " + dataRange
    fitname = pjoin(output_path, '{0}_gpdfit.png'.format(stnNum))
    diagname = pjoin(output_path, '{0}_gpddiag.png'.format(stnNum))
    if os.path.exists(filename):
        log.info("Processing {0}".format(stnName))
        df = readDataFile(filename)
        quality = df['QSpeed'].fillna("X").map(lambda x: x in 
                                               ['Y', 'N', 'X', ' ', np.nan])
        dmax = df['Speed'][df['Speed'].notnull() & quality]
        if len(dmax) == 0:
            log.info("No valid data")
            continue
        xi, sigma, mu = selectThreshold(dmax, minexc=10)
        log.debug("Parameters: {0}, {1}, {2}".format(xi, sigma, mu))
        rate = float(len(dmax[dmax > mu])) / float(len(dmax))
        if xi == 0:
            continue
        plotFit(dmax, mu, xi, sigma, stnName, fitname)
        plotDiagnostics(dmax, mu, xi, sigma, diagname)

        gpdfile.write("{0}, {1}, {2:.6f}, {3:.6f}, {4:.3f}, {5:.4f}\n".
                      format(stnNum, stnName, xi, sigma, mu, rate))
        rpvals = returnLevels(rp, mu, xi, sigma, rate)
        rpstr = ", ".join(['{:.3f}']*len(rpvals)).format(*rpvals)
        rpfile.write("{0}, {1}, {2}\n".format(stnNum, stnName, rpstr))
    else:
        log.info("No data file for {0}".format(stnName))
gpdfile.close()
rpfile.close()