Python qstatsの例、support_functions.qstats Pythonの例

コード例 #1

0

ファイルを表示

ファイル: departures.py プロジェクト: MBlaschek/radiosonde

def qmap_mean_departure(x, sample1, sample2, meinequantilen, sample_size,
                        return_mean=False, linear=True):
    """Difference of interpolated quantile-bin means of two samples of ``x``.

    Parameters
    ----------
    x : array
        Data indexed by ``sample1`` and ``sample2``.
    sample1, sample2 : index
        Selectors for the reference ("truth") and the biased sample.
    meinequantilen : array-like
        Percentiles; 0 and 100 are added and duplicates removed.
    sample_size : int
        Forwarded to ``qstats`` as ``counts``.
    return_mean : bool
        If true, return the two ``qstats`` results instead of the difference.
    linear : bool
        Linear interpolation (``np.interp``) if true, otherwise an
        interpolating spline (``splrep`` with ``s=0`` / ``splev``).

    Returns
    -------
    ``(m1, m2)`` when ``return_mean`` is set, otherwise the reference curve
    minus the biased curve, both evaluated at the values of the biased sample.
    """
    from support_functions import qstats

    truth = x[sample1]
    biased = x[sample2]

    # Extend the requested percentiles with the end points 0 and 100.
    percentiles = np.unique(np.concatenate([[0], meinequantilen, [100]]))

    truth_q = np.nanpercentile(truth, percentiles)
    biased_q = np.nanpercentile(biased, percentiles)

    # qstats only receives the inner percentiles (without 0 and 100).
    # presumably its second return value holds the per-bin means -- confirm
    # against support_functions.qstats.
    _, m1 = qstats(truth, percentiles[1:-1], counts=sample_size)
    _, m2 = qstats(biased, percentiles[1:-1], counts=sample_size)

    # Start from the percentile values and overwrite all but the last entry
    # with the qstats results.
    truth_means = np.copy(truth_q)
    truth_means[:-1] = m1
    biased_means = np.copy(biased_q)
    biased_means[:-1] = m2

    def _evaluate(nodes, values):
        # Evaluate the (nodes, values) curve at the biased-sample data.
        if linear:
            return np.interp(biased, nodes, values)
        spline = interpolate.splrep(nodes, values, s=0)
        return interpolate.splev(biased, spline, der=0)

    truth_curve = _evaluate(truth_q[1:], truth_means[:-1])
    biased_curve = _evaluate(biased_q[1:], biased_means[:-1])

    if return_mean:
        return m1, m2

    return truth_curve - biased_curve

コード例 #2

0

ファイルを表示

ファイル: departures.py プロジェクト: MBlaschek/radiosonde

def qmap_departure(x, sample1, sample2, meinequantilen, sample_size, sample3=None, return_mean=False, linear=True,
                   verbose=0):
    """Quantile-matching departures between a reference and a biased sample.

    Parameters
    ----------
    x : array
        Data indexed by ``sample1``, ``sample2`` and (optionally) ``sample3``.
    sample1, sample2 : index
        Selectors for the reference ("truth") and the biased sample.
    meinequantilen : array-like
        Percentiles; 0 and 100 are added and duplicates removed.
    sample_size : int
        Forwarded to ``qstats`` as ``counts`` and used as the threshold the
        returned counts must exceed.
    sample3 : index, optional
        If given, departures are evaluated at ``x[sample3]`` instead of at
        the biased sample.
    return_mean : bool
        If true (and enough data is available), return the two ``qstats``
        results ``(m1, m2)`` instead of departures.
    linear : bool
        Linear interpolation if true, otherwise an interpolating spline.
    verbose : int
        Values above 1 print the percentiles and both count arrays.

    Returns
    -------
    Array shaped like ``x[sample3]`` (or like the biased sample when
    ``sample3`` is None). All zeros when no bin exceeds ``sample_size`` in
    both samples; non-finite interpolation results are replaced by 0.
    """
    from support_functions import qstats

    s1d = x[sample1]  # truth (sample1)
    s2d = x[sample2]  # biased (sample2)

    # Extend the requested percentiles with the end points 0 and 100.
    meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]]))

    # qstats only receives the inner percentiles (without 0 and 100).
    # presumably it returns (counts per bin, means per bin) -- confirm
    # against support_functions.qstats.
    count1, m1 = qstats(s1d, meinequantilen[1:-1], counts=sample_size)
    count2, m2 = qstats(s2d, meinequantilen[1:-1], counts=sample_size)
    ok1 = count1[:-1] > sample_size
    ok2 = count2[:-1] > sample_size

    # Not a single bin with enough data in both samples: no adjustment.
    if not np.any(ok1 & ok2):
        target = s2d if sample3 is None else x[sample3]
        return np.zeros(target.shape)

    if verbose > 1:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("Quantiles: %s" % (meinequantilen,))
        print("Sample 1:  %s" % (count1,))
        print("Sample 2:  %s" % (count2,))

    qb = np.nanpercentile(s1d, meinequantilen)  # truth
    qa = np.nanpercentile(s2d, meinequantilen)  # biased

    # Start from the percentile differences; only the last entry survives,
    # all others are replaced by the qstats results below.
    diffs = qb - qa
    xp = qa  # same array object as qa, modified in place
    xp[:-1] = m2  # x points of the interpolation (may contain NaN?)
    diffs[:-1] = m1 - m2  # y points of the interpolation
    if return_mean:
        return m1, m2

    # Evaluate at sample3 when given, otherwise at the biased sample.
    target = s2d if sample3 is None else x[sample3]
    if linear:
        out = np.interp(target, xp, diffs)
    else:
        tck = interpolate.splrep(xp, diffs, s=0)
        out = interpolate.splev(target, tck, der=0)

    # Missing / non-finite values mean "no adjustment".
    return np.where(np.isfinite(out), out, 0.)

コード例 #3

0

ファイルを表示

ファイル: departures.py プロジェクト: MBlaschek/radiosonde

def qmap_era_departure(x, y, sample1, meinequantilen, sample_size, verbose=0):
    """Quantile-matching departures of ``y`` relative to ``x`` over ``sample1``.

    Parameters
    ----------
    x : array
        Reference ("truth") series; the original comments call it RASO.
    y : array
        Biased series to be matched; the original comments call it ERA.
    sample1 : index
        Selector applied to both ``x`` and ``y`` to form the two samples.
    meinequantilen : array-like
        Percentiles; 0 and 100 are added and duplicates removed.
    sample_size : int
        Forwarded to ``qstats`` as ``counts`` and used as the threshold the
        returned counts must exceed.
    verbose : int
        Values above 1 print the percentiles and both count arrays.

    Returns
    -------
    Departures linearly interpolated at every value of ``y`` (same size as
    ``y``), with non-finite results replaced by 0.

    NOTE(review): when no bin exceeds ``sample_size`` in both samples the
    function returns ``y`` itself rather than an array of zeros, unlike the
    normal path which returns departures -- confirm this is what callers
    expect.
    """
    from support_functions import qstats

    # Both samples use the same selector (match ERA to RASO).
    s1d = x[sample1]  # truth  (RASO)
    s2d = y[sample1]  # biased (ERA)

    # Extend the requested percentiles with the end points 0 and 100.
    meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]]))

    # qstats only receives the inner percentiles (without 0 and 100).
    # presumably it returns (counts per bin, means per bin) -- confirm
    # against support_functions.qstats.
    count1, m1 = qstats(s1d, meinequantilen[1:-1], counts=sample_size)
    count2, m2 = qstats(s2d, meinequantilen[1:-1], counts=sample_size)
    ok1 = count1[:-1] > sample_size
    ok2 = count2[:-1] > sample_size

    # Not a single bin with enough data in both samples.
    if not np.any(ok1 & ok2):
        return y

    if verbose > 1:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("Quantiles: %s" % (meinequantilen,))
        print("Sample 1:  %s" % (count1,))
        print("Sample 2:  %s" % (count2,))

    qb = np.nanpercentile(s1d, meinequantilen)  # truth
    qa = np.nanpercentile(s2d, meinequantilen)  # biased

    # Start from the percentile differences; only the last entry survives,
    # all others are replaced by the qstats results below.
    diffs = qb - qa
    xp = qa  # same array object as qa, modified in place
    xp[:-1] = m2  # x points of the interpolation (may contain NaN?)
    diffs[:-1] = m1 - m2  # y points of the interpolation

    # Linear interpolation of the departures to all values of y.
    out = np.interp(y, xp, diffs)
    # Missing / non-finite values mean "no adjustment".
    out = np.where(np.isfinite(out), out, 0.)
    return out  # size of y

コード例 #4

0

ファイルを表示

ファイル: quantiles_at_breakpoint.py プロジェクト: MBlaschek/radiosonde

def quantiles_at_breakpoint(data, var, dvar=None, quantilen=None, ibreak=None, sample_size=730, borders=180, verbose=0):
    """Calculate quantile-bin statistics on both sides of each breakpoint.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Panel
        Must contain ``var`` and ``'<var>_breaks'`` (as columns of a
        DataFrame or as items of a Panel). A DataFrame with a ``'p'`` column
        is converted to a Panel indexed by ``date`` and ``p``.
    var : str
        Name of the variable whose ``'<var>_breaks'`` entries (> 0) mark the
        breakpoints.
    dvar : str, optional
        Variable the statistics are computed from; defaults to ``var``.
    quantilen : array-like, optional
        Percentiles; defaults to ``np.arange(0, 101, 10)``. Values outside
        the open interval (0, 100) are dropped.
    ibreak : int, optional
        If given, only the breakpoint with this position is processed.
    sample_size : int
        Forwarded to ``sample_indices``; also used to derive the per-bin
        count passed to ``qstats``.
    borders : int
        Forwarded to ``sample_indices``.
    verbose : int
        Values above 0 print the results per breakpoint (DataFrame input
        without ``'p'`` only); ``verbose - 1`` is forwarded to
        ``sample_indices``.

    Returns
    -------
    dict
        Maps ``str(breakpoint)`` to a DataFrame (input without pressure
        levels) or a Panel (per level) with the entries ``'Ref'`` and
        ``'Bias'``, labelled ``Q<percentile>`` plus a final ``'>'``.

    Raises
    ------
    ValueError
        ``var``/``dvar`` is not a string, ``data`` has the wrong type, or the
        required variables are missing.
    RuntimeError
        No breakpoint is flagged in ``'<var>_breaks'``.
    """
    from departures import qmap_departure
    from support_functions import sample_indices, qstats
    funcid = '[QAB] '

    def _bin_means(values, quantiles, counts):
        # Callback for np.apply_along_axis: keep only the second value
        # returned by qstats.
        _, means = qstats(values, quantiles, counts)
        return means

    if not isinstance(var, str):
        raise ValueError(funcid + "var Requires  a string")

    if dvar is not None and not isinstance(dvar, str):
        raise ValueError(funcid + "dvar Requires  a string")

    if dvar is None:
        dvar = var

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(funcid + "Data from Variable:  %s" % (dvar,))
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError("Require a DataFrame or Panel as input")

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)
    elif isinstance(quantilen, (list, tuple)):
        # The boolean mask below needs element-wise comparison.
        quantilen = np.asarray(quantilen)

    quantilen = quantilen[(quantilen < 100) & (quantilen > 0)]  # drop 0 and 100
    # Sample size per quantile bin.
    # NOTE: with an int sample_size this is integer division on Python 2
    # but true division on Python 3.
    qss = sample_size / (len(quantilen) + 1) / 2
    print(funcid + "Quantilen:  %s" % (quantilen,))
    print(funcid + "Global Sample size: %d , per quantile(%d): %d" % (sample_size, len(quantilen), qss))
    mlabels = ["Q%d" % i for i in quantilen]
    mlabels.append(">")

    if isinstance(data, pd.DataFrame):
        if not data.columns.isin([var, '%s_breaks' % var]).sum() == 2:
            raise ValueError(funcid + "Variable not found: %s or %s_breaks in %s" % (var, var, str(data.columns)))

        # No pressure levels: handle the single series directly.
        if 'p' not in data.columns:

            out = {}
            # Positions and index labels of the breakpoints
            int_breaks = np.where((data['%s_breaks' % var] > 0))[0]
            breaks = data.index[int_breaks]
            nb = len(breaks)
            if nb == 0:
                raise RuntimeError(funcid + "No Breakpoints found in %s and %s_breaks" % (var, var))

            print("Found Breaks:  %s" % (nb,))
            print(str(breaks))
            if (int_breaks[-1] + sample_size) > data.shape[0]:
                print(funcid + "Reference data set is shorter than 1 year")

            for ib in reversed(range(nb)):
                if ibreak is not None and ibreak != ib:
                    print(funcid + "Looking for:  %s  at  %s" % (breaks[ibreak], breaks[ib]))
                    continue
                # ibiased is everything between breakpoints
                # isample is minus the borders -> used to calculate
                ibiased, isample, iref = sample_indices(int_breaks, ib, data.index,
                                                        sample_size=sample_size,
                                                        borders=borders,
                                                        recent=False,
                                                        verbose=verbose - 1)
                # Statistics at the breakpoint.
                # NOTE(review): this call used to unpack three values while
                # every other qstats call unpacks two; aligned with the
                # two-value form -- confirm against support_functions.qstats.
                c1, quants1 = qstats(data[dvar].values[iref], quantilen, qss)
                c2, quants2 = qstats(data[dvar].values[isample], quantilen, qss)

                if verbose > 0:
                    print(funcid + " %s : %s " % (dvar, breaks[ib]))
                    print(funcid + " Qs(B):  %s" % (quants1,))
                    print(funcid + " Qs(#):  %s" % (c1,))
                    print(funcid + " Qs(B):  %s" % (quants2,))
                    print(funcid + " Qs(#):  %s" % (c2,))

                out[str(breaks[ib])] = pd.DataFrame({'Ref': quants1.tolist(), 'Bias': quants2.tolist()}, index=mlabels)
            return out

        # when there are pressure levels: convert to a Panel
        data = data.reset_index().set_index(['date', 'p']).to_panel()

    else:
        if not data.items.isin([var, '%s_breaks' % var]).sum() == 2:
            raise ValueError(funcid + "Variable not found: %s or %s_breaks in %s" % (var, var, str(data.items)))

    # per level
    # A date counts as breakpoint when any level is flagged.
    int_breaks = np.where((data['%s_breaks' % var] > 0).any(1))[0]
    breaks = data.major_axis[int_breaks]
    nb = len(breaks)
    if nb == 0:
        raise RuntimeError(funcid + "No Breakpoints found in %s and %s_breaks" % (var, var))

    print("Found Breaks:  %s" % (nb,))
    print(str(breaks))
    # int_breaks are positions on the major axis; a Panel's shape[0] is the
    # number of items, so compare against the major-axis length instead.
    if (int_breaks[-1] + sample_size) > len(data.major_axis):
        print(funcid + "Reference data set is shorter than 1 year")

    out = {}

    for ib in reversed(range(nb)):
        if ibreak is not None and ibreak != ib:
            print(funcid + "Looking for:  %s  at  %s" % (breaks[ibreak], breaks[ib]))
            continue
        # ibiased is everything between breakpoints
        # isample is minus the borders -> used to calculate
        ibiased, isample, iref = sample_indices(int_breaks, ib, data.major_axis,
                                                sample_size=sample_size,
                                                borders=borders,
                                                recent=False,
                                                verbose=verbose - 1)

        # Statistics at the breakpoint, computed column by column (axis 0).
        quants1 = np.apply_along_axis(_bin_means,
                                      0,
                                      data[dvar].values[iref],
                                      quantilen,
                                      qss)

        quants2 = np.apply_along_axis(_bin_means,
                                      0,
                                      data[dvar].values[isample],
                                      quantilen,
                                      qss)
        out[str(breaks[ib])] = pd.Panel({'Ref': quants1, 'Bias': quants2}, major_axis=mlabels,
                                        minor_axis=data.minor_axis)

    return out

コード例 #5

0

ファイルを表示

ファイル: quantiles_at_breakpoint.py プロジェクト: MBlaschek/radiosonde

 def myqstats(x, quantilen, sample_size):
     """Call ``qstats`` and return only the second of its two results."""
     _, second = qstats(x, quantilen, sample_size)
     return second