def qmap_mean_departure(x, sample1, sample2, meinequantilen, sample_size, return_mean=False, linear=True):
    """Departure between per-quantile box means of two samples of x.

    Quantile boxes are built from ``meinequantilen`` (0 and 100 are added
    as brackets), the mean inside each box is computed for both samples,
    and both mean curves are interpolated onto the biased sample's values.
    The difference of the interpolated curves is the departure.

    Parameters
    ----------
    x : array-like
        full data series
    sample1 : selection (index/boolean) of the reference ("truth") sample
    sample2 : selection (index/boolean) of the biased sample
    meinequantilen : sequence of interior percentiles (0..100 exclusive)
    sample_size : count threshold forwarded to qstats
    return_mean : if True, return the raw box means (m1, m2) instead
    linear : linear interpolation (True) or cubic spline (False)

    Returns
    -------
    ndarray with one departure value per element of sample2,
    or the tuple (m1, m2) when return_mean is True.
    """
    from support_functions import qstats

    ref_vals = x[sample1]     # reference ("truth") sample
    biased_vals = x[sample2]  # sample carrying the bias

    # bracket the requested percentiles with 0 and 100
    meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]]))
    q_ref = np.nanpercentile(ref_vals, meinequantilen)      # truth quantiles
    q_biased = np.nanpercentile(biased_vals, meinequantilen)  # biased quantiles

    # start from the quantile values, then overwrite all but the last entry
    # with the per-box means (interior quantiles only go to qstats)
    mean_ref = np.copy(q_ref)
    mean_biased = np.copy(q_biased)
    count_ref, m1 = qstats(ref_vals, meinequantilen[1:-1], counts=sample_size)
    count_biased, m2 = qstats(biased_vals, meinequantilen[1:-1], counts=sample_size)
    mean_ref[:-1] = m1
    mean_biased[:-1] = m2

    # interpolate the box means onto the biased sample's values
    if linear:
        m1d = np.interp(biased_vals, q_ref[1:], mean_ref[:-1])
        m2d = np.interp(biased_vals, q_biased[1:], mean_biased[:-1])
    else:
        tck = interpolate.splrep(q_ref[1:], mean_ref[:-1], s=0)
        m1d = interpolate.splev(biased_vals, tck, der=0)
        tck = interpolate.splrep(q_biased[1:], mean_biased[:-1], s=0)
        m2d = interpolate.splev(biased_vals, tck, der=0)

    if return_mean:
        return m1, m2
    return m1d - m2d  # one departure value per biased datum
def qmap_departure(x, sample1, sample2, meinequantilen, sample_size, sample3=None, return_mean=False, linear=True, verbose=0): from support_functions import qstats # s1d = x[sample1] # truth (sample1) s2d = x[sample2] # biased (sample2) # # add 0 and 100 meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]])) # Be sure to remove 0,100 now # Mean of quantile boxes( not 0 and 100 ) count1, m1 = qstats(s1d, meinequantilen[1:-1], counts=sample_size) count2, m2 = qstats(s2d, meinequantilen[1:-1], counts=sample_size) ok1 = count1[:-1] > sample_size ok2 = count2[:-1] > sample_size # Enough data to calculate ? if not np.any(ok1 & ok2): if sample3 is not None: return np.zeros(x[sample3].shape) # return only zeros else: return np.zeros(s2d.shape) # if verbose > 1: print "Quantiles:", meinequantilen print "Sample 1: ", count1 print "Sample 2: ", count2 # qb = np.nanpercentile(s1d, meinequantilen) # truth qa = np.nanpercentile(s2d, meinequantilen) # biased # diffs = qb - qa # Difference of quantiles (1st and lst for interp) xp = qa xp[:-1] = m2 # x punkte der interpolation ( ? NAN ) diffs[:-1] = m1 - m2 # y punkte der interpolation if return_mean: return m1, m2 # interpolate quantile differences # how to handle end-point ? # if not extrapolate: # diffs = diffs[1:-1] # trim # xp = xp[1:-1] # trim # Spline or linear interpolation if not linear: tck = interpolate.splrep(xp, diffs, s=0) if sample3 is not None: out = interpolate.splev(x[sample3], tck, der=0) # does this retain nan ? else: out = interpolate.splev(s2d, tck, der=0) # else: # to all data in sample / but not when missing! if sample3 is not None: out = np.interp(x[sample3], xp, diffs) else: out = np.interp(s2d, xp, diffs) # turn missing into zero return np.where(np.isfinite(out), out, 0.) # size of sample 2 or sample 3 # no adjustment
def qmap_era_departure(x, y, sample1, meinequantilen, sample_size, verbose=0): """Calculate Quantile Matching for a reference period and return matched data """ from support_functions import qstats # Match ERA to RASO # Sampling Period: s1d = x[sample1] # truth (sample1) RASO s2d = y[sample1] # biased (sample2) ERA # # add 0 and 100 meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]])) # Be sure to remove 0,100 now # Mean of quantile boxes( not 0 and 100 ) count1, m1 = qstats(s1d, meinequantilen[1:-1], counts=sample_size) count2, m2 = qstats(s2d, meinequantilen[1:-1], counts=sample_size) ok1 = count1[:-1] > sample_size ok2 = count2[:-1] > sample_size # Enough data to calculate ? if not np.any(ok1 & ok2): return y # np.zeros(y.shape) # if verbose > 1: print "Quantiles:", meinequantilen print "Sample 1: ", count1 print "Sample 2: ", count2 # qb = np.nanpercentile(s1d, meinequantilen) # truth qa = np.nanpercentile(s2d, meinequantilen) # biased # diffs = qb - qa # Difference of quantiles (1st and lst for interp) xp = qa xp[:-1] = m2 # x punkte der interpolation ( ? NAN ) diffs[:-1] = m1 - m2 # y punkte der interpolation # interpolate quantile differences # how to handle end-point ? # if not extrapolate: # diffs = diffs[1:-1] # trim # xp = xp[1:-1] # trim # Spline or linear interpolation # if not linear: # tck = interpolate.splrep(xp, diffs, s=0) # out = interpolate.splev(y, tck, der=0) # else: # to all data in sample / but not when missing! # # out = np.interp(y, xp, diffs) # new, old, old values # turn missing into zero out = np.where(np.isfinite(out), out, 0.) # add ontop of variable return out # size of y
def quantiles_at_breakpoint(data, var, dvar=None, quantilen=None, ibreak=None, sample_size=730, borders=180, verbose=0):
    """Calculate Quantiles at the breakpoints

    For every breakpoint flagged in '<var>_breaks', compute quantile
    statistics of `dvar` for the reference segment and the biased segment
    around the break and collect them per breakpoint date.

    Parameters
    ----------
    data : pd.DataFrame or pd.Panel
        must hold `var` and '<var>_breaks'
    var : str
        variable whose breakpoint flags are read
    dvar : str, optional
        variable whose values are sampled (defaults to var)
    quantilen : array-like, optional
        percentiles to use; defaults to 10..90 in steps of 10
    ibreak : int, optional
        process only this breakpoint index
    sample_size : int
        total sample per segment; per-quantile size qss is derived from it
    borders : int
        buffer excluded around breakpoints (passed to sample_indices)
    verbose : int
        diagnostic print level

    Returns
    -------
    dict keyed by breakpoint date -> DataFrame (no pressure levels) or
    pd.Panel with 'Ref' and 'Bias' quantile statistics.
    """
    # NOTE(review): qmap_departure is imported here but never used in this body
    from departures import qmap_departure
    from support_functions import sample_indices, qstats
    funcid = '[QAB] '
    if not isinstance(var, str):
        raise ValueError(funcid + "var Requires a string")
    if dvar is not None and not isinstance(dvar, str):
        raise ValueError(funcid + "dvar Requires a string")
    if dvar is None:
        dvar = var  # sample the same variable the breaks belong to
    print funcid + "Data from Variable: ", dvar
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError("Require a DataFrame or Panel as input")
    if quantilen is None:
        quantilen = np.arange(0, 101, 10)
    quantilen = quantilen[(quantilen < 100) & (quantilen > 0)]  # drop 0 and 100
    # per-quantile sample size: n+1 boxes, halved
    # NOTE(review): integer (floor) division under Python 2 — qss is truncated
    qss = sample_size / (len(quantilen) + 1) / 2  # sample size per quantile
    print funcid + "Quantilen: ", quantilen
    print funcid + "Global Sample size: %d , per quantile(%d): %d" % (sample_size, len(quantilen), qss)
    # row labels: one per quantile plus ">" for the last (open-ended) box
    mlabels = ["Q%d" % i for i in quantilen]
    mlabels.append(">")
    if isinstance(data, pd.DataFrame):
        if not data.columns.isin([var, '%s_breaks' % var]).sum() == 2:
            raise ValueError(funcid + "Variable not found: %s or %s_breaks in %s" % (var, var, str(data.columns)))
        # convert to panel
        if 'p' not in data.columns:
            out = {}
            # get Breakpoints (flag > 0 marks a break)
            int_breaks = np.where((data['%s_breaks' % var] > 0))[0]
            breaks = data.index[int_breaks]
            nb = len(breaks)
            if nb == 0:
                raise RuntimeError(funcid + "No Breakpoints found in %s and %s_breaks" % (var, var))
            print "Found Breaks: ", nb
            print str(breaks)
            if (int_breaks[-1] + sample_size) > data.shape[0]:
                print funcid + "Reference data set is shorter than 1 year"
            # iterate breakpoints newest-first
            for ib in reversed(range(nb)):
                if ibreak is not None and ibreak != ib:
                    print funcid + "Looking for: ", breaks[ibreak], " at ", breaks[ib]
                    continue
                # ibiased is everything between breakpoints
                # isample is minus the borders -> used to calculate
                ibiased, isample, iref = sample_indices(int_breaks, ib, data.index,
                                                        sample_size=sample_size, borders=borders,
                                                        recent=False, verbose=verbose - 1)
                # Quantiles at the breakpoint
                # NOTE(review): qstats is unpacked into three values here but
                # into two elsewhere in this file — confirm its signature
                b1, c1, quants1 = qstats(data[dvar].values[iref], quantilen, qss)
                b2, c2, quants2 = qstats(data[dvar].values[isample], quantilen, qss)
                if verbose > 0:
                    print funcid + " %s : %s " % (dvar, breaks[ib])
                    print funcid + " Qs(B): ", quants1
                    print funcid + " Qs(#): ", c1
                    print funcid + " Qs(B): ", quants2
                    print funcid + " Qs(#): ", c2
                out[str(breaks[ib])] = pd.DataFrame({'Ref': quants1.tolist(), 'Bias': quants2.tolist()}, index=mlabels)
            return out
        # when there are pressure levels
        # NOTE(review): after this conversion execution falls through past the
        # else-branch below, so the converted Panel is never processed and the
        # function returns None for a DataFrame with a 'p' column — confirm intent
        data = data.reset_index().set_index(['date', 'p']).to_panel()
    else:
        if not data.items.isin([var, '%s_breaks' % var]).sum() == 2:
            raise ValueError(funcid + "Variable not found: %s or %s_breaks in %s" % (var, var, str(data.items)))
        # per level
        # get Breakpoints: any level with a positive flag marks a break date
        int_breaks = np.where((data['%s_breaks' % var] > 0).any(1))[0]
        breaks = data.major_axis[int_breaks]
        nb = len(breaks)
        if nb == 0:
            raise RuntimeError(funcid + "No Breakpoints found in %s and %s_breaks" % (var, var))
        print "Found Breaks: ", nb
        print str(breaks)
        if (int_breaks[-1] + sample_size) > data.shape[0]:
            print funcid + "Reference data set is shorter than 1 year"
        out = {}
        # iterate breakpoints newest-first
        for ib in reversed(range(nb)):
            if ibreak is not None and ibreak != ib:
                print funcid + "Looking for: ", breaks[ibreak], " at ", breaks[ib]
                continue
            # ibiased is everything between breakpoints
            # isample is minus the borders -> used to calculate
            ibiased, isample, iref = sample_indices(int_breaks, ib, data.major_axis,
                                                    sample_size=sample_size, borders=borders,
                                                    recent=False, verbose=verbose - 1)
            # Quantiles at the breakpoint
            # helper: keep only the quantile values (drop counts) so it can be
            # applied along the time axis for every pressure level
            def myqstats(x, quantilen, sample_size):
                c, y = qstats(x, quantilen, sample_size)
                return y
            quants1 = np.apply_along_axis(myqstats, 0, data[dvar].values[iref], quantilen, qss)
            quants2 = np.apply_along_axis(myqstats, 0, data[dvar].values[isample], quantilen, qss)
            out[str(breaks[ib])] = pd.Panel({'Ref': quants1, 'Bias': quants2}, major_axis=mlabels,
                                            minor_axis=data.minor_axis)
        return out
def myqstats(x, quantilen, sample_size):
    """Return only the per-quantile values from qstats, discarding the counts.

    Thin wrapper so qstats can be used where a single return value is
    required (e.g. with np.apply_along_axis).
    """
    # NOTE(review): relies on qstats being available at module scope; other
    # functions in this file import it locally from support_functions —
    # confirm a top-level import exists.
    counts, values = qstats(x, quantilen, sample_size)
    return values