def inquire(isonde):
    """Summarise the data variables attached to a radiosonde object

    Every name listed in ``isonde.vars`` (except 'station') is looked up on the
    object and described by six integer counters:

    shape   number of dimensions (``len(obj.shape)``)
    dates   number of dates (Panel: size of major_axis, otherwise unique index values)
    plev    number of pressure levels (Panel: size of minor_axis, otherwise unique
            values of column 'p'; 0 if there is no such column)
    varis   number of items (Panel) or columns
    ptqr    mean over all months of the largest monthly count among r, q, dpd
    uv      mean over all months of the smaller monthly count of u and v

    ptqr and uv stay 0 unless the respective ``hasnames`` checks pass.

    Parameters
    ----------
    isonde      radiosonde      Radiosonde class object

    Returns
    -------
    DataFrame   one row per inspected variable, one column per counter

    Raises
    ------
    ValueError  if isonde is not a radiosonde instance
    """
    if not isinstance(isonde, radiosonde):
        raise ValueError("Inquire requires a radiosonde object")

    summary = {}
    for name in isonde.vars:
        if name == 'station':
            continue

        obj = getattr(isonde, name)
        ndim = len(obj.shape)

        if isinstance(obj, pd.Panel):
            n_dates = obj.major_axis.size
            n_plev = obj.minor_axis.size
            labels = obj.items
            # valid values per date and item, summed up per month
            monthly = obj.count(axis='minor').resample('M').sum()
        else:
            n_dates = len(obj.index.unique())
            n_plev = len(obj.p.unique()) if 'p' in obj.columns else 0
            labels = obj.columns
            # valid values per month and column
            monthly = obj.resample('M').count()

        info = {'shape': ndim, 'dates': n_dates, 'plev': n_plev, 'varis': labels.size, 'ptqr': 0, 'uv': 0}

        # humidity profile counts need t and p plus a humidity variable
        if hasnames(obj, ['t', 'p']) and not hasnames(obj, ['r', 'q', 'dpd'], value=0):
            present = labels[labels.isin(['r', 'q', 'dpd'])].tolist()
            info['ptqr'] = int(monthly[present].max(1).mean())

        # wind counts need both components
        if hasnames(obj, ['u', 'v']):
            info['uv'] = int(monthly[['u', 'v']].min(1).mean())

        summary[name] = info

    return pd.DataFrame(summary).T
def detect_and_correct(isonde, data='std_data', var='dpd', quality_controlled=True, savename='sdcor', save=True,
                       tfirst=True, daynight=False, verbose=0, **kwargs):
    """Detect and Correct Radiosonde biases from Departure Statistics

    Use ERA-Interim departures to detect breakpoints and
    correct these with a mean and a quantile adjustment going back in time.

    uses raso.timeseries.breakpoint.detection / correction

    Processing steps:
      1. copy ``isonde.<data>``, optionally apply the quality flags and drop
         the 'qual' and 'orig' columns
      2. if ``tfirst``: detect and mean-correct temperature first; when breaks
         are found the temperature dependent variables are recalculated from
         't_mcor' and ``<var>_tcor`` is detected/corrected as well
      3. detect and correct ``var`` itself
      4. store the result as ``isonde.<savename>``, record the options in
         ``isonde.detect_opts`` and optionally save

    Parameters
    ----------
    isonde              radiosonde      Radiosonde class object
    data                str             Radiosonde variable
    var                 str             Variable
    quality_controlled  bool            Use QC to remove flagged values?
    savename            str             store name
    save                bool            Save?
    tfirst              bool            Correct Temperature first ?
    daynight            bool            Correct Day and Night Soundings separate?
    verbose             int             verboseness

    Additional Parameters
    ---------------------
    thres               int     [50]    SNHT Threshold
    quantilen           list/array [0-100] Quantile Ranges
    levels              list    [None]  Pressure levels
    sample_size         int     [730]   minimum Sample size
    borders             int     [180]   biased sample before and after a break
    bias30k             bool    [T]     remove 30K Bias for dpd ?
    verbose             int     [0]     verboseness
    kwargs              dict            breakpoint.detection, breakpoint.correction ...

    All keyword arguments are forwarded unchanged to the detection routine;
    thres, borders, sample_size and quantilen are additionally read here
    (with the defaults above) to be logged and stored with the statistics.

    Returns
    -------
    dict    statistics returned by the detection routine for ``var``, extended
            by 't-breaks' / 't-dpd-breaks' (if such breaks were found) and by
            thres, borders, sample_size, quantilen, source, savename,
            variable, daynight

    Raises
    ------
    ValueError  isonde is not a radiosonde object, is empty or has no ``data``
    """
    from ..detect_and_correct import detect_and_correct as detect_and_correct_data
    from ..detect_and_correct import detect_and_correct_daynight
    from .. import standard_dewpoint_depression, standard_water_vapor

    funcid = "[DC] Sonde "
    if not isinstance(isonde, radiosonde):
        raise ValueError(funcid + "requires a radiosonde object!")

    if isonde.is_empty:
        raise ValueError(funcid + "Radiosonde is empty!")

    funcid = "[DC] %s " % isonde.id
    if data not in isonde.vars:
        raise ValueError(funcid + "Required variable (%s) not present" % data)

    prof = getattr(isonde, data).copy()  # work on a copy of the data
    print_verbose(funcid+"Savename: %s" % savename, verbose)

    if hasnames(prof, 'qual'):
        #
        # drop all the values that have a qual flag
        #
        if quality_controlled:
            journal(funcid + "QC of %s " % data, isonde.history, verbose)
            prof = enforcer(prof)  # Work on FLAGS, but not DPD 30, makes a copy
            #
            # set DPD30 to missing (rows whose flag string contains '3')
            #
            if hasnames(prof, 'dpd'):
                prof['dpd'] = np.where(prof.qual.str.contains('3'), np.nan, prof.dpd.values)

            if hasnames(prof, 'td'):
                prof['td'] = np.where(prof.qual.str.contains('3'), np.nan, prof.td.values)

        del prof['qual']  # remove all flag information
        print_verbose(funcid + " dropping qual ...", verbose)

    if hasnames(prof, 'orig'):
        del prof['orig']  # indicates interpolated or not

    if 'dpd' in var:
        if not hasnames(prof, 'dpd_era'):
            prof['dpd_era'] = prof['t_era'] - prof['td_era']
            print_verbose(funcid + " Calculating dpd_era ...", verbose)

    tbreaks = None
    # Only bound to a result when the t-corrected variable is actually
    # processed below; initialised here so the bookkeeping at the end can
    # test for it instead of failing on an unbound name.
    dbreaks = None

    if var == 't' and tfirst:
        tfirst = False
        print_verbose(funcid + "tfirst=True only with temperature dependent variables", 1)

    if tfirst:
        journal(funcid + "Running T D&C first! ", isonde.history, verbose)
        #
        # Only Mean Adjustment for Temperature
        #
        if not daynight:
            prof, tbreaks = detect_and_correct_data(prof, var='t', correct_q=False, bounded=None,
                                                    report=isonde.history, verbose=verbose - 1, **kwargs)
        else:
            prof, tbreaks = detect_and_correct_daynight(prof, var='t', correct_q=False, bounded=None,
                                                        report=isonde.history, verbose=verbose - 1, **kwargs)
            tbreaks['breaks'] = tbreaks['00Z'] + tbreaks['12Z']  # 00Z and 12Z breaks
        #
        # new Columns: t_mcor, t_dep, t_dep_breaks, t_dep_snht
        #
        if len(tbreaks['breaks']) > 0:
            journal(funcid + "T-breaks: %s" % str(tbreaks['breaks']), isonde.history, verbose)
            # Recalculate Temperature Dependent Variables:
            prof = standard_water_vapor(prof, tvar='t_mcor', vpvar='vp_tcor', replace=True, report=isonde.history)
            prof = standard_dewpoint_depression(prof, tvar='t_mcor', dpdvar='dpd_tcor', vpvar='vp_tcor',
                                                tdvar='td_tcor', replace=True, report=isonde.history)
            del prof['qual']  # remove quality Flag again

            if hasnames(prof, '%s_tcor' % var):
                journal(funcid + "Running t-correct %s D&C ..." % var, isonde.history, verbose)
                # the detection looks for '<variable>_era', so provide the
                # reference under the t-corrected name for this run only
                prof['%s_tcor_era' % var] = prof['%s_era' % var]
                if not daynight:
                    prof, dbreaks = detect_and_correct_data(prof, var='%s_tcor' % var, report=isonde.history,
                                                            verbose=verbose - 1, **kwargs)
                else:
                    prof, dbreaks = detect_and_correct_daynight(prof, var='%s_tcor' % var, report=isonde.history,
                                                                verbose=verbose - 1, **kwargs)
                    dbreaks['breaks'] = dbreaks['00Z'] + dbreaks['12Z']  # Combine
                del prof['%s_tcor_era' % var]
        else:
            journal(funcid + "No T breakpoints. ", isonde.history, verbose)

    journal(funcid + "Running %s D&C" % var, isonde.history, verbose)
    if not daynight:
        prof, stat = detect_and_correct_data(prof, var=var, report=isonde.history, verbose=verbose, **kwargs)
    else:
        prof, stat = detect_and_correct_daynight(prof, var=var, report=isonde.history, verbose=verbose, **kwargs)
        stat['breaks'] = stat['00Z'] + stat['12Z']  # Combine lists
    #
    isonde.add_data(savename, prof, replace=True, verbose=verbose)  # DID anything change ?
    #
    # Options
    #
    thres = kwargs.get('thres', 50)
    borders = kwargs.get('borders', 180)
    sample_size = kwargs.get('sample_size', 730)
    quantilen = kwargs.get('quantilen', np.arange(0, 101, 10))
    # missing from history > add
    journal(funcid + "%s (T:%d, N:%d, B:%d, Q:%d)" % (var, int(thres), sample_size, borders, len(quantilen)),
            isonde.history, 0)

    if tbreaks is not None:
        if len(tbreaks['breaks']) > 0:
            stat['t-breaks'] = tbreaks['breaks']
            # dbreaks stays None when no '<var>_tcor' column was available
            if dbreaks is not None and len(dbreaks['breaks']) > 0:
                stat['t-dpd-breaks'] = dbreaks['breaks']

    stat['thres'] = thres
    stat['borders'] = borders
    stat['sample_size'] = sample_size
    stat['quantilen'] = quantilen
    stat['source'] = data
    stat['savename'] = savename
    stat['variable'] = var
    stat['daynight'] = daynight

    opts_key = "%s_%s" % (savename, var)  # e.g. sdcor_dpd
    if 'detect_opts' not in isonde.attrs:
        isonde.add_attr('detect_opts', {opts_key: stat})
    else:
        isonde.detect_opts[opts_key] = stat  # update

    if save:
        isonde.save(var=savename, update=True, verbose=verbose)

    return stat
def mean_correction(data, var, breakvar, sample_size=730, borders=180, database=False, bounded=None, varcopy=True,
                    verbose=0):
    """ Mean Correction of breakpoints

    For every variable the breakpoints are processed from the most recent one
    backwards. For each breakpoint ``sample_indices`` selects the sample, the
    biased and the reference index ranges, ``mean_departure`` yields the
    adjustment and the adjustment is added to the biased range of
    ``<var>_mcor``.

    Parameters
    ----------
    data            DataFrame/Panel     input data; a DataFrame with a column 'p' is converted
                                        to a Panel (date x p), a DataFrame without 'p' is
                                        treated as a single time series
    var             str/list            variable(s) to correct
    breakvar        str/list            variable(s) marking breakpoints (values > 0);
                                        a single name is used for all ``var``
    sample_size     int                 passed to sample_indices and mean_departure
    borders         int                 passed to sample_indices
    database        bool                return only the data, as DataFrame indexed by date
    bounded         tuple               (lower, upper); adjustments that would move a value
                                        outside these bounds are set to 0
    varcopy         bool                (re)initialise ``<var>_mcor`` from ``var`` even if it exists
    verbose         int                 verboseness

    Returns
    -------
    stat, data      dict of breakpoint statistics (of the last variable in
                    ``var``) and the data including ``<var>_mcor``;
                    only data if ``database`` is True

    Raises
    ------
    ValueError      data is neither DataFrame nor Panel, or a requested
                    variable is missing (2D input only)

    Notes
    -----
    The input is modified: its index is named 'date' when a DataFrame is
    converted to a Panel, and ``<var>_mcor`` is written into ``data`` for
    Panel and 1D DataFrame input.
    """
    from departures import mean_departure
    from support_functions import sample_indices

    funcid = '[CM] '
    if isinstance(var, str):
        var = [var]  # as list

    if isinstance(breakvar, str):
        breakvar = [breakvar] * len(var)  # as list

    if bounded is None:
        ubound = None
        lbound = None
    else:
        lbound, ubound = bounded

    pressure_levels = True
    if isinstance(data, pd.DataFrame):
        if 'p' in data.columns:
            # 2D
            print(funcid + " database detected > conversion to Panel")
            for ivar, jvar in zip(var, breakvar):
                if not data.columns.isin([ivar, jvar]).sum() == 2:
                    raise ValueError(funcid + "Variable not found: %s in %s" % (ivar, str(data.columns)))
            data.index.name = 'date'
            data = data.reset_index().set_index(['date', 'p']).to_panel()
        else:
            # only 1D
            pressure_levels = False

    elif isinstance(data, pd.Panel):
        for ivar, jvar in zip(var, breakvar):
            if not data.items.isin([ivar, jvar]).sum() == 2:
                raise ValueError(funcid + "Variable not found: %s in %s" % (ivar, str(data.items)))
    else:
        raise ValueError("Require a DataFrame or Panel as input")

    if pressure_levels:
        data.major_axis.name = 'date'
        dates = data.major_axis
        # pressure levels
        plevels = data.minor_axis.values
        if verbose > 0:
            # joined with single blanks to reproduce the output of the
            # former multi-argument print statement
            print(" ".join([funcid + "p-Levels: ", ",".join(["%d" % (ip / 100) for ip in plevels]), ' hPa']))
    else:
        dates = data.index

    # bound up front so that an empty ``var`` returns an empty dict instead of
    # failing on an unbound name
    breakpoint_stat = {}
    for ivar, ibvar in zip(var, breakvar):
        # BREAKS
        if pressure_levels:
            int_breaks = np.where((data[ibvar] > 0).any(1))[0]  # dates with a break on any level
        else:
            int_breaks = np.where((data[ibvar] > 0))[0]

        breaks = dates[int_breaks]
        # int_breaks is empty when no breakpoint was flagged; only look at the
        # last break if there is one
        if len(int_breaks) > 0 and (int_breaks[-1] + sample_size) > dates.shape[0]:
            print(funcid + "Reference data set is shorter than 1 year")

        # Copy or use existing
        if not hasnames(data, '%s_mcor' % ivar) or varcopy:
            data["%s_mcor" % ivar] = data[ivar].copy()  # Make a copy

        nb = len(breaks)
        if verbose > 0:
            print(funcid + " %s Found %d breakpoints" % (ivar, nb))

        breakpoint_stat = {}  # statistics are kept per variable, the last one is returned
        xdata = data["%s_mcor" % ivar].values  # Numpy Array (time x p-levels)
        for ib in reversed(range(nb)):
            # ibiased is everything between breakpoints
            # isample is minus the borders -> used to calculate
            isample, ibiased, iref = sample_indices(int_breaks, ib, dates, sample_size=sample_size, borders=borders,
                                                    recent=False, verbose=verbose - 1)
            if pressure_levels:
                # one adjustment per pressure level (column-wise)
                m_dep = np.apply_along_axis(mean_departure, 0, xdata, iref, isample, sample_size)
                if bounded is not None:
                    tmp_qad = xdata[ibiased, :] + m_dep
                    # no adjustment where the result would leave the bounds
                    m_dep = np.where((tmp_qad < lbound) | (tmp_qad > ubound), 0, m_dep)
                    xdata[ibiased, :] += m_dep  # has now the right shape
                else:
                    # one value per level, this can cause negative DPD values
                    xdata[ibiased, :] += m_dep[np.newaxis, ::]
            else:
                m_dep = mean_departure(xdata, iref, isample, sample_size)
                if bounded is not None:
                    tmp_qad = xdata[ibiased] + m_dep
                    # no adjustment where the result would leave the bounds
                    m_dep = np.where((tmp_qad < lbound) | (tmp_qad > ubound), 0, m_dep)
                xdata[ibiased] += m_dep

            nsample = np.isfinite(xdata[isample]).sum()
            nref = np.isfinite(xdata[iref]).sum()
            breakpoint_stat[str(breaks[ib])] = {'i': int_breaks[ib],
                                                'isample': isample,
                                                'ibiased': ibiased,
                                                'iref': iref,
                                                'mcor': m_dep,
                                                'nref': nref,
                                                'nsamp': nsample}
            if verbose > 0:
                print(funcid + " %s : %s 50%%: %9f " % (ivar, breaks[ib], np.nanmedian(m_dep)))

        data["%s_mcor" % ivar] = xdata  # fill in

    if database:
        if not pressure_levels:
            # 1D input is already a DataFrame; the Panel conversion below
            # does not apply to it
            return data
        return data.to_frame(filter_observations=False).reset_index().set_index('date', drop=True)

    return breakpoint_stat, data