def get_breakpoint_dates(data, var=None):
    """Look for breakpoint information in data.

    Parameters
    ----------
    data : DataFrame/Panel
        Input holding breakpoint indicator variables. Values > 0 are treated
        as flagged. Anything that is not a DataFrame is handled as a Panel.
    var : str, optional
        Name of the column (DataFrame) or item (Panel) to evaluate. If None,
        every column/item whose name contains "breaks" is evaluated.

    Returns
    -------
    dict
        Maps each evaluated variable name to the output of ``filter_series``
        applied to the ``> 0`` mask. For a DataFrame ``.unique()`` is applied
        to that output; for a Panel the mask is first reduced with ``any(1)``.
    """
    from raso.support import filter_series

    res = {}
    if isinstance(data, pd.DataFrame):
        if var is None:
            for ivar in data.columns.tolist():
                if "breaks" in ivar:
                    res[ivar] = filter_series(data[ivar] > 0).unique()
        else:
            res[var] = filter_series(data[var] > 0).unique()
    else:  # PANEL
        if var is None:
            for ivar in data.items.tolist():
                if "breaks" in ivar:
                    # Key by the item actually inspected. Using `var` here
                    # (which is None in this branch) would collapse all
                    # results into a single `None` entry.
                    res[ivar] = filter_series((data[ivar] > 0).any(1))
        else:
            res[var] = filter_series((data[var] > 0).any(1))
    return res
def timeseries_line_snht(data, var, snhtvar, p, varlabel, breaks=None, window=60, min_periods=1, plabel=5,
                         ax=None, **kwargs):
    """Plot a smoothed time series of one Panel item with its SNHT series on a twin axis.

    The series ``data.ix[var, :, p]`` is resampled to 12h means, smoothed with
    a centered rolling mean and plotted. If the current figure then holds
    exactly one axes, a twin y-axis with the SNHT series, axis labels, a zero
    line and (optionally) vertical break markers are added.

    Parameters
    ----------
    data : Panel
        Indexed as ``data.ix[item, :, p]``.
    var : str
        Item to plot.
    snhtvar : str
        Item holding the SNHT series drawn on the twin axis.
    p : number
        Minor-axis label to select; shown in the plot as ``p / 100`` hPa
        (presumably pressure in Pa - confirm with caller).
    varlabel : str
        Label of the left y-axis.
    breaks : str, optional
        Item whose values > 0 are passed to ``filter_series``; a light gray
        vertical line is drawn for every element of the result.
    window, min_periods : int
        Rolling-mean parameters.
    plabel : number
        y-position of the level text.
    ax : matplotlib axes, optional
        Axes to draw into.
    kwargs
        Forwarded to the pandas ``plot`` call of the smoothed series.

    Returns
    -------
    matplotlib axes
        The axes holding the smoothed series.

    Raises
    ------
    ValueError
        If `data` is not a Panel.
    """
    import matplotlib.pyplot as plt

    if not isinstance(data, pd.Panel):
        raise ValueError("Requires a Panel")

    smoothed = data.ix[var, :, p].resample('12h').mean().rolling(
        window=window, min_periods=min_periods, center=True).mean()
    ax = smoothed.plot(legend=False, ax=ax, label=var, **kwargs)

    naxes = len(plt.gcf().get_axes())
    if naxes == 1:
        ax.text('1980', plabel, '%d hPa' % (p / 100), fontsize=10)
        ax2 = ax.twinx()
        # Advance the color cycle so the SNHT line differs from the data line.
        # Builtin next() works on Python 2.6+ and 3; the iterator method
        # .next() exists on Python 2 only.
        next(ax2._get_lines.prop_cycler)
        ax2 = data.ix[snhtvar, :, p].plot(ax=ax2, label='')
        ax.set_ylabel(varlabel)
        ax.yaxis.label.set_color(ax.get_lines()[0].get_color())
        ax2.set_ylabel('SNHT')
        ax2.yaxis.label.set_color(ax2.get_lines()[0].get_color())
        ax2.set_ylim(0, 200)
        ax.axhline(y=0, c='k', label="")  # will be ignored in legend
        if breaks is not None:
            break_dates = filter_series(data.ix[breaks, :, p] > 0)
            for ib in break_dates:
                ax.axvline(x=ib, color='lightgray', label="")
        # Give the twin axis as many ticks as the left axis so they line up.
        snht_ticks = ax2.get_yticks()
        ax2.set_yticks(np.linspace(snht_ticks[0], snht_ticks[-1], len(ax.get_yticks())))
    ax.set_xlim('1979', '2016')
    return ax
def timeseries_line_correction(data, corvar, snhtvar, p, varlabel, breaks=None, departures=None, window=60,
                               min_periods=1, plabel=5, ax=None, post_snht=False, snht_window=1460,
                               snht_dist=730, snht_thres=50, force_snht=False, **kwargs):
    """Plot a smoothed (corrected) series, optionally as departures, with SNHT on a twin axis.

    The plotted series is ``data.ix[corvar, :, p]``, or its difference to
    ``data.ix[departures, :, p]`` when `departures` is given. It is resampled
    to 12h means and smoothed with a centered rolling mean.

    If the current figure holds exactly one axes after plotting, or
    `force_snht` is set, a twin axis with the SNHT series, the threshold line
    and optional break markers is added. Otherwise only the post-correction
    SNHT (if requested) is drawn into the second axes of the current figure.

    Parameters
    ----------
    data : Panel
        Indexed as ``data.ix[item, :, p]``.
    corvar : str
        Item to plot.
    snhtvar : str
        Item holding the SNHT series.
    p : number
        Minor-axis label to select; shown as ``p / 100`` hPa.
    varlabel : str
        Label of the left y-axis.
    breaks : str, optional
        Item whose values > 0 are passed to ``filter_series`` to obtain the
        positions of the vertical break markers.
    departures : str, optional
        Item subtracted from `corvar` before plotting.
    window, min_periods : int
        Rolling-mean parameters.
    plabel : number, optional
        y-position of the level text; None suppresses the text.
    ax : matplotlib axes, optional
        Axes to draw into.
    post_snht : bool
        Run ``_1d_detection`` on the departures and plot the resulting
        ``'post_snht'`` column. Only used when `departures` is given.
    snht_window, snht_dist, snht_thres : number
        Passed to ``_1d_detection``; `snht_thres` is also drawn as a dotted
        horizontal line on the SNHT axis.
    force_snht : bool
        Add the SNHT twin axis even if the figure already has several axes.
    kwargs
        Forwarded to the pandas ``plot`` call of the smoothed series.

    Returns
    -------
    matplotlib axes
        The axes holding the smoothed series.

    Raises
    ------
    ValueError
        If `data` is not a Panel.
    """
    import matplotlib.pyplot as plt

    if not isinstance(data, pd.Panel):
        raise ValueError("Requires a Panel")

    if departures is not None:
        departures = (data.ix[corvar, :, p] - data.ix[departures, :, p])
        ax = departures.resample('12h').mean().rolling(
            window=window, min_periods=min_periods, center=True).mean().plot(
            legend=False, ax=ax, label=corvar+'_dep', **kwargs)
    else:
        ax = data.ix[corvar, :, p].resample('12h').mean().rolling(
            window=window, min_periods=min_periods, center=True).mean().plot(
            legend=False, ax=ax, label=corvar, **kwargs)

    naxes = len(plt.gcf().get_axes())
    if naxes == 1 or force_snht:
        if plabel is not None:
            ax.text('1980', plabel, '%d hPa' % (p / 100), fontsize=10)
        ax2 = ax.twinx()
        # Advance the color cycle so the SNHT line differs from the data line.
        # Builtin next() works on Python 2.6+ and 3; .next() is Python 2 only.
        next(ax2._get_lines.prop_cycler)
        ax2 = data.ix[snhtvar, :, p].plot(ax=ax2, label='')
        ax.set_ylabel(varlabel)
        ax.yaxis.label.set_color(ax.get_lines()[0].get_color())
        ax2.set_ylabel('SNHT')
        ax2.yaxis.label.set_color(ax2.get_lines()[0].get_color())
        ax2.axhline(y=snht_thres, c='k', ls=':')
        if post_snht and departures is not None:
            departures = departures.to_frame(name='post')
            # Called for its effect on `departures`: the 'post_snht' column
            # read on the next line is expected to be added by this call.
            _1d_detection(departures, 'post', snht_window, snht_dist, snht_thres)
            ax2 = departures['post_snht'].plot(ax=ax2, color=ax2.get_lines()[0].get_color(), label='',
                                               style='--')
        ax2.set_ylim(0, 200)
        ax.axhline(y=0, c='k', label='')
        if breaks is not None:
            break_dates = filter_series(data.ix[breaks, :, p] > 0)
            for ib in break_dates:
                ax.axvline(x=ib, color='lightgray', label='')
        # Give the twin axis as many ticks as the left axis so they line up.
        snht_ticks = ax2.get_yticks()
        ax2.set_yticks(np.linspace(snht_ticks[0], snht_ticks[-1], len(ax.get_yticks())))
    else:
        if post_snht and departures is not None:
            departures = departures.to_frame(name='post')
            _1d_detection(departures, 'post', snht_window, snht_dist, snht_thres)
            # NOTE(review): assumes the figure's second axes is the SNHT twin
            # axis created by an earlier call - confirm against callers.
            departures['post_snht'].plot(ax=plt.gcf().get_axes()[1], label='')
    ax.set_xlim('1979', '2016')
    return ax
def timeseries_line_trend(data, corvar, snhtvar, p, varlabel, breaks=None, departures=None, window=60,
                          min_periods=1, plabel=5, showline=True, ax=None, label=None, **kwargs):
    """Plot a smoothed series together with its fitted trend and, on a twin axis, SNHT.

    The series is ``data.ix[corvar, :, p]``, or its difference to
    ``data.ix[departures, :, p]`` when `departures` is given. A trend is
    obtained from ``trend_fit_timeseries(series, anomaly=False)`` and its mean
    step difference times ``2 * 3650`` is appended to the legend label as
    ``[K/10a]``. The series itself is resampled to 12h means and smoothed with
    a centered rolling mean before plotting.

    Parameters
    ----------
    data : Panel
        Indexed as ``data.ix[item, :, p]``.
    corvar : str
        Item to plot.
    snhtvar : str
        Item holding the SNHT series (drawn in 'firebrick').
    p : number
        Minor-axis label to select; shown as ``p / 100`` hPa.
    varlabel : str
        Label of the left y-axis.
    breaks : str, optional
        Item whose values > 0 are passed to ``filter_series`` to obtain the
        positions of the vertical break markers.
    departures : str, optional
        Item subtracted from `corvar` before fitting and plotting.
    window, min_periods : int
        Rolling-mean parameters.
    plabel : number
        y-position of the level text.
    showline : bool
        Also draw the fitted trend line in the color of the last plotted line.
    ax : matplotlib axes, optional
        Axes to draw into.
    label : str, optional
        Legend label prefix; defaults to `corvar` (plus ``'_dep'`` for
        departures).
    kwargs
        Forwarded to the pandas ``plot`` call of the smoothed series.

    Returns
    -------
    matplotlib axes
        The axes holding the smoothed series.

    Raises
    ------
    ValueError
        If `data` is not a Panel.
    """
    import matplotlib.pyplot as plt
    from raso.clim import trend_fit_timeseries

    if not isinstance(data, pd.Panel):
        raise ValueError("Requires a Panel")

    as_departure = departures is not None
    if as_departure:
        series = data.ix[corvar, :, p] - data.ix[departures, :, p]
    else:
        series = data.ix[corvar, :, p]

    fitted = trend_fit_timeseries(series, anomaly=False)
    if label is None:
        label = corvar + '_dep' if as_departure else corvar
    label += ' (%5.3f [K/10a])' % (fitted.diff().mean() * 2 * 3650)

    smoothed = series.resample('12h').mean().rolling(
        window=window, min_periods=min_periods, center=True).mean()
    ax = smoothed.plot(legend=False, ax=ax, label=label, **kwargs)

    if showline:
        line_color = ax.get_lines()[-1].get_color()
        fitted.plot(ax=ax, color=line_color, label='', alpha=0.5)

    # Twin axis and decorations are added only while the figure has one axes.
    if len(plt.gcf().get_axes()) == 1:
        ax.text('1980', plabel, '%d hPa' % (p / 100), fontsize=10)
        twin = data.ix[snhtvar, :, p].plot(ax=ax.twinx(), label='', color='firebrick')
        ax.set_ylabel(varlabel)
        twin.set_ylabel('SNHT')
        twin.yaxis.label.set_color('firebrick')
        twin.set_ylim(0, 200)
        ax.axhline(y=0, c='k', label='')
        if breaks is not None:
            for when in filter_series(data.ix[breaks, :, p] > 0):
                ax.axvline(x=when, color='lightgray', label='')
        # Same number of ticks on both y-axes so they line up.
        twin_ticks = twin.get_yticks()
        twin.set_yticks(np.linspace(twin_ticks[0], twin_ticks[-1], len(ax.get_yticks())))

    ax.set_xlim('1979', '2016')
    return ax
def detection(data, var='t', thres=50, window=1460, dist=730, levels=None, min_levels=3, database=False,
              freq='12h', valid_times=[0, 12], verbose=0, **kwargs):
    """Break Detection in timeseries using a Standard Normal Homogeneity Test (SNHT)

    Parameters
    ----------
    data : Series/DataFrame/Panel
        Input Radiosonde data (standard)
    var : str/list
        Variable to use for detection
    thres : number
        Threshold for SNHT
    window : number
        Window size for SNHT
    dist : number
        Distance between breakpoints
    levels : list
        only these levels
    min_levels : int
        Minimum required number of levels
    database : bool
        Convert the Panel result to a date-indexed frame sorted by
        'date' and 'p'
    freq : str
        Frequency the data is resampled/reindexed to. Must be '24h' for one
        valid time and '12h' for two valid times.
    valid_times : list
        Hours of the day to keep (never modified here).
    verbose : int
        Verbosity level, forwarded to the helpers.
    kwargs : **
        Accepted but not used.

    Returns
    -------
    found_breaks : dict
        Result of the 1D/2D detection helper per variable.
    data : DataFrame/Panel
        input + var_breaks + var_snht

    Raises
    ------
    ValueError
        Wrong input type, wrong `var` type, unknown variable or more than one
        variable for a Series.
    RuntimeError
        `freq` does not match the number of `valid_times` (1 or 2 entries).

    Warns
    -----
    RuntimeWarning
        For any other number of `valid_times`, where the consistency with
        `freq` cannot be checked here.
    """
    import warnings

    funcid = '[BD] '

    if not isinstance(data, (pd.Series, pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + " Require a Series, DataFrame or Panel")

    if not isinstance(var, (list, str)):
        raise ValueError(funcid + " Variable needs to be a list or string")

    if isinstance(var, str):
        var = [var]  # as list

    data = data.copy()

    if len(valid_times) == 1:
        if freq != '24h':
            raise RuntimeError("Frequency must be 24 hours then")
    elif len(valid_times) == 2:
        if freq != '12h':
            raise RuntimeError("Frequency must be 12 hours then")
    else:
        # Advisory only: issue a warning instead of raising it, otherwise any
        # other number of valid times could never be processed.
        warnings.warn("Make sure the freqency fits to the valid times", RuntimeWarning)

    if isinstance(data, pd.Series):
        if len(var) != 1:
            raise ValueError("Series object has only one variable")
        # From here on a single name is needed, not the one-element list.
        ivar = var[0]
        data = data.to_frame()
        data.columns = var
        data = data.ix[np.in1d(data.index.hour, valid_times), :]  # VIEW
        data = data.resample(freq).asfreq()
        # run 1d; keyed by variable like the DataFrame branch
        found_breaks = {ivar: _1d_detection(data, ivar, window, dist, thres, verbose=verbose)}
        print_verbose(ivar + ": " + str(filter_series(data['%s_breaks' % ivar] > 0).unique()), verbose)
        return found_breaks, data  # dict, dataframe

    if isinstance(data, pd.DataFrame):
        for ivar in var:
            if ivar not in data.columns:
                raise ValueError(funcid + "Variable not found: %s in %s" % (ivar, str(data.columns)))

        data = database_to_panel(data, levels=levels, verbose=verbose)

        if isinstance(data, pd.DataFrame):
            # only one level
            data = data.ix[np.in1d(data.index.hour, valid_times), :]  # VIEW
            data = data.resample(freq).asfreq()
            found_breaks = {}
            for ivar in var:
                found_breaks[ivar] = _1d_detection(data, ivar, window, dist, thres, verbose=verbose)
                print_verbose(ivar + ": " + str(filter_series(data['%s_breaks' % ivar] > 0).unique()), verbose)
            return found_breaks, data  # dict, dataframe

    # Panel
    for ivar in var:
        if ivar not in data.items:
            raise ValueError(funcid + "Variable not found: %s in %s" % (ivar, str(data.items)))

    # Select times:
    data = data.loc[:, np.in1d(data.major_axis.hour, valid_times), :]

    # Resample to frequency
    if data.major_axis.inferred_freq != freq:
        print_verbose(funcid + "Resampling to %s frequency" % freq, verbose)
        newdates = pd.date_range(data.major_axis[0], data.major_axis[-1], freq=freq)
        data = data.reindex(major_axis=newdates)
        data.major_axis.name = 'date'

    if levels is not None:
        data = data.ix[:, :, data.minor_axis.isin(levels)].copy()
        print_verbose(funcid + "Selecting only specified levels ...", verbose)
        print_verbose(funcid + "p-Levels: " + ",".join(["%d" % (ip / 100) for ip in levels]) + ' hPa', verbose)

    # 2D detection
    found_breaks = {}
    for ivar in var:
        found_breaks[ivar] = _2d_detection(data, ivar, window, dist, thres, min_levels, verbose=verbose)

    if verbose > 0:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("Minimum amount of levels required:  " + str(min_levels))
        print("Breakpoints")
        for ivar in var:
            print(ivar + ":  " + str(data.major_axis[(data['%s_breaks' % ivar] > 0).all(1)]))

    # Output as Database
    if database:
        print_verbose(funcid + "as database.", verbose)
        data = data.to_frame(filter_observations=False).reset_index().sort_values(
            by=['date', 'p']).set_index('date', drop=True)

    return found_breaks, data  # dict, panel / database
def breakpoint_timeseries(data, var, unit='K', p=None, bins=None, departures=False, post_snht=False, raw=False,
                          window=365, min_periods=30, figsize=(16,9), ylim=None, verbose=0, **kwargs):
    """Plot SNHT, time series and per-breakpoint histograms for each pressure level.

    One figure is created per level with up to four rows:
    SNHT, (smoothed) values, histograms around each breakpoint and, if any
    corrected variable (``*cor``) is present, histograms of the corrections.
    If no breakpoint is found only the first two rows are drawn.

    Parameters
    ----------
    data
        Input handed to ``timeseries_preprocessor``.
    var : str
        Base variable name. ``<var>_dep_breaks`` and ``<var>_dep_snht`` are
        requested as required variables; ``<var>_mcor``, ``<var>_qcor``,
        ``<var>_qecor``, ``<var>_era``, ``<var>_era_adj`` and ``<var>_dep``
        as additional ones.
    unit : str
        Unit shown in axis labels.
    p : optional
        Level selection, forwarded to ``timeseries_preprocessor``.
    bins : array-like, optional
        Histogram bins, default ``np.arange(0, 50)``.
    departures : bool
        Subtract ``<var>_era`` from every selected variable before plotting.
    post_snht : bool
        Run ``_1d_detection`` (window 1460, distance 730, threshold 50) on
        every selected variable except `var` and ``<var>_era`` and add the
        resulting SNHT to the first panel.
    raw : bool
        Plot values as they are instead of the rolling mean.
    window, min_periods : int
        Rolling-mean parameters.
    figsize : tuple
        Figure size.
    ylim : tuple, optional
        y-limits of the value panel. If None, ``int(mean) +/- 5`` of the
        plotted `var` is used, computed separately for each level.
    verbose : int
        Print additional information if > 0.
    kwargs
        ``title`` sets the title prefix; everything else is ignored.

    Raises
    ------
    RuntimeError
        If the preprocessor returns no pressure level.
    """
    import matplotlib.pyplot as plt
    from timeseries import timeseries_snht
    from raso.breakpoint.detections import _1d_detection

    funcid = '[BTS] '
    era_name = "%s_era" % var
    era_adj_name = "%s_era_adj" % var

    pressure_levels, dates, plevs, test, label = timeseries_preprocessor(
        data, [var, "%s_dep_breaks" % var, "%s_dep_snht" % var], p=p,
        add_vars=["%s_mcor" % var, "%s_qcor" % var, "%s_qecor" % var, era_name, era_adj_name, "%s_dep" % var],
        funcid=funcid)

    if plevs is None:
        raise RuntimeError(funcid + " Require an availbale pressure level at least!")
    if isinstance(plevs, (int, float)):
        plevs = [plevs]  # as list

    if bins is None:
        bins = np.arange(0, 50)

    # varselection and varlabel are parallel lists (variable name, legend
    # label). Always append to both so they can never get out of step.
    varselection = []
    varlabel = []
    correction_present = False
    for jvar in label:
        if jvar == var:
            varlabel.append('U')
            varselection.append(jvar)
        elif 'mcor' in jvar:
            varlabel.append('M')
            varselection.append(jvar)
            correction_present = True
        elif 'qcor' in jvar:
            varlabel.append('Q')
            varselection.append(jvar)
            correction_present = True
        elif 'qecor' in jvar:
            varlabel.append('QE')
            varselection.append(jvar)
            correction_present = True
        elif 'adj' in jvar:
            varlabel.append('EA')
            varselection.append(jvar)
        elif jvar == era_name:
            varlabel.append('E')
            varselection.append(jvar)
        else:
            if verbose > 0:
                print("Not used:  " + str(jvar))

    if verbose > 0:
        print(varlabel)
        print(varselection)

    title = kwargs.pop('title', 'Radiosonde Breakpoint Detection and Correction')

    if pressure_levels:
        breaks = filter_series((test["%s_dep_breaks" % var] > 0).any(1))
    else:
        breaks = filter_series((test["%s_dep_breaks" % var] > 0))

    nbreaks = len(breaks)
    if nbreaks > 0:
        ncols = nbreaks
        nplotlevels = 4 if correction_present else 3  # snht, timeseries, hist, hist
    else:
        # Without breaks there is nothing to draw histograms for; a grid with
        # zero columns or an index into the empty `breaks` would fail.
        ncols = 1
        nplotlevels = 2

    small_font = plt.rcParams.get('font.size') - 2

    for z, ilev in enumerate(plevs):
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("%d / %d : %d" % (z + 1, len(plevs), ilev))

        if pressure_levels:
            plotdata = test.minor_xs(ilev)
        else:
            plotdata = test

        if verbose > 0:
            print(plotdata.shape)
            print(plotdata.columns)

        plotdata = plotdata.resample('12h').asfreq()  # make sure it is the same freq

        plt.figure(figsize=figsize)
        #
        # Figure 1 : SNHT
        #
        axs = plt.subplot2grid((nplotlevels, ncols), (0, 0), colspan=ncols)
        timeseries_snht(plotdata, "%s_dep" % var, ax=axs, label='U')
        axs.set_ylabel('SNHT')
        axs.set_yticklabels(axs.get_yticklabels()[::2])
        if post_snht:
            for k, zvar in enumerate(varselection):
                if (zvar == era_name) or (zvar == var):
                    continue
                tmpdata = plotdata.loc[:, zvar].copy().to_frame()
                if verbose > 0:
                    print("SNHT for  " + str(zvar))
                # Called for its effect on tmpdata: the '<zvar>_snht' column
                # read below is expected to be added by this call.
                _1d_detection(tmpdata, zvar, 1460, 730, 50)  # default parameters
                axs.plot(tmpdata.index, tmpdata['%s_snht' % zvar], label=varlabel[k], alpha=0.7, ls='--')

        axs.legend(loc="upper left", fontsize=small_font, bbox_to_anchor=(1.02, 1), bbox_transform=axs.transAxes)
        plt.setp(axs.get_xticklabels(), visible=False)

        axt = plt.subplot2grid((nplotlevels, ncols), (1, 0), colspan=ncols, sharex=axs)
        #
        # DEPARTURES
        #
        if departures:
            for jvar in varselection:
                if jvar == era_name:
                    continue
                plotdata[jvar] = plotdata[jvar] - plotdata[era_name]  # ref !!

        if raw:
            pd_mean = plotdata
        else:
            pd_mean = plotdata.rolling(window=window, min_periods=min_periods).mean()
        #
        # Figure 2 : Values
        #
        for j, jvar in enumerate(varselection):
            if departures and jvar == era_name:
                continue
            axt.plot(pd_mean.index, pd_mean[jvar], lw=2, label=varlabel[j])

        year_ticks = np.unique(pd_mean.index.year.astype('str'))[::2]
        axs.set_xticks(year_ticks)  # ticks
        axs.set_title(title + " %d hPa" % int(ilev/100))
        # Explicit True: a bare grid() call toggles the current state, which
        # depends on what timeseries_snht already did to this axes.
        axs.grid(True)
        axt.set_xticks(year_ticks)  # ticks

        # Keep the caller's `ylim` untouched so the automatic limits are
        # derived for every level, not only for the first one.
        if ylim is None:
            ym = int(pd_mean[var].mean())
            ys = 5  # int(pd_mean[var].std())*2
            level_ylim = (ym - ys, ym + ys)
        else:
            level_ylim = ylim
        axt.set_ylim(level_ylim)
        axt.set_yticks(axt.get_yticks()[::2])

        # "dep." belongs on the label when departures are plotted.
        if departures:
            axt.set_ylabel("%s dep. [%s]" % (var.upper(), unit))
        else:
            axt.set_ylabel("%s [%s]" % (var.upper(), unit))

        axt.grid()
        axt.legend(loc="upper left", fontsize=small_font, bbox_to_anchor=(1.02, 1), bbox_transform=axt.transAxes)

        break_text_y = pd_mean[var].min()
        for i, ib in enumerate(breaks):
            axt.axvline(x=ib, color='k')
            # Breaks are numbered backwards: the last break is B01.
            axt.text(ib, break_text_y, "B%02d" % (nbreaks - i), color='k')

        if nbreaks == 0:
            continue
        #
        # Figure 3: Histograms for each breakpoint
        #
        for ibreak in range(nbreaks):
            if ibreak == 0:
                ax1 = plt.subplot2grid((nplotlevels, ncols), (2, ibreak))
                axi = ax1
            else:
                axi = plt.subplot2grid((nplotlevels, ncols), (2, ibreak), sharey=ax1)

            # Reference: from this break to the next one (or the end)
            if ibreak + 1 < nbreaks:
                iref = slice(breaks[ibreak], breaks[ibreak + 1])
            else:
                iref = slice(breaks[ibreak], None)

            # Biased Sample: from the previous break (or the start) to this one
            if ibreak > 0:
                ibiased = slice(breaks[ibreak - 1], breaks[ibreak])
            else:
                ibiased = slice(None, breaks[ibreak])

            axi.hist(plotdata.ix[iref, var], bins=bins, normed=1, label='BB')
            if era_adj_name in varselection:
                axi.hist(plotdata.ix[iref, era_adj_name], bins=bins, normed=1, label='EA', alpha=0.6)
            axi.hist(plotdata.ix[ibiased, var], bins=bins, normed=1, label='AB', alpha=0.6)

            if ibreak == 0:
                # Thin the ticks once only; all histograms share this y-axis.
                axi.set_yticks(axi.get_yticks()[::2])
            axi.grid()
            axi.set_title("B%02d" % (nbreaks - ibreak), y=0.75)
            if ibreak > 0:
                plt.setp(axi.get_yticklabels(), visible=False)
            #
            # Figure 4: Correction Histograms for each breakpoint
            #
            if correction_present:
                axj = plt.subplot2grid((nplotlevels, ncols), (3, ibreak), sharey=ax1, sharex=axi)
                plt.setp(axi.get_xticklabels(), visible=False)
                alpha = 1
                for j, jvar in enumerate(varselection):
                    if 'cor' in jvar:
                        axj.hist(plotdata.ix[ibiased, jvar], bins=bins, normed=1, label=varlabel[j],
                                 alpha=(alpha-0.1*j))
                axj.grid()
                axj.set_xlabel('%s [%s]' % (var.upper(), unit))
                if ibreak > 0:
                    plt.setp(axj.get_yticklabels(), visible=False)

        # Legends go next to the last (right-most) histogram column.
        axi.legend(loc='upper left', fontsize=small_font, bbox_to_anchor=(1.05, 1))
        if correction_present:
            axj.legend(loc='upper left', fontsize=small_font, bbox_to_anchor=(1.05, 1))