def get_breakpoint_dates(data, var=None):
    """ Look for Breakpoint information in data

    Every selected variable is compared against zero and the resulting
    boolean mask is handed to ``raso.support.filter_series``.

    Parameters
    ----------
    data        DataFrame/Panel     anything that is not a DataFrame is handled as a Panel
    var         str                 variable to inspect; if None, every column (DataFrame)
                                    or item (Panel) whose name contains "breaks" is used

    Returns
    -------
    dict
        variable name -> result of ``filter_series``.
        DataFrame input: the result is additionally reduced with ``.unique()``.
        Panel input: the mask is collapsed with ``.any(1)`` first and the
        ``filter_series`` result is stored as returned.
    """
    from raso.support import filter_series

    res = {}
    if isinstance(data, pd.DataFrame):
        if var is not None:
            names = [var]
        else:
            names = [name for name in data.columns.tolist() if "breaks" in name]

        for name in names:
            res[name] = filter_series(data[name] > 0).unique()
    else:
        # PANEL
        if var is not None:
            names = [var]
        else:
            names = [name for name in data.items.tolist() if "breaks" in name]

        for name in names:
            # Key by the item that was actually processed. Using `var` here
            # (None during auto-discovery) made every match overwrite the
            # previous one under the key None.
            res[name] = filter_series((data[name] > 0).any(1))

    return res
Example #2
0
def timeseries_line_snht(data, var, snhtvar, p, varlabel, breaks=None, window=60, min_periods=1, plabel=5, ax=None, **kwargs):
    """Plot a smoothed line of ``var`` at level ``p`` together with an SNHT curve.

    The series ``data.ix[var, :, p]`` is resampled to 12-hourly means,
    smoothed with a centred rolling mean and drawn on ``ax``. When the current
    figure holds exactly one axes after that plot, the decorations are added:
    level label, a twin y-axis carrying ``snhtvar``, axis labels, a zero line
    and optional breakpoint markers.

    Parameters
    ----------
    data        Panel       input data
    var         str         item to plot
    snhtvar     str         item plotted on the twin (SNHT) axis
    p           number      minor-axis key; shown as ``p / 100`` hPa in the label
    varlabel    str         y-label of the primary axis
    breaks      str         item whose values > 0 mark breakpoints (optional)
    window      int         rolling-mean window
    min_periods int         rolling-mean minimum number of observations
    plabel      number      y position of the level label
    ax          Axes        axes to draw on (optional)
    kwargs      **          forwarded to the pandas ``plot`` call

    Returns
    -------
    Axes        the primary axes

    Raises
    ------
    ValueError  if ``data`` is not a Panel
    """
    import matplotlib.pyplot as plt

    if not isinstance(data, pd.Panel):
        raise ValueError("Requires a Panel")

    smoothed = data.ix[var, :, p].resample('12h').mean().rolling(
        window=window, min_periods=min_periods, center=True).mean()
    ax = smoothed.plot(legend=False, ax=ax, label=var, **kwargs)

    naxes = len(plt.gcf().get_axes())
    if naxes == 1:
        ax.text('1980', plabel, '%d hPa' % (p / 100), fontsize=10)
        ax2 = ax.twinx()
        # Skip one entry of the colour cycle so the SNHT line differs from the
        # data line. The builtin next() works on Python 2.6+ and Python 3,
        # unlike the Python-2-only iterator method .next().
        next(ax2._get_lines.prop_cycler)
        ax2 = data.ix[snhtvar, :, p].plot(ax=ax2, label='')
        ax.set_ylabel(varlabel)
        ax.yaxis.label.set_color(ax.get_lines()[0].get_color())
        ax2.set_ylabel('SNHT')
        ax2.yaxis.label.set_color(ax2.get_lines()[0].get_color())
        ax2.set_ylim(0, 200)
        ax.axhline(y=0, c='k', label="")  # will be ignored in legend
        if breaks is not None:
            breaks = filter_series(data.ix[breaks, :, p] > 0)
            for ib in breaks:
                ax.axvline(x=ib, color='lightgray', label="")

        # Give the twin axis as many ticks as the primary axis, spread over
        # its current first-to-last tick range.
        ax2.set_yticks(np.linspace(ax2.get_yticks()[0], ax2.get_yticks()[-1], len(ax.get_yticks())))

    ax.set_xlim('1979', '2016')
    return ax
Example #3
0
def timeseries_line_correction(data, corvar, snhtvar, p, varlabel, breaks=None, departures=None, window=60,
                               min_periods=1, plabel=5, ax=None, post_snht=False, snht_window=1460, snht_dist=730,
                               snht_thres=50, force_snht=False, **kwargs):
    """Plot a smoothed line of a corrected variable with its SNHT curve.

    The plotted series is ``data.ix[corvar, :, p]``, or its difference to
    ``data.ix[departures, :, p]`` when ``departures`` is given. It is
    resampled to 12-hourly means and smoothed with a centred rolling mean.
    When the current figure holds exactly one axes after that plot (or
    ``force_snht`` is set), a twin axis with ``snhtvar``, labels, the
    threshold line, a zero line and optional breakpoint markers are added.

    Parameters
    ----------
    data        Panel       input data
    corvar      str         item to plot
    snhtvar     str         item plotted on the twin (SNHT) axis
    p           number      minor-axis key; shown as ``p / 100`` hPa in the label
    varlabel    str         y-label of the primary axis
    breaks      str         item whose values > 0 mark breakpoints (optional)
    departures  str         reference item subtracted from ``corvar`` (optional)
    window      int         rolling-mean window
    min_periods int         rolling-mean minimum number of observations
    plabel      number      y position of the level label; None disables the label
    ax          Axes        axes to draw on (optional)
    post_snht   bool        run ``_1d_detection`` on the departures and plot the result
                            (only effective when ``departures`` is given)
    snht_window number      passed to ``_1d_detection``
    snht_dist   number      passed to ``_1d_detection``
    snht_thres  number      passed to ``_1d_detection``; also drawn as a dotted line
    force_snht  bool        add the twin axis regardless of the number of axes
    kwargs      **          forwarded to the pandas ``plot`` call

    Returns
    -------
    Axes        the primary axes

    Raises
    ------
    ValueError  if ``data`` is not a Panel
    """
    import matplotlib.pyplot as plt

    if not isinstance(data, pd.Panel):
        raise ValueError("Requires a Panel")

    if departures is not None:
        departures = (data.ix[corvar, :, p] - data.ix[departures, :, p])
        ax = departures.resample('12h').mean().rolling(window=window, min_periods=min_periods, center=True).mean().plot(
            legend=False, ax=ax, label=corvar + '_dep', **kwargs)
    else:
        ax = data.ix[corvar, :, p].resample('12h').mean().rolling(window=window, min_periods=min_periods, center=True).mean().plot(
            legend=False, ax=ax, label=corvar, **kwargs)

    naxes = len(plt.gcf().get_axes())
    if naxes == 1 or force_snht:
        if plabel is not None:
            ax.text('1980', plabel, '%d hPa' % (p / 100), fontsize=10)
        ax2 = ax.twinx()
        # Skip one entry of the colour cycle so the SNHT line differs from the
        # data line. The builtin next() works on Python 2.6+ and Python 3,
        # unlike the Python-2-only iterator method .next().
        next(ax2._get_lines.prop_cycler)
        ax2 = data.ix[snhtvar, :, p].plot(ax=ax2, label='')
        ax.set_ylabel(varlabel)
        ax.yaxis.label.set_color(ax.get_lines()[0].get_color())
        ax2.set_ylabel('SNHT')
        ax2.yaxis.label.set_color(ax2.get_lines()[0].get_color())
        ax2.axhline(y=snht_thres, c='k', ls=':')
        if post_snht and departures is not None:
            departures = departures.to_frame(name='post')
            # The call is made for its effect on `departures`: the
            # 'post_snht' column is read right afterwards.
            _1d_detection(departures, 'post', snht_window, snht_dist, snht_thres)
            ax2 = departures['post_snht'].plot(ax=ax2, color=ax2.get_lines()[0].get_color(), label='', style='--')

        ax2.set_ylim(0, 200)
        ax.axhline(y=0, c='k', label='')
        if breaks is not None:
            breaks = filter_series(data.ix[breaks, :, p] > 0)
            for ib in breaks:
                ax.axvline(x=ib, color='lightgray', label='')

        # Give the twin axis as many ticks as the primary axis, spread over
        # its current first-to-last tick range.
        ax2.set_yticks(np.linspace(ax2.get_yticks()[0], ax2.get_yticks()[-1], len(ax.get_yticks())))
    else:
        if post_snht and departures is not None:
            departures = departures.to_frame(name='post')
            _1d_detection(departures, 'post', snht_window, snht_dist, snht_thres)
            # Draw onto the second axes of the current figure.
            # NOTE(review): presumably the SNHT twin axis from an earlier call - confirm.
            ax2 = departures['post_snht'].plot(ax=plt.gcf().get_axes()[1], label='')

    ax.set_xlim('1979', '2016')
    return ax
Example #4
0
def timeseries_line_trend(data, corvar, snhtvar, p, varlabel, breaks=None, departures=None, window=60, min_periods=1,
                          plabel=5, showline=True, ax=None, label=None, **kwargs):
    """Plot a smoothed line of ``corvar`` at level ``p`` with its fitted trend.

    The series is ``data.ix[corvar, :, p]``, or its difference to
    ``data.ix[departures, :, p]`` when ``departures`` is given. The trend
    comes from ``raso.clim.trend_fit_timeseries``; the mean step of that
    trend times ``2 * 3650`` is appended to the legend label as ``[K/10a]``.
    When the current figure holds exactly one axes after plotting, a twin
    axis with ``snhtvar``, labels, a zero line and optional breakpoint
    markers are added.

    Parameters
    ----------
    data        Panel       input data
    corvar      str         item to plot
    snhtvar     str         item plotted on the twin (SNHT) axis
    p           number      minor-axis key; shown as ``p / 100`` hPa in the label
    varlabel    str         y-label of the primary axis
    breaks      str         item whose values > 0 mark breakpoints (optional)
    departures  str         reference item subtracted from ``corvar`` (optional)
    window      int         rolling-mean window
    min_periods int         rolling-mean minimum number of observations
    plabel      number      y position of the level label
    showline    bool        also draw the fitted trend line
    ax          Axes        axes to draw on (optional)
    label       str         legend label; defaults to ``corvar`` (+ ``'_dep'`` for departures)
    kwargs      **          forwarded to the pandas ``plot`` call

    Returns
    -------
    Axes        the primary axes

    Raises
    ------
    ValueError  if ``data`` is not a Panel
    """
    import matplotlib.pyplot as plt
    from raso.clim import trend_fit_timeseries
    if not isinstance(data, pd.Panel):
        raise ValueError("Requires a Panel")

    use_departures = departures is not None
    series = data.ix[corvar, :, p]
    if use_departures:
        series = series - data.ix[departures, :, p]

    trend1 = trend_fit_timeseries(series, anomaly=False)
    if label is None:
        if use_departures:
            label = corvar + '_dep'
        else:
            label = corvar
    label += ' (%5.3f [K/10a])' % (trend1.diff().mean() * 2 * 3650)

    smoothed = series.resample('12h').mean().rolling(window=window, min_periods=min_periods, center=True).mean()
    ax = smoothed.plot(legend=False, ax=ax, label=label, **kwargs)

    if showline:
        # Reuse the colour of the line drawn last so the trend matches its data.
        line_color = ax.get_lines()[-1].get_color()
        trend1.plot(ax=ax, color=line_color, label='', alpha=0.5)

    naxes = len(plt.gcf().get_axes())
    if naxes == 1:
        ax.text('1980', plabel, '%d hPa' % (p / 100), fontsize=10)
        ax2 = ax.twinx()
        ax2 = data.ix[snhtvar, :, p].plot(ax=ax2, label='', color='firebrick')
        ax.set_ylabel(varlabel)
        ax2.set_ylabel('SNHT')
        ax2.yaxis.label.set_color('firebrick')
        ax2.set_ylim(0, 200)
        ax.axhline(y=0, c='k', label='')
        if breaks is not None:
            breaks = filter_series(data.ix[breaks, :, p] > 0)
            for ib in breaks:
                ax.axvline(x=ib, color='lightgray', label='')

        # Same number of ticks on both y-axes, spread over the twin axis'
        # current first-to-last tick range.
        twin_ticks = ax2.get_yticks()
        ax2.set_yticks(np.linspace(twin_ticks[0], twin_ticks[-1], len(ax.get_yticks())))

    ax.set_xlim('1979', '2016')
    return ax
Example #5
0
def detection(data, var='t', thres=50, window=1460, dist=730, levels=None, min_levels=3, database=False, freq='12h',
              valid_times=[0, 12], verbose=0, **kwargs):
    """Break Detection in timeseries using a Standard Normal Homogeneity Test (SNHT)

    The input is copied, reduced to the hours listed in ``valid_times`` and
    brought onto a regular ``freq`` grid. Series and single-level DataFrames
    go through ``_1d_detection``; Panels go through ``_2d_detection``.

    Parameters
    ----------
    data        Series/DataFrame/Panel      Input Radiosonde data (standard)
    var         str/list                    Variable to use for detection
    thres       number                      Threshold for SNHT
    window      number                      Window size for SNHT
    dist        number                      Distance between breakpoints
    levels      list                        only these levels
    min_levels  int                         Minimum required number of levels
    database    bool                        return Panel results as a date-indexed DataFrame
    freq        str                         target frequency: '24h' for one valid time,
                                            '12h' for two valid times
    valid_times list                        hours of the day to keep (only read, never modified)
    verbose     int                         verbosity; > 0 prints the breakpoints found
    kwargs      **                          accepted but not used in this function

    Returns
    -------
    found_breaks    dict                    variable name -> return value of the detection routine
    data            DataFrame/Panel         input + var_breaks + var_snht

    Raises
    ------
    ValueError      wrong input type, wrong ``var`` type, unknown variable, or
                    more than one variable requested for a Series
    RuntimeError    ``freq`` does not match the number of ``valid_times``
    RuntimeWarning  ``valid_times`` has neither one nor two entries
    """
    funcid = '[BD] '

    if not isinstance(data, (pd.Series, pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + " Require a Series, DataFrame or Panel")

    if not isinstance(var, (list, str)):
        raise ValueError(funcid + " Variable needs to be a list or string")

    if isinstance(var, str):
        var = [var]  # as list

    data = data.copy()

    if len(valid_times) == 1:
        if freq != '24h':
            raise RuntimeError("Frequency must be 24 hours then")
    elif len(valid_times) == 2:
        if freq != '12h':
            raise RuntimeError("Frequency must be 12 hours then")
    else:
        # NOTE(review): this raises, so any other number of valid times aborts
        # the call; possibly meant to be warnings.warn(). Kept as is.
        raise RuntimeWarning("Make sure the freqency fits to the valid times")

    if isinstance(data, pd.Series):
        data = data.to_frame()
        if len(var) > 1:
            raise ValueError("Series object has only one variable")
        data.columns = var
        # `var` is a list at this point. The 1d routine, the column lookup and
        # the message all need the single name; using the list raised a
        # TypeError on `var + ": "`.
        ivar = var[0]
        data = data.loc[np.in1d(data.index.hour, valid_times), :]  # VIEW
        data = data.resample(freq).asfreq()
        # run 1d; keyed by variable like the other branches
        found_breaks = {ivar: _1d_detection(data, ivar, window, dist, thres, verbose=verbose)}
        print_verbose(ivar + ": " + str(filter_series(data['%s_breaks' % ivar] > 0).unique()), verbose)
        return found_breaks, data  # dict, dataframe

    if isinstance(data, pd.DataFrame):
        for ivar in var:
            if ivar not in data.columns:
                raise ValueError(funcid + "Variable not found: %s in %s" % (ivar, str(data.columns)))

        data = database_to_panel(data, levels=levels, verbose=verbose)

        if isinstance(data, pd.DataFrame):
            # only one level
            data = data.loc[np.in1d(data.index.hour, valid_times), :]  # VIEW
            data = data.resample(freq).asfreq()
            found_breaks = {}
            for ivar in var:
                found_breaks[ivar] = _1d_detection(data, ivar, window, dist, thres, verbose=verbose)
                print_verbose(ivar + ": " + str(filter_series(data['%s_breaks' % ivar] > 0).unique()), verbose)

            return found_breaks, data  # dict, dataframe

    # Panel
    for ivar in var:
        if ivar not in data.items:
            raise ValueError(funcid + "Variable not found: %s in %s" % (ivar, str(data.items)))

    # Select times:
    data = data.loc[:, np.in1d(data.major_axis.hour, valid_times), :]
    # Resample to frequency
    if data.major_axis.inferred_freq != freq:
        print_verbose(funcid + "Resampling to %s frequency" % freq, verbose)
        newdates = pd.date_range(data.major_axis[0], data.major_axis[-1], freq=freq)
        data = data.reindex(major_axis=newdates)

    data.major_axis.name = 'date'
    if levels is not None:
        data = data.loc[:, :, data.minor_axis.isin(levels)].copy()
        print_verbose(funcid + "Selecting only specified levels ...", verbose)
        print_verbose(funcid + "p-Levels: " + ",".join(["%d" % (ip / 100) for ip in levels]) + ' hPa', verbose)

    # 2D detection
    found_breaks = {}
    for ivar in var:
        found_breaks[ivar] = _2d_detection(data, ivar, window, dist, thres, min_levels,
                                           verbose=verbose)

    if verbose > 0:
        # Single-argument print calls give the same output on Python 2 and 3
        # (double space included, as the old print statement produced).
        print("Minimum amount of levels required:  %s" % (min_levels,))
        print("Breakpoints")
        for ivar in var:
            print("%s:  %s" % (ivar, str(data.major_axis[(data['%s_breaks' % ivar] > 0).all(1)])))

    # Output as Database
    if database:
        print_verbose(funcid + "as database.", verbose)
        data = data.to_frame(filter_observations=False).reset_index().sort_values(by=['date', 'p']).set_index('date', drop=True)

    return found_breaks, data  # dict, panel / database
Example #6
0
def breakpoint_timeseries(data, var, unit='K', p=None, bins=None, departures=False, post_snht=False, raw=False,
                          window=365, min_periods=30, figsize=(16,9), ylim=None, verbose=0, **kwargs):
    """Draw one breakpoint overview figure per pressure level.

    Each figure stacks: the SNHT curve of ``<var>_dep``, the time series of
    the selected variables with the breakpoints marked, one histogram per
    breakpoint and - if any correction variable is present - one histogram of
    the corrected variables per breakpoint.

    Parameters
    ----------
    data            input handed to ``timeseries_preprocessor``
    var             str     base variable name; ``<var>_dep_breaks`` and ``<var>_dep_snht``
                            are requested as well, ``<var>_mcor``, ``<var>_qcor``,
                            ``<var>_qecor``, ``<var>_era``, ``<var>_era_adj`` and
                            ``<var>_dep`` are passed as ``add_vars``
    unit            str     unit used in the axis labels
    p               forwarded to ``timeseries_preprocessor``
    bins            histogram bins; defaults to ``np.arange(0, 50)``
    departures      bool    subtract ``<var>_era`` from every other selected variable
    post_snht       bool    run ``_1d_detection`` (1460, 730, 50) on each selected variable
                            except ``var`` and ``<var>_era`` and add its SNHT to the top panel
    raw             bool    plot the data as is instead of its rolling mean
    window          int     rolling-mean window
    min_periods     int     rolling-mean minimum number of observations
    figsize         tuple   figure size
    ylim            tuple   y-limits of the time series panel; if None they are set to
                            ``int(mean) +/- 5`` of the (smoothed) ``var`` column
    verbose         int     > 0 prints diagnostic information
    kwargs          **      ``title`` and ``xlabel`` are popped from it

    Returns
    -------
    No return statement is present in the portion of the function shown here.

    Raises
    ------
    RuntimeError    if the preprocessor reports no pressure level
    """
    import matplotlib.pyplot as plt
    # NOTE(review): looks like a Python 2 implicit relative import of a sibling module - confirm.
    from timeseries import timeseries_snht
    from raso.breakpoint.detections import _1d_detection
    funcid = '[BTS] '

    pressure_levels, dates, plevs, test, label = timeseries_preprocessor(data,
                                                                         [var, "%s_dep_breaks" % var, "%s_dep_snht" % var],
                                                                         p=p,
                                                                         add_vars=["%s_mcor" % var, "%s_qcor" % var,
                                                                                   "%s_qecor" % var, "%s_era" % var,
                                                                                   "%s_era_adj" % var, "%s_dep" % var],
                                                                         funcid=funcid)
    if plevs is None:
        raise RuntimeError(funcid + " Require an availbale pressure level at least!")
    else:
        if isinstance(plevs, (int,float)):
            plevs = [plevs]  # as list

    if bins is None:
        bins = np.arange(0, 50)

    # Build two parallel lists: the variables to plot and their short legend labels.
    varselection = []
    varlabel = []
    correction_present = False
    for jvar in label:
        if jvar == var:
            # NOTE(review): this replaces the label list instead of appending to it;
            # the two lists only stay aligned if `var` comes first in `label` - confirm.
            varlabel = ['U']
            varselection.append(jvar)
        elif 'mcor' in jvar:
            varlabel.append('M')
            varselection.append(jvar)
            correction_present = True
        elif 'qcor' in jvar:
            varlabel.append('Q')
            varselection.append(jvar)
            correction_present = True
        elif 'qecor' in jvar:
            varlabel.append('QE')
            varselection.append(jvar)
            correction_present = True
        elif 'adj' in jvar:
            varlabel.append('EA')
            varselection.append(jvar)
        elif jvar == "%s_era" % var:
            varlabel.append('E')
            varselection.append(jvar)
        else:
            if verbose > 0:
                print "Not used: ", jvar

    if verbose > 0:
        print varlabel
        print varselection
    #
    title = kwargs.pop('title', 'Radiosonde Breakpoint Detection and Correction')
    # xlabel is popped from kwargs but not used in the portion of the function shown here
    xlabel = kwargs.pop('xlabel', 'Time')

    # With pressure levels the break flags are collapsed with .any(1) before filtering.
    if pressure_levels:
        breaks = filter_series((test["%s_dep_breaks" % var] > 0).any(1))
    else:
        breaks = filter_series((test["%s_dep_breaks" % var] > 0))
    #
    nbreaks = len(breaks)
    #
    # Keep at least one column in the subplot grid.
    # NOTE(review): with no breaks the histogram loop below still indexes breaks[0] - confirm this is safe.
    if nbreaks == 0:
        nbreaks = 1

    nplotlevels = 4  # snht, timeseries, hist, hist
    if not correction_present:
        nplotlevels = 3

    for z, ilev in enumerate(plevs):
        print "%d / %d : %d" % (z+1, len(plevs), ilev)
        if pressure_levels:
            plotdata = test.minor_xs(ilev)
        else:
            plotdata = test
        if verbose > 0:
            print plotdata.shape
            print plotdata.columns

        plotdata = plotdata.resample('12h').asfreq()  # make sure it is the same freq
        #
        f = plt.figure(figsize=figsize)
        #
        # Figure 1 : SNHT
        #
        axs = plt.subplot2grid((nplotlevels, nbreaks), (0, 0), colspan=nbreaks)
        timeseries_snht(plotdata, "%s_dep" % var, ax=axs, label='U')
        axs.set_ylabel('SNHT')
        axs.set_yticklabels(axs.get_yticklabels()[::2])

        if post_snht:
            for k, zvar in enumerate(varselection):
                if (zvar == '%s_era' % var) or (zvar == var):
                    continue

                tmpdata = plotdata.loc[:, zvar].copy().to_frame()
                if verbose > 0:
                    print "SNHT for ", zvar

                # Called for its effect on tmpdata: the '<zvar>_snht' column is read on the next line.
                status = _1d_detection(tmpdata, zvar, 1460, 730, 50)  # default parameters
                axs.plot(tmpdata.index, tmpdata['%s_snht' % zvar], label=varlabel[k], alpha=0.7, ls='--')
            axs.legend(loc="upper left", fontsize=plt.rcParams.get('font.size') - 2, bbox_to_anchor=(1.02, 1),
                       bbox_transform=axs.transAxes)
        #
        plt.setp(axs.get_xticklabels(), visible=False)
        axt = plt.subplot2grid((nplotlevels, nbreaks), (1, 0), colspan=nbreaks, sharex=axs)
        #
        # DEPARTURES
        #
        if departures:
            for jvar in varselection:
                if jvar == "%s_era" % var:
                    continue
                plotdata[jvar] = plotdata[jvar] - plotdata["%s_era" % var]  # ref !!

        #
        if raw:
            pd_mean = plotdata
        else:
            pd_mean = plotdata.rolling(window=window, min_periods=min_periods).mean()
        #
        # Figure 2 : Values
        #
        for j, jvar in enumerate(varselection):
            if departures and jvar == '%s_era' % var:
                continue
            axt.plot(pd_mean.index, pd_mean[jvar], lw=2, label=varlabel[j])

        axs.set_xticks(np.unique(pd_mean.index.year.astype('str'))[::2])  # ticks
        axs.set_title(title + " %d hPa" % int(ilev/100))
        # NOTE(review): grid() is called twice in a row on the same axes - confirm the intended end state.
        axs.grid()  # overwrite
        axs.grid()
        axt.set_xticks(np.unique(pd_mean.index.year.astype('str'))[::2])  # ticks
        # ylim is assigned here, so limits derived for the first level are reused for all later levels.
        if ylim is None:
            ym = int(pd_mean[var].mean())
            ys = 5 # int(pd_mean[var].std())*2
            ylim = (ym-ys, ym+ys)

        axt.set_ylim(ylim)

        axt.set_yticks(axt.get_yticks()[::2])

        # NOTE(review): the "dep." label is used when departures is False - the branches look swapped; confirm.
        if departures:
            axt.set_ylabel("%s [%s]" % (var.upper(),unit))
        else:
            axt.set_ylabel("%s dep. [%s]" % (var.upper(),unit))

        axt.grid()
        # axt.legend(loc="upper center", ncol=5, fontsize=plt.rcParams.get('font.size') - 2)  # bbox_to_anchor=(1, 1))
        axt.legend(loc="upper left", fontsize=plt.rcParams.get('font.size') - 2, bbox_to_anchor=(1.02, 1),
                   bbox_transform=axt.transAxes)

        # Mark every breakpoint and label it B<nn>, counting down from nbreaks.
        [axt.axvline(x=ib, color='k') for ib in breaks]
        [axt.text(ib, pd_mean[var].min(), "B%02d" % (nbreaks - i), color='k') for i, ib in enumerate(breaks)]
        #
        #
        # Figure 3: Histograms for each breakpoint
        #
        for ibreak in range(nbreaks):
            # The first histogram axes owns the y-axis; all later ones share it.
            if ibreak == 0:
                ax1 = plt.subplot2grid((nplotlevels, nbreaks), (2, ibreak))
                axi = ax1
            else:
                axi = plt.subplot2grid((nplotlevels, nbreaks), (2, ibreak), sharey=ax1)
            # Reference
            # from this break to the next one (open-ended for the last break)
            if ibreak + 1 < nbreaks:
                iref = slice(breaks[ibreak], breaks[ibreak + 1])
            else:
                iref = slice(breaks[ibreak], None)
            # Biased Sample
            # from the previous break (or the start of the data) to this break
            if ibreak > 0:
                ibiased = slice(breaks[ibreak - 1], breaks[ibreak])
            else:
                ibiased = slice(None, breaks[ibreak])
            #

            axi.hist(plotdata.ix[iref, var], bins=bins, normed=1, label='BB')
            if "%s_era_adj" % var in varselection:
                axi.hist(plotdata.ix[iref, "%s_era_adj" % var], bins=bins, normed=1, label='EA', alpha=0.6)
            axi.hist(plotdata.ix[ibiased, var], bins=bins, normed=1, label='AB', alpha=0.6)
            #
            if ibreak == 0:
                axi.set_yticks(axi.get_yticks()[::2])

            axi.grid()
            axi.set_title("B%02d" % (nbreaks - ibreak), y=0.75)
            if ibreak > 0:
                plt.setp(axi.get_yticklabels(), visible=False)
            #
            # Figure 4: Correction Histograms for each breakpoint
            #
            if correction_present:
                axj = plt.subplot2grid((nplotlevels, nbreaks), (3, ibreak), sharey=ax1, sharex=axi)
                plt.setp(axi.get_xticklabels(), visible=False)
                alpha=1
                # Every variable with 'cor' in its name; opacity drops by 0.1 per position in varselection.
                for j, jvar in enumerate(varselection):
                    if 'cor' in jvar:
                        axj.hist(plotdata.ix[ibiased, jvar], bins=bins, normed=1, label=varlabel[j], alpha=(alpha-0.1*j))

                axj.grid()
                axj.set_xlabel('%s [%s]' % (var.upper(), unit))
                if ibreak > 0:
                    plt.setp(axj.get_yticklabels(), visible=False)

        # Legends are attached to the axes of the last breakpoint only.
        axi.legend(loc='upper left', fontsize=plt.rcParams.get('font.size') - 2, bbox_to_anchor=(1.05, 1))
        if correction_present:
            axj.legend(loc='upper left', fontsize=plt.rcParams.get('font.size') - 2, bbox_to_anchor=(1.05, 1))