Ejemplo n.º 1
0
def detect_and_correct_daynight(data, var='dpd', thres=50, correct_m=True, correct_q=True, quantilen=None, levels=None,
                                sample_size=730, borders=180, bounded=(0, 60), verbose=0, **kwargs):
    """Detect and correct breakpoints separately for the 00Z and 12Z launches

    The departures (<var> - <var>_era) are searched for breakpoints twice,
    once with valid_times=[0] and once with valid_times=[12]. For each launch
    time that reports breaks, the selected adjustments (mean and / or
    quantile) are applied. Both results are concatenated along axis 1; the
    break column of the 00Z result is multiplied by -1 beforehand.

    uses raso.timeseries.breakpoint.detection / correction

    Parameters
    ----------
    data            pd.DataFrame    Radiosonde Database (a <var>_dep column is added if missing)
    var             str             Variable: t, dpd
    thres           int             SNHT Threshold
    correct_m       bool            apply the mean adjustment ?
    correct_q       bool            apply the quantile and ERA quantile adjustment ?
    quantilen       list/array      Quantile Ranges (default: 0, 10, ..., 100)
    levels          list            Pressure levels
    sample_size     int             minimum Sample size
    borders         int             biased sample before and after a break
    bounded         tuple           forwarded to the correction routines
    verbose         int             verboseness
    kwargs          dict            forwarded to breakpoint.detection

    Returns
    -------
    combined data, dict with the break keys for '00Z' and '12Z'

    Raises
    ------
    ValueError      neither <var>_dep nor both <var> and <var>_era are available
    """
    funcid = "[DC] Data "
    dep_name = '%s_dep' % var
    era_name = '%s_era' % var
    breaks_name = '%s_dep_breaks' % var

    if hasnames(data, dep_name):
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)
    elif hasnames(data, [var, era_name]):
        data[dep_name] = data[var] - data[era_name]  # Departures
    else:
        raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    # Both detections run before any correction (00Z first, then 12Z).
    # might not be data for both ?
    (ibreaks1, night), (ibreaks2, noon) = [
        breakpoint.detection(data, dep_name,
                             thres=thres,
                             levels=levels,
                             valid_times=[hour],
                             freq='24h',
                             verbose=verbose - 1,
                             **kwargs)
        for hour in (0, 12)
    ]

    def _adjust(sample):
        # Apply the selected corrections to one launch time and report the
        # statistics of the last mean / quantile correction that ran.
        stats = {}
        common = dict(sample_size=sample_size, bounded=bounded, verbose=verbose - 1)
        if correct_m:
            # Mean Correction
            stats, sample = breakpoint.mean_correction(sample, var, breaks_name, borders=borders, **common)
        if correct_q:
            # Quantile Correction
            stats, sample = breakpoint.quantile_correction(sample, var, breaks_name,
                                                           quantilen=quantilen, borders=borders, **common)
            # ERA Quantile Correction (its statistics are not reported)
            _, sample = breakpoint.quantile_era_correction(sample, var, era_name, breaks_name,
                                                           quantilen=quantilen, borders=None, **common)
        return stats, sample

    night_breaks, day_breaks = {}, {}
    if ibreaks1[dep_name]:
        night_breaks, night = _adjust(night)
    if ibreaks2[dep_name]:
        day_breaks, noon = _adjust(noon)

    # night breaks are flagged with a negative sign
    night[breaks_name] *= -1
    # recombine day and night
    combined = pd.concat([night, noon], axis=1, join='outer')
    return combined, {'00Z': night_breaks.keys(), '12Z': day_breaks.keys()}
Ejemplo n.º 2
0
def detect_and_correct_loop(data, var='dpd', thres=50, iteration=1, correct_m=True, correct_q=True,
                            quantilen=None, levels=None, sample_size=730, borders=180, bounded=(0, 60),
                            verbose=0, **kwargs):
    """Detect and Correct Radiosonde biases from Departure Statistics (iterative)
    Use ERA-Interim departures to detect breakpoints and
    correct these with a mean and a quantile adjustment going back in time.
    The correction step is repeated `iteration` times; after every mean
    correction the detection is rerun on the mean-corrected departures.

    uses raso.timeseries.breakpoint.detection / correction

    Parameters
    ----------
    data            DataFrame/Panel Radiosonde Database (a <var>_dep item is added if missing)
    var             str             Variable: t, dpd
    thres           int             SNHT Threshold
    iteration       int             Number of Detect and Correct Loops
    correct_m       bool            Correct Mean Adjust ?
    correct_q       bool            Correct Quantile Adjust ?
    quantilen       list/array      Quantile Ranges (default: 0, 10, ..., 100)
    levels          list            Pressure levels
    sample_size     int             minimum Sample size
    borders         int             biased sample before and after a break
    bounded         tuple           forwarded to the correction routines
    verbose         int             verboseness
    kwargs          dict            forwarded to breakpoint.detection

    Returns
    -------
    Panel, dict     corrected data and {'breaks': keys of the last correction statistics}

    Raises
    ------
    ValueError      data is no DataFrame / Panel, or the departure variables are missing
    """
    funcid = "[DCL] Data "
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a DataFrame or Panel")

    dep_name = '%s_dep' % var
    era_name = '%s_era' % var
    init_breaks_name = '%s_dep_initbreaks' % var
    mcor_breaks_name = '%s_mcor_dep_breaks' % var
    qcor_breaks_name = '%s_qcor_dep_breaks' % var

    if not hasnames(data, dep_name):
        if not hasnames(data, [var, era_name]):
            raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

        data[dep_name] = data[var] - data[era_name]  # Departures

    else:
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    # Detect in Departures (first pass)
    # Detect breakpoints > to panel
    ibreaks, new = breakpoint.detection(data, dep_name,
                                        thres=thres,
                                        levels=levels,
                                        verbose=verbose - 1,
                                        **kwargs)
    breakpoints = []
    # Keep the first detection as *_initbreaks / *_initsnht. After this rename
    # the item '<var>_dep_breaks' no longer exists in `new`.
    new.rename(items={'%s_dep_breaks' % var: init_breaks_name,
                      '%s_dep_snht' % var: '%s_dep_initsnht' % var}, inplace=True)

    if ibreaks[dep_name]:
        initial_breaks = sorted(np.where((new[init_breaks_name] > 0).any(1))[0])
        mcor_breaks = len(initial_breaks)
        qcor_breaks = len(initial_breaks)

        for irun in range(iteration):
            # Mean Correction
            if correct_m and mcor_breaks > 0:
                if irun == 0:
                    new[mcor_breaks_name] = new[init_breaks_name]  # copy breakpoints
                bmstat, new = breakpoint.mean_correction(new, var, mcor_breaks_name,
                                                         sample_size=sample_size,
                                                         borders=borders,
                                                         bounded=bounded,
                                                         varcopy=False,   # reuse mcor
                                                         verbose=verbose - 1)
                # works on >> var_mcor
                breakpoints = bmstat.keys()
                # create Departures for Detection
                new["%s_mcor_dep" % var] = new["%s_mcor" % var] - new[era_name]
                # Detection on the mean-corrected departures
                ibreaks, new = breakpoint.detection(new, '%s_mcor_dep' % var, thres=thres, levels=levels,
                                                    verbose=verbose - 1, **kwargs)
                # NOTE(review): ibreaks is indexed by variable name above, so
                # len() presumably counts variables rather than breaks - confirm
                mcor_breaks = len(ibreaks)

            if correct_q and qcor_breaks > 0:
                if irun == 0:
                    new[qcor_breaks_name] = new[init_breaks_name]  # copy breakpoints

                # Quantile Correction, driven by the copied initial breaks:
                # '<var>_dep_breaks' was renamed away after the first detection.
                bqstat, new = breakpoint.quantile_correction(new, var, qcor_breaks_name,
                                                             quantilen=quantilen,
                                                             sample_size=sample_size,
                                                             borders=borders,
                                                             bounded=bounded,
                                                             verbose=verbose - 1)
                breakpoints = bqstat.keys()
                # individual or not ?
                # ERA Quantile Correction (its statistics are not reported)
                _, new = breakpoint.quantile_era_correction(new, var, era_name, qcor_breaks_name,
                                                            quantilen=quantilen,
                                                            sample_size=sample_size,
                                                            borders=None,
                                                            bounded=bounded,
                                                            verbose=verbose - 1)

    return new, {'breaks': breakpoints}
Ejemplo n.º 3
0
def clim(data, anomaly=False, per_month=None, attach=False, how='mean', period=None, verbose=0):
    """Calculate a climatology for a Series, DataFrame or Panel.

    The computation itself is delegated to clim_timeseries: directly for a
    Series, column by column for the numeric columns of a DataFrame, and item
    by item for a Panel. A DataFrame with a 'p' column holding more than one
    distinct value is treated as a database and converted to a Panel
    (indexed by 'date' and 'p') first.

    Parameters
    ----------
    data            Series / DataFrame / Panel      Input data (a copy is processed)
    anomaly         bool                            climate anomaly
    per_month       str                             return monthly values: clim, count, all
    attach          bool                            attach to data (not possible together with per_month)
    how             str                             climatological moment
    period          -                               forwarded unchanged to clim_timeseries
    verbose         int                             verboseness

    Returns
    -------
    Series / DataFrame / Panel
    (if the Panel cannot be assembled, the error is printed and the dict of
    per-item results is returned instead)

    Raises
    ------
    ValueError      unknown per_month option or unsupported data type
    RuntimeError    attach requested together with per_month
    """
    if per_month is not None and per_month not in ['clim', 'count', 'all']:
        raise ValueError("per_month: clim, count or all")

    if attach and per_month is not None:
        raise RuntimeError("[CLIM] Can not attach a reduced shape to original data")

    if not isinstance(data, (pd.Series, pd.DataFrame, pd.Panel)):
        raise ValueError("[CLIM] Require a Series, DataFrame or Panel")

    data = data.copy()
    # For Series
    if isinstance(data, pd.Series):
        return clim_timeseries(data, anomaly=anomaly, per_month=per_month, attach=attach, how=how, period=period,
                               verbose=verbose-1)

    # For DataFrame
    if isinstance(data, pd.DataFrame):
        if 'p' in data.columns and len(data['p'].unique()) > 1:
            print_verbose("[CLIM] database detected converting to panel", verbose)
            data = data.reset_index().set_index(['date', 'p']).to_panel()  # To Panel

        else:
            tmp = data.select_dtypes(include=['number']).apply(clim_timeseries, axis=0, per_month=per_month,
                                                               anomaly=anomaly, how=how, attach=attach, period=period,
                                                               verbose=verbose-1)
            if attach:
                # Join back exactly the columns that were not processed above;
                # the exact complement of include=['number'] avoids joining a
                # numeric column that is already part of tmp.
                others = data.select_dtypes(exclude=['number'])
                if not others.empty:
                    tmp = tmp.join(others)

            return tmp

    # Panel (explicit loop; Panel.apply was found to be relatively slow)
    tmp = {}
    for it in data.items:
        tmp[it] = clim(data[it], anomaly=anomaly, per_month=per_month, attach=attach, how=how, period=period,
                       verbose=verbose - 1)

    try:
        tmp = pd.Panel(tmp)

    except Exception as e:
        # best effort: report the problem and hand back the per-item dict
        print(repr(e))

    return tmp
Ejemplo n.º 4
0
def detect_and_correct(data, var='dpd', thres=50, correct_m=True, correct_q=True, quantilen=None,
                       levels=None, sample_size=730, borders=180, bounded=(0, 60), verbose=0, **kwargs):
    """Detect breakpoints in the departures and adjust the variable

    The departures (<var> - <var>_era) are searched for breakpoints once.
    If breaks are reported, the selected adjustments are applied:
    a mean correction and / or a quantile correction followed by an
    ERA quantile correction.

    uses raso.timeseries.breakpoint.detection / correction

    Parameters
    ----------
    data            DataFrame/Panel Radiosonde Database (a <var>_dep item is added if missing)
    var             str             Variable: t, dpd
    thres           int             SNHT Threshold
    correct_m       bool            Correct Mean Adjust ?
    correct_q       bool            Correct Quantile Adjust ?
    quantilen       list/array      Quantile Ranges (default: 0, 10, ..., 100)
    levels          list            Pressure levels
    sample_size     int             minimum Sample size
    borders         int             biased sample before and after a break
    bounded         tuple           forwarded to the correction routines
    verbose         int             verboseness
    kwargs          dict            forwarded to breakpoint.detection

    Returns
    -------
    Panel, dict     result of detection / correction and {'breaks': keys of the last correction statistics}

    Raises
    ------
    ValueError      data is no DataFrame / Panel, or the departure variables are missing
    """
    funcid = "[DC] Data "
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a DataFrame or Panel")

    dep_name = '%s_dep' % var
    era_name = '%s_era' % var
    breaks_name = '%s_dep_breaks' % var

    if hasnames(data, dep_name):
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)
    elif hasnames(data, [var, era_name]):
        data[dep_name] = data[var] - data[era_name]  # Departures
    else:
        raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    # Detect breakpoints in the departures (adds *_breaks, *_snht)
    found, result = breakpoint.detection(data, dep_name,
                                         thres=thres,
                                         levels=levels,
                                         verbose=verbose - 1,
                                         **kwargs)
    if not found[dep_name]:
        # nothing detected, nothing to correct
        return result, {'breaks': []}

    breakpoints = []
    common = dict(sample_size=sample_size, bounded=bounded, verbose=verbose - 1)

    if correct_m:
        # Mean Correction
        mean_stats, result = breakpoint.mean_correction(result, var, breaks_name, borders=borders, **common)
        breakpoints = mean_stats.keys()

    if correct_q:
        # Quantile Correction
        quant_stats, result = breakpoint.quantile_correction(result, var, breaks_name,
                                                             quantilen=quantilen, borders=borders, **common)
        breakpoints = quant_stats.keys()
        # ERA Quantile Correction (its statistics are not reported)
        _, result = breakpoint.quantile_era_correction(result, var, era_name, breaks_name,
                                                       quantilen=quantilen, borders=None, **common)

    return result, {'breaks': breakpoints}
Ejemplo n.º 5
0
def trend(data, timeframe, deg=1, fit=False, anomaly=True, per_month=False, pmiss=0.3, freq='12h', verbose=0,
          debug=False, **kwargs):
    """ Calculate poly-linear trend per level and variable

    The data is cut to `timeframe`, reindexed onto a regular date range of
    frequency `freq`, and handed to trend_timeseries (or to
    trend_fit_timeseries when fit=True): directly for a Series, column by
    column for a DataFrame, per pressure level for a DataFrame with a 'p'
    column, and per slice along axis=(1, 2) for a Panel.

    Parameters
    ----------
    data            Series / DataFrame / Panel  Input Data (a copy is processed)
    timeframe       slice / None            Timeslice (None: first to last year of the data)
    deg             int                     polynomial degree (forwarded as poly_deg)
    fit             bool                    return residuum
    anomaly         bool                    remove climatology
    per_month       bool                    calculate trends per month
                                            (only forwarded to the recursive Panel call here)
    pmiss           float                   percentage of missing values allowed
    freq            str                     resampling frequency
    verbose         int                     verbosness
    debug           bool                    debug
    kwargs          dict                    forwarded to trend_timeseries / trend_fit_timeseries

    Returns
    -------
    result of the trend routines for a Series, otherwise DataFrame or Panel

    Raises
    ------
    ValueError      data is no Series / DataFrame / Panel, or timeframe is no slice
    RuntimeWarning  (raised as an exception) more than 60 pressure levels were found
    """
    # Validate first, so unsupported input yields the intended ValueError
    # instead of failing inside .copy()
    if not isinstance(data, (pd.Series, pd.DataFrame, pd.Panel)):
        raise ValueError("Requires a Series, DataFrame or Panel")

    if timeframe is not None and not isinstance(timeframe, slice):
        raise ValueError("Requires a slice")

    data = data.copy()
    # NOTE(review): the missing-value threshold is only derived when no
    # timeframe is given; with an explicit timeframe (always the case in the
    # Panel recursion) miss stays None and pmiss has no effect - confirm intent
    miss = None

    if isinstance(data, pd.Series):
        print_verbose("[TREND] Series", verbose)

        if timeframe is None:
            timeframe = slice(str(data.index[0].year), str(data.index[-1].year))
            miss = len(pd.date_range(timeframe.start, timeframe.stop, freq=freq)) * (1 - pmiss)

        data = data[timeframe]  # Select
        timeframe = pd.date_range(timeframe.start, timeframe.stop, freq=freq)
        data = data.reindex(timeframe)

        # forward deg / debug exactly like the DataFrame branch does
        if fit:
            return trend_fit_timeseries(data, poly_deg=deg, miss=miss, debug=debug, **kwargs)

        return trend_timeseries(data, poly_deg=deg, anomaly=anomaly, debug=debug, **kwargs)

    elif isinstance(data, pd.DataFrame):
        print_verbose("[TREND] DataFrame", verbose)
        if timeframe is None:
            timeframe = slice(str(data.index[0].year), str(data.index[-1].year))
            miss = len(pd.date_range(timeframe.start, timeframe.stop, freq=freq)) * (1 - pmiss)

        data = data.ix[timeframe, :]
        timeframe = pd.date_range(timeframe.start, timeframe.stop, freq=freq)
        data = data.reindex(timeframe)  # resample to right size

        # DATABASE ?
        if 'p' in data.columns and len(data['p'].unique()) > 0:
            out = {}
            # keep only levels where every column has more than 10 values
            plevels = filter_series((data.groupby('p').count() > 10).all(1))
            plevels = plevels.tolist()
            if len(plevels) > 60:
                raise RuntimeWarning("[TREND] more than 60 pressure levels ? %d" % len(plevels))

            for ip in plevels:
                tmp = data.query('p==%d' % ip).drop('p', 1)
                if fit:
                    out[ip] = tmp.apply(trend_fit_timeseries, axis=0, poly_deg=deg, miss=miss, debug=debug, **kwargs)
                else:
                    out[ip] = tmp.apply(trend_timeseries, axis=0, poly_deg=deg, anomaly=anomaly, debug=debug, **kwargs)

            out = pd.Panel(out).squeeze()
            if isinstance(out, pd.Panel):
                return out.swapaxes(0, 2)
            return out
        else:
            # PANEL SLICE
            print_verbose("[TREND] whole DataFrame", verbose)
            if fit:
                return data.apply(trend_fit_timeseries, axis=0, poly_deg=deg, miss=miss, debug=debug, **kwargs)
            else:
                return data.apply(trend_timeseries, axis=0, poly_deg=deg, anomaly=anomaly, debug=debug, **kwargs)

    elif isinstance(data, pd.Panel):
        print_verbose("[TREND] Panel", verbose)
        if timeframe is None:
            timeframe = slice(str(data.major_axis[0].year),
                              str(data.major_axis[-1].year))  # everyone has the same timeframe
        # every slice is processed by the DataFrame branch above
        return data.apply(lambda x: trend(x, timeframe, deg=deg, fit=fit, anomaly=anomaly, per_month=per_month,
                                          pmiss=pmiss, freq=freq, debug=debug, **kwargs), axis=(1, 2))