예제 #1
0
def inquire(isonde):
    """Summarise every data set attached to a radiosonde object.

    Parameters
    ----------
    isonde          radiosonde class    Radiosonde

    Returns
    -------
    DataFrame
        One row per variable in ``isonde.vars`` (``'station'`` is skipped)
        with the columns:

        shape   number of dimensions of the data set
        dates   size of the major axis (Panel) or number of unique index
                values (DataFrame)
        plev    size of the minor axis (Panel) or number of unique ``p``
                values (DataFrame, only if a ``p`` column exists)
        varis   number of items (Panel) or columns (DataFrame)
        ptqr    mean over the monthly bins of the largest count among the
                available ``r``/``q``/``dpd`` variables
        uv      mean over the monthly bins of the smaller of the ``u``/``v``
                counts

    Raises
    ------
    ValueError
        If ``isonde`` is not a radiosonde instance.
    """
    if not isinstance(isonde, radiosonde):
        raise ValueError("Inquire requires a radiosonde object")

    moisture = ('r', 'q', 'dpd')
    summary = {}
    for name in isonde.vars:
        if name == 'station':
            continue

        obj = getattr(isonde, name)
        info = {'shape': 0, 'dates': 0, 'plev': 0, 'varis': 0, 'ptqr': 0, 'uv': 0}
        info['shape'] = len(obj.shape)

        if isinstance(obj, pd.Panel):
            # 3D container: items x dates x pressure levels
            labels = obj.items
            info['dates'] = obj.major_axis.size
            info['plev'] = obj.minor_axis.size
            info['varis'] = labels.size
            monthly = obj.count(axis='minor').resample('M').sum()
        else:
            # flat table indexed by date, pressure (if any) is a column
            labels = obj.columns
            info['dates'] = len(obj.index.unique())
            if 'p' in labels:
                info['plev'] = len(obj.p.unique())

            info['varis'] = labels.size
            monthly = obj.resample('M').count()

        # NOTE(review): the exact meaning of hasnames(..., value=0) is defined
        # elsewhere; it is used here as "none of the moisture names present".
        if hasnames(obj, ['t', 'p']) and not hasnames(obj, list(moisture), value=0):
            present = labels[labels.isin(list(moisture))].tolist()
            info['ptqr'] = int(monthly[present].max(1).mean())

        if hasnames(obj, ['u', 'v']):
            info['uv'] = int(monthly[['u', 'v']].min(1).mean())

        summary[name] = info

    # variables become rows, the summary keys become columns
    return pd.DataFrame(summary).T
예제 #2
0
def detect_and_correct(isonde, data='std_data', var='dpd', quality_controlled=True, savename='sdcor', save=True,
                       tfirst=True, daynight=False, verbose=0, **kwargs):
    """Detect and Correct Radiosonde biases from Departure Statistics
    Use ERA-Interim departures to detect breakpoints and
    correct these with a mean and a quantile adjustment going back in time.

    uses raso.timeseries.breakpoint.detection / correction

    The corrected data set is stored on ``isonde`` under ``savename`` and the
    run options are recorded in ``isonde.detect_opts`` under the key
    ``"<savename>_<var>"``.

    Parameters
    ----------
    isonde              radiosonde      Radiosonde class object
    data                str             Radiosonde variable (name in isonde.vars)
    var                 str             Variable
    quality_controlled  bool            Use QC to remove flagged values?
    savename            str             store name
    save                bool            Save?
    tfirst              bool            Correct Temperature first ?
                                        (switched off when var == 't')
    daynight            bool            Correct Day and Night Soundings separate?
    verbose             int             verboseness

    Additional Parameters
    ---------------------
    All keyword arguments are forwarded unchanged to the detection/correction
    routines. Only thres, borders, sample_size and quantilen are read here
    (with the defaults below) to write them to the history and the result.

    thres               int             [50]    SNHT Threshold
    quantilen           list/array      [0-100] Quantile Ranges (default: steps of 10)
    levels              list            [None]  Pressure levels
    sample_size         int             [730]   minimum Sample size
    borders             int             [180]   biased sample before and after a break
    bias30k             bool            [T]     remove 30K Bias for dpd ?
    kwargs              dict                    breakpoint.detection, breakpoint.correction ...

    Returns
    -------
    stat
        Status mapping returned by the detection routine for ``var``, extended
        with the keys 'thres', 'borders', 'sample_size', 'quantilen', 'source',
        'savename', 'variable', 'daynight' and, if temperature was corrected
        first and breaks were found, 't-breaks' / 't-dpd-breaks'.

    Raises
    ------
    ValueError
        If ``isonde`` is not a radiosonde, is empty, or lacks ``data``.
    """
    from ..detect_and_correct import detect_and_correct as detect_and_correct_data
    from ..detect_and_correct import detect_and_correct_daynight
    from .. import standard_dewpoint_depression, standard_water_vapor

    funcid = "[DC] Sonde "
    if not isinstance(isonde, radiosonde):
        raise ValueError(funcid + "requires a radiosonde object!")

    if isonde.is_empty:
        raise ValueError(funcid + "Radiosonde is empty!")

    funcid = "[DC] %s " % isonde.id
    if data not in isonde.vars:
        raise ValueError(funcid + "Required variable (%s) not present" % data)

    prof = getattr(isonde, data).copy()  # work on a copy, the source stays untouched
    print_verbose(funcid+"Savename: %s" % savename, verbose)
    if hasnames(prof, 'qual'):
        #
        # drop all the values that have a qual flag
        #
        if quality_controlled:
            journal(funcid + "QC of %s " % data, isonde.history, verbose)
            prof = enforcer(prof)  # Work on FLAGS, but not DPD 30, makes a copy
            #
            # set DPD30 to missing (rows whose flag string contains '3')
            #
            if hasnames(prof, 'dpd'):
                prof['dpd'] = np.where(prof.qual.str.contains('3'), np.nan, prof.dpd.values)

            if hasnames(prof, 'td'):
                prof['td'] = np.where(prof.qual.str.contains('3'), np.nan, prof.td.values)

        del prof['qual']  # remove all flag information
        print_verbose(funcid + " dropping qual ...", verbose)

    if hasnames(prof, 'orig'):
        del prof['orig']  # indicates interpolated or not

    if 'dpd' in var:
        if not hasnames(prof, 'dpd_era'):
            prof['dpd_era'] = prof['t_era'] - prof['td_era']
            print_verbose(funcid + " Calculating dpd_era ...", verbose)

    # Both stay None unless the temperature-first pass below fills them in.
    # dbreaks in particular is only set when a '<var>_tcor' column exists.
    tbreaks = None
    dbreaks = None
    if var == 't' and tfirst:
        tfirst = False
        print_verbose(funcid + "tfirst=True only with temperature dependent variables", 1)

    if tfirst:
        journal(funcid + "Running T D&C first! ", isonde.history, verbose)
        #
        # Only Mean Adjustment for Temperature
        #
        if not daynight:
            prof, tbreaks = detect_and_correct_data(prof, var='t', correct_q=False, bounded=None,
                                                    report=isonde.history, verbose=verbose - 1, **kwargs)
        else:
            prof, tbreaks = detect_and_correct_daynight(prof, var='t', correct_q=False, bounded=None,
                                                        report=isonde.history, verbose=verbose - 1, **kwargs)
            tbreaks['breaks'] = tbreaks['00Z'] + tbreaks['12Z']  # 00Z and 12Z breaks
        #
        # new Columns: t_mcor, t_dep, t_dep_breaks, t_dep_snht
        #
        if len(tbreaks['breaks']) > 0:
            journal(funcid + "T-breaks: %s" % str(tbreaks['breaks']), isonde.history, verbose)
            # Recalculate Temperature Dependent Variables:
            prof = standard_water_vapor(prof, tvar='t_mcor', vpvar='vp_tcor', replace=True, report=isonde.history)
            prof = standard_dewpoint_depression(prof, tvar='t_mcor', dpdvar='dpd_tcor', vpvar='vp_tcor',
                                                tdvar='td_tcor', replace=True, report=isonde.history)
            # remove quality Flag again (only if the recalculation added one,
            # same guard as for the first removal above)
            if hasnames(prof, 'qual'):
                del prof['qual']

            if hasnames(prof, '%s_tcor' % var):
                journal(funcid + "Running t-correct %s D&C ..." % var, isonde.history, verbose)
                # temporary reference column so the detection finds '<var>_tcor_era'
                prof['%s_tcor_era' % var] = prof['%s_era' % var]
                if not daynight:
                    prof, dbreaks = detect_and_correct_data(prof, var='%s_tcor' % var, report=isonde.history,
                                                            verbose=verbose - 1, **kwargs)
                else:
                    prof, dbreaks = detect_and_correct_daynight(prof, var='%s_tcor' % var, report=isonde.history,
                                                                verbose=verbose - 1, **kwargs)
                    dbreaks['breaks'] = dbreaks['00Z'] + dbreaks['12Z']  # Combine

                del prof['%s_tcor_era' % var]

        else:
            journal(funcid + "No T breakpoints. ", isonde.history, verbose)

    journal(funcid + "Running %s D&C" % var, isonde.history, verbose)
    if not daynight:
        prof, stat = detect_and_correct_data(prof, var=var, report=isonde.history, verbose=verbose, **kwargs)

    else:
        prof, stat = detect_and_correct_daynight(prof, var=var, report=isonde.history, verbose=verbose, **kwargs)
        stat['breaks'] = stat['00Z'] + stat['12Z']  # Combine lists

    #
    isonde.add_data(savename, prof, replace=True, verbose=verbose)
    #
    # Options (same defaults as documented above, recorded for the history)
    #
    thres = kwargs.get('thres', 50)
    borders = kwargs.get('borders', 180)
    sample_size = kwargs.get('sample_size', 730)
    quantilen = kwargs.get('quantilen', np.arange(0, 101, 10))  # missing from history > add
    journal(funcid + "%s (T:%d, N:%d, B:%d, Q:%d)" % (var, int(thres), sample_size, borders, len(quantilen)),
            isonde.history, 0)

    if tbreaks is not None and len(tbreaks['breaks']) > 0:
        stat['t-breaks'] = tbreaks['breaks']
        # dbreaks is None when no '<var>_tcor' column was available, i.e. the
        # t-corrected detection never ran.
        if dbreaks is not None and len(dbreaks['breaks']) > 0:
            stat['t-dpd-breaks'] = dbreaks['breaks']

    stat['thres'] = thres
    stat['borders'] = borders
    stat['sample_size'] = sample_size
    stat['quantilen'] = quantilen
    stat['source'] = data
    stat['savename'] = savename
    stat['variable'] = var
    stat['daynight'] = daynight

    if 'detect_opts' not in isonde.attrs:
        isonde.add_attr('detect_opts', {"%s_%s" % (savename, var): stat})  # sdcor_dpd
    else:
        isonde.detect_opts["%s_%s" % (savename, var)] = stat  # update

    if save:
        isonde.save(var=savename, update=True, verbose=verbose)

    return stat
예제 #3
0
def mean_correction(data, var, breakvar, sample_size=730, borders=180, database=False, bounded=None, varcopy=True,
                    verbose=0):
    """ Mean Correction of breakpoints

    Walks backwards through the flagged breakpoints of every variable and adds
    the mean departure (``mean_departure``) between the reference sample and
    the biased sample to the biased segment. Results are written to a
    ``<var>_mcor`` item/column of ``data``.

    Parameters
    ----------
    data            DataFrame/Panel     input data; a DataFrame with a 'p' column
                                        is converted to a Panel (its index is
                                        renamed to 'date' in place), a DataFrame
                                        without 'p' is treated as a 1D series set
    var             str/list            variable name(s) to correct
    breakvar        str/list            name(s) holding the break flags (> 0 marks
                                        a break); a single str is used for all var
    sample_size     int                 passed to sample_indices / mean_departure
    borders         int                 passed to sample_indices
    database        bool                return only the data, flattened to a
                                        DataFrame indexed by 'date'
    bounded         tuple               (lower, upper); adjustments that would
                                        move a value outside are set to 0
    varcopy         bool                start from a fresh copy of var even if
                                        '<var>_mcor' already exists
    verbose         int                 verboseness

    Returns
    -------
    stat, data
        ``stat`` maps str(break date) to a dict with the keys 'i', 'isample',
        'ibiased', 'iref', 'mcor', 'nref', 'nsamp'. It is rebuilt for every
        variable, so it describes the LAST entry of ``var`` only (empty dict
        if there were no breaks).
        If ``database`` is True only the DataFrame is returned (no stat).

    Raises
    ------
    ValueError
        If ``data`` is neither DataFrame nor Panel, or a variable is missing.
    """
    from departures import mean_departure
    from support_functions import sample_indices
    funcid = '[CM] '

    if isinstance(var, str):
        var = [var]  # as list

    if isinstance(breakvar, str):
        breakvar = [breakvar] * len(var)  # as list

    if bounded is None:
        ubound = None
        lbound = None
    else:
        lbound, ubound = bounded

    pressure_levels = True
    if isinstance(data, pd.DataFrame):
        if 'p' in data.columns:
            # 2D
            print(funcid + " database detected > conversion to Panel")

            for ivar, jvar in zip(var, breakvar):
                if not data.columns.isin([ivar, jvar]).sum() == 2:
                    raise ValueError(funcid + "Variable not found: %s in %s" % (ivar, str(data.columns)))

            data.index.name = 'date'
            data = data.reset_index().set_index(['date', 'p']).to_panel()
        else:
            # only 1D
            pressure_levels = False

    elif isinstance(data, pd.Panel):
        for ivar, jvar in zip(var, breakvar):
            if not data.items.isin([ivar, jvar]).sum() == 2:
                raise ValueError(funcid + "Variable not found: %s in %s" % (ivar, str(data.items)))

    else:
        raise ValueError("Require a DataFrame or Panel as input")

    if pressure_levels:
        data.major_axis.name = 'date'
        dates = data.major_axis

        # pressure levels
        plevels = data.minor_axis.values
        if verbose > 0:
            # single string keeps the exact spacing of the former
            # comma separated print statement
            levels_txt = ",".join(["%d" % (ip / 100) for ip in plevels])
            print("%sp-Levels:  %s  hPa" % (funcid, levels_txt))

    else:
        dates = data.index

    # defined up front so that an empty `var` still returns a (empty) stat
    breakpoint_stat = {}
    for ivar, ibvar in zip(var, breakvar):
        # BREAKS
        if pressure_levels:
            int_breaks = np.where((data[ibvar] > 0).any(1))[0]  # break flagged on any level
        else:
            int_breaks = np.where((data[ibvar] > 0))[0]
        breaks = dates[int_breaks]
        nb = len(breaks)

        # only look at the most recent break if there is one at all;
        # indexing an empty array would raise an IndexError
        if nb > 0 and (int_breaks[-1] + sample_size) > dates.shape[0]:
            print(funcid + "Reference data set is shorter than 1 year")

        # Copy or use existing
        if not hasnames(data, '%s_mcor' % ivar) or varcopy:
            data["%s_mcor" % ivar] = data[ivar].copy()  # Make a copy

        if verbose > 0:
            print(funcid + " %s Found %d breakpoints" % (ivar, nb))

        breakpoint_stat = {}
        xdata = data["%s_mcor" % ivar].values  # Numpy Array (time x p-levels)

        # most recent break first, so older segments are adjusted on top of
        # the already corrected newer ones
        for ib in reversed(range(nb)):

            # ibiased is everything between breakpoints
            # isample is minus the borders -> used to calculate
            isample, ibiased, iref = sample_indices(int_breaks, ib, dates, sample_size=sample_size, borders=borders,
                                                    recent=False, verbose=verbose - 1)
            if pressure_levels:
                # one departure per pressure level (column wise)
                m_dep = np.apply_along_axis(mean_departure, 0, xdata, iref, isample, sample_size)
                # setting with ndarray requires precise shape conditions
                if bounded is not None:
                    tmp_qad = xdata[ibiased, :] + m_dep
                    # drop the adjustment where the result would leave the bounds
                    m_dep = np.where((tmp_qad < lbound) | (tmp_qad > ubound), 0, m_dep)
                    xdata[ibiased, :] += m_dep  # has now the right shape
                else:
                    # one value per level, this can cause negative DPD values
                    xdata[ibiased, :] += m_dep[np.newaxis, ::]
            else:
                m_dep = mean_departure(xdata, iref, isample, sample_size)
                if bounded is not None:
                    tmp_qad = xdata[ibiased] + m_dep
                    m_dep = np.where((tmp_qad < lbound) | (tmp_qad > ubound), 0, m_dep)

                xdata[ibiased] += m_dep  # one value per time

            nsample = np.isfinite(xdata[isample]).sum()
            nref = np.isfinite(xdata[iref]).sum()

            breakpoint_stat[str(breaks[ib])] = {'i': int_breaks[ib], 'isample': isample, 'ibiased': ibiased,
                                                'iref': iref, 'mcor': m_dep, 'nref': nref, 'nsamp': nsample}
            if verbose > 0:
                print(funcid + " %s : %s  50%%: %9f " % (ivar, breaks[ib], np.nanmedian(m_dep)))

        data["%s_mcor" % ivar] = xdata  # fill in

    if database:
        # NOTE(review): only the data is returned here (no stat) and the call
        # relies on Panel.to_frame; a 1D DataFrame input is not covered.
        return data.to_frame(filter_observations=False).reset_index().set_index('date', drop=True)

    return breakpoint_stat, data