# Example no. 1
# 0
def interp_dataframe(data, levels=None, variables=None, min_count=5, dropna=True, report=None, verbose=0):
    """
    Interpolate every profile of a DataFrame vertically onto pressure levels.

    1. Select only profiles (index values) with enough (min_count) values
    2. Interpolate each remaining profile (date) vertically to levels

    Interpolation is only done at dates with enough data.
    The DataFrame passed in is not modified.

    Args:
      data       pandas DataFrame with a pressure column 'p'; rows sharing
                 one index value are treated as one profile

    Keyword Args:
      levels     [std_plevels]       Output pressure levels
                                     (raso.config.std_plevels when None)
      variables  [all columns]       Variables for quality check (min_count),
                                     'p' is always added
      min_count  [5]                 A column only counts for a profile when it
                                     holds more than min_count values there
      dropna     [True]              Remove missing values before interpolation
                                     (passed on to interp_profile)
      report     [None]              passed on to journal
      verbose    [0]                 Show more info (passed on to journal)

    Returns:
      newdata    pandas DataFrame with newly interpolated values,
                 sorted by index and 'p'

    Raises:
      ValueError    when p or other variables not inside data (or empty)
      RuntimeError  when no profile passes the min_count check

    Calls:
      interp_profile -> interp_mod

    """
    funcid = "[INTP] "

    if levels is None:
        # only the default needs the project configuration
        from raso.config import std_plevels
        levels = std_plevels

    if variables is None:
        variables = data.columns.tolist()

    # reset_index() at the end needs a named index; rename on a new object so
    # the caller's DataFrame keeps its own index name
    if data.index.name is None or data.index.name == '':
        data = data.rename_axis('date')

    index_name = data.index.name

    # always include the pressure column, then keep only columns present in data
    variables = list(set(variables + ['p']))
    variables = data.columns[data.columns.isin(variables)].tolist()

    # p alone is not enough: at least one variable to interpolate is required
    if len(variables) < 2 or 'p' not in variables:
        raise ValueError(funcid + "Dataframe requires at least 2 columns(p,+) %s" % (",".join(variables)))

    journal(funcid + "Quality check (%s) Min: %d NAN: %s" % (",".join(variables), min_count, color_boolean(dropna)),
            report, verbose)

    # sometimes one variable holds no values at all: drop it from the check
    counts = data.count()[variables]
    if (counts == 0).any():
        variables = counts[counts > 0].index.tolist()
        if len(variables) < 2 or 'p' not in variables:
            print(counts)
            raise ValueError(funcid + "Dataframe requires at least 2 columns(p,+) %s" % (",".join(variables)))

    # per profile: number of checked columns holding more than min_count values
    per_profile = data.groupby(data.index).count()[variables]
    # NOTE(review): '> 2' needs three qualifying columns (p plus two more), so
    # a frame with only p and one variable never passes - confirm intended.
    itx = (per_profile > min_count).sum(axis=1) > 2

    if not itx.any():
        raise RuntimeError(funcid + "No data left with %d as minimum data count per profile" % min_count)

    # itx is indexed by unique profile; keep every row of the accepted profiles
    data = data.loc[data.index.isin(itx.index[itx.values])]

    journal(funcid + "from %d to %d" % (len(itx), itx.sum()), report, verbose)

    data = data.groupby(data.index).apply(interp_profile, variables=variables, pout=levels, oan=True, dropna=dropna,
                                          min_values=min_count)

    # the grouped result carries an extra unnamed index level ('level_1' after
    # reset_index); drop it and restore the profile index
    data = data.reset_index().drop('level_1', axis=1).sort_values(by=[index_name, 'p']).set_index(index_name, drop=True)
    journal(funcid + "Done: %s" % str(data.shape), report, verbose)
    return data
# Example no. 2
# 0
def interp_profile_check(x, var='t', pout=None, pcolumn='p', dropna=True, min_values=4):
    """
    Check Interpolation for given variable

    The input pressures are merged into the output levels, the variable is
    interpolated with interp_mod in log-pressure, and one line is printed per
    level in ascending pressure order showing input next to output.

    Args:
     x             pandas DataFrame holding one profile (date)

    Keyword Args:
      var        ['t']               Variable to check interpolation
      pout       [std_plevels]       Output pressure levels
                                     (raso.config.std_plevels when None)
      pcolumn    ['p']               name of pressure column
      dropna     [True]              Remove missing values before interpolation
                                     (passed on to interp_mod)
      min_values [4]                 Minimum required levels per profile
                                     (passed on to interp_mod)

    Returns:
      None, prints interpolation results

    Raises:
      ValueError     if x is not a pandas DataFrame
      KeyError       if pcolumn (or var) not in x
      RuntimeError   if more than one index (date) is in x

    Calls:
      interp_mod
    """
    if not isinstance(x, pd.DataFrame):
        raise ValueError("[INTP] Requires a DataFrame")

    if pcolumn not in x.columns:
        raise KeyError("[INTP] Pressure levels not in DataFrame")

    if len(x.index.unique()) > 1:
        raise RuntimeError("[INTP] Only one profile allowed here! ")

    if pout is None:
        # only the default needs the project configuration
        from raso.config import std_plevels
        pout = std_plevels

    pin = x[pcolumn].values  # take pressures
    ipx = np.argsort(pin)  # find ascending order
    pin = pin[ipx]  # apply sort to pressures

    # merge input and output levels; np.unique already returns them sorted
    pout = np.unique(np.concatenate([pin, pout]))

    values = x.drop(pcolumn, axis=1)[var].values[ipx]  # take values, apply sort

    # presumably returns one value per entry of pout - it is indexed that way below
    res = interp_mod(values, np.log(pout), np.log(pin), dropna=dropna, min_values=min_values)

    only_out = "IN: ( %9s  %7s ) OUT: ( %9.2f  %7.2f) "
    for i, p in enumerate(pin):
        # output levels strictly between the previous input level and this one
        # (for the first input level: everything below it)
        if i == 0:
            between = pout < p
        else:
            between = np.logical_and(pout > pin[i - 1], pout < p)
        for no in np.where(between)[0]:
            print(only_out % ('-', '-', pout[no], res[no]))

        # is the input level also an output level?
        v1 = np.where(pout == p)[0]
        if len(v1) == 0:
            # every input pressure was merged into pout, so this only happens
            # when p does not compare equal to itself (NaN)
            print("IN: ( %9.2f  %7.2f ) OUT: ( %9s  %7s) " % (p, values[i], '-', '-'))
        else:
            k = v1[0]  # pout is unique, so there is exactly one match
            print("IN: ( %9.2f  %7.2f ) OUT: ( %9.2f  %7.2f) %s " % (
                p, values[i], pout[k], res[k], color_boolean(np.all((values[i] - res[k]) == 0.0))))

    # output levels above the last input level
    for no in np.where(pout > pin[-1])[0]:
        print(only_out % ('-', '-', pout[no], res[no]))