Example #1
0
def normalize(data,
              old_min=None,
              old_max=None,
              new_min=0,
              new_max=1,
              dim='time'):
    """Rescale ``data`` with ``minmax_scaler`` and return it as float32.

    Args:
        data: xarray object with either a ``time`` dimension or separate
            ``year`` and ``month`` dimensions.
        old_min: Lower bound of the input range. When ``None`` it is taken
            as ``data.min(dim=dim)``.
        old_max: Upper bound of the input range. When ``None`` it is taken
            as ``data.max(dim=dim)``.
        new_min: Lower bound forwarded to ``minmax_scaler``.
        new_max: Upper bound forwarded to ``minmax_scaler``.
        dim: Dimension over which missing bounds are computed. With
            ``'time'`` the ``year``/``month`` dimensions are stacked into a
            single ``time`` dimension first.

    Returns:
        Tuple ``(scaled, old_min, old_max)`` where ``scaled`` is the rescaled
        data after ``unstack()`` and the bounds are the ones actually used,
        so they can be passed back in to scale other data consistently.
    """
    if 'time' in data.dims:  # get year and month as separate dimensions
        data = unstack_month_and_year(data)

    if dim == 'time':
        data = data.stack(time=['year', 'month'])

    # Derive each bound on its own: a caller-supplied old_max must not be
    # overwritten just because old_min was omitted, and old_max must not be
    # left as None when only old_min was supplied.
    if old_min is None:
        old_min = data.min(dim=dim)
    if old_max is None:
        old_max = data.max(dim=dim)

    # NOTE: assigning `.values` modifies whatever object `data` refers to at
    # this point; when neither reshaping step above ran, that is the
    # caller's own array.
    data.values = np.float32(
        minmax_scaler(data,
                      old_min=old_min,
                      new_min=new_min,
                      old_max=old_max,
                      new_max=new_max))

    return data.unstack(), old_min, old_max
Example #2
0
def preprocess(data_fp,
               data,
               do_remove_season=True,
               mean=None,
               std=None,
               do_normalize=True,
               old_min=None,
               old_max=None):
    """Pre-process data: optional de-seasonalizing, normalizing, area weighting.

    Args:
        data_fp: Forwarded unchanged to ``weight_by_area``.
        data: xarray object with either a ``time`` dimension or separate
            ``year`` and ``month`` dimensions.
        do_remove_season: When true, run ``remove_season`` with
            ``standardize=True``.
        mean: Forwarded to ``remove_season``; replaced by the value it
            returns.
        std: Forwarded to ``remove_season``; replaced by the value it
            returns.
        do_normalize: When true, run ``normalize``.
        old_min: Forwarded to ``normalize``; replaced by the value it
            returns.
        old_max: Forwarded to ``normalize``; replaced by the value it
            returns.

    Returns:
        Tuple ``(data, mean, std, old_min, old_max)``. ``data`` has ``time``
        as its leading dimension, with the coordinate produced by
        ``multis_to_datetime``; the remaining values are the statistics
        used, so the same transformation can be re-applied to other data.
    """
    # Remember the original (year, month) pairs so the time axis can be
    # restored after the round trip through separate year/month dimensions.
    # Stays None when the input arrives already unstacked.
    times = None
    if 'time' in data.dims:  # get year and month as separate dimensions
        year = data.time.dt.year
        month = data.time.dt.month
        times = pd.MultiIndex.from_arrays([year, month],
                                          names=('year', 'month'))
        data = unstack_month_and_year(data)

    # REMOVE SEASONAL CYCLE
    if do_remove_season:
        data, mean, std = remove_season(data,
                                        standardize=True,
                                        mean=mean,
                                        std=std)

    # NORMALIZE
    if do_normalize:
        # Branch on the flag, not on the `remove_season` function object
        # (which is always truthy and made the 'year' case unreachable).
        # De-seasonalized data is scaled over the whole time axis; otherwise
        # scaling is done over years only.
        norm_dim = 'time' if do_remove_season else 'year'
        data, old_min, old_max = normalize(data,
                                           dim=norm_dim,
                                           old_min=old_min,
                                           old_max=old_max)

    # WEIGHT BY GRIDCELL AREA
    if 'lat' in data.dims:
        data = weight_by_area(data_fp, data)

    # Make time a coordinate (and a datetime index)
    data = data.stack(time=['year', 'month'])
    if times is not None:
        # Keep only the (year, month) pairs present on the original axis.
        data = data.sel(time=times)
    data = data.assign_coords({
        'time': multis_to_datetime(data.time.values)
    }).transpose('time', ...)

    return (data, mean, std, old_min, old_max)
Example #3
0
def diff_detrend_xr(data):
    #     Apply `diff_detrend` to every 1-D slice along the time dimension of
    #     an xarray DataArray and wrap the result in a new DataArray.
    #     The output's time coordinate is the input's with the first entry
    #     dropped, so `diff_detrend` is expected to return one element fewer
    #     than it receives (presumably a first difference -- confirm against
    #     its definition).
    if 'time' not in data.dims:
        # Input has separate year/month dimensions; merge them into time
        data = data.stack(time=['year', 'month'])

    #     Carry over every coordinate except time, then attach shortened time
    coords = {}
    for name in data.coords:
        if name != 'time':
            coords[name] = data.coords[name]
    coords['time'] = data.time[1:]

    #     Positional axis of the time dimension, needed by numpy
    axis = data.dims.index('time')

    #     Detrend
    detrended = np.apply_along_axis(diff_detrend, axis, data)
    return xr.DataArray(detrended, coords=coords, dims=data.dims)