def normalize(data, old_min=None, old_max=None, new_min=0, new_max=1, dim='time'):
    """Min-max rescale ``data`` along ``dim`` and return it with the bounds used.

    Args:
        data: xarray DataArray. If it has a ``time`` dimension it is first
            split into separate ``year`` and ``month`` dimensions with
            ``unstack_month_and_year``.
        old_min: Lower bound of the input range. When None it is computed as
            ``data.min(dim=dim)``.
        old_max: Upper bound of the input range. When None it is computed as
            ``data.max(dim=dim)``. Each bound is filled in independently, so a
            caller may supply only one of them.
        new_min: Lower bound handed to ``minmax_scaler`` (presumably the
            bottom of the target range -- confirm against ``minmax_scaler``).
        new_max: Upper bound handed to ``minmax_scaler``.
        dim: Dimension the bounds are reduced over. With ``'time'`` the
            ``year`` and ``month`` dimensions are stacked into a single
            ``time`` dimension first; any other value (e.g. ``'year'``) is
            used as-is.

    Returns:
        Tuple ``(scaled, old_min, old_max)`` where ``scaled`` holds float32
        values and has been unstacked again, and the bounds are the ones
        actually used, so they can be re-applied to other data.
    """
    if 'time' in data.dims:
        # get year and month as separate dimension
        data = unstack_month_and_year(data)
    if dim == 'time':
        data = data.stack(time=['year', 'month'])

    # Fill in each missing bound on its own: previously old_max was only
    # computed when old_min was missing, so passing old_min alone left
    # old_max as None.
    if old_min is None:
        old_min = data.min(dim=dim)
    if old_max is None:
        old_max = data.max(dim=dim)

    scaled = np.float32(
        minmax_scaler(data, old_min=old_min, new_min=new_min,
                      old_max=old_max, new_max=new_max))

    # Build a new array rather than assigning to data.values, so the caller's
    # object is never overwritten when no unstack/stack happened above.
    data = data.copy(data=scaled)
    return data.unstack(), old_min, old_max
def preprocess(data_fp, data, do_remove_season=True, mean=None, std=None,
               do_normalize=True, old_min=None, old_max=None):
    """Pre-process data: optional season removal, normalization, area weighting.

    Args:
        data_fp: Passed through to ``weight_by_area`` (presumably the path of
            the data file -- confirm against ``weight_by_area``).
        data: xarray DataArray, either with a ``time`` dimension or already
            split into ``year`` and ``month`` dimensions.
        do_remove_season: If True, call ``remove_season`` with
            ``standardize=True``.
        mean: Forwarded to ``remove_season``; replaced by the value it returns.
        std: Forwarded to ``remove_season``; replaced by the value it returns.
        do_normalize: If True, call ``normalize``. The bounds are reduced
            along ``'time'`` when the seasonal cycle was removed and along
            ``'year'`` otherwise.
        old_min: Forwarded to ``normalize``; replaced by the value it returns.
        old_max: Forwarded to ``normalize``; replaced by the value it returns.

    Returns:
        Tuple ``(data, mean, std, old_min, old_max)``. ``data`` has ``time``
        as its leading dimension, with coordinate values produced by
        ``multis_to_datetime``.
    """
    # Remember the (year, month) pairs present in the input so that entries
    # introduced by unstacking/stacking can be dropped again at the end.
    # Stays None when the input has no time dimension to take them from.
    times = None
    if 'time' in data.dims:
        # get year and month as separate dimension
        year = data.time.dt.year
        month = data.time.dt.month
        times = pd.MultiIndex.from_arrays([year, month],
                                          names=('year', 'month'))
        data = unstack_month_and_year(data)

    # REMOVE SEASONAL CYCLE
    if do_remove_season:
        data, mean, std = remove_season(data, standardize=True,
                                        mean=mean, std=std)

    # NORMALIZE
    if do_normalize:
        # Test the flag, not the remove_season function object: the function
        # is always truthy, which made the 'year' branch unreachable.
        norm_dim = 'time' if do_remove_season else 'year'
        data, old_min, old_max = normalize(data, dim=norm_dim,
                                           old_min=old_min, old_max=old_max)

    # WEIGHT BY GRIDCELL AREA
    if 'lat' in data.dims:
        data = weight_by_area(data_fp, data)

    data = data.stack(time=['year', 'month'])

    # Make time a coordinate (and a datetime index)
    if times is not None:
        # Keep only the time steps that were present in the input.
        data = data.sel(time=times)
    data = data.assign_coords({
        'time': multis_to_datetime(data.time.values)
    }).transpose('time', ...)

    return (data, mean, std, old_min, old_max)
def diff_detrend_xr(data):
    """Detrend an xarray DataArray along its ``time`` dimension.

    If ``data`` has no ``time`` dimension, its ``year`` and ``month``
    dimensions are stacked into one first. ``diff_detrend`` is then applied to
    every 1-D slice along ``time``.

    NOTE(review): the result is labelled with the time coordinate minus its
    first entry, so ``diff_detrend`` is assumed to return one element fewer
    than it receives -- confirm against ``diff_detrend``.

    Args:
        data: xarray DataArray with a ``time`` dimension, or with ``year`` and
            ``month`` dimensions.

    Returns:
        A new DataArray with the same dimension order as the (possibly
        stacked) input and the ``time`` axis shortened by one.
    """
    if 'time' not in data.dims:
        data = data.stack(time=['year', 'month'])

    # Get dimension corresponding to time
    time_axis = data.dims.index('time')

    # Detrend
    vals = np.apply_along_axis(diff_detrend, axis=time_axis, arr=data)

    # Drop the first time step from *every* coordinate at once. Copying the
    # non-time coordinates unchanged left any coordinate that lies along time
    # (for example the year/month levels created by stack) at full length,
    # which conflicts with the shortened time axis.
    trimmed_coords = data.isel(time=slice(1, None)).coords

    return xr.DataArray(vals, coords=trimmed_coords, dims=data.dims)