def test_agg_time():
    """Check that agg_time returns the expected number of aggregated observations.

    Uses an 11-day daily series so the expected counts are easy to derive:

    - block (non-rolling) aggregation, 3-day window  -> floor(11 / 3) blocks
    - rolling aggregation, 3-day window              -> 11 - 2 left-stamped values
    - block aggregation starting at day 4            -> floor((11 - 3) / 3) blocks
    """
    serie = xr.DataArray(
        np.arange(11),
        dims=('time', ),
        coords={'time': pd.date_range('2000-01-01', '2000-01-11')})

    # Block aggregation: only complete, non-overlapping windows survive.
    blocks = agg_time(serie, ndayagg=3, rolling=False)
    assert len(
        blocks
    ) == 11 // 3, "block aggregation with size 3 should result in the exact amount of full blocks that fit within the timeseries"

    # Rolling aggregation: one left-stamped value per day, except the last
    # (window - 1) days, for which no full window exists.
    rolling = agg_time(serie, ndayagg=3, rolling=True)
    assert len(
        rolling
    ) == 11 - 2, "rolling aggregation with window size 3 should result in two less left-stamped observation"

    # Block aggregation with a later first day: the 3 days before `firstday`
    # are excluded before blocks are formed.
    blocks_start = agg_time(serie,
                            ndayagg=3,
                            rolling=False,
                            firstday=pd.Timestamp('2000-01-04'))
    assert len(blocks_start) == (
        11 - 3
    ) // 3, "block aggregation with size 3 should result in the exact amount of full blocks that fit within the timeseries after the given first day"
# Fragment of a response-preparation routine. Relies on names bound in the
# enclosing (not visible) scope: CLUSTERDIR, response, spatial_quantile,
# responseclustid, timeaggs, firstday, detrend_response, agg_time, detrend,
# logging — TODO(review): confirm against the enclosing definition.

# Spatial reduction: collapse the response field to one series per cluster
# (mean, or a spatial quantile when requested), then select one cluster.
clusterfield = xr.open_dataarray(CLUSTERDIR / 't2m-q095.nc').sel(nclusters=15)
if spatial_quantile is None:
    reduced = response.groupby(clusterfield).mean(
        'stacked_latitude_longitude')
else:
    reduced = response.groupby(clusterfield).quantile(
        q=spatial_quantile, dim='stacked_latitude_longitude')
reduced = reduced.sel(
    clustid=responseclustid)  # In this case cluster 9 is western europe.
response.close()

output = []
for responsetimeagg in timeaggs:
    # Temporal reduction: left-stamped rolling mean over the window length.
    responseagg = agg_time(array=reduced,
                           ndayagg=responsetimeagg,
                           method='mean',
                           rolling=True,
                           firstday=firstday)
    logging.debug(
        f'aggregated response to {responsetimeagg} day timeseries')
    # Keep only summer (JJA) observations.
    summersubset = responseagg[responseagg.time.dt.season == 'JJA']
    if detrend_response:
        summersubsetvals = detrend(summersubset.values, axis=0)
    else:
        summersubsetvals = summersubset.values
    # Wrap as a one-column frame keyed by (variable, timeagg, clustid) so
    # frames for different time aggregations can be joined later.
    summersubset = pd.DataFrame(
        summersubsetvals,
        index=summersubset.coords['time'].to_index(),
        columns=pd.MultiIndex.from_tuples(
            [(summersubset.name, responsetimeagg, responseclustid)],
            names=['variable', 'timeagg', 'clustid']))
reduced = response.groupby(clusterfield).mean('stacked_latitude_longitude') reduced = reduced.sel(clustid=9) # In this case cluster 9 is western europe. response.close() del response # Define variable / region combinations for the dummy problem combinations = { 'sst_nhplus.anom.nc': get_natlantic(), 'z300_nhmin.anom.nc': get_europe() } #combinations = {'sst_nhplus.anom.nc':get_natlantic()} # Define the time scale of the response (to be regressed to) responsetimeagg = 15 responseagg = agg_time(array=reduced, ndayagg=responsetimeagg, method='mean', rolling=True, firstday=pd.Timestamp('1981-01-01')) summersubset = responseagg[responseagg.time.dt.season == 'JJA'] summersubset.values = detrend(summersubset.values) # Detrend here? summersubset.to_netcdf( OUTDIR / '.'.join(['response', str(responsetimeagg), 'nc'])) # Quick and dirty save # Only rolling aggregation is possible for intercomparing timescales, as those are equally (daily) stamped timeaggs = [1, 3, 5, 7, 9, 11, 15] laglist = [-1, -3, -5, -7, -9, -11, -15, -20, -25, -30, -35, -40, -45] # Eventually will be negative values # first level loop is variable / block combinations for inputfile, region in combinations.items(): # Investigate the precursors