def test_shift_back(self):
    """
    Shift longitudes to several starting points, shift back, and compare to the original.

    The 'ds_strange' and 'ds_renamed' entries of data_dict are skipped.
    """
    skipped_keys = ('ds_strange', 'ds_renamed')
    trial_lon_mins = (-180., -270., 0., 90.)  # different lon_min values to try
    for name, original in data_dict.items():
        if name in skipped_keys:
            continue
        # Remember the original minimum longitude so that the shift can be undone
        start_lon_min = original['lon'].values.min()
        for trial_lon_min in trial_lon_mins:
            shifted = climapy.xr_shift_lon(original, lon_min=trial_lon_min)
            restored = climapy.xr_shift_lon(shifted, lon_min=start_lon_min)
            # Correct small diffs in coords before comparing
            restored = restored.reindex_like(original, method='nearest',
                                             tolerance=1e-3)
            assert restored.equals(original)  # compare to original
def test_default_shift(self):
    """
    Check the longitudes produced by xr_shift_lon when lon_min is left at its default.

    'ds_strange' is skipped; 'ds_renamed' is shifted via lon_name='longitude'; the
    reversed-longitude datasets are flipped before checking the end points.
    """
    reversed_lon_keys = ['ds_rev_lon', 'ds_rev_both']
    irregular_lon_keys = ['ds_irr_lon', 'ds_irr_both']
    for key, data in data_dict.items():
        if key == 'ds_strange':
            continue
        # Extract the shifted longitude values, ordered from first to last
        if key == 'ds_renamed':
            shifted = climapy.xr_shift_lon(data, lon_name='longitude')
            new_lon = shifted['longitude'].values
        else:
            new_lon = climapy.xr_shift_lon(data)['lon'].values
            if key in reversed_lon_keys:
                new_lon = new_lon[::-1]
        # End points should be the extremes
        assert new_lon.min() == new_lon[0]
        assert new_lon.max() == new_lon[-1]
        if key in irregular_lon_keys:
            # allow some leeway for irreg longitudes
            assert -180 <= new_lon[0] <= -177
            assert 177 <= new_lon[-1] <= 180
        else:
            assert new_lon[0] == -180, AssertionError(key)
            assert new_lon[-1] == 177.5, AssertionError(key)
def test_inside_values(self):
    """
    Compare 'TS' from xr_mask_bounds (select_how='inside') against cdo_dict, per region.
    """
    for region, bounds in load_region_bounds_dict().items():
        reference = cdo_dict[region]
        lon_bounds, lat_bounds = bounds
        masked = climapy.xr_mask_bounds(data_dict['data01'],
                                        lon_bounds=lon_bounds,
                                        lat_bounds=lat_bounds,
                                        select_how='inside')
        # Drop all-NaN rows/columns, like CDO
        for dim in ('lon', 'lat'):
            masked = masked.dropna(dim=dim, how='all')
        # Shift lons for consistency with the reference data
        masked = climapy.xr_shift_lon(masked, lon_min=reference['lon'].min())
        # Relative difference in 'TS' variable should be very small
        reference_ts = reference['TS'].values
        rel_diff = (masked['TS'].values - reference_ts) / reference_ts
        assert np.abs(rel_diff).max() < 1e-12
def load_2d_stats(scenario_combination='All1-All0', variable='SWCF_d1'):
    """
    Load dictionary of 2D statistics, for all lons and lats, for a specific variable and scenario.

    Results are stored in _2d_stats_dict, keyed by (scenario_combination, variable), and the
    stored dictionary is returned directly on subsequent calls with the same arguments.

    Args:
        scenario_combination: scenario combination (default 'All1-All0'). Recognized forms:
            a single scenario name; a difference 'A-B' whose first term is a single
            scenario; the Sigma(Theta1-All0) and Sigma(All1-Theta0) labels and their
            difference; a mean of two differences such as '((EAs1-All0)+(All1-EAs0))/2'
        variable: name of variable (default 'SWCF_d1')

    Returns:
        dictionary, with the following keys:
            scenario_combination: as per input arg
            variable: as per input arg
            data: DataArray of annual-means for different years (if single scenario,
                otherwise None)
            mean: DataArray of annual-mean averaged across different years
            error: DataArray of combined standard errors
            ci99: tuple of DataArrays of 99% confidence intervals (based on 2.576*error)
            p_value: array of p-values for difference between scenarios (only set for
                an 'A-B' difference, otherwise None)
            contributing_scenarios: list of contributing scenarios (e.g. ['All1', 'All0'])

    Raises:
        ValueError: if scenario_combination is not one of the recognized forms
        RuntimeError: if a summed combination does not find exactly 10 scenarios
    """
    cache_key = (scenario_combination, variable)
    # Check if 2D stats have been calculated previously
    try:
        return _2d_stats_dict[cache_key]
    except KeyError:
        pass  # not calculated yet; fall through and calculate below
    # Labels of the summed combinations. Raw strings keep the backslashes literal
    # without relying on invalid escape sequences such as '\S' and '\T'.
    sum_theta1_label = r'$\Sigma_{\Theta}$($\Theta$1-All0)'
    sum_theta0_label = r'$\Sigma_{\Theta}$(All1-$\Theta$0)'
    # Initialise dictionary with input arguments and None
    result = {'scenario_combination': scenario_combination,
              'variable': variable,
              'data': None, 'mean': None, 'error': None, 'ci99': None,
              'p_value': None, 'contributing_scenarios': None}
    # Case 1: scenario_combination is a single scenario
    if scenario_combination in _inverted_scenario_name_dict:
        result['contributing_scenarios'] = [scenario_combination, ]
        # Load annual data
        data = load_output(variable,
                           scenario=_inverted_scenario_name_dict[scenario_combination],
                           season='annual', apply_sf=True)
        data = climapy.xr_shift_lon(data, lon_min=-179.)  # shift longitudes
        result['data'] = data
        # Mean and standard error across years
        n_years = data['year'].size
        mean = data.mean(dim='year')  # mean across years
        error = data.std(dim='year', ddof=1) / np.sqrt(n_years)  # standard error
        result['mean'] = mean
        result['error'] = error
    # Case 2: scenario_combination is a difference between two scenarios
    elif (len(scenario_combination.split('-')) == 2 and
          scenario_combination.split('-')[0] in _inverted_scenario_name_dict):
        scenario1, scenario2 = scenario_combination.split('-')
        result['contributing_scenarios'] = [scenario1, scenario2]
        # Call recursively to get 2D stats for each scenario
        stats1 = load_2d_stats(scenario_combination=scenario1, variable=variable)
        stats2 = load_2d_stats(scenario_combination=scenario2, variable=variable)
        # Combine to get difference between means and the combined error
        mean = stats1['mean'] - stats2['mean']
        error = np.sqrt(stats1['error']**2 + stats2['error']**2)
        result['mean'] = mean
        result['error'] = error
        # p-value based on standard two-sample t-test
        p_value = ttest_ind(stats1['data'], stats2['data'], equal_var=True)[1]
        result['p_value'] = p_value
    # Case 3: scenario_combination is Sigma(Theta1-All0)
    # Note: Sigma(Theta1-All0) = Sigma(Theta1)-10xAll0
    elif scenario_combination == sum_theta1_label:
        theta1_scenarios = [s for s in _inverted_scenario_name_dict.keys()
                            if (s[-1] == '1' and s not in ['Correct1', 'All1'])]
        if len(theta1_scenarios) != 10:
            raise RuntimeError('theta1_scenarios = {}'.format(theta1_scenarios))
        result['contributing_scenarios'] = theta1_scenarios + ['All0', ]
        # Call recursively to get mean and error for All0
        temp_stats = load_2d_stats(scenario_combination='All0', variable=variable)
        mean_all0 = temp_stats['mean']
        error_all0 = temp_stats['error']
        # Call recursively to get lists of means and errors for each Theta1 scenario
        mean_list = []
        error_list = []
        for scenario in theta1_scenarios:
            temp_stats = load_2d_stats(scenario_combination=scenario, variable=variable)
            mean_list.append(temp_stats['mean'])
            error_list.append(temp_stats['error'])
        # Combine to get sum of means and the combined error
        mean = sum(mean_list) - 10*mean_all0
        error = np.sqrt((10*error_all0)**2 + sum([e**2 for e in error_list]))
        result['mean'] = mean
        result['error'] = error
    # Case 4: scenario_combination is Sigma(All1-Theta0)
    elif scenario_combination == sum_theta0_label:
        theta0_scenarios = [s for s in _inverted_scenario_name_dict.keys()
                            if (s[-1] == '0' and s != 'All0')]
        if len(theta0_scenarios) != 10:
            raise RuntimeError('theta0_scenarios = {}'.format(theta0_scenarios))
        result['contributing_scenarios'] = theta0_scenarios + ['All1', ]
        # Call recursively to get mean and error for All1
        temp_stats = load_2d_stats(scenario_combination='All1', variable=variable)
        mean_all1 = temp_stats['mean']
        error_all1 = temp_stats['error']
        # Call recursively to get lists of means and errors for each Theta0 scenario
        mean_list = []
        error_list = []
        for scenario in theta0_scenarios:
            temp_stats = load_2d_stats(scenario_combination=scenario, variable=variable)
            mean_list.append(temp_stats['mean'])
            error_list.append(temp_stats['error'])
        # Combine to get sum of means and the combined error
        mean = 10*mean_all1 - sum(mean_list)
        error = np.sqrt((10*error_all1)**2 + sum([e ** 2 for e in error_list]))
        result['mean'] = mean
        result['error'] = error
    # Case 5: scenario_combination is Sigma(All1-Theta0)-Sigma(Theta1-All0)
    elif scenario_combination == sum_theta0_label + '-' + sum_theta1_label:
        # Call recursively to get 2D stats for each scenario combination
        stats1 = load_2d_stats(scenario_combination=sum_theta0_label,
                               variable=variable)
        stats2 = load_2d_stats(scenario_combination=sum_theta1_label,
                               variable=variable)
        # Contributing scenarios
        result['contributing_scenarios'] = (stats1['contributing_scenarios'] +
                                            stats2['contributing_scenarios'])
        # Combine to get difference between means and the combined error
        mean = stats1['mean'] - stats2['mean']
        error = np.sqrt(stats1['error']**2 + stats2['error']**2)
        result['mean'] = mean
        result['error'] = error
    # Case 6: scenario_combination is mean of two differences
    # e.g. '((EAs1-All0)+(All1-EAs0))/2'
    elif len(scenario_combination.split('+')) == 2 and scenario_combination[-2:] == '/2':
        # Strip the trailing '/2' and all parentheses, leaving 'A-B+C-D'
        scenario_combination1, scenario_combination2 = \
            scenario_combination[:-2].replace('(', '').replace(')', '').split('+')
        # Call recursively to get 2D stats for each scenario combination
        stats1 = load_2d_stats(scenario_combination=scenario_combination1,
                               variable=variable)
        stats2 = load_2d_stats(scenario_combination=scenario_combination2,
                               variable=variable)
        # Contributing scenarios
        result['contributing_scenarios'] = (stats1['contributing_scenarios'] +
                                            stats2['contributing_scenarios'])
        # Combine to get means of means and the combined error
        mean = (stats1['mean'] + stats2['mean']) / 2
        error = (np.sqrt(stats1['error'] ** 2 + stats2['error'] ** 2)) / 2
        result['mean'] = mean
        result['error'] = error
    else:
        raise ValueError('scenario_combination not recognized')
    # 99% confidence interval based on standard error
    ci99 = (mean - 2.576 * error, mean + 2.576 * error)
    result['ci99'] = ci99
    # Save result for future reference
    _2d_stats_dict[cache_key] = result
    # Return result
    return result
def prepare_test_data():
    """
    Load data01.nc and manipulate to create additional test data.

    Used to load data into data_dict below.

    Returns:
        dictionary mapping a name to a Dataset/DataArray, with keys (in insertion order):
        'data01', 'da_ts', 'da_precl', 'ds_shift_lon', 'ds_rev_lon', 'ds_rev_lat',
        'ds_rev_both', 'ds_transposed', 'ds_renamed', 'ds_irr_lon', 'ds_irr_lat',
        'ds_irr_both', 'ds_strange'
    """
    # Dictionary in which to store data
    data_dict = {}
    # Load data01.nc Dataset. The path is joined rather than concatenated so that an
    # empty dirname (when __file__ has no directory part) gives a relative path instead
    # of the absolute '/data/data01.nc'.
    data_path = os.path.join(os.path.dirname(__file__), 'data', 'data01.nc')
    # NOTE(review): `autoclose` is believed to be deprecated in newer xarray releases -
    # confirm against the xarray version this project targets.
    data01 = xr.open_dataset(data_path, decode_times=False, autoclose=True)
    data_dict['data01'] = data01.copy()
    # Extract two *DataArrays* - to test functions with DataArrays
    da_ts = data01['TS'].copy()
    da_precl = data01['PRECL'].copy()
    data_dict['da_ts'] = da_ts.copy()
    data_dict['da_precl'] = da_precl.copy()
    # Dataset with *shifted* longitudes
    ds_shift_lon = climapy.xr_shift_lon(data01.copy())
    data_dict['ds_shift_lon'] = ds_shift_lon.copy()
    # Datasets with *reversed* lon/lat coordinates and data
    ds_rev_lon = data01.copy()
    ds_rev_lon['lon'].values = ds_rev_lon['lon'].values[::-1]
    for var_name in ['TS', 'PRECL']:  # array order: time, lat, lon
        ds_rev_lon[var_name].values = ds_rev_lon[var_name].values[:, :, ::-1]
    ds_rev_lat = data01.copy()
    ds_rev_lat['lat'].values = ds_rev_lat['lat'].values[::-1]
    for var_name in ['TS', 'PRECL']:
        ds_rev_lat[var_name].values = ds_rev_lat[var_name].values[:, ::-1, :]
    ds_rev_both = data01.copy()
    ds_rev_both['lat'].values = ds_rev_both['lat'].values[::-1]
    ds_rev_both['lon'].values = ds_rev_both['lon'].values[::-1]
    for var_name in ['TS', 'PRECL']:
        ds_rev_both[var_name].values = ds_rev_both[var_name].values[:, ::-1, ::-1]
    data_dict['ds_rev_lon'] = ds_rev_lon.copy()
    data_dict['ds_rev_lat'] = ds_rev_lat.copy()
    data_dict['ds_rev_both'] = ds_rev_both.copy()
    # Dataset with *transposed* lon/lat coords
    ds_transposed = data01.copy()
    ds_transposed = ds_transposed.transpose()
    data_dict['ds_transposed'] = ds_transposed.copy()
    # Dataset with *renamed* longitude and latitude coords
    ds_renamed = data01.copy()
    ds_renamed = ds_renamed.rename({'lon': 'longitude', 'lat': 'latitude'})
    data_dict['ds_renamed'] = ds_renamed.copy()
    # Datasets with slightly *irregular* lon/lat coords, yet still monotonic
    nx, ny = data01['lon'].size, data01['lat'].size
    lon_irr = (data01['lon'].values +
               np_rand.uniform(low=-0.5, high=0.5, size=nx))  # add small amount of noise
    lon_irr[[0, -1]] = data01['lon'].values[[0, -1]]  # keep end values unchanged
    lat_irr = (data01['lat'].values +
               np_rand.uniform(low=-0.5, high=0.5, size=ny))
    lat_irr[[0, -1]] = data01['lat'].values[[0, -1]]
    ds_irr_lon = data01.copy()
    ds_irr_lon['lon'].values = lon_irr.copy()
    ds_irr_lat = data01.copy()
    ds_irr_lat['lat'].values = lat_irr.copy()
    ds_irr_both = data01.copy()
    ds_irr_both['lon'].values = lon_irr.copy()
    ds_irr_both['lat'].values = lat_irr.copy()
    data_dict['ds_irr_lon'] = ds_irr_lon.copy()
    data_dict['ds_irr_lat'] = ds_irr_lat.copy()
    data_dict['ds_irr_both'] = ds_irr_both.copy()
    # Dataset with *strange* lon/lat coords - very irregular and not monotonic
    lon_strange = (data01['lon'].values +
                   np_rand.uniform(low=-10, high=10, size=nx))  # add large amount of noise
    lon_strange[[0, -1]] = data01['lon'].values[[0, -1]]  # keep end values unchanged
    lat_strange = (data01['lat'].values +
                   np_rand.uniform(low=-10, high=10, size=ny))
    lat_strange[[0, -1]] = data01['lat'].values[[0, -1]]  # keep end values unchanged
    ds_strange = data01.copy()
    ds_strange['lon'].values = lon_strange.copy()
    ds_strange['lat'].values = lat_strange.copy()
    data_dict['ds_strange'] = ds_strange.copy()
    # Return dictionary of data
    return data_dict
def test_non_monotonic(self):
    """
    xr_shift_lon is expected to raise ValueError for the 'ds_strange' dataset.
    """
    strange_ds = data_dict['ds_strange']
    with pytest.raises(ValueError):
        climapy.xr_shift_lon(strange_ds)
def test_incorrect_lon_name(self):
    """
    xr_shift_lon is expected to raise KeyError when lon_name does not match the data.

    Two mismatches are checked: the default lon_name with 'ds_renamed', and
    lon_name='longitude' with 'data01'.
    """
    # Look the datasets up *outside* the pytest.raises blocks: a KeyError caused by a
    # missing data_dict entry would otherwise satisfy the expectation and let the test
    # pass without xr_shift_lon ever being called.
    renamed_ds = data_dict['ds_renamed']
    standard_ds = data_dict['data01']
    with pytest.raises(KeyError):
        climapy.xr_shift_lon(renamed_ds)
    with pytest.raises(KeyError):
        climapy.xr_shift_lon(standard_ds, lon_name='longitude')