Ejemplo n.º 1
0
 def test_shift_back(self):
     """
     Check that shifting longitudes and then shifting back reproduces the original data.
     """
     skipped_keys = ('ds_strange', 'ds_renamed')  # datasets excluded from this test
     trial_lon_mins = (-180., -270., 0., 90.)  # different lon_min values to try
     for name, original in data_dict.items():
         if name in skipped_keys:
             continue
         start_lon_min = original['lon'].values.min()  # min longitude before any shifting
         for trial_lon_min in trial_lon_mins:
             shifted = climapy.xr_shift_lon(original, lon_min=trial_lon_min)
             restored = climapy.xr_shift_lon(shifted, lon_min=start_lon_min)  # shift back
             # Snap coords onto the original ones, to remove small diffs before comparing
             restored = restored.reindex_like(original, method='nearest', tolerance=1e-3)
             assert restored.equals(original)  # round trip should match the original
Ejemplo n.º 2
0
 def test_default_shift(self):
     """
     Check the longitude values produced by xr_shift_lon when lon_min is not specified.

     For every dataset except 'ds_strange', the first/last longitudes (after un-reversing the
     reversed-longitude datasets) must be the min/max, and must match the expected end values.
     """
     reversed_keys = ('ds_rev_lon', 'ds_rev_both')
     irregular_keys = ('ds_irr_lon', 'ds_irr_both')
     for key, data in data_dict.items():
         if key == 'ds_strange':
             continue
         # Get shifted longitudes, in the orientation used by the checks below
         if key == 'ds_renamed':
             shifted = climapy.xr_shift_lon(data, lon_name='longitude')
             lons = shifted['longitude'].values
         else:
             shifted = climapy.xr_shift_lon(data)
             lons = shifted['lon'].values
             if key in reversed_keys:
                 lons = lons[::-1]  # flip reversed longitudes before checking
         # First and last values should be the extremes
         assert lons.min() == lons[0]
         assert lons.max() == lons[-1]
         first_lon, last_lon = lons[0], lons[-1]
         if key in irregular_keys:
             # Allow some leeway for irregular longitudes
             assert -180 <= first_lon <= -177
             assert 177 <= last_lon <= 180
         else:
             assert first_lon == -180, AssertionError(key)
             assert last_lon == 177.5, AssertionError(key)
Ejemplo n.º 3
0
 def test_inside_values(self):
     """
     Compare 'TS' values masked with select_how='inside' against the CDO reference data.
     """
     for region, bounds in load_region_bounds_dict().items():
         reference = cdo_dict[region]
         lon_bounds, lat_bounds = bounds
         # Mask data01 using the bounds of the region
         masked = climapy.xr_mask_bounds(data_dict['data01'],
                                         lon_bounds=lon_bounds,
                                         lat_bounds=lat_bounds,
                                         select_how='inside')
         # Drop all-NaN rows/columns, like CDO
         masked = masked.dropna(dim='lon', how='all')
         masked = masked.dropna(dim='lat', how='all')
         # Shift longitudes for consistency with the reference data
         reference_lon_min = reference['lon'].min()
         masked = climapy.xr_shift_lon(masked, lon_min=reference_lon_min)
         # Relative difference in 'TS' variable should be very small
         masked_ts = masked['TS'].values
         reference_ts = reference['TS'].values
         rel_diff = (masked_ts - reference_ts) / reference_ts
         assert np.abs(rel_diff).max() < 1e-12
Ejemplo n.º 4
0
def _load_means_and_errors(scenarios, variable):
    """
    Collect the 'mean' and 'error' entries returned by load_2d_stats for each scenario.

    Args:
        scenarios: iterable of scenario names, each passed as scenario_combination
        variable: name of variable

    Returns:
        tuple (mean_list, error_list), each in the same order as scenarios
    """
    mean_list = []
    error_list = []
    for scenario in scenarios:
        stats = load_2d_stats(scenario_combination=scenario, variable=variable)
        mean_list.append(stats['mean'])
        error_list.append(stats['error'])
    return mean_list, error_list


def load_2d_stats(scenario_combination='All1-All0',
                  variable='SWCF_d1'):
    """
    Load dictionary of 2D statistics, for all lons and lats, for a specific variable and scenario.

    Results are stored in the module-level _2d_stats_dict, keyed by
    (scenario_combination, variable), and re-used on subsequent calls. Combinations of scenarios
    are calculated by calling this function recursively for the contributing parts.

    Args:
        scenario_combination: scenario combination (default 'All1-All0')
        variable: name of variable (default 'SWCF_d1')

    Returns:
        dictionary, with the following keys:
            scenario_combination: as per input arg
            variable: as per input arg
            data: DataArray of annual-means for different years (if single scenario, else None)
            mean: DataArray of annual-mean averaged across different years
            error: DataArray of combined standard errors
            ci99: tuple of DataArrays of 99% confidence intervals (based on 2.576*error)
            p_value: array of p-values for difference between scenarios (only set when
                scenario_combination is a difference between two scenarios, else None)
            contributing_scenarios: list of contributing scenarios (e.g. ['All1', 'All0'])

    Raises:
        ValueError: if scenario_combination is not recognized
        RuntimeError: if the number of Θ1 or Θ0 scenarios found is not 10
    """
    # Check if 2D stats have been calculated previously
    cache_key = (scenario_combination, variable)
    try:
        return _2d_stats_dict[cache_key]
    except KeyError:
        # Not calculated yet. The calculation is done below, outside this handler, so that any
        # error raised during the calculation is not chained to this cache-miss KeyError.
        pass
    # Labels of the summed scenario combinations. Raw strings are used because the LaTeX
    # backslashes are not valid escape sequences in a normal string literal.
    sum_theta1_label = r'$\Sigma_{\Theta}$($\Theta$1-All0)'  # ∑(Θ1-All0)
    sum_theta0_label = r'$\Sigma_{\Theta}$(All1-$\Theta$0)'  # ∑(All1-Θ0)
    # Initialise dictionary with input arguments and None
    result = {'scenario_combination': scenario_combination,
              'variable': variable,
              'data': None,
              'mean': None,
              'error': None,
              'ci99': None,
              'p_value': None,
              'contributing_scenarios': None}
    # Case 1: scenario_combination is a single scenario
    if scenario_combination in _inverted_scenario_name_dict:
        result['contributing_scenarios'] = [scenario_combination, ]
        # Load annual data
        data = load_output(variable,
                           scenario=_inverted_scenario_name_dict[scenario_combination],
                           season='annual', apply_sf=True)
        data = climapy.xr_shift_lon(data, lon_min=-179.)  # shift longitudes
        result['data'] = data
        # Mean and standard error across years
        n_years = data['year'].size
        mean = data.mean(dim='year')  # mean across years
        error = data.std(dim='year', ddof=1) / np.sqrt(n_years)  # standard error
    # Case 2: scenario_combination is a difference between two scenarios
    elif (len(scenario_combination.split('-')) == 2 and
          scenario_combination.split('-')[0] in _inverted_scenario_name_dict):
        scenario1, scenario2 = scenario_combination.split('-')
        result['contributing_scenarios'] = [scenario1, scenario2]
        # Call recursively to get 2D stats for each scenario
        stats1 = load_2d_stats(scenario_combination=scenario1, variable=variable)
        stats2 = load_2d_stats(scenario_combination=scenario2, variable=variable)
        # Combine to get difference between means and the combined error
        mean = stats1['mean'] - stats2['mean']
        error = np.sqrt(stats1['error']**2 + stats2['error']**2)
        # p-value based on standard two-sample t-test
        result['p_value'] = ttest_ind(stats1['data'], stats2['data'], equal_var=True)[1]
    # Case 3: scenario_combination is ∑(Θ1-All0)
    # Note: ∑(Θ1-All0) = ∑(Θ1)-10xAll0
    elif scenario_combination == sum_theta1_label:
        theta1_scenarios = [s for s in _inverted_scenario_name_dict.keys() if
                            (s[-1] == '1' and s not in ['Correct1', 'All1'])]
        if len(theta1_scenarios) != 10:
            raise RuntimeError('theta1_scenarios = {}'.format(theta1_scenarios))
        result['contributing_scenarios'] = theta1_scenarios + ['All0', ]
        # Call recursively to get mean and error for All0
        stats_all0 = load_2d_stats(scenario_combination='All0', variable=variable)
        # Call recursively to get lists of means and errors for each Θ1 scenario
        mean_list, error_list = _load_means_and_errors(theta1_scenarios, variable)
        # Combine to get sum of means and the combined error
        mean = sum(mean_list) - 10*stats_all0['mean']
        error = np.sqrt((10*stats_all0['error'])**2 + sum([e**2 for e in error_list]))
    # Case 4: scenario_combination is ∑(All1-Θ0)
    # Note: ∑(All1-Θ0) = 10xAll1-∑(Θ0)
    elif scenario_combination == sum_theta0_label:
        theta0_scenarios = [s for s in _inverted_scenario_name_dict.keys() if
                            (s[-1] == '0' and s != 'All0')]
        if len(theta0_scenarios) != 10:
            raise RuntimeError('theta0_scenarios = {}'.format(theta0_scenarios))
        result['contributing_scenarios'] = theta0_scenarios + ['All1', ]
        # Call recursively to get mean and error for All1
        stats_all1 = load_2d_stats(scenario_combination='All1', variable=variable)
        # Call recursively to get lists of means and errors for each Θ0 scenario
        mean_list, error_list = _load_means_and_errors(theta0_scenarios, variable)
        # Combine to get sum of means and the combined error
        mean = 10*stats_all1['mean'] - sum(mean_list)
        error = np.sqrt((10*stats_all1['error'])**2 + sum([e**2 for e in error_list]))
    # Case 5: scenario_combination is ∑(All1-Θ0)-∑(Θ1-All0)
    elif scenario_combination == sum_theta0_label + '-' + sum_theta1_label:
        # Call recursively to get 2D stats for each scenario combination
        stats1 = load_2d_stats(scenario_combination=sum_theta0_label, variable=variable)
        stats2 = load_2d_stats(scenario_combination=sum_theta1_label, variable=variable)
        # Contributing scenarios
        result['contributing_scenarios'] = (stats1['contributing_scenarios'] +
                                            stats2['contributing_scenarios'])
        # Combine to get difference between means and the combined error
        mean = stats1['mean'] - stats2['mean']
        error = np.sqrt(stats1['error']**2 + stats2['error']**2)
    # Case 6: scenario_combination is mean of two differences
    #     e.g. '((EAs1-All0)+(All1-EAs0))/2'
    elif len(scenario_combination.split('+')) == 2 and scenario_combination[-2:] == '/2':
        # Strip the trailing '/2' and all parentheses, then split into the two differences
        scenario_combination1, scenario_combination2 = \
            scenario_combination[:-2].replace('(', '').replace(')', '').split('+')
        # Call recursively to get 2D stats for each scenario combination
        stats1 = load_2d_stats(scenario_combination=scenario_combination1, variable=variable)
        stats2 = load_2d_stats(scenario_combination=scenario_combination2, variable=variable)
        # Contributing scenarios
        result['contributing_scenarios'] = (stats1['contributing_scenarios'] +
                                            stats2['contributing_scenarios'])
        # Combine to get means of means and the combined error
        mean = (stats1['mean'] + stats2['mean']) / 2
        error = (np.sqrt(stats1['error']**2 + stats2['error']**2)) / 2
    else:
        raise ValueError('scenario_combination not recognized')
    result['mean'] = mean
    result['error'] = error
    # 99% confidence interval based on standard error
    result['ci99'] = (mean - 2.576 * error, mean + 2.576 * error)
    # Save result for future reference
    _2d_stats_dict[cache_key] = result
    # Return result
    return result
Ejemplo n.º 5
0
def prepare_test_data():
    """
    Load data01.nc and manipulate to create additional test data.
    Used to load data into data_dict below.

    The file is read from the 'data' directory next to this module.

    Returns:
        dictionary, with the following keys:
            data01: Dataset loaded from data01.nc
            da_ts, da_precl: the 'TS' and 'PRECL' DataArrays of data01
            ds_shift_lon: data01 after climapy.xr_shift_lon with default arguments
            ds_rev_lon, ds_rev_lat, ds_rev_both: reversed lon and/or lat coords and data
            ds_transposed: data01 transposed
            ds_renamed: 'lon'/'lat' renamed to 'longitude'/'latitude'
            ds_irr_lon, ds_irr_lat, ds_irr_both: small random noise added to lon and/or lat
            ds_strange: large random noise added to both lon and lat
    """
    # Dictionary in which to store data
    data_dict = {}
    # Load data01.nc Dataset
    # Note: os.path.join is used so that the path stays relative when dirname(__file__) is
    # empty; plain concatenation would give the absolute path '/data/data01.nc' in that case
    # NOTE(review): autoclose is believed to be deprecated in newer xarray versions - confirm
    data01_path = os.path.join(os.path.dirname(__file__), 'data', 'data01.nc')
    data01 = xr.open_dataset(data01_path, decode_times=False, autoclose=True)
    data_dict['data01'] = data01.copy()
    # Extract two *DataArrays* - to test functions with DataArrays
    da_ts = data01['TS'].copy()
    da_precl = data01['PRECL'].copy()
    data_dict['da_ts'] = da_ts.copy()
    data_dict['da_precl'] = da_precl.copy()
    # Dataset with *shifted* longitudes
    ds_shift_lon = climapy.xr_shift_lon(data01.copy())
    data_dict['ds_shift_lon'] = ds_shift_lon.copy()
    # Datasets with *reversed* lon/lat coordinates and data
    # (the coord and the matching axis of each variable are reversed together)
    ds_rev_lon = data01.copy()
    ds_rev_lon['lon'].values = ds_rev_lon['lon'].values[::-1]
    for var_name in ['TS', 'PRECL']:  # array order: time, lat, lon
        ds_rev_lon[var_name].values = ds_rev_lon[var_name].values[:, :, ::-1]
    ds_rev_lat = data01.copy()
    ds_rev_lat['lat'].values = ds_rev_lat['lat'].values[::-1]
    for var_name in ['TS', 'PRECL']:
        ds_rev_lat[var_name].values = ds_rev_lat[var_name].values[:, ::-1, :]
    ds_rev_both = data01.copy()
    ds_rev_both['lat'].values = ds_rev_both['lat'].values[::-1]
    ds_rev_both['lon'].values = ds_rev_both['lon'].values[::-1]
    for var_name in ['TS', 'PRECL']:
        ds_rev_both[var_name].values = ds_rev_both[var_name].values[:, ::-1, ::-1]
    data_dict['ds_rev_lon'] = ds_rev_lon.copy()
    data_dict['ds_rev_lat'] = ds_rev_lat.copy()
    data_dict['ds_rev_both'] = ds_rev_both.copy()
    # Dataset with *transposed* lon/lat coords
    ds_transposed = data01.copy()
    ds_transposed = ds_transposed.transpose()
    data_dict['ds_transposed'] = ds_transposed.copy()
    # Dataset with *renamed* longitude and latitude coords
    ds_renamed = data01.copy()
    ds_renamed = ds_renamed.rename({'lon': 'longitude', 'lat': 'latitude'})
    data_dict['ds_renamed'] = ds_renamed.copy()
    # Datasets with slightly *irregular* lon/lat coords, yet still monotonic
    # Note: the noise below is random, so these coords differ between runs
    # (presumably unless np_rand is seeded elsewhere - TODO confirm)
    nx, ny = data01['lon'].size, data01['lat'].size
    lon_irr = (data01['lon'].values +
               np_rand.uniform(low=-0.5, high=0.5, size=nx))  # add small amount of noise
    lon_irr[[0, -1]] = data01['lon'].values[[0, -1]]  # keep end values unchanged
    lat_irr = (data01['lat'].values +
               np_rand.uniform(low=-0.5, high=0.5, size=ny))
    lat_irr[[0, -1]] = data01['lat'].values[[0, -1]]
    ds_irr_lon = data01.copy()
    ds_irr_lon['lon'].values = lon_irr.copy()
    ds_irr_lat = data01.copy()
    ds_irr_lat['lat'].values = lat_irr.copy()
    ds_irr_both = data01.copy()
    ds_irr_both['lon'].values = lon_irr.copy()
    ds_irr_both['lat'].values = lat_irr.copy()
    data_dict['ds_irr_lon'] = ds_irr_lon.copy()
    data_dict['ds_irr_lat'] = ds_irr_lat.copy()
    data_dict['ds_irr_both'] = ds_irr_both.copy()
    # Dataset with *strange* lon/lat coords - very irregular and not monotonic
    lon_strange = (data01['lon'].values +
                   np_rand.uniform(low=-10, high=10, size=nx))  # add large amount of noise
    lon_strange[[0, -1]] = data01['lon'].values[[0, -1]]  # keep end values unchanged
    lat_strange = (data01['lat'].values + np_rand.uniform(low=-10, high=10, size=ny))
    lat_strange[[0, -1]] = data01['lat'].values[[0, -1]]  # keep end values unchanged
    ds_strange = data01.copy()
    ds_strange['lon'].values = lon_strange.copy()
    ds_strange['lat'].values = lat_strange.copy()
    data_dict['ds_strange'] = ds_strange.copy()
    # Return dictionary of data
    return data_dict
Ejemplo n.º 6
0
 def test_non_monotonic(self):
     """
     Check that xr_shift_lon raises ValueError for the 'ds_strange' dataset.
     """
     strange_data = data_dict['ds_strange']
     with pytest.raises(ValueError):
         climapy.xr_shift_lon(strange_data)
Ejemplo n.º 7
0
 def test_incorrect_lon_name(self):
     """
     Check that xr_shift_lon raises KeyError when called with a mismatched longitude name.
     """
     # Look up the test data outside the pytest.raises blocks: a missing data_dict entry would
     # also raise KeyError, and must fail the test rather than be taken as the expected error
     ds_renamed = data_dict['ds_renamed']
     data01 = data_dict['data01']
     # Default lon_name used with the 'ds_renamed' dataset
     with pytest.raises(KeyError):
         climapy.xr_shift_lon(ds_renamed)
     # lon_name='longitude' used with the 'data01' dataset
     with pytest.raises(KeyError):
         climapy.xr_shift_lon(data01, lon_name='longitude')