def test_weight_error(self):
    """Expect ``ValueError`` when ``self.weights`` is passed unsliced.

    The second cube is reduced to its first leading-dimension slice while
    the weights are passed whole.
    """
    with self.assertRaises(ValueError):
        first_slice_b = self.cube_b[0, :, :]
        stats.pearsonr(
            self.cube_a,
            first_slice_b,
            ["latitude", "longitude"],
            weights=self.weights,
        )
def test_mdtol(self):
    """Check ``pearsonr`` with and without ``mdtol`` on a half-masked series.

    The second input has the last three of its six points masked.  With
    the default tolerance a correlation value is expected; with
    ``mdtol=0.49`` the result is expected to be masked.
    """
    series = self.cube_a[:, 0, 0]
    half_masked = series.copy()
    hide_last_three = np.array([False] * 3 + [True] * 3)
    half_masked.data = ma.array(series.data, mask=hide_last_three)

    default_result = stats.pearsonr(series, half_masked)
    strict_result = stats.pearsonr(series, half_masked, mdtol=0.49)

    self.assertArrayAlmostEqual(default_result.data, np.array([0.74586593]))
    fully_masked = ma.array([0], mask=[True])
    self.assertMaskedArrayEqual(strict_result.data, fully_masked)
def test_mdtol(self):
    """Compare the default result with the ``mdtol=0.49`` result.

    One input is a copy of the other with its final three points masked.
    """
    unmasked = self.cube_a[:, 0, 0]
    masked_copy = unmasked.copy()
    # Boolean mask: False for indices 0-2, True for indices 3-5.
    tail_mask = np.arange(6) >= 3
    masked_copy.data = ma.array(unmasked.data, mask=tail_mask)

    r1 = stats.pearsonr(unmasked, masked_copy)
    r2 = stats.pearsonr(unmasked, masked_copy, mdtol=0.49)

    expected_value = np.array([0.74586593])
    expected_masked = ma.array([0], mask=[True])
    self.assertArrayAlmostEqual(r1.data, expected_value)
    self.assertMaskedArrayEqual(r2.data, expected_masked)
def test_broadcast_cubes_weighted(self):
    """Weighted broadcast result must match the slice-by-slice results.

    ``cube_a`` is correlated against the first slice of ``cube_b`` in one
    call, and then again one leading-dimension slice at a time.
    """
    broadcast = stats.pearsonr(
        self.cube_a,
        self.cube_b[0, :, :],
        ['latitude', 'longitude'],
        weights=self.weights[0, :, :],
    )

    per_slice = []
    for index in range(6):
        single = stats.pearsonr(
            self.cube_a[index, :, :],
            self.cube_b[0, :, :],
            ['latitude', 'longitude'],
            weights=self.weights[0, :, :],
        )
        per_slice.append(single.data)

    self.assertArrayAlmostEqual(broadcast.data, np.array(per_slice))
def test_broadcast_cubes(self):
    """Broadcasting must work in either argument order.

    Both orderings are compared against correlations computed one
    leading-dimension slice of ``cube_a`` at a time.
    """
    forward = stats.pearsonr(
        self.cube_a, self.cube_b[0, :, :], ['latitude', 'longitude'])
    backward = stats.pearsonr(
        self.cube_b[0, :, :], self.cube_a, ['latitude', 'longitude'])

    per_slice = []
    for index in range(6):
        single = stats.pearsonr(
            self.cube_a[index, :, :],
            self.cube_b[0, :, :],
            ['latitude', 'longitude'],
        )
        per_slice.append(single.data)

    self.assertArrayEqual(forward.data, np.array(per_slice))
    self.assertArrayEqual(backward.data, np.array(per_slice))
def test_broadcast_cubes_weighted(self):
    """Compare one weighted broadcast call with six per-slice calls."""

    def correlate(first):
        # Every call uses the same second cube slice, coordinates and
        # weights; only the first cube differs.
        return stats.pearsonr(
            first,
            self.cube_b[0, :, :],
            ['latitude', 'longitude'],
            weights=self.weights[0, :, :],
        )

    r = correlate(self.cube_a)
    r_by_slice = [correlate(self.cube_a[i, :, :]).data for i in range(6)]
    self.assertArrayAlmostEqual(r.data, np.array(r_by_slice))
def test_common_mask_simple(self):
    """With ``common_mask=True`` a series and its half-masked copy give r = 1."""
    series = self.cube_a[:, 0, 0]
    half_masked = series.copy()
    hide_last_three = np.array([False] * 3 + [True] * 3)
    half_masked.data = ma.array(series.data, mask=hide_last_three)

    result = stats.pearsonr(series, half_masked, common_mask=True)

    expected = np.array([1.])
    self.assertArrayAlmostEqual(result.data, expected)
def test_compatible_cubes_weighted(self):
    """Weighted correlation over latitude/longitude matches stored values."""
    expected = [
        0.79105429,
        0.79988078,
        0.78825089,
        0.79925653,
        0.79009810,
        0.80115292,
    ]
    result = stats.pearsonr(
        self.cube_a, self.cube_b, ['latitude', 'longitude'], self.weights)
    self.assertArrayAlmostEqual(result.data, expected)
def test_4d_cube_2_dims(self):
    """Correlate two merged 4-D cubes over latitude and longitude.

    Each 4-D cube is built by merging ``cube_a`` and ``cube_b`` on a scalar
    ``realization`` coordinate; the second merge swaps which cube carries
    which realization value.
    """
    cube_a, cube_b = self.cube_a, self.cube_b
    member_0 = iris.coords.AuxCoord(np.int32(0), 'realization')
    member_1 = iris.coords.AuxCoord(np.int32(1), 'realization')

    # Make cubes merge-able.
    cube_a.add_aux_coord(member_0)
    cube_b.add_aux_coord(member_1)
    for cube in (cube_a, cube_b):
        for coord_name in ('forecast_period', 'forecast_reference_time'):
            cube.remove_coord(coord_name)
    merged_first = iris.cube.CubeList([cube_a, cube_b]).merge()[0]

    # Swap the realization values between the two cubes and merge again.
    for cube, member in ((cube_a, member_1), (cube_b, member_0)):
        cube.remove_coord('realization')
        cube.add_aux_coord(member)
    merged_second = iris.cube.CubeList([cube_a, cube_b]).merge()[0]

    result = stats.pearsonr(
        merged_first, merged_second, ['latitude', 'longitude'])

    expected_row = [0.99733591, 0.99501693, 0.99674225,
                    0.99495268, 0.99217004, 0.99362189]
    self.assertArrayAlmostEqual(result.data, [expected_row, expected_row])
def test_4d_cube_2_dims(self):
    """Check ``pearsonr`` on 4-D cubes when only two dimensions are collapsed."""

    def merge_pair():
        # Merge the two fixture cubes in their current state into one cube.
        return iris.cube.CubeList([self.cube_a, self.cube_b]).merge()[0]

    realization_0 = iris.coords.AuxCoord(np.int32(0), 'realization')
    realization_1 = iris.coords.AuxCoord(np.int32(1), 'realization')

    # Make cubes merge-able.
    self.cube_a.add_aux_coord(realization_0)
    self.cube_b.add_aux_coord(realization_1)
    self.cube_a.remove_coord('forecast_period')
    self.cube_a.remove_coord('forecast_reference_time')
    self.cube_b.remove_coord('forecast_period')
    self.cube_b.remove_coord('forecast_reference_time')
    four_d_cube_a = merge_pair()

    # Exchange the realization coordinates before the second merge.
    self.cube_a.remove_coord('realization')
    self.cube_b.remove_coord('realization')
    self.cube_a.add_aux_coord(realization_1)
    self.cube_b.add_aux_coord(realization_0)
    four_d_cube_b = merge_pair()

    correlation = stats.pearsonr(
        four_d_cube_a, four_d_cube_b, ['latitude', 'longitude'])

    expected_corr = [
        [0.99733591, 0.99501693, 0.99674225,
         0.99495268, 0.99217004, 0.99362189],
        [0.99733591, 0.99501693, 0.99674225,
         0.99495268, 0.99217004, 0.99362189],
    ]
    self.assertArrayAlmostEqual(correlation.data, expected_corr)
def lagged_correlation(cube, lag, first_year=1950, last_year=2010):
    """Correlate a cube with a copy of itself shifted by ``lag`` years.

    Two sub-periods are extracted from ``cube``: an unlagged one ending at
    ``last_year - lag`` and a lagged one starting at ``first_year + lag``.
    Both are rebuilt on identical coordinates (the unlagged time points and
    the original latitude/longitude points) and correlated along time.

    Parameters
    ----------
    cube : iris.cube.Cube
        Input cube with ``time``, ``latitude`` and ``longitude`` coordinates
        on dimensions 0, 1 and 2 respectively.
    lag : int
        Lag in years.
    first_year, last_year : int, optional
        Years bounding the analysed period.  The defaults (1950 and 2010)
        reproduce the previously hard-coded values.

    Returns
    -------
    iris.cube.Cube
        Pearson's r between the lagged and unlagged data over ``time``.

    Notes
    -----
    The lagged data is placed on the unlagged time axis, so both extracted
    periods are assumed to contain the same number of time steps.
    """
    end, start = last_year - lag, first_year + lag
    tnolag = PartialDateTime(year=end)
    tlag = PartialDateTime(year=start)

    # Constrain a lagged and an unlagged cube.  A separate name is used for
    # the lagged cube so the ``lag`` argument is not overwritten.
    nolag = cube.extract(iris.Constraint(time=lambda t: t.point <= tnolag))
    lagged = cube.extract(iris.Constraint(time=lambda t: tlag <= t.point))

    # Define coords to unify the dimensions where the data lies.
    time = DimCoord(nolag.coord('time').points, standard_name='time')
    latitude = DimCoord(cube.coord('latitude').points,
                        standard_name='latitude', units='degrees')
    longitude = DimCoord(cube.coord('longitude').points,
                         standard_name='longitude', units='degrees')
    dim_coords_and_dims = [(time, 0), (latitude, 1), (longitude, 2)]

    # Create two cubes with a lag between them but the same coords.
    lag_cube = Cube(lagged.data, dim_coords_and_dims=dim_coords_and_dims)
    nolag_cube = Cube(nolag.data, dim_coords_and_dims=dim_coords_and_dims)

    # Calculate correlation.
    corr_cube = istats.pearsonr(lag_cube, nolag_cube, corr_coords='time')
    return corr_cube
def test_compatible_cubes_weighted(self):
    """Check the weighted latitude/longitude correlation against references."""
    corr_coords = ['latitude', 'longitude']
    correlation = stats.pearsonr(
        self.cube_a, self.cube_b, corr_coords, self.weights)

    reference_values = [0.79106045, 0.79989169, 0.78826918,
                        0.79925855, 0.79011544, 0.80115837]
    self.assertArrayAlmostEqual(correlation.data, reference_values)
def test_common_mask_simple(self):
    """A 1-D series against its half-masked copy correlates perfectly.

    ``common_mask=True`` is passed so that both inputs share one mask.
    """
    unmasked = self.cube_a[:, 0, 0]
    masked_copy = unmasked.copy()
    # Boolean mask: False for indices 0-2, True for indices 3-5.
    tail_mask = np.arange(6) >= 3
    masked_copy.data = ma.array(unmasked.data, mask=tail_mask)

    r = stats.pearsonr(unmasked, masked_copy, common_mask=True)
    self.assertArrayAlmostEqual(r.data, np.array([1.]))
def test_common_mask_broadcast(self):
    """``common_mask=True`` with a 1-D cube broadcast against a 2-D cube.

    The 2-D data is the 1-D data tiled twice, so both columns are expected
    to correlate perfectly whichever points are masked.
    """
    one_d = self.cube_a[:, 0, 0]
    two_d = self.cube_a[:, 0:2, 0]

    last_point_masked = np.array([0, 0, 0, 0, 0, 1], dtype=bool)
    one_d.data = ma.array(one_d.data, mask=last_point_masked)

    tiled = np.tile(one_d.data[:, np.newaxis], 2)
    two_d.data = ma.array(tiled, mask=np.zeros((6, 2), dtype=bool))

    expected = np.array([1., 1.])

    # 2d mask varies on unshared coord:
    two_d.data.mask[0, 1] = 1
    weighted = stats.pearsonr(
        one_d, two_d, weights=self.weights[:, 0, 0], common_mask=True)
    self.assertArrayAlmostEqual(weighted.data, expected)

    # 2d mask does not vary on unshared coord:
    two_d.data.mask[0, 0] = 1
    unweighted = stats.pearsonr(one_d, two_d, common_mask=True)
    self.assertArrayAlmostEqual(unweighted.data, expected)
def test_compatible_cubes(self):
    """Correlation over latitude/longitude matches the reference values.

    The call is made directly rather than inside ``assertRaises``: the
    result is compared against expected values, so the call must succeed
    for the test to check anything.
    """
    r = stats.pearsonr(self.cube_a, self.cube_b, ['latitude', 'longitude'])
    self.assertArrayAlmostEqual(r.data, [
        0.81114936, 0.81690538, 0.79833135,
        0.81118674, 0.79745386, 0.81278484,
    ])
def test_compatible_cubes(self):
    """Unweighted latitude/longitude correlation matches stored values."""
    expected = [
        0.81114936,
        0.81690538,
        0.79833135,
        0.81118674,
        0.79745386,
        0.81278484,
    ]
    result = stats.pearsonr(
        self.cube_a, self.cube_b, ['latitude', 'longitude'])
    self.assertArrayAlmostEqual(result.data, expected)
def test_compatible_cubes(self):
    """Check the latitude/longitude correlation of the two fixture cubes."""
    corr_coords = ['latitude', 'longitude']
    correlation = stats.pearsonr(self.cube_a, self.cube_b, corr_coords)

    reference_values = [0.99733591, 0.99501693, 0.99674225,
                        0.99495268, 0.99217004, 0.99362189]
    self.assertArrayAlmostEqual(correlation.data, reference_values)
def test_compatible_cubes_weighted(self):
    """Weighted ``pearsonr`` over latitude/longitude gives the stored values."""
    weights = self.weights
    weighted = stats.pearsonr(
        self.cube_a,
        self.cube_b,
        ['latitude', 'longitude'],
        weights,
    )
    self.assertArrayAlmostEqual(
        weighted.data,
        [
            0.79106045,
            0.79989169,
            0.78826918,
            0.79925855,
            0.79011544,
            0.80115837,
        ],
    )
def test_compatible_cubes_weighted(self):
    """Compare the weighted correlation with its six reference values."""
    first, second = self.cube_a, self.cube_b
    outcome = stats.pearsonr(
        first, second, ['latitude', 'longitude'], self.weights)

    wanted = [0.79105429, 0.79988078, 0.78825089,
              0.79925653, 0.79009810, 0.80115292]
    self.assertArrayAlmostEqual(outcome.data, wanted)
def test_common_mask_broadcast(self):
    """Check ``common_mask=True`` when the inputs differ in dimensionality."""
    series = self.cube_a[:, 0, 0]
    pair = self.cube_a[:, 0:2, 0]

    series_mask = np.array([0, 0, 0, 0, 0, 1], dtype=bool)
    series.data = ma.array(series.data, mask=series_mask)

    # The 2-D data is the (now masked) series repeated in two columns,
    # given an initially all-False mask.
    pair_mask = np.zeros((6, 2), dtype=bool)
    pair.data = ma.array(
        np.tile(series.data[:, np.newaxis], 2), mask=pair_mask)

    # 2d mask varies on unshared coord:
    pair.data.mask[0, 1] = 1
    r = stats.pearsonr(
        series,
        pair,
        weights=self.weights[:, 0, 0],
        common_mask=True,
    )
    self.assertArrayAlmostEqual(r.data, np.array([1., 1.]))

    # 2d mask does not vary on unshared coord:
    pair.data.mask[0, 0] = 1
    r = stats.pearsonr(series, pair, common_mask=True)
    self.assertArrayAlmostEqual(r.data, np.array([1., 1.]))
def calc_pearsonr(cubex, cubey, corr_coords, alpha=None):
    """Calculate the Pearson's r correlation coefficient with significance.

    Calculates Pearson's r over the specified coordinates of two Iris cubes.
    These cubes must have the same coordinates, but this is not checked.
    A common data mask on cubex and cubey is enforced when calculating r.

    If alpha is set, p-values are calculated following the notes in,
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html
    which are used to mask boxes with pval > alpha.

    Parameters
    ----------
    cubex, cubey: iris.cube.Cube
        Input data cubes to be correlated.  Their data may be masked or
        plain arrays.
    corr_coords: str or list of str
        The cube coordinate name(s) over which to calculate correlations.
    alpha : float, optional
        Type I error rate above which r values are rejected, range (0, 1).

    Returns
    -------
    corr: iris.cube.Cube
        Pearson's r correlation coefficient.  When alpha is given, values
        built from two or fewer points, or with a p-value above alpha, are
        masked.
    """
    corr = istats.pearsonr(cubex, cubey, corr_coords, common_mask=True)
    if alpha is None:
        return corr

    # Calculate the N in the correlations for each gridbox.
    # ``getmaskarray`` always returns a full-shape boolean array, so this
    # also works for unmasked data and for masked arrays whose mask is the
    # scalar ``nomask`` (where ``.data.mask`` would fail or collapse to a
    # scalar).
    common_mask = (np.ma.getmaskarray(cubex.data)
                   | np.ma.getmaskarray(cubey.data))
    counter = cubex.copy()
    counter.data = np.ma.array(np.where(common_mask, 0, 1), mask=common_mask)
    count = counter.collapsed(corr_coords, iris.analysis.COUNT,
                              function=lambda x: x)

    # Calculate the p-value of the correlation from r and N values.
    rabs = -np.ma.abs(corr.data)
    shps = count.data / 2. - 1
    pval = 2 * sstats.beta.cdf(rabs, shps, shps, loc=-1, scale=2)

    # Mask out correlations (1) made from two values or fewer or (2) with
    # p-values greater than the requested alpha level.  Boxes whose count
    # is itself masked are rejected as well.
    extra_mask = np.ma.filled((count.data <= 2) | (pval > alpha), True)

    # ``masked_where`` merges the new mask with any existing one and does
    # not require ``corr.data`` to already carry a full mask array.
    corr.data = np.ma.masked_where(extra_mask, corr.data)
    return corr
def main(cfg):
    """Correlate each dataset's time mean with the reference dataset's.

    Input metadata from ``cfg['input_data']`` is grouped by standard name.
    Within each group the reference dataset is the first entry whose
    ``reference_dataset`` equals its own ``dataset``; every other dataset
    is time-averaged, correlated with the time-averaged reference using
    the ``cfg['pearsonr']`` settings, and handed to ``plot_diagnostic``.

    Raises
    ------
    ValueError
        If a group has no entry naming itself as the reference dataset.
    """
    grouped = group_metadata(
        cfg['input_data'].values(),
        'standard_name',
        sort='dataset',
    )

    for standard_name in grouped:
        logger.info("Processing variable %s", standard_name)
        datasets = grouped[standard_name]

        # Load reference dataset
        reference_attrs = next(
            (attrs for attrs in datasets
             if attrs['reference_dataset'] == attrs['dataset']),
            None,
        )
        if reference_attrs is None:
            raise ValueError("No reference_dataset defined in recipe.")

        reference_name = reference_attrs['dataset']
        logger.info("Using %s as a reference dataset", reference_name)
        reference_filename = reference_attrs['filename']
        reference = iris.load_cube(reference_filename).collapsed('time', MEAN)
        logger.info("Reference cube:\n%s\n%s", reference_filename, reference)

        # Compute and plot correlation
        for attributes in datasets:
            if attributes['dataset'] == reference_name:
                continue
            logger.info("Processing dataset %s", attributes['dataset'])

            filename = attributes['filename']
            dataset = iris.load_cube(filename)
            kwargs = cfg.get('pearsonr', {})
            logger.info(
                "Computing correlation with settings %s between "
                "reference and cube:\n%s\n%s", kwargs, filename, dataset)
            dataset = dataset.collapsed('time', MEAN)

            # Fix issue with losing vertical bounds in extract_level
            # preprocessor
            if reference.coords(axis='Z'):
                ref_coord = reference.coord(axis='Z')
                coord = dataset.coord(ref_coord)
                if not coord.has_bounds():
                    coord.bounds = ref_coord.bounds

            cube = pearsonr(dataset, reference, **kwargs)

            stem = os.path.splitext(os.path.basename(filename))[0]
            name = '{}_correlation_with_{}'.format(stem, reference_name)
            provenance_record = get_provenance_record(
                attributes,
                ancestor_files=[reference_filename, filename],
                plot_type=cfg['plot_type'],
            )
            plot_diagnostic(cube, name, provenance_record, cfg)
def diso(obs, model):
    '''DISO (Distance between Indices of Simulation and Observation)

    Index defined by Hu et al. (2018, doi:10.1002/joc.5972) designed to
    describe the overall performances of different models against the
    observed field quantitatively. It merges r (correlation coefficient),
    AE (absolute error, measuring any persistent bias) and RMSE (root mean
    square error, averaged magnitude of the deviation) to summarise model
    performance. Each metric is given equal weight such that the DISO is the
    Euclidean distance between the obs and the 3 indices in 3D space defined
    by r, AE and RMSE.
    [If this doesn't make sense, go read the paper - it has a nice figure]

    In DISO space, diso(obs) = 0. diso(model) = 0 indicates that the model
    exactly matches the obs, according to metrics measured by RMSE, AE and r.

    Note that horizontal coordinates without bounds get guessed bounds added
    in place, on both input cubes.

    Args:
        obs (iris.cube.Cube): Cube of base values (to which you are comparing)
        model (iris.cube.Cube): Cube of model values

    Returns:
        (rr, nae, nrmse) (tuple): Stats used to calculate the DISO
        diso (iris.cube.Cube): Cube of DISO values

    Raises:
        TypeError: If either argument is not an Iris cube.
    '''
    # Check input
    if not isinstance(obs, iris.cube.Cube) or not isinstance(model, iris.cube.Cube):
        raise TypeError('Args should be Iris cubes')

    # Check bounds. Each horizontal coordinate is tested on its own:
    # guess_bounds() raises if bounds already exist, and a coordinate left
    # without bounds would break the area weighting below.
    for cube in (obs, model):
        for axis in ('x', 'y'):
            coord = cube.coord(axis=axis)
            if not coord.has_bounds():
                coord.guess_bounds()

    # Correlation coefficient
    rr = pearsonr(obs, model)
    rr.convert_units('1')

    # Difference
    diff = model - obs

    # Absolute obs mean
    gridweights = iris.analysis.cartography.area_weights(obs)
    # TODO: Generalise collapsing of cube dimensions, handle rotated pole and time?
    aobs_bar = cabs(obs.collapsed(['latitude', 'longitude'],
                                  iris.analysis.MEAN,
                                  weights=gridweights))

    # Normalised Absolute Error: area-weighted mean of (model - obs),
    # scaled by aobs_bar
    nae = diff.collapsed(['latitude', 'longitude'],
                         iris.analysis.MEAN,
                         weights=gridweights) / aobs_bar
    nae.rename('Normalised Absolute Error')

    # Normalised RMSE
    nrmse = rmse(model, obs) / aobs_bar
    nrmse.rename('Normalised Root Mean Square Error')

    # Distance between Indices of Simulation and Observation
    diso = ((rr - 1) ** 2 + nae ** 2 + nrmse ** 2) ** 0.5
    diso.rename('Distance between Indices of Simulation and Observation')

    return (rr, nae, nrmse), diso
def test_compatible_cubes(self):
    """Correlating the fixture cubes over the horizontal coordinates."""
    horizontal = ["latitude", "longitude"]
    expected = [
        0.81114936,
        0.81690538,
        0.79833135,
        0.81118674,
        0.79745386,
        0.81278484,
    ]
    actual = stats.pearsonr(self.cube_a, self.cube_b, horizontal).data
    self.assertArrayAlmostEqual(actual, expected)
def plot_rmaps(file_in_nhd, files_in_prm, file_out_plot, title=None):
    """Plot maps of the correlation between precipitation and hot days.

    One map is drawn per ``(season, file)`` entry of ``files_in_prm`` on a
    2 x 2 grid of PlateCarree axes, so at most four entries are plotted.

    Parameters
    ----------
    file_in_nhd : str
        File from which the "Number of hot days" cube is loaded.
    files_in_prm : dict
        Maps a season label to a file holding a "precipitation" cube.
    file_out_plot : str
        Path the figure is saved to.
    title : str, optional
        Figure title; a default describing the correlation is used if None.
    """
    region = {
        "longitudes": (60 - 1e-3, 150 + 1e-3),
        "latitudes": (-5, 55),
    }
    if title is None:
        title = ("Correlation between antecedent precipitation and "
                 "number of JJA hot days")

    nhd = iris.load_cube(file_in_nhd, "Number of hot days")

    fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 8),
                            subplot_kw=dict(projection=ccrs.PlateCarree()))
    fig.suptitle(title)

    cticks = np.linspace(-1, 1, 11)
    # ``matplotlib.cm.get_cmap`` was removed in Matplotlib 3.9;
    # ``pyplot.get_cmap`` accepts the same arguments and is still available.
    cmap = plt.get_cmap("coolwarm_r", lut=2 * (len(cticks) - 1))

    mesh = None
    for ax, (season, file_prm) in zip(axs.flat, files_in_prm.items()):
        prm = iris.load_cube(file_prm, "precipitation")
        corr = pearsonr(prm, nhd, corr_coords="year")
        mesh = iplt.pcolormesh(corr, axes=ax, vmin=min(cticks),
                               vmax=max(cticks), cmap=cmap)
        ax.set_title("Season: %s" % season)
        ax.set_extent(region["longitudes"] + region["latitudes"])
        ax.add_feature(COASTLINE)
        _add_gridlines(ax)

    # Without any plotted map there is nothing for a colorbar to describe.
    if mesh is not None:
        cax = fig.add_axes([0.90, 0.53, 0.02, 0.35])
        fig.colorbar(mesh, cax=cax, ticks=cticks)

    # Save through the figure handle rather than the implicit current figure.
    fig.savefig(file_out_plot)
def test_incompatible_cubes(self):
    """Cubes sliced along different dimensions must raise ``ValueError``."""
    with self.assertRaises(ValueError):
        first = self.cube_a[:, 0, :]
        second = self.cube_b[0, :, :]
        stats.pearsonr(first, second, 'longitude')
def test_perfect_corr_all_dims(self):
    """A cube correlated with itself, with no coordinates given, yields 1."""
    result = stats.pearsonr(self.cube_a, self.cube_a)
    expected = np.array([1.])
    self.assertArrayEqual(result.data, expected)
def test_perfect_corr(self):
    """A cube correlated with itself over latitude/longitude gives six ones."""
    result = stats.pearsonr(
        self.cube_a, self.cube_a, ['latitude', 'longitude'])
    expected = np.ones(6)
    self.assertArrayEqual(result.data, expected)
def test_non_existent_coord(self):
    """An unknown coordinate name must raise ``CoordinateNotFoundError``."""
    missing_name = 'bad_coord'
    with self.assertRaises(CoordinateNotFoundError):
        stats.pearsonr(self.cube_a, self.cube_b, missing_name)
def test_non_existent_coord(self):
    """An unknown coordinate name must raise ``ValueError``."""
    cubes = (self.cube_a, self.cube_b)
    with self.assertRaises(ValueError):
        stats.pearsonr(*cubes, 'bad_coord')
def test_perfect_corr(self):
    """Self-correlation over the horizontal coordinates is exactly one."""
    cube = self.cube_a
    correlation = stats.pearsonr(cube, cube, ['latitude', 'longitude'])
    self.assertArrayEqual(correlation.data, np.full(6, 1.))
# Open CSV file with open('fpens_r.csv', 'w', newline='') as csvfile: csvwriter = csv.writer(csvfile, delimiter=',') csvwriter.writerow(['Name', 'Date', 'r']) # Loop through fpens files for file in fpensfiles: name = re.search(r'.*T1Hmax.(.*?).4p4.*', file).group(1) # Load downscaled ensemble ens = iris.load_cube(file).intersection(longitude=LON, latitude=LAT) # Load ERA5 era5 = iris.load_cube(f'fp.era5.*.{name}.nc', VAR).intersection(longitude=LON, latitude=LAT) # Add coordinate system era5.coord(axis='x').coord_system = ens.coord(axis='x').coord_system era5.coord(axis='y').coord_system = ens.coord(axis='y').coord_system # Regrid ERA5 onto ens grid era5r = iris.util.squeeze(era5.regrid(ens[0], iris.analysis.Nearest())) for e in ens.slices_over('forecast_reference_time'): frt = e.coord('forecast_reference_time') frts = frt.units.num2date(frt.points)[0].strftime('%Y-%m-%d %H') # metric = 1 - diso(e, era5r)[1].data metric = pearsonr(e, era5r).data print(f'{name} {frts}: {metric:.4f}') csvwriter.writerow([name, frts, metric])
except: print 'already have bounds' grid_areas = iris.analysis.cartography.area_weights(cube) ts = cube.collapsed(['latitude','longitude'],iris.analysis.MEAN,weights = grid_areas) ts_filtered = ts.copy() ts_filtered.data = high_pass_filter(ts_filtered.data,upper_limit_years) ts_filtered.data = low_pass_filter(ts_filtered.data,lower_limit_years) ts_filtered = ts_filtered[locs] ts_filtered_2D = cube.copy() ts_filtered_2D.data = np.swapaxes(np.swapaxes(np.tile(ts_filtered.data,[180,360,1]),1,2),0,1) cube.data = high_pass_filter(cube.data,upper_limit_years) cube.data = low_pass_filter(cube.data,lower_limit_years) data[model] = {} data[model]['tas'] = {} data[model]['tas'] = istats.pearsonr(ts_filtered_2D,cube,corr_coords=['time']) cube1.data = high_pass_filter(cube1.data,upper_limit_years) cube1.data = low_pass_filter(cube1.data,lower_limit_years) data[model]['tos'] = {} data[model]['tos'] = istats.pearsonr(ts_filtered_2D,cube1,corr_coords=['time']) cube2.data = high_pass_filter(cube2.data,upper_limit_years) cube2.data = low_pass_filter(cube2.data,lower_limit_years) data[model]['pr'] = {} data[model]['pr'] = istats.pearsonr(ts_filtered_2D,cube2,corr_coords=['time'])
def test_weight_error(self):
    """Passing the unsliced ``self.weights`` must raise ``ValueError``."""
    corr_coords = ['latitude', 'longitude']
    with self.assertRaises(ValueError):
        stats.pearsonr(
            self.cube_a,
            self.cube_b[0, :, :],
            corr_coords,
            weights=self.weights,
        )