def test_vectorise_call(self):
    # Check that the function being passed through gets called with
    # numpy.vectorize, before being applied to the points array.
    # The reason we use numpy.vectorize is to support multi-dimensional
    # coordinate points.
    fn = lambda coord, v: v**2
    with mock.patch('numpy.vectorize',
                    return_value=self.vectorised) as vectorise_patch:
        with mock.patch('iris.coords.AuxCoord') as aux_coord_constructor:
            add_categorised_coord(self.cube, 'foobar', self.coord, fn,
                                  units=self.units)

    # Check the constructor of AuxCoord gets called with the
    # appropriate arguments.
    # Start with the vectorised function.
    vectorise_patch.assert_called_once_with(fn)
    # Check the vectorize wrapper gets called with the appropriate args.
    self.vectorised.assert_called_once_with(self.coord, self.coord.points)
    # Check the AuxCoord constructor itself.
    aux_coord_constructor.assert_called_once_with(
        self.vectorised(self.coord, self.coord.points),
        units=self.units,
        attributes=self.coord.attributes.copy())
    # And check adding the aux coord to the cube mock.
    self.cube.add_aux_coord.assert_called_once_with(
        aux_coord_constructor(), self.cube.coord_dims(self.coord))
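# Standalone sketch (not part of the test above) of the behaviour being
# mocked: numpy.vectorize lets a scalar categorisation function apply
# elementwise to a multi-dimensional points array. Illustrative only.
import numpy as np

fn = lambda coord, v: v ** 2
points = np.arange(6).reshape(2, 3)   # 2-D coordinate points
vectorised = np.vectorize(fn)
print(vectorised(None, points))       # [[ 0  1  4]
                                      #  [ 9 16 25]]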
def add_hour_of_day(cube, coord, name='hour'):
    try:
        add_categorised_coord(cube, name, coord,
                              lambda coord, x: coord.units.num2date(x).hour)
    except ValueError as e:
        print(e)
def test_string_vectorised(self):
    # Check that special case handling of a vectorized string returning
    # function is taking place.
    fn = lambda coord, v: '0123456789'[:v]
    with mock.patch('numpy.vectorize',
                    return_value=self.vectorised) as vectorise_patch:
        with mock.patch('iris.coords.AuxCoord') as aux_coord_constructor:
            add_categorised_coord(self.cube, 'foobar', self.coord, fn,
                                  units=self.units)
    self.assertEqual(
        aux_coord_constructor.call_args[0][0],
        vectorise_patch(fn, otypes=[object])(self.coord, self.coord.points)
        .astype('|S64'))
def test_basic(self):
    cube = self.cube
    time_coord = self.time_coord

    ccat.add_year(cube, time_coord, 'my_year')
    ccat.add_day_of_month(cube, time_coord, 'my_day_of_month')
    ccat.add_day_of_year(cube, time_coord, 'my_day_of_year')

    ccat.add_month(cube, time_coord, 'my_month')
    with warnings.catch_warnings(record=True):
        ccat.add_month_shortname(cube, time_coord, 'my_month_shortname')
    ccat.add_month_fullname(cube, time_coord, 'my_month_fullname')
    ccat.add_month_number(cube, time_coord, 'my_month_number')

    ccat.add_weekday(cube, time_coord, 'my_weekday')
    ccat.add_weekday_number(cube, time_coord, 'my_weekday_number')
    with warnings.catch_warnings(record=True):
        ccat.add_weekday_shortname(cube, time_coord,
                                   'my_weekday_shortname')
    ccat.add_weekday_fullname(cube, time_coord, 'my_weekday_fullname')

    ccat.add_season(cube, time_coord, 'my_season')
    ccat.add_season_number(cube, time_coord, 'my_season_number')
    with warnings.catch_warnings(record=True):
        ccat.add_season_month_initials(cube, time_coord,
                                       'my_season_month_initials')
    ccat.add_season_year(cube, time_coord, 'my_season_year')

    # also test 'generic' categorisation interface
    def _month_in_quarter(coord, pt_value):
        date = coord.units.num2date(pt_value)
        return (date.month - 1) % 3

    ccat.add_categorised_coord(cube, 'my_month_in_quarter', time_coord,
                               _month_in_quarter)

    # To ensure consistent results between 32-bit and 64-bit
    # platforms, ensure all the numeric categorisation coordinates
    # are always stored as int64.
    for coord in cube.coords():
        if coord.long_name is not None and coord.points.dtype.kind == 'i':
            coord.points = coord.points.astype(np.int64)

    # check values
    self.assertCML(cube, ('categorisation', 'quickcheck.cml'))
def test_basic(self):
    # make a series of 'day numbers' for the time, that slide across
    # month boundaries
    day_numbers = np.arange(0, 600, 27, dtype=np.int32)

    cube = iris.cube.Cube(day_numbers, long_name='test cube',
                          units='metres')

    # use day numbers as data values also (don't actually use this for
    # anything)
    cube.data = day_numbers

    time_coord = iris.coords.DimCoord(
        day_numbers, standard_name='time',
        units=iris.unit.Unit('days since epoch', 'gregorian'))
    cube.add_dim_coord(time_coord, 0)

    # add test coordinates for examples wanted
    ccat.add_year(cube, time_coord)
    # NB test passing coord-name instead of coord itself
    ccat.add_day_of_month(cube, 'time')

    ccat.add_month(cube, time_coord)
    ccat.add_month_shortname(cube, time_coord, name='month_short')
    ccat.add_month_fullname(cube, time_coord, name='month_full')
    ccat.add_month_number(cube, time_coord, name='month_number')

    ccat.add_weekday(cube, time_coord)
    ccat.add_weekday_number(cube, time_coord, name='weekday_number')
    ccat.add_weekday_shortname(cube, time_coord, name='weekday_short')
    ccat.add_weekday_fullname(cube, time_coord, name='weekday_full')

    ccat.add_season(cube, time_coord)
    ccat.add_season_number(cube, time_coord, name='season_number')
    ccat.add_season_month_initials(cube, time_coord, name='season_months')
    ccat.add_season_year(cube, time_coord, name='year_ofseason')

    # also test 'generic' categorisation interface
    def _month_in_quarter(coord, pt_value):
        date = coord.units.num2date(pt_value)
        return (date.month - 1) % 3

    ccat.add_categorised_coord(cube, 'month_in_quarter', time_coord,
                               _month_in_quarter)

    for coord_name in ['month_number', 'month_in_quarter',
                       'weekday_number', 'season_number', 'year_ofseason',
                       'year', 'day']:
        cube.coord(coord_name).points = \
            cube.coord(coord_name).points.astype(np.int64)

    # check values
    self.assertCML(cube, ('categorisation', 'quickcheck.cml'))
def test_string_vectorised(self):
    # Check that special case handling of a vectorized string returning
    # function is taking place.
    def fn(coord, v):
        return "0123456789"[:v]

    with mock.patch(
        "numpy.vectorize", return_value=self.vectorised
    ) as vectorise_patch:
        with mock.patch("iris.coords.AuxCoord") as aux_coord_constructor:
            add_categorised_coord(
                self.cube, "foobar", self.coord, fn, units=self.units
            )

    self.assertEqual(
        aux_coord_constructor.call_args[0][0],
        vectorise_patch(fn, otypes=[object])(
            self.coord, self.coord.points
        ).astype("|S64"),
    )
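# Hedged illustration (separate from the test) of the string special case:
# vectorising with otypes=[object] stops numpy truncating every result to
# the length of the first one, and the final astype yields a fixed-width
# bytes dtype, as add_categorised_coord does for string-valued functions.
import numpy as np

fn = lambda coord, v: "0123456789"[:v]
points = np.array([3, 7])
out = np.vectorize(fn, otypes=[object])(None, points).astype("|S64")
print(out)   # [b'012' b'0123456']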
def test_basic(self):
    cube = self.cube
    time_coord = self.time_coord

    ccat.add_year(cube, time_coord, "my_year")
    ccat.add_day_of_month(cube, time_coord, "my_day_of_month")
    ccat.add_day_of_year(cube, time_coord, "my_day_of_year")

    ccat.add_month(cube, time_coord, "my_month")
    ccat.add_month_fullname(cube, time_coord, "my_month_fullname")
    ccat.add_month_number(cube, time_coord, "my_month_number")

    ccat.add_weekday(cube, time_coord, "my_weekday")
    ccat.add_weekday_number(cube, time_coord, "my_weekday_number")
    ccat.add_weekday_fullname(cube, time_coord, "my_weekday_fullname")

    ccat.add_season(cube, time_coord, "my_season")
    ccat.add_season_number(cube, time_coord, "my_season_number")
    ccat.add_season_year(cube, time_coord, "my_season_year")

    # also test 'generic' categorisation interface
    def _month_in_quarter(coord, pt_value):
        date = coord.units.num2date(pt_value)
        return (date.month - 1) % 3

    ccat.add_categorised_coord(
        cube, "my_month_in_quarter", time_coord, _month_in_quarter
    )

    # To ensure consistent results between 32-bit and 64-bit
    # platforms, ensure all the numeric categorisation coordinates
    # are always stored as int64.
    for coord in cube.coords():
        if coord.long_name is not None and coord.points.dtype.kind == "i":
            coord.points = coord.points.astype(np.int64)

    # check values
    self.assertCML(cube, ("categorisation", "quickcheck.cml"))
def get_seasonal_mean_sst(self, start_year=None, end_year=None,
                          season_to_months=None):
    """
    :param start_year:
    :param end_year:
    :param season_to_months:
    :return: dict(year -> season -> field)
    """
    def group_key(c, val):
        for k, months in season_to_months.items():
            if val in months:
                return k

    result = {}
    for the_year in range(start_year, end_year + 1):
        result[the_year] = {}
        data_path = self.year_to_path[the_year]
        cube = iris.load_cube(data_path, "Sea Surface temperature")
        print(cube)

        coord_categorisation.add_month_number(cube, "time")
        coord_categorisation.add_categorised_coord(cube, "season",
                                                   "month_number",
                                                   group_key)
        assert isinstance(cube, Cube)
        seas_mean = cube.aggregated_by(["season"], iris.analysis.MEAN)
        assert isinstance(seas_mean, Cube)
        assert isinstance(self.basemap, Basemap)

        for the_season in list(season_to_months.keys()):
            c = iris.Constraint(season=the_season)
            the_mean = seas_mean.extract(c)
            assert isinstance(the_mean, Cube)
            result[the_year][the_season] = the_mean.data.transpose()

    return result
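# A plausible season_to_months mapping for the method above; the season
# names and month groupings here are assumptions, since the real mapping
# is supplied by the caller.
season_to_months = {
    "DJF": (12, 1, 2),
    "MAM": (3, 4, 5),
    "JJA": (6, 7, 8),
    "SON": (9, 10, 11),
}
# result = manager.get_seasonal_mean_sst(start_year=1980, end_year=1985,
#                                        season_to_months=season_to_months)
# ('manager' is a hypothetical instance providing year_to_path/basemap.)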
def add_band_membership(cube, coord, name, dlat):
    """
    Add a custom coord in the latitude dimension which allows averaging
    over N-degree latitude bands, eg N = 15 deg.

    N.B.
    1) Requires dlat to be greater than or equal to the lat spacing of
       the native grid
    2) Requires that the native grid is regularly spaced in latitude
       (I think??)

    TODO: Automatic re-gridding to 1deg lat-spacing??

    Args:

    * cube (:class:`iris.cube.Cube`):
        the cube containing 'coord'. The new coord will be added into it.

    * coord (:class:`iris.coords.Coord` or string):
        coordinate in 'cube', or the name of one

    * name (string):
        name of the created coordinate

    * dlat:
        specified width of the aggregated latitude bands.
        N.B. dlat should be >= the lat spacing of the native grid.
    """
    # Checks
    assert dlat == int(dlat), "dlat needs to be an integer!"
    assert dlat >= np.mean(np.diff(cube.coord('latitude').points)), \
        "dlat should be >= the lat spacing of the native grid!"
    dlat = int(dlat)

    lats = cube.coord('latitude')
    test_lats = np.linspace(-90, 90, 181)[::dlat]
    l = [find_nearest(lats.points, test_lat) for test_lat in test_lats]

    # For some reason the first value gets looped over twice, so need to
    # start at -2.
    # Also not really needed, this is more aesthetic than anything
    global ticker
    ticker = -2

    def cat_func(coord, value):
        """
        Return a category value for a coordinate point-value.

        N.B. This function needs updating! It's okay for rough latitude
        bands but sometimes has spurious end-points or round-off errors.

        Args:

        * coord (:class:`iris.coords.Coord` or string):
            coordinate in 'cube', or the name of one

        * value:
            coordinate point-value
        """
        global ticker
        if value in l[:-1]:
            ticker += 1
        return ticker

    cat.add_categorised_coord(cube, name, coord, cat_func)
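# A simpler, side-effect-free alternative sketch (not the author's
# implementation): for fixed-width bands the category can be computed
# directly from the latitude value, avoiding the module-level ticker
# counter. add_lat_band_simple is a hypothetical name.
import numpy as np
import iris.coord_categorisation as cat

def add_lat_band_simple(cube, coord, name='lat_band', dlat=15):
    # Band index 0 at the south pole, increasing northwards.
    cat.add_categorised_coord(
        cube, name, coord,
        lambda coord, value: int(np.floor((value + 90.0) / dlat)))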
def add_hour_of_day(cube, coord, name='hour'):
    add_categorised_coord(cube, name, coord,
                          lambda coord, x: coord.units.num2date(x).hour)
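# Usage sketch for add_hour_of_day: build a tiny hourly cube, categorise
# by hour, then aggregate to a mean diurnal cycle. Self-contained but
# illustrative only; assumes add_hour_of_day (above) is in scope.
import numpy as np
import iris.analysis
import iris.coords
import iris.cube

hours = np.arange(48, dtype=np.float64)
demo_cube = iris.cube.Cube(hours % 24, long_name='demo', units='1')
demo_cube.add_dim_coord(
    iris.coords.DimCoord(hours, standard_name='time',
                         units='hours since 2000-01-01 00:00:00'), 0)
add_hour_of_day(demo_cube, demo_cube.coord('time'))
diurnal = demo_cube.aggregated_by('hour', iris.analysis.MEAN)
print(diurnal.coord('hour').points)   # hours 0..23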
try:
    #cube_names = ['%s' % cube_name_param, '%s' % cube_name_explicit]
    cubeconv = iris.load_cube(fu, ('%s' % cube_name_param) & glob_tc
                              & lat_constraint & lon_constraint)
    cubestrat = iris.load_cube(fu, ('%s' % cube_name_explicit) & glob_tc
                               & lat_constraint & lon_constraint)
    cube = cubeconv + cubestrat
    cube.rename('total_precipitation_rate')
except iris.exceptions.ConstraintMismatchError:
    cube = iris.load_cube(fu, ('%s' % cube_name_explicit) & glob_tc
                          & lat_constraint & lon_constraint)

#time_interval = (cube.coord('time').points[1]-cube.coord('time').points[0])

# Mean at each grid point by hour of day and save
#helper = np.vectorize(lambda r: r.hour)
#hour = helper(h)

add_categorised_coord(cube, 'hour', 'time',
                      lambda coord, x: coord.units.num2date(x).hour)

diurnal_mean_cube = cube.aggregated_by('hour', iris.analysis.MEAN)

del cube

#try:
#    iris.save(diurnal_mean_cube, '%s%s/%s/%s_rainfall_hourly_mean.pp'
#              % (pp_file_path, expmin1, experiment_id, diag))
#except Exception, e:
#    print e
#    pass

# Load land/sea mask
# For Sea and Land, mask area and calculate mean of each hour for
# sea/land and SAVE as numpy array
def accum_3hr(cube, coord, name='3hourly'):
    add_categorised_coord(
        cube, name, coord,
        lambda coord, x: 0 if x < 3 else
                         1 if x < 6 else
                         2 if x < 9 else
                         3 if x < 12 else
                         4 if x < 15 else
                         5 if x < 18 else
                         6 if x < 21 else
                         7)
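# For hour values in [0, 24) the chained conditional above is equivalent
# to integer division by the bin width (and likewise x // 6 for the
# accum_6hr helper); a sketch of the same mapping, assuming in-range
# input. three_hour_bin is a hypothetical name.
def three_hour_bin(x):
    return int(x // 3)   # 0..7 for 0 <= x < 24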
if __name__ == "__main__":
    '''
    Code to plot Alaska case study
    For a more detailed code, refer to main_notebook.ipynb
    '''
    # create data
    PATH = '../Datasets/FPA_FOD_20170508.sqlite'
    fire_df = create_wildfire_df(PATH)
    coords = large_fire_coord('AK', 10000, fire_df)

    PATH_temp = "../Datasets/air.mon.mean.nc"
    cube_temp = iris.load_cube(PATH_temp)
    cat.add_categorised_coord(cube_temp, 'year', 'time', get_year)
    cat.add_categorised_coord(cube_temp, 'month', 'time', get_month)
    cube_local = region_based_cube(cube_temp, coords)

    # geometric temperature
    create_map(2004, coords, cube_local, fire_df)

    # trend analysis
    fire_time_based = process_df_local(coords, fire_df)
    fire_time_based = fire_time_based.rename(columns={
        "fire_year": "year",
        "fire_month": "month"
    })

    PATH_temp = "../Datasets/air.mon.mean.nc"
    cube_temp = iris.load_cube(PATH_temp)
    cube_local = region_based_cube(cube_temp, coords)
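# get_year and get_month are not defined in this snippet; a plausible
# sketch of such categorisation helpers (an assumption about their
# intent, following the add_categorised_coord calling convention):
def get_year(coord, value):
    return coord.units.num2date(value).year

def get_month(coord, value):
    return coord.units.num2date(value).month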
def accum_6hr(cube, coord, name='6hourly'):
    add_categorised_coord(
        cube, name, coord,
        lambda coord, x: 0 if x < 6 else
                         1 if x < 12 else
                         2 if x < 18 else
                         3)
def plot_hydrometeors_composite_integrated(Hydrometeors_Composite,
                                           maxvalue=None,
                                           aggregate_min=None,
                                           mp=None,
                                           xlim=None,
                                           ylim=None,
                                           xlim_profile=None,
                                           ylim_integrated=None,
                                           title=None,
                                           figsize=(20 / 2.54, 10 / 2.54),
                                           height_ratios=[1.8, 1],
                                           width_ratios=[4, 1]):
    from mpdiag import hydrometeors_colors
    from iris.analysis import MEAN, SUM
    from iris.coord_categorisation import add_categorised_coord
    from iris.cube import CubeList
    from copy import deepcopy

    dict_hydrometeors_colors, dict_hydrometeors_names = hydrometeors_colors(
        microphysics_scheme=mp)

    if xlim is None:
        xlim = [Hydrometeors_Composite[0].coord('time').points[0],
                Hydrometeors_Composite[0].coord('time').points[-1]]
    if ylim is None:
        height_points = \
            Hydrometeors_Composite[0].coord('geopotential_height').points
        ylim = [height_points[0] / 1000, height_points[-1] / 1000]

    fig, ax = plt.subplots(nrows=2, ncols=2,
                           #sharex='col', sharey='row',
                           gridspec_kw={'height_ratios': height_ratios,
                                        'width_ratios': width_ratios},
                           figsize=figsize)
    fig.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.93,
                        wspace=0.1, hspace=0.2)

    Hydrometeors_Composite_copy = deepcopy(Hydrometeors_Composite)

    if aggregate_min is not None:

        def get_min(coord, value):
            minutes = value
            return (np.floor(minutes / aggregate_min) * aggregate_min
                    + aggregate_min / 2)

        hydrometeors_aggregated = CubeList()
        for cube in Hydrometeors_Composite_copy:
            if aggregate_min == 5:
                add_categorised_coord(cube, 'time_aggregated', 'time',
                                      get_min)
            hydrometeors_aggregated.append(
                cube.aggregated_by(['time_aggregated'], MEAN))
        hydrometeors_piecharts = hydrometeors_aggregated
    else:
        hydrometeors_piecharts = Hydrometeors_Composite

    plot_hydrometeors_color_time(hydrometeors_piecharts,
                                 Aux=None,
                                 axes=ax[0, 0],
                                 microphysics_scheme=mp,
                                 scaling='linear',
                                 minvalue=0,
                                 maxvalue=maxvalue,
                                 vscale=maxvalue,
                                 piecharts_rasterized=False,
                                 legend_piecharts=True,
                                 fontsize_legend=6,
                                 legend_piecharts_pos=(1.05, -0.25),
                                 legend_overlay=False,
                                 legend_overlay_pos=(1, -0.6),
                                 overlay=False,
                                 xlabel=False,
                                 ylabel=False,
                                 xlim=xlim,
                                 ylim=ylim,
                                 scale=True,
                                 unit_scale='kg m$^{-1}$ s$^{-1}$',
                                 fontsize_scale=6,
                                 x_shift=0)
    ax[0, 0].plot([0, 0], [0, 20000], color='grey', ls='-')

    for cube in Hydrometeors_Composite:
        color = dict_hydrometeors_colors[cube.name()]
        ax[0, 1].plot(cube.collapsed(('time'), MEAN).data,
                      cube.coord('geopotential_height').points / 1000,
                      color=color)
        ax[1, 0].plot(cube.coord('time').points,
                      cube.collapsed(('geopotential_height'), SUM).data,
                      color=color)
    #ax1[1,0].set_ylim(0,1000)
    # ax[1,0].plot([0,0],[0,2e10],color='grey',ls='-')

    ax[1, 1].axis('off')
    ax[0, 0].set_ylabel('altitude (km)')
    ax[1, 0].set_xlabel('time (min)')
    ax[1, 0].set_xlim(xlim)
    ax[1, 0].set_ylim(ylim_integrated)
    ax[0, 1].set_ylim(ylim)
    ax[0, 1].set_xlim(xlim_profile)
    ax[1, 0].set_ylabel('integrated (kg $^{-1}$)')
    ax[1, 0].ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    ax[0, 1].ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    ax[0, 0].xaxis.set_tick_params(labelbottom=False)
    ax[0, 1].yaxis.set_tick_params(labelleft=False)
    ax[0, 1].set_xlabel('integrated (kg m$^{-1}$)', labelpad=10)

    if title:
        ax[0, 0].set_title(title, loc='left')

    return fig
def daily_stat(cube, stat='max', min_periods=1, aqdates=False):
    """
    Calculate a daily statistics cube, eg the daily maximum.

    :param cube: input iris cube
    :param stat: string, statistic to calculate. Available options are
                 'max', 'mean', 'min', 'sum', 'std' (standard deviation),
                 'var' (variance)
    :param min_periods: Minimum number of data values required to
                        calculate a value - if less than this number
                        present, then value is set to NaN.
    :param aqdates: If True, then dates are set according to air quality
                    standards, ie 00Z is actually 24Z from the previous
                    day.

    >>> import config
    >>> import adaq_data
    >>> samplepath = config.SAMPLE_DATADIR+'sites_cube_list/'
    >>> od = adaq_data.ADAQData()
    >>> obs_scl = od.load_ts(samplepath+'aurn_5days.nc')
    >>> cube = od.extract(short_name='O3', singlecube=True)
    >>> maxcube = daily_stat(cube, stat='max', min_periods=18)
    >>> print(maxcube)
    mass_concentration_of_ozone_in_air / (ug/m3) (site_id: 5; time: 6)
        Dimension coordinates:
            site_id                                       x        -
            time                                          -        x
        Auxiliary coordinates:
            abbrev                                        x        -
            latitude                                      x        -
            longitude                                     x        -
            site_altitude                                 x        -
            site_name                                     x        -
            site_type                                     x        -
            date                                          -        x
        Attributes:
            Conventions: CF-1.5
            label: Obs
            short_name: O3
            source: AURN
        Cell methods:
            mean: time (1 hour)
            nanmax_min18periods: date
    >>> print(cube.data[0,:24].max())
    90.0
    >>> np.set_printoptions(formatter={'float':lambda x: '{:5.2f}'.format(x)})
    >>> print(maxcube.data[0,:])
    [90.00 95.00 86.00 88.00 91.00   nan]
    >>> np.set_printoptions()
    """
    #Check cube has a time-coordinate
    try:
        cube.coord('time')
    except iris.exceptions.CoordinateNotFoundError:
        raise ValueError('cube does not have a time coordinate')

    cube_tmp = cube.copy()

    #Firstly add the date coordinate
    if aqdates:
        coord_cat.add_categorised_coord(cube_tmp, 'date',
                                        cube_tmp.coord('time'),
                                        cube_time.date_from_time_aq,
                                        units='')
    else:
        coord_cat.add_categorised_coord(cube_tmp, 'date',
                                        cube_tmp.coord('time'),
                                        cube_time.date_from_time,
                                        units='')

    #Then can aggregate to get maximum value for each date
    aggregator_name = "nan" + stat
    if min_periods > 1:
        aggregator_name += "_min%dperiods" % (min_periods)
    max_aggregator = iris.analysis.Aggregator(
        aggregator_name,
        array_statistics.nanstat_minperiod,
        stat=stat,
        min_periods=min_periods)

    day_cube = cube_tmp.aggregated_by('date', max_aggregator)

    return day_cube
def aggregate_time(cube, period='hour', aggregator_name='NANMEAN',
                   collapsed=False):
    """
    Aggregate cube according to a specific time component.

    The time co-ordinate is replaced with a co-ordinate for the given
    period. For example, when aggregating over 'hour', all points at the
    same time of day are combined, regardless of which day they are in.
    The resulting 'hour' co-ordinate therefore contains at most 24 points
    - one for each hour.

    .. note:: Not to be confused with :any:`periodic_stat`, which is used
              to (for example) aggregate all hours within the same day to
              a single point.

    :param cube: iris.cube.Cube
    :param period: period to aggregate over. Currently available options
                   are:

                   * 'hour' (gives a diurnal cube),
                   * 'month' (gives a monthly cube), also sets up extra
                     coord 'monthname' containing three letter month
                     name, eg 'Feb'. Over three years, this will end up
                     with a coord 12 points long.
                   * 'yearmonth', aggregates into months but retains the
                     year component, so over three years will end up
                     with a coord 3x12 points long. Also sets up extra
                     coord 'yearmonthname' containing three letter month
                     name and year as a string, eg 'Feb 2014'.

    :param aggregator_name: string name of aggregator to use. By default
                            this is 'NANMEAN' which will take a mean over
                            all values, ignoring NaNs. Other available
                            alternatives include 'NANMAX' (max, ignoring
                            NaNs), 'MEAN' and 'MAX', which are the mean
                            and max values, taking NaNs into account (if
                            any value in array is NaN, returns NaN). Uses
                            cube_statistics.CUBE_AGGREGATORS dictionary.
                            Alternatively any aggregator in iris.analysis
                            could be used.
    :param collapsed: logical to determine whether to collapse over all
                      dimensions. If False (default), time dimension is
                      removed and replaced with hour, but all other
                      dimensions kept. If True, the same aggregator is
                      used to collapse all other dimensions. In this
                      case, the only dimension in the returned cube is
                      time.

    Method: Works on a cube to return a cube aggregated by required
    period. When the cube is aggregated the time coordinate is no longer
    monotonic. The time coordinate and any other coordinates on this
    dimension (for example forecast period) are removed, and the new
    period coordinate is added instead as a dimcoord. To ensure the
    resulting cube can be plotted in the expected time order (eg 0-23Z
    for period='hour'), the aggregated cube is sliced by period and the
    individual subcubes are then concatenated in the correct order.

    Example:

    >>> import config
    >>> import adaq_data
    >>> sample_data_path = config.SAMPLE_DATADIR+'sites_cube_list/'
    >>> md = adaq_data.ADAQData()
    >>> scl = md.load_ts(sample_data_path+'aqum_oper_5days.nc')
    >>> mod_cube = md.extract(short_name='O3', singlecube=True)
    >>> print(mod_cube)  # doctest: +ELLIPSIS
    mass_concentration_of_ozone_in_air / (ug/m3) (site_id: 5; time: 121)
        Dimension coordinates:
            site_id                                       x        -
            time                                          -        x
        Auxiliary coordinates:
            abbrev                                        x        -
            grid_latitude                                 x        -
            grid_longitude                                x        -
            latitude                                      x        -
            longitude                                     x        -
            site_altitude                                 x        -
            site_name                                     x        -
            site_type                                     x        -
            surface_altitude                              x        -
            forecast_period                               -        x
        Scalar coordinates:
            forecast_day: 1.0 Days
            level_height: 20.00... m, bound=(0.0, 49.99...) m
            model_level_number: 1
            sigma: 0.99..., bound=(1.0, 0.99...)
        Attributes:
            Conventions: CF-1.5
            STASH: m01s34i001
            label: aqum_oper
            short_name: O3
            source: Data from Met Office Unified Model
        Cell methods:
            mean: time (1 hour)

    Calculate diurnal variation, but without collapsing the site_id
    coordinate, use period='hour':

    >>> diurnal_cube = aggregate_time(mod_cube, period='hour', collapsed=False)
    >>> print(diurnal_cube)  # doctest: +ELLIPSIS
    mass_concentration_of_ozone_in_air / (ug/m3) (hour: 24; site_id: 5)
        Dimension coordinates:
            hour                                      x           -
            site_id                                   -           x
        Auxiliary coordinates:
            abbrev                                    -           x
            grid_latitude                             -           x
            grid_longitude                            -           x
            latitude                                  -           x
            longitude                                 -           x
            site_altitude                             -           x
            site_name                                 -           x
            site_type                                 -           x
            surface_altitude                          -           x
        Scalar coordinates:
            forecast_day: 1.0 Days
            level_height: 20.00... m, bound=(0.0, 49.99...) m
            model_level_number: 1
            sigma: 0.99..., bound=(1.0, 0.99...)
        Attributes:
            Conventions: CF-1.5
            STASH: m01s34i001
            label: aqum_oper
            short_name: O3
            source: Data from Met Office Unified Model
        Cell methods:
            mean: time (1 hour)
            nanmean: hour

    And now collapse the site_id coordinate (the nanmean is also taken
    across site_id):

    >>> diurnal_cube = aggregate_time(mod_cube, period='hour', collapsed=True)
    >>> print(diurnal_cube)  # doctest: +ELLIPSIS
    mass_concentration_of_ozone_in_air / (ug/m3) (hour: 24)
        Dimension coordinates:
            hour                                                 x
        Scalar coordinates:
            abbrev: YW|ACTH|AH|ABD|HAR
            forecast_day: 1.0 Days
            grid_latitude: 1.381... degrees, \
bound=(-1.895..., 4.658...) degrees
            grid_longitude: -0.021... degrees, \
bound=(-0.772..., 0.729...) degrees
            latitude: 53.877... degrees, \
bound=(50.597..., 57.157...) degrees
            level_height: 20.000... m, bound=(0.0, 49.998...) m
            longitude: -2.521... degrees, \
bound=(-3.716..., -1.326...) degrees
            model_level_number: 1
            sigma: 0.997..., bound=(1.0, 0.994...)
            site_altitude: 195... m, bound=(20, 370) m
            site_id: 35747847.141..., \
bound=(35628361.140..., 35867333.141...)
            site_name: Yarner_Wood|Auchencorth_Moss|Aston_Hill|\
Aberdeen|Harwell
            site_type: RURAL|RURAL|RURAL|URBAN_BACKGROUND|RURAL
            surface_altitude: 137.04... m, \
bound=(38.888..., 235.209...) m
        Attributes:
            Conventions: CF-1.5
            STASH: m01s34i001
            label: aqum_oper
            short_name: O3
            source: Data from Met Office Unified Model
        Cell methods:
            mean: time (1 hour)
            nanmean: hour
            nanmean: site_id

    This returned cube has only one dimension - hour, which is the
    required period.

    As an example of using period='month' to get a monthly cube:

    >>> scl = md.load_ts(sample_data_path+'aqum_oper_10days.nc')
    >>> mod_cube = md.extract(short_name='O3', singlecube=True)
    >>> cube = aggregate_time(mod_cube, period='month')
    >>> print(cube.coord('month').points)
    [3 4]
    >>> print(cube.coord('monthname').points)
    ['Mar' 'Apr']

    And finally using period='yearmonth':

    >>> cube = aggregate_time(mod_cube, period='yearmonth')
    >>> print(cube.coord('yearmonth').points)
    [201403 201404]
    >>> print(cube.coord('yearmonthname').points)
    ['Mar 2014' 'Apr 2014']
    """
    #If period already exists as a coordinate, remove first
    if cube.coords(period):
        warnings.warn('Removing pre-existing ' + period + ' coordinate')
        cube.remove_coord(period)

    timeunits = period
    if period == 'hour':
        #Add hour as a categorised co-ordinate
        coord_cat.add_categorised_coord(cube, 'hour', cube.coord('time'),
                                        cube_time.hour_from_time,
                                        units=timeunits)
    elif period == 'month':
        #Add month as a categorised co-ordinate
        coord_cat.add_categorised_coord(cube, 'month', cube.coord('time'),
                                        cube_time.month_from_time,
                                        units=timeunits)
    elif period == 'yearmonth':
        #Add month as a categorised co-ordinate
        timeunits = '1'
        coord_cat.add_categorised_coord(cube, 'yearmonth',
                                        cube.coord('time'),
                                        cube_time.yearmonth_from_time,
                                        units=timeunits)

    #Remove coordinates on original time-axis as these will
    #be incorrect/misleading and won't let it aggregate over hour otherwise.
    cube_tmp = cube.copy()  #Don't modify input cube
    time_dim = cube_tmp.coord_dims('time')
    time_axis_coords = [coord.name()
                        for coord in cube_tmp.coords(dimensions=time_dim)]
    for coord_name in time_axis_coords:
        if coord_name != period:
            warnings.warn('Removing coordinate on time axis: ' + coord_name)
            cube_tmp.remove_coord(coord_name)

    aggregator = CUBE_AGGREGATORS[aggregator_name]

    #Aggregate cube by period using the aggregator
    aggregated_cube = cube_tmp.aggregated_by(period, aggregator)

    #Collapse cube across all non-time/period dim coords
    #to give a mean cube over all dimensions
    if collapsed:
        dim_coord_names = [coord.name()
                           for coord in aggregated_cube.dim_coords]
        for coord_name in dim_coord_names:
            if coord_name != 'time' and coord_name != period:
                aggregated_cube = aggregated_cube.collapsed(coord_name,
                                                            aggregator)

    # Generate cube in expected time order
    cube_list = iris.cube.CubeList()
    for subcube in aggregated_cube.slices_over(period):
        new_coord = subcube.coord(period)
        subcube.remove_coord(period)
        subcube.add_aux_coord(
            iris.coords.DimCoord(new_coord.points,
                                 long_name=period,
                                 units=timeunits))
        subcube = iris.util.new_axis(subcube, period)
        cube_list.append(subcube)
    new_cube = cube_list.concatenate_cube()

    if period == 'month':
        #Also add name of month as an extra coord
        #Eg 'Feb'
        coord_cat.add_categorised_coord(new_cube, 'monthname',
                                        new_cube.coord('month'),
                                        cube_time.monthname_from_month)

    if period == 'yearmonth':
        #Also add name of month and year as an extra coord
        #Eg 'Feb 2014'
        coord_cat.add_categorised_coord(
            new_cube, 'yearmonthname',
            new_cube.coord('yearmonth'),
            cube_time.yearmonthname_from_yearmonth)

        #Also add datetime of month and year as an extra coord
        #Convert to datetime format
        yearmonthdt = np.array([
            datetime.datetime(int(str(point)[:4]),   #year
                              int(str(point)[4:6]),  #month
                              1)                     #first day of month
            for point in new_cube.coord('yearmonth').points])
        #Then convert this to numbers
        units = cube.coord('time').units
        #units = cf_units.Unit('hours since epoch', calendar='gregorian')
        yearmonth_num = np.array([units.date2num(t) for t in yearmonthdt])
        #Can now set up new coord
        coord = iris.coords.AuxCoord(yearmonth_num,
                                     long_name='yearmonthdt',
                                     units=units)
        #Guess bounds, where point is at the beginning of the month
        #so bound covers entire month
        if len(yearmonth_num) > 1:
            coord.guess_bounds(0.0)
        new_cube.add_aux_coord(coord, 0)  #Add this coord to new_cube

    return new_cube
def periodic_stat(cube, stat='max', period='day', min_periods=1,
                  aqdates=None):
    """
    Calculate daily, monthly, or yearly statistics.

    .. note:: Not to be confused with :any:`aggregate_time`, which is
              used to (for example) aggregate all points at the same
              hour of different days to a single point.

    :param cube: input iris cube
    :param stat: string, statistic to calculate. Available options are
                 'max', 'mean', 'min', 'sum', 'std' (standard deviation),
                 'var' (variance)
    :param period: period to aggregate over. Currently available options
                   are 'day', 'month', and 'year'.
    :param min_periods: Minimum number of data values required to
                        calculate a value - if less than this number
                        present, then value is set to NaN.
    :param aqdates: If True, then dates are set according to air quality
                    standards, ie 00Z is actually 24Z from the previous
                    day. If not specified (ie None), the value True or
                    False will be guessed from the bounds of the first
                    point.

    :returns: cube containing data for each `period`.

    Example:

    >>> import config
    >>> import adaq_data
    >>> sample_data_path = config.SAMPLE_DATADIR+'sites_cube_list/'
    >>> md = adaq_data.ADAQData()
    >>> scl = md.load_ts(sample_data_path+'aqum_oper_5days.nc')
    >>> mod_cube = md.extract(short_name='O3', singlecube=True)
    >>> print(mod_cube)  # doctest: +ELLIPSIS
    mass_concentration_of_ozone_in_air / (ug/m3) (site_id: 5; time: 121)
        Dimension coordinates:
            site_id                                       x        -
            time                                          -        x
        Auxiliary coordinates:
            abbrev                                        x        -
            grid_latitude                                 x        -
            grid_longitude                                x        -
            latitude                                      x        -
            longitude                                     x        -
            site_altitude                                 x        -
            site_name                                     x        -
            site_type                                     x        -
            surface_altitude                              x        -
            forecast_period                               -        x
        Scalar coordinates:
            forecast_day: 1.0 Days
            level_height: 20.00... m, bound=(0.0, 49.99...) m
            model_level_number: 1
            sigma: 0.99..., bound=(1.0, 0.99...)
        Attributes:
            Conventions: CF-1.5
            STASH: m01s34i001
            label: aqum_oper
            short_name: O3
            source: Data from Met Office Unified Model
        Cell methods:
            mean: time (1 hour)

    Calculate the maximum for each day:

    >>> stat_cube = periodic_stat(mod_cube, "max", "day")
    >>> print(stat_cube)
    mass_concentration_of_ozone_in_air / (ug/m3) (site_id: 5; time: 6)
        Dimension coordinates:
            site_id                                       x        -
            time                                          -        x
        Auxiliary coordinates:
            abbrev                                        x        -
            grid_latitude                                 x        -
            grid_longitude                                x        -
            latitude                                      x        -
            longitude                                     x        -
            site_altitude                                 x        -
            site_name                                     x        -
            site_type                                     x        -
            surface_altitude                              x        -
            date                                          -        x
            forecast_period                               -        x
        Scalar coordinates:
            forecast_day: 1.0 Days
            level_height: 20.000338 m, bound=(0.0, 49.998882) m
            model_level_number: 1
            sigma: 0.9977165, bound=(1.0, 0.99429625)
        Attributes:
            Conventions: CF-1.5
            STASH: m01s34i001
            label: aqum_oper
            short_name: O3
            source: Data from Met Office Unified Model
        Cell methods:
            mean: time (1 hour)
            nanmax: date

    The new co-ordinate 'date' contains a string representation for each
    period - in this case each day:

    >>> print(stat_cube.coord("date").points)
    ['2014-03-25' '2014-03-26' '2014-03-27' '2014-03-28' '2014-03-29'
     '2014-03-30']

    Note that the time co-ordinate bounds have been merged, and that the
    time points now represent the midpoint:

    >>> print(stat_cube.coord("time"))
    ... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    DimCoord([2014-03-25 23:30:00, 2014-03-26 12:00:00,
              2014-03-27 12:00:00, 2014-03-28 12:00:00,
              2014-03-29 12:00:00, 2014-03-30 12:00:00],
             bounds=[[2014-03-25 23:00:00, 2014-03-26 00:00:00],
                     [2014-03-26 00:00:00, 2014-03-27 00:00:00],
                     [2014-03-27 00:00:00, 2014-03-28 00:00:00],
                     [2014-03-28 00:00:00, 2014-03-29 00:00:00],
                     [2014-03-29 00:00:00, 2014-03-30 00:00:00],
                     [2014-03-30 00:00:00, 2014-03-31 00:00:00]], ...)
    """
    #Check cube has a time-coordinate
    if not cube.coords('time'):
        raise ValueError('cube does not have a time coordinate')

    #Define an appropriate aggregator
    aggregator_name = "nan" + stat
    if min_periods > 1:
        aggregator_name += "_min{}periods".format(min_periods)
    aggregator = iris.analysis.Aggregator(
        aggregator_name,
        array_statistics.nanstat_minperiod,
        stat=stat,
        min_periods=min_periods)

    #Choose a date format that groups times in the same period
    if period == 'day':
        date_fmt = '%Y-%m-%d'
    elif period == 'month':
        date_fmt = '%Y-%m'
    elif period == 'year':
        date_fmt = '%Y'
    else:
        raise ValueError("unrecognised aggregation period: {}".format(period))

    #Work on a copy of the input cube
    stat_cube = cube.copy()
    coord = stat_cube.coord('time')

    #Determine whether a point at 00Z represents data for the same day
    #or the previous day, by checking bounds. In fact only check
    #whether the first point is at the end of its bound, and assume
    #that the same applies to all points.
    if aqdates is None:
        if coord.has_bounds():
            aqdates = coord.points[0] == coord.bounds[0, 1]
        else:
            warnings.warn("aqdates could not be guessed but assumed False")
            aqdates = False

    #Add a new coordinate to aggregate by
    if aqdates:
        func = lambda coord, point: cube_time.date_from_time_aq(
            coord, point, fmt=date_fmt)
    else:
        func = lambda coord, point: cube_time.date_from_time(
            coord, point, fmt=date_fmt)
    coord_cat.add_categorised_coord(stat_cube, 'date', coord, func,
                                    units='no_unit')

    #Perform the aggregation
    stat_cube = stat_cube.aggregated_by('date', aggregator)

    return stat_cube
def limit_sites(ini_dict, n_sites_per_country=3, limit_species=None,
                output_sites_file=None):
    """
    Limit the number of sites used. This is based on the sites with the
    maximum data availability for the required limit_species. There is a
    limit of sites per country, given by n_sites_per_country. The
    generated list of sites is output to file, along with a plot of
    locations.

    :param ini_dict: Dictionary of a :class:`inifile` object.
                     Should contain:

                     * 'obs_fmt' - currently only 'camsaqobs' allowed
                     * 'obs_dir' - directory containing obs files
                     * 'short_name_list'
                     * 'start_datetime'
                     * 'end_datetime'
                     * 'plot_dir' - for output of text file and map
                     * 'site_types' - list of required site types

    :param n_sites_per_country: Maximum number of sites per country
    :param limit_species: List of species to use for determining which
                          sites to use. Only sites with data for these
                          species will be considered. If None, then uses
                          short_name_list from ini_dict.
    :param output_sites_file: Filename to write output sites list to.
                              If set to None, then defaults to
                              ini_dict['plot_dir']+'/limited_site_locations.txt'

    :returns: sites_data - numpy ndarray containing site information
              data from a :class:`sites_info.SitesInfo` object.

    >>> ini_dict = {}
    >>> ini_dict['obs_fmt'] = 'camsaqobs'
    >>> ini_dict['obs_dir'] = config.SAMPLE_DATADIR+'CAMSAQ_obs'
    >>> ini_dict['short_name_list'] = ['O3']
    >>> ini_dict['site_types_list'] = ['RURAL_BACKGROUND', 'URBAN_BACKGROUND']
    >>> ini_dict['start_datetime'] = datetime.datetime(2015,4,4,00)
    >>> ini_dict['end_datetime'] = datetime.datetime(2015,4,5,00)
    >>> ini_dict['plot_dir'] = config.CODE_DIR+'adaqdocs/figures/'
    >>> sites_data = limit_sites(ini_dict, n_sites_per_country=3)
    ... # doctest: +ELLIPSIS
    Creating observation data at ...
    Reading .../obsmacc4ana_20150403.csv
    Reading .../obsmacc4ana_20150404.csv
    Converting to cubes
    Total sites:  51
    Total countries:  19
    Total RURAL_BACKGROUND sites:  35
    Total URBAN_BACKGROUND sites:  16
    Saved to file  .../limited_site_locations.png
    Written to file .../limited_site_locations.txt

    >>> fmtstr = sites_info.format_string(sites_data)
    >>> for site in sites_data[:3]:
    ...     print(fmtstr.format(*site))
    28889.13064500,ES1754A,ES, 40.645, 0.289, 428.0, 0, 0, 0, 0, 1, \
 0, 0, 0,ES1754A,RURAL_BACKGROUND
    29092.14229850,GB0045R,GB, 52.298, 0.291,   5.0, 0, 1, 1, 1, 4, \
 0, 0, 3,GB0045R,RURAL_BACKGROUND
    44083.13105920,ES1379A,ES, 41.059, 0.441, 363.0, 0, 0, 0, 0, 2, \
 0, 0, 0,ES1379A,RURAL_BACKGROUND

    .. image:: ../adaqdocs/figures/limited_site_locations.png
       :scale: 75%
    """
    if limit_species is not None:
        ini_dict['short_name_list'] = limit_species

    od = adaq_functions.get_obs(ini_dict, None)

    #Get example site cube from observation site cube
    sites_cube = od.sites_cube_list[0]

    #Get array of total number of valid times for each site across all
    #species
    n_validtimes = np.zeros((len(sites_cube.coord('site_id').points)))
    for cube in od.sites_cube_list:
        n_validtimes += np.sum(np.isfinite(cube.data), axis=1)

    #Add country coordinate
    coord_cat.add_categorised_coord(sites_cube, 'country_abbrev',
                                    sites_cube.coord('abbrev'),
                                    country_from_abbrev, units='')

    #Find maximum data coverage in each country
    countries = sites_cube.coord('country_abbrev').points
    unique_countries = np.unique(countries)
    required_abbrevs = []
    for country in unique_countries:
        #Find indices for this country
        country_indices = np.where(countries == country)
        #Get the indices in order which would pick out this country
        #in order of n_validtimes, from min to max
        sort_indices = np.argsort(n_validtimes[country_indices])
        #Indices to actually use for this country
        country_indices = country_indices[0][
            sort_indices[0 - n_sites_per_country:]]
        #Now get the abbrevs from a od sites_cube_list
        abbrevs = list(sites_cube[country_indices].coord('abbrev').points)
        required_abbrevs += abbrevs

    new_sites_cube = sites_cube.extract(
        iris.Constraint(abbrev=required_abbrevs))

    #Print some information about chosen sites.
    print('Total sites: ', len(new_sites_cube.coord('abbrev').points))
    print('Total countries: ', len(unique_countries))
    site_types = new_sites_cube.coord('site_type').points
    for site_type in np.unique(site_types):
        print('Total ' + site_type + ' sites: ', end=' ')
        print(len(np.where(site_types == site_type)[0]))

    #Convert into a SitesInfo object so can get sites_data returned,
    #along with plotting locations and writing to a site list
    si = sites_info.SitesInfo()
    sites_data = si.read_from_sites_cube(new_sites_cube)

    if not os.path.isdir(ini_dict['plot_dir']):
        os.makedirs(ini_dict['plot_dir'])
    si.plot_location_map(
        ini_dict['plot_dir'] + '/limited_site_locations.png',
        label=True, labelsize='x-small')

    if output_sites_file is None:
        output_sites_file = (ini_dict['plot_dir']
                             + '/limited_site_locations.txt')
    si.write_to_file(output_sites_file)

    return sites_data
def am_or_pm(cube, coord, name='am_or_pm'):
    add_categorised_coord(cube, name, coord,
                          lambda coord, x: 'am' if x < 12 else 'pm')
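# Usage sketch for am_or_pm: the lambda compares the raw point value with
# 12, so it expects an hour-of-day coordinate (such as one produced by
# add_hour_of_day above), not a raw time coordinate. Assumes both helpers
# are in scope; illustrative only.
import numpy as np
import iris.analysis
import iris.coords
import iris.cube

hours = np.arange(24, dtype=np.float64)
demo = iris.cube.Cube(hours, long_name='demo', units='1')
demo.add_dim_coord(
    iris.coords.DimCoord(hours, standard_name='time',
                         units='hours since 2000-01-01 00:00:00'), 0)
add_hour_of_day(demo, demo.coord('time'))
am_or_pm(demo, demo.coord('hour'))
halfday_mean = demo.aggregated_by('am_or_pm', iris.analysis.MEAN)
print(halfday_mean.coord('am_or_pm').points)   # the two half-day categories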