def test_datetime_attrs(self):
    """
    Test that datetime objects will be converted to iso format
    when writing attributes.

    A SCALAR dataset is written with a ``timestamp`` attribute holding a
    `datetime.datetime`, read back with `hdf5.read_scalar`, and the
    ``timestamp`` entry that comes back is compared with the
    ``timestamp`` entry of the attrs dict that was handed to
    `hdf5.write_scalar`.
    """
    attrs = {'timestamp': datetime.datetime.now()}

    fname = 'test_datetime_attrs.h5'
    # Open explicitly for writing: h5py >= 3.0 defaults to read-only mode,
    # which cannot create the file this test writes into.
    # NOTE(review): assumes self.memory_kwargs carries driver options only
    # (no 'mode' key) -- confirm against the test class setup.
    with h5py.File(fname, 'w', **self.memory_kwargs) as fid:
        hdf5.write_scalar(self.scalar_data, 'scalar', fid, attrs=attrs)

        data = hdf5.read_scalar(fid, 'scalar')
        # NOTE(review): the right-hand side is looked up in `attrs` *after*
        # the write, so this presumably relies on write_scalar converting
        # the entry in place -- confirm before refactoring.
        self.assertEqual(data['timestamp'], attrs['timestamp'])
def test_datetime_attrs(self):
    """
    Round-trip a SCALAR dataset carrying a datetime attribute.

    The ``timestamp`` entry returned by `hdf5.read_scalar` must equal the
    ``timestamp`` entry of the attributes dict given to
    `hdf5.write_scalar`.
    """
    dataset_name = "scalar"
    attributes = {"timestamp": datetime.datetime.now()}

    with h5py.File("test_datetime_attrs.h5", "w", **self.memory_kwargs) as h5_file:
        hdf5.write_scalar(
            self.scalar_data, dataset_name, h5_file, attrs=attributes
        )
        round_tripped = hdf5.read_scalar(h5_file, dataset_name)

        # the expected value is deliberately read from the dict after the
        # write call, exactly as the comparison has always been made
        self.assertEqual(round_tripped["timestamp"], attributes["timestamp"])
def test_scalar_attributes(self):
    """
    Check that `hdf5.read_scalar` returns the value, the CLASS/VERSION
    markers and the user supplied attribute of a written SCALAR dataset.
    """
    dataset_name = "test-scalar"
    attributes = {"test_attribute": "this is a scalar"}

    # what we expect to read back: the fixed entries followed by every
    # user supplied attribute
    expected = {
        "value": self.scalar_data,
        "CLASS": "SCALAR",
        "VERSION": "0.1",
        **attributes,
    }

    with h5py.File("test_scalar_dataset.h5", "w", **self.memory_kwargs) as h5_file:
        hdf5.write_scalar(
            expected["value"], dataset_name, h5_file, attrs=attributes
        )
        observed = hdf5.read_scalar(h5_file, dataset_name)
        self.assertDictEqual(observed, expected)
def test_scalar_attributes(self):
    """
    Test the scalar attributes.

    A SCALAR dataset is written with one user attribute and then read
    back; the returned dict must match the written value, the ``CLASS``
    and ``VERSION`` markers, and the user attribute.
    """
    attrs = {'test_attribute': 'this is a scalar'}
    data = {'value': self.scalar_data,
            'CLASS': 'SCALAR',
            'VERSION': '0.1'}

    # insert the attribute into the data dict
    data.update(attrs)

    fname = 'test_scalar_dataset.h5'
    # Open explicitly for writing: h5py >= 3.0 defaults to read-only mode,
    # which cannot create the file this test writes into.
    # NOTE(review): assumes self.memory_kwargs carries driver options only
    # (no 'mode' key) -- confirm against the test class setup.
    with h5py.File(fname, 'w', **self.memory_kwargs) as fid:
        hdf5.write_scalar(data['value'], 'test-scalar', fid, attrs=attrs)

        self.assertDictEqual(hdf5.read_scalar(fid, 'test-scalar'), data)
def create_pq_yaml(acquisition, ancillary, tests_run, out_group):
    """
    Dump the PQ workflow metadata as a yaml document and store it as a
    HDF5 SCALAR dataset.

    :param acquisition:
        An instance of `acquisition`; its `dir_name` attribute is used
        to derive the level-1 source location.

    :param ancillary:
        A dict containing the ancillary information.

    :param tests_run:
        A dict containing the key/value pairs of tests and whether
        or not a given test was run.

    :param out_group:
        A `h5py.Group` object opened for write access.

    :return:
        None; The yaml document is written to the HDF5 file.
    """
    provenance = dict(
        source_l1t=dirname(acquisition.dir_name),
        source_reflectance='NBAR',
    )

    algorithm_details = dict(
        software_version=wagl.__version__,
        software_repository='https://github.com/GeoscienceAustralia/wagl.git',
        pq_doi='http://dx.doi.org/10.1109/IGARSS.2013.6723746',
    )

    document = dict(
        system_information=get_system_information(),
        source_data=provenance,
        algorithm_information=algorithm_details,
        ancillary=ancillary,
        tests_run=tests_run,
    )

    # serialise in block style and write under the PQ yaml dataset name
    target_name = DatasetName.PQ_YAML.value
    write_scalar(
        yaml.dump(document, default_flow_style=False),
        target_name,
        out_group,
        attrs={'file_format': 'yaml'},
    )
def test_write_scalar(self):
    """
    `hdf5.write_scalar` is expected to return None after writing.
    """
    payload = self.scalar_data

    with h5py.File("test_write_scalar.h5", "w", **self.memory_kwargs) as h5_file:
        outcome = hdf5.write_scalar(payload, "scalar", h5_file)
        self.assertIsNone(outcome)
def create_pq_yaml(acquisition, ancillary, tests_run, out_group):
    """
    Dump the PQ workflow metadata as a yaml document and store it as a
    HDF5 SCALAR dataset.

    The software version and repository entries are taken from the
    installed "wagl" distribution metadata.

    :param acquisition:
        An instance of `acquisition`; its `dir_name` attribute is used
        to derive the level-1 source location.

    :param ancillary:
        A dict containing the ancillary information.

    :param tests_run:
        A dict containing the key/value pairs of tests and whether
        or not a given test was run.

    :param out_group:
        A `h5py.Group` object opened for write access.

    :return:
        None; The yaml document is written to the HDF5 file.
    """
    wagl_dist = distribution("wagl")

    provenance = dict(
        source_l1t=dirname(acquisition.dir_name),
        source_reflectance="NBAR",
    )

    algorithm_details = dict(
        software_version=wagl_dist.version,
        software_repository=wagl_dist.metadata.get("Home-page"),
        pq_doi="http://dx.doi.org/10.1109/IGARSS.2013.6723746",
    )

    document = dict(
        system_information=get_system_information(),
        source_data=provenance,
        algorithm_information=algorithm_details,
        ancillary=ancillary,
        tests_run=tests_run,
    )

    # serialise in block style and write under the PQ yaml dataset name
    target_name = DatasetName.PQ_YAML.value
    write_scalar(
        yaml.dump(document, default_flow_style=False),
        target_name,
        out_group,
        attrs={"file_format": "yaml"},
    )
def _store_parameter_settings(fid, spheriod, orbital_elements,
                              satellite_model, satellite_track, params):
    """
    Internal helper that records the inputs of the calculate_angles
    workflow beneath a newly created 'PARAMETERS' group of `fid`.

    `params` is attached as the attributes of a SCALAR dataset, while
    each of the four tables is stored as its own dataset and tagged
    with a title and a description.
    """
    parameters_group = fid.create_group('PARAMETERS')

    # generic parameters
    write_scalar('GENERIC PARAMETERS', DatasetName.GENERIC.value,
                 parameters_group, params)

    # (dataset name member, table title, table data, description),
    # listed in the order the datasets are created
    tables = (
        (DatasetName.SPHEROID, 'Spheroid', spheriod,
         "The spheroid used in the satellite and solar angles calculation."),
        (DatasetName.ORBITAL_ELEMENTS, 'Orbital Elements', orbital_elements,
         "The satellite orbital parameters used in the satellite and "
         "solar angles calculation."),
        (DatasetName.SATELLITE_MODEL, 'Satellite Model', satellite_model,
         "The satellite model used in the satellite and solar angles "
         "calculation."),
        (DatasetName.SATELLITE_TRACK, 'Satellite Track', satellite_track,
         "The satellite track information used in the satellite and solar "
         "angles calculation."),
    )

    for name_member, title, table, description in tables:
        table_dset = parameters_group.create_dataset(name_member.value,
                                                     data=table)
        attach_table_attributes(table_dset, title=title,
                                attrs={'description': description})
def create_ard_yaml(res_group_bands, ancillary_group, out_group, parameters, workflow):
    """
    Write the NBAR metadata captured during the entire workflow to a
    HDF5 SCALAR dataset using the yaml document format.

    The yaml document is written into `out_group` under a freshly
    generated UUID4 name, and a soft link named
    `DatasetName.CURRENT_METADATA.value` is pointed at that dataset.

    :param res_group_bands:
        A `dict` mapping resolution group names to lists of `Acquisition`
        instances.

    :param ancillary_group:
        The root HDF5 `Group` that contains the ancillary data
        collected via wagl.ancillary.collect_ancillary.

    :param out_group:
        A `h5py.Group` object opened for write access.

    :param parameters:
        A `dict` containing `DataStandardisation` parameters

    :param workflow:
        Which workflow to run (from the `wagl.constants.Workflow`
        enumeration).

    :return:
        None; The yaml document is written to the HDF5 file.
    """
    # which families of metadata to gather; STANDARD enables both
    sbt = workflow in [Workflow.STANDARD, Workflow.SBT]
    nbar = workflow in [Workflow.STANDARD, Workflow.NBAR]

    def load_sbt_ancillary(group):
        """
        Load the sbt ancillary data retrieved during the workflow.

        Returns a dict keyed by dataset name; each entry maps a point's
        (lon, lat) tuple to the data read for that point.
        """
        point_data = {
            DatasetName.DEWPOINT_TEMPERATURE.value: {},
            DatasetName.SURFACE_GEOPOTENTIAL.value: {},
            DatasetName.TEMPERATURE_2M.value: {},
            DatasetName.SURFACE_RELATIVE_HUMIDITY.value: {},
            DatasetName.GEOPOTENTIAL.value: {},
            DatasetName.RELATIVE_HUMIDITY.value: {},
            DatasetName.TEMPERATURE.value: {}
        }

        # one point Group is expected per row of the coordinator dataset
        npoints = group[DatasetName.COORDINATOR.value].shape[0]
        for point in range(npoints):
            pnt_grp = group[POINT_FMT.format(p=point)]
            lonlat = tuple(pnt_grp.attrs['lonlat'])

            # scalars
            dname = DatasetName.DEWPOINT_TEMPERATURE.value
            point_data[dname][lonlat] = read_scalar(pnt_grp, dname)

            dname = DatasetName.SURFACE_GEOPOTENTIAL.value
            point_data[dname][lonlat] = read_scalar(pnt_grp, dname)

            dname = DatasetName.TEMPERATURE_2M.value
            point_data[dname][lonlat] = read_scalar(pnt_grp, dname)

            dname = DatasetName.SURFACE_RELATIVE_HUMIDITY.value
            point_data[dname][lonlat] = read_scalar(pnt_grp, dname)

            # tables
            # each table is flattened into a single dict: the dataset's
            # attributes plus one entry per column holding its values
            dname = DatasetName.GEOPOTENTIAL.value
            dset = pnt_grp[dname]
            attrs = {k: v for k, v in dset.attrs.items()}
            df = read_h5_table(pnt_grp, dname)
            for column in df.columns:
                attrs[column] = df[column].values
            point_data[dname][lonlat] = attrs

            dname = DatasetName.RELATIVE_HUMIDITY.value
            dset = pnt_grp[dname]
            attrs = {k: v for k, v in dset.attrs.items()}
            df = read_h5_table(pnt_grp, dname)
            for column in df.columns:
                attrs[column] = df[column].values
            point_data[dname][lonlat] = attrs

            dname = DatasetName.TEMPERATURE.value
            dset = pnt_grp[dname]
            attrs = {k: v for k, v in dset.attrs.items()}
            df = read_h5_table(pnt_grp, dname)
            for column in df.columns:
                attrs[column] = df[column].values
            point_data[dname][lonlat] = attrs

        return point_data

    def load_nbar_ancillary(acquisitions, fid):
        """
        Load the ancillary data retrieved during the workflow.

        Reads the BRDF datasets of every non-thermal acquisition and
        returns a dict with the unique BRDF ids, a single tier name and
        the alpha_1/alpha_2 values keyed by band name.
        """
        ids = []
        tier = []
        alphas = {
            'alpha_1': {},
            'alpha_2': {},
        }
        for acq in acquisitions:
            # thermal bands are skipped; no BRDF dataset is read for them
            if acq.band_type == BandType.THERMAL:
                continue

            bn = acq.band_name
            for param in BrdfDirectionalParameters:
                fmt = DatasetName.BRDF_FMT.value
                dname = fmt.format(band_name=bn, parameter=param.value)
                dset = fid[dname]
                ids.extend(dset.attrs['id'])
                tier.append(BrdfTier[dset.attrs['tier']].value)
                # normalise the keys: lower case, '-' replaced by '_'
                alpha_key = param.value.lower().replace('-', '_')
                bn_key = bn.lower().replace('-', '_')
                alphas[alpha_key][bn_key] = dset[()]

        # unique listing of brdf ids
        ids = numpy.unique(numpy.array(ids)).tolist()

        # a single tier level will dictate the metadata entry
        # (the smallest tier value collected is the one reported)
        tier = BrdfTier(numpy.min(tier)).name

        result = {
            'id': ids,
            'tier': tier,
            'alpha_1': alphas['alpha_1'],
            'alpha_2': alphas['alpha_2'],
        }

        return result

    def pick_acquisition():
        """Return the first acquisition of the first resolution group."""
        # pick any acquisition
        band_group = next(iter(res_group_bands))
        return res_group_bands[band_group][0]

    acquisition = pick_acquisition()
    level1_path = acquisition.pathname
    # tag the acquisition datetime with the UTC tzinfo
    acq_datetime = (acquisition.acquisition_datetime.replace(tzinfo=dtz.utc))

    def source_info():
        """Describe the level-1 source of the picked acquisition."""
        result = {
            'source_level1': level1_path,
            'acquisition_datetime': acq_datetime,
            'platform_id': acquisition.platform_id,
            'sensor_id': acquisition.sensor_id
        }
        # ancillary metadata tracking
        result.update(extract_ancillary_metadata(level1_path))
        return result

    def remove_fields(data):
        """Drop the listed bookkeeping keys from `data` in place and return it."""
        fields = ['CLASS', 'VERSION', 'query_date', 'data_source']
        for field in fields:
            data.pop(field, None)
        return data

    def elevation_provenance(anc_grp):
        """Collect the unique 'id' attribute values of the elevation sources."""
        ids = []

        # low resolution source
        dname = DatasetName.ELEVATION.value
        dset = anc_grp[dname]
        ids.extend(dset.attrs['id'])

        # high resolution source (res group is adjacent to ancillary group)
        parent_group = anc_grp.parent
        for res_group in res_group_bands:
            dname = ppjoin(res_group, GroupName.ELEVATION_GROUP.value,
                           DatasetName.DSM_SMOOTHED.value)
            dset = parent_group[dname]
            ids.extend(dset.attrs['id'])

        # unique listing of ids
        ids = numpy.unique(numpy.array(ids)).tolist()
        md = {
            'id': ids,
        }

        return md

    def ancillary(fid):
        """Assemble the 'ancillary' section of the metadata document."""
        # load the ancillary and remove fields not of use to ODC
        # retrieve the averaged ancillary if available
        anc_grp = fid.get(GroupName.ANCILLARY_AVG_GROUP.value)
        if anc_grp is None:
            anc_grp = fid

        dname = DatasetName.AEROSOL.value
        aerosol_data = remove_fields(read_scalar(anc_grp, dname))
        dname = DatasetName.WATER_VAPOUR.value
        water_vapour_data = remove_fields(read_scalar(anc_grp, dname))
        dname = DatasetName.OZONE.value
        ozone_data = remove_fields(read_scalar(anc_grp, dname))

        # currently have multiple sources of elevation data
        elevation_data = elevation_provenance(anc_grp)

        result = {
            'aerosol': aerosol_data,
            'water_vapour': water_vapour_data,
            'ozone': ozone_data,
            'elevation': elevation_data
        }

        if sbt:
            result.update(load_sbt_ancillary(fid))

        if nbar:
            # NOTE(review): 'brdf' is overwritten on every iteration, so
            # only the entry of the last resolution group iterated is kept
            # -- confirm whether the groups were meant to be merged.
            for grp_name in res_group_bands:
                grp_ancillary = load_nbar_ancillary(res_group_bands[grp_name], fid)
                result['brdf'] = grp_ancillary

        return result

    def software_versions():
        """Return the version and repository entries for wagl and modtran."""
        return {
            'wagl': {
                'version': wagl.__version__,
                'repo_url': 'https://github.com/GeoscienceAustralia/wagl.git'
            },
            'modtran': {
                'version': '6.0.1',
                'repo_url': 'http://www.ontar.com/software/productdetails.aspx?item=modtran'
            }
        }

    def algorithm():
        """Return the algorithm references relevant to the chosen workflow."""
        result = {}

        if sbt:
            result['sbt_doi'] = 'TODO'

        if nbar:
            result['algorithm_version'] = 2.0
            result[
                'nbar_doi'] = 'http://dx.doi.org/10.1109/JSTARS.2010.2042281'
            result[
                'nbar_terrain_corrected_doi'] = 'http://dx.doi.org/10.1016/j.rse.2012.06.018'

        return result

    metadata = {
        'system_information': get_system_information(),
        'source_datasets': source_info(),
        'ancillary': ancillary(ancillary_group),
        'algorithm_information': algorithm(),
        'software_versions': software_versions(),
        'id': str(uuid.uuid4()),
        'parameters': parameters
    }

    # output
    # the document is stored under its own uuid ...
    yml_data = yaml.dump(metadata, default_flow_style=False)
    write_scalar(yml_data, metadata['id'], out_group, attrs={'file_format': 'yaml'})

    # ... and the "current metadata" name is soft-linked to that dataset
    out_group[DatasetName.CURRENT_METADATA.value] = h5py.SoftLink(
        '{}/{}'.format(out_group.name, metadata['id']))
def format_json(acquisitions, ancillary_group, satellite_solar_group,
                lon_lat_group, workflow, out_group):
    """
    Creates the json input documents for MODTRAN, for the albedo runs
    and for the thermal (SBT) run.

    For every row of the coordinator dataset the satellite view/azimuth
    and longitude/latitude are sampled at that row's (row_index,
    col_index), adjusted, and combined with the ancillary values into
    the keyword arguments of the `mpjson` builders. Each resulting
    document is serialised with `json.dumps` and written as a SCALAR
    dataset, with the builder inputs attached as attributes.

    :param acquisitions:
        A sequence of acquisition objects. The first one supplies the
        centre latitude, acquisition datetime, day of year, decimal
        hour and altitude; the band type is used to select the
        spectral filter name.

    :param ancillary_group:
        The HDF5 `Group` holding the coordinator dataset, the ancillary
        scalars (or an averaged-ancillary sub group containing them)
        and, for SBT, the per point atmospheric profile tables.

    :param satellite_solar_group:
        The HDF5 `Group` holding the satellite view and azimuth
        datasets.

    :param lon_lat_group:
        The HDF5 `Group` holding the longitude and latitude datasets.

    :param workflow:
        A `Workflow` member; the albedo documents are produced for
        `Workflow.STANDARD` and `Workflow.NBAR`.

    :param out_group:
        A writeable HDF5 `Group`, or None in which case the file
        'atmospheric-inputs.h5' is created and used.

    :return:
        A `tuple` of (json_data, out_group) where json_data is a
        `dict` keyed by (point, albedo) holding the un-serialised
        documents.
    """
    # angles data
    sat_view = satellite_solar_group[DatasetName.SATELLITE_VIEW.value]
    sat_azi = satellite_solar_group[DatasetName.SATELLITE_AZIMUTH.value]
    longitude = lon_lat_group[DatasetName.LON.value]
    latitude = lon_lat_group[DatasetName.LAT.value]

    # retrieve the averaged ancillary if available
    anc_grp = ancillary_group.get(GroupName.ANCILLARY_AVG_GROUP.value)
    if anc_grp is None:
        anc_grp = ancillary_group

    # ancillary data
    coordinator = ancillary_group[DatasetName.COORDINATOR.value]
    aerosol = anc_grp[DatasetName.AEROSOL.value][()]
    water_vapour = anc_grp[DatasetName.WATER_VAPOUR.value][()]
    ozone = anc_grp[DatasetName.OZONE.value][()]
    elevation = anc_grp[DatasetName.ELEVATION.value][()]

    npoints = coordinator.shape[0]
    view = numpy.zeros(npoints, dtype='float32')
    azi = numpy.zeros(npoints, dtype='float32')
    lat = numpy.zeros(npoints, dtype='float64')
    lon = numpy.zeros(npoints, dtype='float64')

    # Read each coordinator field once; indexing the dataset by field
    # name inside the loop would re-read the whole field per point.
    row_index = coordinator['row_index']
    col_index = coordinator['col_index']

    for i in range(npoints):
        yidx = row_index[i]
        xidx = col_index[i]
        view[i] = sat_view[yidx, xidx]
        azi[i] = sat_azi[yidx, xidx]
        lat[i] = latitude[yidx, xidx]
        lon[i] = longitude[yidx, xidx]

    view_corrected = 180 - view
    azi_corrected = azi + 180
    rlon = 360 - lon

    # check if in western hemisphere
    idx = rlon >= 360
    rlon[idx] -= 360

    # views within 0.1 of 180 are snapped to 180 with a zero azimuth
    idx = (180 - view_corrected) < 0.1
    view_corrected[idx] = 180
    azi_corrected[idx] = 0

    # wrap azimuths beyond 360 back into range
    idx = azi_corrected > 360
    azi_corrected[idx] -= 360

    # get the modtran profiles to use based on the centre latitude
    _, centre_lat = acquisitions[0].gridded_geo_box().centre_lonlat

    if out_group is None:
        out_group = h5py.File('atmospheric-inputs.h5', 'w')

    if GroupName.ATMOSPHERIC_INPUTS_GRP.value not in out_group:
        out_group.create_group(GroupName.ATMOSPHERIC_INPUTS_GRP.value)

    group = out_group[GroupName.ATMOSPHERIC_INPUTS_GRP.value]
    iso_time = acquisitions[0].acquisition_datetime.isoformat()
    group.attrs['acquisition-datetime'] = iso_time

    json_data = {}

    # setup the json files required by MODTRAN
    if workflow in (Workflow.STANDARD, Workflow.NBAR):
        acqs = [a for a in acquisitions if a.band_type == BandType.REFLECTIVE]

        for p in range(npoints):
            for alb in Workflow.NBAR.albedos:
                input_data = {
                    'name': POINT_ALBEDO_FMT.format(p=p, a=str(alb.value)),
                    'water': water_vapour,
                    'ozone': ozone,
                    'doy': acquisitions[0].julian_day(),
                    'visibility': -aerosol,
                    'lat': lat[p],
                    'lon': rlon[p],
                    'time': acquisitions[0].decimal_hour(),
                    'sat_azimuth': azi_corrected[p],
                    'sat_height': acquisitions[0].altitude / 1000.0,
                    'elevation': elevation,
                    'sat_view': view_corrected[p],
                    'albedo': float(alb.value),
                    'filter_function': acqs[0].spectral_filter_name,
                    'binary': False
                }

                # centre latitudes below -23.0 use the mid latitude
                # summer builder, everything else the tropical one
                if centre_lat < -23.0:
                    data = mpjson.midlat_summer_albedo(**input_data)
                else:
                    data = mpjson.tropical_albedo(**input_data)

                # the builder inputs double as the dataset attributes,
                # minus the 'binary' switch
                input_data['description'] = 'Input file for MODTRAN'
                input_data['file_format'] = 'json'
                input_data.pop('binary')

                json_data[(p, alb)] = data

                data = json.dumps(data, cls=JsonEncoder, indent=4)
                dname = ppjoin(POINT_FMT.format(p=p),
                               ALBEDO_FMT.format(a=alb.value),
                               DatasetName.MODTRAN_INPUT.value)

                write_scalar(data, dname, group, input_data)

    # create json for sbt if it has been collected
    if ancillary_group.attrs.get('sbt-ancillary'):
        dname = ppjoin(POINT_FMT, DatasetName.ATMOSPHERIC_PROFILE.value)
        acqs = [a for a in acquisitions if a.band_type == BandType.THERMAL]

        for p in range(npoints):
            atmos_profile = read_h5_table(ancillary_group, dname.format(p=p))

            n_layers = atmos_profile.shape[0] + 6
            # height of the first profile row; kept under its own name so
            # the ancillary elevation read above is not shadowed
            surface_gpheight = atmos_profile.iloc[0]['GeoPotential_Height']

            input_data = {
                'name': POINT_ALBEDO_FMT.format(p=p, a='TH'),
                'ozone': ozone,
                'n': n_layers,
                'prof_alt': list(atmos_profile['GeoPotential_Height']),
                'prof_pres': list(atmos_profile['Pressure']),
                'prof_temp': list(atmos_profile['Temperature']),
                'prof_water': list(atmos_profile['Relative_Humidity']),
                'visibility': -aerosol,
                'sat_height': acquisitions[0].altitude / 1000.0,
                'gpheight': surface_gpheight,
                'sat_view': view_corrected[p],
                'filter_function': acqs[0].spectral_filter_name,
                'binary': False
            }

            data = mpjson.thermal_transmittance(**input_data)

            input_data['description'] = 'Input File for MODTRAN'
            input_data['file_format'] = 'json'
            input_data.pop('binary')

            json_data[(p, Albedos.ALBEDO_TH)] = data

            data = json.dumps(data, cls=JsonEncoder, indent=4)
            out_dname = ppjoin(POINT_FMT.format(p=p),
                               ALBEDO_FMT.format(a=Albedos.ALBEDO_TH.value),
                               DatasetName.MODTRAN_INPUT.value)

            write_scalar(data, out_dname, group, input_data)

    # attach location info to each point Group
    point_lon = coordinator['longitude']
    point_lat = coordinator['latitude']
    for p in range(npoints):
        lonlat = (point_lon[p], point_lat[p])
        group[POINT_FMT.format(p=p)].attrs['lonlat'] = lonlat

    return json_data, out_group
def format_tp5(acquisitions, ancillary_group, satellite_solar_group,
               lon_lat_group, workflow, out_group):
    """
    Creates str formatted tp5 files for the albedo (0, 1) and
    transmittance (t).

    For every row of the coordinator dataset the satellite view/azimuth
    and longitude/latitude are sampled at that row's (row_index,
    col_index), adjusted, and substituted together with the ancillary
    values into the tp5 templates. Each formatted document is written
    as a SCALAR dataset (as bytes), with the template inputs attached
    as attributes.

    :param acquisitions:
        A sequence of acquisition objects. The first one supplies the
        centre latitude, acquisition datetime, day of year, decimal
        hour and altitude; the band type is used to select the
        spectral filter file.

    :param ancillary_group:
        The HDF5 `Group` holding the coordinator dataset, the ancillary
        scalars (or an averaged-ancillary sub group containing them)
        and, for SBT, the per point atmospheric profile tables.

    :param satellite_solar_group:
        The HDF5 `Group` holding the satellite view and azimuth
        datasets.

    :param lon_lat_group:
        The HDF5 `Group` holding the longitude and latitude datasets.

    :param workflow:
        A `Workflow` member; the albedo/transmittance documents are
        produced for `Workflow.STANDARD` and `Workflow.NBAR`.

    :param out_group:
        A writeable HDF5 `Group`, or None in which case the file
        'atmospheric-inputs.h5' is created and used.

    :return:
        A `tuple` of (tp5_data, out_group) where tp5_data is a `dict`
        keyed by (point, albedo) holding the formatted strings.
    """
    # angles data
    sat_view = satellite_solar_group[DatasetName.SATELLITE_VIEW.value]
    sat_azi = satellite_solar_group[DatasetName.SATELLITE_AZIMUTH.value]
    longitude = lon_lat_group[DatasetName.LON.value]
    latitude = lon_lat_group[DatasetName.LAT.value]

    # retrieve the averaged ancillary if available
    anc_grp = ancillary_group.get(GroupName.ANCILLARY_AVG_GROUP.value)
    if anc_grp is None:
        anc_grp = ancillary_group

    # ancillary data
    coordinator = ancillary_group[DatasetName.COORDINATOR.value]
    aerosol = anc_grp[DatasetName.AEROSOL.value][()]
    water_vapour = anc_grp[DatasetName.WATER_VAPOUR.value][()]
    ozone = anc_grp[DatasetName.OZONE.value][()]
    elevation = anc_grp[DatasetName.ELEVATION.value][()]

    npoints = coordinator.shape[0]
    view = numpy.zeros(npoints, dtype='float32')
    azi = numpy.zeros(npoints, dtype='float32')
    lat = numpy.zeros(npoints, dtype='float64')
    lon = numpy.zeros(npoints, dtype='float64')

    # Read each coordinator field once; indexing the dataset by field
    # name inside the loop would re-read the whole field per point.
    row_index = coordinator['row_index']
    col_index = coordinator['col_index']

    for i in range(npoints):
        yidx = row_index[i]
        xidx = col_index[i]
        view[i] = sat_view[yidx, xidx]
        azi[i] = sat_azi[yidx, xidx]
        lat[i] = latitude[yidx, xidx]
        lon[i] = longitude[yidx, xidx]

    view_corrected = 180 - view
    azi_corrected = azi + 180
    rlon = 360 - lon

    # check if in western hemisphere
    idx = rlon >= 360
    rlon[idx] -= 360

    # views within 0.1 of 180 are snapped to 180 with a zero azimuth
    idx = (180 - view_corrected) < 0.1
    view_corrected[idx] = 180
    azi_corrected[idx] = 0

    # wrap azimuths beyond 360 back into range
    idx = azi_corrected > 360
    azi_corrected[idx] -= 360

    # get the modtran profiles to use based on the centre latitude
    _, centre_lat = acquisitions[0].gridded_geo_box().centre_lonlat
    if centre_lat < -23.0:
        albedo_profile = MIDLAT_SUMMER_ALBEDO
        trans_profile = MIDLAT_SUMMER_TRANSMITTANCE
    else:
        albedo_profile = TROPICAL_ALBEDO
        trans_profile = TROPICAL_TRANSMITTANCE

    if out_group is None:
        out_group = h5py.File('atmospheric-inputs.h5', 'w')

    if GroupName.ATMOSPHERIC_INPUTS_GRP.value not in out_group:
        out_group.create_group(GroupName.ATMOSPHERIC_INPUTS_GRP.value)

    group = out_group[GroupName.ATMOSPHERIC_INPUTS_GRP.value]
    iso_time = acquisitions[0].acquisition_datetime.isoformat()
    group.attrs['acquisition-datetime'] = iso_time

    tp5_data = {}

    # setup the tp5 files required by MODTRAN
    if workflow in (Workflow.STANDARD, Workflow.NBAR):
        acqs = [a for a in acquisitions if a.band_type == BandType.REFLECTIVE]

        for p in range(npoints):
            for alb in Workflow.NBAR.albedos:
                input_data = {
                    'water': water_vapour,
                    'ozone': ozone,
                    'filter_function': acqs[0].spectral_filter_file,
                    'visibility': -aerosol,
                    'elevation': elevation,
                    'sat_height': acquisitions[0].altitude / 1000.0,
                    'sat_view': view_corrected[p],
                    'doy': acquisitions[0].julian_day(),
                    'binary': 'T'
                }

                # the transmittance template takes a different set of
                # extra fields than the albedo template
                if alb == Albedos.ALBEDO_T:
                    input_data['albedo'] = 0.0
                    input_data['sat_view_offset'] = 180.0 - view_corrected[p]
                    data = trans_profile.format(**input_data)
                else:
                    input_data['albedo'] = float(alb.value)
                    input_data['lat'] = lat[p]
                    input_data['lon'] = rlon[p]
                    input_data['time'] = acquisitions[0].decimal_hour()
                    input_data['sat_azimuth'] = azi_corrected[p]
                    data = albedo_profile.format(**input_data)

                tp5_data[(p, alb)] = data

                dname = ppjoin(POINT_FMT.format(p=p),
                               ALBEDO_FMT.format(a=alb.value),
                               DatasetName.TP5.value)
                # numpy.bytes_ is the supported spelling; the numpy.string_
                # alias was removed in NumPy 2.0
                write_scalar(numpy.bytes_(data), dname, group, input_data)

    # create tp5 for sbt if it has been collected
    if ancillary_group.attrs.get('sbt-ancillary'):
        dname = ppjoin(POINT_FMT, DatasetName.ATMOSPHERIC_PROFILE.value)
        acqs = [a for a in acquisitions if a.band_type == BandType.THERMAL]

        for p in range(npoints):
            atmospheric_profile = []
            atmos_profile = read_h5_table(ancillary_group, dname.format(p=p))
            n_layers = atmos_profile.shape[0] + 6
            # height of the first profile row; kept under its own name so
            # the ancillary elevation read above is not shadowed
            surface_gpheight = atmos_profile.iloc[0]['GeoPotential_Height']

            # one formatted record per profile row
            for _, row in atmos_profile.iterrows():
                input_data = {
                    'gpheight': row['GeoPotential_Height'],
                    'pressure': row['Pressure'],
                    'airtemp': row['Temperature'],
                    'humidity': row['Relative_Humidity'],
                    'zero': 0.0
                }
                atmospheric_profile.append(SBT_FORMAT.format(**input_data))

            input_data = {
                'ozone': ozone,
                'filter_function': acqs[0].spectral_filter_file,
                'visibility': -aerosol,
                'gpheight': surface_gpheight,
                'n': n_layers,
                'sat_height': acquisitions[0].altitude / 1000.0,
                'sat_view': view_corrected[p],
                'binary': 'T',
                'atmospheric_profile': ''.join(atmospheric_profile)
            }

            data = THERMAL_TRANSMITTANCE.format(**input_data)
            tp5_data[(p, Albedos.ALBEDO_TH)] = data

            out_dname = ppjoin(POINT_FMT.format(p=p),
                               ALBEDO_FMT.format(a=Albedos.ALBEDO_TH.value),
                               DatasetName.TP5.value)
            write_scalar(numpy.bytes_(data), out_dname, group, input_data)

    # attach location info to each point Group
    point_lon = coordinator['longitude']
    point_lat = coordinator['latitude']
    for p in range(npoints):
        lonlat = (point_lon[p], point_lat[p])
        group[POINT_FMT.format(p=p)].attrs['lonlat'] = lonlat

    return tp5_data, out_group
def collect_nbar_ancillary(
    container,
    aerosol_dict=None,
    water_vapour_dict=None,
    ozone_path=None,
    dem_path=None,
    brdf_dict=None,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Gather the ancillary values needed for NBAR and write each of them
    as a SCALAR dataset: aerosol, water vapour, ozone, elevation, and
    the BRDF parameters of every reflective band.

    :param container:
        An instance of an `AcquisitionsContainer` object.

    :param aerosol_dict:
        A `dict` defined as either of the following:

        * {'user': value}
        * {'pathname': value}

    :param water_vapour_dict:
        A `dict` defined as either of the following:

        * {'user': value}
        * {'pathname': value}

    :param ozone_path:
        A `str` containing the full file pathname to the directory
        containing the ozone data.

    :param dem_path:
        A `str` containing the full file pathname to the directory
        containing the digital elevation model data.

    :param brdf_dict:
        A `dict` defined as either of the following:

        * {'user': {band-alias: {'alpha_1': value, 'alpha_2': value}, ...}}
        * {'brdf_path': path-to-BRDF,
           'brdf_fallback_path': path-to-average-BRDF}

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver.
        Otherwise, a writeable HDF5 `Group` object.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        The in-memory `h5py.File` object when `out_group` is None;
        otherwise None, the results having been written to `out_group`.

    :notes:
        `filter_opts` is not used, and `compression` is only passed
        through to `get_brdf_data`, as the BRDF imagery is no longer
        saved. They remain in case tables need to be stored in future.
    """
    in_memory = out_group is None

    # Initialise the output files
    target = (
        h5py.File("nbar-ancillary.h5", "w", driver="core", backing_store=False)
        if in_memory
        else out_group
    )

    # the first acquisition of the highest resolution group provides the
    # datetime and the spatial centre used for the lookups
    reference_acq = container.get_highest_resolution()[0][0]
    acq_time = reference_acq.acquisition_datetime
    geobox = reference_acq.gridded_geo_box()

    aerosol_result = get_aerosol_data(reference_acq, aerosol_dict)
    write_scalar(
        aerosol_result[0], DatasetName.AEROSOL.value, target, aerosol_result[1]
    )

    vapour_result = get_water_vapour(reference_acq, water_vapour_dict)
    write_scalar(
        vapour_result[0], DatasetName.WATER_VAPOUR.value, target, vapour_result[1]
    )

    ozone_result = get_ozone_data(ozone_path, geobox.centre_lonlat, acq_time)
    write_scalar(
        ozone_result[0], DatasetName.OZONE.value, target, ozone_result[1]
    )

    elevation_result = get_elevation_data(geobox.centre_lonlat, dem_path)
    write_scalar(
        elevation_result[0],
        DatasetName.ELEVATION.value,
        target,
        elevation_result[1],
    )

    # brdf
    brdf_name_template = DatasetName.BRDF_FMT.value
    for group_name in container.groups:
        for band_acq in container.get_acquisitions(group=group_name):
            # only reflective bands carry BRDF parameters
            if band_acq.band_type is BandType.REFLECTIVE:
                brdf_data = get_brdf_data(band_acq, brdf_dict, compression)

                # output; what remains of each entry once "value" has
                # been taken out is attached as the dataset attributes
                for parameter in brdf_data:
                    dataset_name = brdf_name_template.format(
                        parameter=parameter.value, band_name=band_acq.band_name
                    )
                    entry = brdf_data[parameter]
                    value = entry.pop("value")
                    write_scalar(value, dataset_name, target, entry)

    return target if in_memory else None
def collect_sbt_ancillary(
    acquisition,
    lonlats,
    ancillary_path,
    invariant_fname=None,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Collects the ancillary data required for surface brightness
    temperature.

    For every (longitude, latitude) the surface values and the pressure
    level tables are retrieved and written beneath a per point group,
    followed by a combined atmospheric profile table made of the surface
    row plus the pressure level rows that lie above it.

    :param acquisition:
        An instance of an `Acquisition` object.

    :param lonlats:
        A `list` of tuples containing (longitude, latitude) coordinates.

    :param ancillary_path:
        A `str` containing the directory pathname to the ECMWF
        ancillary data.

    :param invariant_fname:
        A `str` containing the file pathname to the invariant geopotential
        data.

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver.
        Otherwise, a writeable HDF5 `Group` object.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        The in-memory `h5py.File` object when `out_group` is None;
        otherwise None, the results having been written to `out_group`.
    """
    # Initialise the output files
    if out_group is None:
        fid = h5py.File("sbt-ancillary.h5", "w", driver="core", backing_store=False)
    else:
        fid = out_group

    # flag that the sbt ancillary has been collected into this file
    fid.attrs["sbt-ancillary"] = True

    dt = acquisition.acquisition_datetime

    description = (
        "Combined Surface and Pressure Layer data retrieved from "
        "the ECWMF catalogue."
    )
    # Attributes of the combined atmospheric profile table. Kept under a
    # dedicated name: it used to share the name `attrs` with the surface
    # relative humidity attributes defined inside the loop, which
    # overwrote it before the profile table was written.
    # NOTE(review): `dt` is passed through as-is; this assumes
    # write_dataframe accepts a datetime attribute value -- confirm.
    profile_attrs = {"description": description, "Date used for querying ECWMF": dt}

    for i, lonlat in enumerate(lonlats):
        pnt = POINT_FMT.format(p=i)

        # get data located at the surface
        dew = ecwmf_dewpoint_temperature(ancillary_path, lonlat, dt)
        t2m = ecwmf_temperature_2metre(ancillary_path, lonlat, dt)
        sfc_prs = ecwmf_surface_pressure(ancillary_path, lonlat, dt)
        sfc_hgt = ecwmf_elevation(invariant_fname, lonlat)
        sfc_rh = relative_humdity(t2m[0], dew[0])

        # output the scalar data along with the attrs
        dname = ppjoin(pnt, DatasetName.DEWPOINT_TEMPERATURE.value)
        write_scalar(dew[0], dname, fid, dew[1])

        dname = ppjoin(pnt, DatasetName.TEMPERATURE_2M.value)
        write_scalar(t2m[0], dname, fid, t2m[1])

        dname = ppjoin(pnt, DatasetName.SURFACE_PRESSURE.value)
        write_scalar(sfc_prs[0], dname, fid, sfc_prs[1])

        dname = ppjoin(pnt, DatasetName.SURFACE_GEOPOTENTIAL.value)
        write_scalar(sfc_hgt[0], dname, fid, sfc_hgt[1])

        dname = ppjoin(pnt, DatasetName.SURFACE_RELATIVE_HUMIDITY.value)
        rh_attrs = {"description": "Relative Humidity calculated at the surface"}
        write_scalar(sfc_rh, dname, fid, rh_attrs)

        # get the data from each of the pressure levels (1 -> 1000 ISBL)
        gph = ecwmf_geo_potential(ancillary_path, lonlat, dt)
        tmp = ecwmf_temperature(ancillary_path, lonlat, dt)
        rh = ecwmf_relative_humidity(ancillary_path, lonlat, dt)

        dname = ppjoin(pnt, DatasetName.GEOPOTENTIAL.value)
        write_dataframe(
            gph[0], dname, fid, compression, attrs=gph[1], filter_opts=filter_opts
        )

        dname = ppjoin(pnt, DatasetName.TEMPERATURE.value)
        write_dataframe(
            tmp[0], dname, fid, compression, attrs=tmp[1], filter_opts=filter_opts
        )

        dname = ppjoin(pnt, DatasetName.RELATIVE_HUMIDITY.value)
        write_dataframe(
            rh[0], dname, fid, compression, attrs=rh[1], filter_opts=filter_opts
        )

        # combine the surface and higher pressure layers into a single array
        cols = [
            "GeoPotential_Height",
            "Pressure",
            "Temperature",
            "Relative_Humidity",
        ]
        layers = pandas.DataFrame(
            columns=cols, index=range(rh[0].shape[0]), dtype="float64"
        )

        layers["GeoPotential_Height"] = gph[0]["GeoPotential_Height"].values
        layers["Pressure"] = ECWMF_LEVELS[::-1]
        layers["Temperature"] = tmp[0]["Temperature"].values
        layers["Relative_Humidity"] = rh[0]["Relative_Humidity"].values

        # define the surface level
        df = pandas.DataFrame(
            {
                "GeoPotential_Height": sfc_hgt[0],
                "Pressure": sfc_prs[0],
                "Temperature": kelvin_2_celcius(t2m[0]),
                "Relative_Humidity": sfc_rh,
            },
            index=[0],
        )

        # MODTRAN requires the height to be ascending
        # and the pressure to be descending
        wh = (layers["GeoPotential_Height"] > sfc_hgt[0]) & (
            layers["Pressure"] < sfc_prs[0].round()
        )
        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent row-wise stacking
        df = pandas.concat([df, layers[wh]])
        df.reset_index(drop=True, inplace=True)

        dname = ppjoin(pnt, DatasetName.ATMOSPHERIC_PROFILE.value)
        write_dataframe(
            df, dname, fid, compression, attrs=profile_attrs, filter_opts=filter_opts
        )

        fid[pnt].attrs["lonlat"] = lonlat

    if out_group is None:
        return fid
def scalar_residual(ref_fid, test_fid, pathname, out_fid, save_inputs):
    """
    Compare a SCALAR dataset between a reference and a test file using
    a plain equality check instead of a numerical difference, which
    lets string values be compared as well.

    :param ref_fid:
        A h5py file object (essentially the root Group), containing
        the reference data.

    :param test_fid:
        A h5py file object (essentially the root Group), containing
        the test data.

    :param pathname:
        A `str` containing the pathname to the SCALAR Dataset.

    :param out_fid:
        A h5py file object (essentially the root Group), opened for
        writing the output data.

    :param save_inputs:
        A `bool` indicating whether or not to save the input datasets
        used for evaluating the residuals alongside the results.

    :return:
        None; returning None is essential for the HDF5 visit routine.
    """
    reference = read_scalar(ref_fid, pathname)
    candidate = read_scalar(test_fid, pathname)

    # the result inherits the reference attributes, minus the value itself
    result_attrs = dict(reference)
    del result_attrs['value']

    # 'file_format' is dropped as the conversion tool would try to output
    # that format; the result is only a flag saying whether the contents
    # differ, so labelling it as e.g. a yaml string would be incorrect
    result_attrs.pop('file_format', None)
    result_attrs['description'] = 'Equivalency Test'

    # handles string types; numerical values likewise only yield an
    # equality result rather than a difference
    is_equal = reference['value'] == candidate['value']

    # output
    parent_name = ref_fid[pathname].parent.name.strip('/')
    result_path = ppjoin('RESULTS', 'SCALAR', 'EQUIVALENCY', parent_name,
                         pbasename(pathname))
    write_scalar(is_equal, result_path, out_fid, result_attrs)

    if not save_inputs:
        return

    # copy the reference data, then the test data
    for top_level, source in (('REFERENCE-DATA', ref_fid),
                              ('TEST-DATA', test_fid)):
        destination = out_fid.require_group(ppjoin(top_level, parent_name))
        source.copy(source[pathname], destination)