def _build_ls8_raw():
    """
    Build the expected metadata for the Landsat 8 raw telemetry ('MD') test dataset.

    ``_reset_runtime_id()`` is called before anything is constructed, so the
    ``MachineMetadata()`` in the lineage is created after the reset.

    :rtype: ptype.DatasetMetadata
    """
    _reset_runtime_id()

    antenna = ptype.Coord(lat=-23.759, lon=133.8824, height=579.312)
    acquisition = ptype.AcquisitionMetadata(
        aos=dateutil.parser.parse('2014-10-12T00:52:52'),
        los=dateutil.parser.parse('2014-10-12T00:58:37'),
        groundstation=ptype.GroundstationMetadata(code='ASA', antenna_coord=antenna),
        heading='D',
        platform_orbit=8846,
    )
    # The raw pass spans WRS rows 70 to 83 on path 101.
    image = ptype.ImageMetadata(
        satellite_ref_point_start=ptype.Point(101, 70),
        satellite_ref_point_end=ptype.Point(101, 83),
    )

    return ptype.DatasetMetadata(
        id_=uuid.UUID('1c76a8ca-51ae-11e4-8644-0050568d59ac'),
        creation_dt=dateutil.parser.parse("2014-10-12 04:18:01"),
        size_bytes=5680940 * 1024,
        ga_label='MD_P00_LC81010700832014285LGN00_101_070-083_20141012T032336Z20141012T032910_1',
        product_type='satellite_telemetry_data',
        usgs=ptype.UsgsMetadata(interval_id='LC81010782014285LGN00'),
        platform=ptype.PlatformMetadata(code='LANDSAT-8'),
        instrument=ptype.InstrumentMetadata(name='OLI_TIRS'),
        format_=ptype.FormatMetadata(name='MD'),
        acquisition=acquisition,
        extent=None,
        grid_spatial=None,
        browse=None,
        image=image,
        lineage=ptype.LineageMetadata(machine=ptype.MachineMetadata()),
    )
def test_extract_md(_run_pdsinfo_exe):
    """
    Check PDS metadata extraction against a canned ``pdsinfo`` report.

    :param _run_pdsinfo_exe: stand-in for the pdsinfo runner; its
        ``return_value`` is set to the canned report below.
    """
    pds_dir = write_files({'P1540064AAAAAAAAAAAAAA14219032341001.PDS': ''})

    # Canned pdsinfo output, one record per line.
    _run_pdsinfo_exe.return_value = (
        b"APID 64: count 610338 invalid 0 missing 6255\n"
        b"first packet: 2014/08/07 03:16:28.750910\n"
        b"last packet: 2014/08/07 03:21:28.604695\n"
        b"missing seconds: 2\n"
        b"day packets: 545223/64311\n"
        b"night packets: 0/0\n"
        b"engineering packets: 804/0\n"
    )

    md = pds.extract_md(ptype.DatasetMetadata(), pds_dir)

    expected_acquisition = ptype.AcquisitionMetadata(
        aos=datetime.datetime(2014, 8, 7, 3, 16, 28, 750910),
        los=datetime.datetime(2014, 8, 7, 3, 21, 28, 604695)
    )
    expected = ptype.DatasetMetadata(
        platform=ptype.PlatformMetadata(code='AQUA'),
        instrument=ptype.InstrumentMetadata(name='MODIS'),
        format_=ptype.FormatMetadata(name='PDS'),
        acquisition=expected_acquisition,
        image=ptype.ImageMetadata(day_percentage_estimate=100.0)
    )

    # Ids are cleared on both sides so they do not take part in the comparison.
    md.id_ = None
    expected.id_ = None
    assert_same(expected, md)
def test_aqua_pds_label(self):
    """
    The raw driver should build the expected GA label for an AQUA/MODIS PDS dataset.
    """
    acquisition = ptype.AcquisitionMetadata(
        aos=datetime.datetime(2014, 8, 7, 3, 16, 28, 750910),
        los=datetime.datetime(2014, 8, 7, 3, 16, 30, 228023),
        platform_orbit=65208
    )
    lineage = ptype.LineageMetadata(
        machine=ptype.MachineMetadata(),
        source_datasets={}
    )
    ds = ptype.DatasetMetadata(
        id_=UUID('d083fa45-1edd-11e5-8f9e-1040f381a756'),
        product_type='satellite_telemetry_data',
        creation_dt=datetime.datetime(2015, 6, 11, 5, 51, 50),
        platform=ptype.PlatformMetadata(code='AQUA'),
        instrument=ptype.InstrumentMetadata(name='MODIS'),
        format_=ptype.FormatMetadata(name='PDS'),
        rms_string='S1A1C1D1R1',
        acquisition=acquisition,
        image=ptype.ImageMetadata(day_percentage_estimate=100.0),
        lineage=lineage
    )

    expected_label = (
        "AQUA_MODIS_STD-PDS_P00_65208.S1A1C1D1R1_0_0_20140807T031628Z20140807T031630"
    )
    self.assertEqual(expected_label, drivers.RawDriver().get_ga_label(ds))
def _extract_rcc_filename_fields(base_md, filename):
    """
    Fill metadata fields that can be read off an RCC filename.

    Landsat 5 and 7 RCC format specifications:
        http://landsat.usgs.gov/documents/LS_DFCB_01.pdf
        http://landsat.usgs.gov/documents/LS_DFCB_06.pdf

    Platform and instrument are only derived when no platform code is set, and
    an existing ``aos`` or groundstation is left untouched. The ``gsi``
    attribute, USGS interval id, ``ga_level`` and ``format_`` are always written.

    :type base_md: ptype.DatasetMetadata
    :type filename: str
    :rtype: ptype.DatasetMetadata
    """
    match = re.search(
        r'(?P<satsens>\w{4})(?P<date>\d{13})(?P<gsi>[^\d]+).*?(?P<version>\d\d)?\.data',
        filename
    )
    parts = match.groupdict()

    if not (base_md.platform and base_md.platform.code):
        # TODO: Do we have a cleaner way to do this? A list of mappings?
        # 'satsens' is exactly four characters: vehicle, vehicle number,
        # instrument and sensor mode.
        vehicle, vehicle_num, instrument_short, smode_short = parts['satsens']

        platform_code, instrument_name, operation_mode = _expand_platform_info(
            vehicle, vehicle_num, instrument_short, smode_short
        )

        if not base_md.platform:
            base_md.platform = ptype.PlatformMetadata()
        if not base_md.instrument:
            base_md.instrument = ptype.InstrumentMetadata()

        base_md.platform.code = platform_code
        base_md.instrument.name = instrument_name
        base_md.instrument.operation_mode = operation_mode

    if not base_md.acquisition:
        base_md.acquisition = ptype.AcquisitionMetadata()
    acquisition = base_md.acquisition

    if not acquisition.aos:
        # Year, day-of-year, then hhmmss.
        acquisition.aos = datetime.datetime.strptime(parts['date'], "%Y%j%H%M%S")

    station_code = parts['gsi']
    base_md.gsi = station_code
    if not acquisition.groundstation:
        acquisition.groundstation = ptype.GroundstationMetadata(code=station_code)

    if not base_md.usgs:
        base_md.usgs = ptype.UsgsMetadata()
    base_md.usgs.interval_id = _usgs_id_from_filename(filename)

    # RCC is raw: P00
    base_md.ga_level = 'P00'

    # The two-digit version suffix is optional in the filename.
    raw_version = parts.get('version')
    if raw_version:
        version = int(raw_version)
    else:
        version = None
    base_md.format_ = ptype.FormatMetadata(name='RCC', version=version)

    return base_md
def test_eods_fill_metadata(self):
    """
    The EODS driver should fill metadata from the folder name, ``metadata.xml``
    and the band files found under ``scene01``.
    """
    dataset_folder = "LS8_OLI_TIRS_NBAR_P54_GANBAR01-015_101_078_20141012"
    bandname = '10'
    bandfile = '{}_B{}.tif'.format(dataset_folder, bandname)

    metadata_xml = """<EODS_DATASET>
    <ACQUISITIONINFORMATION>
        <EVENT>
            <AOS>20141012T03:23:36</AOS>
            <LOS>20141012T03:29:10</LOS>
        </EVENT>
    </ACQUISITIONINFORMATION>
    <EXEXTENT>
        <TEMPORALEXTENTFROM>20141012 00:55:54</TEMPORALEXTENTFROM>
        <TEMPORALEXTENTTO>20141012 00:56:18</TEMPORALEXTENTTO>
    </EXEXTENT>
</EODS_DATASET>"""

    input_folder = write_files({
        dataset_folder: {
            'metadata.xml': metadata_xml,
            'scene01': {bandfile: ''},
        }
    })

    expected_band = ptype.BandMetadata(
        number=bandname,
        path=Path(input_folder, dataset_folder, 'scene01', bandfile)
    )
    expected = ptype.DatasetMetadata(
        id_=_EXPECTED_NBAR.id_,
        ga_label=dataset_folder,
        ga_level='P54',
        product_type='EODS_NBAR',
        platform=ptype.PlatformMetadata(code='LANDSAT_8'),
        instrument=ptype.InstrumentMetadata(name='OLI_TIRS'),
        format_=ptype.FormatMetadata(name='GeoTiff'),
        acquisition=ptype.AcquisitionMetadata(
            aos=datetime.datetime(2014, 10, 12, 3, 23, 36),
            los=datetime.datetime(2014, 10, 12, 3, 29, 10),
            groundstation=ptype.GroundstationMetadata(code='LGS')
        ),
        extent=ptype.ExtentMetadata(
            center_dt=datetime.datetime(2014, 10, 12, 0, 56, 6),
            from_dt=datetime.datetime(2014, 10, 12, 0, 55, 54),
            to_dt=datetime.datetime(2014, 10, 12, 0, 56, 18)
        ),
        image=ptype.ImageMetadata(
            satellite_ref_point_start=ptype.Point(x=101, y=78),
            satellite_ref_point_end=ptype.Point(x=101, y=78),
            bands={bandname: expected_band}
        )
    )

    # Start from a dataset that only carries the expected id.
    dataset = ptype.DatasetMetadata(id_=_EXPECTED_NBAR.id_)
    received = drivers.EODSDriver().fill_metadata(
        dataset, input_folder.joinpath(dataset_folder)
    )

    self.assert_same(expected, received)
def _populate_from_mtl_dict(md, mtl_, folder):
    """
    Copy fields from a parsed MTL document onto the dataset metadata.

    Existing ``usgs``, ``platform``, ``instrument`` and ``acquisition``
    sub-objects are reused; missing ones are created. ``format_`` and the
    acquisition groundstation are always replaced.

    :param mtl_: Parsed mtl file
    :param folder: Folder containing imagery (and mtl). For fixing relative paths in the MTL.
        (Not referenced directly in this function body.)
    :type md: eodatasets.type.DatasetMetadata
    :type mtl_: dict of (str, obj)
    :rtype: eodatasets.type.DatasetMetadata
    """
    file_info_group = 'METADATA_FILE_INFO'
    product_group = 'PRODUCT_METADATA'

    if not md.usgs:
        md.usgs = ptype.UsgsMetadata()
    md.usgs.scene_id = _get(mtl_, file_info_group, 'landsat_scene_id')
    md.creation_dt = _get(mtl_, file_info_group, 'file_date')

    # TODO: elsewhere we've used 'GAORTHO01' etc. Here it's 'L1T' etc.
    data_type = _get(mtl_, product_group, 'data_type')
    md.product_level = _translate_to_old_usgs_code(data_type)

    # md.size_bytes is not populated here.

    satellite_id = _get(mtl_, product_group, 'spacecraft_id')
    if not md.platform:
        md.platform = ptype.PlatformMetadata()
    md.platform.code = satellite_id

    output_format = _get(mtl_, product_group, 'output_format')
    md.format_ = ptype.FormatMetadata(name=output_format)

    product_md = _get(mtl_, product_group)
    sensor_id = _get(mtl_, product_group, 'sensor_id')
    if not md.instrument:
        md.instrument = ptype.InstrumentMetadata()
    md.instrument.name = sensor_id
    # md.instrument.type_ is not populated here.
    md.instrument.operation_mode = _get(product_md, 'sensor_mode')

    if not md.acquisition:
        md.acquisition = ptype.AcquisitionMetadata()
    station_id = _get(mtl_, "METADATA_FILE_INFO", "station_id")
    md.acquisition.groundstation = ptype.GroundstationMetadata(code=station_id)
    # Not populated here: groundstation.antenna_coord, aos, los, heading, platform_orbit.

    _populate_extent(md, product_md)
    _populate_grid_spatial(md, mtl_)
    _populate_image(md, mtl_)
    _populate_lineage(md, mtl_)

    return md
def extract_md(base_md, directory_path):
    """
    Extract metadata from a directory of PDS files.

    If no PDS file is found, ``base_md`` is returned unchanged. Otherwise the
    format, platform and instrument are overwritten, the acquisition start and
    end are taken from ``get_pdsinfo``, and the day percentage estimate is
    computed from the day/night values it returns.

    If ``get_pdsinfo`` reports neither day nor night packets, the day
    percentage estimate is left as it was rather than dividing by zero.

    :type base_md: ptype.DatasetMetadata
    :type directory_path: pathlib.Path
    :rtype: ptype.DatasetMetadata
    """
    pds_file = find_pds_file(directory_path)
    if not pds_file:
        _LOG.debug('No PDS files found')
        return base_md

    # Extract PDS info.
    _LOG.info('Using PDS file %r', pds_file)

    base_md.format_ = ptype.FormatMetadata(name='PDS')
    base_md.platform = ptype.PlatformMetadata(
        code=_pds_satellite(pds_file.stem))
    base_md.instrument = ptype.InstrumentMetadata(name='MODIS')

    if not base_md.acquisition:
        base_md.acquisition = ptype.AcquisitionMetadata()

    # Date from the filename. The start time from get_pdsinfo replaces it
    # just below.
    base_md.acquisition.aos = _pds_date(pds_file.stem)

    start, end, day, night = get_pdsinfo(pds_file)
    base_md.acquisition.aos = start
    base_md.acquisition.los = end

    if not base_md.image:
        base_md.image = ptype.ImageMetadata()

    total = day + night
    if total:
        base_md.image.day_percentage_estimate = (float(day) / total) * 100.0
    else:
        # No day or night packets: a percentage is undefined, so the field
        # is left untouched.
        _LOG.warning(
            'No day or night packets reported for %r; '
            'day percentage not estimated', pds_file)

    return base_md
def extract_md(base_md, directory_path):
    """
    Extract metadata from an NPP HDF5 filename if one exists.

    The NPP directory should contain a VIIRS ".h5" (HDF5) data file whose
    name carries the date.

    The filename will be of the form:

        NPP: RNSCA-RVIRS_npp_d20130422_t0357358_e0410333_b07686_c20130422041225898000_nfts_drl.h5

    where:
        d: start date (YYYYMMDD)
        t: start time (hhmmss.s)
        e: (not described in the original notes)
        b: orbit number
        c: stop date/time (YYYYMMDDhhmmss.ssssss)

    Only the first file returned by ``find_hdf5_files`` is used. If there are
    none, ``base_md`` is returned unchanged.

    :type base_md: ptype.DatasetMetadata
    :type directory_path: pathlib.Path
    :rtype: ptype.DatasetMetadata
    """
    hdf5_files = find_hdf5_files(directory_path)
    if not hdf5_files:
        _LOG.debug("No NPP HDF5 file found")
        return base_md

    first_name = hdf5_files[0].name
    base_md = _extract_hdf5_filename_fields(base_md, first_name)

    if not base_md.acquisition:
        base_md.acquisition = ptype.AcquisitionMetadata()

    # HDF5 is raw: P00
    base_md.ga_level = 'P00'
    base_md.format_ = ptype.FormatMetadata(name='HDF5')

    return base_md
FILENAME = 'ls7_definitive_mtl.txt' EXPECTED_OUT = ptype.DatasetMetadata( id_=uuid.UUID('3ff71eb0-d5c5-11e4-aebb-1040f381a756'), product_level='L1G', creation_dt=datetime.datetime(2015, 4, 7, 1, 58, 25), platform=ptype.PlatformMetadata( code='LANDSAT_7' ), instrument=ptype.InstrumentMetadata( name='ETM', operation_mode='SAM' ), format_=ptype.FormatMetadata( name='GeoTIFF' ), acquisition=ptype.AcquisitionMetadata( groundstation=ptype.GroundstationMetadata( code='ASA' ) ), usgs=ptype.UsgsMetadata( scene_id='LE71140732005007ASA00' ), extent=ptype.ExtentMetadata( coord=ptype.CoordPolygon( ul=ptype.Coord( lat=-17.82157, lon=115.58472 ),
def fill_metadata(self, dataset, path, additional_files=()):
    """
    Fill PQA ('P55') dataset metadata from its 'nbar' source and the
    package's YAML metadata file (``self.METADATA_FILE`` inside ``path``).

    NOTE(review): ``dataset.lineage`` is dereferenced for the source lookup
    before the ``if not dataset.lineage`` guard further down, so a dataset
    without lineage would fail before reaching that guard.

    :type additional_files: tuple[Path]
    :type dataset: ptype.DatasetMetadata
    :type path: Path
    :rtype: ptype.DatasetMetadata
    """
    dataset.ga_level = 'P55'

    # Copy relevant fields from source nbar.
    if 'nbar' in dataset.lineage.source_datasets:
        nbar_source = dataset.lineage.source_datasets['nbar']
        borrow_single_sourced_fields(dataset, nbar_source)

        # TODO, it'd be better to grab this from the images, but they're generated after
        # this code is run. Copying from Source will do for now
        dataset.grid_spatial = deepcopy(
            dataset.lineage.source_datasets['nbar'].grid_spatial)

        # Bit 8 is passed to the valid-data calculation as the contiguity flag.
        contiguous_data_bit = 1 << 8
        valid_region = self.calculate_valid_data_region(path, contiguous_data_bit)
        dataset.grid_spatial.projection.valid_data = valid_region

    dataset.format_ = ptype.FormatMetadata('GeoTIFF')

    metadata_path = str(path.joinpath(self.METADATA_FILE))
    with open(metadata_path) as f:
        pq_metadata = yaml.load(f, Loader=Loader)

    if not dataset.lineage:
        dataset.lineage = ptype.LineageMetadata()

    dataset.lineage.algorithm = ptype.AlgorithmMetadata(
        name='pqa',
        version=str(pq_metadata['algorithm_information']['software_version']),
        doi=pq_metadata['algorithm_information']['pq_doi'])

    # Add ancillary files
    # (PyYAML parses the 'accessed'/'modified' values as datetimes already.)
    ancil_files = {
        name: ptype.AncillaryMetadata(
            type_=name,
            name=values['data_source'],
            uri=values['data_file'],
            file_owner=values['user'],
            access_dt=values['accessed'],
            modification_dt=values['modified'])
        for name, values in pq_metadata['ancillary'].items()
    }
    if ancil_files:
        dataset.lineage.ancillary = ancil_files

    # Record which tests were run in 'product_flags'
    dataset.product_flags = {
        'tested_%s' % test_name: val
        for test_name, val in pq_metadata['tests_run'].items()
    }

    return dataset
def fill_metadata(self, dataset, path, additional_files=()):
    """
    Fill EODS dataset metadata from the package folder name, the band images
    under ``scene01`` and the ``metadata.xml`` file.

    :type additional_files: tuple[Path]
    :type dataset: ptype.DatasetMetadata
    :type path: Path
    :rtype: ptype.DatasetMetadata
    :raises ValueError: if the XML temporal extent start is a day or more
        away from the date in the folder name.
    """

    def first_as_date(elements):
        # Parse the text of the first matching element, if there is one.
        if not elements:
            return None
        return parse(elements[0].text)

    name_pattern = (
        r"(?P<vehicle>LS[578])"
        r"_(?P<instrument>OLI_TIRS|OLI|TIRS|TM|ETM)"
        r"_(?P<type>NBAR|PQ|FC)"
        r"_(?P<level>[^-_]*)"
        r"_(?P<product>[^-_]*)"
        r"-(?P<groundstation>[0-9]{3})"
        r"_(?P<path>[0-9]{3})"
        r"_(?P<row>[0-9]{3})"
        r"_(?P<date>[12][0-9]{7})"
        r"(_(?P<version>[0-9]+))?"
        "$"
    )
    fields = re.match(name_pattern, path.stem).groupdict()

    dataset.product_type = "EODS_" + fields["type"]
    dataset.ga_level = fields["level"]
    dataset.ga_label = path.stem
    dataset.format_ = ptype.FormatMetadata(name='GeoTiff')

    if not dataset.platform:
        dataset.platform = ptype.PlatformMetadata()
    # Third character of the vehicle code is the satellite number ('LS8' -> '8').
    dataset.platform.code = "LANDSAT_" + fields["vehicle"][2]

    if not dataset.instrument:
        dataset.instrument = ptype.InstrumentMetadata()
    dataset.instrument.name = fields["instrument"]

    if not dataset.image:
        dataset.image = ptype.ImageMetadata(bands={})

    # Start and end reference points are both the path/row from the name.
    dataset.image.satellite_ref_point_start = ptype.Point(
        int(fields["path"]), int(fields["row"]))
    dataset.image.satellite_ref_point_end = ptype.Point(
        int(fields["path"]), int(fields["row"]))

    scene_dir = path.joinpath("scene01")
    for image_path in scene_dir.iterdir():
        band = self.to_band(dataset, image_path)
        if band:
            dataset.image.bands[band.number] = band
    md_image.populate_from_image_metadata(dataset)

    if not dataset.acquisition:
        dataset.acquisition = ptype.AcquisitionMetadata()

    # The first groundstation whose EODS domain code matches the name wins.
    station = next(
        (candidate for candidate in _GROUNDSTATION_LIST
         if candidate["eods_domain_code"] == fields["groundstation"]),
        None
    )
    if station is not None:
        dataset.acquisition.groundstation = ptype.GroundstationMetadata(
            code=station["code"])

    if not dataset.extent:
        dataset.extent = ptype.ExtentMetadata()

    doc = ElementTree.parse(str(path.joinpath('metadata.xml')))
    start_time = first_as_date(doc.findall("./EXEXTENT/TEMPORALEXTENTFROM"))
    end_time = first_as_date(doc.findall("./EXEXTENT/TEMPORALEXTENTTO"))

    # check if the dates in the metadata file are at least as accurate as what we have
    filename_time = datetime.datetime.strptime(fields["date"], "%Y%m%d")
    time_diff = start_time - filename_time

    # Is the EODS metadata extremely off?
    if abs(time_diff).days != 0:
        raise ValueError(
            'EODS time information differs too much from source files: %s' % time_diff)

    dataset.acquisition.aos = first_as_date(
        doc.findall("./ACQUISITIONINFORMATION/EVENT/AOS"))
    dataset.acquisition.los = first_as_date(
        doc.findall("./ACQUISITIONINFORMATION/EVENT/LOS"))

    dataset.extent.center_dt = start_time + (end_time - start_time) / 2
    dataset.extent.from_dt = start_time
    dataset.extent.to_dt = end_time

    return dataset
def _build_ls7_wofs():
    """
    Build the expected metadata for the LS7 WOfS ('GAWATER') test dataset.

    :rtype: ptype.DatasetMetadata
    """
    label = 'LS7_ETM_WATER_140_-027_2013-07-24T00-32-27.952897'

    extent = ptype.ExtentMetadata(
        reference_system='WGS84',
        coord=ptype.CoordPolygon(
            ul=ptype.Coord(140.0000000, -26.0000000),
            ll=ptype.Coord(140.0000000, -27.0000000),
            ur=ptype.Coord(141.0000000, -26.0000000),
            lr=ptype.Coord(141.0000000, -27.0000000)
        ),
        # TODO: Should we store the center coordinate?
        from_dt=dateutil.parser.parse('2013-07-24 00:32:27.952897'),
        to_dt=dateutil.parser.parse('2013-07-24 00:33:15.899670')
    )

    grid_spatial = ptype.GridSpatialMetadata(
        dimensions=[
            ptype.DimensionMetadata(name='x', resolution=27.1030749476, size=4000),
            ptype.DimensionMetadata(name='y', resolution=27.1030749476, size=4000)
        ],
        # TODO: Should WOfS have tile coordinates here?
        # georectified=ptype.GeoRectifiedSpacialMetadata(
        #     geo_ref_points=PointPolygon(
        #         ul=ptype.Point(255012.500, 7229987.500),
        #         ur=ptype.Point(497012.500, 7229987.500),
        #         ll=ptype.Point(255012.500, 7019987.500),
        #         lr=ptype.Point(497012.500, 7229987.500)
        #     ),
        #     checkpoint_availability=0,
        #     datum='GDA94',
        #     ellipsoid='GRS80',
        #     point_in_pixel='UL',
        #     projection='UTM',
        #     zone=-54
        # )
    )

    # TODO: Nodata value?
    water_band = ptype.BandMetadata(
        path=Path('LS7_ETM_WATER_140_-027_2013-07-24T00-32-27.952897.tif')
    )
    image = ptype.ImageMetadata(
        satellite_ref_point_start=ptype.Point(98, 78),
        satellite_ref_point_end=ptype.Point(98, 79),
        cloud_cover_percentage=0.76494375,
        cloud_cover_details='122391 count',
        sun_elevation=33.0061002772,
        sun_azimuth=38.2433049177,
        bands={'W': water_band}
    )

    lineage = ptype.LineageMetadata(
        algorithm=ptype.AlgorithmMetadata(name='WOfS', version='1.3', parameters={}),
        machine=ptype.MachineMetadata(),
        # TODO: LS7 dataset?
        source_datasets={}
    )

    return ptype.DatasetMetadata(
        ga_label=label,
        product_type='GAWATER',
        size_bytes=616 * 1024,
        platform=ptype.PlatformMetadata(code='LS7'),
        instrument=ptype.InstrumentMetadata(name='ETM', type_='Multi-Spectral'),
        format_=ptype.FormatMetadata('GeoTIFF', version=1),
        extent=extent,
        grid_spatial=grid_spatial,
        image=image,
        lineage=lineage
    )
def _build_ls8_ortho():
    """
    Build the expected metadata for the LS8 ortho ('GAORTHO01') test dataset.

    ``_reset_runtime_id()`` is called before anything is constructed. The
    lineage embeds ``_build_ls8_raw()`` as its 'satellite_telemetry_data'
    source, built after this dataset's own machine metadata.

    :rtype: ptype.DatasetMetadata
    """
    _reset_runtime_id()

    def band(number, type_, **extra):
        # Each band file name ends in '_B<number>.TIF' ('QA' for the quality band).
        return ptype.BandMetadata(
            path=Path('product/LC81010782014285LGN00_B%s.TIF' % number),
            number=number,
            type_=type_,
            **extra
        )

    extent = ptype.ExtentMetadata(
        reference_system='WGS84',
        coord=ptype.CoordPolygon(
            ul=ptype.Coord(lat=-24.97, lon=133.97969),
            ur=ptype.Coord(lat=-24.96826, lon=136.24838),
            lr=ptype.Coord(lat=-26.96338, lon=136.26962),
            ll=ptype.Coord(lat=-26.96528, lon=133.96233)
        ),
        from_dt=dateutil.parser.parse("2014-10-12T00:55:54"),
        center_dt=dateutil.parser.parse("2014-10-12T00:56:06"),
        to_dt=dateutil.parser.parse("2014-10-12T00:56:18"),
    )

    grid_spatial = ptype.GridSpatialMetadata(
        dimensions=[
            ptype.DimensionMetadata(name='sample', resolution=25.0, size=9161),
            ptype.DimensionMetadata(name='line', resolution=25.0, size=9161)
        ],
        projection=ptype.ProjectionMetadata(
            centre_point=ptype.Point(511512.500000, 7127487.500000),
            geo_ref_points=ptype.PointPolygon(
                ul=ptype.Point(397012.5, 7237987.5),
                ur=ptype.Point(626012.5, 7237987.5),
                ll=ptype.Point(397012.5, 7016987.5),
                lr=ptype.Point(626012.5, 7016987.5)
            ),
            datum='GDA94',
            ellipsoid='GRS80',
            point_in_pixel='UL',
            map_projection='UTM',
            resampling_option='CUBIC_CONVOLUTION',
            zone=-53
        )
    )

    browse = {
        'medium': ptype.BrowseMetadata(
            path=Path('product/LS8_OLITIRS_OTH_P51_GALPGS01-032_101_078_20141012.jpg'),
            file_type='image/jpg',
            cell_size=219.75,
            red_band=7,
            green_band=5,
            blue_band=1
        ),
        'full': ptype.BrowseMetadata(
            path=Path('LS8_OLITIRS_OTH_P51_GALPGS01-032_101_078_20141012_FR.jpg'),
            file_type='image/jpg',
            cell_size=25.0,
            red_band=7,
            green_band=5,
            blue_band=1
        )
    }

    bands = {
        'coastal_aerosol': band(1, 'reflective', cell_size=25.0),
        'visible_blue': band(2, 'reflective', cell_size=25.0),
        'visible_green': band(3, 'reflective', cell_size=25.0),
        'visible_red': band(4, 'reflective', cell_size=25.0),
        'near_infrared': band(5, 'reflective', cell_size=25.0),
        'short_wave_infrared1': band(6, 'reflective', cell_size=25.0),
        'short_wave_infrared2': band(7, 'reflective', cell_size=25.0),
        'panchromatic': band(8, 'panchromatic', cell_size=12.50,
                             shape=ptype.Point(17761, 18241)),
        'cirrus': band(9, 'atmosphere'),
        'thermal_infrared1': band(10, 'thermal', cell_size=25.0,
                                  shape=ptype.Point(8881, 9121)),
        'thermal_infrared2': band(11, 'thermal', cell_size=25.0,
                                  shape=ptype.Point(8881, 9121)),
        'quality': band('QA', 'quality'),
    }

    image = ptype.ImageMetadata(
        satellite_ref_point_start=ptype.Point(101, 78),
        cloud_cover_percentage=0,
        cloud_cover_details=None,
        sun_elevation=58.00268508,
        sun_azimuth=59.41814014,
        ground_control_points_model=420,
        geometric_rmse_model=4.610,
        geometric_rmse_model_x=3.527,
        geometric_rmse_model_y=2.968,
        # TODO: What are these two?
        viewing_incidence_angle_long_track=0,
        viewing_incidence_angle_x_track=0,
        bands=bands
    )

    ancil_root = '/eoancillarydata/sensor-specific/LANDSAT8'
    # Both bias-parameter entries share one uri, as in the reference data.
    bias_parameter_uri = (
        ancil_root + '/BiasParameterFile/2014/10'
        + '/LT8BPF20141012002432_20141012020301.01'
    )
    ancillary = {
        'cpf': ptype.AncillaryMetadata(
            name='L8CPF20141001_20141231.01',
            uri=ancil_root + '/CalibrationParameterFile' + '/L8CPF20141001_20141231.01'
        ),
        'bpf_tirs': ptype.AncillaryMetadata(
            name='LT8BPF20141012002432_20141012020301.01',
            uri=bias_parameter_uri
        ),
        'bpf_oli': ptype.AncillaryMetadata(
            name='LO8BPF20141012002825_20141012011100.01',
            uri=bias_parameter_uri
        ),
        'rlut': ptype.AncillaryMetadata(name='L8RLUT20130211_20431231v09.h5')
    }

    return ptype.DatasetMetadata(
        id_=uuid.UUID('17b92c16-51d3-11e4-909d-005056bb6972'),
        ga_label='LS8_OLITIRS_OTH_P51_GALPGS01-002_101_078_20141012',
        product_type='GAORTHO01',
        creation_dt=dateutil.parser.parse('2014-10-12 05:46:20'),
        size_bytes=2386550 * 1024,
        platform=ptype.PlatformMetadata(code='LANDSAT-8'),
        instrument=ptype.InstrumentMetadata(
            name='OLI_TIRS',
            type_="Multi-Spectral",
            operation_mode='PUSH-BROOM'
        ),
        format_=ptype.FormatMetadata(name='GeoTiff', version=1),
        extent=extent,
        grid_spatial=grid_spatial,
        browse=browse,
        image=image,
        lineage=ptype.LineageMetadata(
            algorithm=ptype.AlgorithmMetadata(
                name='Pinkmatter Landsat Processor',
                version='3.3.3104',
                parameters={
                    'resampling': 'CC',
                    'radiometric_correction': 'CPF',
                    'orientation': 'NUP',
                    'hemisphere': 'S',
                }
            ),
            machine=ptype.MachineMetadata(
                hostname='rhe-jm-prod08.prod.lan',
                type_id='jobmanager',
                uname=(
                    'Linux rhe-jm-dev08.dev.lan 2.6.32-279.22.1.el6.x86_64 #1 SMP '
                    'Sun Oct 12 09:21:40 EST 2014 x86_64 x86_64 x86_64 GNU/Linux'
                )
            ),
            ancillary=ancillary,
            source_datasets={'satellite_telemetry_data': _build_ls8_raw()}
        )
    )
def _build_ls8_nbar():
    """
    Build the expected metadata for the LS8 NBAR ('GANBAR01') test dataset.

    ``_reset_runtime_id()`` is called before anything is constructed. The
    lineage embeds ``_build_ls8_ortho()`` as its 'level1' source, built after
    this dataset's own machine metadata.

    :rtype: ptype.DatasetMetadata
    """
    _reset_runtime_id()

    label = 'LS8_OLI_TIRS_NBAR_P54_GANBAR01-015_101_078_20141012'

    extent = ptype.ExtentMetadata(
        reference_system='WGS84',
        coord=ptype.CoordPolygon(
            ul=ptype.Coord(lat=-24.97, lon=133.97969),
            ur=ptype.Coord(lat=-24.96826, lon=136.24838),
            lr=ptype.Coord(lat=-26.96338, lon=136.26962),
            ll=ptype.Coord(lat=-26.96528, lon=133.96233)
        ),
        from_dt=dateutil.parser.parse("2014-10-12T00:55:54"),
        to_dt=dateutil.parser.parse("2014-10-12T00:56:18"),
    )

    grid_spatial = ptype.GridSpatialMetadata(
        dimensions=[
            ptype.DimensionMetadata(name='sample', resolution=25.0, size=9161),
            ptype.DimensionMetadata(name='line', resolution=25.0, size=9161)
        ],
        projection=ptype.ProjectionMetadata(
            centre_point=ptype.Point(511512.500000, 7127487.500000),
            geo_ref_points=ptype.PointPolygon(
                ul=ptype.Point(397012.5, 7237987.5),
                ur=ptype.Point(626012.5, 7237987.5),
                ll=ptype.Point(397012.5, 7016987.5),
                lr=ptype.Point(626012.5, 7016987.5)
            ),
            datum='GDA94',
            ellipsoid='GRS80',
            point_in_pixel='UL',
            map_projection='UTM',
            resampling_option='CUBIC_CONVOLUTION',
            zone=-53
        )
    )

    browse = {
        'medium': ptype.BrowseMetadata(
            path=Path(label + '.tif'),
            file_type='image/jpg',
            cell_size=219.75,
            red_band=7,
            green_band=5,
            blue_band=2
        ),
        'full': ptype.BrowseMetadata(
            path=Path(label + '_FR.tif'),
            file_type='image/jpg',
            cell_size=25.0,
            red_band=7,
            green_band=5,
            blue_band=2
        )
    }

    # Bands '1' to '7', each carrying only the path of its product file.
    bands = {
        str(number): ptype.BandMetadata(
            path=Path('product/%s_B%d.tif' % (label, number))
        )
        for number in range(1, 8)
    }

    image = ptype.ImageMetadata(
        satellite_ref_point_start=ptype.Point(101, 78),
        cloud_cover_percentage=0.01,
        cloud_cover_details=None,
        # TODO: What are these two?
        viewing_incidence_angle_long_track=0,
        viewing_incidence_angle_x_track=0,
        bands=bands
    )

    return ptype.DatasetMetadata(
        id_=uuid.UUID("249ae098-bd88-11e4-beaa-1040f381a756"),
        size_bytes=622208 * 1024,
        ga_label=label,
        product_type='GANBAR01',
        platform=ptype.PlatformMetadata(code='LANDSAT-8'),
        instrument=ptype.InstrumentMetadata(
            name='OLI_TIRS',
            type_="Multi-Spectral",
            operation_mode='PUSH-BROOM'
        ),
        # No acquisition metadata is set on this fixture.
        format_=ptype.FormatMetadata(name='GeoTiff', version=1),
        extent=extent,
        grid_spatial=grid_spatial,
        browse=browse,
        image=image,
        lineage=ptype.LineageMetadata(
            algorithm=ptype.AlgorithmMetadata(name='GANBAR', version='3.2.1', parameters={}),
            machine=ptype.MachineMetadata(),
            source_datasets={'level1': _build_ls8_ortho()},
            ancillary={}
        )
    )
def test_parse_variations(self):
    """
    Parse a range of real-looking RCC filenames and compare every extracted
    field with the expected metadata.
    """

    def expected_md(interval_id, platform_code, instrument_name, operation_mode,
                    version, aos):
        # 'version' is only passed to FormatMetadata when the filename carries one.
        format_kwargs = {'name': 'RCC'}
        if version is not None:
            format_kwargs['version'] = version
        return ptype.DatasetMetadata(
            ga_level='P00',
            usgs=ptype.UsgsMetadata(interval_id=interval_id),
            platform=ptype.PlatformMetadata(code=platform_code),
            instrument=ptype.InstrumentMetadata(name=instrument_name,
                                                operation_mode=operation_mode),
            format_=ptype.FormatMetadata(**format_kwargs),
            acquisition=ptype.AcquisitionMetadata(
                aos=aos,
                groundstation=ptype.GroundstationMetadata(code='ASA')))

    # (file name, interval id, platform, instrument, operation mode, version, aos)
    cases = [
        ('L7EB2007303000923ASA222Q.data', 'L7EB2007303000923ASA222',
         'LANDSAT_7', 'ETM', 'BUMPER', None,
         datetime.datetime(2007, 10, 30, 0, 9, 23)),
        ('L7EB2015118010116ASA213Q00.data', 'L7EB2015118010116ASA213',
         'LANDSAT_7', 'ETM', 'BUMPER', 0,
         datetime.datetime(2015, 4, 28, 1, 1, 16)),
        ('L7EB2011239021036ASA111Q.data', 'L7EB2011239021036ASA111',
         'LANDSAT_7', 'ETM', 'BUMPER', None,
         datetime.datetime(2011, 8, 27, 2, 10, 36)),
        ('L5TB2005120001242ASA111I.data', 'L5TB2005120001242ASA111',
         'LANDSAT_5', 'TM', 'BUMPER', None,
         datetime.datetime(2005, 4, 30, 0, 12, 42)),
        ('L5TT1995117002206ASA111I00.data', 'L5TT1995117002206ASA111',
         'LANDSAT_5', 'TM', 'SAM', 0,
         datetime.datetime(1995, 4, 27, 0, 22, 6)),
        ('L5TT1990118013106ASA111I00.data', 'L5TT1990118013106ASA111',
         'LANDSAT_5', 'TM', 'SAM', 0,
         datetime.datetime(1990, 4, 28, 1, 31, 6)),
        ('L7ET2005302020634ASA123Q.data', 'L7ET2005302020634ASA123',
         'LANDSAT_7', 'ETM', 'SAM', None,
         datetime.datetime(2005, 10, 29, 2, 6, 34)),
        ('L5TB2011299000126ASA111I00.data', 'L5TB2011299000126ASA111',
         'LANDSAT_5', 'TM', 'BUMPER', 0,
         datetime.datetime(2011, 10, 26, 0, 1, 26)),
        ('L5TB2010119010045ASA214I.data', 'L5TB2010119010045ASA214',
         'LANDSAT_5', 'TM', 'BUMPER', None,
         datetime.datetime(2010, 4, 29, 1, 0, 45)),
        ('L7ET2000296234136ASA111Q.data', 'L7ET2000296234136ASA111',
         'LANDSAT_7', 'ETM', 'SAM', None,
         datetime.datetime(2000, 10, 22, 23, 41, 36)),
    ]
    new_examples = {case[0]: expected_md(*case[1:]) for case in cases}

    for file_name, expected_output in new_examples.items():
        output = rccfile._extract_rcc_filename_fields(
            ptype.DatasetMetadata(), file_name)

        # The ids will be different, so clear them before comparison.
        output.id_ = None
        expected_output.id_ = None

        self.assert_same(expected_output, output)
def extract_md(base_md, directory_path):
    """
    Extract metadata from a directory of MDF files

    From http://landsat.usgs.gov/documents/LDCM-DFCB-001.pdf
    the MDF directory will have a name of the form...

        VINpppRRRrrrYYYYdddGSIvv

    where
        V    = the vehicle (L=Landsat)
        I    = instrument (O=OLI T=TIRS C=combined OLI/TIRS)
        N    = vehicle number (8 = Landsat 8)
        ppp  = WRS-2 starting path (001-233)
        RRR  = WRS-2 starting row (001-248)
        rrr  = WRS-2 ending row (001-248)
        YYYY = Acquisition starting year
        ddd  = Acquisition starting day of year
        GSI  = Ground station identifier (e.g. ASA)
        vv   = version (00-99)

    for example

        LC80850800822013137ASA00

    The USGS id is taken from the directory name returned by
    ``find_mdf_files``. When that is unavailable, the siblings of the first
    MDF file are scanned and the last name whose prefix (the part before the
    underscore) passes ``is_mdf_usgs_id`` is used. If there are no MDF files,
    or no id can be determined, ``base_md`` is returned unchanged.

    :type base_md: ptype.DatasetMetadata
    :type directory_path: Path
    :rtype: ptype.DatasetMetadata
    """
    directory_path, files = find_mdf_files(directory_path)

    if not files:
        _log.debug("No MDF files found")
        return base_md

    usgs_id = directory_path.name if directory_path else None
    if not usgs_id:
        # Look at siblings of the mdf files.
        for f in list(files)[0].parent.iterdir():
            prefix = _before_underscore(f.name)
            if is_mdf_usgs_id(prefix):
                # Assign first so the log line reports the id just found.
                usgs_id = prefix
                _log.info('Found usgs id %r', usgs_id)

        if not usgs_id:
            _log.debug('No MDF id matched. Assuming not MDF.')
            return base_md

    _log.info("Found MDF files %r in directory %r", files, directory_path)

    # Both the id and the file list are known to be non-empty here.
    base_md = _extract_mdf_id_fields(base_md, usgs_id)
    base_md = _extract_mdf_file_fields(base_md, [f.name for f in files])

    base_md.product_type = 'satellite_telemetry_data'
    base_md.ga_level = 'P00'

    if not base_md.format_:
        base_md.format_ = ptype.FormatMetadata()
    base_md.format_.name = 'MD'

    return base_md
def fill_metadata(self, dataset, path, additional_files=()):
    """
    Fill NBAR ('P54') dataset metadata from its 'level1' source and the
    package's YAML metadata file (``self.METADATA_FILE`` inside ``path``).

    NOTE(review): ``dataset.lineage`` is dereferenced for the source lookup
    before the ``if not dataset.lineage`` guard further down, so a dataset
    without lineage would fail before reaching that guard.

    :type additional_files: tuple[Path]
    :type dataset: ptype.DatasetMetadata
    :type path: Path
    :rtype: ptype.DatasetMetadata
    """
    metadata_path = str(path.joinpath(self.METADATA_FILE))
    with open(metadata_path) as f:
        nbar_metadata = yaml.load(f, Loader=Loader)

    # Copy relevant fields from source ortho.
    if 'level1' in dataset.lineage.source_datasets:
        source_ortho = dataset.lineage.source_datasets['level1']
        borrow_single_sourced_fields(dataset, source_ortho)

        # TODO, it'd be better to grab this from the images, but they're generated after
        # this code is run. Copying from Source will do for now
        dataset.grid_spatial = deepcopy(
            dataset.lineage.source_datasets['level1'].grid_spatial)

        valid_region = self.calculate_valid_data_region(path)
        dataset.grid_spatial.projection.valid_data = valid_region

    if not dataset.lineage:
        dataset.lineage = ptype.LineageMetadata()

    algorithm_info = nbar_metadata['algorithm_information']
    self._fill_algorithm_information(dataset, algorithm_info)
    dataset.product_doi = nbar_metadata['algorithm_information']['arg25_doi']

    # Extract ancillary file data and values
    ancils = nbar_metadata['ancillary_data']

    # Flatten the nested 'brdf' section into '<band>_brdf_<type>' entries
    # alongside the other ancillaries.
    brdfs = ancils.pop('brdf', {})
    brdf_ancils = {}
    for band_name, ancil_types in brdfs.items():
        for ancil_type, values in ancil_types.items():
            brdf_ancils['_'.join((band_name, 'brdf', ancil_type))] = values
    ancils.update(brdf_ancils)

    # Add algorithm parameters
    parameters = {name: values['value'] for name, values in ancils.items()}
    if parameters:
        dataset.lineage.algorithm.parameters = parameters

    # Add ancillary files
    # Entries without a 'data_file' are values only and have no file record.
    # (PyYAML parses the 'accessed'/'modified' values as datetimes already.)
    ancil_files = {
        name: ptype.AncillaryMetadata(
            type_=name,
            name=values['data_file'].rpartition('/')[2],
            uri=values['data_file'],
            file_owner=values['user'],
            access_dt=values['accessed'],
            modification_dt=values['modified'])
        for name, values in ancils.items()
        if 'data_file' in values
    }
    if ancil_files:
        dataset.lineage.ancillary = ancil_files

    # All NBARs are P54. (source: Lan Wei)
    dataset.ga_level = 'P54'
    dataset.format_ = ptype.FormatMetadata('GeoTIFF')

    return dataset