def update(self):
    """Update or create the charts reference vector file.

    Walks each data-type catalog XML in self._dt_xml, scans every
    `has` node (one per chart), and adds any chart not already present
    in FRED (matched by ID == chart title) to the reference vector.
    """
    self.FRED._open_ds(1)
    for dt in self._dt_xml.keys():
        surveys = []
        this_xml = f_utils.iso_xml(self._dt_xml[dt], timeout=1000, read_timeout=2000)
        # each chart is a `has` element anywhere in the catalog document
        charts = this_xml.xml_doc.findall('.//{*}has', namespaces=this_xml.namespaces)
        if self.verbose:
            _prog = utils.CliProgress('scanning {} surveys in {}.'.format(len(charts), dt))

        for i, chart in enumerate(charts):
            # re-point the iso_xml wrapper at this chart node so its
            # accessors (title/date/polygon/...) read from the chart
            this_xml.xml_doc = chart
            title = this_xml.title()
            if self.verbose:
                _prog.update_perc((i, len(charts)))

            # only add charts not already recorded in FRED
            self.FRED._attribute_filter(["ID = '{}'".format(title)])
            if self.FRED.layer is None or len(self.FRED.layer) == 0:
                h_epsg, v_epsg = this_xml.reference_system()
                this_data = this_xml.linkages()
                geom = this_xml.polygon(geom=True)
                if geom is not None:
                    # BUGFIX: abstract was stored as the bound method
                    # (this_xml.abstract) instead of its value; call it
                    # as the other update methods do.
                    surveys.append({'Name': title, 'ID': title, 'Agency': 'NOAA',
                                    'Date': this_xml.date(), 'MetadataLink': this_xml.url,
                                    'MetadataDate': this_xml.xml_date(), 'DataLink': this_data,
                                    'Link': self._charts_url, 'DataType': dt,
                                    'DataSource': 'charts', 'HorizontalDatum': h_epsg,
                                    'VerticalDatum': v_epsg, 'Info': this_xml.abstract(),
                                    'geom': geom})

        self.FRED._add_surveys(surveys)
        if self.verbose:
            _prog.end(0, 'scanned {} surveys in {}'.format(len(charts), dt))
            utils.echo_msg('added {} surveys from {}'.format(len(surveys), dt))

    self.FRED._close_ds()
def update(self):
    """Crawl the NOS database and update/generate the NOS reference vector."""
    self.FRED._open_ds(1)
    for nosdir in self._nos_directories:
        if self.callback():
            break

        surveys = []
        # try the plain XML catalog first, then the ISO catalog
        xml_catalog = self._nos_xml_url(nosdir)
        page = f_utils.Fetch(xml_catalog).fetch_html()
        if page is None:
            xml_catalog = self._nos_iso_xml_url(nosdir)
            page = f_utils.Fetch(xml_catalog).fetch_html()
        if page is None:
            utils.echo_error_msg('failed to retrieve {}'.format(nosdir))
            break

        # every anchor pointing at an .xml file is one survey record
        rows = page.xpath('//a[contains(@href, ".xml")]/@href')
        if self.verbose:
            _prog = utils.CliProgress('scanning {} surveys in {}...'.format(len(rows), nosdir))

        for i, survey in enumerate(rows):
            if self.callback():
                break

            sid = survey[:-4]  # survey id == filename minus the '.xml'
            if self.verbose:
                _prog.update_perc((i, len(rows)))

            # skip surveys FRED already knows about
            self.FRED._attribute_filter(["ID = '{}'".format(sid)])
            if self.FRED.layer is not None and len(self.FRED.layer) != 0:
                continue

            this_xml = f_utils.iso_xml(xml_catalog + survey)
            h_epsg, v_epsg = this_xml.reference_system()
            this_data = this_xml.data_links()

            # keep only the data links/types we know how to process
            d_links = []
            d_types = []
            for key in this_data.keys():
                if key in ['GEODAS_XYZ', 'BAG', 'GRID_BAG']:
                    d_links.append(this_data[key])
                    d_types.append(key)

            geom = this_xml.bounds(geom=True)
            if geom is not None:
                surveys.append({'Name': this_xml.title(),
                                'ID': sid,
                                'Agency': 'NOAA/NOS',
                                'Date': this_xml.date(),
                                'MetadataLink': this_xml.url,
                                'MetadataDate': this_xml.xml_date(),
                                'DataLink': ','.join([','.join(x) for x in d_links]),
                                'DataType': ','.join(list(set(d_types))),
                                'DataSource': 'nos',
                                'HorizontalDatum': h_epsg,
                                'VerticalDatum': v_epsg,
                                'Info': this_xml.abstract(),
                                'geom': geom})

        if self.verbose:
            _prog.end(0, 'scanned {} surveys in {}.'.format(len(rows), nosdir))
            utils.echo_msg('added {} surveys from {}'.format(len(surveys), nosdir))
        self.FRED._add_surveys(surveys)

    self.FRED._close_ds()
def _parse_catalog(self, catalog_url):
    """Follow every catalogRef in a THREDDS catalog and parse each child.

    Relative hrefs are resolved against the catalog's directory; hrefs
    beginning with '/' are resolved against the NGDC base URL.
    """
    catalog = f_utils.iso_xml(catalog_url)
    cat_refs = catalog.xml_doc.findall('.//th:catalogRef', namespaces=catalog.namespaces)
    for cat_ref in cat_refs:
        href = cat_ref.attrib['{http://www.w3.org/1999/xlink}href']
        if href[0] == "/":
            child_url = '{}{}'.format(self._ngdc_url, href)
        else:
            child_url = '{}/{}'.format(os.path.dirname(catalog_url), href)
        self._parse_dataset(child_url)
def update(self):
    """Update FRED with each dataset in TNM.

    For every TNM dataset, resolves the default download format, then
    registers either each of the dataset's tags (when present) or the
    dataset itself via self._update_dataset().
    """
    datasets = self._datasets()
    self.FRED._open_ds(1)
    if self.verbose:
        _prog = utils.CliProgress(
            'scanning {} datasets from TNM...'.format(len(datasets)))

    for i, ds in enumerate(datasets):
        if self.verbose:
            _prog.update_perc((i, len(datasets)))

        # BUGFIX: previously, when no format was flagged 'isDefault',
        # the loop variable leaked and the raw format *dict* (or an
        # unbound name) was passed on; fall back to the first format's
        # value instead.
        fmt = None
        for ds_fmt in ds['formats']:
            if 'isDefault' in ds_fmt.keys():
                fmt = ds_fmt['value']
                break
        if fmt is None and len(ds['formats']) > 0:
            fmt = ds['formats'][0].get('value')

        tags = ds['tags']
        if len(tags) > 0:
            # a tagged dataset is registered once per tag
            for tag in tags:
                this_xml = f_utils.iso_xml('{}?format=iso'.format(
                    tag['infoUrl']))
                geom = this_xml.bounds(geom=True)
                h_epsg, v_epsg = this_xml.reference_system()
                self._update_dataset(tag, fmt, geom, h_epsg, v_epsg)
        else:
            this_xml = f_utils.iso_xml('{}?format=iso'.format(
                ds['infoUrl']))
            geom = this_xml.bounds(geom=True)
            h_epsg, v_epsg = this_xml.reference_system()
            self._update_dataset(ds, fmt, geom, h_epsg, v_epsg)

    if self.verbose:
        _prog.end(0, 'scanned {} datasets from TNM'.format(len(datasets)))
    self.FRED._close_ds()
def _update(self):
    """Update the FRED reference vector after scanning the relevant
    metadata from Digital Coast.

    Scrapes the HTML index table of each Digital Coast directory,
    builds a record per table row, and adds any dataset not already
    present in FRED (matched by 'ID #').
    """
    self.FRED._open_ds(1)
    for subdir in self._dc_dirs:
        surveys = []
        page = f_utils.Fetch(self._dc_htdata_url + subdir).fetch_html()
        if page is None:
            continue

        table_rows = page.xpath('//table')[0].xpath('.//tr')
        if len(table_rows) <= 0:
            continue

        # the first table row holds the column headers
        cols = [hdr.text_content() for hdr in table_rows[0]]
        if self.verbose:
            _prog = utils.CliProgress(
                'scanning {} datasets in {}...'.format(len(table_rows), subdir))

        for row_i in range(1, len(table_rows)):
            if self.callback():
                break

            if self.verbose:
                _prog.update_perc((row_i, len(table_rows)))

            # map header -> cell value; anchor cells contribute their href,
            # except 'Dataset Name' which keeps its text and stores the
            # href under 'Metadata'
            rec = {}
            for col_j, cell in enumerate(table_rows[row_i].getchildren()):
                anchors = cell.xpath('a')
                if len(anchors) > 0:
                    if cols[col_j] == 'Dataset Name':
                        rec[cols[col_j]] = cell.text_content()
                        rec['Metadata'] = anchors[0].get('href')
                    else:
                        rec[cols[col_j]] = anchors[0].get('href')
                else:
                    rec[cols[col_j]] = cell.text_content()

            # skip datasets FRED already knows about
            self.FRED._attribute_filter(["ID = '{}'".format(rec['ID #'])])
            if self.FRED.layer is not None and len(self.FRED.layer) != 0:
                continue

            if 'Metadata' not in rec.keys():
                continue

            this_xml = f_utils.iso_xml(rec['Metadata'])
            h_epsg, v_epsg = this_xml.reference_system()
            geom = this_xml.bounds(geom=True)
            if geom is not None:
                if self.verbose:
                    _prog.update_perc(
                        (row_i, len(table_rows)),
                        msg='{} ** adding: {} **'.format(
                            _prog.opm, rec['ID #']))
                surveys.append({
                    'Name': rec['Dataset Name'],
                    'ID': rec['ID #'],
                    'Date': this_xml.date(),
                    'MetadataLink': rec['Metadata'],
                    'MetadataDate': this_xml.xml_date(),
                    'DataLink': rec['https'],
                    'IndexLink': rec['Tile Index'],
                    'Link': self._dc_url,
                    'DataType': subdir.split('_')[0],
                    'DataSource': 'dc',
                    'HorizontalDatum': h_epsg,
                    'VerticalDatum': v_epsg,
                    'Info': this_xml.abstract(),
                    'geom': geom
                })

        self.FRED._add_surveys(surveys)
        if self.verbose:
            _prog.end(0, 'scanned {} datasets in {}.'.format(
                len(table_rows), subdir))
            utils.echo_msg('added {} surveys from {}'.format(
                len(surveys), subdir))

    self.FRED._close_ds()
def _parse_dataset(self, catalog_url):
    """Parse a THREDDS dataset catalog and add its surveys to FRED.

    For each `dataset` node not already in FRED: recurse into any
    sub-catalogs, otherwise resolve the iso/wcs/http service URLs for
    the node's urlPath, read its ISO metadata, and record the survey.
    """
    ntCatXml = f_utils.iso_xml(catalog_url)
    this_ds = ntCatXml.xml_doc.findall('.//th:dataset',
                                       namespaces=ntCatXml.namespaces)
    this_ds_services = ntCatXml.xml_doc.findall(
        './/th:service', namespaces=ntCatXml.namespaces)
    if self.verbose:
        _prog = utils.CliProgress('scanning {} datasets in {}...'.format(
            len(this_ds), this_ds[0].attrib['name']))

    surveys = []
    for i, node in enumerate(this_ds):
        this_title = node.attrib['name']
        this_id = node.attrib['ID']
        if self.verbose:
            _prog.update_perc((i, len(this_ds)))

        # only process datasets not already recorded in FRED
        self.FRED._attribute_filter(["ID = '{}'".format(this_id)])
        if self.FRED.layer is None or len(self.FRED.layer) == 0:
            subCatRefs = node.findall('.//th:catalogRef',
                                      namespaces=ntCatXml.namespaces)
            if len(subCatRefs) > 0:
                # this node is itself a catalog; descend and stop here
                self._parse_catalog(catalog_url)
                break

            # BUGFIX: narrowed a bare `except:` to KeyError -- nodes
            # without a urlPath are simply skipped.
            try:
                ds_path = node.attrib['urlPath']
            except KeyError:
                continue

            # resolve the service endpoints advertised by the catalog
            iso_url = False
            wcs_url = False
            http_url = False
            for service in this_ds_services:
                service_name = service.attrib['name']
                if service_name == 'iso':
                    iso_url = '{}{}{}'.format(self._ngdc_url,
                                              service.attrib['base'], ds_path)
                if service_name == 'wcs':
                    wcs_url = '{}{}{}'.format(self._ngdc_url,
                                              service.attrib['base'], ds_path)
                if service_name == 'http':
                    http_url = '{}{}{}'.format(self._ngdc_url,
                                               service.attrib['base'], ds_path)

            this_xml = f_utils.iso_xml(iso_url)
            title = this_xml.title()
            h_epsg, v_epsg = this_xml.reference_system()

            # find the elevation variable name among the band identifiers;
            # default to 'z' when none of the known names is present.
            # BUGFIX: removed a leftover debug print and pre-initialized
            # zvar so it can never be unbound below.
            zvar = 'z'
            zv = this_xml.xml_doc.findall(
                './/gmd:dimension/gmd:MD_Band/gmd:sequenceIdentifier/gco:MemberName/gco:aName/gco:CharacterString',
                namespaces=this_xml.namespaces)
            if zv is not None:
                for zvs in zv:
                    if zvs.text == 'bathy' or zvs.text == 'Band1' or zvs.text == 'z':
                        zvar = zvs.text
                        break

            geom = this_xml.bounds(geom=True)
            if geom is not None:
                surveys.append({
                    'Name': title,
                    'ID': this_id,
                    'Agency': 'NOAA',
                    'Date': this_xml.date(),
                    'MetadataLink': this_xml.url,
                    'MetadataDate': this_xml.xml_date(),
                    'DataLink': http_url,
                    'IndexLink': wcs_url,
                    'Link': self._nt_catalog,
                    'DataType': 'raster',
                    'DataSource': 'ncei_thredds',
                    'HorizontalDatum': h_epsg,
                    'VerticalDatum': v_epsg,
                    'Etcetra': zvar,
                    'Info': this_xml.abstract(),
                    'geom': geom
                })

    self.FRED._add_surveys(surveys)
    if self.verbose:
        _prog.end(
            0, 'scanned {} datasets in {}.'.format(
                len(this_ds), this_ds[0].attrib['name']))
        utils.echo_msg('added {} surveys from {}'.format(
            len(surveys), this_ds[0].attrib['name']))
def _update_prods(self):
    """Update FRED with each product file available from TNM.

    For each elevation dataset tag, pages through the TNM products API
    (100 items per request) and adds every product not already present
    in FRED (matched by sourceId).
    """
    for dsTag in self._elev_ds:
        offset = 0
        utils.echo_msg('processing TNM dataset {}...'.format(dsTag))

        # first request only discovers the total product count
        _req = f_utils.Fetch(
            self._tnm_product_url).fetch_req(params={
                'max': 1,
                'datasets': dsTag
            })
        # BUGFIX: a failed/garbled response previously fell through and
        # crashed with a NameError on _dsTag_results; skip the dataset.
        try:
            _dsTag_results = _req.json()
        except ValueError:
            utils.echo_error_msg('tnm server error, try again')
            continue
        except Exception as e:
            utils.echo_error_msg('error, {}'.format(e))
            continue

        total = _dsTag_results['total']
        if self.verbose:
            _prog = utils.CliProgress(
                'gathering {} products from {}...'.format(total, dsTag))

        # the dataset-level ISO record supplies the CRS for every product
        ds = self._datasets(dataset=dsTag)
        this_xml = f_utils.iso_xml('{}?format=iso'.format(ds['infoUrl']))
        h_epsg, v_epsg = this_xml.reference_system()

        while True:
            _data = {'max': 100, 'datasets': dsTag, 'offset': offset}
            _req = f_utils.Fetch(
                self._tnm_product_url).fetch_req(params=_data)
            # BUGFIX: on a bad page response, stop paging instead of
            # re-processing the previous (stale) results.
            try:
                _dsTag_results = _req.json()
            except ValueError:
                utils.echo_error_msg('tnm server error, try again')
                break
            except Exception as e:
                utils.echo_error_msg('error, {}'.format(e))
                break

            if self.verbose:
                _prog.update_perc(
                    (offset, total),
                    msg='gathering {} products from {}...'.format(
                        total, dsTag))

            for i, item in enumerate(_dsTag_results['items']):
                if self.verbose:
                    _prog.update_perc(
                        (i + offset, total),
                        msg='gathering {} products from {}...'.format(
                            total, dsTag))
                # best-effort filter; layer may be None here
                try:
                    self.FRED.layer.SetAttributeFilter("ID = '{}'".format(
                        item['sourceId']))
                except Exception:
                    pass

                if self.FRED.layer is None or len(self.FRED.layer) == 0:
                    bbox = item['boundingBox']
                    geom = regions.Region().from_list([
                        bbox['minX'], bbox['maxX'], bbox['minY'],
                        bbox['maxY']
                    ]).export_as_geom()

                    # classify the product by its download format
                    if item['format'] in ('IMG', 'GeoTIFF'):
                        tnm_ds = 'raster'
                    elif item['format'] in ('LAZ', 'LAS'):
                        tnm_ds = 'lidar'
                    else:
                        tnm_ds = 'tnm'

                    if geom is not None:
                        self.FRED._add_survey(
                            Name=item['title'],
                            ID=item['sourceId'],
                            Agency='USGS',
                            Date=item['publicationDate'],
                            MetadataLink=item['metaUrl'],
                            MetadataDate=item['dateCreated'],
                            DataLink=item['downloadURL'],
                            Link=item['sourceOriginId'],
                            Resolution=item['extent'],
                            DataType=tnm_ds,
                            DataSource='tnm',
                            HorizontalDatum=h_epsg,
                            VerticalDatum=v_epsg,
                            Etcetra=dsTag,
                            Info=item['moreInfo'],
                            geom=geom)

            offset += 100
            if total - offset <= 0:
                break

        if self.verbose:
            _prog.end(0, 'gathered {} products from {}'.format(total, dsTag))