def update(self): """Update or create the reference vector file""" self.FRED._open_ds(1) for dt in self._dt_xml.keys(): surveys = [] this_xml = f_utils.iso_xml(self._dt_xml[dt], timeout=1000, read_timeout=2000) charts = this_xml.xml_doc.findall('.//{*}has', namespaces = this_xml.namespaces) if self.verbose: _prog = utils.CliProgress('scanning {} surveys in {}.'.format(len(charts), dt)) for i, chart in enumerate(charts): this_xml.xml_doc = chart title = this_xml.title() if self.verbose: _prog.update_perc((i, len(charts))) self.FRED._attribute_filter(["ID = '{}'".format(title)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: h_epsg, v_epsg = this_xml.reference_system() this_data = this_xml.linkages() geom = this_xml.polygon(geom=True) if geom is not None: surveys.append({'Name': title, 'ID': title, 'Agency': 'NOAA', 'Date': this_xml.date(), 'MetadataLink': this_xml.url, 'MetadataDate': this_xml.xml_date(), 'DataLink': this_data, 'Link': self._charts_url, 'DataType': dt, 'DataSource': 'charts', 'HorizontalDatum': h_epsg, 'VerticalDatum': v_epsg, 'Info': this_xml.abstract, 'geom': geom}) self.FRED._add_surveys(surveys) if self.verbose: _prog.end(0, 'scanned {} surveys in {}'.format(len(charts), dt)) utils.echo_msg('added {} surveys from {}'.format(len(surveys), dt)) self.FRED._close_ds()
def update(self): """Crawl the NOS database and update/generate the NOS reference vector.""" self.FRED._open_ds(1) for nosdir in self._nos_directories: if self.callback(): break surveys = [] xml_catalog = self._nos_xml_url(nosdir) page = f_utils.Fetch(xml_catalog).fetch_html() if page is None: xml_catalog = self._nos_iso_xml_url(nosdir) page = f_utils.Fetch(xml_catalog).fetch_html() if page is None: utils.echo_error_msg('failed to retrieve {}'.format(nosdir)) break rows = page.xpath('//a[contains(@href, ".xml")]/@href') if self.verbose: _prog = utils.CliProgress('scanning {} surveys in {}...'.format(len(rows), nosdir)) for i, survey in enumerate(rows): if self.callback(): break sid = survey[:-4] if self.verbose: _prog.update_perc((i, len(rows))) self.FRED._attribute_filter(["ID = '{}'".format(sid)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: this_xml = f_utils.iso_xml(xml_catalog + survey) h_epsg, v_epsg = this_xml.reference_system() this_data = this_xml.data_links() d_links = [] d_types = [] for key in this_data.keys(): if key in ['GEODAS_XYZ', 'BAG', 'GRID_BAG']: d_links.append(this_data[key]) d_types.append(key) geom = this_xml.bounds(geom=True) if geom is not None: surveys.append({'Name': this_xml.title(), 'ID': sid, 'Agency': 'NOAA/NOS', 'Date': this_xml.date(), 'MetadataLink': this_xml.url, 'MetadataDate': this_xml.xml_date(), 'DataLink': ','.join([','.join(x) for x in d_links]), 'DataType': ','.join(list(set(d_types))), 'DataSource': 'nos', 'HorizontalDatum': h_epsg, 'VerticalDatum': v_epsg, 'Info': this_xml.abstract(), 'geom': geom}) if self.verbose: _prog.end(0, 'scanned {} surveys in {}.'.format(len(rows), nosdir)) utils.echo_msg('added {} surveys from {}'.format(len(surveys), nosdir)) self.FRED._add_surveys(surveys) self.FRED._close_ds()
def _update_all(self):
    """Update FRED with the HRDEM dataset footprints."""

    self.FRED._open_ds(1)
    v_zip = os.path.basename(self._hrdem_footprints_url)
    status = f_utils.Fetch(self._hrdem_footprints_url, verbose=self.verbose).fetch_ftp_file(v_zip)
    v_shps = utils.p_unzip(v_zip, ['shp', 'shx', 'dbf', 'prj'])
    v_shp = None
    for v in v_shps:
        if '.shp' in v:
            v_shp = v

    try:
        v_ds = ogr.Open(v_shp)
    except:
        v_ds = None
        status = -1

    if v_ds is not None:
        layer = v_ds.GetLayer()
        fcount = layer.GetFeatureCount()
        if self.verbose:
            _prog = utils.CliProgress('scanning {} datasets...'.format(fcount))

        for f in range(0, fcount):
            feature = layer[f]
            name = feature.GetField('Tile_name')
            if self.verbose:
                _prog.update_perc((f, fcount))

            try:
                self.FRED.layer.SetAttributeFilter("Name = '{}'".format(name))
            except:
                pass

            if self.FRED.layer is None or len(self.FRED.layer) == 0:
                data_link = feature.GetField('Ftp_dtm')
                if data_link is not None:
                    geom = feature.GetGeometryRef()
                    self.FRED._add_survey(
                        Name=name, ID=feature.GetField('Project'), Agency='NRCAN',
                        Date=utils.this_year(), MetadataLink=feature.GetField('Meta_dtm'),
                        MetadataDate=utils.this_year(),
                        DataLink=data_link.replace('http', 'ftp'),
                        IndexLink=self._hrdem_footprints_url, DataType='raster',
                        DataSource='hrdem',
                        HorizontalDatum=feature.GetField('Coord_Sys').split(':')[-1],
                        Info=feature.GetField('Provider'), geom=geom)

        if self.verbose:
            ## pass a status code to end(), as elsewhere in this module
            _prog.end(0, 'scanned {} datasets.'.format(fcount))

    utils.remove_glob(v_zip, *v_shps)
    self.FRED._close_ds()

def update(self):
    """Update FRED with the CHS WCS coverages."""

    self.FRED._open_ds(1)
    chs_wcs = f_utils.WCS(self._chs_url)
    contents = chs_wcs._contents()
    if self.verbose:
        _prog = utils.CliProgress('Scanning {} WCS coverages from {}...'.format(
            len(contents), self._chs_url))

    for i, layer in enumerate(contents):
        if self.verbose:
            _prog.update_perc((i, len(contents)))

        if 'Tiles' not in layer['CoverageId'][0]:
            self.FRED._attribute_filter(["ID = '{}'".format(layer['CoverageId'][0])])
            if self.FRED.layer is None or len(self.FRED.layer) == 0:
                d = chs_wcs._describe_coverage(layer['CoverageId'][0])
                if d is not None:
                    ds_region = chs_wcs._get_coverage_region(d)
                    geom = ds_region.export_as_geom()
                    url = chs_wcs._get_coverage_url(layer['CoverageId'][0], region=ds_region)
                    try:
                        name = d['name'][0]
                    except:
                        name = d['CoverageId'][0]

                    try:
                        meta = layer['Metadata']
                    except:
                        meta = None

                    try:
                        info = layer['Abstract']
                    except:
                        info = None

                    self.FRED._add_survey(
                        Name=name, ID=layer['CoverageId'][0], Date=utils.this_year(),
                        MetadataLink=meta, MetadataDate=utils.this_year(), DataLink=url,
                        DataType='raster', DataSource='chs', HorizontalDatum=4326,
                        VerticalDatum=1092, Info=info, geom=geom)

    if self.verbose:
        _prog.end(0, 'Scanned {} WCS coverages from {}'.format(len(contents), self._chs_url))

    self.FRED._close_ds()

def _filter(self, region=None, where=None, layers=None):
    """Search for data in the reference vector file"""

    ## use None defaults; the previous mutable-list defaults were appended
    ## to below and so leaked filter clauses between calls
    where = where if where is not None else []
    layers = layers if layers is not None else []
    _results = []
    if region is not None:
        _boundsGeom = region.export_as_geom()
    else:
        _boundsGeom = None

    if self._verbose:
        _prog = utils.CliProgress('filtering {}...'.format(self.FREDloc))

    if not self.open_p:
        self._open_ds()
        close_p = True
    else:
        close_p = False

    for i, layer in enumerate(layers):
        if self._verbose:
            _prog.update_perc((i, len(layers)))

        #this_layer = self.layer
        where.append("DataSource = '{}'".format(layer))
        if self._verbose:
            utils.echo_msg('FRED filter: {}'.format(where))

        self._attribute_filter(where=where)
        for feat in self.layer:
            if _boundsGeom is not None:
                geom = feat.GetGeometryRef()
                if geom is not None:
                    if _boundsGeom.Intersects(geom):
                        _results.append({})
                        f_j = json.loads(feat.ExportToJson())
                        for key in f_j['properties'].keys():
                            _results[-1][key] = feat.GetField(key)
            else:
                _results.append({})
                f_j = json.loads(feat.ExportToJson())
                for key in f_j['properties'].keys():
                    _results[-1][key] = feat.GetField(key)

        #this_layer = None

    if close_p:
        self._close_ds()

    if self._verbose:
        _prog.end(0, 'filtered \033[1m{}\033[m data records from FRED'.format(len(_results)))

    #clear where
    #where = []
    return(_results)

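## A minimal usage sketch of the filter above; the helper name, region and
## where-clause values are illustrative, not part of the module. Assumes a
## FRED instance and the `regions` module imported as in the rest of this code.
def _example_fred_filter(fred):
    aoi = regions.Region().from_list([-90.5, -89.5, 28.5, 29.5])  # xmin, xmax, ymin, ymax
    ## NOS raster records intersecting the region
    for rec in fred._filter(region=aoi, where=["DataType = 'raster'"], layers=['nos']):
        print(rec['ID'], rec['DataLink'])
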
def update(self): """Crawl the COP30 database and update/generate the NASADEM reference vector.""" self.FRED._open_ds(1) surveys = [] f = f_utils.Fetch(self.nasadem_vrt_url, headers=self.headers, verbose=True) page = f.fetch_xml() fns = page.findall('.//SourceFilename') if self.verbose: _prog = utils.CliProgress('scanning {} tiles in {}...'.format(len(fns), self.nasadem_url)) for i, fn in enumerate(fns): sid = fn.text.split('/')[-1].split('.')[0] if self.verbose: _prog.update_perc((i, len(fns))) self.FRED._attribute_filter(["ID = '{}'".format(sid)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: spat = fn.text.split('_HGT_')[-1].split('.')[0] xsplit = 'e' if 'e' in spat else 'w' ysplit = 's' if 's' in spat else 'n' x = int(spat.split(xsplit)[-1]) y = int(spat.split(xsplit)[0].split(ysplit)[-1]) if xsplit == 'w': x = x * -1 if ysplit == 's': y = y * -1 this_region = regions.Region().from_list([x, x + 1, y, y + 1]) geom = this_region.export_as_geom() if geom is not None: surveys.append({'Name': fn.text.split('.')[0].split('/')[-1], 'ID': sid, 'Agency': 'NASA', 'Date': utils.this_date(), 'MetadataLink': '', 'MetadataDate': utils.this_date(), 'DataLink': self.nasadem_url + fn.text.split('/')[-1] + '?token=', 'DataType': '1', 'DataSource': 'nasadem', 'HorizontalDatum': 4326, 'Etcetra': self.nasadem_rurl, 'VerticalDatum': 'msl', 'Info': '', 'geom': geom}) if self.verbose: _prog.end(0, 'scanned {} tiles in {}.'.format(len(fns), self.nasadem_url)) utils.echo_msg('added {} NASADEM DEM tiles'.format(len(surveys))) self.FRED._add_surveys(surveys) self.FRED._close_ds()
def gdal_ogr_mask_union(src_layer, src_field, dst_defn=None):
    '''`union` the features of `src_layer` based on `src_field`, where
    `src_field` holds a value of 0 or 1. optionally, specify an output
    layer defn for the unioned feature.

    returns the output feature class'''

    if dst_defn is None:
        dst_defn = src_layer.GetLayerDefn()

    multi = ogr.Geometry(ogr.wkbMultiPolygon)
    src_layer.SetAttributeFilter("{} = 1".format(src_field))
    feats = len(src_layer)
    _prog = utils.CliProgress('unioning {} features...'.format(feats))
    if feats > 0:
        for n, f in enumerate(src_layer):
            _prog.update_perc((n, feats))
            f_geom = f.geometry()
            #f_geom.CloseRings()
            #try:
            #    f_geom_valid = f_geom.MakeValid()
            #except:
            f_geom_valid = f_geom
            #wkt = f_geom_valid.ExportToWkt()
            #wkt_geom = ogr.CreateGeometryFromWkt(wkt)
            #multi.AddGeometryDirectly(wkt_geom)
            multi.AddGeometry(f_geom_valid)

    #union = multi.UnionCascaded() ## slow on large multi...
    _prog.end(0, 'unioned {} features'.format(feats))
    utils.echo_msg('setting geometry to unioned feature...')
    out_feat = ogr.Feature(dst_defn)
    #out_feat.SetGeometryDirectly(multi)
    out_feat.SetGeometry(multi)
    union = multi = None
    return(out_feat)

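## Usage sketch for the union above, assuming an OGR mask vector whose
## attribute field (here 'DN') holds 0/1 values; the paths and field name
## are hypothetical.
def _example_mask_union(src_vec='mask.shp', dst_vec='union.shp'):
    from osgeo import ogr
    src_ds = ogr.Open(src_vec)
    src_layer = src_ds.GetLayer()
    drv = ogr.GetDriverByName('ESRI Shapefile')
    dst_ds = drv.CreateDataSource(dst_vec)
    dst_layer = dst_ds.CreateLayer('union', geom_type=ogr.wkbMultiPolygon)
    out_feat = gdal_ogr_mask_union(src_layer, 'DN', dst_defn=dst_layer.GetLayerDefn())
    dst_layer.CreateFeature(out_feat)
    src_ds = dst_ds = None
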
def _sub_region_analysis(self, sub_regions):
    """sub-region analysis"""

    utils.echo_msg('analyzing {} sub-regions...'.format(len(sub_regions)))
    sub_zones = {}
    dem_ds = gdal.Open(self.dem.fn)
    msk_ds = gdal.Open(self.dem.mask_fn)
    prox_ds = gdal.Open(self.prox)
    #slp_ds = gdal.Open(self.slope)
    _prog = utils.CliProgress('analyzing {} sub-regions.'.format(len(sub_regions)))
    for sc, sub_region in enumerate(sub_regions):
        _prog.update_perc((sc, len(sub_regions)))
        #utils.echo_msg_inline('analyzing sub-regions [{}]'.format(sc))
        s_sum, s_g_max, s_perc = self._mask_analysis(msk_ds, region=sub_region)
        p_perc = self._prox_analysis(prox_ds, region=sub_region)
        #slp_perc = self._prox_analysis(slp_ds, region=sub_region)
        #slp_perc = 0
        s_dc = demfun.gather_infos(dem_ds, region=sub_region, scan=True)
        if p_perc < self.prox_perc_33 or abs(p_perc - self.prox_perc_33) < 0.01:
            zone = self._zones[2]
        elif p_perc < self.prox_perc_66 or abs(p_perc - self.prox_perc_66) < 0.01:
            zone = self._zones[1]
        else:
            zone = self._zones[0]

        # if slp_perc < self.slp_perc_33 or abs(slp_perc - self.slp_perc_33) < 0.01:
        #     zone = self._zones[3]
        # elif slp_perc < self.slp_perc_66 or abs(slp_perc - self.slp_perc_66) < 0.01:
        #     zone = self._zones[4]
        # else:
        #     zone = self._zones[5]

        #sub_zones[sc + 1] = [sub_region, s_g_max, s_sum, s_perc, p_perc, slp_perc, s_dc['zr'][0], s_dc['zr'][1], zone]
        sub_zones[sc + 1] = [sub_region, s_g_max, s_sum, s_perc, p_perc, zone]

    dem_ds = msk_ds = prox_ds = slp_ds = None
    _prog.end(0, 'analyzed {} sub-regions.'.format(len(sub_regions)))
    #utils.echo_msg_inline('analyzing sub-regions [OK]\n')
    return(sub_zones)

def update(self): """Update FRED with each dataset in TNM""" datasets = self._datasets() self.FRED._open_ds(1) if self.verbose: _prog = utils.CliProgress( 'scanning {} datasets from TNM...'.format(len(datasets))) for i, ds in enumerate(datasets): if self.verbose: _prog.update_perc((i, len(datasets))) for fmt in ds['formats']: if 'isDefault' in fmt.keys(): fmt = fmt['value'] break #print(ds) #print(len(ds)) #this_xml = FRED.iso_xml('{}{}?format=iso'.format(self._tnm_meta_base, ds['id'])) tags = ds['tags'] if len(tags) > 0: for tag in tags: print(tag) this_xml = f_utils.iso_xml('{}?format=iso'.format( tag['infoUrl'])) geom = this_xml.bounds(geom=True) h_epsg, v_epsg = this_xml.reference_system() self._update_dataset(tag, fmt, geom, h_epsg, v_epsg) else: this_xml = f_utils.iso_xml('{}?format=iso'.format( ds['infoUrl'])) geom = this_xml.bounds(geom=True) h_epsg, v_epsg = this_xml.reference_system() self._update_dataset(ds, fmt, geom, h_epsg, v_epsg) if self.verbose: _prog.end(0, 'scanned {} datasets from TNM'.format(len(datasets))) self.FRED._close_ds()
def _update(self): """Update the FRED reference vector after scanning the relevant metadata from Digital Coast. """ #self.FRED = FRED(verbose=self.verbose, local=True) self.FRED._open_ds(1) for ld in self._dc_dirs: cols = [] surveys = [] page = f_utils.Fetch(self._dc_htdata_url + ld).fetch_html() if page is None: continue tr = page.xpath('//table')[0].xpath('.//tr') if len(tr) <= 0: continue [cols.append(i.text_content()) for i in tr[0]] if self.verbose: _prog = utils.CliProgress( 'scanning {} datasets in {}...'.format(len(tr), ld)) for i in range(1, len(tr)): if self.callback(): break if self.verbose: _prog.update_perc((i, len(tr))) #dc['ID #'])) cells = tr[i].getchildren() dc = {} for j, cell in enumerate(cells): cl = cell.xpath('a') if len(cl) > 0: if cols[j] == 'Dataset Name': dc[cols[j]] = cell.text_content() dc['Metadata'] = cl[0].get('href') else: dc[cols[j]] = cl[0].get('href') else: dc[cols[j]] = cell.text_content() self.FRED._attribute_filter(["ID = '{}'".format(dc['ID #'])]) if self.FRED.layer is None or len(self.FRED.layer) == 0: if 'Metadata' in dc.keys(): this_xml = f_utils.iso_xml(dc['Metadata']) h_epsg, v_epsg = this_xml.reference_system() geom = this_xml.bounds(geom=True) if geom is not None: if self.verbose: _prog.update_perc( (i, len(tr)), msg='{} ** adding: {} **'.format( _prog.opm, dc['ID #'])) surveys.append({ 'Name': dc['Dataset Name'], 'ID': dc['ID #'], 'Date': this_xml.date(), 'MetadataLink': dc['Metadata'], 'MetadataDate': this_xml.xml_date(), 'DataLink': dc['https'], 'IndexLink': dc['Tile Index'], 'Link': self._dc_url, 'DataType': ld.split('_')[0], 'DataSource': 'dc', 'HorizontalDatum': h_epsg, 'VerticalDatum': v_epsg, 'Info': this_xml.abstract(), 'geom': geom }) self.FRED._add_surveys(surveys) if self.verbose: _prog.end(0, 'scanned {} datasets in {}.'.format(len(tr), ld)) utils.echo_msg('added {} surveys from {}'.format( len(surveys), ld)) self.FRED._close_ds()
def _split_sample(self, trains, perc):
    """split-sample simulations and error calculations
    sims = max-simulations
    """

    #utils.echo_msg('performing MAX {} SPLIT-SAMPLE simulations...'.format(self.sims))
    _prog = utils.CliProgress('performing MAX {} SPLIT-SAMPLE simulations'.format(self.sims))
    #utils.echo_msg('simulation\terrors\tproximity-coeff\tp_diff\tslp-coeff\tslp_diff')
    utils.echo_msg('simulation\terrors\tproximity-coeff\tp_diff')
    sim = 0
    status = 0
    last_ec_d = None
    s_dp = []  ## accumulated error/proximity samples; initialized here to avoid a NameError on the first pass
    while True:
        status = 0
        sim += 1
        #trains = self._regions_sort(trainers, verbose=False)
        for z, train in enumerate(trains):
            train_h = train[:25]
            ss_samp = perc

            ## ==============================================
            ## perform split-sample analysis on each training region.
            ## ==============================================
            for n, sub_region in enumerate(train_h):
                ss_samp = perc
                #perc = int(float(n+(len(train_h) * z))/(len(train_h)*len(trains)) * 100)
                #_prog.update_perc((int(float(n+(len(train_h) * z))), len(train_h)*len(trains)))
                _prog.update()
                this_region = sub_region[0]
                if sub_region[3] < ss_samp:
                    ss_samp = None

                ## ==============================================
                ## extract the xyz data for the region from the DEM
                ## ==============================================
                o_xyz = '{}_{}.xyz'.format(self.dem.name, n)
                ds = gdal.Open(self.dem.fn)
                ds_config = demfun.gather_infos(ds)
                b_region = this_region
                b_region.buffer(20*self.dem.inc)
                srcwin = b_region.srcwin(ds_config['geoT'], ds_config['nx'], ds_config['ny'])
                with open(o_xyz, 'w') as o_fh:
                    for xyz in demfun.parse(ds, srcwin=srcwin, mask=self.dem.mask_fn):
                        xyz.dump(dst_port=o_fh)

                ds = None
                if os.stat(o_xyz).st_size != 0:
                    ## ==============================================
                    ## split the xyz data to inner/outer; outer is
                    ## the data buffer, inner will be randomly sampled
                    ## ==============================================
                    s_inner, s_outer = self._gmt_select_split(
                        o_xyz, this_region, 'sub_{}'.format(n), verbose=False
                    )
                    if os.stat(s_inner).st_size != 0:
                        sub_xyz = np.loadtxt(s_inner, ndmin=2, delimiter=' ')
                    else:
                        sub_xyz = []

                    ss_len = len(sub_xyz)
                    if ss_samp is not None:
                        sx_cnt = int(sub_region[1] * (ss_samp / 100.)) + 1
                    else:
                        sx_cnt = 1

                    sub_xyz_head = 'sub_{}_head.xyz'.format(n)
                    np.random.shuffle(sub_xyz)
                    np.savetxt(sub_xyz_head, sub_xyz[:sx_cnt], '%f', ' ')

                    ## ==============================================
                    ## generate the random-sample DEM
                    ## ==============================================
                    waff = waffles.WaffleFactory(
                        mod=self.dem.mod, data=[s_outer, sub_xyz_head],
                        src_region=this_region, inc=self.dem.inc,
                        name='sub_{}'.format(n), node=self.dem.node,
                        fmt=self.dem.fmt, extend=self.dem.extend,
                        extend_proc=self.dem.extend_proc,
                        weights=self.dem.weights, sample=self.dem.sample,
                        clip=self.dem.clip, epsg=self.dem.epsg,
                        mask=True, verbose=False, clobber=True
                    )
                    waff.mod_args = self.dem.mod_args
                    wf = waff.acquire().generate()
                    if wf.valid_p():
                        ## ==============================================
                        ## generate the random-sample data PROX and SLOPE
                        ## ==============================================
                        sub_prox = '{}_prox.tif'.format(wf.name)
                        demfun.proximity('{}_m.tif'.format(wf.name), sub_prox)
                        #sub_slp = '{}_slp.tif'.format(wf.name)
                        #demfun.slope(wf.fn, sub_slp)

                        ## ==============================================
                        ## Calculate the random-sample errors
                        ## ==============================================
                        sub_xyd = demfun.query(sub_xyz[sx_cnt:], wf.fn, 'xyd')
                        #sub_dp = gdalfun.gdal_query(sub_xyd, sub_prox, 'zg')
                        sub_dp = demfun.query(sub_xyd, sub_prox, 'xyzg')
                        #sub_ds = demfun.query(sub_dp, self.slope, 'g')
                        #if len(sub_dp) > 0:
                        #    if sub_dp.shape[0] == sub_ds.shape[0]:
                        #        sub_dp = np.append(sub_dp, sub_ds, 1)
                        #    else:
                        #        sub_dp = []
                    else:
                        sub_dp = None

                    utils.remove_glob(sub_xyz_head)
                    #if s_dp is not None:
                    if sub_dp is not None and len(sub_dp) > 0:
                        try:
                            s_dp = np.concatenate((s_dp, sub_dp), axis=0)
                        except:
                            s_dp = sub_dp
                    #else: s_dp = sub_dp

                utils.remove_glob(o_xyz, 'sub_{}*'.format(n))

        if len(s_dp) > 0:
            d_max = self.region_info[self.dem.name][4]
            #s_max = self.region_info[self.dem.name][5]
            s_dp = s_dp[s_dp[:,3] < d_max,:]
            s_dp = s_dp[s_dp[:,3] > 0,:]
            prox_err = s_dp[:,[2,3]]
            if last_ec_d is None:
                last_ec_d = [0, 0.1, 0.2]
                last_ec_diff = 10
            else:
                last_ec_diff = abs(last_ec_d[2] - last_ec_d[1])

            ec_d = self._err2coeff(prox_err[:50000000], coeff_guess=last_ec_d,
                                   dst_name=self.dem.name + '_prox', xa='distance')
            ec_diff = abs(ec_d[2] - ec_d[1])
            ec_l_diff = abs(last_ec_diff - ec_diff)
            # s_dp = s_dp[s_dp[:,4] < s_max,:]
            # slp_err = s_dp[:,[2,4]]
            # #print(slp_err)
            # #ec_s = self._err2coeff(slp_err[:50000000], coeff_guess=[0, 0.1, 0.2], dst_name = self.dem.name + '_slp', xa = 'slope')
            # ec_s = [0, 1, 2]
            # utils.echo_msg('{}\t{}\t{}\t{}\t{}\t{}'.format(sim, len(s_dp), ec_d, ec_d[2] - ec_d[1], ec_s, ec_s[2] - ec_s[1]))
            utils.echo_msg('{}\t{}\t{}\t{}'.format(sim, len(s_dp), ec_d, ec_l_diff))

            #if ec_d[2] < 0.0001: continue
            #if abs(ec_d[2] - ec_d[1]) > 2: continue
            if ec_d[0] == 0 and ec_d[1] == 0.1 and ec_d[2] == 0.2:
                continue

            ## stop when we hit max sims, the coefficients converge,
            ## or we've gathered enough sample points
            if sim >= int(self.sims):
                break

            if abs(last_ec_diff - ec_diff) < 0.0001:
                break

            if len(s_dp) >= int(self.region_info[self.dem.name][1] / 10):
                break

            last_ec_d = ec_d
        else:
            #utils.echo_msg('{}\t{}\t{}\t{}\t{}\t{}'.format(sim, len(s_dp), None, None, None, None))
            utils.echo_msg('{}\t{}\t{}\t{}'.format(sim, len(s_dp), None, None))

    _prog.end(status, 'performed {} SPLIT-SAMPLE simulations'.format(sim))
    return([ec_d])

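## A minimal sketch of the kind of error-vs-distance fit that _err2coeff
## appears to perform on the (error, proximity) pairs gathered above. The
## power-law model, helper name, and scipy usage here are assumptions, not
## this module's implementation.
def _example_err2coeff(prox_err, coeff_guess=(0, 0.1, 0.2)):
    import numpy as np
    from scipy.optimize import curve_fit

    def _model(dist, a, b, c):
        ## error grows as a power of the distance-to-nearest-measurement
        return a + b * (dist ** c)

    err = np.abs(prox_err[:, 0])  # z differences
    dist = prox_err[:, 1]         # proximity; filtered to > 0 upstream
    coeffs, _ = curve_fit(_model, dist, err, p0=coeff_guess, maxfev=5000)
    return list(coeffs)
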
def update(self): """Crawl the COP30 database and update/generate the COPERNICUS reference vector.""" self.FRED._open_ds(1) surveys = [] page = f_utils.Fetch(self.cop_10_url, verbose=True).fetch_html() rows = page.xpath('//a[contains(@href, ".zip")]/@href') if self.verbose: _prog = utils.CliProgress('scanning {} tiles in {}...'.format( len(rows), self.cop_10_url)) for i, row in enumerate(rows): sid = row.split('.')[0] if self.verbose: _prog.update_perc((i, len(rows))) self.FRED._attribute_filter(["ID = '{}'".format(sid)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: spat = row.split('.')[0].split('_')[-1] x = int(spat.split('x')[-1]) y = int(spat.split('x')[0].split('y')[-1]) this_region = regions.Region().from_list( [x, x + 10, y, y + 10]) geom = this_region.export_as_geom() if geom is not None: surveys.append({ 'Name': row.split('.')[0], 'ID': sid, 'Agency': 'EU', 'Date': utils.this_date(), 'MetadataLink': self.cop_10_aux_url, 'MetadataDate': utils.this_date(), 'DataLink': self.cop_10_url + row, 'DataType': '3', 'DataSource': 'copernicus', 'HorizontalDatum': 'epsg:4326', 'VerticalDatum': 'msl', 'Info': '', 'geom': geom }) if self.verbose: _prog.end( 0, 'scanned {} tiles in {}.'.format(len(rows), self.cop_10_url)) f = f_utils.Fetch(self.cop30_vrt_url, headers=self.headers, verbose=True) page = f.fetch_xml() fns = page.findall('.//SourceFilename') if self.verbose: _prog = utils.CliProgress('scanning {} tiles in {}...'.format( len(fns), self.cop30_url)) for i, fn in enumerate(fns): sid = fn.text.split('/')[-1].split('.')[0] if self.verbose: _prog.update_perc((i, len(fns))) self.FRED._attribute_filter(["ID = '{}'".format(sid)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: spat = fn.text.split('_10_')[-1].split('_DEM')[0] xsplit = '_E' if 'E' in spat else '_W' ysplit = 'S' if 'S' in spat else 'N' x = int(spat.split(xsplit)[-1].split('_')[0]) y = int(spat.split(xsplit)[0].split(ysplit)[-1].split('_')[0]) if xsplit == '_W': x = x * -1 if ysplit == 'S': y = y * -1 this_region = regions.Region().from_list([x, x + 1, y, y + 1]) geom = this_region.export_as_geom() if geom is not None: surveys.append({ 'Name': fn.text.split('.')[0].split('/')[-1], 'ID': sid, 'Agency': 'EU', 'Date': utils.this_date(), 'MetadataLink': '', 'MetadataDate': utils.this_date(), 'DataLink': self.cop30_url + fn.text.split('/')[-1] + '?token=', 'DataType': '1', 'DataSource': 'copernicus', 'HorizontalDatum': 'epsg:4326', 'Etcetra': self.cop30_rurl, 'VerticalDatum': 'msl', 'Info': '', 'geom': geom }) if self.verbose: _prog.end( 0, 'scanned {} tiles in {}.'.format(len(fns), self.cop30_url)) utils.echo_msg('added {} COPERNICUS DEM tiles'.format( len(surveys))) self.FRED._add_surveys(surveys) self.FRED._close_ds()
def parse(self): """import a datalist entry from a string""" if self.verbose: _prog = utils.CliProgress('parsing datalist {}{}'.format( self.fn, ' @{}'.format(self.weight) if self.weight is not None else '')) if os.path.exists(self.fn): with open(self.fn, 'r') as f: count = sum(1 for _ in f) with open(self.fn, 'r') as op: for l, this_line in enumerate(op): if self.verbose: _prog.update_perc((l, count)) if this_line[0] != '#' and this_line[ 0] != '\n' and this_line[0].rstrip() != '': data_set = DatasetFactory( this_line, weight=self.weight, parent=self, src_region=self.region, metadata=copy.deepcopy(self.metadata), src_srs=self.src_srs, dst_srs=self.dst_srs, x_inc=self.x_inc, y_inc=self.y_inc, sample_alg=self.sample_alg, verbose=self.verbose).acquire() if data_set is not None and data_set.valid_p( fmts=DatasetFactory.data_types[ data_set.data_format]['fmts']): if self.region is not None and self.region.valid_p( check_xy=True): # try: # inf_region = regions.Region().from_string( # data_set.infos['wkt'] # ) # except: try: inf_region = regions.Region().from_list( data_set.infos['minmax']) except: inf_region = self.region.copy() inf_region.wmin = data_set.weight inf_region.wmax = data_set.weight if regions.regions_intersect_p( inf_region, self.region if data_set.dst_trans is None else data_set.trans_region): for ds in data_set.parse(): self.data_entries.append(ds) yield (ds) else: for ds in data_set.parse(): self.data_entries.append(ds) yield (ds) elif len(self.data_entries) > 0: for data_set in self.data_entries: for ds in data_set.parse(): yield (ds) else: if self.verbose: utils.echo_warning_msg( 'could not open datalist/entry {}'.format(self.fn)) if self.verbose: _prog.end( 0, 'parsed datalist {}{}'.format( self.fn, ' @{}'.format(self.weight) if self.weight is not None else ''))
def parse_json(self):
    if self.verbose:
        _prog = utils.CliProgress('parsing datalist json {}{}'.format(
            self.fn, ' @{}'.format(self.weight) if self.weight is not None else ''))

    count = 0  ## initialized here so the closing message works when the json vector is missing
    if os.path.exists('{}.json'.format(self.fn)):
        driver = ogr.GetDriverByName('GeoJSON')
        dl_ds = driver.Open('{}.json'.format(self.fn))
        dl_layer = dl_ds.GetLayer()
        ldefn = dl_layer.GetLayerDefn()
        if self.region is not None:
            _boundsGeom = self.region.export_as_geom()
        else:
            _boundsGeom = None

        dl_layer.SetSpatialFilter(_boundsGeom)
        count = len(dl_layer)
        for l, feat in enumerate(dl_layer):
            if self.verbose:  ## guard: _prog only exists when verbose
                _prog.update_perc((l, count))

            if self.region is not None:
                ## honor any weight range set on the region
                w_region = self.region.w_region()
                if w_region[0] is not None:
                    if float(feat.GetField('Weight')) < w_region[0]:
                        continue

                if w_region[1] is not None:
                    if float(feat.GetField('Weight')) > w_region[1]:
                        continue

            data_set = DatasetFactory(
                '{} {} {}'.format(feat.GetField('Path'), feat.GetField('Format'),
                                  feat.GetField('Weight')),
                weight=self.weight, parent=self, src_region=self.region,
                metadata=copy.deepcopy(self.metadata), src_srs=self.src_srs,
                dst_srs=self.dst_srs, x_inc=self.x_inc, y_inc=self.y_inc,
                sample_alg=self.sample_alg, verbose=self.verbose).acquire()
            if data_set is not None and data_set.valid_p(
                    fmts=DatasetFactory.data_types[data_set.data_format]['fmts']):
                for ds in data_set.parse():
                    self.data_entries.append(ds)
                    yield(ds)

        dl_ds = dl_layer = None
    else:
        for ds in self.parse():
            yield(ds)

    if self.verbose:
        _prog.end(0, 'parsed {} datasets from datalist {}{}'.format(
            count, self.fn, ' @{}'.format(self.weight) if self.weight is not None else ''))

def _update_prods(self): """updated FRED with each product file available from TNM""" for dsTag in self._elev_ds: offset = 0 utils.echo_msg('processing TNM dataset {}...'.format(dsTag)) _req = f_utils.Fetch( self._tnm_product_url).fetch_req(params={ 'max': 1, 'datasets': dsTag }) try: _dsTag_results = _req.json() except ValueError: utils.echo_error_msg('tnm server error, try again') except Exception as e: utils.echo_error_msg('error, {}'.format(e)) total = _dsTag_results['total'] if self.verbose: _prog = utils.CliProgress( 'gathering {} products from {}...'.format(total, dsTag)) ds = self._datasets(dataset=dsTag) #this_xml = f_utils.iso_xml('{}{}?format=iso'.format(self._tnm_meta_base, ds['id'])) this_xml = f_utils.iso_xml('{}?format=iso'.format(ds['infoUrl'])) h_epsg, v_epsg = this_xml.reference_system() while True: _data = {'max': 100, 'datasets': dsTag, 'offset': offset} _req = f_utils.Fetch( self._tnm_product_url).fetch_req(params=_data) try: _dsTag_results = _req.json() except ValueError: utils.echo_error_msg('tnm server error, try again') except Exception as e: utils.echo_error_msg('error, {}'.format(e)) if self.verbose: _prog.update_perc( (offset, total), msg='gathering {} products from {}...'.format( total, dsTag)) for i, item in enumerate(_dsTag_results['items']): if self.verbose: _prog.update_perc( (i + offset, total), msg='gathering {} products from {}...'.format( total, dsTag)) try: self.FRED.layer.SetAttributeFilter("ID = '{}'".format( item['sourceId'])) except: pass if self.FRED.layer is None or len(self.FRED.layer) == 0: bbox = item['boundingBox'] geom = regions.Region().from_list([ bbox['minX'], bbox['maxX'], bbox['minY'], bbox['maxY'] ]).export_as_geom() if item['format'] == 'IMG' or item[ 'format'] == 'GeoTIFF': tnm_ds = 'raster' elif item['format'] == 'LAZ' or item['format'] == 'LAS': tnm_ds = 'lidar' else: tnm_ds = 'tnm' if geom is not None: self.FRED._add_survey( Name=item['title'], ID=item['sourceId'], Agency='USGS', Date=item['publicationDate'], MetadataLink=item['metaUrl'], MetadataDate=item['dateCreated'], DataLink=item['downloadURL'], Link=item['sourceOriginId'], Resolution=item['extent'], DataType=tnm_ds, DataSource='tnm', HorizontalDatum=h_epsg, VerticalDatum=v_epsg, Etcetra=dsTag, Info=item['moreInfo'], geom=geom) offset += 100 if total - offset <= 0: break if self.verbose: _prog.end(0, 'gathered {} products from {}'.format(total, dsTag))
def _parse_dataset(self, catalog_url):
    ntCatXml = f_utils.iso_xml(catalog_url)
    this_ds = ntCatXml.xml_doc.findall('.//th:dataset', namespaces=ntCatXml.namespaces)
    this_ds_services = ntCatXml.xml_doc.findall('.//th:service', namespaces=ntCatXml.namespaces)
    if self.verbose:
        _prog = utils.CliProgress('scanning {} datasets in {}...'.format(
            len(this_ds), this_ds[0].attrib['name']))

    surveys = []
    for i, node in enumerate(this_ds):
        this_title = node.attrib['name']
        this_id = node.attrib['ID']
        if self.verbose:
            _prog.update_perc((i, len(this_ds)))

        self.FRED._attribute_filter(["ID = '{}'".format(this_id)])
        if self.FRED.layer is None or len(self.FRED.layer) == 0:
            subCatRefs = node.findall('.//th:catalogRef', namespaces=ntCatXml.namespaces)
            if len(subCatRefs) > 0:
                self._parse_catalog(catalog_url)
                break

            try:
                ds_path = node.attrib['urlPath']
            except:
                continue

            iso_url = False
            wcs_url = False
            http_url = False
            for service in this_ds_services:
                service_name = service.attrib['name']
                if service_name == 'iso':
                    iso_url = '{}{}{}'.format(self._ngdc_url, service.attrib['base'], ds_path)
                if service_name == 'wcs':
                    wcs_url = '{}{}{}'.format(self._ngdc_url, service.attrib['base'], ds_path)
                if service_name == 'http':
                    http_url = '{}{}{}'.format(self._ngdc_url, service.attrib['base'], ds_path)

            this_xml = f_utils.iso_xml(iso_url)
            title = this_xml.title()
            h_epsg, v_epsg = this_xml.reference_system()
            zv = this_xml.xml_doc.findall(
                './/gmd:dimension/gmd:MD_Band/gmd:sequenceIdentifier/gco:MemberName/gco:aName/gco:CharacterString',
                namespaces=this_xml.namespaces)
            if zv is not None:
                for zvs in zv:
                    print(zvs.text)
                    if zvs.text == 'bathy' or zvs.text == 'Band1' or zvs.text == 'z':
                        zvar = zvs.text
                        break
                else:
                    ## for-else: no named z variable found, fall back to 'z'
                    zvar = 'z'

            geom = this_xml.bounds(geom=True)
            if geom is not None:
                surveys.append({'Name': title, 'ID': this_id, 'Agency': 'NOAA',
                                'Date': this_xml.date(), 'MetadataLink': this_xml.url,
                                'MetadataDate': this_xml.xml_date(), 'DataLink': http_url,
                                'IndexLink': wcs_url, 'Link': self._nt_catalog,
                                'DataType': 'raster', 'DataSource': 'ncei_thredds',
                                'HorizontalDatum': h_epsg, 'VerticalDatum': v_epsg,
                                'Etcetra': zvar, 'Info': this_xml.abstract(), 'geom': geom})

    self.FRED._add_surveys(surveys)
    if self.verbose:
        _prog.end(0, 'scanned {} datasets in {}.'.format(len(this_ds), this_ds[0].attrib['name']))
        utils.echo_msg('added {} surveys from {}'.format(len(surveys), this_ds[0].attrib['name']))

def fetches_cli(argv=sys.argv):
    """run fetches from command-line

    See `fetches_cli_usage` for full cli options.
    """

    i_regions = []
    these_regions = []
    mods = []
    mod_opts = {}
    want_list = False
    want_proc = False
    want_verbose = True
    stop_threads = False

    ## ==============================================
    ## parse command line arguments.
    ## ==============================================
    i = 1
    while i < len(argv):
        arg = argv[i]
        if arg == '--region' or arg == '-R':
            i_regions.append(str(argv[i + 1]))
            i = i + 1
        elif arg[:2] == '-R':
            i_regions.append(str(arg[2:]))
        elif arg == '--list' or arg == '-l':
            want_list = True
        elif arg == '--process' or arg == '-p':
            want_proc = True
        elif arg == '--quiet' or arg == '-q':
            want_verbose = False
        elif arg == '--help' or arg == '-h':
            sys.stderr.write(fetches_usage)
            sys.exit(1)
        elif arg == '--version' or arg == '-v':
            print('{}, version {}'.format(os.path.basename(sys.argv[0]), fetches.__version__))
            sys.exit(1)
        elif arg == '--modules' or arg == '-m':
            try:
                if argv[i + 1] in FetchesFactory.mods.keys():
                    sys.stderr.write(_fetches_module_long_desc(
                        {k: FetchesFactory.mods[k] for k in (argv[i + 1],)}))
                else:
                    sys.stderr.write(_fetches_module_long_desc(FetchesFactory.mods))
            except:
                sys.stderr.write(_fetches_module_long_desc(FetchesFactory.mods))

            sys.exit(0)
        elif arg[0] == '-':
            sys.stderr.write(fetches_usage)
            sys.exit(0)
        else:
            mods.append(arg)

        i = i + 1

    if len(mods) == 0:
        sys.stderr.write(fetches_usage)
        utils.echo_error_msg('you must select at least one fetch module')
        sys.exit(-1)

    for i_region in i_regions:
        tmp_region = regions.Region().from_string(i_region)
        if tmp_region.valid_p(check_xy=True):
            these_regions.append(tmp_region)
        else:
            ## not a w/e/s/n string; treat it as an OGR-readable vector,
            ## optionally with appended constraints after a ':'
            i_region_s = i_region.split(':')
            tmp_region = regions.ogr_wkts(i_region_s[0])
            for i in tmp_region:
                if i.valid_p():
                    if len(i_region_s) > 1:
                        these_regions.append(regions.Region().from_string(
                            '/'.join([i.format('str'), i_region_s[1]])))
                    else:
                        these_regions.append(i)

    if not these_regions:
        these_regions = [regions.Region().from_string('-R-180/180/-90/90')]

    if want_verbose:
        utils.echo_msg('parsed {} region(s)'.format(len(these_regions)))

    for rn, this_region in enumerate(these_regions):
        if stop_threads:
            return

        x_fs = [FetchesFactory(
            mod=mod, src_region=this_region, verbose=want_verbose
        ).acquire(dst_srs='epsg:4326') for mod in mods]
        for x_f in x_fs:
            if x_f is None:
                continue

            if want_verbose:
                utils.echo_msg('running fetch module {} on region {}...'.format(
                    x_f.name, this_region.format('str')))

            x_f.run()
            if want_verbose:
                utils.echo_msg('found {} data files.'.format(len(x_f.results)))

            if len(x_f.results) == 0:
                break

            if want_list:
                for result in x_f.results:
                    print(result[0])
            else:
                fr = f_utils.fetch_results(x_f, want_proc=want_proc)
                fr.daemon = True
                _p = utils.CliProgress('fetching {} remote data files'.format(len(x_f.results)))
                try:
                    fr.start()
                    while True:
                        time.sleep(2)
                        sys.stderr.write('\x1b[2K\r')
                        perc = float((len(x_f.results) - fr.fetch_q.qsize())) / len(x_f.results) * 100 if len(x_f.results) > 0 else 1
                        if want_verbose:
                            _p.update_perc((len(x_f.results) - fr.fetch_q.qsize(), len(x_f.results)))

                        sys.stderr.flush()
                        if not fr.is_alive():
                            break

                except (KeyboardInterrupt, SystemExit):
                    utils.echo_error_msg('user breakage...please wait while fetches exits.')
                    x_f.status = -1
                    stop_threads = True
                    ## drain the fetch queue so the worker threads can exit
                    while not fr.fetch_q.empty():
                        try:
                            fr.fetch_q.get(False)
                        except Empty:
                            continue

                        fr.fetch_q.task_done()

                fr.join()
                _p.end(x_f.status, 'fetched {} remote data files'.format(len(x_f.results)))

            if want_verbose:
                utils.echo_msg('ran fetch module {} on region {}...'.format(
                    x_f.name, this_region.format('str')))

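## Example invocations, assuming the console entry point is installed as
## `fetches` (module names are whatever is registered in FetchesFactory.mods;
## the two shown here are assumptions):
##
##   fetches -R -90.5/-89.5/28.5/29.5 nos --list
##   fetches -R aoi.shp copernicus
##
## -R accepts either a west/east/south/north string or an OGR-readable
## vector, as handled by the region parsing above.
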
def fetch_file(self, dst_fn, params=None, datatype=None, overwrite=False,
               timeout=140, read_timeout=320):
    """fetch src_url and save to dst_fn"""

    status = 0
    req = None
    #start = time.perf_counter()
    if self.verbose:
        progress = utils.CliProgress('fetching remote file: {}...'.format(self.url))

    if not os.path.exists(os.path.dirname(dst_fn)):
        try:
            os.makedirs(os.path.dirname(dst_fn))
        except:
            pass

    #if not os.path.exists(dst_fn) or overwrite:
    try:
        with requests.get(self.url, stream=True, params=params, headers=self.headers,
                          timeout=(timeout, read_timeout), verify=self.verify) as req:
            req_h = req.headers
            if 'Content-length' in req_h:
                req_s = int(req_h['Content-length'])
            else:
                req_s = -1

            try:
                ## UnboundLocalError is (ab)used as a skip signal, caught below
                if not overwrite and req_s == os.path.getsize(dst_fn):
                    raise UnboundLocalError('File exists, skipping')
            except OSError:
                pass

            if req.status_code == 300:
                #if req_h['Location']
                pass

            ## ==============================================
            ## hack for earthdata credential redirect...
            ## recursion here may never end with incorrect user/pass
            ## ==============================================
            if req.status_code == 401:
                ## ==============================================
                ## we're hoping for a redirect url here.
                ## ==============================================
                if self.url == req.url:
                    raise UnboundLocalError('Incorrect Authentication')

                Fetch(url=req.url, headers=self.headers, verbose=self.verbose).fetch_file(
                    dst_fn, params=params, datatype=datatype, overwrite=overwrite,
                    timeout=timeout, read_timeout=read_timeout)
            elif req.status_code == 200:
                curr_chunk = 0
                with open(dst_fn, 'wb') as local_file:
                    for chunk in req.iter_content(chunk_size=8196):
                        if self.callback():
                            break

                        if self.verbose:
                            done = int(50 * curr_chunk / req_s)
                            #utils.echo_msg_inline("[%s%s] %s bps" % ('=' * done, ' ' * (50-done), curr_chunk//(time.perf_counter() - start)))
                            progress.update_perc((curr_chunk, req_s))

                        curr_chunk += 8196
                        if chunk:
                            local_file.write(chunk)
            elif req.status_code == 429:
                ## ==============================================
                ## pause a bit and retry...
                ## ==============================================
                utils.echo_warning_msg(
                    'server returned: {}, taking a nap and trying again...'.format(req.status_code))
                time.sleep(10)
                Fetch(url=self.url, headers=self.headers, verbose=self.verbose).fetch_file(
                    dst_fn, params=params, datatype=datatype, overwrite=overwrite,
                    timeout=timeout, read_timeout=read_timeout)
                self.verbose = False
            else:
                utils.echo_error_msg('server returned: {}'.format(req.status_code))

    except UnboundLocalError as e:
        #utils.echo_error_msg(e)
        #status = 0
        pass
    except Exception as e:
        utils.echo_error_msg(e)
        status = -1

    if not os.path.exists(dst_fn) or os.stat(dst_fn).st_size == 0:
        status = -1

    if self.verbose and status == 0:
        progress.end(status, 'fetched remote file as: {}.'.format(dst_fn))
        #utils.echo_msg_inline('fetched remote file as: {}'.format(dst_fn))
        #utils.echo_msg('fetched remote file as: {} @ {}'.format(dst_fn, time.perf_counter() - start))

    return(status)

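## Usage sketch for fetch_file above; the URL and destination path are
## hypothetical.
def _example_fetch_file():
    f = Fetch(url='https://example.com/data/tile.tif', verbose=True)
    status = f.fetch_file('cache/tile.tif')
    return status  # 0 on success, -1 on failure
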