def _load_background(self):
    """GSHHG/GMRT - global low-res.

    Used to fill un-set cells.
    """
    
    if self.gc['GMT'] is not None and not self.want_gmrt:
        utils.run_cmd('gmt grdlandmask {} -I{} -r -Df -G{}=gd:GTiff -V -N1/0/1/0/1\
        '.format(self.p_region.format('gmt'), self.inc, self.g_mask), verbose=self.verbose)
    else:
        this_gmrt = gmrt.GMRT(
            src_region=self.p_region, weight=self.weight,
            verbose=self.verbose, layer='topo-mask'
        ).run()
        this_gmrt.fetch_results()
        gmrt_tif = this_gmrt.results[0]
        utils.run_cmd('gdalwarp {} {} -tr {} {} -overwrite'.format(gmrt_tif, self.g_mask, self.inc, self.inc), verbose=True)
        #utils.remove_glob(gmrt_tif)

    ## ==============================================
    ## update wet/dry mask with gsshg/gmrt data
    ## speed up!
    ## ==============================================
    utils.echo_msg('filling the coast mask with gsshg/gmrt data...')
    c_ds = gdal.Open(self.g_mask)
    for this_xyz in gdalfun.gdal_parse(c_ds):
        xpos, ypos = utils._geo2pixel(this_xyz.x, this_xyz.y, self.dst_gt)
        try:
            if self.coast_array[ypos, xpos] == self.ds_config['ndv']:
                if this_xyz.z == 1:
                    self.coast_array[ypos, xpos] = 0
                elif this_xyz.z == 0:
                    self.coast_array[ypos, xpos] = 1
        except: pass
        
    c_ds = None
    utils.remove_glob('{}*'.format(self.g_mask))
def gdal_ogr_mask_union(src_layer, src_field, dst_defn=None):
    '''`union` a `src_layer`'s features based on `src_field` where
    `src_field` holds a value of 0 or 1. optionally, specify an output
    layer defn for the unioned feature.

    returns the output feature class'''
    
    if dst_defn is None:
        dst_defn = src_layer.GetLayerDefn()
        
    multi = ogr.Geometry(ogr.wkbMultiPolygon)
    feats = len(src_layer)
    utils.echo_msg('unioning {} features'.format(feats))
    for n, f in enumerate(src_layer):
        gdal.TermProgress_nocb((n + 1) / feats)
        if f.GetField(src_field) == 0:
            src_layer.DeleteFeature(f.GetFID())
        elif f.GetField(src_field) == 1:
            f.geometry().CloseRings()
            wkt = f.geometry().ExportToWkt()
            multi.AddGeometryDirectly(ogr.CreateGeometryFromWkt(wkt))
            src_layer.DeleteFeature(f.GetFID())
            
    #union = multi.UnionCascaded() ## slow on large multi...
    out_feat = ogr.Feature(dst_defn)
    out_feat.SetGeometryDirectly(multi)
    #union = multi = None
    return(out_feat)
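## ==============================================
## a minimal usage sketch for `gdal_ogr_mask_union`, assuming this
## function and its `utils` dependency are in scope. builds an
## in-memory OGR layer with a 0/1 'DN' field (names illustrative)
## and collects the flagged features into one multipolygon feature.
## ==============================================
def _mask_union_example():
    def _square(x0, y0, x1, y1):
        ring = ogr.Geometry(ogr.wkbLinearRing)
        for x, y in [(x0, y0), (x1, y0), (x1, y1), (x0, y1), (x0, y0)]:
            ring.AddPoint(x, y)
            
        poly = ogr.Geometry(ogr.wkbPolygon)
        poly.AddGeometry(ring)
        return(poly)

    ds = ogr.GetDriverByName('Memory').CreateDataSource('tmp')
    lyr = ds.CreateLayer('mask', geom_type=ogr.wkbPolygon)
    lyr.CreateField(ogr.FieldDefn('DN', ogr.OFTInteger))
    for geom in [_square(0, 0, 1, 1), _square(1, 0, 2, 1)]:
        f = ogr.Feature(lyr.GetLayerDefn())
        f.SetField('DN', 1)
        f.SetGeometry(geom)
        lyr.CreateFeature(f)

    out_feat = gdal_ogr_mask_union(lyr, 'DN')
    ## both squares should now live in the output multipolygon
    return(out_feat.geometry().GetGeometryCount())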
def update(self):
    """Update or create the reference vector file"""
    
    self.FRED._open_ds(1)
    for dt in self._dt_xml.keys():
        surveys = []
        this_xml = f_utils.iso_xml(self._dt_xml[dt], timeout=1000, read_timeout=2000)
        charts = this_xml.xml_doc.findall('.//{*}has', namespaces=this_xml.namespaces)
        if self.verbose:
            _prog = utils.CliProgress('scanning {} surveys in {}.'.format(len(charts), dt))
            
        for i, chart in enumerate(charts):
            this_xml.xml_doc = chart
            title = this_xml.title()
            if self.verbose:
                _prog.update_perc((i, len(charts)))
                
            self.FRED._attribute_filter(["ID = '{}'".format(title)])
            if self.FRED.layer is None or len(self.FRED.layer) == 0:
                h_epsg, v_epsg = this_xml.reference_system()
                this_data = this_xml.linkages()
                geom = this_xml.polygon(geom=True)
                if geom is not None:
                    surveys.append({'Name': title, 'ID': title, 'Agency': 'NOAA', 'Date': this_xml.date(),
                                    'MetadataLink': this_xml.url, 'MetadataDate': this_xml.xml_date(),
                                    'DataLink': this_data, 'Link': self._charts_url, 'DataType': dt,
                                    'DataSource': 'charts', 'HorizontalDatum': h_epsg,
                                    'VerticalDatum': v_epsg, 'Info': this_xml.abstract(), 'geom': geom})
                    
        self.FRED._add_surveys(surveys)
        if self.verbose:
            _prog.end(0, 'scanned {} surveys in {}'.format(len(charts), dt))
            utils.echo_msg('added {} surveys from {}'.format(len(surveys), dt))
            
    self.FRED._close_ds()
def _regions_sort(self, trainers, t_num=25, verbose=False): """sort regions by distance; regions is a list of regions [xmin, xmax, ymin, ymax]. returns the sorted region-list """ train_sorted = [] for z, train in enumerate(trainers): train_d = [] np.random.shuffle(train) train_total = len(train) while True: if verbose: utils.echo_msg_inline('sorting training tiles [{}]'.format(len(train))) if len(train) == 0: break this_center = train[0][0].center() train_d.append(train[0]) train = train[1:] if len(train_d) > t_num or len(train) == 0: break dsts = [utils.euc_dst(this_center, x[0].center()) for x in train] min_dst = np.percentile(dsts, 50) d_t = lambda t: utils.euc_dst(this_center, t[0].center()) > min_dst np.random.shuffle(train) train.sort(reverse=True, key=d_t) if verbose: utils.echo_msg(' '.join([x[0].format('gmt') for x in train_d[:t_num]])) train_sorted.append(train_d) if verbose: utils.echo_msg_inline('sorting training tiles [OK]\n') return(train_sorted)
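## ==============================================
## a toy illustration of the sort used above, with plain coordinate
## tuples standing in for the (region, ...) trainer entries: take the
## head of the list, then push entries farther than the median
## distance from it to the front so successive picks spread out.
## ==============================================
def _regions_sort_example():
    import math
    import random

    pts = [(random.random(), random.random()) for _ in range(10)]
    seed = pts[0]
    rest = pts[1:]
    dists = [math.dist(seed, p) for p in rest]
    median = sorted(dists)[len(dists) // 2]
    ## stable sort on a boolean key: True (farther than median) first
    rest.sort(reverse=True, key=lambda p: math.dist(seed, p) > median)
    return(rest)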
def fetch_ftp_file(self, dst_fn, params=None, datatype=None, overwrite=False):
    """fetch an ftp file via urllib"""

    status = 0
    f = None
    if self.verbose:
        utils.echo_msg('fetching remote ftp file: {}...'.format(self.url[:20]))
        
    if not os.path.exists(os.path.dirname(dst_fn)):
        try:
            os.makedirs(os.path.dirname(dst_fn))
        except: pass
        
    try:
        f = urllib.request.urlopen(self.url)
    except:
        f = None
        status = -1
        
    if f is not None:
        with open(dst_fn, 'wb') as local_file:
            local_file.write(f.read())
            
        if self.verbose:
            utils.echo_msg('fetched remote ftp file: {}.'.format(os.path.basename(self.url)))
            
    return(status)
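## ==============================================
## hedged usage sketch (URL illustrative): fetch a single remote file
## over ftp and check the returned status.
## ==============================================
def _fetch_ftp_example():
    f = Fetch(url='ftp://ftp.example.gov/pub/survey.xyz.gz', verbose=True)
    return(f.fetch_ftp_file('survey.xyz.gz') == 0)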
def update(self): """Crawl the NOS database and update/generate the NOS reference vector.""" self.FRED._open_ds(1) for nosdir in self._nos_directories: if self.callback(): break surveys = [] xml_catalog = self._nos_xml_url(nosdir) page = f_utils.Fetch(xml_catalog).fetch_html() if page is None: xml_catalog = self._nos_iso_xml_url(nosdir) page = f_utils.Fetch(xml_catalog).fetch_html() if page is None: utils.echo_error_msg('failed to retrieve {}'.format(nosdir)) break rows = page.xpath('//a[contains(@href, ".xml")]/@href') if self.verbose: _prog = utils.CliProgress('scanning {} surveys in {}...'.format(len(rows), nosdir)) for i, survey in enumerate(rows): if self.callback(): break sid = survey[:-4] if self.verbose: _prog.update_perc((i, len(rows))) self.FRED._attribute_filter(["ID = '{}'".format(sid)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: this_xml = f_utils.iso_xml(xml_catalog + survey) h_epsg, v_epsg = this_xml.reference_system() this_data = this_xml.data_links() d_links = [] d_types = [] for key in this_data.keys(): if key in ['GEODAS_XYZ', 'BAG', 'GRID_BAG']: d_links.append(this_data[key]) d_types.append(key) geom = this_xml.bounds(geom=True) if geom is not None: surveys.append({'Name': this_xml.title(), 'ID': sid, 'Agency': 'NOAA/NOS', 'Date': this_xml.date(), 'MetadataLink': this_xml.url, 'MetadataDate': this_xml.xml_date(), 'DataLink': ','.join([','.join(x) for x in d_links]), 'DataType': ','.join(list(set(d_types))), 'DataSource': 'nos', 'HorizontalDatum': h_epsg, 'VerticalDatum': v_epsg, 'Info': this_xml.abstract(), 'geom': geom}) if self.verbose: _prog.end(0, 'scanned {} surveys in {}.'.format(len(rows), nosdir)) utils.echo_msg('added {} surveys from {}'.format(len(surveys), nosdir)) self.FRED._add_surveys(surveys) self.FRED._close_ds()
def fetch_queue(q, m, p=False):
    """fetch queue `q` of fetch results.

    each fetch queue entry is a list of:
    [remote_data_url, local_data_path, module-name]

    set `p` to True to process the fetched data to the module's region.
    """
    
    while True:
        fetch_args = q.get()
        if not m.callback():
            if not os.path.exists(os.path.dirname(fetch_args[1])):
                try:
                    os.makedirs(os.path.dirname(fetch_args[1]))
                except: pass
                
            if not p:
                if fetch_args[0].split(':')[0] == 'ftp':
                    Fetch(
                        url=fetch_args[0], callback=m.callback, verbose=m.verbose, headers=m.headers
                    ).fetch_ftp_file(fetch_args[1])
                else:
                    Fetch(
                        url=fetch_args[0], callback=m.callback, verbose=m.verbose, headers=m.headers,
                        verify=False if fetch_args[2] == 'srtm' or fetch_args[2] == 'mar_grav' else True
                    ).fetch_file(fetch_args[1])
            else:
                if m.region is not None:
                    o_x_fn = fetch_args[1] + m.region.format('fn') + '.xyz'
                else:
                    o_x_fn = fetch_args[1] + '.xyz'
                    
                utils.echo_msg('processing local file: {}'.format(o_x_fn))
                if not os.path.exists(o_x_fn):
                    with open(o_x_fn, 'w') as out_xyz:
                        m.dump_xyz(fetch_args, dst_port=out_xyz)
                        
                    try:
                        if os.path.exists(o_x_fn):
                            if os.stat(o_x_fn).st_size == 0:
                                utils.remove_glob(o_x_fn)
                    except: pass
                    
        q.task_done()
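## ==============================================
## a minimal sketch of how `fetch_queue` is typically wired up,
## assuming a module object `m` exposing the callback/verbose/headers
## attributes the worker reads. entries follow the indices used
## above: [remote_data_url, local_data_path, module-name].
## ==============================================
def _fetch_queue_example(m):
    import threading
    import queue

    fetch_q = queue.Queue()
    for _ in range(3):
        t = threading.Thread(target=fetch_queue, args=(fetch_q, m, False))
        t.daemon = True
        t.start()

    fetch_q.put(['https://example.com/tile.tif', 'cache/tile.tif', 'srtm'])
    fetch_q.join()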
def _filter(self, region=None, where=[], layers=[]):
    """Search for data in the reference vector file"""

    _results = []
    if region is not None:
        _boundsGeom = region.export_as_geom()
    else:
        _boundsGeom = None

    if self._verbose:
        _prog = utils.CliProgress('filtering {}...'.format(self.FREDloc))
        
    if not self.open_p:
        self._open_ds()
        close_p = True
    else:
        close_p = False
        
    for i, layer in enumerate(layers):
        if self._verbose:
            _prog.update_perc((i, len(layers)))
            
        ## filter on a per-layer copy of `where` so each pass doesn't
        ## accumulate the previous layer's DataSource clause
        this_where = where + ["DataSource = '{}'".format(layer)]
        if self._verbose:
            utils.echo_msg('FRED filter: {}'.format(this_where))
            
        self._attribute_filter(where=this_where)
        for feat in self.layer:
            if _boundsGeom is not None:
                geom = feat.GetGeometryRef()
                if geom is not None:
                    if _boundsGeom.Intersects(geom):
                        _results.append({})
                        f_j = json.loads(feat.ExportToJson())
                        for key in f_j['properties'].keys():
                            _results[-1][key] = feat.GetField(key)
            else:
                _results.append({})
                f_j = json.loads(feat.ExportToJson())
                for key in f_j['properties'].keys():
                    _results[-1][key] = feat.GetField(key)
                    
    if close_p:
        self._close_ds()
        
    if self._verbose:
        _prog.end(0, 'filtered \033[1m{}\033[m data records from FRED'.format(len(_results)))
        
    return(_results)
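## ==============================================
## hedged usage sketch of `_filter`: query FRED for 'nos' records
## intersecting a region (region values illustrative; assumes the
## `regions` module is imported here as it is elsewhere).
## ==============================================
def _fred_filter_example():
    this_region = regions.Region().from_list([-90.0, -89.0, 28.0, 29.0])
    fred = FRED(verbose=True)
    return(fred._filter(region=this_region, where=[], layers=['nos']))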
def update(self): """Crawl the COP30 database and update/generate the NASADEM reference vector.""" self.FRED._open_ds(1) surveys = [] f = f_utils.Fetch(self.nasadem_vrt_url, headers=self.headers, verbose=True) page = f.fetch_xml() fns = page.findall('.//SourceFilename') if self.verbose: _prog = utils.CliProgress('scanning {} tiles in {}...'.format(len(fns), self.nasadem_url)) for i, fn in enumerate(fns): sid = fn.text.split('/')[-1].split('.')[0] if self.verbose: _prog.update_perc((i, len(fns))) self.FRED._attribute_filter(["ID = '{}'".format(sid)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: spat = fn.text.split('_HGT_')[-1].split('.')[0] xsplit = 'e' if 'e' in spat else 'w' ysplit = 's' if 's' in spat else 'n' x = int(spat.split(xsplit)[-1]) y = int(spat.split(xsplit)[0].split(ysplit)[-1]) if xsplit == 'w': x = x * -1 if ysplit == 's': y = y * -1 this_region = regions.Region().from_list([x, x + 1, y, y + 1]) geom = this_region.export_as_geom() if geom is not None: surveys.append({'Name': fn.text.split('.')[0].split('/')[-1], 'ID': sid, 'Agency': 'NASA', 'Date': utils.this_date(), 'MetadataLink': '', 'MetadataDate': utils.this_date(), 'DataLink': self.nasadem_url + fn.text.split('/')[-1] + '?token=', 'DataType': '1', 'DataSource': 'nasadem', 'HorizontalDatum': 4326, 'Etcetra': self.nasadem_rurl, 'VerticalDatum': 'msl', 'Info': '', 'geom': geom}) if self.verbose: _prog.end(0, 'scanned {} tiles in {}.'.format(len(fns), self.nasadem_url)) utils.echo_msg('added {} NASADEM DEM tiles'.format(len(surveys))) self.FRED._add_surveys(surveys) self.FRED._close_ds()
def yield_xyz(self, entry):
    """process stations"""
    
    src_data = 'tides_tmp.json'
    ln = 0
    if f_utils.Fetch(entry[0], callback=self.callback, verbose=self.verbose).fetch_file(src_data) == 0:
        with open(src_data, 'r') as json_file:
            r = json.load(json_file)
            
        if 'features' in r:
            for feature in r['features']:
                if self.station_id is not None:
                    if self.station_id != feature['attributes']['id']:
                        continue
                    
                lon = feature['attributes']['longitude']
                lat = feature['attributes']['latitude']
                z = feature['attributes'][self.s_datum] - feature['attributes'][self.t_datum]
                ## station datum fields are assumed to be in feet here;
                ## convert when meters are requested
                if self.units == 'm':
                    z = z * 0.3048
                    
                xyz = xyzfun.XYZPoint(src_srs='epsg:4326').from_list([lon, lat, z])
                if self.dst_srs is not None:
                    xyz.warp(dst_srs=self.dst_srs)
                    
                ln += 1
                yield(xyz)
    else:
        utils.echo_error_msg('failed to fetch remote file, {}...'.format(src_data))
        
    if self.verbose:
        utils.echo_msg('parsed {} data records from {}'.format(ln, src_data))
        
    utils.remove_glob('{}*'.format(src_data))
def _htdp_transform(self, epsg_in, epsg_out):
    """create an htdp transformation grid"""
    
    htdp = htdpfun.HTDP()
    utils.echo_msg('{}: HTDP: {}->{}'.format(self.src_region, epsg_in, epsg_out))
    ## grid definition is corner-ordered: (ulx, uly, lrx, lry, nx, ny)
    griddef = (self.src_region.xmin, self.src_region.ymax,
               self.src_region.xmax, self.src_region.ymin,
               self.xcount, self.ycount)
    grid = htdp._new_create_grid(griddef)
    htdp._write_grid(grid, '_tmp_input.xyz')
    htdp._write_control('_tmp_control.txt', '_tmp_output.xyz', '_tmp_input.xyz',
                        _htdp_reference_frames[epsg_in]['htdp_id'], 2012.0,
                        _htdp_reference_frames[epsg_out]['htdp_id'], 2012.0)
    htdp.run('_tmp_control.txt')
    out_grid = htdp._read_grid('_tmp_output.xyz', (griddef[5], griddef[4]))
    utils.remove_glob('_tmp_output.xyz', '_tmp_input.xyz', '_tmp_control.txt')
    return(out_grid, epsg_out)
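## ==============================================
## the grid definition above appears to be corner-ordered as
## (ulx, uly, lrx, lry, nx, ny); a small numpy sketch generating the
## same lattice under that assumption.
## ==============================================
def _griddef_example():
    import numpy as np

    ulx, uly, lrx, lry, nx, ny = (-90.0, 40.0, -89.0, 39.0, 4, 4)
    xs = np.linspace(ulx, lrx, nx)
    ys = np.linspace(uly, lry, ny)
    xx, yy = np.meshgrid(xs, ys)
    return(np.column_stack([xx.ravel(), yy.ravel()]))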
def _read_grid(self, filename, shape):
    grid = np.zeros(shape)
    points_found = 0
    with open(filename) as fd:
        ## echo the 5-line header
        for i in range(5):
            utils.echo_msg(fd.readline().rstrip())

        ptuple = self._next_point(fd)
        while ptuple is not None:
            grid[ptuple[1], ptuple[0]] = ptuple[4]
            points_found += 1
            ptuple = self._next_point(fd)

    if points_found < shape[0] * shape[1]:
        print('points found: ', points_found)
        print('points expected:', shape[0] * shape[1])
        sys.exit(1)
        
    return(grid)
def gdal_ogr_mask_union(src_layer, src_field, dst_defn=None):
    '''`union` a `src_layer`'s features based on `src_field` where
    `src_field` holds a value of 0 or 1. optionally, specify an output
    layer defn for the unioned feature.

    returns the output feature class'''
    
    if dst_defn is None:
        dst_defn = src_layer.GetLayerDefn()
        
    multi = ogr.Geometry(ogr.wkbMultiPolygon)
    src_layer.SetAttributeFilter("{} = 1".format(src_field))
    feats = len(src_layer)
    _prog = utils.CliProgress('unioning {} features...'.format(feats))
    if feats > 0:
        for n, f in enumerate(src_layer):
            _prog.update_perc((n, feats))
            f_geom = f.geometry()
            #f_geom.CloseRings()
            #f_geom = f_geom.MakeValid()
            multi.AddGeometry(f_geom)
            
    #union = multi.UnionCascaded() ## slow on large multi...
    _prog.end(0, 'unioned {} features'.format(feats))
    utils.echo_msg('setting geometry to unioned feature...')
    out_feat = ogr.Feature(dst_defn)
    out_feat.SetGeometry(multi)
    union = multi = None
    return(out_feat)
def _sub_region_analysis(self, sub_regions): """sub-region analysis""" utils.echo_msg('analyzing {} sub-regions...'.format(len(sub_regions))) sub_zones = {} dem_ds = gdal.Open(self.dem.fn) msk_ds = gdal.Open(self.dem.mask_fn) prox_ds = gdal.Open(self.prox) #slp_ds = gdal.Open(self.slope) _prog = utils.CliProgress('analyzing {} sub-regions.'.format(len(sub_regions))) for sc, sub_region in enumerate(sub_regions): _prog.update_perc((sc, len(sub_regions))) #utils.echo_msg_inline('analyzing sub-regions [{}]'.format(sc)) s_sum, s_g_max, s_perc = self._mask_analysis(msk_ds, region=sub_region) p_perc = self._prox_analysis(prox_ds, region=sub_region) #slp_perc = self._prox_analysis(slp_ds, region=sub_region) #slp_perc = 0 s_dc = demfun.gather_infos(dem_ds, region=sub_region, scan=True) if p_perc < self.prox_perc_33 or abs(p_perc - self.prox_perc_33) < 0.01: zone = self._zones[2] elif p_perc < self.prox_perc_66 or abs(p_perc - self.prox_perc_66) < 0.01: zone = self._zones[1] else: zone = self._zones[0] # if slp_perc < self.slp_perc_33 or abs(slp_perc - self.slp_perc_33) < 0.01: # zone = self._zones[3] # elif slp_perc < self.slp_perc_66 or abs(slp_perc - self.slp_perc_66) < 0.01: # zone = self._zones[4] # else: # zone = self._zones[5] #sub_zones[sc + 1] = [sub_region, s_g_max, s_sum, s_perc, p_perc, slp_perc, s_dc['zr'][0], s_dc['zr'][1], zone] sub_zones[sc + 1] = [sub_region, s_g_max, s_sum, s_perc, p_perc, zone] dem_ds = msk_ds = prox_ds = slp_ds = None _prog.end(0, 'analyzed {} sub-regions.'.format(len(sub_regions))) #utils.echo_msg_inline('analyzing sub-regions [OK]\n') return(sub_zones)
def __init__(self, name='FRED', verbose=False, local=False): self._verbose = verbose self.fetchdata = os.path.join(this_dir, 'data') self.driver = ogr.GetDriverByName('GeoJSON') self.fetch_v = '{}.geojson'.format(name) if local: self.FREDloc = self.fetch_v elif os.path.exists(self.fetch_v): self.FREDloc = self.fetch_v elif os.path.exists(os.path.join(self.fetchdata, self.fetch_v)): self.FREDloc = os.path.join(self.fetchdata, self.fetch_v) else: self.FREDloc = self.fetch_v if self._verbose: utils.echo_msg('using {}'.format(self.FREDloc)) self.ds = None self.layer = None self.open_p = False self._fields = [ 'Name', 'ID', 'Date', 'Agency', 'MetadataLink', 'MetadataDate', 'DataLink', 'IndexLink', 'Link', 'DataType', 'DataSource', 'Resolution', 'HorizontalDatum', 'VerticalDatum', 'LastUpdate', 'Etcetra', 'Info' ]
def _gen_slope(self):
    self.slope = '{}_slope.tif'.format(self.dem.mod)
    utils.echo_msg('generating slope grid {}...'.format(self.slope))
    demfun.slope(self.dem.fn, self.slope)
    if self.dem.epsg is not None:
        demfun.set_epsg(self.slope, self.dem.epsg)
def _gen_prox(self): self.prox = '{}_prox.tif'.format(self.dem.mod) utils.echo_msg('generating proximity grid {}...'.format(self.prox)) demfun.proximity(self.dem.mask_fn, self.prox) if self.dem.epsg is not None: demfun.set_epsg(self.prox, self.dem.epsg)
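## ==============================================
## `demfun.proximity` is defined elsewhere; a hedged sketch of the
## underlying operation with GDAL's ComputeProximity, writing the
## per-cell distance to the nearest mask cell with value 1.
## ==============================================
def _proximity_sketch(src_fn, dst_fn):
    from osgeo import gdal

    src_ds = gdal.Open(src_fn)
    drv = gdal.GetDriverByName('GTiff')
    dst_ds = drv.Create(dst_fn, src_ds.RasterXSize, src_ds.RasterYSize, 1, gdal.GDT_Float32)
    dst_ds.SetGeoTransform(src_ds.GetGeoTransform())
    dst_ds.SetProjection(src_ds.GetProjection())
    gdal.ComputeProximity(src_ds.GetRasterBand(1), dst_ds.GetRasterBand(1),
                          ['VALUES=1', 'DISTUNITS=PIXEL'])
    dst_ds = src_ds = None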
def uncertainties_cli(argv = sys.argv):
    """run the uncertainties module from the command-line

    See `uncertainties_cli_usage` for full cli options.
    """
    
    wg_user = None
    status = 0
    i = 1
    wg = {}
    wg['verbose'] = True
    wg['clobber'] = False
    while i < len(argv):
        arg = argv[i]
        if arg == '--quiet' or arg == '-q':
            wg['verbose'] = False
        elif arg == '--help' or arg == '-h':
            sys.stderr.write(uncertainties_cli_usage)
            sys.exit(0)
        elif arg == '--version' or arg == '-v':
            sys.stdout.write('{}\n'.format(cudem.__version__))
            sys.exit(0)
        elif arg[0] == '-':
            sys.stderr.write(uncertainties_cli_usage)
            utils.echo_error_msg('{} is not a valid waffles cli switch'.format(arg))
            sys.exit(-1)
        else:
            wg_user = arg
            
        i += 1

    ## ==============================================
    ## load the user wg json and run waffles with that.
    ## ==============================================
    if wg_user is not None:
        if os.path.exists(wg_user):
            with open(wg_user, 'r') as wgj:
                wg = json.load(wgj)
                
            this_waffle = waffles.WaffleFactory(**wg).acquire()
            this_waffle.mask = True
            this_waffle.clobber = False
            if not this_waffle.valid_p():
                this_waffle.generate()
                
            i = InterpolationUncertainty(dem=this_waffle).run()
            utils.echo_msg(this_waffle)
            sys.exit(0)
        else:
            utils.echo_error_msg(
                'specified waffles config file does not exist, {}'.format(wg_user)
            )
            sys.stderr.write(waffles_cli_usage)
            sys.exit(-1)
    else:
        utils.echo_error_msg(
            'you must supply a waffles config file; see waffles --help for more information.'
        )
        sys.exit(-1)
def run(self): s_dp = s_ds = None unc_out = {} zones = ['low-dens','mid-dens','high-dens','low-slp','mid-slp','high-slp'] utils.echo_msg('running INTERPOLATION uncertainty module using {}...'.format(self.dem.mod)) if self.prox is None: self._gen_prox() # if self.slope is None: # self._gen_slope() ## ============================================== ## region and der. analysis ## ============================================== self.region_info = {} msk_ds = gdal.Open(self.dem.mask_fn) num_sum, g_max, num_perc = self._mask_analysis(msk_ds) msk_ds = None self.prox_percentile = demfun.percentile(self.prox, self.percentile) self.prox_perc_33 = demfun.percentile(self.prox, 25) self.prox_perc_66 = demfun.percentile(self.prox, 75) self.prox_perc_100 = demfun.percentile(self.prox, 100) # self.slp_percentile = demfun.percentile(self.slope, self.percentile) # self.slp_perc_33 = demfun.percentile(self.slope, 25) # self.slp_perc_66 = demfun.percentile(self.slope, 75) # self.slp_perc_100 = demfun.percentile(self.slope, 100) #self.region_info[self.dem.name] = [self.dem.region, g_max, num_sum, num_perc, self.prox_percentile, self.slp_percentile] self.region_info[self.dem.name] = [self.dem.region, g_max, num_sum, num_perc, self.prox_percentile] for x in self.region_info.keys(): utils.echo_msg('region: {}: {}'.format(x, self.region_info[x])) ## ============================================== ## chunk region into sub regions ## ============================================== chnk_inc = int((self.region_info[self.dem.name][1] / math.sqrt(g_max)) / self.region_info[self.dem.name][3]) #chnk_inc = 250 sub_regions = self.dem.region.chunk(self.dem.inc, chnk_inc) utils.echo_msg('chunked region into {} sub-regions @ {}x{} cells.'.format(len(sub_regions), chnk_inc, chnk_inc)) ## ============================================== ## sub-region analysis ## ============================================== sub_zones = self._sub_region_analysis(sub_regions) ## ============================================== ## sub-region density and percentiles ## ============================================== s_dens = np.array([sub_zones[x][3] for x in sub_zones.keys()]) s_5perc = np.percentile(s_dens, 5) s_dens = None utils.echo_msg('Sampling density for region is: {:.16f}'.format(s_5perc)) ## ============================================== ## zone analysis / generate training regions ## ============================================== trainers = [] t_perc = 95 s_perc = 50 for z, this_zone in enumerate(self._zones): #print(sub_zones) sub_zones[x][8] (with slope) tile_set = [sub_zones[x] for x in sub_zones.keys() if sub_zones[x][5] == self._zones[z]] if len(tile_set) > 0: d_50perc = np.percentile(np.array([x[3] for x in tile_set]), 50) else: continue t_trainers = [x for x in tile_set if x[3] < d_50perc or abs(x[3] - d_50perc) < 0.01] utils.echo_msg('possible {} training zones: {} @ MAX {}'.format(self._zones[z].upper(), len(t_trainers), d_50perc)) trainers.append(t_trainers) utils.echo_msg('sorting training tiles by distance...') trains = self._regions_sort(trainers, verbose = False) tot_trains = len([x for s in trains for x in s]) utils.echo_msg('sorted sub-regions into {} training tiles.'.format(tot_trains)) utils.echo_msg('analyzed {} sub-regions.'.format(len(sub_regions))) ## ============================================== ## split-sample simulations and error calculations ## sims = max-simulations ## ============================================== if self.sims is None: self.sims = int(len(sub_regions)/tot_trains) ec_d = self._split_sample(trains, 
s_5perc)[0] ## ============================================== ## Save/Output results ## apply error coefficient to full proximity grid ## TODO: USE numpy/gdal instead! ## ============================================== utils.echo_msg('applying coefficient to PROXIMITY grid') if self.dem.gc['GMT'] is None: utils.run_cmd('gdal_calc.py -A {} --outfile {}_prox_unc.tif --calc "{}+({}*(A**{}))"'.format(self.prox, self.dem.name, 0, ec_d[1], ec_d[2]), verbose = True) else: math_cmd = 'gmt grdmath {} 0 AND ABS {} POW {} MUL {} ADD = {}_prox_unc.tif=gd+n-9999:GTiff\ '.format(self.prox, ec_d[2], ec_d[1], 0, self.dem.name) utils.run_cmd(math_cmd, verbose = self.dem.verbose) if self.dem.epsg is not None: status = demfun.set_epsg('{}_prox_unc.tif'.format(self.dem.name), epsg=self.dem.epsg) utils.echo_msg('applied coefficient {} to PROXIMITY grid'.format(ec_d)) # utils.echo_msg('applying coefficient to SLOPE grid') # if self.dem.gc['GMT'] is None: # utils.run_cmd('gdal_calc.py -A {} --outfile {}_slp_unc.tif --calc "{}+({}*(A**{}))"'.format(self.slope, self.dem.name, 0, ec_s[1], ec_s[2]), verbose = True) # else: # math_cmd = 'gmt grdmath {} 0 AND ABS {} POW {} MUL {} ADD = {}_slp_unc.tif=gd+n-9999:GTiff\ # '.format(self.slope, ec_s[2], ec_s[1], 0, self.dem.name) # utils.run_cmd(math_cmd, verbose = self.dem.verbose) # if self.dem.epsg is not None: status = demfun.set_epsg('{}_prox_unc.tif'.format(self.dem.name), epsg=self.dem.epsg) # utils.echo_msg('applied coefficient {} to SLOPE grid'.format(ec_s)) utils.remove_glob(self.prox) #utils.remove_glob(self.slope) unc_out['prox_unc'] = ['{}_prox_unc.tif'.format(self.dem.name), 'raster'] unc_out['prox_bf'] = ['{}_prox_bf.png'.format(self.dem.name), 'image'] unc_out['prox_scatter'] = ['{}_prox_scatter.png'.format(self.dem.name), 'image'] return(unc_out, 0)
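## ==============================================
## per the TODO above, a hedged numpy/gdal sketch of the coefficient
## application, assuming the fitted model unc = a + b * d**c with
## ec_d = [a, b, c] as used in the commands above.
## ==============================================
def _apply_unc_sketch(prox_fn, dst_fn, ec_d, ndv=-9999):
    from osgeo import gdal
    import numpy as np

    ds = gdal.Open(prox_fn)
    d = ds.GetRasterBand(1).ReadAsArray().astype(np.float64)
    unc = ec_d[0] + ec_d[1] * (np.abs(d) ** ec_d[2])
    drv = gdal.GetDriverByName('GTiff')
    out = drv.Create(dst_fn, ds.RasterXSize, ds.RasterYSize, 1, gdal.GDT_Float32)
    out.SetGeoTransform(ds.GetGeoTransform())
    out.SetProjection(ds.GetProjection())
    out.GetRasterBand(1).SetNoDataValue(ndv)
    out.GetRasterBand(1).WriteArray(unc)
    out = ds = None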
def _split_sample(self, trains, perc):
    """split-sample simulations and error calculations

    sims = max-simulations
    """
    
    _prog = utils.CliProgress('performing MAX {} SPLIT-SAMPLE simulations'.format(self.sims))
    utils.echo_msg('simulation\terrors\tproximity-coeff\tp_diff')
    sim = 0
    status = 0
    s_dp = None
    last_ec_d = None
    while True:
        status = 0
        sim += 1
        for z, train in enumerate(trains):
            train_h = train[:25]
            ss_samp = perc
            
            ## ==============================================
            ## perform split-sample analysis on each training region.
            ## ==============================================
            for n, sub_region in enumerate(train_h):
                ss_samp = perc
                _prog.update()
                this_region = sub_region[0]
                if sub_region[3] < ss_samp:
                    ss_samp = None
                    
                ## ==============================================
                ## extract the xyz data for the region from the DEM
                ## ==============================================
                o_xyz = '{}_{}.xyz'.format(self.dem.name, n)
                ds = gdal.Open(self.dem.fn)
                ds_config = demfun.gather_infos(ds)
                ## buffer a copy of the region so the original
                ## training region isn't mutated
                b_region = this_region.copy()
                b_region.buffer(20*self.dem.inc)
                srcwin = b_region.srcwin(ds_config['geoT'], ds_config['nx'], ds_config['ny'])
                with open(o_xyz, 'w') as o_fh:
                    for xyz in demfun.parse(ds, srcwin=srcwin, mask=self.dem.mask_fn):
                        xyz.dump(dst_port=o_fh)
                        
                ds = None
                if os.stat(o_xyz).st_size != 0:
                    ## ==============================================
                    ## split the xyz data to inner/outer; outer is
                    ## the data buffer, inner will be randomly sampled
                    ## ==============================================
                    s_inner, s_outer = self._gmt_select_split(
                        o_xyz, this_region, 'sub_{}'.format(n), verbose=False
                    )
                    if os.stat(s_inner).st_size != 0:
                        sub_xyz = np.loadtxt(s_inner, ndmin=2, delimiter=' ')
                    else:
                        sub_xyz = []
                        
                    ss_len = len(sub_xyz)
                    if ss_samp is not None:
                        sx_cnt = int(sub_region[1] * (ss_samp / 100.)) + 1
                    else:
                        sx_cnt = 1
                        
                    sub_xyz_head = 'sub_{}_head.xyz'.format(n)
                    np.random.shuffle(sub_xyz)
                    np.savetxt(sub_xyz_head, sub_xyz[:sx_cnt], '%f', ' ')
                    
                    ## ==============================================
                    ## generate the random-sample DEM
                    ## ==============================================
                    waff = waffles.WaffleFactory(
                        mod=self.dem.mod, data=[s_outer, sub_xyz_head], src_region=this_region,
                        inc=self.dem.inc, name='sub_{}'.format(n), node=self.dem.node,
                        fmt=self.dem.fmt, extend=self.dem.extend, extend_proc=self.dem.extend_proc,
                        weights=self.dem.weights, sample=self.dem.sample, clip=self.dem.clip,
                        epsg=self.dem.epsg, mask=True, verbose=False, clobber=True
                    )
                    waff.mod_args = self.dem.mod_args
                    wf = waff.acquire().generate()
                    if wf.valid_p():
                        ## ==============================================
                        ## generate the random-sample data PROX
                        ## ==============================================
                        sub_prox = '{}_prox.tif'.format(wf.name)
                        demfun.proximity('{}_m.tif'.format(wf.name), sub_prox)
                        
                        ## ==============================================
                        ## Calculate the random-sample errors
                        ## ==============================================
                        sub_xyd = demfun.query(sub_xyz[sx_cnt:], wf.fn, 'xyd')
                        sub_dp = demfun.query(sub_xyd, sub_prox, 'xyzg')
                    else:
                        sub_dp = None
                        
                    utils.remove_glob(sub_xyz_head)
                    if sub_dp is not None and len(sub_dp) > 0:
                        try:
                            s_dp = np.concatenate((s_dp, sub_dp), axis = 0)
                        except:
                            s_dp = sub_dp
                            
                utils.remove_glob(o_xyz, 'sub_{}*'.format(n))
                
        if s_dp is not None and len(s_dp) > 0:
            d_max = self.region_info[self.dem.name][4]
            s_dp = s_dp[s_dp[:,3] < d_max,:]
            s_dp = s_dp[s_dp[:,3] > 0,:]
            prox_err = s_dp[:,[2,3]]
            if last_ec_d is None:
                last_ec_d = [0, 0.1, 0.2]
                last_ec_diff = 10
            else:
                last_ec_diff = abs(last_ec_d[2] - last_ec_d[1])
                
            ec_d = self._err2coeff(prox_err[:50000000], coeff_guess=last_ec_d,
                                   dst_name=self.dem.name + '_prox', xa='distance')
            ec_diff = abs(ec_d[2] - ec_d[1])
            ec_l_diff = abs(last_ec_diff - ec_diff)
            utils.echo_msg('{}\t{}\t{}\t{}'.format(sim, len(s_dp), ec_d, ec_l_diff))
            if ec_d[0] == 0 and ec_d[1] == 0.1 and ec_d[2] == 0.2:
                continue
            
            if sim >= int(self.sims):
                break
            
            if abs(last_ec_diff - ec_diff) < 0.0001:
                break
            
            if len(s_dp) >= int(self.region_info[self.dem.name][1] / 10):
                break
            
            last_ec_d = ec_d
        else:
            utils.echo_msg('{}\t{}\t{}\t{}'.format(sim, 0 if s_dp is None else len(s_dp), None, None))
            
    _prog.end(status, 'performed {} SPLIT-SAMPLE simulations'.format(sim))
    return([ec_d])
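## ==============================================
## `_err2coeff` is defined elsewhere; a hedged sketch of the kind of
## fit it performs, assuming the model err = a + b * dist**c over the
## (err, dist) columns collected above (scipy assumed available).
## ==============================================
def _err2coeff_sketch(prox_err, coeff_guess=(0, 0.1, 0.2)):
    import numpy as np
    from scipy.optimize import curve_fit

    dist = prox_err[:, 1]
    err = np.abs(prox_err[:, 0])
    model = lambda d, a, b, c: a + b * (d ** c)
    coeffs, _ = curve_fit(model, dist, err, p0=coeff_guess, maxfev=5000)
    return(list(coeffs))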
def spat_meta_cli(argv = sys.argv):
    i = 1
    dls = []
    i_regions = []
    these_regions = []
    src_srs = 'epsg:4326'
    xinc = utils.str2inc('1s')
    yinc = utils.str2inc('1s')
    node = 'pixel'
    name = 'waffles_spat'
    ogr_format = 'ESRI Shapefile'
    extend = 0
    want_verbose = True
    want_prefix = False
    want_recursive = False
    prefix_args = {}
    while i < len(argv):
        arg = argv[i]
        if arg == '--region' or arg == '-R':
            i_regions.append(str(argv[i + 1]))
            i = i + 1
        elif arg[:2] == '-R':
            i_regions.append(str(arg[2:]))
        elif arg == '--outname' or arg == '-O':
            name = argv[i + 1]
            i += 1
        elif arg[:2] == '-O':
            name = arg[2:]
        elif arg == '-s_srs' or arg == '--s_srs' or arg == '-P':
            src_srs = argv[i + 1]
            i = i + 1
        elif arg == '--increment' or arg == '-E':
            incs = argv[i + 1].split(':')
            xy_inc = incs[0].split('/')
            xinc = utils.str2inc(xy_inc[0])
            if len(xy_inc) > 1:
                yinc = utils.str2inc(xy_inc[1])
            else:
                yinc = utils.str2inc(xy_inc[0])
                
            i = i + 1
        elif arg[:2] == '-E':
            incs = arg[2:].split(':')
            xy_inc = incs[0].split('/')
            xinc = utils.str2inc(xy_inc[0])
            if len(xy_inc) > 1:
                yinc = utils.str2inc(xy_inc[1])
            else:
                yinc = utils.str2inc(xy_inc[0])
        elif arg == '--extend' or arg == '-X':
            exts = argv[i + 1].split(':')
            extend = utils.int_or(exts[0], 0)
            i += 1
        elif arg[:2] == '-X':
            exts = arg[2:].split(':')
            extend = utils.int_or(exts[0], 0)
        elif arg == '--format' or arg == '-F':
            ogr_format = argv[i + 1]
            i += 1
        elif arg[:2] == '-F':
            ogr_format = arg[2:]
        elif arg == '-p' or arg == '--prefix':
            want_prefix = True
            prefix_opts = argv[i + 1].split(':')
            prefix_args = utils.args2dict(prefix_opts, prefix_args)
            if len(prefix_args) > 0:
                i += 1
        elif arg == '-r' or arg == '--grid-node':
            node = 'grid'
        elif arg == '-c' or arg == '--recursive':
            want_recursive = True
        elif arg == '--quiet' or arg == '-q':
            want_verbose = False
        elif arg == '-help' or arg == '--help' or arg == '-h':
            sys.stderr.write(_usage)
            sys.exit(1)
        elif arg == '-version' or arg == '--version':
            sys.stdout.write('{}\n'.format(__version__))
            sys.exit(1)
        else:
            dls.append(arg)
            
        i = i + 1

    for i_region in i_regions:
        tmp_region = regions.Region().from_string(i_region)
        if tmp_region.valid_p(check_xy=True):
            these_regions.append(tmp_region)
        else:
            i_region_s = i_region.split(':')
            tmp_region = regions.ogr_wkts(i_region_s[0])
            for i in tmp_region:
                if i.valid_p():
                    if len(i_region_s) > 1:
                        these_regions.append(
                            regions.Region().from_string(
                                '/'.join([i.format('str'), i_region_s[1]])
                            )
                        )
                    else:
                        these_regions.append(i)

    if len(these_regions) == 0:
        utils.echo_error_msg('could not parse region(s) {}'.format(i_regions))
        sys.stderr.write('{}\n'.format(_usage))
        sys.exit(1)
    else:
        if want_verbose:
            utils.echo_msg(
                'parsed {} region(s)'.format(len(these_regions))
            )

    name_ = name
    for rn, this_region in enumerate(these_regions):
        utils.echo_msg('using region {}'.format(this_region.format('gmt')))
        if len(dls) == 0:
            sys.stderr.write(_usage)
            utils.echo_error_msg('you must specify some type of data')
        else:
            if want_prefix or len(these_regions) > 1:
                name_ = utils.append_fn(name, this_region, xinc, **prefix_args)
                
            if os.path.exists('{}_sm.{}'.format(name_, utils.ogr_fext(ogr_format))):
                utils.echo_msg(
                    'SPATIAL METADATA {} already exists, skipping...'.format('{}_sm.{}'.format(name_, utils.ogr_fext(ogr_format)))
                )
            else:
                SpatialMetadata(
                    data=dls, src_region=this_region, xinc=xinc, yinc=yinc, extend=extend,
                    src_srs=src_srs, node=node, name=name_, verbose=want_verbose,
                    recursive=want_recursive, ogr_format=ogr_format
                ).run()
def update(self): """Crawl the COP30 database and update/generate the COPERNICUS reference vector.""" self.FRED._open_ds(1) surveys = [] page = f_utils.Fetch(self.cop_10_url, verbose=True).fetch_html() rows = page.xpath('//a[contains(@href, ".zip")]/@href') if self.verbose: _prog = utils.CliProgress('scanning {} tiles in {}...'.format( len(rows), self.cop_10_url)) for i, row in enumerate(rows): sid = row.split('.')[0] if self.verbose: _prog.update_perc((i, len(rows))) self.FRED._attribute_filter(["ID = '{}'".format(sid)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: spat = row.split('.')[0].split('_')[-1] x = int(spat.split('x')[-1]) y = int(spat.split('x')[0].split('y')[-1]) this_region = regions.Region().from_list( [x, x + 10, y, y + 10]) geom = this_region.export_as_geom() if geom is not None: surveys.append({ 'Name': row.split('.')[0], 'ID': sid, 'Agency': 'EU', 'Date': utils.this_date(), 'MetadataLink': self.cop_10_aux_url, 'MetadataDate': utils.this_date(), 'DataLink': self.cop_10_url + row, 'DataType': '3', 'DataSource': 'copernicus', 'HorizontalDatum': 'epsg:4326', 'VerticalDatum': 'msl', 'Info': '', 'geom': geom }) if self.verbose: _prog.end( 0, 'scanned {} tiles in {}.'.format(len(rows), self.cop_10_url)) f = f_utils.Fetch(self.cop30_vrt_url, headers=self.headers, verbose=True) page = f.fetch_xml() fns = page.findall('.//SourceFilename') if self.verbose: _prog = utils.CliProgress('scanning {} tiles in {}...'.format( len(fns), self.cop30_url)) for i, fn in enumerate(fns): sid = fn.text.split('/')[-1].split('.')[0] if self.verbose: _prog.update_perc((i, len(fns))) self.FRED._attribute_filter(["ID = '{}'".format(sid)]) if self.FRED.layer is None or len(self.FRED.layer) == 0: spat = fn.text.split('_10_')[-1].split('_DEM')[0] xsplit = '_E' if 'E' in spat else '_W' ysplit = 'S' if 'S' in spat else 'N' x = int(spat.split(xsplit)[-1].split('_')[0]) y = int(spat.split(xsplit)[0].split(ysplit)[-1].split('_')[0]) if xsplit == '_W': x = x * -1 if ysplit == 'S': y = y * -1 this_region = regions.Region().from_list([x, x + 1, y, y + 1]) geom = this_region.export_as_geom() if geom is not None: surveys.append({ 'Name': fn.text.split('.')[0].split('/')[-1], 'ID': sid, 'Agency': 'EU', 'Date': utils.this_date(), 'MetadataLink': '', 'MetadataDate': utils.this_date(), 'DataLink': self.cop30_url + fn.text.split('/')[-1] + '?token=', 'DataType': '1', 'DataSource': 'copernicus', 'HorizontalDatum': 'epsg:4326', 'Etcetra': self.cop30_rurl, 'VerticalDatum': 'msl', 'Info': '', 'geom': geom }) if self.verbose: _prog.end( 0, 'scanned {} tiles in {}.'.format(len(fns), self.cop30_url)) utils.echo_msg('added {} COPERNICUS DEM tiles'.format( len(surveys))) self.FRED._add_surveys(surveys) self.FRED._close_ds()
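## ==============================================
## quick check of the tile-name parsing above against a representative
## COP30 source filename (example name assumed from the VRT layout).
## ==============================================
def _cop30_parse_example():
    fn_text = 'Copernicus_DSM_COG_10_N46_00_00_E006_00_00_DEM.tif'
    spat = fn_text.split('_10_')[-1].split('_DEM')[0]
    xsplit = '_E' if 'E' in spat else '_W'
    ysplit = 'S' if 'S' in spat else 'N'
    x = int(spat.split(xsplit)[-1].split('_')[0])
    y = int(spat.split(xsplit)[0].split(ysplit)[-1].split('_')[0])
    return(x, y) ## -> (6, 46)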
def _load_nhd(self):
    """USGS NHD (HIGH-RES U.S. Only)
    Fetch NHD (NHD High/Plus) data from TNM to fill in near-shore areas.
    High resolution data varies by location...
    """
    
    self.p_region.export_as_ogr('region_buff.shp')
    xsize, ysize, gt = self.p_region.geo_transform(x_inc=self.inc)
    utils.run_cmd('gdal_rasterize -ts {} {} -te {} -burn -9999 -a_nodata -9999 \
    -ot Int32 -co COMPRESS=DEFLATE -a_srs EPSG:{} region_buff.shp {}\
    '.format(xsize, ysize, self.p_region.format('te'), self.epsg, self.u_mask), verbose=self.verbose)
    utils.remove_glob('region_buff.*')

    this_tnm = fetches.tnm.TheNationalMap(
        src_region=self.p_region, weight=self.weight, verbose=self.verbose,
        where="Name LIKE '%Hydro%'", extents='HU-4 Subregion,HU-8 Subbasin'
    ).run()
    r_shp = []
    for result in this_tnm.results:
        if f_utils.Fetch(result[0], verbose=self.verbose).fetch_file(os.path.join(result[2], result[1])) == 0:
            gdb_zip = os.path.join(result[2], result[1])
            gdb_files = utils.unzip(gdb_zip)
            gdb_bn = os.path.basename('.'.join(gdb_zip.split('.')[:-1]))
            gdb = gdb_bn + '.gdb'
            utils.run_cmd('ogr2ogr {}_NHDArea.shp {} NHDArea -clipdst {} -overwrite 2>&1\
            '.format(gdb_bn, gdb, self.p_region.format('ul_lr')), verbose=False)
            if os.path.exists('{}_NHDArea.shp'.format(gdb_bn)):
                r_shp.append('{}_NHDArea.shp'.format(gdb_bn))
                
            utils.run_cmd('ogr2ogr {}_NHDPlusBurnWaterBody.shp {} NHDPlusBurnWaterBody -clipdst {} -overwrite 2>&1\
            '.format(gdb_bn, gdb, self.p_region.format('ul_lr')), verbose=False)
            if os.path.exists('{}_NHDPlusBurnWaterBody.shp'.format(gdb_bn)):
                r_shp.append('{}_NHDPlusBurnWaterBody.shp'.format(gdb_bn))
                
            utils.run_cmd('ogr2ogr {}_NHDWaterBody.shp {} NHDWaterBody -where "FType = 390" -clipdst {} -overwrite 2>&1\
            '.format(gdb_bn, gdb, self.p_region.format('ul_lr')), verbose=False)
            if os.path.exists('{}_NHDWaterBody.shp'.format(gdb_bn)):
                r_shp.append('{}_NHDWaterBody.shp'.format(gdb_bn))
                
            utils.remove_glob(gdb)
        else:
            utils.echo_error_msg('unable to fetch {}'.format(result))

    [utils.run_cmd('ogr2ogr -skipfailures -update -append nhdArea_merge.shp {} 2>&1\
    '.format(shp), verbose=False) for shp in r_shp]
    utils.run_cmd('gdal_rasterize -burn 1 nhdArea_merge.shp {}'.format(self.u_mask), verbose=True)
    utils.remove_glob('nhdArea_merge.*', 'NHD_*', *r_shp)

    ## ==============================================
    ## update wet/dry mask with nhd data
    ## ==============================================
    utils.echo_msg('filling the coast mask with NHD data...')
    c_ds = gdal.Open(self.u_mask)
    for this_xyz in demfun.parse(c_ds):
        xpos, ypos = utils._geo2pixel(this_xyz.x, this_xyz.y, self.dst_gt)
        try:
            if self.coast_array[ypos, xpos] == self.ds_config['ndv']:
                if this_xyz.z == 1:
                    self.coast_array[ypos, xpos] = 0
        except: pass
        
    c_ds = None
    utils.remove_glob('{}*'.format(self.u_mask))
elif arg == '-agg_level' or arg == '--agg_level' or arg == '-a': agg_level = utils.int_or(argv[i + 1]) i = i + 1 elif arg == '-help' or arg == '--help' or arg == '-h': sys.stderr.write(_usage) sys.exit(1) elif arg == '-version' or arg == '--version': sys.stderr.write('{}\n'.format(_version)) sys.exit(1) elif elev is None: elev = arg else: sys.stderr.write(_usage) sys.exit(1) i = i + 1 if elev is None: sys.stderr.write(_usage) utils.echo_error_msg('you must enter an input file') sys.exit(1) dst_gdal = elev.split('.')[0] + '_fltr.tif' utils.echo_msg('filtering {} to {}'.format(elev, dst_gdal)) demfun.filter_outliers_slp(elev, dst_gdal, chunk_size=chunk_size, chunk_step=chunk_step, agg_level=agg_level) ### End
def yield_xyz(self, entry):
    ## IceSat data (ATL08 land segments)
    import h5py
    
    ln = 0
    if entry[1].split('.')[-1] == 'h5' and 'ATL' in self.short_name:
        if f_utils.Fetch(entry[0], callback=self.callback, verbose=self.verbose, headers=self.headers).fetch_file(entry[1]) == 0:
            h5_file = entry[1]
            h5 = h5py.File(h5_file, 'r')
            for g in h5['/']:
                if 'gt' in g:
                    this_xyz = xyzfun.XYZPoint(w=1, src_srs='epsg:4326')
                    try:
                        h_ph = h5['{}/land_segments/dem_h'.format(g)]
                        lon_ph = h5['{}/land_segments/longitude'.format(g)]
                        lat_ph = h5['{}/land_segments/latitude'.format(g)]
                    except:
                        continue
                    
                    dataset = np.vstack((lon_ph, lat_ph, h_ph)).transpose()
                    if self.region is not None and self.region.valid_p():
                        dataset = dataset[dataset[:, 0] > self.region.xmin, :]
                        dataset = dataset[dataset[:, 0] < self.region.xmax, :]
                        dataset = dataset[dataset[:, 1] > self.region.ymin, :]
                        dataset = dataset[dataset[:, 1] < self.region.ymax, :]
                        if self.region.zmin is not None:
                            dataset = dataset[dataset[:, 2] > self.region.zmin, :]
                            
                        if self.region.zmax is not None:
                            dataset = dataset[dataset[:, 2] < self.region.zmax, :]
                            
                    for point in dataset:
                        this_xyz.x = point[0]
                        this_xyz.y = point[1]
                        this_xyz.z = point[2]
                        this_xyz.w = self.weight
                        if self.dst_srs is not None:
                            this_xyz.warp(dst_srs=self.dst_srs)
                            
                        ln += 1
                        yield(this_xyz)
                        
                    dataset = None
                    
            ## close the hdf5 file before removing it
            h5.close()
            
    if self.verbose:
        utils.echo_msg('parsed {} data records from {}'.format(ln, entry[1]))
        
    utils.remove_glob('{}*'.format(entry[1]))
def _update(self): """Update the FRED reference vector after scanning the relevant metadata from Digital Coast. """ #self.FRED = FRED(verbose=self.verbose, local=True) self.FRED._open_ds(1) for ld in self._dc_dirs: cols = [] surveys = [] page = f_utils.Fetch(self._dc_htdata_url + ld).fetch_html() if page is None: continue tr = page.xpath('//table')[0].xpath('.//tr') if len(tr) <= 0: continue [cols.append(i.text_content()) for i in tr[0]] if self.verbose: _prog = utils.CliProgress( 'scanning {} datasets in {}...'.format(len(tr), ld)) for i in range(1, len(tr)): if self.callback(): break if self.verbose: _prog.update_perc((i, len(tr))) #dc['ID #'])) cells = tr[i].getchildren() dc = {} for j, cell in enumerate(cells): cl = cell.xpath('a') if len(cl) > 0: if cols[j] == 'Dataset Name': dc[cols[j]] = cell.text_content() dc['Metadata'] = cl[0].get('href') else: dc[cols[j]] = cl[0].get('href') else: dc[cols[j]] = cell.text_content() self.FRED._attribute_filter(["ID = '{}'".format(dc['ID #'])]) if self.FRED.layer is None or len(self.FRED.layer) == 0: if 'Metadata' in dc.keys(): this_xml = f_utils.iso_xml(dc['Metadata']) h_epsg, v_epsg = this_xml.reference_system() geom = this_xml.bounds(geom=True) if geom is not None: if self.verbose: _prog.update_perc( (i, len(tr)), msg='{} ** adding: {} **'.format( _prog.opm, dc['ID #'])) surveys.append({ 'Name': dc['Dataset Name'], 'ID': dc['ID #'], 'Date': this_xml.date(), 'MetadataLink': dc['Metadata'], 'MetadataDate': this_xml.xml_date(), 'DataLink': dc['https'], 'IndexLink': dc['Tile Index'], 'Link': self._dc_url, 'DataType': ld.split('_')[0], 'DataSource': 'dc', 'HorizontalDatum': h_epsg, 'VerticalDatum': v_epsg, 'Info': this_xml.abstract(), 'geom': geom }) self.FRED._add_surveys(surveys) if self.verbose: _prog.end(0, 'scanned {} datasets in {}.'.format(len(tr), ld)) utils.echo_msg('added {} surveys from {}'.format( len(surveys), ld)) self.FRED._close_ds()
## ==============================================
## lambdas for the FRED using the module object `mod`
## ==============================================
_filter_FRED = lambda mod: mod.FRED._filter(region=mod.region, where=mod.where, layers=[mod.name])
_update_FRED = lambda mod, s: mod.FRED._add_surveys(s)
_filter_FRED_index = lambda mod: [utils.echo_msg(json.dumps(f, indent=2)) for f in _filter_FRED(mod)]
### End
def fetches_cli(argv=sys.argv):
    """run fetches from command-line

    See `fetches_cli_usage` for full cli options.
    """
    
    i_regions = []
    these_regions = []
    mods = []
    mod_opts = {}
    want_list = False
    want_proc = False
    want_verbose = True
    stop_threads = False
    
    ## ==============================================
    ## parse command line arguments.
    ## ==============================================
    i = 1
    while i < len(argv):
        arg = argv[i]
        if arg == '--region' or arg == '-R':
            i_regions.append(str(argv[i + 1]))
            i = i + 1
        elif arg[:2] == '-R':
            i_regions.append(str(arg[2:]))
        elif arg == '--list' or arg == '-l':
            want_list = True
        elif arg == '--process' or arg == '-p':
            want_proc = True
        elif arg == '--quiet' or arg == '-q':
            want_verbose = False
        elif arg == '--help' or arg == '-h':
            sys.stderr.write(fetches_usage)
            sys.exit(1)
        elif arg == '--version' or arg == '-v':
            print('{}, version {}'.format(os.path.basename(sys.argv[0]), fetches.__version__))
            sys.exit(1)
        elif arg == '--modules' or arg == '-m':
            try:
                if argv[i + 1] in FetchesFactory.mods.keys():
                    sys.stderr.write(_fetches_module_long_desc({k: FetchesFactory.mods[k] for k in (argv[i + 1],)}))
                else:
                    sys.stderr.write(_fetches_module_long_desc(FetchesFactory.mods))
            except:
                sys.stderr.write(_fetches_module_long_desc(FetchesFactory.mods))
                
            sys.exit(0)
        elif arg[0] == '-':
            sys.stderr.write(fetches_usage)
            sys.exit(0)
        else:
            mods.append(arg)
            
        i = i + 1

    if len(mods) == 0:
        sys.stderr.write(fetches_usage)
        utils.echo_error_msg('you must select at least one fetch module')
        sys.exit(-1)
        
    for i_region in i_regions:
        tmp_region = regions.Region().from_string(i_region)
        if tmp_region.valid_p(check_xy=True):
            these_regions.append(tmp_region)
        else:
            i_region_s = i_region.split(':')
            tmp_region = regions.ogr_wkts(i_region_s[0])
            for i in tmp_region:
                if i.valid_p():
                    if len(i_region_s) > 1:
                        these_regions.append(regions.Region().from_string(
                            '/'.join([i.format('str'), i_region_s[1]])))
                    else:
                        these_regions.append(i)

    if not these_regions:
        these_regions = [regions.Region().from_string('-R-180/180/-90/90')]
        
    if want_verbose:
        utils.echo_msg('parsed {} region(s)'.format(len(these_regions)))
        
    for rn, this_region in enumerate(these_regions):
        if stop_threads:
            return
        
        x_fs = [
            FetchesFactory(mod=mod, src_region=this_region, verbose=want_verbose).acquire(dst_srs='epsg:4326')
            for mod in mods
        ]
        for x_f in x_fs:
            if x_f is None:
                continue
            
            if want_verbose:
                utils.echo_msg('running fetch module {} on region {}...'.format(x_f.name, this_region.format('str')))
                
            x_f.run()
            if want_verbose:
                utils.echo_msg('found {} data files.'.format(len(x_f.results)))
                
            if len(x_f.results) == 0:
                break
            
            if want_list:
                for result in x_f.results:
                    print(result[0])
            else:
                fr = f_utils.fetch_results(x_f, want_proc=want_proc)
                fr.daemon = True
                _p = utils.CliProgress('fetching {} remote data files'.format(len(x_f.results)))
                try:
                    fr.start()
                    while True:
                        time.sleep(2)
                        sys.stderr.write('\x1b[2K\r')
                        if want_verbose:
                            _p.update_perc((len(x_f.results) - fr.fetch_q.qsize(), len(x_f.results)))
                            
                        sys.stderr.flush()
                        if not fr.is_alive():
                            break
                        
                except (KeyboardInterrupt, SystemExit):
                    utils.echo_error_msg('user breakage...please wait while fetches exits.')
                    x_f.status = -1
                    stop_threads = True
                    while not fr.fetch_q.empty():
                        try:
                            fr.fetch_q.get(False)
                        except Empty:
                            continue
                        
                        fr.fetch_q.task_done()
                        
                fr.join()
                _p.end(x_f.status, 'fetched {} remote data files'.format(len(x_f.results)))
                
            if want_verbose:
                utils.echo_msg('ran fetch module {} on region {}...'.format(x_f.name, this_region.format('str')))
def _parse_dataset(self, catalog_url):
    ntCatXml = f_utils.iso_xml(catalog_url)
    this_ds = ntCatXml.xml_doc.findall('.//th:dataset', namespaces=ntCatXml.namespaces)
    this_ds_services = ntCatXml.xml_doc.findall('.//th:service', namespaces=ntCatXml.namespaces)
    if self.verbose:
        _prog = utils.CliProgress('scanning {} datasets in {}...'.format(len(this_ds), this_ds[0].attrib['name']))
        
    surveys = []
    for i, node in enumerate(this_ds):
        this_title = node.attrib['name']
        this_id = node.attrib['ID']
        if self.verbose:
            _prog.update_perc((i, len(this_ds)))
            
        self.FRED._attribute_filter(["ID = '{}'".format(this_id)])
        if self.FRED.layer is None or len(self.FRED.layer) == 0:
            subCatRefs = node.findall('.//th:catalogRef', namespaces=ntCatXml.namespaces)
            if len(subCatRefs) > 0:
                self._parse_catalog(catalog_url)
                break
            
            try:
                ds_path = node.attrib['urlPath']
            except:
                continue
            
            iso_url = False
            wcs_url = False
            http_url = False
            for service in this_ds_services:
                service_name = service.attrib['name']
                if service_name == 'iso':
                    iso_url = '{}{}{}'.format(self._ngdc_url, service.attrib['base'], ds_path)
                    
                if service_name == 'wcs':
                    wcs_url = '{}{}{}'.format(self._ngdc_url, service.attrib['base'], ds_path)
                    
                if service_name == 'http':
                    http_url = '{}{}{}'.format(self._ngdc_url, service.attrib['base'], ds_path)
                    
            this_xml = f_utils.iso_xml(iso_url)
            title = this_xml.title()
            h_epsg, v_epsg = this_xml.reference_system()
            ## default the z variable name; override it if the metadata
            ## names a known elevation band
            zvar = 'z'
            zv = this_xml.xml_doc.findall(
                './/gmd:dimension/gmd:MD_Band/gmd:sequenceIdentifier/gco:MemberName/gco:aName/gco:CharacterString',
                namespaces=this_xml.namespaces
            )
            for zvs in zv:
                if zvs.text == 'bathy' or zvs.text == 'Band1' or zvs.text == 'z':
                    zvar = zvs.text
                    break
                
            geom = this_xml.bounds(geom=True)
            if geom is not None:
                surveys.append({'Name': title, 'ID': this_id, 'Agency': 'NOAA', 'Date': this_xml.date(),
                                'MetadataLink': this_xml.url, 'MetadataDate': this_xml.xml_date(),
                                'DataLink': http_url, 'IndexLink': wcs_url, 'Link': self._nt_catalog,
                                'DataType': 'raster', 'DataSource': 'ncei_thredds',
                                'HorizontalDatum': h_epsg, 'VerticalDatum': v_epsg,
                                'Etcetra': zvar, 'Info': this_xml.abstract(), 'geom': geom})
                
    self.FRED._add_surveys(surveys)
    if self.verbose:
        _prog.end(0, 'scanned {} datasets in {}.'.format(len(this_ds), this_ds[0].attrib['name']))
        utils.echo_msg('added {} surveys from {}'.format(len(surveys), this_ds[0].attrib['name']))