def load_table(FP, JOIN_FP, selection_unique_ids_str, SERIES, SERIES_BOTH, SERIES_FIELD):
    logger.debug('Loading danco AHAP table...')
    ## Load aerial source table
    # Build where clause, including selecting only ids in selection
    # where = "(sde.{}.{} IN ({}))".format(FP, JOIN_FP, selection_unique_ids_str)
    where = "({}.{} IN ({}))".format(FP, JOIN_FP, selection_unique_ids_str)
    # Add series if only medium or high is desired, else add nothing and load both
    if SERIES != SERIES_BOTH:
        # where += " AND (sde.{}.{} = '{}')".format(FP, SERIES_FIELD, SERIES)
        where += " AND ({}.{} = '{}')".format(FP, SERIES_FIELD, SERIES)
    aia = query_footprint(FP, db=DB, table=True, where=where)
    aia_ct = len(aia)
    # Remove duplicates - there are identical records, but on different src_drives,
    # mainly seen on src_drives USGS_s31 and USGS_s71.
    # If this actually removes anything, a debug message will be logged.
    # TODO: Add option to keep all locations; only useful for copying from drives,
    #       as there should be one of each file on the server
    aia = aia.drop_duplicates(subset=JOIN_FP)
    aia_dd = len(aia)
    if aia_dd != aia_ct:
        logger.debug('Duplicates dropped: identical records on multiple drives.')

    logger.info('Records loaded in AHAP table: {:,}'.format(len(aia)))

    return aia
def dem_exists(dataframe, catalogid_field, out_field='dem_exists'):
    """
    Takes a dataframe and determines if each catalogid in catalogid_field
    has been turned into a DEM.

    Parameters
    ----------
    dataframe : pd.DataFrame or gpd.GeoDataFrame
        Dataframe of one ID per row.
    catalogid_field : STR
        Field in dataframe with catalogids.
    out_field : STR, optional
        The name of the field to create. The default is 'dem_exists'.

    Returns
    -------
    None.

    """
    dems = query_footprint('pgc_dem_setsm_strips', table=True,
                           columns=['catalogid1', 'catalogid2'])
    dem_ids = list(dems['catalogid1']) + list(dems['catalogid2'])
    dataframe[out_field] = dataframe[catalogid_field].apply(lambda x: x in dem_ids)
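# Illustrative usage sketch for dem_exists (not part of the original module).
# The catalog IDs below are hypothetical placeholders, and a working danco
# connection is assumed for query_footprint.
def _example_dem_exists():
    import pandas as pd
    # Hypothetical IDs for demonstration only
    df = pd.DataFrame({'catalog_id': ['10300100A0000100', '10300100B0000200']})
    dem_exists(df, catalogid_field='catalog_id')
    # A boolean 'dem_exists' column has been added to df in place
    return df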
def archive_id_lut():
    # Look-up table names on danco
    print('Creating look-up table from danco table...')
    luts = {
        'GE01': 'index_dg_catalogid_to_ge_archiveid_ge01',
        'IK01': 'index_dg_catalogid_to_ge_archiveid_ik'
    }

    # Verify sensor
    # if sensor in luts:
    #     pass
    # else:
    #     print('{} look up table not found. Sensor must be in {}'.format(sensor, luts.keys()))

    # Create list to store tuples of (old id, new id)
    lut = []
    # Create tuples for each sensor, append to list
    for sensor in luts.keys():
        lu_df = query_footprint(layer=luts[sensor], table=True)
        # Combine old ids and new ids in tuples in a list
        sensor_lut = list(zip(lu_df.crssimageid, lu_df.catalog_identifier))
        for entry in sensor_lut:
            lut.append(entry)

    # Convert list of tuples to dictionary
    lu_dict = dict(lut)

    return lu_dict
def is_stereo(dataframe, catalogid_field, out_field='is_stereo'):
    """
    Takes a dataframe and determines if each catalogid in catalogid_field
    is a stereo image.
    """
    stereo_ids = query_footprint('pgc_imagery_catalogids_stereo', table=True,
                                 columns=['CATALOG_ID'])
    # Pull the ID values (list(df) alone would only return the column names)
    stereo_ids = list(stereo_ids['CATALOG_ID'])
    dataframe[out_field] = dataframe[catalogid_field].apply(lambda x: x in stereo_ids)
def load_src(layer, where, columns, write_source=False):
    """
    Load danco layer specified with where clause and columns provided.
    """
    ## Load source footprint
    logger.debug('Loading source footprint (with any provided SQL)...')
    logger.debug('Loading {}...'.format(layer))
    src = query_footprint(layer=layer, where=where, columns=columns)
    logger.info('Loaded source features before selection: {}'.format(len(src)))
    if write_source is True:
        src.to_file(r'C:\temp\src.shp')

    return src
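# Illustrative usage sketch for load_src (not part of the original module). The
# where clause and column list are hypothetical examples; 'index_dg' is used
# elsewhere in these scripts as a danco layer name, and a working danco
# connection is assumed.
def _example_load_src():
    src = load_src(layer='index_dg',
                   where="acqdate >= '2019-01-01'",            # hypothetical filter
                   columns=['catalogid', 'acqdate', 'platform'],
                   write_source=False)
    return src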
def calculate_density(grid_p, footprint_p, out_path=None, date_col=None, rasterize=False):
    if not isinstance(grid_p, gpd.GeoDataFrame):
        logger.info('Loading grid...')
        if 'gdb' in grid_p:
            gdb, layer = grid_p.split('.gdb\\')
            gdb = '{}.gdb'.format(gdb)
            grid = gpd.read_file(gdb, layer=layer)
        else:
            grid = gpd.read_file(grid_p)
    else:
        grid = copy.deepcopy(grid_p)

    danco_footprints = list_danco_db('footprint')
    if isinstance(footprint_p, gpd.GeoDataFrame):
        footprint = copy.deepcopy(footprint_p)
    elif footprint_p in danco_footprints:
        logger.info('Loading footprint from danco...')
        footprint = query_footprint(footprint_p)
    else:
        logger.info('Loading footprint...')
        if 'gdb' in footprint_p:
            gdb, layer = footprint_p.split('.gdb\\')
            gdb = '{}.gdb'.format(gdb)
            footprint = gpd.read_file(gdb, layer=layer)
        else:
            footprint = gpd.read_file(footprint_p)

    logger.info('Calculating density...')
    density = get_count(grid, footprint, date_col=date_col)
    # Convert any tuple columns to strings (occurs when agg-ing the same column multiple ways)
    density.columns = [str(x) if type(x) == tuple else x for x in density.columns]

    if rasterize:
        logger.info('Rasterizing...')
        vec_out = '/vsimem/density_temp.shp'
        density.to_file(vec_out)
        # TODO: Finish this
        # rasterize_options = gdal.RasterizeOptions(xRes=, yRes=, )
        if out_path:
            # TODO: Rasterize to out_path
            pass
    else:
        if out_path:
            logger.info('Writing density...')
            density.to_file(out_path)

    return density
def main(args):
    MFP_PATH = args.mfp_path
    CATALOG_ID = args.field_of_int
    IDS_OUT_DIR = args.ids_out_dir

    mfp_ids_tbl = 'pgc_imagery_catalogids'

    ID_OUT_NAME = os.path.basename(MFP_PATH).split('.')[0]
    ID_OUT_BASENAME = '{}_{}.txt'.format(ID_OUT_NAME, args.field_of_int)
    if not IDS_OUT_DIR:
        IDS_OUT_PATH = os.path.join(os.path.dirname(MFP_PATH), ID_OUT_BASENAME)
    else:
        IDS_OUT_PATH = os.path.join(IDS_OUT_DIR, ID_OUT_BASENAME)

    logging.info('Reading IDs from danco table: {}...'.format(mfp_ids_tbl))
    pgc_cid_fp = query_footprint(mfp_ids_tbl, table=True, columns=[CATALOG_ID])
    cids = list(pgc_cid_fp[CATALOG_ID])

    logging.info('Writing IDs to {}...'.format(IDS_OUT_PATH))
    write_ids(cids, IDS_OUT_PATH)
def dems_from_stereo(aoi_path=None, coords=None, months=None, min_date=None, max_date=None,
                     multispec=False, dem_path='dem_path',
                     strip_types=['strips', 'strips_v4']):
    # Params
    stereo_lyr_name = 'dg_imagery_index_stereo_with_earthdem_region'
    DATE_COL = 'acqdate'     # name of date field in stereo footprint
    SENSOR_COL = 'platform'  # name of sensor field in stereo footprint

    if aoi_path or coords:
        if aoi_path:
            # Load AOI
            aoi = gpd.read_file(aoi_path)
            # Check AOI crs
            stereo_crs = layer_crs(stereo_lyr_name)
            if aoi.crs != stereo_crs:
                logger.debug('Reprojecting AOI to match footprint: AOI -> {}'.format(stereo_crs))
                aoi = aoi.to_crs(stereo_crs)
        elif coords:
            lon = float(coords[0])
            lat = float(coords[1])
            loc = Point(lon, lat)
            aoi = gpd.GeoDataFrame(geometry=[loc], crs="EPSG:4326")

        # Get AOI bounds
        minx, miny, maxx, maxy = aoi.total_bounds
        pad = 3
        where = "x1 > {} AND y1 > {} AND x1 < {} and y1 < {}".format(minx - pad, miny - pad,
                                                                     maxx + pad, maxy + pad)

    # Add constraints to SQL
    if min_date:
        where = check_where(where)
        where += """{} > '{}'""".format(DATE_COL, min_date)
    if max_date:
        where = check_where(where)
        where += """{} < '{}'""".format(DATE_COL, max_date)
    # Add to SQL clause to just select multispectral sensors
    if multispec:
        where = check_where(where)
        where += """{} IN ('WV02', 'WV03')""".format(SENSOR_COL)
    if months:
        month_terms = [""" {} LIKE '%%-{}-%%'""".format(DATE_COL, month) for month in months]
        month_sql = " OR ".join(month_terms)
        month_sql = "({})".format(month_sql)
        where = check_where(where)
        where += month_sql

    # Load stereo footprint
    logger.info('Loading stereo footprint...')
    logger.debug('SQL where: {}'.format(where))
    stereo = query_footprint(stereo_lyr_name, where=where, dryrun=False)
    logger.debug('Initial matches with SQL: {}'.format(len(stereo)))

    # Intersect with AOI
    logger.info('Finding footprints that intersect with AOI...')
    stereo_matches = gpd.overlay(stereo, aoi)
    logger.debug('Matches with AOI intersection: {}'.format(len(stereo_matches)))

    # Create paths
    if platform.system() == 'Windows':
        basepath = r'V:\pgc\data\elev\dem\setsm\ArcticDEM\region'
    elif platform.system() == 'Linux':
        basepath = r'/mnt/pgc/data/elev/dem/setsm/ArcticDEM/region'

    # Look for DEM paths for all pairnames
    pairname_paths = pd.DataFrame()
    for i, row in stereo_matches.iterrows():
        pairname_dems = loc_pairname_dems(basepath, row['region_id'], row['pairname'],
                                          strip_types=strip_types)
        pairname_paths = pd.concat([pairname_paths, pairname_dems])

    # Join back to matches: one row per DEM found per pairname,
    # or, if no DEM found for a pairname, a single row with dem_path=NaN
    matches_paths = pd.merge(stereo_matches, pairname_paths, how='outer',
                             left_on='pairname', right_on='pairname')

    # Split into found and missing dems, report
    found_dems = matches_paths[~matches_paths[dem_path].isna()]
    missing_dems = matches_paths[matches_paths[dem_path].isna()]
    logger.info('Found pairnames: {}/{}'.format(len(found_dems['pairname'].unique()),
                                                len(stereo_matches)))
    logger.info('Found DEMs: {}'.format(len(found_dems)))
    if not found_dems.empty:
        logger.debug('Found DEMs summary: ')
        logger.debug('Total: {}'.format(len(found_dems)))
        logger.debug('Min acqdate: {}'.format(found_dems['acqdate'].min()))
        logger.debug('Max acqdate: {}'.format(found_dems['acqdate'].max()))
        logger.debug('Sensors: {}'.format(sorted(found_dems['platform'].unique())))
        logger.debug('Found DEMs:\n{}'.format(found_dems['pairname']))
        logger.debug('')
    if not missing_dems.empty:
        logger.debug('Missing DEMs summary: ')
        logger.debug('Total: {}'.format(len(missing_dems)))
        logger.debug('Min acqdate: {}'.format(missing_dems['acqdate'].min()))
        logger.debug('Max acqdate: {}'.format(missing_dems['acqdate'].max()))
        logger.debug('Sensors: {}'.format(sorted(missing_dems['platform'].unique())))
        logger.debug('Missing DEMs:\n{}'.format(missing_dems['pairname']))

    return found_dems
def select_AHAP(PHOTO_IDS=None, AOI_P=None, resolution=None, repeat=False, write=None):
    # String literals
    LAYER = 'usgs_index_aerial_image_archive'
    DB = 'imagery'
    # Path to AHAP photo extents shapefile
    PHOTO_EXT_P = r'E:\disbr007\general\aerial\AHAP\AHAP_Photo_Extents\AHAP_Photo_Extents.shp'
    # Identifier in AHAP photos shp
    PHOTO_ID = 'PHOTO_ID'
    # Identifier in AHAP photos table
    UNIQUE_ID = 'unique_id'
    SERIES = 'series'

    # Load danco AHAP imagery table
    logger.info('Reading AHAP danco table')
    where = "campaign = 'AHAP'"
    if resolution:
        where += " AND series = '{}'".format(resolution)
    aia = query_footprint(LAYER, db=DB, table=True, where=where)

    # Load photo extents
    logger.info("Loading AHAP photo extent shapefile...")
    PHOTO_EXT = gpd.read_file(PHOTO_EXT_P)

    if AOI_P:
        logger.info('Reading AOI shapefile....')
        # Load AOI and match crs
        AOI = gpd.read_file(os.path.join(AOI_P))
        AOI = AOI.to_crs(PHOTO_EXT.crs)

        logger.info('Selecting AHAP imagery by location...')
        # Select Photo Extents by intersection with AOI polygons
        selection = gpd.sjoin(PHOTO_EXT, AOI, how='inner', op='intersects')
    elif PHOTO_IDS:
        if os.path.isfile(PHOTO_IDS[0]):
            ids = read_ids(PHOTO_IDS[0])
        elif isinstance(PHOTO_IDS, list):
            ids = PHOTO_IDS
        selection = PHOTO_EXT[PHOTO_EXT[PHOTO_ID].isin(ids)]

    # Remove duplicate Photo Extents if specified
    if repeat is False:
        selection = selection.drop_duplicates(subset=PHOTO_ID)

    # Join to table with filenames
    selection = pd.merge(selection, aia, how='left',
                         left_on=PHOTO_ID, right_on=UNIQUE_ID)

    logger.info('Selected features found: {:,}'.format(len(selection)))

    # Write out shapefile
    if write is not None:
        logger.info('Writing AHAP selection to: {}'.format(write))
        selection.to_file(write)

    return selection
def refresh(last_refresh, refresh_region, refresh_imagery, max_cc, min_cc, sensors,
            aoi_path=None, use_land=True, refresh_thru=None, drop_onhand=True):
    '''
    Select ids for imagery order
    cloudcover: cloudcover <= arg
    '''
    if not refresh_thru:
        # Use today's date
        refresh_thru = datetime.datetime.now().strftime('%Y-%m-%d')

    where = "(acqdate >= '{}' AND acqdate <= '{}') AND (cloudcover >= {} AND cloudcover <= {})".format(
        last_refresh, refresh_thru, min_cc, max_cc)
    if sensors:
        where += " AND (platform IN ({}))".format(str(sensors)[1:-1])
    if aoi_path:
        aoi_where = generate_rough_aoi_where(aoi_path=aoi_path, x_fld='x1', y_fld='y1', pad=20.0)
        where += " AND {}".format(aoi_where)
    logger.debug('where: {}'.format(where))

    # Load regions shp
    regions_path = r"E:\disbr007\imagery_orders\all_regions.shp"
    logger.debug('Regions path: {}'.format(regions_path))
    # regions = gpd.read_file(regions_path, driver='ESRI_Shapefile')
    regions = query_footprint('pgc_earthdem_regions')

    # Load not-on-hand footprint -> since last refresh
    logger.info('Performing initial selection...')
    supported_refresh_imagery = ['mono_stereo', 'mono', 'stereo']
    logger.debug('Refresh imagery: {}'.format(refresh_imagery))
    if refresh_imagery in supported_refresh_imagery:
        if refresh_imagery == 'mono_stereo':
            noh_recent = query_footprint('index_dg', where=where)
        if refresh_imagery == 'mono':
            noh_recent = mono_noh(where=where, noh=drop_onhand)
        if refresh_imagery == 'stereo':
            noh_recent = stereo_noh(where=where, noh=drop_onhand)
    else:
        logger.warning('Refresh imagery type unrecognized, supported refresh imagery '
                       'options include: {}'.format(supported_refresh_imagery))

    logger.info('Initial IDs found: {:,}'.format(len(noh_recent)))

    # noh_recent = noh_recent.drop_duplicates(subset='catalogid')

    ### Spatial join to identify region
    logger.info('Identifying region of selected imagery...')
    # Save original columns
    noh_recent_cols = list(noh_recent)
    noh_recent_cols.append(loc_name_fld)
    # Calculate centroid
    noh_recent['centroid'] = noh_recent.centroid
    noh_recent.set_geometry('centroid', inplace=True)
    # Locate region of centroid
    noh_recent = gpd.sjoin(noh_recent, regions, how='left', op='within')
    noh_recent.drop('centroid', axis=1, inplace=True)
    noh_recent.set_geometry('geom', inplace=True)

    ### Identify only those in the region of interest
    # Get regions of interest based on type of refresh
    roi = refresh_region_lut(refresh_region)
    logger.debug('Regions included: {}'.format(roi))

    # Select region of interest
    noh_recent_roi = noh_recent[noh_recent[loc_name_fld].isin(roi)]
    # # Return to original columns
    # noh_recent_roi = noh_recent_roi[noh_recent_cols]

    logger.info('IDs in region(s) of interest: {:,}'.format(len(noh_recent_roi)))

    # Select only those features that intersect land polygons
    if use_land:
        logger.info('Selecting only imagery within land inclusion shapefile...')
        land_shp = r'E:\disbr007\imagery_orders\coastline_include_fix_geom_dis.shp'
        land = gpd.read_file(land_shp)
        # Drop 'index' columns if they exist
        drop_cols = [x for x in list(noh_recent_roi) if 'index' in x]
        noh_recent_roi = noh_recent_roi.drop(columns=drop_cols)

        noh_recent_roi = gpd.sjoin(noh_recent_roi, land, how='left')
        noh_recent_roi = noh_recent_roi[noh_recent_cols]
        logger.info('IDs over land: {}'.format(len(noh_recent_roi)))

    if aoi_path:
        # Drop 'index' columns if they exist
        drop_cols = [x for x in list(noh_recent_roi) if 'index' in x]
        noh_recent_roi = noh_recent_roi.drop(columns=drop_cols)

        aoi = gpd.read_file(aoi_path)
        noh_recent_roi = select_in_aoi(noh_recent_roi, aoi)
        # noh_recent_roi = noh_recent_roi[noh_recent_cols]
        logger.info('IDs over AOI: {}'.format(len(noh_recent_roi)))

    return noh_recent_roi
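# Illustrative usage sketch for refresh (not part of the original module). All
# argument values below are hypothetical; the region keyword must be one that
# refresh_region_lut understands, and a working danco connection is assumed.
def _example_refresh():
    selection = refresh(last_refresh='2020-01-01',
                        refresh_region='arctic',    # hypothetical region keyword
                        refresh_imagery='stereo',   # one of: mono_stereo, mono, stereo
                        max_cc=20, min_cc=0,
                        sensors=['WV01', 'WV02', 'WV03'],
                        use_land=True,
                        drop_onhand=True)
    return selection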
def main(args):
    # Parse args
    out_path = args.out_path
    num_ids = args.number_ids
    update_ordered = args.update_ordered
    use_land = args.do_not_use_land
    remove_oh = args.do_not_remove_oh
    sensors = args.sensors
    within_sensor = args.within_sensor
    min_date = args.min_date
    max_date = args.max_date
    min_ovlp = args.min_ovlp
    max_suneldiff = args.max_suneldiff
    min_meansunel = args.min_meansunel
    min_datediff = args.min_datediff
    max_datediff = args.max_datediff
    aoi_path = args.aoi
    projects = args.projects
    region_names = args.region_names
    out_footprint = args.out_footprint

    # Check for existence of aoi and out_path directory
    if aoi_path:
        if not os.path.exists(aoi_path):
            logger.error('AOI path does not exist: {}'.format(aoi_path))
            sys.exit()
        aoi = gpd.read_file(aoi_path)
    if not os.path.exists(os.path.dirname(out_path)):
        logger.warning('Out directory does not exist, creating: {}'.format(
            os.path.dirname(out_path)))
        os.makedirs(os.path.dirname(out_path))
    if out_footprint:
        if not os.path.exists(os.path.dirname(out_footprint)):
            logger.warning('Out directory does not exist, creating: {}'.format(
                os.path.dirname(out_footprint)))
            os.makedirs(os.path.dirname(out_footprint))

    where = create_where(sensors=sensors, min_date=min_date, max_date=max_date,
                         min_datediff=min_datediff, max_datediff=max_datediff,
                         min_ovlp=min_ovlp, max_suneldiff=max_suneldiff,
                         min_meansunel=min_meansunel, within_sensor=within_sensor,
                         noh=remove_oh, projects=projects, region_names=region_names)

    logger.info('Getting size of table with query...')
    table_total = count_table(xtrack_tbl, where=where)
    logger.info('Total table size with query: {:,}'.format(table_total))

    if remove_oh:
        # Get all onhand and ordered ids
        logger.info('Loading all onhand and ordered IDs...')
        oh_ids = set(onhand_ids(update=update_ordered))
        logger.info('Onhand and ordered IDs loaded: {:,}'.format(len(oh_ids)))
    else:
        oh_ids = set()

    # Load land shapefile if necessary
    if use_land:
        land = gpd.read_file(land_shp)

    # %% Iterate
    # Iterate chunks of table, calculating area and adding id1, id2, area to dictionary
    all_ids = []
    master = gpd.GeoDataFrame()
    limit = chunk_size
    offset = 0
    while offset < table_total:
        # Load chunk
        logger.info('Loading chunk: {:,} - {:,}'.format(offset, offset + limit))
        chunk = query_footprint(xtrack_tbl, columns=columns,
                                # orderby=orderby, orderby_asc=False,
                                where=where, limit=limit, offset=offset, dryrun=False)
        remaining_records = len(chunk)
        # Increase offset here so a 'continue' below cannot reload the same chunk
        offset += limit

        # Remove records where both IDs are onhand
        if remove_oh:
            logger.info('Dropping records where both IDs are onhand...')
            chunk = chunk[~((chunk['catalogid1'].isin(oh_ids)) &
                            (chunk['catalogid2'].isin(oh_ids)))]
            remaining_records = len(chunk)
            logger.info('Remaining records: {:,}'.format(remaining_records))
            if remaining_records == 0:
                continue

        # Find only IDs in AOI if provided
        if aoi_path:
            logger.info('Finding IDs in AOI...')
            chunk = select_in_aoi(chunk, aoi=aoi)
            remaining_records = len(chunk)
            logger.debug('Remaining records in AOI: {:,}'.format(remaining_records))
            if remaining_records == 0:
                continue

        if use_land:
            logger.info('Selecting IDs over land only...')
            chunk = select_in_aoi(chunk, aoi=land, centroid=True)
            remaining_records = len(chunk)
            logger.info('Remaining records over land: {:,}'.format(len(chunk)))
            if remaining_records == 0:
                continue

        # %% Calculate area for chunk
        logger.info('Calculating area...')
        chunk = area_calc(chunk, area_col=area_col)

        # Combine with master
        logger.info('Combining chunk with master...')
        master = pd.concat([master, chunk])

    # Select n records with highest area: sort largest to smallest so the
    # selection below starts with the largest pairs
    master = master.sort_values(by=area_col, ascending=False)
    master[cid1_oh_fld] = master[catid1_fld].isin(oh_ids)
    master[cid2_oh_fld] = master[catid2_fld].isin(oh_ids)

    if remove_oh:
        noh_str = ' not_on_hand'
    else:
        noh_str = ''
    logger.info('Finding {:,} out of {:,} IDs{}, starting with largest '
                'area...'.format(num_ids, len(master), noh_str))

    out_ids = set()
    kept_rows = set()
    num_kept_ids = len(out_ids)
    for i, row in master.iterrows():
        cid1 = row[catid1_fld]
        cid2 = row[catid2_fld]
        if not row[cid1_oh_fld] and cid1 not in out_ids:
            out_ids.add(cid1)
            kept_rows.add(row.name)
        if not row[cid2_oh_fld] and cid2 not in out_ids:
            out_ids.add(cid2)
            kept_rows.add(row.name)
        num_kept_ids = len(out_ids)
        if num_kept_ids >= num_ids:
            logger.info('{:,} IDs not on hand located. {:,} sqkm minimum kept.'.format(
                num_kept_ids, row[area_col]))
            break

    if num_kept_ids < num_ids:
        logger.warning('Only {:,} IDs found. Minimum area kept: {:,.2f}'.format(
            num_kept_ids, row[area_col]))

    # Select kept pairs (rows)
    kept_pairs = master[master.index.isin(kept_rows)]
    if out_footprint:
        logger.info('Writing footprint of pairs to: {}'.format(out_footprint))
        kept_pairs.to_file(out_footprint)

    # %% Write
    if not os.path.exists(os.path.dirname(out_path)):
        os.makedirs(os.path.dirname(out_path))
    logger.info('Writing {:,} IDs to: {}'.format(len(out_ids), out_path))
    write_ids(out_ids, out_path)
def swir_ordering(include_regions, out_ids):
    # Load earth_dem regions and pull region_ids out for selected regions
    edem = query_footprint(edem_lyr)
    regions = {
        'arctic': list(edem[edem['project'] == 'ArcticDEM']['region_id']),
        'antarctic': list(edem[edem['project'] == 'REMA']['region_id']),
        'conus': ['earthdem_04_great_lakes', 'earthdem_03_conus'],
        'global': list(edem['region_id'])
    }
    selected_region_ids = list()
    for sr in include_regions:
        selected_region_ids.extend(regions[sr])
    selected_region_ids = set(selected_region_ids)

    # Get selected regions
    selected_regions = edem[edem[region_id].isin(selected_region_ids)]
    uu = gpd.GeoDataFrame(geometry=[selected_regions.unary_union], crs='epsg:4326')

    # Iterate through index_dg and pull out SWIR IDs, then select only those in regions of interest
    master_swir = gpd.GeoDataFrame()
    offset = 0
    num_processed = 0
    swir_archive_count = count_table(index_dg, where=swir_where)
    logger.debug('Size of table for {} with WHERE {}: {:,}'.format(
        index_dg, swir_where, swir_archive_count))
    while num_processed <= swir_archive_count:
        logger.info('Loading with OFFSET {:,} LIMIT {:,}'.format(offset, limit))
        logger.debug('WHERE {}'.format(swir_where))
        swir = query_footprint(index_dg, where=swir_where, limit=limit, offset=offset)
        swir.geometry = swir.geometry.centroid
        logger.debug('Loaded SWIR: {:,}'.format(len(swir)))
        # Select in regions
        selected_swir_chunk = gpd.sjoin(swir, uu, op='within')
        offset += limit
        num_processed = offset
        logger.debug('SWIR IDs in selected regions in chunk: {:,}'.format(
            len(selected_swir_chunk)))
        if len(selected_swir_chunk) != 0:
            master_swir = pd.concat([master_swir, selected_swir_chunk])

    logger.info('SWIR IDs in regions: {:,}'.format(len(master_swir)))

    # Remove onhand SWIR
    oh_swir = set(list(query_footprint(pgc_oh, where=pgc_swir_where,
                                       table=True)['catalog_id']))
    selected_swir_noh = set(list(master_swir['catalogid'])) - oh_swir
    logger.info('SWIR IDs in regions and not on hand: {:,}'.format(len(selected_swir_noh)))

    # Write selected SWIR IDs
    with open(out_ids, 'w') as src:
        logger.debug('Writing IDs to: {}'.format(out_ids))
        for swir_id in selected_swir_noh:
            src.write('{}\n'.format(swir_id))

    return selected_swir_noh
parser.add_argument('--sort_by_date_descending', action='store_true',
                    help='Sort by date descending.')

args = parser.parse_args()

begin_date = args.begin_date
end_date = args.end_date
platform = args.platform
out_path = args.out_path
sort_by_date = args.sort_by_date
sort_by_date_descending = args.sort_by_date_descending

# Do it
logger.info('Loading records...')
platform_noh = query_danco.query_footprint(
    'dg_imagery_index_all_notonhand_cc20',
    where="platform = '{}'".format(platform))
platform_noh['acqdate'] = pd.to_datetime(platform_noh.acqdate)

selection = select_by_date(platform_noh, date_begin=begin_date, date_end=end_date)

if sort_by_date:
    ascending = True
elif sort_by_date_descending:
    ascending = False
if sort_by_date or sort_by_date_descending:
    logger.info('Sorting by date...')
    selection = selection.sort_values(by='acqdate', ascending=ascending)

out_name = '{}_{}_to_{}'.format(platform, date_words(begin_date), date_words(end_date))
def dem_selector(AOI_PATH, COORDS=None, MONTHS=None, MIN_DATE=None, MAX_DATE=None,
                 MULTISPEC=False, OUT_STEREO_FP=None, OUT_ID_LIST=None, CLOUDCOVER=None):
    """
    Select stereopairs over an AOI from the danco database.

    Parameters
    ----------
    AOI_PATH : os.path.abspath
        Path to AOI shapefile.
    COORDS : LIST
        xy coordinates in WGS84 to use for selection.
    MONTHS : LIST, optional
        List of month integers to include. The default is None.
    MIN_DATE : STR, optional
        Minimum DEM date to include. E.g '2015-01-30'. The default is None.
    MAX_DATE : STR, optional
        Maximum DEM date to include. The default is None.
    MULTISPEC : BOOL, optional
        True to only select stereo from multispectral sources. The default is False.
    CLOUDCOVER : INT
        Only include pairs with cloudcover below this threshold.
    OUT_STEREO_FP : os.path.abspath, optional
        Path to write DEM footprints shapefile to. The default is None.
    OUT_ID_LIST : os.path.abspath, optional
        Path to write catalogids of selected stereopair catalogids to. The default is None.

    Returns
    -------
    geopandas.GeoDataFrame : Dataframe of footprints matching selection.

    """
    #### PARAMETERS ####
    STEREO_FP = 'dg_imagery_index_stereo'  # stereo footprint tablename
    CATALOGID = 'catalogid'        # field name in danco footprint for catalogids
    DATE_COL = 'acqdate'           # name of date field in stereo footprint
    SENSOR_COL = 'platform'        # name of sensor field in stereo footprint
    PAIRNAME_COL = 'pairname'      # name of field with unique pairnames
    CLOUDCOVER_COL = 'cloudcover'  # name of field with cloudcover
    STEREOPAIR_ID = 'stereopair'   # name of field with stereopair catalogid
    MONTH_COL = 'month'            # name of field to create in footprint if months are requested

    #### SETUP ####
    def check_where(where):
        """Checks if the input string exists already, if so formats correctly for adding to SQL"""
        if where:
            where += ' AND '
        return where

    # Create logger
    logging.config.dictConfig(LOGGING_CONFIG('DEBUG'))
    logger = logging.getLogger(__name__)

    #### LOAD INPUTS ####
    # Load AOI
    logger.info('Reading AOI...')
    if AOI_PATH:
        aoi = gpd.read_file(AOI_PATH)
    elif COORDS:
        lon = float(COORDS[0])
        lat = float(COORDS[1])
        loc = Point(lon, lat)
        aoi = gpd.GeoDataFrame(geometry=[loc], crs="EPSG:4326")

    # Load stereopairs footprint
    # Get bounds of aoi to reduce query size, with padding
    minx, miny, maxx, maxy = aoi.total_bounds
    pad = 10
    # Get DEM footprint crs - this loads no records, but it
    # will allow getting the crs of the footprints
    stereo = query_footprint(STEREO_FP, where="1=2")

    # Load stereo
    # Build SQL clause to select stereo in the area of the AOI, helps with load times
    stereo_where = """x1 > {} AND x1 < {} AND y1 > {} AND y1 < {}""".format(
        minx - pad, maxx + pad, miny - pad, maxy + pad)
    # Add date constraints to SQL
    if MIN_DATE:
        stereo_where = check_where(stereo_where)
        stereo_where += """{} > '{}'""".format(DATE_COL, MIN_DATE)
    if MAX_DATE:
        stereo_where = check_where(stereo_where)
        stereo_where += """{} < '{}'""".format(DATE_COL, MAX_DATE)
    # Add to SQL clause to just select multispectral sensors
    if MULTISPEC:
        stereo_where = check_where(stereo_where)
        stereo_where += """{} IN ('WV02', 'WV03')""".format(SENSOR_COL)
    if CLOUDCOVER:
        stereo_where = check_where(stereo_where)
        stereo_where += """{} <= {}""".format(CLOUDCOVER_COL, CLOUDCOVER)

    # Load DEM footprints with SQL
    stereo = query_footprint(STEREO_FP, where=stereo_where)

    # If only certain months requested, reduce to those
    if MONTHS:
        stereo['temp_date'] = pd.to_datetime(stereo[DATE_COL])
        stereo[MONTH_COL] = stereo['temp_date'].dt.month
        stereo.drop(columns=['temp_date'], inplace=True)
        stereo = stereo[stereo[MONTH_COL].isin(MONTHS)]

    logger.info('Stereopairs matching criteria (before AOI selection): {}'.format(len(stereo)))

    # Check coordinate system match and if not reproject AOI
    if aoi.crs != stereo.crs:
        aoi = aoi.to_crs(stereo.crs)

    #### SELECT stereo OVER ALL AOIS ####
    logger.info('Selecting stereopairs over AOI...')
    # Select by location
    # stereo = gpd.overlay(stereo, aoi, how='intersection')
    stereo = gpd.sjoin(stereo, aoi, how='inner')
    # Remove duplicates resulting from intersection (not sure why DUPs)
    stereo = stereo.drop_duplicates(subset=(PAIRNAME_COL))
    logger.info('Stereopairs found over AOI: {}'.format(len(stereo)))

    if len(stereo) == 0:
        logger.error('No stereopairs found over AOI, exiting...')
        sys.exit()

    #### WRITE FOOTPRINT AND TXT OF MATCHES ####
    # Write footprint out
    if OUT_STEREO_FP:
        logger.info('Writing stereopair footprint to file: {}'.format(OUT_STEREO_FP))
        stereo.to_file(OUT_STEREO_FP)
    # Write list of IDs out
    if OUT_ID_LIST:
        logger.info('Writing list of catalogids to file: {}'.format(OUT_ID_LIST))
        write_stereopair_ids(list(stereo[CATALOGID]), list(stereo[STEREOPAIR_ID]),
                             header='catalogid, stereopair', out_path=OUT_ID_LIST)

    #### Summary Statistics ####
    count = len(stereo)
    min_date = stereo[DATE_COL].min()
    max_date = stereo[DATE_COL].max()

    logger.info("SUMMARY of STEREOPAIR SELECTION:")
    logger.info("Number of STEREOPAIRS: {}".format(count))
    logger.info("Earliest date: {}".format(min_date))
    logger.info("Latest date: {}".format(max_date))

    return stereo
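# Illustrative usage sketch for dem_selector (not part of the original module).
# The AOI and output paths are hypothetical placeholders; a working danco
# connection is assumed.
def _example_dem_selector():
    stereo = dem_selector(AOI_PATH=r'C:\temp\aoi.shp',   # hypothetical AOI shapefile
                          MONTHS=[6, 7, 8],
                          MIN_DATE='2015-01-30',
                          MULTISPEC=True,
                          CLOUDCOVER=20,
                          OUT_STEREO_FP=r'C:\temp\stereo_selection.shp',  # hypothetical outputs
                          OUT_ID_LIST=r'C:\temp\stereo_ids.txt')
    return stereo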
# Query footprint
where = 'x1 > {} AND x1 < {} AND y1 > {} and y1 < {}'.format(minx, maxx, miny, maxy)
logger.debug('Where clause for query: {}'.format(where))
count = count_table(danco_lyr, where=where, table=True, noh=noh)
logger.debug('Count for table with where clause: {:,}'.format(count))

#%%
usfs_fps = gpd.GeoDataFrame()
offset = 0
while offset < count:
    logger.debug('Loading records: {:,} - {:,}'.format(offset, offset + limit))
    # Load footprints
    fps = query_footprint(danco_lyr, where=where, limit=limit, offset=offset, noh=noh)
    # Intersect to find USFS footprints
    logger.debug('Identifying records on USFS land...')
    slice_usfs_fps = gpd.sjoin(fps, usfs, op='within')
    logger.debug('USFS records found: {}'.format(len(slice_usfs_fps)))
    # Merge to master dataframe
    usfs_fps = pd.concat([usfs_fps, slice_usfs_fps])
    logger.debug('Total USFS records found: {}'.format(len(usfs_fps)))
    # Increase offset
    offset += limit

usfs_fps_catids = set(usfs_fps['catalogid'])

#%%
# Remove onhand IDs
# oh = onhand_ids()
# mfp_ids = pgc_ids()