def remove_ids(src, oth, out_path=None, src_field=None, oth_field=None,
               write_text=None):
    logger.info('\nRemoving IDs in: {}\nFrom: {}'.format(oth, src))
    # Determine input types
    src_type = type_parser(src)
    oth_type = type_parser(oth)
    logger.debug('Source file type: {}'.format(src_type))
    logger.debug('Remove file type: {}'.format(oth_type))

    # Read in IDs
    logger.info('Reading IDs in source: {}'.format(os.path.basename(src)))
    if src_type == 'shp':
        src = gpd.read_file(src)
    src_ids = read_ids(src, field=src_field)
    logger.info('Reading IDs to remove: {}'.format(os.path.basename(oth)))
    oth_ids = read_ids(oth, field=oth_field)

    logger.info('Source IDs: {:,}'.format(len(src_ids)))
    if len(src_ids) != len(set(src_ids)):
        logger.info('Unique source IDs: {:,}'.format(len(set(src_ids))))
    logger.info('Other IDs: {:,}'.format(len(oth_ids)))
    if len(oth_ids) != len(set(oth_ids)):
        logger.info('Unique other IDs: {:,}'.format(len(set(oth_ids))))

    # Remove IDs in other from source
    rem_ids = set(src_ids) - set(oth_ids)
    ids_removed = len(src_ids) - len(rem_ids)
    logger.info('IDs removed: {:,}'.format(ids_removed))
    logger.info('Remaining IDs: {:,}'.format(len(rem_ids)))

    # Write source out without other IDs
    if ids_removed != 0:
        logger.info('Writing remaining IDs to: {}'.format(out_path))
        if src_type in ('shp', 'df'):
            out = src[src[src_field].isin(rem_ids)]
            if out_path:
                out.to_file(out_path)
            if write_text:
                write_ids(rem_ids, write_text)
        elif src_type == 'id_only_txt':
            out = rem_ids
            write_ids(out, out_path)
        else:
            logger.error('Source type not supported: {}'.format(src_type))
            out = None
    else:
        logger.warning('No IDs removed from source.')
        out = None

    return out
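# A minimal usage sketch for remove_ids (the paths and field name below are
# hypothetical, not from this repo): drop already-ordered catalog IDs from a
# selection shapefile, writing both a shapefile and a plain-text ID list.
#
#   remaining = remove_ids(src='selection.shp',
#                          oth='already_ordered.txt',
#                          out_path='selection_remaining.shp',
#                          src_field='catalog_id',
#                          write_text='remaining_ids.txt')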
def load_selection(input_path, JOIN_SEL, exclude_list=exclude_list):
    """
    Loads the selection and returns a list of IDs and a count.
    """
    logger.info('Loading selection...')
    ## Load selection footprint
    if input_path.endswith('shp'):
        selection = gpd.read_file(input_path)
        # selection_count = len(selection)
        if exclude_list:
            with open(exclude_list, 'r') as el:
                exclude_ids = el.readlines()
                exclude_ids = [ei.strip('\n') for ei in exclude_ids]
                logger.debug('Excluding IDs:\n{}'.format(
                    '\n'.join(exclude_ids)))
            selection = selection[~selection[JOIN_SEL].isin(exclude_ids)]
        selection_unique_ids = list(selection[JOIN_SEL].unique())
    elif input_path.endswith('txt'):
        selection_unique_ids = read_ids(input_path)

    selection_count = len(set(selection_unique_ids))
    logger.info('Scenes in selection: {:,}'.format(selection_count))
    selection_unique_ids_str = str(selection_unique_ids).replace(
        '[', '').replace(']', '')

    return selection_unique_ids_str, selection_count
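# A hedged example of load_selection (the paths and join field are
# hypothetical): the returned string is comma-separated, suitable for
# dropping directly into a SQL "IN (...)" clause.
#
#   ids_str, count = load_selection('selection.shp', JOIN_SEL='catalog_id',
#                                   exclude_list='exclude_ids.txt')
#   where = "catalog_id IN ({})".format(ids_str)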
def get_ids(source, id_of_int, field=None, source_write=None):
    """
    Takes a 'source' that is either a directory or a file and returns the
    IDs from that source.

    Parameters
    ----------
    source : os.path.abspath
        Either a file or directory path.
    id_of_int : STR
        Type of ID to parse the source for, usually one of:
        CATALOG_ID or SCENE_ID.
    field : STR
        If source is a shp, dbf, etc., the field name to pull IDs from.

    Returns
    -------
    ids : LIST
    """
    # Determine type of source (file or directory)
    if os.path.isdir(source):
        source_ids = imagery_directory_IDs(source, id_of_int)
    elif os.path.isfile(source):
        if field is None:
            field = id_of_int
        # Read in text file of IDs of interest (or shapefile, or CLI entry)
        source_ids = read_ids(source, field=field)
        source_ids = set(source_ids)
    else:
        logger.warning('Unknown source type for: {}'.format(source))
        source_ids = None

    if source_write is not None:
        logger.info('Writing IDs to: {}'.format(source_write))
        write_ids(source_ids, source_write)

    return source_ids
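# Usage sketch for get_ids (the directory path is hypothetical): pull
# CATALOG_IDs from a directory of imagery, writing them to a text file as a
# side effect.
#
#   cids = get_ids(r'C:\imagery\order1', id_of_int='CATALOG_ID',
#                  source_write='order1_catalogids.txt')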
def create_selector(selector_path, selection_method):
    """
    Create the selector object based on selection_method. If the selection
    method is by ID, creates a list of IDs. If the selection method is by
    location, creates a geodataframe to use for selection.
    """
    logger.debug('Creating selector from: {}'.format(
        os.path.basename(selector_path)))
    if selection_method == 'ID':
        selector = read_ids(selector_path)
    elif selection_method == 'LOC':
        selector = gpd.read_file(selector_path)
    elif not selection_method:
        selector = None
    else:
        logger.error('Unknown selection method: {}'.format(selection_method))
        selector = None

    return selector
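# Sketch of create_selector with both supported methods (paths hypothetical):
#
#   id_selector = create_selector('ids.txt', 'ID')     # list of IDs
#   loc_selector = create_selector('aoi.shp', 'LOC')   # GeoDataFrame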
def lookup_id_order(txt_file, all_orders=None, write_missing=False):
    """
    Takes a txt_file of IDs and writes an Excel file with the order
    location of each ID.

    txt_file: txt file of IDs, one per line
    all_orders: df containing IDs and order sheets
    """
    global ordered_ids_path

    if not isinstance(all_orders, pd.DataFrame):
        all_orders = pd.read_pickle(ordered_ids_path)

    txt_ids = read_ids(txt_file)
    ids_loc = all_orders.loc[all_orders['ids'].isin(txt_ids)]
    ids_loc.to_excel(os.path.join(os.path.dirname(txt_file),
                                  'order_sources.xlsx'),
                     index=False)
    # Was `if write_ids:` -- that names a function, so it was always truthy
    if write_missing:
        ordered = set(ids_loc.ids)
        missing = [x for x in txt_ids if x not in ordered]
        write_ids(missing, os.path.join(os.path.dirname(txt_file),
                                        'not_in_order.txt'))

    return ids_loc
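# Example call (path hypothetical): look up the order sheet for each ID in a
# text file, also writing any IDs that were never ordered to not_in_order.txt.
#
#   ids_loc = lookup_id_order(r'C:\ids\to_check.txt', write_missing=True)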
def select_AHAP(PHOTO_IDS=None, AOI_P=None, resolution=None, repeat=False,
                write=None):
    # String literals
    LAYER = 'usgs_index_aerial_image_archive'
    DB = 'imagery'
    # Path to AHAP photo extents shapefile
    PHOTO_EXT_P = r'E:\disbr007\general\aerial\AHAP\AHAP_Photo_Extents\AHAP_Photo_Extents.shp'
    # Identifier in AHAP photos shp
    PHOTO_ID = 'PHOTO_ID'
    # Identifier in AHAP photos table
    UNIQUE_ID = 'unique_id'
    SERIES = 'series'

    # Load danco AHAP imagery table
    logger.info('Reading AHAP danco table')
    where = "campaign = 'AHAP'"
    if resolution:
        where += " AND {} = '{}'".format(SERIES, resolution)
    aia = query_footprint(LAYER, db=DB, table=True, where=where)

    # Load photo extents
    logger.info("Loading AHAP photo extent shapefile...")
    PHOTO_EXT = gpd.read_file(PHOTO_EXT_P)

    if AOI_P:
        logger.info('Reading AOI shapefile...')
        # Load AOI and match crs
        AOI = gpd.read_file(os.path.join(AOI_P))
        AOI = AOI.to_crs(PHOTO_EXT.crs)
        logger.info('Selecting AHAP imagery by location...')
        # Select Photo Extents by intersection with AOI polygons
        selection = gpd.sjoin(PHOTO_EXT, AOI, how='inner', op='intersects')
    elif PHOTO_IDS:
        if os.path.isfile(PHOTO_IDS[0]):
            ids = read_ids(PHOTO_IDS[0])
        elif isinstance(PHOTO_IDS, list):
            ids = PHOTO_IDS
        selection = PHOTO_EXT[PHOTO_EXT[PHOTO_ID].isin(ids)]

    # Remove duplicate Photo Extents if specified
    if repeat is False:
        selection = selection.drop_duplicates(subset=PHOTO_ID)

    # Join to table with filenames
    selection = pd.merge(selection, aia, how='left',
                         left_on=PHOTO_ID, right_on=UNIQUE_ID)

    logger.info('Selected features found: {:,}'.format(len(selection)))

    # Write out shapefile
    if write is not None:
        logger.info('Writing AHAP selection to: {}'.format(write))
        selection.to_file(write)

    return selection
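# Two hedged ways to call select_AHAP (paths, IDs, and the series value are
# hypothetical): by AOI intersection, or by an explicit list of PHOTO_IDs.
#
#   sel = select_AHAP(AOI_P='aoi.shp', resolution='high',
#                     write='ahap_selection.shp')
#   sel = select_AHAP(PHOTO_IDS=['AR5840034080229', 'AR5840034080230'])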
    '--match_field',
    type=str,
    default='catalog_id',
    help='Field to match on. Usually one of catalog_id or scene_id.')
parser.add_argument(
    '--ids_field',
    type=str,
    default=None,
    help='Field in a shp or dbf IDs source containing the IDs.')
parser.add_argument(
    '--dryrun',
    action='store_true',
    help='Read IDs and locate in src, but do not perform move.')

args = parser.parse_args()

ids_src = args.ids
src_dir = args.src_dir
dst_dir = args.dst_dir
match_field = args.match_field
ids_field = args.ids_field
dryrun = args.dryrun

ids = read_ids(ids_src, field=ids_field)
match_and_move(ids, src_dir, dst_dir, match_field=match_field,
               dryrun=dryrun)
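# Example invocation of this script (script name and paths hypothetical; the
# positional argument order is assumed from the args.* attributes above):
#
#   python move_matching_files.py ids.txt C:\src_imagery C:\dst_imagery ^
#       --match_field catalog_id --dryrun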
out_dir = args.out_dir
out_suffix = args.out_suffix
raster_ext = args.raster_ext
move_meta = args.move_meta
overwrite = args.overwrite
dryrun = args.dryrun

# Check if list of rasters given or directory
rasters = args.rasters
if os.path.isdir(rasters[0]):
    logger.debug('Directory of rasters provided: {}'.format(rasters[0]))
    if not raster_ext:
        logger.warning('Directory provided, but no extension to identify '
                       'rasters. Provide raster_ext.')
    r_ps = os.listdir(rasters[0])
    # logger.info(r_ps)
    rasters = [os.path.join(rasters[0], r_p) for r_p in r_ps
               if r_p.endswith(raster_ext)]
    if len(rasters) == 0:
        logger.error('No rasters provided.')
        raise Exception
elif rasters[0].endswith('.txt'):
    rasters = read_ids(rasters[0])
# If list passed as args, no need to parse paths

logger.info('Input shapefile:\n{}'.format(shp_path))
logger.info('Input rasters:\n{}'.format('\n'.join(rasters)))
logger.info('Output directory:\n{}'.format(out_dir))

if not dryrun:
    clip_rasters(shp_path, rasters, out_dir=out_dir, out_suffix=out_suffix,
                 raster_ext=raster_ext, move_meta=move_meta,
                 overwrite=overwrite)
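# Example invocation (script name, paths, and flag names are hypothetical,
# mirroring the args.* attributes above): clip every .tif in a directory to a
# shapefile boundary.
#
#   python clip_rasters_to_shp.py --rasters C:\dems --raster_ext .tif ^
#       --out_dir C:\dems\clipped --out_suffix _clip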
def dem_selector(AOI_PATH=None, COORDS=None, SELECT_IDS_PATH=None,
                 select_field=None, strips=True, DEM_FP=None, INTRACK=None,
                 MONTHS=None, MIN_DATE=None, MAX_DATE=None, DATE_COL=None,
                 MULTISPEC=False, RES=None, DENSITY_THRESH=None,
                 LOCATE_DEMS=False, CALC_VALID=False, VALID_ON='dem',
                 VALID_THRESH=None, OUT_DEM_FP=None, OUT_ID_LIST=None,
                 BOTH_IDS=False, IMAGE1_IDS=False, OUT_FILEPATH_LIST=None):
    """
    Select DEMs over an AOI, either from a passed DEM_FP or from the
    danco database.

    Parameters
    ----------
    AOI_PATH : os.path.abspath
        Path to AOI shapefile.
    COORDS : tuple
        Lon, lat to use instead of an AOI.
    SELECT_IDS_PATH : os.path.abspath
        Path to text file of DEM IDs to select.
    select_field : list
        Name of field(s) in the DEM database to select the IDs in
        SELECT_IDS_PATH from.
    strips : bool
        True to select from the strip DEM database, False to use the
        scenes database.
    DEM_FP : os.path.abspath, optional
        Path to a footprint of DEMs. The default is None.
    MONTHS : LIST, optional
        List of month integers to include. The default is None.
    MIN_DATE : STR, optional
        Minimum DEM date to include, e.g. '2015-01-30'. The default is None.
    MAX_DATE : STR, optional
        Maximum DEM date to include. The default is None.
    MULTISPEC : BOOL, optional
        True to only select DEMs from multispectral sources.
        The default is False.
    DENSITY_THRESH : FLOAT, optional
        Minimum density value to keep. The default is None.
    OUT_DEM_FP : os.path.abspath, optional
        Path to write DEM footprints shapefile to. The default is None.
    OUT_ID_LIST : os.path.abspath, optional
        Path to write catalogids of selected DEMs to. Only one per DEM.
        The default is None.

    Returns
    -------
    geopandas.GeoDataFrame : Dataframe of footprints matching selection.
    """
    #### PARAMETERS ####
    # TODO: Make this an arg: scenes or strips (if strips, use Erik's gdb)
    DEM_SCENE_DB = 'sandwich-pool.dem'  # Sandwich DEM database
    DEM_SCENE_LYR = 'dem.scene_dem_master'  # Sandwich DEM footprint tablename

    # TODO: Move this to a config file with MFP location
    DEM_STRIP_GDB = r'E:\disbr007\dem\setsm\footprints\dem_strips_v4_20201120.gdb'
    DEM_STRIP_LYR = Path(DEM_STRIP_GDB).stem  # 'dem_strips_v4_20201120'
    DEM_STRIP_GDB_CRS = 'epsg:4326'

    # These are only used when verifying that DEMs exist - not necessary
    # for sandwich or Erik's gdb
    WINDOWS_OS = 'Windows'  # value returned by platform.system() for windows
    LINUX_OS = 'Linux'  # value returned by platform.system() for linux
    WINDOWS_LOC = 'win_path'  # field name of windows path in footprint
    LINUX_LOC = 'filepath'  # linux path field
    DEM_FNAME = 'dem_name'  # field name with filenames (with ext)
    # PAIRNAME = 'pairname'

    fields = {
        'DEMS_GEOM': 'wkb_geometry',  # Sandwich DEMs geometry name
        # Used only for writing catalogids to text file if requested
        'SCENEDEMID': 'scenedemid',
        'STRIPDEMID': 'stripdemid',
        'CATALOGID1': 'catalogid1',  # field name in danco DEM footprint for catalogids
        'CATALOGID2': 'catalogid2',
        'DEM_ID': 'dem_id',
        'PAIRNAME': 'pairname',
        'LOCATION': 'LOCATION',  # field name in footprint with path to dem file
        'PLATFORM_PATH': 'PLATFORM_PATH',  # created field with platform-specific path to dem file
        'VALID_ON': 'VALID_ON',  # created field name to hold path to file to calc valid on (bitmask, 10m, etc.)
        'BITMASK': 'bitmask',  # created field name in footprint to hold path to bitmask
        'DATE_COL': 'acqdate1',  # name of date field in dems footprint
        'DENSITY_COL': 'density',  # name of density field in dems footprint
        'SENSOR_COL': 'sensor1',  # name of sensor field in dems footprint
        'RES_COL': 'dem_res',
    }
    if strips:
        fields = {k: v.upper() for k, v in fields.items()}

    # Created fields
    VALID_PERC = 'valid_perc'  # created field name in footprint to store valid percent
    MONTH_COL = 'month'  # name of field to create in dems footprint if months are requested

    #### SETUP ####
    def check_where(where):
        """Checks if the input string exists already; if so, formats
        correctly for adding to the SQL WHERE clause."""
        if where:
            where += ' AND '
        return where

    #### LOAD INPUTS ####
    # Load AOI
    if AOI_PATH:
        logger.info('Reading AOI: {}'.format(AOI_PATH))
        aoi = gpd.read_file(AOI_PATH)
    elif COORDS:
        logger.info('Reading coordinates...')
        lon = float(COORDS[0])
        lat = float(COORDS[1])
        loc = Point(lon, lat)
        aoi = gpd.GeoDataFrame(geometry=[loc], crs="EPSG:4326")
    else:
        aoi = None

    # Load DEM footprints from either local strips GDB or sandwich table
    if DEM_FP or strips:
        # Load DEM index or footprint
        if not DEM_FP:
            logger.info(
                'Loading DEMs footprint from: {}'.format(DEM_STRIP_GDB))
            if aoi.crs != DEM_STRIP_GDB_CRS:
                aoi_bbox = aoi.to_crs(DEM_STRIP_GDB_CRS)
            else:
                aoi_bbox = aoi
            dems = gpd.read_file(DEM_STRIP_GDB, layer=DEM_STRIP_LYR,
                                 driver='OpenFileGDB', bbox=aoi_bbox)
            logger.debug('DEMs footprint loaded: {:,}'.format(len(dems)))
        else:
            logger.info('Reading provided DEM footprint...')
            dems = gpd.read_file(DEM_FP)
            logger.debug('DEMs loaded: {:,}'.format(len(dems)))

        # Subset by parameters provided
        if SELECT_IDS_PATH:
            select_ids = read_ids(SELECT_IDS_PATH)
            try:
                dems = dems[dems[select_field].isin(select_ids)]
            except KeyError:
                logger.error("Field '{}' not found in DEM footprint. "
                             "Available fields:\n"
                             "{}".format(select_field, '\n'.join(list(dems))))
            logger.debug('DEMs remaining after:'
                         ' {} in {}'.format(select_field, select_ids))
        if MIN_DATE:
            dems = dems[dems[fields['DATE_COL']] > MIN_DATE]
            logger.debug('DEMs remaining after min_date > {}: '
                         '{:,}'.format(MIN_DATE, len(dems)))
        if MAX_DATE:
            dems = dems[dems[fields['DATE_COL']] < MAX_DATE]
            logger.debug('DEMs remaining after max_date < {}: '
                         '{:,}'.format(MAX_DATE, len(dems)))
        if RES:
            dems = dems[dems[fields['RES_COL']] == RES]
            logger.debug('DEMs remaining after resolution = {}: '
                         '{:,}'.format(RES, len(dems)))
        if MULTISPEC:
            dems = dems[dems[fields['SENSOR_COL']].isin(['WV02', 'WV03'])]
            logger.debug('DEMs remaining after multispectral selection: '
                         '{:,}'.format(len(dems)))
        if DENSITY_THRESH:
            dems = dems[dems[fields['DENSITY_COL']] > DENSITY_THRESH]
            logger.debug('DEMs remaining after density > {}: '
                         '{:,}'.format(DENSITY_THRESH, len(dems)))
        if INTRACK:
            dems = dems[dems[IS_XTRACK] == 0]
            # int_pat = re.compile('(WV01|WV02|WV03)')
            # dems = dems[dems[fields['PAIRNAME']].str.contains(int_pat) == True]
            logger.debug('DEMs remaining after selecting only intrack: '
                         '{:,}'.format(len(dems)))

        logger.debug('Remaining DEMs after initial subsetting: '
                     '{:,}'.format(len(dems)))
        if len(dems) == 0:
            logger.warning('No DEMs found.')
            # sys.exit()

        if aoi is not None:
            # Check coordinate system match and if not reproject AOI
            if aoi.crs != dems.crs:
                aoi = aoi.to_crs(dems.crs)

            logger.info('Selecting DEMs over AOI...')
            # Select by location
            dem_cols = list(dems)
            dems = gpd.sjoin(dems, aoi)
            dems = dems[dem_cols]
            # Remove duplicates resulting from intersection (not sure why DUPs)
            dems = dems.drop_duplicates(subset=(fields['DEM_ID']))
    else:
        # Load DEMs
        dems_where = ""
        if aoi is not None:
            # Get DEM footprint crs - this loads only one record, but it
            # will allow getting the crs of the footprints
            with Postgres(db_name=DEM_SCENE_DB) as dem_db:
                crs_sql = generate_sql(layer=DEM_SCENE_LYR,
                                       geom_col=fields['DEMS_GEOM'],
                                       encode_geom_col='geom', limit=1)
                # logger.debug(crs_sql)
                dems = dem_db.sql2gdf(sql=crs_sql)
            # Build SQL clause to select DEMs that intersect AOI
            # Reproject if necessary
            if aoi.crs != dems.crs:
                aoi = aoi.to_crs(dems.crs)
            # Create PostGIS intersection where clause
            dems_where = intersect_aoi_where(aoi=aoi,
                                             geom_col=fields['DEMS_GEOM'])

        # Add ID constraints to SQL
        if SELECT_IDS_PATH:
            select_ids = read_ids(SELECT_IDS_PATH)
            ids_where = [
                """{} IN ({})""".format(sf, str(select_ids)[1:-1])
                for sf in select_field
            ]
            ids_where = "({})".format(" OR ".join(ids_where))
            dems_where = check_where(dems_where)
            dems_where += ids_where
        # Add date constraints to SQL
        if MIN_DATE:
            dems_where = check_where(dems_where)
            dems_where += """{} > '{}'""".format(fields['DATE_COL'], MIN_DATE)
        if MAX_DATE:
            dems_where = check_where(dems_where)
            dems_where += """{} < '{}'""".format(fields['DATE_COL'], MAX_DATE)
        # Add resolution constraints
        if RES:
            dems_where = check_where(dems_where)
            dems_where += """{} = {}""".format(fields['RES_COL'], RES)
        # Add to SQL clause to just select multispectral sensors
        if MULTISPEC:
            dems_where = check_where(dems_where)
            dems_where += """{} IN ('WV02', 'WV03')""".format(
                fields['SENSOR_COL'])
        # Add density threshold to SQL
        if DENSITY_THRESH:
            dems_where = check_where(dems_where)
            dems_where += """{} > {}""".format(fields['DENSITY_COL'],
                                               DENSITY_THRESH)
        if INTRACK:
            dems_where = check_where(dems_where)
            # Intrack pairnames begin with the sensor name; the original
            # built unquoted `LIKE WV01` clauses, which is invalid SQL
            intrack_wheres = [
                """({} LIKE '{}%')""".format(fields['PAIRNAME'], sensor)
                for sensor in ['WV01', 'WV02', 'WV03']
            ]
            dems_where += "({})".format(" OR ".join(intrack_wheres))

        # Load DEM footprints with SQL
        logger.info('Loading DEMs from {}.{}'.format(DEM_SCENE_DB,
                                                     DEM_SCENE_LYR))
        with Postgres(db_name=DEM_SCENE_DB) as dem_db:
            dems_sql = generate_sql(layer=DEM_SCENE_LYR, where=dems_where)
            logger.debug('SQL: {}'.format(dems_sql))
            dems = dem_db.sql2gdf(sql=dems_sql,
                                  geom_col=fields['DEMS_GEOM'])

    # If only certain months requested, reduce to those
    if MONTHS:
        logger.debug('Selecting by month...')
        dems['temp_date'] = pd.to_datetime(dems[fields['DATE_COL']])
        dems[MONTH_COL] = dems['temp_date'].dt.month
        dems.drop(columns=['temp_date'], inplace=True)
        dems = dems[dems[MONTH_COL].isin(MONTHS)]
        logger.info('DEMs remaining after months: {:,}'.format(len(dems)))

    logger.info('DEMs found matching specifications: {:,}'.format(len(dems)))
    if len(dems) == 0:
        logger.error('No DEMs found matching specifications, exiting...')
        sys.exit()

    # Create full path to server location, used for checking validity
    # Determine operating system for locating DEMs
    if LOCATE_DEMS:
        OS = platform.system()
        if OS == WINDOWS_OS:
            dems[fields['PLATFORM_PATH']] = dems[fields['LOCATION']].apply(
                lambda x: nunatak2windows(x))
        elif OS == LINUX_OS:
            dems[fields['PLATFORM_PATH']] = dems[fields['LOCATION']]

    # This was removed after DEMs moved to tape
    # # Subset to only those DEMs that actually can be found
    # logger.info('Checking for existence on file-system...')
    # dems = dems[dems[fields['PLATFORM_PATH']].apply(lambda x: os.path.exists(x))==True]

    #### GET VALID DATA PERCENT ####
    if CALC_VALID:
        # TODO: convert to valid aoi w/in bounds of footprint
        logger.info('Determining percent of non-NoData pixels over AOI for '
                    'each DEM using *_{}...'.format(VALID_ON))
        dems[fields['VALID_ON']] = dems[fields['PLATFORM_PATH']].\
            apply(lambda x: get_aux_file(dem_path=x, aux_file=VALID_ON))
        dems[VALID_PERC] = -9999.0
        for row in tqdm(dems[[fields['VALID_ON'], VALID_PERC]].itertuples(index=True),
                        total=len(dems)):
            vp = valid_percent(gdal_ds=row[1])
            dems.at[row.Index, VALID_PERC] = vp
        if VALID_THRESH:
            dems = dems[dems[VALID_PERC] > VALID_THRESH]

    #### WRITE FOOTPRINT AND TXT OF MATCHES ####
    # Write list of IDs out
    if OUT_ID_LIST:
        logger.info('Writing list of DEM catalogids to file: '
                    '{}'.format(OUT_ID_LIST))
        if IMAGE1_IDS:
            logger.info('Locating Image 1 IDs for each DEM...')
            dems['META_TXT'] = dems[fields['PLATFORM_PATH']]. \
                apply(lambda x: get_aux_file(dem_path=x, aux_file='meta'))
            dems['Image1_cid'] = dems['META_TXT'].apply(
                lambda x: get_dem_image1_id(x))
            dem_ids = list(set(dems['Image1_cid']))
        else:
            dem_ids = list(dems[fields['CATALOGID1']])
            if BOTH_IDS:
                dem_ids += list(dems[fields['CATALOGID2']])
        write_ids(dem_ids, OUT_ID_LIST)

    # Write footprint out
    if OUT_DEM_FP:
        logger.info('Writing DEMs footprint to file: {}'.format(OUT_DEM_FP))
        # dems.to_file(OUT_DEM_FP)
        write_gdf(dems, OUT_DEM_FP)

    # Write list of filepaths to DEMs
    if OUT_FILEPATH_LIST:
        logger.info('Writing selected DEM system filepaths to: '
                    '{}'.format(OUT_FILEPATH_LIST))
        try:
            filepaths = list(dems[fields['PLATFORM_PATH']])
        except KeyError as e:
            logger.error('PLATFORM_PATH field not found - use --locate_dems '
                         'flag to generate field.')
            logger.error(e)
            sys.exit()
        write_ids(filepaths, OUT_FILEPATH_LIST)

    #### Summary Statistics ####
    count = len(dems)
    min_date = dems[fields['DATE_COL']].min()
    max_date = dems[fields['DATE_COL']].max()
    if fields['DENSITY_COL'] in list(dems):
        min_density = dems[fields['DENSITY_COL']].min()
        max_density = dems[fields['DENSITY_COL']].max()
        avg_density = dems[fields['DENSITY_COL']].mean()
    if VALID_THRESH:
        min_valid = dems[VALID_PERC].min()
        max_valid = dems[VALID_PERC].max()
        avg_valid = dems[VALID_PERC].mean()

    logger.info("SUMMARY of DEM SELECTION:")
    logger.info("Number of DEMs: {:,}".format(count))
    logger.info("Earliest date: {}".format(min_date))
    logger.info("Latest date: {}".format(max_date))
    if fields['DENSITY_COL'] in list(dems):
        logger.info("Minimum density: {:.2}".format(min_density))
        logger.info("Maximum density: {:.2}".format(max_density))
        logger.info("Average density: {:.2}".format(avg_density))
    if VALID_THRESH:
        logger.info('Minimum valid percentage over AOI: {}'.format(min_valid))
        logger.info('Maximum valid percentage over AOI: {}'.format(max_valid))
        logger.info('Average valid percentage over AOI: {}'.format(avg_valid))

    return dems
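# A minimal sketch of dem_selector (AOI path and output paths hypothetical):
# select multispectral 2 m strip DEMs over an AOI from 2018 on, writing the
# footprint and one catalogid per DEM.
#
#   dems = dem_selector(AOI_PATH='aoi.shp', MIN_DATE='2018-01-01',
#                       MULTISPEC=True, RES=2.0,
#                       OUT_DEM_FP='dems_selection.shp',
#                       OUT_ID_LIST='dem_catalogids.txt')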