def classify_rts(sub_objects_path, super_objects_path,
                 headwall_candidates_out=None,
                 headwall_candidates_centroid_out=None,
                 rts_predis_out=None, rts_candidates_out=None,
                 aoi_path=None, headwall_candidates_in=None, aoi=None):
    logger.info('Classifying RTS...')

    #%% RULESET
    # Headwall Rules
    logger.info('Setting up headwall candidate rules...')
    # Ruggedness
    r_ruggedness = create_rule(rule_type=threshold_rule,
                               in_field=rug_mean,
                               op=operator.gt,
                               threshold=0.2,
                               out_field=True)
    # Surface Area Ratio
    r_saratio = create_rule(rule_type=threshold_rule,
                            in_field=sa_rat_mean,
                            op=operator.gt,
                            threshold=1.01,
                            out_field=True)
    # Slope (min)
    r_slope_min = create_rule(rule_type=threshold_rule,
                              in_field=slope_mean,
                              op=operator.gt,
                              threshold=8,
                              out_field=True)
    # Slope (max)
    r_slope_max = create_rule(rule_type=threshold_rule,
                              in_field=slope_mean,
                              op=operator.lt,
                              threshold=25,
                              out_field=True)
    # NDVI
    r_ndvi = create_rule(rule_type=threshold_rule,
                         in_field=ndvi_mean,
                         op=operator.lt,
                         threshold=0,
                         out_field=True)
    # MED
    r_med = create_rule(rule_type=threshold_rule,
                        in_field=med_mean,
                        op=operator.lt,
                        threshold=0,
                        out_field=True)
    # Curvature (high)
    r_curve = create_rule(rule_type=threshold_rule,
                          in_field=cur_mean,
                          op=operator.gt,
                          threshold=2.5,
                          out_field=True)
    # Difference in DEMs
    r_delev = create_rule(rule_type=threshold_rule,
                          in_field=delev_mean,
                          op=operator.lt,
                          threshold=-0.5,
                          out_field=True)
    # All simple threshold rules
    r_simple_thresholds = [r_ruggedness, r_saratio, r_slope_min, r_slope_max,
                           r_ndvi, r_med, r_curve, r_delev]

    # Adjacency rules
    # Adjacent Curvature
    r_adj_high_curv = create_rule(rule_type=adj_or_is_rule,
                                  in_field=cur_mean,
                                  op=operator.gt,
                                  threshold=30,
                                  out_field=True)
    r_adj_low_curv = create_rule(rule_type=adj_or_is_rule,
                                 in_field=cur_mean,
                                 op=operator.lt,
                                 threshold=-15,  # -30
                                 out_field=True)
    # Adjacent MED
    r_adj_low_med = create_rule(rule_type=adj_or_is_rule,
                                in_field=med_mean,
                                op=operator.lt,
                                threshold=-0.2,
                                out_field=True)
    # Adjacent or is high edge
    # r_adh_high_edge = create_rule(rule_type=adj_or_is_rule,
    #                               in_field=edge_mean,
    #                               op=operator.gt,
    #                               threshold=0.18,
    #                               out_field=True)
    # All adjacent rules
    r_adj_rules = [r_adj_low_curv, r_adj_high_curv, r_adj_low_med]

    #%% RTS Rules
    logger.info('Setting up RTS candidate rules...')
    r_rts_ndvi = create_rule(rule_type=threshold_rule,
                             in_field=ndvi_mean,
                             op=operator.lt,
                             threshold=0,
                             out_field=True)
    r_rts_med = create_rule(rule_type=threshold_rule,
                            in_field=med_mean,
                            op=operator.lt,
                            threshold=0.1,
                            out_field=True)
    r_rts_slope_low = create_rule(rule_type=threshold_rule,
                                  in_field=slope_mean,
                                  op=operator.gt,
                                  threshold=3,
                                  out_field=True)
    r_rts_slope_high = create_rule(rule_type=threshold_rule,
                                   in_field=slope_mean,
                                   op=operator.lt,
                                   threshold=20,
                                   out_field=True)
    r_rts_delev = create_rule(rule_type=threshold_rule,
                              in_field=delev_mean,
                              op=operator.lt,
                              threshold=-0.5,
                              out_field=True)
    r_rts_conhw = create_rule(rule_type=threshold_rule,
                              in_field=contains_hw,
                              op=operator.eq,
                              threshold=True,
                              out_field=True)
    r_rts_simple_thresholds = [r_rts_ndvi, r_rts_med, r_rts_slope_low,
                               r_rts_slope_high, r_rts_delev, r_rts_conhw]

    #%% HEADWALL CANDIDATES
    logger.info('Classifying headwall candidate objects...')
    #%% Load candidate headwall objects
    if not headwall_candidates_in:
        logger.info('Loading headwall candidate objects...')
        if aoi_path:
            # aoi = gpd.read_file(aoi_path)
            aoi = read_vec(aoi_path)
            logger.info('Subsetting objects to AOI...')
            gdf = select_in_aoi(read_vec(sub_objects_path), aoi,
                                centroid=True)
            hwc = ImageObjects(objects_path=gdf, value_fields=value_fields)
        else:
            hwc = ImageObjects(objects_path=sub_objects_path,
                               value_fields=value_fields)

        #%% Classify headwalls
        logger.info('Determining headwall candidates...')
        hwc.classify_objects(hw_candidate,
                             threshold_rules=r_simple_thresholds,
                             adj_rules=r_adj_rules)
        logger.info('Headwall candidates found: {:,}'.format(
            len(hwc.objects[hwc.objects[hwc.class_fld] == hw_candidate])))

        #%% Write headwall candidates
        logger.info('Writing headwall candidates...')
        hwc.write_objects(headwall_candidates_out,
                          to_str_cols=to_str_cols,
                          overwrite=True)
        # if headwall_candidates_centroid_out:
        #     hwc_centroid = ImageObjects(
        #         copy.deepcopy(
        #             hwc.objects.set_geometry(hwc.objects.geometry.centroid)))
        #     hwc_centroid.write_objects(headwall_candidates_centroid_out,
        #                                overwrite=True)
    else:
        hwc = ImageObjects(objects_path=headwall_candidates_in,
                           value_fields=value_fields)

    #%% RETROGRESSIVE THAW SLUMPS
    #%% Load super objects
    logger.info('Loading RTS candidate objects...')
    so = ImageObjects(super_objects_path, value_fields=value_fields)

    logger.info('Determining RTS candidates...')
    # Flag objects that contain headwall candidates, by full geometry and
    # by headwall centroid
    so.objects[contains_hw] = so.objects.apply(
        lambda x: overlay_any_objects(
            x.geometry,
            hwc.objects[hwc.objects[hwc.class_fld] == hw_candidate],
            predicate='contains'),
        axis=1)
    so.objects[contains_hw_cent] = so.objects.apply(
        lambda x: overlay_any_objects(
            x.geometry,
            hwc.objects[hwc.objects[hwc.class_fld] == hw_candidate],
            predicate='contains',
            others_centroid=True),
        axis=1)
    #%% Find objects that contain headwalls of a higher elevation than
    # themselves
    so.objects[contains_hw_gtr] = so.objects.apply(
        lambda x: overlay_any_objects(
            x.geometry,
            hwc.objects[hwc.objects[hwc.class_fld] == hw_candidate],
            predicate='contains',
            threshold=x[elev_mean],
            other_value_field=elev_mean,
            op=operator.gt,
            others_centroid=True),
        axis=1)

    #%% Classify
    so.classify_objects(class_name=rts_candidate,
                        threshold_rules=r_rts_simple_thresholds)
    # # Add bool field for RTS candidate or not
    # so.objects[rts_cand_bool] = np.where(
    #     so.objects[so.class_fld] == rts_candidate, 1, 0)
    logger.info('RTS candidates found: {:,}'.format(
        len(so.objects[so.objects[so.class_fld] == rts_candidate])))

    if rts_predis_out:
        # Write classified objects before growing
        so.write_objects(rts_predis_out,
                         to_str_cols=to_str_cols,
                         overwrite=True)

    #%% Dissolve touching candidates
    rts_dissolved = dissolve_touching(
        so.objects[so.objects[so.class_fld] == rts_candidate])
    so.objects = pd.concat(
        [so.objects[so.objects[so.class_fld] != rts_candidate],
         rts_dissolved])

    #%% Write RTS candidates
    logger.info('Writing RTS candidates...')
    so.write_objects(rts_candidates_out,
                     to_str_cols=to_str_cols,
                     overwrite=True)

    return rts_candidates_out
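# Example invocation of classify_rts() — a minimal sketch. All file paths
# below are hypothetical placeholders; the module-level names it relies on
# (value_fields, hw_candidate, rts_candidate, to_str_cols, etc.) are assumed
# to be defined elsewhere in this script, as above.
# rts_out = classify_rts(
#     sub_objects_path='objects/sub_objects.shp',
#     super_objects_path='objects/super_objects.shp',
#     headwall_candidates_out='objects/headwall_candidates.shp',
#     rts_predis_out='objects/rts_predissolve.shp',
#     rts_candidates_out='objects/rts_candidates.shp',
#     aoi_path='aoi/aoi.shp')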
# Adjacent Curvature
r_adj_high_curv = create_rule(rule_type='adjacent',
                              in_field=cur_mean,
                              op=operator.gt,
                              threshold=30,
                              out_field=True)
r_adj_low_curv = create_rule(rule_type='adjacent',
                             in_field=cur_mean,
                             op=operator.lt,
                             threshold=-15,
                             out_field=True)
# Adjacent MED
r_adj_low_med = create_rule(rule_type='adjacent',
                            in_field=med_mean,
                            op=operator.lt,
                            threshold=-0.2,
                            out_field=True)
# All adjacent rules
r_adj_rules = [r_adj_low_curv, r_adj_high_curv, r_adj_low_med]

#%% Load candidate headwall objects
logger.info('Loading headwall candidate objects...')
if aoi_p:
    aoi = gpd.read_file(aoi_p)
    logger.info('Subsetting objects to AOI...')
    gdf = select_in_aoi(gpd.read_file(hw_obj_p), aoi, centroid=True)
    hwc = ImageObjects(objects_path=gdf, value_fields=value_fields)
else:
    hwc = ImageObjects(objects_path=hw_obj_p, value_fields=value_fields)

#%% Subset by simple thresholds first
logger.info('Determining headwall candidates...')
hwc.apply_rules(r_simple_thresholds, out_field=simple_thresholds)

#%% Get neighbors for those objects that meet thresholds
hwc.get_neighbors(subset=hwc.objects[hwc.objects[simple_thresholds]])

#%%
hwc.compute_area()
# hwc.calc_object_stats()
hwc.compute_neighbor_values(cur_mean)
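# A minimal sketch of the "adjacent or is" test the adjacency rules above
# rely on. This is an illustration only, not the ImageObjects
# implementation: it assumes (hypothetically) that compute_neighbor_values()
# stored per-object {neighbor_index: value} dicts in a column such as
# 'cur_mean_nv'.
import operator


def adj_or_is(row, field, op, threshold, nv_col):
    # True if the object's own value passes the threshold...
    if op(row[field], threshold):
        return True
    # ...or if any of its computed neighbor values does.
    neighbor_values = row[nv_col] if isinstance(row[nv_col], dict) else {}
    return any(op(v, threshold) for v in neighbor_values.values())


# Hypothetical usage against the objects GeoDataFrame:
# hwc.objects['adj_high_curv'] = hwc.objects.apply(
#     adj_or_is, axis=1,
#     field=cur_mean, op=operator.gt, threshold=30, nv_col='cur_mean_nv')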
def refresh(last_refresh, refresh_region, refresh_imagery, max_cc, min_cc,
            sensors, aoi_path=None, use_land=True, refresh_thru=None,
            drop_onhand=True):
    """
    Select IDs for an imagery order refresh.

    Selects footprints acquired between last_refresh and refresh_thru
    with min_cc <= cloudcover <= max_cc.
    """
    if not refresh_thru:
        # Use today's date
        refresh_thru = datetime.datetime.now().strftime('%Y-%m-%d')

    where = ("(acqdate >= '{}' AND acqdate <= '{}') "
             "AND (cloudcover >= {} AND cloudcover <= {})".format(
                 last_refresh, refresh_thru, min_cc, max_cc))
    if sensors:
        where += " AND (platform IN ({}))".format(str(sensors)[1:-1])
    if aoi_path:
        aoi_where = generate_rough_aoi_where(aoi_path=aoi_path,
                                             x_fld='x1', y_fld='y1',
                                             pad=20.0)
        where += " AND {}".format(aoi_where)
    logger.debug('where: {}'.format(where))

    # Load regions shp
    regions_path = r"E:\disbr007\imagery_orders\all_regions.shp"
    logger.debug('Regions path: {}'.format(regions_path))
    # regions = gpd.read_file(regions_path, driver='ESRI_Shapefile')
    regions = query_footprint('pgc_earthdem_regions')

    # Load not on hand footprint -> since last refresh
    logger.info('Performing initial selection...')
    supported_refresh_imagery = ['mono_stereo', 'mono', 'stereo']
    logger.debug('Refresh imagery: {}'.format(refresh_imagery))
    if refresh_imagery in supported_refresh_imagery:
        if refresh_imagery == 'mono_stereo':
            noh_recent = query_footprint('index_dg', where=where)
        if refresh_imagery == 'mono':
            noh_recent = mono_noh(where=where, noh=drop_onhand)
        if refresh_imagery == 'stereo':
            noh_recent = stereo_noh(where=where, noh=drop_onhand)
    else:
        logger.error('Refresh imagery type unrecognized, supported options '
                     'include: {}'.format(supported_refresh_imagery))
        raise ValueError(
            'Unsupported refresh_imagery: {}'.format(refresh_imagery))
    logger.info('Initial IDs found: {:,}'.format(len(noh_recent)))
    # noh_recent = noh_recent.drop_duplicates(subset='catalogid')

    ### Spatial join to identify region
    logger.info('Identifying region of selected imagery...')
    # Save original columns
    noh_recent_cols = list(noh_recent)
    noh_recent_cols.append(loc_name_fld)
    # Calculate centroid
    noh_recent['centroid'] = noh_recent.centroid
    noh_recent.set_geometry('centroid', inplace=True)
    # Locate region of centroid
    noh_recent = gpd.sjoin(noh_recent, regions, how='left', op='within')
    noh_recent.drop('centroid', axis=1, inplace=True)
    noh_recent.set_geometry('geom', inplace=True)

    ### Identify only those in the region of interest
    # Get regions of interest based on type of refresh
    roi = refresh_region_lut(refresh_region)
    logger.debug('Regions included: {}'.format(roi))
    # Select region of interest
    noh_recent_roi = noh_recent[noh_recent[loc_name_fld].isin(roi)]
    # # Return to original columns
    # noh_recent_roi = noh_recent_roi[noh_recent_cols]
    logger.info('IDs in region(s) of interest: {:,}'.format(
        len(noh_recent_roi)))

    # Select only those features that intersect land polygons
    if use_land:
        logger.info('Selecting only imagery within land inclusion '
                    'shapefile...')
        land_shp = r'E:\disbr007\imagery_orders\coastline_include_fix_geom_dis.shp'
        land = gpd.read_file(land_shp)
        # Drop 'index' columns if they exist
        drop_cols = [x for x in list(noh_recent_roi) if 'index' in x]
        noh_recent_roi = noh_recent_roi.drop(columns=drop_cols)
        noh_recent_roi = gpd.sjoin(noh_recent_roi, land, how='left')
        noh_recent_roi = noh_recent_roi[noh_recent_cols]
        logger.info('IDs over land: {:,}'.format(len(noh_recent_roi)))

    if aoi_path:
        # Drop 'index' columns if they exist
        drop_cols = [x for x in list(noh_recent_roi) if 'index' in x]
        noh_recent_roi = noh_recent_roi.drop(columns=drop_cols)
        aoi = gpd.read_file(aoi_path)
        noh_recent_roi = select_in_aoi(noh_recent_roi, aoi)
        # noh_recent_roi = noh_recent_roi[noh_recent_cols]
        logger.info('IDs over AOI: {:,}'.format(len(noh_recent_roi)))

    return noh_recent_roi
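# Example invocation of refresh() — a minimal sketch. The dates, cloud-cover
# bounds, sensor codes, and region key below are hypothetical; valid
# refresh_region values are whatever refresh_region_lut() accepts.
# selection = refresh(last_refresh='2020-01-01',
#                     refresh_region='polar',
#                     refresh_imagery='stereo',
#                     max_cc=20, min_cc=0,
#                     sensors=['WV01', 'WV02', 'WV03'],
#                     use_land=True,
#                     drop_onhand=True)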
def main(args):
    # Parse args
    out_path = args.out_path
    num_ids = args.number_ids
    update_ordered = args.update_ordered
    use_land = args.do_not_use_land
    remove_oh = args.do_not_remove_oh
    sensors = args.sensors
    within_sensor = args.within_sensor
    min_date = args.min_date
    max_date = args.max_date
    min_ovlp = args.min_ovlp
    max_suneldiff = args.max_suneldiff
    min_meansunel = args.min_meansunel
    min_datediff = args.min_datediff
    max_datediff = args.max_datediff
    aoi_path = args.aoi
    projects = args.projects
    region_names = args.region_names
    out_footprint = args.out_footprint

    # Check for existence of aoi and out_path directory
    if aoi_path:
        if not os.path.exists(aoi_path):
            logger.error('AOI path does not exist: {}'.format(aoi_path))
            sys.exit()
        aoi = gpd.read_file(aoi_path)
    if not os.path.exists(os.path.dirname(out_path)):
        logger.warning('Out directory does not exist, creating: {}'.format(
            os.path.dirname(out_path)))
        os.makedirs(os.path.dirname(out_path))
    if out_footprint:
        if not os.path.exists(os.path.dirname(out_footprint)):
            logger.warning(
                'Out directory does not exist, creating: {}'.format(
                    os.path.dirname(out_footprint)))
            os.makedirs(os.path.dirname(out_footprint))

    where = create_where(sensors=sensors, min_date=min_date,
                         max_date=max_date,
                         min_datediff=min_datediff,
                         max_datediff=max_datediff,
                         min_ovlp=min_ovlp,
                         max_suneldiff=max_suneldiff,
                         min_meansunel=min_meansunel,
                         within_sensor=within_sensor,
                         noh=remove_oh,
                         projects=projects,
                         region_names=region_names)
    logger.info('Getting size of table with query...')
    table_total = count_table(xtrack_tbl, where=where)
    logger.info('Total table size with query: {:,}'.format(table_total))

    if remove_oh:
        # Get all onhand and ordered ids
        logger.info('Loading all onhand and ordered IDs...')
        oh_ids = set(onhand_ids(update=update_ordered))
        logger.info('Onhand and ordered IDs loaded: {:,}'.format(len(oh_ids)))
    else:
        oh_ids = set()

    # Load land shapefile if necessary
    if use_land:
        land = gpd.read_file(land_shp)

    # %% Iterate
    # Iterate chunks of table, calculating area and adding id1, id2, area
    # to master
    all_ids = []
    master = gpd.GeoDataFrame()
    limit = chunk_size
    offset = 0
    while offset < table_total:
        # Load chunk
        logger.info('Loading chunk: {:,} - {:,}'.format(
            offset, offset + limit))
        chunk = query_footprint(xtrack_tbl, columns=columns,
                                # orderby=orderby, orderby_asc=False,
                                where=where, limit=limit, offset=offset,
                                dryrun=False)
        # Increment offset immediately, so skipping an empty chunk via
        # continue cannot loop on the same offset forever
        offset += limit
        remaining_records = len(chunk)

        # Remove records where both IDs are onhand
        if remove_oh:
            logger.info('Dropping records where both IDs are onhand...')
            chunk = chunk[~((chunk['catalogid1'].isin(oh_ids)) &
                            (chunk['catalogid2'].isin(oh_ids)))]
            remaining_records = len(chunk)
            logger.info('Remaining records: {:,}'.format(remaining_records))
            if remaining_records == 0:
                continue
        # Find only IDs in AOI if provided
        if aoi_path:
            logger.info('Finding IDs in AOI...')
            chunk = select_in_aoi(chunk, aoi=aoi)
            remaining_records = len(chunk)
            logger.debug('Remaining records in AOI: {:,}'.format(
                remaining_records))
            if remaining_records == 0:
                continue
        if use_land:
            logger.info('Selecting IDs over land only...')
            chunk = select_in_aoi(chunk, aoi=land, centroid=True)
            remaining_records = len(chunk)
            logger.info('Remaining records over land: {:,}'.format(
                remaining_records))
            if remaining_records == 0:
                continue

        # %% Calculate area for chunk
        logger.info('Calculating area...')
        chunk = area_calc(chunk, area_col=area_col)
        # Combine with master
        logger.info('Combining chunk with master...')
        master = pd.concat([master, chunk])

    # Select n records with highest area, so sort largest first
    master = master.sort_values(by=area_col, ascending=False)
    master[cid1_oh_fld] = master[catid1_fld].isin(oh_ids)
    master[cid2_oh_fld] = master[catid2_fld].isin(oh_ids)

    if remove_oh:
        noh_str = ' not_on_hand'
    else:
        noh_str = ''
    logger.info('Finding {:,} out of {:,} IDs{}, starting with largest '
                'area...'.format(num_ids, len(master), noh_str))
    out_ids = set()
    kept_rows = set()
    num_kept_ids = len(out_ids)
    for i, row in master.iterrows():
        cid1 = row[catid1_fld]
        cid2 = row[catid2_fld]
        if not row[cid1_oh_fld] and cid1 not in out_ids:
            out_ids.add(cid1)
            kept_rows.add(row.name)
        if not row[cid2_oh_fld] and cid2 not in out_ids:
            out_ids.add(cid2)
            kept_rows.add(row.name)
        num_kept_ids = len(out_ids)
        if num_kept_ids >= num_ids:
            logger.info(
                '{:,} IDs not on hand located. '
                '{:,} sqkm minimum kept.'.format(
                    num_kept_ids, row[area_col]))
            break
    if num_kept_ids < num_ids:
        logger.warning(
            'Only {:,} IDs found. Minimum area kept: {:,.2f}'.format(
                num_kept_ids, row[area_col]))

    # Select kept pairs (rows)
    kept_pairs = master[master.index.isin(kept_rows)]
    if out_footprint:
        logger.info('Writing footprint of pairs to: {}'.format(
            out_footprint))
        kept_pairs.to_file(out_footprint)

    #%% Write
    if not os.path.exists(os.path.dirname(out_path)):
        os.makedirs(os.path.dirname(out_path))
    logger.info('Writing {:,} IDs to: {}'.format(len(out_ids), out_path))
    write_ids(out_ids, out_path)
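if __name__ == '__main__':
    # A minimal sketch of a CLI wrapper for main(). The destination names
    # match the attribute accesses in main() above, but the flag spellings,
    # types, and defaults are assumptions, not a confirmed interface.
    import argparse

    parser = argparse.ArgumentParser(
        description='Select cross-track pair IDs, largest area first.')
    parser.add_argument('--out_path', required=True)
    parser.add_argument('--number_ids', type=int, required=True)
    parser.add_argument('--update_ordered', action='store_true')
    # store_false: passing the flag disables the behavior (default True)
    parser.add_argument('--do_not_use_land', action='store_false')
    parser.add_argument('--do_not_remove_oh', action='store_false')
    parser.add_argument('--sensors', nargs='+')
    parser.add_argument('--within_sensor', action='store_true')
    parser.add_argument('--min_date')
    parser.add_argument('--max_date')
    parser.add_argument('--min_ovlp', type=float)
    parser.add_argument('--max_suneldiff', type=float)
    parser.add_argument('--min_meansunel', type=float)
    parser.add_argument('--min_datediff', type=int)
    parser.add_argument('--max_datediff', type=int)
    parser.add_argument('--aoi')
    parser.add_argument('--projects', nargs='+')
    parser.add_argument('--region_names', nargs='+')
    parser.add_argument('--out_footprint')

    main(parser.parse_args())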