Beispiel #1
0
def classify_rts(sub_objects_path,
                 super_objects_path,
                 headwall_candidates_out=None,
                 headwall_candidates_centroid_out=None,
                 rts_predis_out=None,
                 rts_candidates_out=None,
                 aoi_path=None,
                 headwall_candidates_in=None,
                 aoi=None):
    """Classify headwall candidates, then RTS candidates, and write both.

    Sub objects are classified as headwall candidates using threshold and
    adjacency rules (unless pre-classified candidates are supplied via
    ``headwall_candidates_in``). Super objects are then flagged according to
    whether they contain headwall candidates, classified as RTS candidates
    with threshold rules, touching candidates are dissolved, and the result
    is written to ``rts_candidates_out``.

    Parameters
    ----------
    sub_objects_path
        Source of the objects to classify as headwall candidates. Only used
        when ``headwall_candidates_in`` is falsy.
    super_objects_path
        Source of the objects to classify as RTS candidates.
    headwall_candidates_out
        Destination passed to ``write_objects`` for the classified headwall
        objects. Written unconditionally whenever headwalls are classified
        here, so a value is effectively required in that case.
    headwall_candidates_centroid_out
        Currently unused.
    rts_predis_out
        If truthy, the classified super objects are also written here before
        touching candidates are dissolved.
    rts_candidates_out
        Destination passed to ``write_objects`` for the final objects.
    aoi_path
        If truthy, read with ``read_vec`` and used to subset the sub objects
        (``select_in_aoi(..., centroid=True)``) before classification.
    headwall_candidates_in
        If truthy, loaded as the headwall objects instead of classifying
        ``sub_objects_path``.
    aoi
        Not read; the name is rebound when ``aoi_path`` is given.

    Returns
    -------
    The ``rts_candidates_out`` argument, unchanged.
    """
    logger.info('Classifying RTS...')

    def _threshold(in_field, op, threshold):
        # Rule evaluated on each object's own value
        return create_rule(rule_type=threshold_rule,
                           in_field=in_field,
                           op=op,
                           threshold=threshold,
                           out_field=True)

    def _adjacent(in_field, op, threshold):
        # Rule using the ``adj_or_is_rule`` rule type
        return create_rule(rule_type=adj_or_is_rule,
                           in_field=in_field,
                           op=op,
                           threshold=threshold,
                           out_field=True)

    #%% RULESET
    # Headwall Rules
    logger.info('Setting up headwall candidate rules...')
    r_ruggedness = _threshold(rug_mean, operator.gt, 0.2)    # Ruggedness
    r_saratio = _threshold(sa_rat_mean, operator.gt, 1.01)   # Surface Area Ratio
    r_slope_min = _threshold(slope_mean, operator.gt, 8)     # Slope (min)
    r_slope_max = _threshold(slope_mean, operator.lt, 25)    # Slope (max)
    r_ndvi = _threshold(ndvi_mean, operator.lt, 0)           # NDVI
    r_med = _threshold(med_mean, operator.lt, 0)             # MED
    r_curve = _threshold(cur_mean, operator.gt, 2.5)         # Curvature (high)
    r_delev = _threshold(delev_mean, operator.lt, -0.5)      # Difference in DEMs

    # All simple threshold rules
    r_simple_thresholds = [
        r_ruggedness, r_saratio, r_slope_min, r_slope_max, r_ndvi, r_med,
        r_curve, r_delev
    ]

    # Adjacency rules
    r_adj_high_curv = _adjacent(cur_mean, operator.gt, 30)
    # The original carried a trailing "-30" note next to this threshold
    r_adj_low_curv = _adjacent(cur_mean, operator.lt, -15)
    r_adj_low_med = _adjacent(med_mean, operator.lt, -0.2)
    # All adjacent rules
    r_adj_rules = [r_adj_low_curv, r_adj_high_curv, r_adj_low_med]

    #%% RTS Rules
    logger.info('Setting up RTS candidate rules...')
    r_rts_ndvi = _threshold(ndvi_mean, operator.lt, 0)
    r_rts_med = _threshold(med_mean, operator.lt, 0.1)
    r_rts_slope_low = _threshold(slope_mean, operator.gt, 3)
    r_rts_slope_high = _threshold(slope_mean, operator.lt, 20)
    r_rts_delev = _threshold(delev_mean, operator.lt, -0.5)
    # Requires the ``contains_hw`` flag computed on the super objects below
    r_rts_conhw = _threshold(contains_hw, operator.eq, True)

    r_rts_simple_thresholds = [
        r_rts_ndvi, r_rts_med, r_rts_slope_low, r_rts_slope_high, r_rts_delev,
        r_rts_conhw
    ]

    #%% HEADWALL CANDIDATES
    logger.info('Classifying headwall candidate objects...')
    #%% Load candidate headwall objects
    if not headwall_candidates_in:
        logger.info('Loading headwall candidate objects...')
        if aoi_path:
            aoi = read_vec(aoi_path)
            logger.info('Subsetting objects to AOI...')
            gdf = select_in_aoi(read_vec(sub_objects_path), aoi, centroid=True)
            hwc = ImageObjects(objects_path=gdf, value_fields=value_fields)
        else:
            hwc = ImageObjects(objects_path=sub_objects_path,
                               value_fields=value_fields)

        #%% Classify headwalls
        logger.info('Determining headwall candidates...')
        hwc.classify_objects(hw_candidate,
                             threshold_rules=r_simple_thresholds,
                             adj_rules=r_adj_rules)
        logger.info('Headwall candidates found: {:,}'.format(
            len(hwc.objects[hwc.objects[hwc.class_fld] == hw_candidate])))

        #%% Write headwall candidates
        logger.info('Writing headwall candidates...')
        hwc.write_objects(headwall_candidates_out,
                          to_str_cols=to_str_cols,
                          overwrite=True)
        # NOTE: writing centroids to headwall_candidates_centroid_out is not
        # implemented; that argument is ignored.
    else:
        hwc = ImageObjects(objects_path=headwall_candidates_in,
                           value_fields=value_fields)

    #%% RETROGRESSIVE THAW SLUMPS
    #%% Load super objects
    logger.info('Loading RTS candidate objects...')
    so = ImageObjects(super_objects_path, value_fields=value_fields)
    logger.info('Determining RTS candidates...')

    # The headwall candidate subset does not change while the super objects
    # are processed, so build it once instead of once per row per field.
    hw_candidates = hwc.objects[hwc.objects[hwc.class_fld] == hw_candidate]

    #%% Find objects that contain headwalls of a higher elevation than
    # themselves
    # NOTE(review): this result is overwritten by the second assignment to
    # ``contains_hw_gtr`` further down (the centroid-based variant). One of
    # the two probably should target a separate field -- confirm. Both are
    # kept so the output is unchanged.
    so.objects[contains_hw_gtr] = so.objects.apply(
        lambda x: overlay_any_objects(x.geometry,
                                      hw_candidates,
                                      predicate='contains',
                                      threshold=x[elev_mean],
                                      other_value_field=elev_mean,
                                      op=operator.gt),
        axis=1)
    so.objects[contains_hw] = so.objects.apply(
        lambda x: overlay_any_objects(x.geometry,
                                      hw_candidates,
                                      predicate='contains'),
        axis=1)
    so.objects[contains_hw_cent] = so.objects.apply(
        lambda x: overlay_any_objects(x.geometry,
                                      hw_candidates,
                                      predicate='contains',
                                      others_centroid=True),
        axis=1)
    so.objects[contains_hw_gtr] = so.objects.apply(
        lambda x: overlay_any_objects(x.geometry,
                                      hw_candidates,
                                      predicate='contains',
                                      threshold=x[elev_mean],
                                      other_value_field=elev_mean,
                                      op=operator.gt,
                                      others_centroid=True),
        axis=1)

    #%% Classify
    so.classify_objects(class_name=rts_candidate,
                        threshold_rules=r_rts_simple_thresholds)

    logger.info('RTS candidates found: {}'.format(
        len(so.objects[so.objects[so.class_fld] == rts_candidate])))

    if rts_predis_out:
        # Write classified objects before dissolving
        so.write_objects(rts_predis_out,
                         to_str_cols=to_str_cols,
                         overwrite=True)

    #%% Dissolve touching candidates
    rts_dissolved = dissolve_touching(
        so.objects[so.objects[so.class_fld] == rts_candidate])
    # Non-candidates are kept as they were; candidates are replaced by their
    # dissolved counterparts.
    so.objects = pd.concat(
        [so.objects[so.objects[so.class_fld] != rts_candidate], rts_dissolved])

    #%% Write RTS candidates
    logger.info('Writing RTS candidates...')
    so.write_objects(rts_candidates_out,
                     to_str_cols=to_str_cols,
                     overwrite=True)

    return rts_candidates_out
Beispiel #2
0
                             out_field=True)
# Adjacency rule on MED: in_field below -0.2
r_adj_low_med = create_rule(
    rule_type='adjacent',
    in_field=med_mean,
    op=operator.lt,
    threshold=-0.2,
    out_field=True,
)
# Collect every adjacency rule
r_adj_rules = [r_adj_low_curv, r_adj_high_curv, r_adj_low_med]

#%% Load candidate headwall objects
logger.info('Loading headwall candidate objects...')
if not aoi_p:
    # No AOI given: load the objects straight from their path
    hwc = ImageObjects(objects_path=hw_obj_p, value_fields=value_fields)
else:
    # AOI given: keep only objects selected by select_in_aoi (centroid=True)
    aoi = gpd.read_file(aoi_p)
    logger.info('Subsetting objects to AOI...')
    gdf = select_in_aoi(gpd.read_file(hw_obj_p), aoi, centroid=True)
    hwc = ImageObjects(objects_path=gdf, value_fields=value_fields)

#%% Subset by simple thresholds first
logger.info('Determining headwall candidates...')
hwc.apply_rules(r_simple_thresholds, out_field=simple_thresholds)

#%% Get neighbors for those objects that meet thresholds
_meets_thresholds = hwc.objects[simple_thresholds]
hwc.get_neighbors(subset=hwc.objects[_meets_thresholds])

#%% Per-object area, then neighbor values of the curvature field
hwc.compute_area()
hwc.compute_neighbor_values(cur_mean)
Beispiel #3
0
def refresh(last_refresh,
            refresh_region,
            refresh_imagery,
            max_cc,
            min_cc,
            sensors,
            aoi_path=None,
            use_land=True,
            refresh_thru=None,
            drop_onhand=True):
    """Select footprints of imagery acquired since the last refresh.

    Builds a SQL ``where`` clause from the date range, cloud cover range,
    sensors and (optionally) a rough AOI bounding filter, loads matching
    footprints, tags each with the region containing its centroid, and
    narrows the selection to the requested regions, to land, and to the AOI.

    Parameters
    ----------
    last_refresh
        Earliest acquisition date to include (``acqdate >=``).
    refresh_region
        Key passed to ``refresh_region_lut`` to get the regions of interest.
    refresh_imagery
        One of ``'mono_stereo'``, ``'mono'`` or ``'stereo'``.
    max_cc, min_cc
        Inclusive cloud cover bounds.
    sensors
        Platforms to include; if falsy, no platform filter is applied.
    aoi_path
        Optional path to an AOI file used both for a rough SQL filter and a
        final ``select_in_aoi`` selection.
    use_land
        If true, keep only footprints that intersect the land polygons.
    refresh_thru
        Latest acquisition date to include; defaults to today's date.
    drop_onhand
        Passed as ``noh`` to ``mono_noh`` / ``stereo_noh``. Not used for
        ``'mono_stereo'``.

    Returns
    -------
    The selected footprints (GeoDataFrame).

    Raises
    ------
    ValueError
        If ``refresh_imagery`` is not a supported option.
    """
    supported_refresh_imagery = ['mono_stereo', 'mono', 'stereo']
    if refresh_imagery not in supported_refresh_imagery:
        # Previously this only logged a warning and then failed later on an
        # unbound local; fail immediately with a clear message instead.
        msg = ('Refresh imagery type unrecognized: {}. Supported refresh '
               'imagery options include: {}'.format(
                   refresh_imagery, supported_refresh_imagery))
        logger.error(msg)
        raise ValueError(msg)

    if not refresh_thru:
        # Use today's date
        refresh_thru = datetime.datetime.now().strftime('%Y-%m-%d')

    # NOTE(review): the clause is assembled by string formatting; the values
    # are assumed to come from trusted callers -- confirm.
    where = "(acqdate >= '{}' AND acqdate <= '{}') AND (cloudcover >= {} AND cloudcover <= {})".format(
        last_refresh, refresh_thru, min_cc, max_cc)
    if sensors:
        # str(list)[1:-1] renders the list as a quoted, comma separated string
        where += " AND (platform IN ({}))".format(str(sensors)[1:-1])

    if aoi_path:
        aoi_where = generate_rough_aoi_where(aoi_path=aoi_path,
                                             x_fld='x1',
                                             y_fld='y1',
                                             pad=20.0)
        where += " AND {}".format(aoi_where)

    logger.debug('where: {}'.format(where))

    # Load regions
    # NOTE(review): this path is only logged; the regions themselves are
    # loaded through query_footprint below.
    regions_path = r"E:\disbr007\imagery_orders\all_regions.shp"
    logger.debug('Regions path: {}'.format(regions_path))
    regions = query_footprint('pgc_earthdem_regions')

    # Load not on hand footprint -> since last refresh
    logger.info('Performing initial selection...')
    logger.debug('Refresh imagery: {}'.format(refresh_imagery))
    if refresh_imagery == 'mono_stereo':
        noh_recent = query_footprint('index_dg', where=where)
    elif refresh_imagery == 'mono':
        noh_recent = mono_noh(where=where, noh=drop_onhand)
    elif refresh_imagery == 'stereo':
        noh_recent = stereo_noh(where=where, noh=drop_onhand)

    logger.info('Initial IDs found: {:,}'.format(len(noh_recent)))

    ### Spatial join to identify region
    logger.info('Identifying region of selected imagery...')
    # Save original columns, plus the region name column added by the join
    noh_recent_cols = list(noh_recent)
    noh_recent_cols.append(loc_name_fld)
    # Temporarily use the centroid as geometry so each footprint falls
    # 'within' a region by its centroid
    noh_recent['centroid'] = noh_recent.centroid
    noh_recent.set_geometry('centroid', inplace=True)
    # NOTE(review): newer geopandas releases name this argument ``predicate``
    # instead of ``op`` -- confirm against the installed version.
    noh_recent = gpd.sjoin(noh_recent, regions, how='left', op='within')
    noh_recent.drop('centroid', axis=1, inplace=True)
    noh_recent.set_geometry('geom', inplace=True)

    ### Identify only those in the region of interest
    roi = refresh_region_lut(refresh_region)
    logger.debug('Regions included: {}'.format(roi))
    noh_recent_roi = noh_recent[noh_recent[loc_name_fld].isin(roi)]

    logger.info('IDs in region(s) of interest: {:,}'.format(
        len(noh_recent_roi)))

    # Select only those features that intersect land polygons
    if use_land:
        logger.info(
            'Selecting only imagery within land inclusion shapefile...')
        land_shp = r'E:\disbr007\imagery_orders\coastline_include_fix_geom_dis.shp'
        land = gpd.read_file(land_shp)
        # Drop 'index' columns left by the previous join, if they exist
        drop_cols = [x for x in list(noh_recent_roi) if 'index' in x]
        noh_recent_roi = noh_recent_roi.drop(columns=drop_cols)
        # An inner join keeps only footprints that intersect land; the
        # previous left join kept every footprint and so filtered nothing.
        noh_recent_roi = gpd.sjoin(noh_recent_roi, land, how='inner')
        noh_recent_roi = noh_recent_roi[noh_recent_cols]

        logger.info('IDs over land: {}'.format(len(noh_recent_roi)))

    if aoi_path:
        # Drop 'index' columns left by previous joins, if they exist
        drop_cols = [x for x in list(noh_recent_roi) if 'index' in x]
        noh_recent_roi = noh_recent_roi.drop(columns=drop_cols)

        aoi = gpd.read_file(aoi_path)
        noh_recent_roi = select_in_aoi(noh_recent_roi, aoi)
        logger.info('IDs over AOI: {}'.format(len(noh_recent_roi)))

    return noh_recent_roi
def main(args):
    """Select cross-track pair IDs by largest area and write them out.

    Reads the cross-track table in chunks, optionally drops pairs where both
    IDs are already on hand, restricts to an AOI and/or land, computes the
    area of each remaining pair, and then walks the pairs from largest to
    smallest area collecting IDs that are not on hand until
    ``args.number_ids`` IDs have been gathered.

    Parameters
    ----------
    args
        Namespace providing: ``out_path``, ``number_ids``, ``update_ordered``,
        ``do_not_use_land``, ``do_not_remove_oh``, ``sensors``,
        ``within_sensor``, ``min_date``, ``max_date``, ``min_ovlp``,
        ``max_suneldiff``, ``min_meansunel``, ``min_datediff``,
        ``max_datediff``, ``aoi``, ``projects``, ``region_names`` and
        ``out_footprint``.

    Side effects
    ------------
    Writes the selected IDs to ``args.out_path`` and, if given, the kept
    pairs to ``args.out_footprint``. Exits with status 1 if the AOI path does
    not exist. Returns without writing if no records survive filtering.
    """
    # Parse args
    out_path = args.out_path
    num_ids = args.number_ids
    update_ordered = args.update_ordered
    # NOTE(review): presumably these two flags are stored inverted by the
    # argument parser (true unless the flag is passed) -- confirm.
    use_land = args.do_not_use_land
    remove_oh = args.do_not_remove_oh
    sensors = args.sensors
    within_sensor = args.within_sensor
    min_date = args.min_date
    max_date = args.max_date
    min_ovlp = args.min_ovlp
    max_suneldiff = args.max_suneldiff
    min_meansunel = args.min_meansunel
    min_datediff = args.min_datediff
    max_datediff = args.max_datediff
    aoi_path = args.aoi
    projects = args.projects
    region_names = args.region_names
    out_footprint = args.out_footprint

    # Check for existence of aoi and out_path directory
    aoi = None
    if aoi_path:
        if not os.path.exists(aoi_path):
            logger.error('AOI path does not exist: {}'.format(aoi_path))
            # Non-zero status so callers can detect the failure
            sys.exit(1)
        aoi = gpd.read_file(aoi_path)

    out_dirs = [os.path.dirname(out_path)]
    if out_footprint:
        out_dirs.append(os.path.dirname(out_footprint))
    for out_dir in out_dirs:
        # An empty dirname means the current directory; nothing to create
        if out_dir and not os.path.exists(out_dir):
            logger.warning('Out directory does not exist, creating: {}'.format(
                out_dir))
            os.makedirs(out_dir)

    where = create_where(sensors=sensors,
                         min_date=min_date,
                         max_date=max_date,
                         min_datediff=min_datediff,
                         max_datediff=max_datediff,
                         min_ovlp=min_ovlp,
                         max_suneldiff=max_suneldiff,
                         min_meansunel=min_meansunel,
                         within_sensor=within_sensor,
                         noh=remove_oh,
                         projects=projects,
                         region_names=region_names)

    logger.info('Getting size of table with query...')
    table_total = count_table(xtrack_tbl, where=where)
    logger.info('Total table size with query: {:,}'.format(table_total))

    if remove_oh:
        # Get all onhand and ordered ids
        logger.info('Loading all onhand and ordered IDs...')
        oh_ids = set(onhand_ids(update=update_ordered))
        logger.info('Onhand and ordered IDs loaded: {:,}'.format(len(oh_ids)))
    else:
        oh_ids = set()

    # Load land shapefile if necessary
    if use_land:
        land = gpd.read_file(land_shp)

    # %% Iterate
    # Iterate chunks of table, filtering and calculating area for each.
    # Iterating over a range guarantees the offset advances even when a
    # chunk is skipped; the previous while-loop never advanced on `continue`.
    chunks = []
    limit = chunk_size
    for offset in range(0, table_total, limit):
        # Load chunk
        logger.info('Loading chunk: {:,} - {:,}'.format(
            offset, offset + limit))
        chunk = query_footprint(
            xtrack_tbl,
            columns=columns,
            where=where,
            limit=limit,
            offset=offset,
            dryrun=False)

        # Remove records where both IDs are onhand
        if remove_oh:
            logger.info('Dropping records where both IDs are on onhand...')
            chunk = chunk[~((chunk['catalogid1'].isin(oh_ids)) &
                            (chunk['catalogid2'].isin(oh_ids)))]
            remaining_records = len(chunk)
            logger.info('Remaining records: {:,}'.format(remaining_records))
            if remaining_records == 0:
                continue

        # Find only IDs in AOI if provided
        if aoi_path:
            logger.info('Finding IDs in AOI...')
            chunk = select_in_aoi(chunk, aoi=aoi)
            remaining_records = len(chunk)
            logger.debug(
                'Remaining records in AOI: {:,}'.format(remaining_records))
            if remaining_records == 0:
                continue

        if use_land:
            logger.info('Selecting IDs over land only...')
            chunk = select_in_aoi(chunk, aoi=land, centroid=True)
            remaining_records = len(chunk)
            logger.info('Remaining records over land: {:,}'.format(
                remaining_records))
            if remaining_records == 0:
                continue

        # %% Calculate area for chunk
        logger.info('Calculating area...')
        chunks.append(area_calc(chunk, area_col=area_col))

    if not chunks:
        # Nothing survived filtering; the selection below needs at least one
        # record (and its columns) to work with.
        logger.warning('No records remaining after filtering, nothing to '
                       'write.')
        return

    # Combine all chunks once. Each chunk may carry its own overlapping index
    # labels, so rebuild a unique index; kept rows are tracked by label below.
    logger.info('Combining chunks...')
    master = pd.concat(chunks, ignore_index=True)

    # Largest area first, so IDs are taken from the biggest pairs
    master = master.sort_values(by=area_col, ascending=False)
    master[cid1_oh_fld] = master[catid1_fld].isin(oh_ids)
    master[cid2_oh_fld] = master[catid2_fld].isin(oh_ids)

    if remove_oh:
        noh_str = ' not_on_hand'
    else:
        noh_str = ''
    logger.info('Finding {:,} out of {:,} IDs{}, starting with largest '
                'area...'.format(num_ids, len(master), noh_str))

    out_ids = set()
    kept_rows = set()
    min_area_kept = None
    for _, row in master.iterrows():
        row_kept = False
        for cid_fld, oh_fld in ((catid1_fld, cid1_oh_fld),
                                (catid2_fld, cid2_oh_fld)):
            cid = row[cid_fld]
            if not row[oh_fld] and cid not in out_ids:
                out_ids.add(cid)
                row_kept = True
        if row_kept:
            kept_rows.add(row.name)
            min_area_kept = row[area_col]
        if len(out_ids) >= num_ids:
            logger.info(
                '{:,} IDs not on hand located. {:,} sqkm minimum kept.'.format(
                    len(out_ids), row[area_col]))
            break

    num_kept_ids = len(out_ids)
    if num_kept_ids < num_ids:
        if min_area_kept is None:
            logger.warning('Only {:,} IDs found.'.format(num_kept_ids))
        else:
            logger.warning(
                'Only {:,} IDs found. Minimum area kept: {:,.2f}'.format(
                    num_kept_ids, min_area_kept))

    # Select kept pairs (rows)
    kept_pairs = master[master.index.isin(kept_rows)]
    if out_footprint:
        logger.info('Writing footprint of pairs to: {}'.format(out_footprint))
        kept_pairs.to_file(out_footprint)

    #%% Write
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    logger.info('Writing {:,} IDs to: {}'.format(len(out_ids), out_path))
    write_ids(out_ids, out_path)