Example 1
    def load_table(FP, JOIN_FP, selection_unique_ids_str, SERIES, SERIES_BOTH,
                   SERIES_FIELD):
        logger.debug('Loading danco AHAP table...')
        ## Load aerial source table
        # Build where clause, including selecting only ids in selection
        # where = "(sde.{}.{} IN ({}))".format(FP, JOIN_FP, selection_unique_ids_str)
        where = "({}.{} IN ({}))".format(FP, JOIN_FP, selection_unique_ids_str)
        # Add series if only medium or high is desired, else add nothing and load both
        if SERIES != SERIES_BOTH:
            # where += " AND (sde.{}.{} = '{}')".format(FP, SERIES_FIELD, SERIES)
            where += " AND ({}.{} = '{}')".format(FP, SERIES_FIELD, SERIES)
        aia = query_footprint(FP, db=DB, table=True, where=where)
        aia_ct = len(aia)
        # Remove duplicates - there are identical records, but on different src_drives
        # Mainly seen on src_drives: USGS_s31 and USGS_s71
        # If this actually removes anything, a debug message will be logged.
        # TODO: Add option to keep all locations, only useful for copying from drives
        #       as there should be one of each file on the server
        aia = aia.drop_duplicates(subset=JOIN_FP)
        aia_dd = len(aia)
        if aia_dd != aia_ct:
        logger.debug(
            'Duplicates dropped, identical records on multiple drives.')

        logger.info('Records loaded in AHAP table: {:,}'.format(len(aia)))

        return aia
Example 2
def dem_exists(dataframe, catalogid_field, out_field='dem_exists'):
    """
    Takes a dataframe and determines if each catalogid in catalogid_field
    has been turned into a DEM.

    Parameters
    ----------
    dataframe : pd.DataFrame or gpd.GeoDataFrame
        Dataframe of one ID per row.
    catalogid_field : STR
        Field in dataframe with catalogids.
    out_field : STR, optional
        The name of the field to create. The default is 'dem_exists'.

    Returns
    -------
    None. Adds a boolean out_field column to dataframe in place.

    """
    dems = query_footprint('pgc_dem_setsm_strips',
                           table=True,
                           columns=['catalogid1', 'catalogid2'])
    # Use a set for fast membership checks
    dem_ids = set(dems['catalogid1']) | set(dems['catalogid2'])
    dataframe[out_field] = dataframe[catalogid_field].apply(
        lambda x: x in dem_ids)
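
A minimal usage sketch for dem_exists, assuming query_footprint can reach the danco pgc_dem_setsm_strips table; the sample catalog IDs and column name below are hypothetical:

import pandas as pd

# Hypothetical catalog IDs; real IDs would come from a footprint or order list
df = pd.DataFrame({'catalog_id': ['10300100A1B2C300', '10400100B2C3D400']})
dem_exists(df, catalogid_field='catalog_id')
# df now has a boolean 'dem_exists' column
print(df[['catalog_id', 'dem_exists']])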
Example 3
def archive_id_lut():
    # Look up table names on danco
    print('Creating look-up table from danco table...')

    luts = {
        'GE01': 'index_dg_catalogid_to_ge_archiveid_ge01',
        'IK01': 'index_dg_catalogid_to_ge_archiveid_ik'
    }

    # Verify sensor
    #    if sensor in luts:
    #        pass
    #    else:
    #        print('{} look up table not found. Sensor must be in {}'.format(sensor, luts.keys()))

    # Create list to store tuples of (old id, new id)
    lut = []

    # Create tuples for each sensor, append to list
    for sensor in luts.keys():
        lu_df = query_footprint(layer=luts[sensor], table=True)
        # Combine old ids and new ids in tuples in a list
        sensor_lut = list(zip(lu_df.crssimageid, lu_df.catalog_identifier))
        for entry in sensor_lut:
            lut.append(entry)

    # Convert list of tuples to dictionary
    lu_dict = dict(lut)

    return lu_dict
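
The returned dictionary maps legacy GE01/IK01 archive IDs (crssimageid) to DG catalog IDs (catalog_identifier). A hedged usage sketch, with a hypothetical dataframe and column name:

import pandas as pd

lut = archive_id_lut()
# 'archive_id' is a hypothetical column holding GE01/IK01 archive IDs
df = pd.DataFrame({'archive_id': list(lut.keys())[:5]})
df['catalogid'] = df['archive_id'].map(lut)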
Example 4
def is_stereo(dataframe, catalogid_field, out_field='is_stereo'):
    """
    Takes a dataframe and determines if each catalogid in catalogid_field
    is a stereo image.
    """
    stereo_ids = query_footprint('pgc_imagery_catalogids_stereo',
                                 table=True,
                                 columns=['CATALOG_ID'])
    # Use the ID column itself; list(dataframe) would only yield the column names
    stereo_ids = set(stereo_ids['CATALOG_ID'])
    dataframe[out_field] = dataframe[catalogid_field].apply(
        lambda x: x in stereo_ids)
Example 5
def load_src(layer, where, columns, write_source=False):
    """
    Load danco layer specified with where clause and columns provided.
    """
    ## Load source footprint
    logger.debug('Loading source footprint (with any provided SQL)...')
    logger.debug('Loading {}...'.format(layer))
    src = query_footprint(layer=layer, where=where, columns=columns)
    logger.info('Loaded source features before selection: {}'.format(len(src)))
    if write_source is True:
        src.to_file(r'C:\temp\src.shp')
    return src
Example 6
def calculate_density(grid_p,
                      footprint_p,
                      out_path=None,
                      date_col=None,
                      rasterize=False):
    """Calculate the density (count) of footprint features over each grid cell."""
    if not isinstance(grid_p, gpd.GeoDataFrame):
        logger.info('Loading grid...')
        if 'gdb' in grid_p:
            gdb, layer = grid_p.split('.gdb\\')
            gdb = '{}.gdb'.format(gdb)
            grid = gpd.read_file(gdb, layer=layer)
        else:
            grid = gpd.read_file(grid_p)
    else:
        grid = copy.deepcopy(grid_p)

    danco_footprints = list_danco_db('footprint')
    if isinstance(footprint_p, gpd.GeoDataFrame):
        footprint = copy.deepcopy(footprint_p)
    elif footprint_p in danco_footprints:
        logger.info('Loading footprint from danco...')
        footprint = query_footprint(footprint_p)
    else:
        logger.info('Loading footprint...')
        if 'gdb' in footprint_p:
            gdb, layer = footprint_p.split('.gdb\\')
            gdb = '{}.gdb'.format(gdb)
            footprint = gpd.read_file(gdb, layer=layer)
        else:
            footprint = gpd.read_file(footprint_p)

    logger.info('Calculating density...')
    density = get_count(grid, footprint, date_col=date_col)
    # Convert any tuple columns to strings (occurs with agg-ing same column multiple ways)
    density.columns = [
        str(x) if isinstance(x, tuple) else x for x in density.columns
    ]
    if rasterize:
        logger.info('Rasterizing...')
        vec_out = '/vsimem/density_temp.shp'
        density.to_file(vec_out)
        # TODO: Finish this
        # rasterize_options = gdal.RasterizeOptions(xRes=, yRes=, )
        if out_path:
            # Rasterize to out_path (not yet implemented; see the sketch below)
            pass
    else:
        if out_path:
            logger.info('Writing density...')
            density.to_file(out_path)

    return density
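
The rasterize branch above is left unfinished. A possible completion, sketched under assumptions (GDAL Python bindings available, a 'count' field produced by get_count, and an arbitrary cell size in the layer's units), would hand the temporary vector file to gdal.Rasterize:

from osgeo import gdal

# Hypothetical cell size and burn attribute; adjust to the grid/footprint units
ras_opts = gdal.RasterizeOptions(format='GTiff',
                                 outputType=gdal.GDT_Float32,
                                 xRes=0.25, yRes=0.25,
                                 attribute='count',
                                 noData=-9999)
gdal.Rasterize(out_path, vec_out, options=ras_opts)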
Example 7
def main(args):
    MFP_PATH = args.mfp_path
    CATALOG_ID = args.field_of_int
    IDS_OUT_DIR = args.ids_out_dir
    mfp_ids_tbl = 'pgc_imagery_catalogids'

    ID_OUT_NAME = os.path.basename(MFP_PATH).split('.')[0]
    ID_OUT_BASENAME = '{}_{}.txt'.format(ID_OUT_NAME, args.field_of_int)
    if not IDS_OUT_DIR:
        IDS_OUT_PATH = os.path.join(os.path.dirname(MFP_PATH), ID_OUT_BASENAME)
    else:
        IDS_OUT_PATH = os.path.join(IDS_OUT_DIR, ID_OUT_BASENAME)

    logging.info('Reading IDs from danco table: {}...'.format(mfp_ids_tbl))
    pgc_cid_fp = query_footprint(mfp_ids_tbl, table=True, columns=[CATALOG_ID])

    cids = list(pgc_cid_fp[CATALOG_ID])

    logging.info('Writing IDs to {}...'.format(IDS_OUT_PATH))
    write_ids(cids, IDS_OUT_PATH)
Example 8
def dems_from_stereo(aoi_path=None,
                     coords=None,
                     months=None,
                     min_date=None, max_date=None,
                     multispec=False,
                     dem_path='dem_path',
                     strip_types=['strips', 'strips_v4']):
    """Locate existing DEMs for stereopairs matching the given AOI and filters."""

    # Params
    stereo_lyr_name = 'dg_imagery_index_stereo_with_earthdem_region'
    DATE_COL = 'acqdate' # name of date field in stereo footprint
    SENSOR_COL = 'platform' # name of sensor field in stereo footprint
    
    if aoi_path or coords:
        if aoi_path:
            # Load AOI
            aoi = gpd.read_file(aoi_path)
            # Check AOI crs
            stereo_crs = layer_crs(stereo_lyr_name)
            if aoi.crs != stereo_crs:
                logger.debug('Reprojecting AOI to match footprint: AOI -> {}'.format(stereo_crs))
                aoi = aoi.to_crs(stereo_crs)
        elif coords:
            lon = float(coords[0])
            lat = float(coords[1])
            loc = Point(lon, lat)
            aoi = gpd.GeoDataFrame(geometry=[loc], crs="EPSG:4326")
    
        # Get AOI bounds
        minx, miny, maxx, maxy = aoi.total_bounds
        pad = 3
        where = "x1 > {} AND y1 > {} AND x1 < {} and y1 < {}".format(minx-pad, miny-pad, maxx+pad, maxy+pad)
    
    # Add constraints to SQL
    if min_date:
        where = check_where(where)
        where += """{} > '{}'""".format(DATE_COL, min_date)
    if max_date:
        where = check_where(where)
        where += """{} < '{}'""".format(DATE_COL, max_date)
    # Add to SQL clause to just select multispectral sensors
    if multispec:
        where = check_where(where)
        where += """{} IN ('WV02', 'WV03')""".format(SENSOR_COL)
    if months:
        month_terms = [""" {} LIKE '%%-{}-%%'""".format(DATE_COL, month) for month in months]
        month_sql = " OR ".join(month_terms)
        month_sql = "({})".format(month_sql)
        where = check_where(where)
        where += month_sql

    # Load stereo footprint
    logger.info('Loading stereo footprint...')
    logger.debug('SQL where: {}'.format(where))
    stereo = query_footprint(stereo_lyr_name, where=where, dryrun=False)
    logger.debug('Initial matches with SQL: {}'.format(len(stereo)))

    # Intersect with AOI
    logger.info('Finding footprints that intersect with AOI...')
    stereo_matches = gpd.overlay(stereo, aoi)
    logger.debug('Matches with AOI intersection: {}'.format(len(stereo_matches)))

    # Create paths
    if platform.system() == 'Windows':
        basepath = r'V:\pgc\data\elev\dem\setsm\ArcticDEM\region'
    elif platform.system() == 'Linux':
        basepath = r'/mnt/pgc/data/elev/dem/setsm/ArcticDEM/region'

    # Look for DEM paths for all pairnames
    pairname_paths = pd.DataFrame()
    for i, row in stereo_matches.iterrows():
        pairname_dems = loc_pairname_dems(basepath, row['region_id'],
                                          row['pairname'],
                                          strip_types=strip_types)
        pairname_paths = pd.concat([pairname_paths, pairname_dems])

    # Join back to matches, one row per DEM found per pairname
    # or if no DEM found for pairname, a single row with dem_path=NaN
    matches_paths = pd.merge(stereo_matches, pairname_paths, how='outer',
                             left_on='pairname', right_on='pairname')

    # Split into found and missing dems, report
    found_dems = matches_paths[~matches_paths[dem_path].isna()]
    missing_dems = matches_paths[matches_paths[dem_path].isna()]

    logger.info('Found pairnames: {}/{}'.format(len(found_dems['pairname'].unique()),
                                                len(stereo_matches)))
    logger.info('Found DEMs: {}'.format(len(found_dems)))

    if not found_dems.empty:
        logger.debug('Found DEMs summary: ')
        logger.debug('Total:       {}'.format(len(found_dems)))
        logger.debug('Min acqdate: {}'.format(found_dems['acqdate'].min()))
        logger.debug('Max acqdate: {}'.format(found_dems['acqdate'].max()))
        logger.debug('Sensors:     {}'.format(sorted(found_dems['platform'].unique())))
        logger.debug('Found DEMs:\n{}'.format(found_dems['pairname']))
        logger.debug('')
    if not missing_dems.empty:
        logger.debug('Missing DEMs summary: ')
        logger.debug('Total:        {}'.format(len(missing_dems)))
        logger.debug('Min acqdate:  {}'.format(missing_dems['acqdate'].min()))
        logger.debug('Max acqdate:  {}'.format(missing_dems['acqdate'].max()))
        logger.debug('Sensors:      {}'.format(sorted(missing_dems['platform'].unique())))
        logger.debug('Missing DEMs:\n{}'.format(missing_dems['pairname']))

    return found_dems
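
loc_pairname_dems is not shown in this example. A minimal sketch of what it might do, assuming a hypothetical <basepath>/<region_id>/<strip_type>/2m/<pairname>*/ directory layout and a *_dem.tif naming convention:

import glob
import os

import pandas as pd

def loc_pairname_dems(basepath, region_id, pairname, strip_types=('strips', 'strips_v4')):
    records = []
    for strip_type in strip_types:
        # Hypothetical layout; adjust the pattern to the actual DEM directory structure
        pattern = os.path.join(basepath, region_id, strip_type, '2m',
                               '{}*'.format(pairname), '*_dem.tif')
        for dem in glob.glob(pattern):
            records.append({'pairname': pairname, 'dem_path': dem})
    return pd.DataFrame(records, columns=['pairname', 'dem_path'])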
Example 9
def select_AHAP(PHOTO_IDS=None,
                AOI_P=None,
                resolution=None,
                repeat=False,
                write=None):
    # String literals
    LAYER = 'usgs_index_aerial_image_archive'
    DB = 'imagery'
    # Path to AHAP photo extents shapefile
    PHOTO_EXT_P = r'E:\disbr007\general\aerial\AHAP\AHAP_Photo_Extents\AHAP_Photo_Extents.shp'
    # Identifier in AHAP photos shp
    PHOTO_ID = 'PHOTO_ID'
    # Identified in AHAP photos table
    UNIQUE_ID = 'unique_id'
    SERIES = 'series'

    # Load danco AHAP imagery table
    logger.info('Reading AHAP danco table')
    where = "campaign = 'AHAP'"
    if resolution:
        where += " AND series = '{}'".format(resolution)
    aia = query_footprint(LAYER, db=DB, table=True, where=where)

    # Load photo extents
    logger.info("Loading AHAP photo extent shapefile...")
    PHOTO_EXT = gpd.read_file(PHOTO_EXT_P)

    if AOI_P:
        logger.info('Reading AOI shapefile....')
        # Load AOI and match crs
        AOI = gpd.read_file(os.path.join(AOI_P))
        AOI = AOI.to_crs(PHOTO_EXT.crs)

        logger.info('Selecting AHAP imagery by location...')
        # Select Photo Extents by intersection with AOI polygons
        selection = gpd.sjoin(PHOTO_EXT, AOI, how='inner', op='intersects')
    elif PHOTO_IDS:
        if os.path.isfile(PHOTO_IDS[0]):
            ids = read_ids(PHOTO_IDS[0])
        elif isinstance(PHOTO_IDS, list):
            ids = PHOTO_IDS
        selection = PHOTO_EXT[PHOTO_EXT[PHOTO_ID].isin(ids)]

    # Remove duplicate Photo Extents if specified
    if repeat is False:
        selection = selection.drop_duplicates(subset=PHOTO_ID)

    # Join to table with filenames
    selection = pd.merge(selection,
                         aia,
                         how='left',
                         left_on=PHOTO_ID,
                         right_on=UNIQUE_ID)

    logger.info('Selected features found: {:,}'.format(len(selection)))

    # Write out shapefile
    if write is not None:
        logger.info('Writing AHAP selection to: {}'.format(write))
        selection.to_file(write)

    return selection
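
A hedged invocation sketch for select_AHAP; the paths below are hypothetical, and the resolution value depends on the 'series' values stored in the danco table:

selection = select_AHAP(AOI_P=r'C:\temp\aoi.shp',
                        write=r'C:\temp\ahap_selection.shp')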
Example 10
def refresh(last_refresh,
            refresh_region,
            refresh_imagery,
            max_cc,
            min_cc,
            sensors,
            aoi_path=None,
            use_land=True,
            refresh_thru=None,
            drop_onhand=True):
    '''
    Select IDs for an imagery order refresh since last_refresh.
    Cloud cover selection: min_cc <= cloudcover <= max_cc.
    '''
    if not refresh_thru:
        # Use today's date
        refresh_thru = datetime.datetime.now().strftime('%Y-%m-%d')

    where = "(acqdate >= '{}' AND acqdate <= '{}') AND (cloudcover >= {} AND cloudcover <= {})".format(
        last_refresh, refresh_thru, min_cc, max_cc)
    if sensors:
        where += " AND (platform IN ({}))".format(str(sensors)[1:-1])

    if aoi_path:
        aoi_where = generate_rough_aoi_where(aoi_path=aoi_path,
                                             x_fld='x1',
                                             y_fld='y1',
                                             pad=20.0)
        where += " AND {}".format(aoi_where)

    logger.debug('where: {}'.format(where))

    # Load regions shp
    regions_path = r"E:\disbr007\imagery_orders\all_regions.shp"
    logger.debug('Regions path: {}'.format(regions_path))
    # regions = gpd.read_file(regions_path, driver='ESRI_Shapefile')
    regions = query_footprint('pgc_earthdem_regions')

    # Load not on hand footprint -> since last refresh
    logger.info('Performing initial selection...')
    supported_refresh_imagery = ['mono_stereo', 'mono', 'stereo']
    logger.debug('Refresh imagery: {}'.format(refresh_imagery))
    if refresh_imagery in supported_refresh_imagery:
        if refresh_imagery == 'mono_stereo':
            noh_recent = query_footprint('index_dg', where=where)
        if refresh_imagery == 'mono':
            noh_recent = mono_noh(where=where, noh=drop_onhand)
        if refresh_imagery == 'stereo':
            noh_recent = stereo_noh(where=where, noh=drop_onhand)
    else:
        logger.warning(
            'Refresh imagery type unrecognized, supported refresh imagery '
            'options include: {}'.format(supported_refresh_imagery))

    logger.info('Initial IDs found: {:,}'.format(len(noh_recent)))
    # noh_recent = noh_recent.drop_duplicates(subset='catalogid')

    ### Spatial join to identify region
    logger.info('Identifying region of selected imagery...')
    # Save original columns
    noh_recent_cols = list(noh_recent)
    noh_recent_cols.append(loc_name_fld)
    # Calculate centroid
    noh_recent['centroid'] = noh_recent.centroid
    noh_recent.set_geometry('centroid', inplace=True)
    # Locate region of centroid
    noh_recent = gpd.sjoin(noh_recent, regions, how='left', op='within')
    noh_recent.drop('centroid', axis=1, inplace=True)
    noh_recent.set_geometry('geom', inplace=True)

    ### Identify only those in the region of interest
    # Get regions of interest based on type of refresh
    roi = refresh_region_lut(refresh_region)
    logger.debug('Regions included: {}'.format(roi))
    # Select region of interest
    noh_recent_roi = noh_recent[noh_recent[loc_name_fld].isin(roi)]
    # # Return to original columns
    # noh_recent_roi = noh_recent_roi[noh_recent_cols]

    logger.info('IDs in region(s) of interest: {:,}'.format(
        len(noh_recent_roi)))

    # Select only those features that intersect land polygons
    if use_land:
        logger.info(
            'Selecting only imagery within land inclusion shapefile...')
        land_shp = r'E:\disbr007\imagery_orders\coastline_include_fix_geom_dis.shp'
        land = gpd.read_file(land_shp)
        # Drop 'index' columns if they exist
        drop_cols = [x for x in list(noh_recent_roi) if 'index' in x]
        noh_recent_roi = noh_recent_roi.drop(columns=drop_cols)
        noh_recent_roi = gpd.sjoin(noh_recent_roi, land, how='left')
        noh_recent_roi = noh_recent_roi[noh_recent_cols]

        logger.info('IDs over land: {}'.format(len(noh_recent_roi)))

    if aoi_path:
        # Drop 'index' columns if they exist
        drop_cols = [x for x in list(noh_recent_roi) if 'index' in x]
        noh_recent_roi = noh_recent_roi.drop(columns=drop_cols)

        aoi = gpd.read_file(aoi_path)
        noh_recent_roi = select_in_aoi(noh_recent_roi, aoi)
        # noh_recent_roi = noh_recent_roi[noh_recent_cols]
        logger.info('IDs over AOI: {}'.format(len(noh_recent_roi)))

    return noh_recent_roi
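
generate_rough_aoi_where is used above but not defined here. A sketch under assumptions: the footprint table stores a representative longitude/latitude in x_fld/y_fld, and the AOI can be read and reprojected to EPSG:4326:

import geopandas as gpd

def generate_rough_aoi_where(aoi_path, x_fld='x1', y_fld='y1', pad=20.0):
    aoi = gpd.read_file(aoi_path)
    if aoi.crs is not None and aoi.crs.to_epsg() != 4326:
        aoi = aoi.to_crs('EPSG:4326')
    minx, miny, maxx, maxy = aoi.total_bounds
    # Padded bounding-box clause against the footprint's point coordinates
    return ("({x} > {minx} AND {x} < {maxx} AND "
            "{y} > {miny} AND {y} < {maxy})").format(
                x=x_fld, y=y_fld,
                minx=minx - pad, maxx=maxx + pad,
                miny=miny - pad, maxy=maxy + pad)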
Example 11
def main(args):
    # Parse args
    out_path = args.out_path
    num_ids = args.number_ids
    update_ordered = args.update_ordered
    use_land = args.do_not_use_land
    remove_oh = args.do_not_remove_oh
    sensors = args.sensors
    within_sensor = args.within_sensor
    min_date = args.min_date
    max_date = args.max_date
    min_ovlp = args.min_ovlp
    max_suneldiff = args.max_suneldiff
    min_meansunel = args.min_meansunel
    min_datediff = args.min_datediff
    max_datediff = args.max_datediff
    aoi_path = args.aoi
    projects = args.projects
    region_names = args.region_names
    out_footprint = args.out_footprint

    # Check for existence of aoi and out_path directory
    if aoi_path:
        if not os.path.exists(aoi_path):
            logger.error('AOI path does not exist: {}'.format(aoi_path))
            sys.exit()
        aoi = gpd.read_file(aoi_path)
    if not os.path.exists(os.path.dirname(out_path)):
        logger.warning('Out directory does not exist, creating: {}'.format(
            os.path.dirname(out_path)))
        os.makedirs(os.path.dirname(out_path))
    if out_footprint:
        if not os.path.exists(os.path.dirname(out_footprint)):
            logger.warning('Out directory does not exist, creating: {}'.format(
                os.path.dirname(out_footprint)))
            os.makedirs(os.path.dirname(out_footprint))

    where = create_where(sensors=sensors,
                         min_date=min_date,
                         max_date=max_date,
                         min_datediff=min_datediff,
                         max_datediff=max_datediff,
                         min_ovlp=min_ovlp,
                         max_suneldiff=max_suneldiff,
                         min_meansunel=min_meansunel,
                         within_sensor=within_sensor,
                         noh=remove_oh,
                         projects=projects,
                         region_names=region_names)

    logger.info('Getting size of table with query...')
    table_total = count_table(xtrack_tbl, where=where)
    logger.info('Total table size with query: {:,}'.format(table_total))

    if remove_oh:
        # Get all onhand and ordered ids
        logger.info('Loading all onhand and ordered IDs...')
        oh_ids = set(onhand_ids(update=update_ordered))
        logger.info('Onhand and ordered IDs loaded: {:,}'.format(len(oh_ids)))
    else:
        oh_ids = set()

    # Load land shapefile if necessary
    if use_land:
        land = gpd.read_file(land_shp)

    # %% Iterate
    # Iterate chunks of table, calculating area and adding id1, id2, area to dictionary
    all_ids = []
    master = gpd.GeoDataFrame()
    limit = chunk_size
    offset = 0
    while offset < table_total:
        # Load chunk
        logger.info('Loading chunk: {:,} - {:,}'.format(
            offset, offset + limit))
        chunk = query_footprint(
            xtrack_tbl,
            columns=columns,
            # orderby=orderby, orderby_asc=False,
            where=where,
            limit=limit,
            offset=offset,
            dryrun=False)

        remaining_records = len(chunk)

        # Remove records where both IDs are onhand
        if remove_oh:
            logger.info('Dropping records where both IDs are on onhand...')
            chunk = chunk[~((chunk['catalogid1'].isin(oh_ids)) &
                            (chunk['catalogid2'].isin(oh_ids)))]
            remaining_records = len(chunk)
            logger.info('Remaining records: {:,}'.format(remaining_records))
            if remaining_records == 0:
                # Advance the offset before continuing to avoid re-querying the same chunk
                offset += limit
                continue

        # Find only IDs in AOI if provided
        if aoi_path:
            logger.info('Finding IDs in AOI...')
            chunk = select_in_aoi(chunk, aoi=aoi)
            remaining_records = len(chunk)
            logger.debug(
                'Remaining records in AOI: {:,}'.format(remaining_records))
            if remaining_records == 0:
                offset += limit
                continue

        if use_land:
            logger.info('Selecting IDs over land only...')
            chunk = select_in_aoi(chunk, aoi=land, centroid=True)

            remaining_records = len(chunk)
            logger.info('Remaining records over land: {:,}'.format(len(chunk)))
            if remaining_records == 0:
                offset += limit
                continue
        # %% Calculate area for chunk
        logger.info('Calculating area...')
        chunk = area_calc(chunk, area_col=area_col)

        # Combine with master
        logger.info('Combining chunk with master...')
        master = pd.concat([master, chunk])

        # Increase offset
        offset += limit

    # Select n records with highest area
    master = master.sort_values(by=area_col, ascending=False)
    master[cid1_oh_fld] = master[catid1_fld].isin(oh_ids)
    master[cid2_oh_fld] = master[catid2_fld].isin(oh_ids)

    if remove_oh:
        noh_str = ' not_on_hand'
    else:
        noh_str = ''
    logger.info('Finding {:,} out of {:,} IDs{}, starting with largest '
                'area...'.format(num_ids, len(master), noh_str))

    out_ids = set()
    kept_rows = set()
    num_kept_ids = len(out_ids)
    for i, row in master.iterrows():
        cid1 = row[catid1_fld]
        cid2 = row[catid2_fld]
        if not row[cid1_oh_fld] and cid1 not in out_ids:
            out_ids.add(cid1)
            kept_rows.add(row.name)
        if not row[cid2_oh_fld] and cid2 not in out_ids:
            out_ids.add(cid2)
            kept_rows.add(row.name)
        num_kept_ids = len(out_ids)
        if num_kept_ids >= num_ids:
            logger.info(
                '{:,} IDs not on hand located. {:,} sqkm minimum kept.'.format(
                    num_kept_ids, row[area_col]))
            break

    if num_kept_ids < num_ids:
        logger.warning(
            'Only {:,} IDs found. Minimum area kept: {:,.2f}'.format(
                num_kept_ids, row[area_col]))

    # Select kept pairs (rows)
    kept_pairs = master[master.index.isin(kept_rows)]
    if out_footprint:
        logger.info('Writing footprint of pairs to: {}'.format(out_footprint))
        kept_pairs.to_file(out_footprint)

    #%% Write
    if not os.path.exists(os.path.dirname(out_path)):
        os.makedirs(os.path.dirname(out_path))
    logger.info('Writing {:,} IDs to: {}'.format(len(out_ids), out_path))
    write_ids(out_ids, out_path)
Example 12
def swir_ordering(include_regions, out_ids):
    """Select SWIR catalog IDs in the requested regions that are not on hand and write them to out_ids."""
    # Load earth_dem regions and pull region_ids out for selected regions
    edem = query_footprint(edem_lyr)
    regions = {
        'arctic': list(edem[edem['project'] == 'ArcticDEM']['region_id']),
        'antarctic': list(edem[edem['project'] == 'REMA']['region_id']),
        'conus': ['earthdem_04_great_lakes', 'earthdem_03_conus'],
        'global': list(edem['region_id'])
    }
    selected_region_ids = list()
    for sr in include_regions:
        selected_region_ids.extend(regions[sr])
    selected_region_ids = set(selected_region_ids)

    # Get selected regions
    selected_regions = edem[edem[region_id].isin(selected_region_ids)]
    uu = gpd.GeoDataFrame(geometry=[selected_regions.unary_union],
                          crs='epsg:4326')

    # Iterate through index_dg and pull out SWIR IDs, then select only those in regions of interest
    master_swir = gpd.GeoDataFrame()
    offset = 0
    num_processed = 0
    swir_archive_count = count_table(index_dg, where=swir_where)
    logger.debug('Size of table for {} with WHERE {}: {:,}'.format(
        index_dg, swir_where, swir_archive_count))
    while num_processed <= swir_archive_count:
        logger.info('Loading with OFFSET {:,} LIMIT {:,}'.format(
            offset, limit))
        logger.debug('WHERE {}'.format(swir_where))
        swir = query_footprint(index_dg,
                               where=swir_where,
                               limit=limit,
                               offset=offset)
        swir.geometry = swir.geometry.centroid
        logger.debug('Loaded SWIR: {:,}'.format(len(swir)))
        # Select in regions
        selected_swir_chunk = gpd.sjoin(swir, uu, op='within')
        offset += limit
        num_processed = offset

        logger.debug('SWIR IDs in selected regions in chunk: {:,}'.format(
            len(selected_swir_chunk)))
        if len(selected_swir_chunk) != 0:
            master_swir = pd.concat([master_swir, selected_swir_chunk])

    logger.info('SWIR IDs in regions: {:,}'.format(len(master_swir)))

    # Remove on hand swir
    oh_swir = set(
        list(
            query_footprint(pgc_oh, where=pgc_swir_where,
                            table=True)['catalog_id']))
    selected_swir_noh = set(list(master_swir['catalogid'])) - oh_swir
    logger.info('SWIR IDs in regions and not on hand: {:,}'.format(
        len(selected_swir_noh)))

    # Write selected SWIR IDs
    with open(out_ids, 'w') as src:
        logger.debug('Writing IDs to: {}'.format(out_ids))
        for swir_id in selected_swir_noh:
            src.write('{}\n'.format(swir_id))

    return selected_swir_noh
Example 13
    parser.add_argument('--sort_by_date_descending',
                        action='store_true',
                        help='Sort by date descending.')

    args = parser.parse_args()
    begin_date = args.begin_date
    end_date = args.end_date
    platform = args.platform
    out_path = args.out_path
    sort_by_date = args.sort_by_date
    sort_by_date_descending = args.sort_by_date_descending

    # Do it
    logger.info('Loading records...')
    platform_noh = query_danco.query_footprint(
        'dg_imagery_index_all_notonhand_cc20',
        where="platform = '{}'".format(platform))
    platform_noh['acqdate'] = pd.to_datetime(platform_noh.acqdate)
    selection = select_by_date(platform_noh,
                               date_begin=begin_date,
                               date_end=end_date)
    if sort_by_date:
        ascending = True
    elif sort_by_date_descending:
        ascending = False
    if sort_by_date or sort_by_date_descending:
        logger.info('Sorting by date...')
        selection = selection.sort_values(by='acqdate', ascending=ascending)

    out_name = '{}_{}_to_{}'.format(platform, date_words(begin_date),
                                    date_words(end_date))
Example 14
def dem_selector(AOI_PATH,
                 COORDS=None,
                 MONTHS=None,
                 MIN_DATE=None,
                 MAX_DATE=None,
                 MULTISPEC=False,
                 OUT_STEREO_FP=None,
                 OUT_ID_LIST=None,
                 CLOUDCOVER=None):
    """
    Select stereopairs over an AOI, either from a passed DEM_FP, or from
    the danco database.

    Parameters
    ----------
    AOI_PATH : os.path.abspath
        Path to AOI shapefile.
    COORDS : LIST, optional
        xy coordinates in WGS84 to use for selection.
    MONTHS : LIST, optional
        List of month integers to include. The default is None.
    MIN_DATE : STR, optional
        Minimum DEM date to include. E.g '2015-01-30'. The default is None.
    MAX_DATE : STR, optional
        Maximum DEM date to include. The default is None.
    MULTISPEC : BOOL, optional
        True to only select stereo from multispectral sources. The default is False.
    CLOUDCOVER : INT, optional
        Only include pairs with cloudcover at or below this threshold. The default is None.
    OUT_STEREO_FP : os.path.abspath, optional
        Path to write DEM footprints shapefile to. The default is None.
    OUT_ID_LIST : os.path.abspath, optional
        Path to write catalogids of selected stereopair catalogids to. The default is None.

    Returns
    -------
    geopandas.GeoDataFrame : Dataframe of footprints matching selection.

    """
    #### PARAMETERS ####
    STEREO_FP = 'dg_imagery_index_stereo'  # stereo footprint tablename
    CATALOGID = 'catalogid'  # field name in danco footprint for catalogids
    DATE_COL = 'acqdate'  # name of date field in stereo footprint
    SENSOR_COL = 'platform'  # name of sensor field in stereo footprint
    PAIRNAME_COL = 'pairname'  # name of field with unique pairnames
    CLOUDCOVER_COL = 'cloudcover'  # name of field with cloudcover
    STEREOPAIR_ID = 'stereopair'  # name of field with stereopair catalogid

    MONTH_COL = 'month'  # name of field to create in footprint if months are requested

    #### SETUP ####
    def check_where(where):
        """Checks if the input string exists already,
           if so formats correctly for adding to SQL"""
        if where:
            where += ' AND '
        return where

    # Create logger
    logging.config.dictConfig(LOGGING_CONFIG('DEBUG'))
    logger = logging.getLogger(__name__)

    #### LOAD INPUTS ####
    # Load AOI
    logger.info('Reading AOI...')
    if AOI_PATH:
        aoi = gpd.read_file(AOI_PATH)
    elif COORDS:
        lon = float(COORDS[0])
        lat = float(COORDS[1])
        loc = Point(lon, lat)
        aoi = gpd.GeoDataFrame(geometry=[loc], crs="EPSG:4326")

    # Load stereopairs footprint
    # Get bounds of aoi to reduce query size, with padding
    minx, miny, maxx, maxy = aoi.total_bounds
    pad = 10
    # Get DEM footprint crs - this loads no records, but it
    # will allow getting the crs of the footprints
    stereo = query_footprint(STEREO_FP, where="1=2")
    # Load stereo
    # Build SQL clause to select stereo in the area of the AOI, helps with load times
    stereo_where = """x1 > {} AND x1 < {} AND 
                      y1 > {} AND y1 < {}""".format(minx - pad, maxx + pad,
                                                    miny - pad, maxy + pad)
    # Add date constraints to SQL
    if MIN_DATE:
        stereo_where = check_where(stereo_where)
        stereo_where += """{} > '{}'""".format(DATE_COL, MIN_DATE)
    if MAX_DATE:
        stereo_where = check_where(stereo_where)
        stereo_where += """{} < '{}'""".format(DATE_COL, MAX_DATE)
    # Add to SQL clause to just select multispectral sensors
    if MULTISPEC:
        stereo_where = check_where(stereo_where)
        stereo_where += """{} IN ('WV02', 'WV03')""".format(SENSOR_COL)
    if CLOUDCOVER:
        stereo_where = check_where(stereo_where)
        stereo_where += """{} <= {}""".format(CLOUDCOVER_COL, CLOUDCOVER)

    # Load DEM footprints with SQL
    stereo = query_footprint(STEREO_FP, where=stereo_where)

    # If only certain months requested, reduce to those
    if MONTHS:
        stereo['temp_date'] = pd.to_datetime(stereo[DATE_COL])
        stereo[MONTH_COL] = stereo['temp_date'].dt.month
        stereo.drop(columns=['temp_date'], inplace=True)
        stereo = stereo[stereo[MONTH_COL].isin(MONTHS)]

    logger.info(
        'Stereopairs matching criteria (before AOI selection): {}'.format(
            len(stereo)))

    # Check coordinate system match and if not reproject AOI
    if aoi.crs != stereo.crs:
        aoi = aoi.to_crs(stereo.crs)

    #### SELECT stereo OVER ALL AOIS ####
    logger.info('Selecting stereopairs over AOI...')
    # Select by location
    # stereo = gpd.overlay(stereo, aoi, how='intersection')
    stereo = gpd.sjoin(stereo, aoi, how='inner')
    # Remove duplicates resulting from intersection (not sure why DUPs)
    stereo = stereo.drop_duplicates(subset=(PAIRNAME_COL))
    logger.info('Stereopairs found over AOI: {}'.format(len(stereo)))
    if len(stereo) == 0:
        logger.error('No stereopairs found over AOI, exiting...')
        sys.exit()

    #### WRITE FOOTPRINT AND TXT OF MATCHES ####
    # Write footprint out
    if OUT_STEREO_FP:
        logger.info(
            'Writing stereopair footprint to file: {}'.format(OUT_STEREO_FP))
        stereo.to_file(OUT_STEREO_FP)
    # Write list of IDs out
    if OUT_ID_LIST:
        logger.info(
            'Writing list of catalogids to file: {}'.format(OUT_ID_LIST))
        write_stereopair_ids(list(stereo[CATALOGID]),
                             list(stereo[STEREOPAIR_ID]),
                             header='catalogid, stereopair',
                             out_path=OUT_ID_LIST)

    #### Summary Statistics ####
    count = len(stereo)
    min_date = stereo[DATE_COL].min()
    max_date = stereo[DATE_COL].max()

    logger.info("SUMMARY of STEREOPAIR SELECTION:")
    logger.info("Number of STEREOPAIRS: {}".format(count))
    logger.info("Earliest date: {}".format(min_date))
    logger.info("Latest date: {}".format(max_date))

    return stereo
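
write_stereopair_ids is called above but not included. A minimal sketch, assuming it writes one 'catalogid, stereopair' pair per line beneath an optional header:

def write_stereopair_ids(catalogids, stereopairs, header=None, out_path=None):
    with open(out_path, 'w') as f:
        if header:
            f.write('{}\n'.format(header))
        for cid, sp in zip(catalogids, stereopairs):
            f.write('{}, {}\n'.format(cid, sp))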
Example 15
# Query footprint
where = 'x1 > {} AND x1 < {} AND y1 > {} and y1 < {}'.format(minx, maxx,
                                                             miny, maxy)
logger.debug('Where clause for query: {}'.format(where))

count = count_table(danco_lyr, where=where, table=True, noh=noh)
logger.debug('Count for table with where clause: {:,}'.format(count))

#%%
usfs_fps = gpd.GeoDataFrame()
offset = 0
while offset < count:
    logger.debug('Loading records: {:,} - {:,}'.format(offset, offset+limit))
    # Load footprints
    fps = query_footprint(danco_lyr, where=where, limit=limit, offset=offset, noh=noh)
    # Intersect to find USFS footprints
    logger.debug('Identifying records on USFS land...')
    slice_usfs_fps = gpd.sjoin(fps, usfs, op='within')
    logger.debug('USFS records found: {}'.format(len(slice_usfs_fps)))
    # Merge to master dataframe
    usfs_fps = pd.concat([usfs_fps, slice_usfs_fps])
    logger.debug('Total USFS records found: {}'.format(len(usfs_fps)))
    # Increase offset
    offset += limit

usfs_fps_catids = set(usfs_fps['catalogid'])
#%%
# Remove onhand IDs
# oh = onhand_ids()
# mfp_ids = pgc_ids()