def search(aoi, start_date=None, end_date=None, product_type=None,
           api='devseed'):
    """
    Search Sentinel-2 images covering an AOI and timespan using a given API.

    Args:
        aoi (geojson.Polygon): area of interest
        start_date (datetime.datetime): start of the search time range
        end_date (datetime.datetime): end of the search time range
        product_type (str, optional): either 'L1C' or 'L2A'
        api (str, optional): either devseed (default), scihub, planet or gcloud

    Returns:
        list of image objects
    """
    # list available images
    if api == 'devseed':
        import search_devseed
        images = search_devseed.search(aoi, start_date, end_date,
                                       'Sentinel-2')['features']
        images = [metadata_parser.DevSeedParser(img) for img in images]
    elif api == 'scihub':
        import search_scihub
        if product_type is not None:
            product_type = 'S2MSI{}'.format(product_type[1:])
        images = search_scihub.search(aoi, start_date, end_date,
                                      satellite='Sentinel-2',
                                      product_type=product_type)
        images = [metadata_parser.SciHubParser(img) for img in images]
    elif api == 'planet':
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Sentinel2L1C'])
        images = [metadata_parser.PlanetParser(img) for img in images]
    elif api == 'gcloud':
        import search_gcloud
        images = search_gcloud.search(aoi, start_date, end_date)
        images = [metadata_parser.GcloudParser(img) for img in images]

    # sort images by acquisition day, then by mgrs id
    images.sort(key=(lambda k: (k.date.date(), k.mgrs_id)))

    # remove duplicates (same acquisition date but different mgrs tile id)
    seen = set()
    unique_images = []
    for img in images:
        if img.date not in seen:
            seen.add(img.date)
            unique_images.append(img)

    print('Found {} images'.format(len(unique_images)))
    return [vars(img) for img in unique_images]
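# Illustrative usage sketch (not part of the original module). It assumes the
# module-level dependencies used above (metadata_parser and the lazily imported
# search backends) are available; the AOI coordinates and dates below are
# placeholders.
def _example_search_sentinel2():  # hypothetical helper, for illustration only
    import datetime
    import geojson

    # small rectangular AOI around an arbitrary point (lon, lat order)
    aoi = geojson.Polygon([[(2.35, 48.85), (2.36, 48.85), (2.36, 48.86),
                            (2.35, 48.86), (2.35, 48.85)]])
    images = search(aoi,
                    start_date=datetime.datetime(2018, 1, 1),
                    end_date=datetime.datetime(2018, 2, 1),
                    product_type='L1C',
                    api='devseed')
    # each element is the __dict__ of a metadata parser object
    for img in images:
        print(sorted(img.keys()))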
def search(aoi, start_date=None, end_date=None, api='devseed'):
    """
    Search Landsat-8 images covering an AOI and timespan using a given API.

    Args:
        aoi (geojson.Polygon): area of interest
        start_date (datetime.datetime): start of the search time range
        end_date (datetime.datetime): end of the search time range
        api (str, optional): either devseed (default) or planet

    Returns:
        list of image objects
    """
    # list available images
    if api == 'devseed':
        import search_devseed
        images = search_devseed.search(aoi, start_date, end_date, 'Landsat-8')
        images.sort(key=lambda k: (k['properties']['datetime'],
                                   k['properties']['eo:row'],
                                   k['properties']['eo:column']))
        for i in images:  # add some metadata at the root of the dict
            i['date'] = dateutil.parser.parse(i['properties']['datetime'])
            i['cloud_cover'] = i['properties']['eo:cloud_cover']
            i['thumbnail'] = i['assets']['thumbnail']['href']
    elif api == 'planet':
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Landsat8L1G'])

        # sort images by acquisition date, then by acquisition row and path
        images.sort(key=lambda k: (k['properties']['acquired'],
                                   k['properties']['wrs_row'],
                                   k['properties']['wrs_path']))

        # remove duplicates (same acquisition day)
        seen = set()
        images = [x for x in images
                  if not (x['properties']['acquired'] in seen or
                          # seen.add() returns None
                          seen.add(x['properties']['acquired']))]

    print('Found {} images'.format(len(images)))
    return images
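# The duplicate filtering above relies on set.add() returning None: the
# `or seen.add(key)` branch only runs the first time a key is encountered and
# always evaluates falsy, so the first record per key is kept in order. A
# minimal standalone sketch of the same idiom (hypothetical helper name):
def _example_dedup_keep_first(records):
    """Keep the first record for each acquisition key, preserving order."""
    seen = set()
    return [r for r in records
            if not (r['acquired'] in seen or seen.add(r['acquired']))]

# _example_dedup_keep_first([{'acquired': 'a'}, {'acquired': 'a'},
#                            {'acquired': 'b'}])
# -> [{'acquired': 'a'}, {'acquired': 'b'}]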
def get_time_series(aoi, start_date=None, end_date=None,
                    item_types=['PSScene3Band'], asset_type='analytic',
                    out_dir='',
                    parallel_downloads=multiprocessing.cpu_count()):
    """
    Main function: download and crop Planet images.
    """
    # list available images
    images = search_planet.search(aoi, start_date, end_date,
                                  item_types=item_types)
    print('Found {} images'.format(len(images)))

    # build filenames
    fnames = [os.path.join(out_dir, '{}.tif'.format(fname_from_metadata(x)))
              for x in images]

    # convert aoi coordinates to utm
    ulx, uly, lrx, lry, utm_zone, lat_band = utils.utm_bbx(aoi)

    # activate images and download crops
    utils.mkdir_p(out_dir)
    print('Downloading {} crops...'.format(len(images)), end=' ')
    parallel.run_calls(download_crop, list(zip(fnames, images)),
                       extra_args=(asset_type, ulx, uly, lrx, lry, utm_zone,
                                   lat_band),
                       pool_type='threads', nb_workers=parallel_downloads,
                       timeout=300)

    # embed some metadata in the image files
    for f, img in zip(fnames, images):  # embed some metadata as gdal geotiff tags
        if os.path.isfile(f):
            for k, v in metadata_from_metadata_dict(img).items():
                utils.set_geotif_metadata_item(f, k, v)
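# parallel.run_calls is an external helper used throughout these functions.
# Judging only from the call sites above, it appears to map a function over a
# list of argument tuples (each extended with fixed extra_args) in a thread or
# process pool and collect the results in order. A rough, assumed-equivalent
# sketch with concurrent.futures, not the actual helper:
def _example_run_calls(fun, args_list, extra_args=(), nb_workers=4,
                       timeout=None):
    """Hypothetical stand-in illustrating the assumed calling convention."""
    from concurrent.futures import ThreadPoolExecutor

    def call(a):
        # accept either a single argument or a tuple of arguments
        a = a if isinstance(a, tuple) else (a,)
        return fun(*a, *extra_args)

    with ThreadPoolExecutor(max_workers=nb_workers) as executor:
        futures = [executor.submit(call, a) for a in args_list]
        return [f.result(timeout=timeout) for f in futures]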
def find_crossovers(record):
    # build a geometry object representing the Hyperion image footprint
    geometry = usgs_record_to_geojson(record)

    # parse the Hyperion acquisition date
    usgs_image_acquisition_time = datetime.strptime(
        record["extended"]["Scene Start Time"], '%Y:%j:%H:%M:%S.%f')

    # compute the time bounds of the crossover search window
    window_start, window_end = calculate_crossover_window(
        usgs_image_acquisition_time, CROSS_OVER_WINDOW_HOURS)

    # search the Planet API
    search_results = search_planet.search(geometry, window_start, window_end)

    crossovers = []
    if len(search_results) > 0:
        for result in search_results:
            planet_image_acquisition_time = dateutil.parser.parse(
                result["properties"]["acquired"]).replace(tzinfo=None)
            acquisition_offset = time_between_dates(
                usgs_image_acquisition_time, planet_image_acquisition_time)
            crossover = {
                "id": result["id"],
                "acquisition_offset": acquisition_offset,
                "properties": result["properties"],
            }
            crossovers.append(crossover)
    return crossovers
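# calculate_crossover_window and time_between_dates are not defined in this
# snippet. The sketches below are only consistent with how they are called
# above (a symmetric window of +/- window_hours around the acquisition, and an
# offset between two datetimes); the actual project helpers may differ.
def _example_calculate_crossover_window(acquisition_time, window_hours):
    """Return (start, end) of a symmetric search window around a datetime."""
    from datetime import timedelta
    delta = timedelta(hours=window_hours)
    return acquisition_time - delta, acquisition_time + delta


def _example_time_between_dates(t0, t1):
    """Return the absolute offset between two datetimes, in seconds."""
    return abs((t1 - t0).total_seconds())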
def get_time_series(aoi, start_date=None, end_date=None, bands=['B04'],
                    out_dir='', search_api='devseed', product_type=None,
                    parallel_downloads=multiprocessing.cpu_count()):
    """
    Main function: crop and download a time series of Sentinel-2 images.
    """
    utils.print_elapsed_time.t0 = datetime.datetime.now()

    # list available images
    if search_api == 'devseed':
        if product_type is not None:
            print("WARNING: product_type option is available only with"
                  " search_api='scihub'")
        images = search_devseed.search(aoi, start_date, end_date,
                                       'Sentinel-2')['results']
    elif search_api == 'scihub':
        import search_scihub
        if product_type is not None:
            product_type = 'S2MSI{}'.format(product_type[1:])
        images = search_scihub.search(aoi, start_date, end_date,
                                      satellite='Sentinel-2',
                                      product_type=product_type)
    elif search_api == 'planet':
        if product_type is not None:
            print("WARNING: product_type option is available only with"
                  " search_api='scihub'")
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Sentinel2L1C'])

    # sort images by acquisition date, then by mgrs id
    images.sort(key=lambda k: date_and_mgrs_id_from_metadata_dict(k, search_api))

    # remove duplicates (same acquisition day, different mgrs tile id)
    seen = set()
    images = [x for x in images
              if not (date_and_mgrs_id_from_metadata_dict(x, search_api)[0] in seen or
                      # seen.add() returns None
                      seen.add(date_and_mgrs_id_from_metadata_dict(x, search_api)[0]))]
    print('Found {} images'.format(len(images)))
    utils.print_elapsed_time()

    # choose whether to use http or s3
    if WE_CAN_ACCESS_AWS_THROUGH_S3:
        aws_url_from_metadata_dict = aws_s3_url_from_metadata_dict
    else:
        aws_url_from_metadata_dict = aws_http_url_from_metadata_dict

    # build urls, filenames and crops coordinates
    crops_args = []
    for img in images:
        url_base = aws_url_from_metadata_dict(img, search_api)
        name = filename_from_metadata_dict(img, search_api)
        coords = utils.utm_bbx(aoi,  # convert aoi coordinates to utm
                               utm_zone=int(utm_zone_from_metadata_dict(img, search_api)),
                               r=60)  # round to multiples of 60 (B01 resolution)
        for b in bands:
            fname = os.path.join(out_dir, '{}_band_{}.tif'.format(name, b))
            if 'MSIL2A' in title_from_metadata_dict(img, search_api):
                url = '{}/R{}m/{}.jp2'.format(url_base, band_resolution(b), b)
            else:
                url = '{}/{}.jp2'.format(url_base, b)
            crops_args.append((fname, url, *coords))

    # download crops
    utils.mkdir_p(out_dir)
    print('Downloading {} crops ({} images with {} bands)...'.format(
        len(crops_args), len(images), len(bands)), end=' ')
    parallel.run_calls(utils.crop_with_gdal_translate, crops_args,
                       extra_args=('UInt16',), pool_type='threads',
                       nb_workers=parallel_downloads)
    utils.print_elapsed_time()

    # discard images that failed to download
    images = [x for x in images
              if bands_files_are_valid(x, bands, search_api, out_dir)]

    # discard images that are totally covered by clouds
    utils.mkdir_p(os.path.join(out_dir, 'cloudy'))
    urls = [aws_http_url_from_metadata_dict(img, search_api) for img in images]
    print('Reading {} cloud masks...'.format(len(urls)), end=' ')
    cloudy = parallel.run_calls(is_image_cloudy_at_location, urls,
                                extra_args=(utils.geojson_lonlat_to_utm(aoi),),
                                pool_type='threads',
                                nb_workers=parallel_downloads, verbose=True)
    for img, cloud in zip(images, cloudy):
        name = filename_from_metadata_dict(img, search_api)
        if cloud:
            for b in bands:
                f = '{}_band_{}.tif'.format(name, b)
                shutil.move(os.path.join(out_dir, f),
                            os.path.join(out_dir, 'cloudy', f))
    print('{} cloudy images out of {}'.format(sum(cloudy), len(images)))
    images = [i for i, c in zip(images, cloudy) if not c]
    utils.print_elapsed_time()

    # embed some metadata in the remaining image files
    print('Embedding metadata in geotiff headers...')
    for img in images:
        name = filename_from_metadata_dict(img, search_api)
        d = format_metadata_dict(img)
        for b in bands:  # embed some metadata as gdal geotiff tags
            f = os.path.join(out_dir, '{}_band_{}.tif'.format(name, b))
            utils.set_geotif_metadata(f, metadata=d)
    utils.print_elapsed_time()
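# Illustrative call (not from the original module); the AOI, date range and
# band list are placeholders, and the function is assumed to be importable
# alongside its module-level dependencies (utils, parallel, search backends).
def _example_download_s2_series():  # hypothetical helper, for illustration only
    import datetime
    import geojson

    aoi = geojson.Polygon([[(2.35, 48.85), (2.36, 48.85), (2.36, 48.86),
                            (2.35, 48.86), (2.35, 48.85)]])
    get_time_series(aoi,
                    start_date=datetime.datetime(2018, 6, 1),
                    end_date=datetime.datetime(2018, 7, 1),
                    bands=['B04', 'B08'],
                    out_dir='s2_crops',
                    search_api='scihub',
                    product_type='L2A')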
def get_time_series(aoi, start_date=None, end_date=None,
                    item_types=['PSScene3Band'], asset_type='analytic',
                    out_dir='',
                    parallel_downloads=multiprocessing.cpu_count(),
                    clip_and_ship=True, no_crop=False, satellite_id=None,
                    search_type='contains', remove_duplicates=True):
    """
    Main function: crop and download Planet images.
    """
    # list available images
    items = search_planet.search(aoi, start_date, end_date,
                                 item_types=item_types,
                                 satellite_id=satellite_id,
                                 search_type=search_type,
                                 remove_duplicates=remove_duplicates)
    print('Found {} images'.format(len(items)))

    # list the requested asset for each available (and allowed) image
    print('Listing available {} assets...'.format(asset_type), flush=True,
          end=' ')
    assets = parallel.run_calls(get_item_asset_info, items,
                                extra_args=(asset_type,), pool_type='threads',
                                nb_workers=parallel_downloads, timeout=600)

    # remove 'None' (ie not allowed) assets and corresponding items
    items = [i for (i, a) in zip(items, assets) if a]
    assets = [a for a in assets if a]
    print('Have permissions for {} images'.format(len(items)))

    # activate the allowed assets
    print('Requesting activation of {} images...'.format(len(assets)),
          flush=True, end=' ')
    parallel.run_calls(request_activation, assets, pool_type='threads',
                       nb_workers=parallel_downloads, timeout=600)

    # warn user about quota usage
    n = len(assets)
    if clip_and_ship:
        a = n * area.area(aoi)
    else:
        a = np.sum(area.area(i['geometry']) for i in items)
    print('Your current quota usage is {}'.format(get_quota()), flush=True)
    print('Downloading these {} images will increase it by {:.3f} km²'.format(
        n, a / 1e6), flush=True)

    # build filenames
    ext = 'zip' if clip_and_ship else 'tif'
    out_dir = os.path.abspath(os.path.expanduser(out_dir))
    fnames = [os.path.join(out_dir, '{}.{}'.format(fname_from_metadata(i), ext))
              for i in items]

    if clip_and_ship:
        print('Requesting clip of {} images...'.format(len(assets)),
              flush=True, end=' ')
        clips = parallel.run_calls(request_clip, list(zip(items, assets)),
                                   extra_args=(aoi,), pool_type='threads',
                                   nb_workers=parallel_downloads, timeout=3600)

        # remove clips that were rejected
        ok = [i for i, x in enumerate(clips) if x]
        clips = [clips[i] for i in range(len(clips)) if i in ok]
        fnames = [fnames[i] for i in range(len(fnames)) if i in ok]

        print('Downloading {} clips...'.format(len(clips)), end=' ',
              flush=True)
        parallel.run_calls(download_clip, list(zip(clips, fnames)),
                           pool_type='threads',
                           nb_workers=parallel_downloads, timeout=3600)
    elif no_crop:  # download full images
        os.makedirs(out_dir, exist_ok=True)
        print('Downloading {} full images...'.format(len(assets)), end=' ')
        parallel.run_calls(download_asset, list(zip(fnames, assets)),
                           pool_type='threads',
                           nb_workers=parallel_downloads, timeout=1200)
    else:
        if asset_type in ['udm', 'visual', 'analytic', 'analytic_dn',
                          'analytic_sr']:
            aoi_type = 'utm_rectangle'
            aoi = utils.utm_bbx(aoi)
        else:
            aoi_type = 'lonlat_polygon'

        # download crops with gdal through vsicurl
        os.makedirs(out_dir, exist_ok=True)
        print('Downloading {} crops...'.format(len(assets)), end=' ')
        parallel.run_calls(download_crop, list(zip(fnames, assets)),
                           extra_args=(aoi, aoi_type), pool_type='threads',
                           nb_workers=parallel_downloads, timeout=300)

    # embed some metadata in the image files
    for f, img in zip(fnames, items):  # embed some metadata as gdal geotiff tags
        if os.path.isfile(f):
            utils.set_geotif_metadata_items(f, metadata_from_metadata_dict(img))
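# Illustrative call (not from the original module); item and asset types are
# the defaults above, everything else is a placeholder. A valid Planet API key
# is assumed to be configured for search_planet and the asset requests.
def _example_download_planet_clips():  # hypothetical helper, illustration only
    import datetime
    import geojson

    aoi = geojson.Polygon([[(2.35, 48.85), (2.36, 48.85), (2.36, 48.86),
                            (2.35, 48.86), (2.35, 48.85)]])
    get_time_series(aoi,
                    start_date=datetime.datetime(2019, 1, 1),
                    end_date=datetime.datetime(2019, 1, 15),
                    item_types=['PSScene3Band'],
                    asset_type='analytic',
                    out_dir='planet_clips',
                    clip_and_ship=True)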
def get_time_series(aoi, start_date=None, end_date=None, bands=[8],
                    out_dir='', search_api='devseed', parallel_downloads=100,
                    debug=False):
    """
    Main function: crop and download a time series of Landsat-8 images.
    """
    utils.print_elapsed_time.t0 = datetime.datetime.now()

    # list available images
    seen = set()
    if search_api == 'devseed':
        images = search_devseed.search(aoi, start_date, end_date,
                                       'Landsat-8')['results']
        images.sort(key=lambda k: (k['acquisitionDate'], k['row'], k['path']))

        # remove duplicates (same acquisition day)
        images = [x for x in images
                  if not (x['acquisitionDate'] in seen or
                          # seen.add() returns None
                          seen.add(x['acquisitionDate']))]
    elif search_api == 'planet':
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Landsat8L1G'])

        # sort images by acquisition date, then by acquisition row and path
        images.sort(key=lambda k: (k['properties']['acquired'],
                                   k['properties']['wrs_row'],
                                   k['properties']['wrs_path']))

        # remove duplicates (same acquisition day)
        images = [x for x in images
                  if not (x['properties']['acquired'] in seen or
                          # seen.add() returns None
                          seen.add(x['properties']['acquired']))]
    print('Found {} images'.format(len(images)))
    utils.print_elapsed_time()

    # build urls
    urls = parallel.run_calls(aws_urls_from_metadata_dict, list(images),
                              extra_args=(search_api,), pool_type='threads',
                              nb_workers=parallel_downloads, verbose=False)

    # build gdal urls and filenames
    download_urls = []
    fnames = []
    for img, bands_urls in zip(images, urls):
        name = filename_from_metadata_dict(img, search_api)
        for b in set(bands + ['QA']):  # the QA band is needed for cloud detection
            download_urls += [s for s in bands_urls
                              if s.endswith('B{}.TIF'.format(b))]
            fnames.append(os.path.join(out_dir,
                                       '{}_band_{}.tif'.format(name, b)))

    # convert aoi coordinates to utm
    ulx, uly, lrx, lry, utm_zone, lat_band = utils.utm_bbx(aoi)

    # download crops
    utils.mkdir_p(out_dir)
    print('Downloading {} crops ({} images with {} bands)...'.format(
        len(download_urls), len(images), len(bands) + 1), end=' ')
    parallel.run_calls(utils.crop_with_gdal_translate,
                       list(zip(fnames, download_urls)),
                       extra_args=(ulx, uly, lrx, lry, utm_zone, lat_band),
                       pool_type='threads', nb_workers=parallel_downloads)
    utils.print_elapsed_time()

    # discard images that failed to download
    images = [x for x in images
              if bands_files_are_valid(x, list(set(bands + ['QA'])),
                                       search_api, out_dir)]

    # discard images that are totally covered by clouds
    utils.mkdir_p(os.path.join(out_dir, 'cloudy'))
    names = [filename_from_metadata_dict(img, search_api) for img in images]
    qa_names = [os.path.join(out_dir, '{}_band_QA.tif'.format(f)) for f in names]
    cloudy = parallel.run_calls(is_image_cloudy, qa_names,
                                pool_type='processes',
                                nb_workers=parallel_downloads, verbose=False)
    for name, cloud in zip(names, cloudy):
        if cloud:
            for b in list(set(bands + ['QA'])):
                f = '{}_band_{}.tif'.format(name, b)
                shutil.move(os.path.join(out_dir, f),
                            os.path.join(out_dir, 'cloudy', f))
    print('{} cloudy images out of {}'.format(sum(cloudy), len(images)))
    images = [i for i, c in zip(images, cloudy) if not c]
    utils.print_elapsed_time()

    # group band crops per image
    crops = []  # list of lists: [[crop1_b1, crop1_b2 ...], [crop2_b1 ...] ...]
    for img in images:
        name = filename_from_metadata_dict(img, search_api)
        crops.append([os.path.join(out_dir, '{}_band_{}.tif'.format(name, b))
                      for b in bands])

    # embed some metadata in the remaining image files
    for img, bands_fnames in zip(images, crops):
        for f in bands_fnames:  # embed some metadata as gdal geotiff tags
            for k, v in metadata_from_metadata_dict(img, search_api).items():
                utils.set_geotif_metadata_item(f, k, v)
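# Illustrative call (not from the original module); the AOI, dates and band
# numbers are placeholders. Landsat-8 band 4 is red and band 5 is near
# infrared; the QA band is fetched automatically for cloud screening.
def _example_download_l8_series():  # hypothetical helper, illustration only
    import datetime
    import geojson

    aoi = geojson.Polygon([[(2.35, 48.85), (2.36, 48.85), (2.36, 48.86),
                            (2.35, 48.86), (2.35, 48.85)]])
    get_time_series(aoi,
                    start_date=datetime.datetime(2017, 5, 1),
                    end_date=datetime.datetime(2017, 8, 1),
                    bands=[4, 5],
                    out_dir='l8_crops',
                    search_api='devseed')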
def get_time_series(aoi, start_date=None, end_date=None, bands=['B04'],
                    out_dir='', search_api='devseed',
                    parallel_downloads=multiprocessing.cpu_count()):
    """
    Main function: crop and download a time series of Sentinel-2 images.
    """
    utils.print_elapsed_time.t0 = datetime.datetime.now()

    # list available images
    if search_api == 'devseed':
        images = search_devseed.search(aoi, start_date, end_date,
                                       'Sentinel-2')['results']
    elif search_api == 'scihub':
        import search_scihub
        images = search_scihub.search(aoi, start_date, end_date,
                                      satellite='Sentinel-2')
    elif search_api == 'planet':
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Sentinel2L1C'])

    # sort images by acquisition date, then by mgrs id
    images.sort(key=lambda k: date_and_mgrs_id_from_metadata_dict(k, search_api))

    # remove duplicates (same acquisition day, different mgrs tile id)
    seen = set()
    images = [x for x in images
              if not (date_and_mgrs_id_from_metadata_dict(x, search_api)[0] in seen or
                      # seen.add() returns None
                      seen.add(date_and_mgrs_id_from_metadata_dict(x, search_api)[0]))]
    print('Found {} images'.format(len(images)))
    utils.print_elapsed_time()

    # build urls and filenames
    urls = []
    fnames = []
    for img in images:
        url = aws_url_from_metadata_dict(img, search_api)
        name = filename_from_metadata_dict(img, search_api)
        for b in bands:
            urls.append('{}{}.jp2'.format(url, b))
            fnames.append(os.path.join(out_dir,
                                       '{}_band_{}.tif'.format(name, b)))

    # convert aoi coordinates to utm
    ulx, uly, lrx, lry, utm_zone, lat_band = utils.utm_bbx(aoi)

    # download crops
    utils.mkdir_p(out_dir)
    print('Downloading {} crops ({} images with {} bands)...'.format(
        len(urls), len(images), len(bands)), end=' ')
    parallel.run_calls(utils.crop_with_gdal_translate, list(zip(fnames, urls)),
                       extra_args=(ulx, uly, lrx, lry, utm_zone, lat_band,
                                   'UInt16'),
                       pool_type='threads', nb_workers=parallel_downloads)
    utils.print_elapsed_time()

    # discard images that failed to download
    images = [x for x in images
              if bands_files_are_valid(x, bands, search_api, out_dir)]

    # discard images that are totally covered by clouds
    utils.mkdir_p(os.path.join(out_dir, 'cloudy'))
    urls = [aws_url_from_metadata_dict(img, search_api) for img in images]
    print('Reading {} cloud masks...'.format(len(urls)), end=' ')
    cloudy = parallel.run_calls(is_image_cloudy_at_location, urls,
                                extra_args=(utils.geojson_lonlat_to_utm(aoi),),
                                pool_type='threads',
                                nb_workers=parallel_downloads, verbose=True)
    for img, cloud in zip(images, cloudy):
        name = filename_from_metadata_dict(img, search_api)
        if cloud:
            for b in bands:
                f = '{}_band_{}.tif'.format(name, b)
                shutil.move(os.path.join(out_dir, f),
                            os.path.join(out_dir, 'cloudy', f))
    print('{} cloudy images out of {}'.format(sum(cloudy), len(images)))
    images = [i for i, c in zip(images, cloudy) if not c]
    utils.print_elapsed_time()

    # group band crops per image
    crops = []  # list of lists: [[crop1_b1, crop1_b2 ...], [crop2_b1 ...] ...]
    for img in images:
        name = filename_from_metadata_dict(img, search_api)
        crops.append([os.path.join(out_dir, '{}_band_{}.tif'.format(name, b))
                      for b in bands])

    # embed some metadata in the remaining image files
    for img, bands_fnames in zip(images, crops):
        for f in bands_fnames:  # embed some metadata as gdal geotiff tags
            for k, v in metadata_from_metadata_dict(img, search_api).items():
                utils.set_geotif_metadata_item(f, k, v)
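# The crops written above carry their metadata as GeoTIFF tags (via
# utils.set_geotif_metadata_item). A minimal sketch showing how such tags can
# be read back with the GDAL Python bindings; the filename is a placeholder.
def _example_read_geotiff_tags(path='crop_band_B04.tif'):  # illustration only
    from osgeo import gdal

    dataset = gdal.Open(path)
    if dataset is None:
        raise IOError('could not open {}'.format(path))
    # default-domain metadata, i.e. the key/value tags embedded above
    return dataset.GetMetadata()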