def search(aoi, start_date=None, end_date=None, product_type=None,
           api='devseed'):
    """
    Search Sentinel-2 images covering an AOI and timespan using a given API.

    Args:
        aoi (geojson.Polygon): area of interest
        start_date (datetime.datetime): start of the search time range
        end_date (datetime.datetime): end of the search time range
        product_type (str, optional): either 'L1C' or 'L2A'
        api (str, optional): either devseed (default), scihub, planet or gcloud

    Returns:
        list of dicts (the attribute dicts of the parsed image objects),
        sorted by acquisition day then mgrs id, with duplicates removed

    Raises:
        ValueError: if `api` is not one of the supported backends
    """
    # list available images; each raw result is wrapped in the parser class
    # matching the API so downstream code sees a uniform interface
    if api == 'devseed':
        import search_devseed
        images = search_devseed.search(aoi, start_date, end_date,
                                       'Sentinel-2')['features']
        images = [metadata_parser.DevSeedParser(img) for img in images]
    elif api == 'scihub':
        import search_scihub
        if product_type is not None:
            # scihub uses 'S2MSI1C' / 'S2MSI2A' rather than 'L1C' / 'L2A'
            product_type = 'S2MSI{}'.format(product_type[1:])
        images = search_scihub.search(aoi, start_date, end_date,
                                      satellite='Sentinel-2',
                                      product_type=product_type)
        images = [metadata_parser.SciHubParser(img) for img in images]
    elif api == 'planet':
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Sentinel2L1C'])
        images = [metadata_parser.PlanetParser(img) for img in images]
    elif api == 'gcloud':
        import search_gcloud
        images = search_gcloud.search(aoi, start_date, end_date)
        images = [metadata_parser.GcloudParser(img) for img in images]
    else:
        # an unknown api used to fall through and crash later with a
        # NameError on `images`; fail fast with a clear message instead
        raise ValueError("unknown api '{}', should be 'devseed', 'scihub', "
                         "'planet' or 'gcloud'".format(api))

    # sort images by acquisition day, then by mgrs id
    images.sort(key=(lambda k: (k.date.date(), k.mgrs_id)))

    # remove duplicates (same acquisition date but different mgrs tile id)
    # NOTE(review): the dedup key is the full datetime, not the day, so two
    # tiles of the same day are merged only when their sensing times match
    # exactly -- confirm this is the intended granularity
    seen = set()
    unique_images = []
    for img in images:
        if img.date not in seen:
            seen.add(img.date)
            unique_images.append(img)

    print('Found {} images'.format(len(unique_images)))
    return [vars(img) for img in unique_images]
def search(aoi, start_date=None, end_date=None, api='devseed'):
    """
    Search Landsat-8 images covering an AOI and timespan using a given API.

    Args:
        aoi (geojson.Polygon): area of interest
        start_date (datetime.datetime): start of the search time range
        end_date (datetime.datetime): end of the search time range
        api (str, optional): either devseed (default) or planet

    Returns:
        list of image metadata dicts, sorted by acquisition date and
        deduplicated so that at most one image is kept per acquisition date

    Raises:
        ValueError: if `api` is not one of the supported backends
    """
    # list available images
    if api == 'devseed':
        import search_devseed
        images = search_devseed.search(aoi, start_date, end_date, 'Landsat-8')
        images.sort(key=lambda k: (k['properties']['datetime'],
                                   k['properties']['eo:row'],
                                   k['properties']['eo:column']))
        for i in images:  # add some metadata at the root of the dict
            i['date'] = dateutil.parser.parse(i['properties']['datetime'])
            i['cloud_cover'] = i['properties']['eo:cloud_cover']
            i['thumbnail'] = i['assets']['thumbnail']['href']

        # devseed results keep the acquisition date under 'datetime'
        def acquisition_date(x):
            return x['properties']['datetime']
    elif api == 'planet':
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Landsat8L1G'])
        # sort images by acquisition date, then by acquisition row and path
        images.sort(key=lambda k: (k['properties']['acquired'],
                                   k['properties']['wrs_row'],
                                   k['properties']['wrs_path']))

        # planet results keep the acquisition date under 'acquired'
        def acquisition_date(x):
            return x['properties']['acquired']
    else:
        raise ValueError("unknown api '{}', should be 'devseed' or "
                         "'planet'".format(api))

    # remove duplicates (same acquisition day). The dedup previously read
    # ['properties']['acquired'] for both apis, which raises KeyError for
    # devseed results (their key is 'datetime'); use the per-api key instead.
    seen = set()
    unique_images = []
    for x in images:
        d = acquisition_date(x)
        if d not in seen:
            seen.add(d)
            unique_images.append(x)

    print('Found {} images'.format(len(unique_images)))
    return unique_images
def get_time_series(aoi, start_date=None, end_date=None, bands=None,
                    out_dir='', search_api='devseed', parallel_downloads=100,
                    debug=False):
    """
    Main function: crop and download a time series of Landsat-8 images.

    Args:
        aoi (geojson.Polygon): area of interest
        start_date (datetime.datetime): start of the search time range
        end_date (datetime.datetime): end of the search time range
        bands (list, optional): band identifiers to download (default [8]);
            the QA band is always downloaded too, for cloud detection
        out_dir (str, optional): directory where the crops are written
        search_api (str, optional): either 'devseed' (default) or 'planet'
        parallel_downloads (int, optional): number of parallel workers
        debug (bool, optional): unused in this body, kept for interface
            compatibility

    Raises:
        ValueError: if `search_api` is not one of the supported backends
    """
    if bands is None:  # avoid the mutable default argument pitfall
        bands = [8]
    utils.print_elapsed_time.t0 = datetime.datetime.now()

    # list available images
    seen = set()
    if search_api == 'devseed':
        import search_devseed
        images = search_devseed.search(aoi, start_date, end_date,
                                       'Landsat-8')['results']
        images.sort(key=lambda k: (k['acquisitionDate'], k['row'], k['path']))

        # remove duplicates (same acquisition day)
        images = [x for x in images
                  if not (x['acquisitionDate'] in seen
                          or seen.add(x['acquisitionDate']))]  # add() is None
    elif search_api == 'planet':
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Landsat8L1G'])
        # sort images by acquisition date, then by acquisition row and path
        images.sort(key=lambda k: (k['properties']['acquired'],
                                   k['properties']['wrs_row'],
                                   k['properties']['wrs_path']))

        # remove duplicates (same acquisition day)
        images = [x for x in images
                  if not (x['properties']['acquired'] in seen
                          or seen.add(x['properties']['acquired']))]
    else:
        # previously an unknown search_api crashed later with a NameError
        raise ValueError("unknown search_api '{}', should be 'devseed' or "
                         "'planet'".format(search_api))
    print('Found {} images'.format(len(images)))
    utils.print_elapsed_time()

    # build urls
    urls = parallel.run_calls(aws_urls_from_metadata_dict, list(images),
                              extra_args=(search_api,), pool_type='threads',
                              nb_workers=parallel_downloads, verbose=False)

    # build gdal urls and filenames
    download_urls = []
    fnames = []
    for img, bands_urls in zip(images, urls):
        name = filename_from_metadata_dict(img, search_api)
        for b in set(bands + ['QA']):  # the QA band is needed for cloud detection
            # NOTE(review): if a band has no matching url (or several), the
            # download_urls and fnames lists fall out of step -- confirm that
            # aws_urls_from_metadata_dict yields exactly one url per band
            download_urls += [s for s in bands_urls
                              if s.endswith('B{}.TIF'.format(b))]
            fnames.append(os.path.join(out_dir,
                                       '{}_band_{}.tif'.format(name, b)))

    # convert aoi coordinates to utm
    ulx, uly, lrx, lry, utm_zone, lat_band = utils.utm_bbx(aoi)

    # download crops
    utils.mkdir_p(out_dir)
    print('Downloading {} crops ({} images with {} bands)...'.format(
        len(download_urls), len(images), len(bands) + 1), end=' ')
    parallel.run_calls(utils.crop_with_gdal_translate,
                       list(zip(fnames, download_urls)),
                       extra_args=(ulx, uly, lrx, lry, utm_zone, lat_band),
                       pool_type='threads', nb_workers=parallel_downloads)
    utils.print_elapsed_time()

    # discard images that failed to download
    images = [x for x in images
              if bands_files_are_valid(x, list(set(bands + ['QA'])),
                                       search_api, out_dir)]

    # discard images that are totally covered by clouds
    utils.mkdir_p(os.path.join(out_dir, 'cloudy'))
    names = [filename_from_metadata_dict(img, search_api) for img in images]
    qa_names = [os.path.join(out_dir, '{}_band_QA.tif'.format(f))
                for f in names]
    cloudy = parallel.run_calls(is_image_cloudy, qa_names,
                                pool_type='processes',
                                nb_workers=parallel_downloads, verbose=False)
    for name, cloud in zip(names, cloudy):
        if cloud:
            for b in list(set(bands + ['QA'])):
                f = '{}_band_{}.tif'.format(name, b)
                shutil.move(os.path.join(out_dir, f),
                            os.path.join(out_dir, 'cloudy', f))
    print('{} cloudy images out of {}'.format(sum(cloudy), len(images)))
    images = [i for i, c in zip(images, cloudy) if not c]
    utils.print_elapsed_time()

    # group band crops per image
    crops = []  # list of lists: [[crop1_b1, crop1_b2 ...], [crop2_b1 ...] ...]
    for img in images:
        name = filename_from_metadata_dict(img, search_api)
        crops.append([os.path.join(out_dir,
                                   '{}_band_{}.tif'.format(name, b))
                      for b in bands])

    # embed some metadata as gdal geotiff tags in the remaining image files.
    # BUG FIX: the previous code read the loop-leftover `img`, so every file
    # was tagged with the LAST image's metadata; pair each image with its crops
    for img, bands_fnames in zip(images, crops):
        metadata = metadata_from_metadata_dict(img, search_api)
        for f in bands_fnames:
            for k, v in metadata.items():
                utils.set_geotif_metadata_item(f, k, v)
def get_time_series(aoi, start_date=None, end_date=None, bands=None,
                    out_dir='', search_api='devseed',
                    parallel_downloads=multiprocessing.cpu_count()):
    """
    Main function: crop and download a time series of Sentinel-2 images.

    Args:
        aoi (geojson.Polygon): area of interest
        start_date (datetime.datetime): start of the search time range
        end_date (datetime.datetime): end of the search time range
        bands (list, optional): band identifiers to download
            (default ['B04'])
        out_dir (str, optional): directory where the crops are written
        search_api (str, optional): either 'devseed' (default), 'scihub'
            or 'planet'
        parallel_downloads (int, optional): number of parallel workers

    Raises:
        ValueError: if `search_api` is not one of the supported backends
    """
    if bands is None:  # avoid the mutable default argument pitfall
        bands = ['B04']
    utils.print_elapsed_time.t0 = datetime.datetime.now()

    # list available images
    if search_api == 'devseed':
        import search_devseed
        images = search_devseed.search(aoi, start_date, end_date,
                                       'Sentinel-2')['results']
    elif search_api == 'scihub':
        import search_scihub
        images = search_scihub.search(aoi, start_date, end_date,
                                      satellite='Sentinel-2')
    elif search_api == 'planet':
        import search_planet
        images = search_planet.search(aoi, start_date, end_date,
                                      item_types=['Sentinel2L1C'])
    else:
        # previously an unknown search_api crashed later with a NameError
        raise ValueError("unknown search_api '{}', should be 'devseed', "
                         "'scihub' or 'planet'".format(search_api))

    # sort images by acquisition date, then by mgrs id
    images.sort(key=lambda k: date_and_mgrs_id_from_metadata_dict(k,
                                                                  search_api))

    # remove duplicates (same acquisition day, different mgrs tile id).
    # The date is extracted once per image instead of up to three times.
    seen = set()
    unique_images = []
    for img in images:
        date = date_and_mgrs_id_from_metadata_dict(img, search_api)[0]
        if date not in seen:
            seen.add(date)
            unique_images.append(img)
    images = unique_images
    print('Found {} images'.format(len(images)))
    utils.print_elapsed_time()

    # build urls and filenames
    urls = []
    fnames = []
    for img in images:
        url = aws_url_from_metadata_dict(img, search_api)
        name = filename_from_metadata_dict(img, search_api)
        for b in bands:
            urls.append('{}{}.jp2'.format(url, b))
            fnames.append(os.path.join(out_dir,
                                       '{}_band_{}.tif'.format(name, b)))

    # convert aoi coordinates to utm
    ulx, uly, lrx, lry, utm_zone, lat_band = utils.utm_bbx(aoi)

    # download crops
    utils.mkdir_p(out_dir)
    print('Downloading {} crops ({} images with {} bands)...'.format(
        len(urls), len(images), len(bands)), end=' ')
    parallel.run_calls(utils.crop_with_gdal_translate, list(zip(fnames, urls)),
                       extra_args=(ulx, uly, lrx, lry, utm_zone, lat_band,
                                   'UInt16'),
                       pool_type='threads', nb_workers=parallel_downloads)
    utils.print_elapsed_time()

    # discard images that failed to download
    images = [x for x in images
              if bands_files_are_valid(x, bands, search_api, out_dir)]

    # discard images that are totally covered by clouds
    utils.mkdir_p(os.path.join(out_dir, 'cloudy'))
    urls = [aws_url_from_metadata_dict(img, search_api) for img in images]
    print('Reading {} cloud masks...'.format(len(urls)), end=' ')
    cloudy = parallel.run_calls(is_image_cloudy_at_location, urls,
                                extra_args=(utils.geojson_lonlat_to_utm(aoi),),
                                pool_type='threads',
                                nb_workers=parallel_downloads, verbose=True)
    for img, cloud in zip(images, cloudy):
        name = filename_from_metadata_dict(img, search_api)
        if cloud:
            for b in bands:
                f = '{}_band_{}.tif'.format(name, b)
                shutil.move(os.path.join(out_dir, f),
                            os.path.join(out_dir, 'cloudy', f))
    print('{} cloudy images out of {}'.format(sum(cloudy), len(images)))
    images = [i for i, c in zip(images, cloudy) if not c]
    utils.print_elapsed_time()

    # group band crops per image
    crops = []  # list of lists: [[crop1_b1, crop1_b2 ...], [crop2_b1 ...] ...]
    for img in images:
        name = filename_from_metadata_dict(img, search_api)
        crops.append([os.path.join(out_dir,
                                   '{}_band_{}.tif'.format(name, b))
                      for b in bands])

    # embed some metadata as gdal geotiff tags in the remaining image files.
    # BUG FIX: the previous code read the loop-leftover `img`, so every file
    # was tagged with the LAST image's metadata; pair each image with its crops
    for img, bands_fnames in zip(images, crops):
        metadata = metadata_from_metadata_dict(img, search_api)
        for f in bands_fnames:
            for k, v in metadata.items():
                utils.set_geotif_metadata_item(f, k, v)
import datetime

import utils
import search_devseed

# build a small area of interest and run a devseed Sentinel-2 search over a
# fixed two-week window
area = utils.geojson_geometry_object(29.9793, 31.1346, 5000, 5000)
found = search_devseed.search(area,
                              start_date=datetime.datetime(2019, 1, 15),
                              end_date=datetime.datetime(2019, 1, 30),
                              satellite='Sentinel-2')

# the search is expected to return exactly these products, in this order
expected_titles = [
    'S2B_MSIL1C_20190129T083209_N0207_R021_T36RUU_20190129T103220',
    'S2A_MSIL1C_20190124T083231_N0207_R021_T36RUU_20190124T095836',
    'S2B_MSIL1C_20190119T083259_N0207_R021_T36RUU_20190119T104924',
]
found_titles = [r['properties']['sentinel:product_id'] for r in found]
assert found_titles == expected_titles