def test_missing_dependency_dataframe(monkeypatch):
    api = SentinelAPI("mock_user", "mock_password")

    with pytest.raises(ImportError):
        monkeypatch.setitem(sys.modules, "pandas", None)
        api.to_dataframe({"test": "test"})

    with pytest.raises(ImportError):
        monkeypatch.setitem(sys.modules, "geopandas", None)
        api.to_geodataframe({"test": "tst"})
def sentinel_query_task(parameters, task_id=None):
    def validate_name(string: str):
        return string.replace('_', '-')

    def order_by(order_by='ingestiondate', ascending=False):
        prefix_order_by = '+' if ascending else '-'
        return prefix_order_by + order_by

    task = DownloadTask.objects.get(pk=task_id)
    platformname = parameters['products']
    longitude_min, longitude_max = parameters['longitude']
    latitude_min, latitude_max = parameters['latitude']
    extent = Polygon([[(longitude_min, latitude_max),
                       (longitude_max, latitude_max),
                       (longitude_max, latitude_min),
                       (longitude_min, latitude_min),
                       (longitude_min, latitude_max)]])
    extent = geojson_to_wkt(extent)
    order_by = order_by('ingestiondate', ascending=False)
    api = SentinelAPI(user, password, api_url)
    products = api.query(extent,
                         date=(parameters['time']),
                         platformname='SENTINEL-1',
                         producttype='GRD',
                         orbitdirection='DESCENDING')
    products_df = api.to_dataframe(products)
    task.execution_start = datetime.now()
    task.update_status('WAIT', "Download Sentinel Query. ")
    return products_df
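# A minimal sketch of the `parameters` dict the task above expects, inferred
# from its lookups (key names come from the code; the values are hypothetical):
parameters = {
    'products': 'SENTINEL-1',          # read into `platformname`
    'longitude': (-71.5, -70.5),       # (longitude_min, longitude_max)
    'latitude': (-34.0, -33.0),        # (latitude_min, latitude_max)
    'time': ('20200601', '20200614'),  # passed straight through as `date`
}
# sentinel_query_task(parameters, task_id=42)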
def job():
    # Check for new Sentinel data on SciHub, based on the search requirements
    api = SentinelAPI(oah_user, oah_pass, 'https://apihub.copernicus.eu/apihub/')
    count = api.count(area=wkt, date=(start_date, end_date),
                      platformname=platformname, area_relation='Contains',
                      raw=None, cloudcoverpercentage=(min_cloud, max_cloud),
                      limit=20, processinglevel=processinglevel)
    now = datetime.now()
    now = now.strftime("%d/%m/%Y %H:%M:%S")
    print(now + ' - Checking for new data')

    if count > 0:
        # Write the available image data to a dataframe
        products = api.query(area=wkt, date=(start_date, end_date),
                             platformname=platformname, area_relation='Contains',
                             raw=None, cloudcoverpercentage=(min_cloud, max_cloud),
                             limit=20, processinglevel=processinglevel)
        products_df = api.to_dataframe(products)
        detail = products_df.iat[0, 4]
        # Extract and format the relevant metadata of the newest image
        img_sat = products_df.iloc[0, 36]                    # satellite name
        img_proc_lvl = products_df.iloc[0, 37]               # processing level
        img_date = products_df.iloc[0, 4][6:16]              # acquisition date
        img_time = products_df.iloc[0, 4][17:25]             # acquisition time
        img_cloud = str(products_df.iloc[0, 21])[:5] + ' %'  # cloud coverage
        # Prepare the e-mail content
        subject = "New satellite image available - " + img_date
        body = ("Properties of the new satellite image are the following.\n\n"
                + 'Satellite: ' + img_sat + '\n'
                + 'Processing level: ' + img_proc_lvl + '\n'
                + 'Timestamp of imagery: ' + img_date + ', ' + img_time + '\n'
                + 'Cloud cover percentage: ' + img_cloud)
        message = f'Subject:{subject}\n\n{body}'
        # Send the e-mail and go to sleep
        context = ssl.create_default_context()
        with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:
            server.login(sender_email, password)
            server.sendmail(sender_email, receiver_email, message.encode("utf8"))
        now = datetime.now()
        now = now.strftime("%d/%m/%Y %H:%M:%S")
        print(now + ' - Mail has been sent')
        time.sleep(82800)  # 23 hours
        return
    else:
        # If no new image is available, print a message
        print(now + ' - There is no new data available')
        return
def download_sentinel(current_date, past_date, json, ndvi_tiles, df_predios,
                      ndvi_dir, rgb_dir, ngb_dir, sentinel_dir):
    # Data variables
    geojson_file = 'Tiles-chile.geojson'
    cloud_directory = ''
    # Storage access
    client = storage.Client.from_service_account_json(
        'ADL-forestal-segmentation-7dc429779824.json')
    bucket = client.get_bucket('ranger-app')
    # Sentinel API
    api = SentinelAPI('matias-arauco', 'arauco2019',
                      'https://scihub.copernicus.eu/apihub/')
    footprint = geojson_to_wkt(json)
    products = api.query(footprint,
                         date=(past_date, current_date),
                         platformname='Sentinel-2',
                         cloudcoverpercentage=(0, 30))
    products_df = api.to_dataframe(products)
    products_df_sorted = products_df.loc[products_df['tileid'].isnull()]
    products_df_sorted = products_df_sorted.sort_values(
        ['cloudcoverpercentage', 'ingestiondate'], ascending=[True, True])
    print(products_df_sorted)
    index = products_df_sorted.index
    ite = 0
    for i in index:
        file_name = products_df_sorted['title'][ite][:]
        # The acquisition date and tile id are encoded in the product title
        year, month, day = (products_df_sorted['title'][ite][11:15],
                            products_df_sorted['title'][ite][15:17],
                            products_df_sorted['title'][ite][17:19])
        tile = products_df_sorted['title'][ite][39:44]
        date = year + '-' + month + '-' + day
        print('Downloading tile: ', tile, ', with date: ', date)
        api.download(i, directory_path=sentinel_dir)
        upload_file(bucket, sentinel_dir, file_name + '.zip')
        ndvi_clipping(sentinel_dir, file_name, tile, date, ndvi_tiles,
                      df_predios, ndvi_dir, rgb_dir, ngb_dir, bucket)
        os.remove(sentinel_dir + file_name + '.zip')
        ite = ite + 1
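# For reference, the slicing above assumes the standard Sentinel-2 product
# naming convention; with a hypothetical title the positions line up as follows:
title = "S2A_MSIL2A_20200614T143731_N0214_R096_T19HBA_20200614T184919"
assert title[11:15] == "2020"   # year
assert title[15:17] == "06"     # month
assert title[17:19] == "14"     # day
assert title[39:44] == "19HBA"  # tile id (without the leading 'T')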
def ndvihesaplama(request):
    api = SentinelAPI('flavves', 'BATUhan123.',
                      'https://scihub.copernicus.eu/dhus')
    footprint = geojson_to_wkt(read_geojson('media/map.geojson'))
    products = api.query(footprint,
                         date=('20191219', date(2019, 12, 29)),
                         platformname='Sentinel-2')
    # make a pandas dataframe
    products_df = api.to_dataframe(products)
    # filtering: least cloudy, oldest first; keep the single best product
    products_df_sorted = products_df.sort_values(
        ['cloudcoverpercentage', 'ingestiondate'], ascending=[True, True])
    products_df_sorted = products_df_sorted.head(1)
    df = products_df_sorted
    NotDefteriKaydi = df.values.tolist()
    str_denemesi = str(NotDefteriKaydi)
    Kaydetmeye_basla = list(str_denemesi.split(","))
    yerler = [0, 7, 8, 9, 12, 14, 18, 19, 20]
    isimler = [
        "File name:", "Satellite name", "File size", "Acquisition date",
        "Orbit number", "Cloud", "vegetation", "water", "not vegetation"
    ]
    i = 0
    with open("media/books/txt/deneme.txt", "w") as dosya:
        for sira in yerler:
            print(isimler[i] + ":" + Kaydetmeye_basla[sira])
            yaz = (isimler[i] + ":" + Kaydetmeye_basla[sira])
            i = i + 1
            dosya.write(yaz)
            dosya.write("\n")
    file_path = ('media\\books\\txt\\deneme.txt')  # full path to the text file
    data_file = open(file_path, 'r')
    data = data_file.read()
    data = list(data.split("\n"))
    context = {'deneme': data}
    # run the NDVI pipeline before rendering; the original passed this call as
    # render()'s fourth positional argument, which is content_type, not a hook
    backend().badana()
    return render(request, "todo_app/ndvihesaplama.html", context)
def _downloader(self, datelist, sensor, continent, obs_folder, obs_lst, uid, psw):
    local_obs_date = self._available_dec(obs_lst)
    api = SentinelAPI(uid, psw)
    if continent == 'Africa':
        footprint = geojson_to_wkt(read_geojson(r'L:/HSL/poi/AOI_Africa.geojson'))
    elif continent == 'West_Asia':
        footprint = geojson_to_wkt(read_geojson(r'L:/HSL/poi/AOI_West_Asia.geojson'))
    elif continent == 'Europe':
        footprint = geojson_to_wkt(read_geojson(r'L:/HSL/poi/AOI_Europe.geojson'))
    products = api.query(footprint,
                         filename=f'S3{sensor}_*',
                         producttype='SY_2_V10___',
                         date=(datelist[0], datelist[-1]))
    products_df = api.to_dataframe(products)
    if products_df.size != 0:
        products_df_sorted = products_df.sort_values(['ingestiondate'],
                                                     ascending=[True])
        products_df_sorted = products_df_sorted.head(24)
        # keep only observations that are not yet available locally
        download_list = products_df_sorted[
            ~products_df_sorted['beginposition'].isin(local_obs_date)
        ].drop_duplicates(subset=['beginposition'], keep='last')
        if download_list.size != 0:
            downloaded = api.download_all(download_list.index,
                                          directory_path=obs_folder,
                                          n_concurrent_dl=2)
            # download_all() returns (downloaded, retrieval_triggered, failed)
            if len(downloaded[2]) != 0:
                print('Info! Some datasets were not downloaded')
            self._unzipper(obs_folder)
    else:
        pass
def satquery(geojson, date_from=None, date_to=None, platform='Sentinel-2',
             cloud_cover_percentage=95):
    """
    Query products with the given properties.

    :param geojson: str
        The geojson file path for the footprint.
    :param date_from: datetime, optional
    :param date_to: datetime, optional
        The time interval filter based on the Sensing Date of the products.
    :param platform: str
        'Sentinel-1' or 'Sentinel-2'.
    :param cloud_cover_percentage: Maximum cloud coverage percentage.
        Hundred percent cloud cover means no clear sky is visible.
        Default is 95%.
    :return: Pandas DataFrame
        The products from the query response as a Pandas DataFrame
        with the values in their appropriate Python types.
    """
    api = SentinelAPI(USERNAME, PASSWORD, 'https://scihub.copernicus.eu/dhus')
    footprint = geojson_to_wkt(read_geojson(geojson), decimals=6)

    kwargs = dict()
    kwargs['platformname'] = platform
    if platform == 'Sentinel-1':
        # Level-1 Ground Range Detected (GRD) products
        kwargs['producttype'] = 'GRD'
    elif platform == 'Sentinel-2':
        kwargs['cloudcoverpercentage'] = (0, cloud_cover_percentage)

    products = api.query(footprint, date=(date_from, date_to),
                         area_relation='Contains', **kwargs)
    df = api.to_dataframe(products)
    return df.sort_values(by='beginposition')
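# Hypothetical usage of satquery() above; the AOI path and the module-level
# USERNAME/PASSWORD are placeholders, and the call itself needs network access:
from datetime import datetime
# df = satquery('aoi.geojson',
#               date_from=datetime(2020, 6, 1), date_to=datetime(2020, 6, 14),
#               platform='Sentinel-2', cloud_cover_percentage=30)
# print(df[['title', 'beginposition', 'cloudcoverpercentage']].head())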
class Downloader:
    def __init__(self, str_username, str_password, str_link):
        self.api = SentinelAPI(str_username, str_password, str_link)
        self.products = None

    def search_polygon(self, footprint: object, str_date_start: str,
                       str_date_end: str, str_platform_name: str,
                       percentage: object):
        print('searching')
        self.products = self.api.query(footprint,
                                       date=(str_date_start, str_date_end),
                                       platformname=str_platform_name,
                                       cloudcoverpercentage=(percentage[0],
                                                             percentage[1]))
        # get_products_size() reports the total size in GB
        size = self.api.get_products_size(self.products)
        print(f'found {size}GB of data')
        # print(self.products)

    def download_zip(self, path):
        self.api.download_all(self.products, path, max_attempt, True)

    def download_products(self, path, download_file):
        if download_file:
            self.download_zip(path)
            print('downloaded')
        df_products = self.api.to_dataframe(self.products)
        return df_products

    def download_geoproduct(self, path, download_file):
        if download_file:
            self.download_zip(path)
            # print('download Geos')
        gdf_products = self.api.to_geodataframe(self.products)
        return gdf_products

    def download_json(self):
        return self.api.to_geojson(self.products)

    def download_one(self, key, path):
        self.api.download(key, path, True)
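# A minimal usage sketch for the Downloader wrapper above (credentials, hub URL
# and the WKT footprint are placeholders; note that download_zip() relies on a
# module-level `max_attempt` constant):
dl = Downloader('user', 'password', 'https://scihub.copernicus.eu/dhus')
wkt_aoi = 'POLYGON((7.0 51.0, 7.1 51.0, 7.1 51.1, 7.0 51.1, 7.0 51.0))'
dl.search_polygon(wkt_aoi, '20200601', '20200614', 'Sentinel-2', (0, 30))
df = dl.download_products('./downloads', download_file=False)  # metadata only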
def main(csvpath, apipath, qp):
    # Read password file
    # ==================
    try:
        with open(apipath) as f:
            (usrnam, psswrd) = f.readline().split(" ")
            if psswrd.endswith("\n"):
                psswrd = psswrd[:-1]
    except IOError:
        sys.exit("Error reading the password file!")

    # Connect to API using <username> and <password>
    # ==============================================
    print("Connecting to SciHub API...")
    api = SentinelAPI(usrnam, psswrd, "https://scihub.copernicus.eu/dhus")

    # Search by SciHub query keywords
    # ===============================
    products = api.query(qp['footprint'],
                         beginposition=(qp['strtime'], qp['endtime']),
                         endposition=(qp['strtime'], qp['endtime']),
                         platformname=qp['platformname'],
                         producttype=qp['producttype'])

    # Convert to Pandas DataFrame and sort by date ascending
    # ======================================================
    products_df = api.to_dataframe(products)
    products_df_sorted = products_df.sort_values('beginposition', ascending=True)

    # Save to CSV file
    # ================
    print(f"Saving list to {os.path.basename(csvpath)}")
    prep_csv = products_df_sorted[['uuid', 'title']]
    prep_csv.insert(2, "downloaded", False, allow_duplicates=True)
    prep_csv.to_csv(csvpath, index=False)
    print('Finished!')
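# A sketch of the `qp` query-parameter dict consumed above (key names come from
# the code; the values are hypothetical). The footprint is passed positionally
# to api.query() as the WKT area:
qp = {
    'footprint': 'POLYGON((12.4 41.8, 12.6 41.8, 12.6 42.0, 12.4 42.0, 12.4 41.8))',
    'strtime': '2020-06-01T00:00:00Z',
    'endtime': '2020-06-14T00:00:00Z',
    'platformname': 'Sentinel-1',
    'producttype': 'GRD',
}
# main('products.csv', 'apikey.txt', qp)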
def download_best(_box: box, download_path: str, user: str, pw: str) -> tp.List[str]:
    _api = SentinelAPI(user, pw, 'https://scihub.copernicus.eu/dhus')
    file_path = os.path.join(download_path, "save.csv")
    if not os.path.exists(file_path):
        products = _api.query(
            _box,
            date=('NOW-1MONTH', 'NOW'),
            platformname='Sentinel-2',
            processinglevel='Level-1C',
            cloudcoverpercentage=(0, 10),
        )
        products_df = _api.to_dataframe(products)
        tile_ids = []

        def _unknown_tile_id(x: str, t_ids: tp.List) -> bool:
            ret_val = x in t_ids
            if not ret_val:
                t_ids.append(x)
            return not ret_val

        # sort products by cloud coverage
        products_df_sorted = products_df.sort_values(["cloudcoverpercentage"],
                                                     ascending=[True])
        # drop duplicate tiles, keeping the copy with the lowest cloud coverage
        first_tiles = [
            _unknown_tile_id(x, tile_ids)
            for x in list(products_df_sorted['tileid'].array)
        ]
        # first_titles = np.vectorize(_unknown_tile_id(lambda x:x, tile_ids))(products_df_sorted['tileid'].array)
        products_df_sorted_unique = products_df_sorted[first_tiles]
        if not os.path.exists(download_path):
            os.makedirs(download_path)
        products_df_sorted_unique.to_csv(file_path)
    else:
        # the cache is written with to_csv() above, so read it back as CSV
        # (the original used pd.read_pickle here, which cannot parse that file)
        products_df_sorted_unique = pd.read_csv(file_path)

    # estimate area from footprint
    products_df_sorted_unique['area'] = [
        __estimate_area(loads(e))
        for e in list(products_df_sorted_unique['footprint'].array)
    ]
    # drop tiles whose footprint covers less than three quarters of the full
    # 100 km x 100 km tile size
    products_df_sorted_unique_larger = products_df_sorted_unique[
        products_df_sorted_unique['area'] > 100000 * 100000 / 4 * 3]
    _api.download_all(products_df_sorted_unique_larger.uuid, download_path)
    return [
        os.path.join(download_path, x)
        for x in products_df_sorted_unique.title
    ]
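# Hypothetical invocation of download_best(); shapely geometries stringify to
# their WKT representation, which appears to be what the query above relies on
# when it passes `_box` straight through as the area argument:
from shapely.geometry import box
aoi = box(7.0, 51.0, 7.2, 51.2)  # (minx, miny, maxx, maxy) in lon/lat
# files = download_best(aoi, './s2_downloads', 'user', 'password')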
def test_to_pandas(products):
    df = SentinelAPI.to_dataframe(products)
    assert type(df).__name__ == "DataFrame"
    assert len(products) == len(df)
    assert set(products) == set(df.index)
class SentinelLoader:
    def __init__(self, dataPath, user, password,
                 apiUrl='https://scihub.copernicus.eu/apihub/',
                 showProgressbars=True, dateToleranceDays=5,
                 cloudCoverage=(0, 80), deriveResolutions=True,
                 cacheApiCalls=True, cacheTilesData=True, loglevel=logging.INFO):
        logging.basicConfig(level=loglevel)
        self.api = SentinelAPI(user, password, apiUrl,
                               show_progressbars=showProgressbars)
        self.dataPath = dataPath
        self.user = user
        self.password = password
        self.dateToleranceDays = dateToleranceDays
        self.cloudCoverage = cloudCoverage
        self.deriveResolutions = deriveResolutions
        self.cacheApiCalls = cacheApiCalls
        self.cacheTilesData = cacheTilesData

    def getProductBandTiles(self, geoPolygon, bandName, resolution, dateReference):
        """Downloads and returns file names with Sentinel2 tiles that best fit the
           polygon area at the desired date reference. It will perform
           up/downsampling if deriveResolutions is True and the desired resolution
           is not available for the required band."""
        logger.info("Getting contents. band=%s, resolution=%s, date=%s",
                    bandName, resolution, dateReference)

        # find tiles that intersect geoPolygon within date-dateTolerance and date+dateTolerance
        dateTolerance = timedelta(days=self.dateToleranceDays)
        dateObj = datetime.now()
        if dateReference != 'now':
            dateObj = datetime.strptime(dateReference, '%Y-%m-%d')
        dateFrom = dateObj - dateTolerance
        dateTo = dateObj + dateTolerance

        # derive the resolution that is actually available for the requested band
        resolutionDownload = resolution
        if self.deriveResolutions:
            if resolution == '10m':
                if bandName in ['B01', 'B09']:
                    resolutionDownload = '60m'
                elif bandName in ['B05', 'B06', 'B07', 'B11', 'B12', 'B8A', 'SCL']:
                    resolutionDownload = '20m'
            elif resolution == '20m':
                if bandName in ['B08']:
                    resolutionDownload = '10m'
                elif bandName in ['B01', 'B09']:
                    resolutionDownload = '60m'
            elif resolution == '60m':
                if bandName in ['B08']:
                    resolutionDownload = '10m'

        logger.info("Querying API for candidate tiles")
        area = Polygon(geoPolygon).wkt

        # query cache key
        area_hash = hashlib.md5(area.encode()).hexdigest()
        apicache_file = self.dataPath + "/apiquery/Sentinel-2-S2MSI2A-%s-%s-%s-%s-%s.csv" % (
            area_hash, dateFrom.strftime("%Y%m%d"), dateTo.strftime("%Y%m%d"),
            self.cloudCoverage[0], self.cloudCoverage[1])

        products_df = None
        if self.cacheApiCalls and os.path.isfile(apicache_file):
            logger.debug("Using cached API query contents")
            products_df = pd.read_csv(apicache_file)
            os.system("touch -c %s" % apicache_file)
        else:
            logger.debug("Querying remote API")
            products = self.api.query(area,
                                      date=(dateFrom.strftime("%Y%m%d"),
                                            dateTo.strftime("%Y%m%d")),
                                      platformname='Sentinel-2',
                                      producttype='S2MSI2A',
                                      cloudcoverpercentage=self.cloudCoverage)
            products_df = self.api.to_dataframe(products)
            logger.debug("Caching API query results for later usage")
            saveFile(apicache_file, products_df.to_csv(index=True))

        logger.debug("Found %d products", len(products_df))
        if len(products_df) == 0:
            raise Exception('Could not find any tiles for the specified parameters')
        products_df_sorted = products_df.sort_values(
            ['ingestiondate', 'cloudcoverpercentage'], ascending=[False, False])

        # select the best product; if geoPolygon spans multiple tiles, select the best of them
        missing = Polygon(geoPolygon)
        desiredRegion = Polygon(geoPolygon)
        selectedTiles = []
        footprints = [desiredRegion]
        for index, pf in products_df_sorted.iterrows():
            # osgeo.ogr.Geometry
            footprint = gmlToPolygon(pf['gmlfootprint'])
            if missing.area > 0:
                if missing.intersects(footprint) == True:
                    missing = (missing.symmetric_difference(footprint)).difference(footprint)
                    selectedTiles.append(index)
                    footprints.append(footprint)

        if missing.area > 0:
            raise Exception('Could not find tiles for the whole selected area at date range')

        logger.debug("Tiles selected for covering the entire desired area: %s", selectedTiles)
        # g = gpd.GeoSeries(footprints)
        # g.plot(cmap=plt.get_cmap('jet'), alpha=0.5)

        # download tiles data
        tileFiles = []
        for index, sp in products_df.loc[selectedTiles].iterrows():
            url = "https://scihub.copernicus.eu/dhus/odata/v1/Products('%s')/Nodes('%s.SAFE')/Nodes('MTD_MSIL2A.xml')/$value" % (sp['uuid'], sp['title'])
            meta_cache_file = self.dataPath + "/products/%s-MTD_MSIL2A.xml" % (sp['uuid'])

            mcontents = ''
            if self.cacheTilesData and os.path.isfile(meta_cache_file):
                logger.debug('Reusing cached metadata info for tile \'%s\'', sp['uuid'])
                mcontents = loadFile(meta_cache_file)
                os.system("touch -c %s" % meta_cache_file)
            else:
                logger.debug('Getting metadata info for tile \'%s\' remotely', sp['uuid'])
                r = requests.get(url, auth=(self.user, self.password))
                if r.status_code != 200:
                    raise Exception("Could not get metadata info. status=%s" % r.status_code)
                mcontents = r.content.decode("utf-8")
                saveFile(meta_cache_file, mcontents)

            rexp = "<IMAGE_FILE>GRANULE\/([0-9A-Z_]+)\/IMG_DATA\/R%s\/([0-9A-Z_]+_%s_%s)<\/IMAGE_FILE>" % (resolutionDownload, bandName, resolutionDownload)
            # print(mcontents)
            m = re.search(rexp, mcontents)
            if m == None:
                raise Exception("Could not find image metadata. uuid=%s, resolution=%s, band=%s" % (sp['uuid'], resolutionDownload, bandName))

            rexp1 = "<PRODUCT_START_TIME>([\-0-9]+)T[0-9\:\.]+Z<\/PRODUCT_START_TIME>"
            m1 = re.search(rexp1, mcontents)
            if m1 == None:
                raise Exception("Could not find product date from metadata")

            downloadFilename = self.dataPath + "/products/%s/%s/%s.tiff" % (m1.group(1), sp['uuid'], m.group(2))
            if not os.path.exists(os.path.dirname(downloadFilename)):
                os.makedirs(os.path.dirname(downloadFilename))

            if not self.cacheTilesData or not os.path.isfile(downloadFilename):
                tmp_tile_filejp2 = "%s/tmp/%s.jp2" % (self.dataPath, uuid.uuid4().hex)
                tmp_tile_filetiff = "%s/tmp/%s.tiff" % (self.dataPath, uuid.uuid4().hex)
                if not os.path.exists(os.path.dirname(tmp_tile_filejp2)):
                    os.makedirs(os.path.dirname(tmp_tile_filejp2))

                url = "https://scihub.copernicus.eu/dhus/odata/v1/Products('%s')/Nodes('%s.SAFE')/Nodes('GRANULE')/Nodes('%s')/Nodes('IMG_DATA')/Nodes('R%s')/Nodes('%s.jp2')/$value" % (sp['uuid'], sp['title'], m.group(1), resolutionDownload, m.group(2))
                logger.info('Downloading tile uuid=\'%s\', resolution=\'%s\', band=\'%s\'', sp['uuid'], resolutionDownload, bandName)
                downloadFile(url, tmp_tile_filejp2, self.user, self.password)

                # remove near-black features on the image border due to compression
                # artifacts; if not removed, some black pixels will be present in the
                # final image, especially when there is an inclined crop in source images
                logger.debug('Removing near black compression artifacts')
                os.system("nearblack -o %s %s" % (tmp_tile_filetiff, tmp_tile_filejp2))
                os.remove(tmp_tile_filejp2)
                os.system("gdal_translate %s %s" % (tmp_tile_filetiff, downloadFilename))
                os.remove(tmp_tile_filetiff)
            else:
                logger.debug('Reusing tile data from cache')
                os.system("touch -c %s" % downloadFilename)

            filename = downloadFilename
            if resolution != resolutionDownload:
                filename = self.dataPath + "/products/%s/%s/%s-%s.tiff" % (m1.group(1), sp['uuid'], m.group(2), resolution)
                logger.info("Resampling band %s originally in resolution %s to %s" % (bandName, resolutionDownload, resolution))
                rexp = "([0-9]+).*"
                rnumber = re.search(rexp, resolution)
                if not self.cacheTilesData or not os.path.isfile(filename):
                    os.system("gdalwarp -tr %s %s %s %s" % (rnumber.group(1), rnumber.group(1), downloadFilename, filename))

            tileFiles.append(filename)

        return tileFiles

    def cropRegion(self, geoPolygon, sourceGeoTiffs):
        """Returns an image file with contents from a bunch of GeoTiff files cropped
           to the specified geoPolygon. Pay attention to the fact that a new file is
           created at each request and you should delete it after using it"""
        logger.debug("Cropping polygon %s from %d files" % (geoPolygon, len(sourceGeoTiffs)))
        desiredRegion = Polygon(geoPolygon)

        # show tile images
        # for fn in tilesData:
        #     ds = gdal.Open(fn).ReadAsArray()
        #     plt.figure(figsize=(10,10))
        #     plt.imshow(ds[0])

        source_tiles = ' '.join(sourceGeoTiffs)
        tmp_file = "%s/tmp/%s.tiff" % (self.dataPath, uuid.uuid4().hex)
        if not os.path.exists(os.path.dirname(tmp_file)):
            os.makedirs(os.path.dirname(tmp_file))

        # define output bounds in destination srs reference
        bounds = desiredRegion.bounds
        s1 = convertWGS84To3857(bounds[0], bounds[1])
        s2 = convertWGS84To3857(bounds[2], bounds[3])

        logger.debug('Combining tiles into a single image. tmpfile=%s' % tmp_file)
        os.system("gdalwarp -multi -srcnodata 0 -t_srs EPSG:3857 -te %s %s %s %s %s %s" % (s1[0], s1[1], s2[0], s2[1], source_tiles, tmp_file))

        return tmp_file

    def getRegionHistory(self, geoPolygon, bandOrIndexName, resolution, dateFrom, dateTo, daysStep=5):
        """Gets a series of GeoTIFF files for a region for a specific band and
           resolution in a date range"""
        dateFromObj = datetime.strptime(dateFrom, '%Y-%m-%d')
        dateToObj = datetime.strptime(dateTo, '%Y-%m-%d')
        dateRef = dateFromObj
        regionHistoryFiles = []
        while dateRef <= dateToObj:
            logger.debug(dateRef)
            dateRefStr = dateRef.strftime("%Y-%m-%d")
            regionFile = None
            if bandOrIndexName in ['NDVI', 'NDWI']:
                regionFile = self.getRegionIndex(geoPolygon, bandOrIndexName, resolution, dateRefStr)
            else:
                regionFile = self.getRegionBand(geoPolygon, bandOrIndexName, resolution, dateRefStr)
            tmp_tile_file = "%s/tmp/%s-%s-%s-%s.tiff" % (self.dataPath, dateRefStr, bandOrIndexName, resolution, uuid.uuid4().hex)
            os.system("mv %s %s" % (regionFile, tmp_tile_file))
            regionHistoryFiles.append(tmp_tile_file)
            dateRef = dateRef + timedelta(days=daysStep)
        return regionHistoryFiles

    def getRegionBand(self, geoPolygon, bandName, resolution, dateReference):
        regionTileFiles = self.getProductBandTiles(geoPolygon, bandName, resolution, dateReference)
        return self.cropRegion(geoPolygon, regionTileFiles)

    def _getBandDataFloat(self, geoPolygon, bandName, resolution, dateReference):
        bandFile = self.getRegionBand(geoPolygon, bandName, resolution, dateReference)
        gdalBand = gdal.Open(bandFile)
        geoTransform = gdalBand.GetGeoTransform()
        projection = gdalBand.GetProjection()
        data = gdalBand.ReadAsArray().astype(float)  # np.float is deprecated
        os.remove(bandFile)
        return data, geoTransform, projection

    def getRegionIndex(self, geoPolygon, indexName, resolution, dateReference):
        if indexName == 'NDVI':
            # get band 04 (red) and band 08 (NIR)
            red, geoTransform, projection = self._getBandDataFloat(geoPolygon, 'B04', resolution, dateReference)
            nir, _, _ = self._getBandDataFloat(geoPolygon, 'B08', resolution, dateReference)
            # calculate ndvi
            ndvi = ((nir - red) / (nir + red))
            # save file
            tmp_file = "%s/tmp/ndvi-%s.tiff" % (self.dataPath, uuid.uuid4().hex)
            saveGeoTiff(ndvi, tmp_file, geoTransform, projection)
            return tmp_file
        elif indexName == 'NDWI':
            # get band 03 (green) and band 08 (NIR)
            b03, geoTransform, projection = self._getBandDataFloat(geoPolygon, 'B03', resolution, dateReference)
            b08, _, _ = self._getBandDataFloat(geoPolygon, 'B08', resolution, dateReference)
            # calculate
            ndwi = ((b03 - b08) / (b03 + b08))
            # save file
            tmp_file = "%s/tmp/ndwi-%s.tiff" % (self.dataPath, uuid.uuid4().hex)
            saveGeoTiff(ndwi, tmp_file, geoTransform, projection)
            return tmp_file
        else:
            raise Exception('\'indexName\' must be NDVI or NDWI')

    def cleanupCache(self, filesNotUsedDays):
        os.system("find %s -type f -name '*' -mtime +%s -exec rm {} \;" % (self.dataPath, filesNotUsedDays))
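# A minimal usage sketch for SentinelLoader (credentials and the polygon are
# hypothetical; helpers such as saveFile/loadFile/gmlToPolygon/saveGeoTiff must
# be in scope, as in the class above):
loader = SentinelLoader('/data/sentinel', 'user', 'password')
aoi = [(-47.9, -15.8), (-47.8, -15.8), (-47.8, -15.7), (-47.9, -15.7)]
# ndvi_file = loader.getRegionIndex(aoi, 'NDVI', '10m', '2020-06-14')
# history = loader.getRegionHistory(aoi, 'NDVI', '10m', '2020-06-01', '2020-06-30', daysStep=10)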
print("Dia: " + str(it.date()) + " já obtido previamente. Skipping.") continue successful = False while not successful: for tries in range(0, 5, 1): try: print("Dia: " + str(it.date())) api = SentinelAPI('amneves', 'Amnandre12') footprint = geojson_to_wkt(read_geojson('geo.geojson')) products = api.query(footprint, date=(it.date().strftime("%Y%m%d"), (it + timedelta(days=1)).date().strftime("%Y%m%d")), platformname='Sentinel-2', producttype='S2MSI1C', area_relation='Contains', cloudcoverpercentage=(0, 30)) dataframe = api.to_dataframe(products) count = dataframe.shape[0] print(str(count) + " produto(s) neste dia.") #api.download_all(products) #download(api, products) if count == 1: nome = dataframe.get_values()[0][0] p = multiprocessing.Process(target=foo, name="Foo", args=(api,products)) p.start() print("A aguardar download de " + str(nome) + ".zip") p.join(60 * 60)#60*60 = 1h if p.is_alive(): print("Demorou tempo demais. Voltar a tentar...") # Terminate foo p.terminate() p.join()
def download_sentinel1(
        footprint='C:\\Users\\krist\\Documents\\Syntese\\code\\notebooks\\ljundal.geojson',
        folder='C:\\Users\\krist\\Documents\\Syntese\\data\\s1\\download',
        username='******',
        password='******',
        date_start=['01', '06', '2020'],
        date_end=['14', '06', '2020']):
    '''
    The following function downloads Sentinel-1 data if it happens to be online.
    If it is not online, it initiates the LTR (long-term retrieval).

    The function needs a footprint. This footprint can be made directly using
    e.g. the modis.get_fires() function; here, a footprint for each fire has
    been made. That function also gives the date of the fire, which can be used.
    The function then downloads all Sentinel-1 images within that footprint,
    bounded by the start and end dates. The function currently downloads the
    data and can initiate LTR; a later version should include automatic download
    after LTR. More options can be added in the api, see
    https://pypi.org/project/sentinelsat/.

    Note: from Copernicus hub, only two images can be downloaded at the same
    time. Downloads are therefore run one at a time in a loop.

    Input:
        footprint[str]: A string with the path to a footprint in geojson format.
        folder[str]: The path to which the downloaded images will be saved.
        username[str]: Copernicus Hub username
        password[str]: Copernicus Hub password
        date_start[list]: the date as strings, DAY-MONTH-YEAR (zero-padded, e.g. '01')
        date_end[list]: the date as strings, DAY-MONTH-YEAR (zero-padded, e.g. '01')
    Output:
        None. Images will be downloaded.

    Author: Kristian Soerensen
    July 2020
    [email protected]
    '''
    # initializing API
    api = SentinelAPI(username, password, 'https://scihub.copernicus.eu/dhus')
    Data_start = (str(date_start[2]) + str(date_start[1]) + str(date_start[0]))
    # fetching all products
    Footprint = geojson_to_wkt(read_geojson(footprint))
    # Footprint = footprint
    products = api.query(Footprint,
                         date=(Data_start, date(int(date_end[2]), int(date_end[1]), int(date_end[0]))),
                         producttype='GRD',
                         orbitdirection='DESCENDING')
    cwd = os.getcwd()
    os.chdir(folder)
    # turning into dataframe
    products_df = api.to_dataframe(products)
    print(len(products_df), ' products are found.')
    # check which files are already in the folder, then drop them from the
    # queue; note that pandas drop() returns a new frame, so the result has to
    # be assigned back (the original discarded it)
    for i in range(len(products_df)):
        if os.path.isfile(products_df.iloc[i].filename):
            print('(download_sentinel1)\n Product:', products_df.iloc[i].title, ' already exists.')
    products_df = products_df[[not os.path.isfile(fn) for fn in products_df.filename]]
    # download all products in the dataframe, one at a time
    for i in range(len(products_df)):
        api.download(products_df.iloc[i].uuid)
    os.chdir(cwd)
    return None
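# Hypothetical call with an explicit AOI and date range (credentials are
# placeholders, matching the redacted defaults above; the call needs network
# access and a valid geojson file):
aoi_path = 'fire_aoi.geojson'
# download_sentinel1(footprint=aoi_path, folder='./s1_download',
#                    username='user', password='password',
#                    date_start=['01', '06', '2020'], date_end=['14', '06', '2020'])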
def sent2_query(user, passwd, geojsonfile, start_date, end_date, cloud='100',
                output_folder=None, api=True):
    """
    A convenience function that wraps sentinelsat query & download

    Notes
    -----------
    I have found that sentinelsat sometimes fails to download the second image,
    so I have written some code to avoid this - choose api = False for this

    Parameters
    -----------
    user : string
        username for esa hub
    passwd : string
        password for hub
    geojsonfile : string
        AOI polygon of interest
    start_date : string
        date of beginning of search
    end_date : string
        date of end of search
    output_folder : string
        where you intend to download the imagery
    cloud : string (optional)
        include a cloud filter in the search
    """
    # set up your copernicus username and password details, and copernicus
    # download site... BE CAREFUL if you share this script with others though!
    # NB: bind the API handle to its own name so the `api` boolean parameter is
    # not overwritten (the original rebound `api` here, which made the
    # `if api is True` branch below unreachable)
    hub_api = SentinelAPI(user, passwd)

    # TODO Maybe improve check of library so it doesn't use a global
    # if oldsat is True:
    #     footprint = get_coordinates(geojsonfile)
    # else:
    footprint = geojson_to_wkt(read_geojson(geojsonfile))
    products = hub_api.query(footprint,
                             ((start_date, end_date)),
                             platformname="Sentinel-2",
                             cloudcoverpercentage="[0 TO " + cloud + "]")  # ,producttype="GRD")
    products_df = hub_api.to_dataframe(products)
    if api is True and output_folder != None:
        # download_all() needs the products to download as its first argument
        hub_api.download_all(products, directory_path=output_folder)
    else:
        prods = np.arange(len(products))
        # the api was proving flaky whereas the cmd line always works, hence
        # this alternate download option
        if output_folder != None:
            # procList = []
            for prod in prods:
                # os.chdir(output_folder)
                # products is an OrderedDict keyed by UUID, so index it by position
                sceneID = list(products)[prod]
                cmd = ['sentinel', 'download', '-p', output_folder,
                       user, passwd, sceneID]
                print(sceneID + ' downloading')
                subprocess.call(cmd)
            # [p.wait() for p in procList]
    return products_df, products
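# Hypothetical usage of sent2_query(); dates are the yyyyMMdd strings that
# sentinelsat accepts, and api=True selects the sentinelsat download path:
# df, prods = sent2_query('user', 'password', 'aoi.geojson',
#                         '20200601', '20200614', cloud='30',
#                         output_folder='./s2_zips', api=True)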
def test_to_pandas(products):
    df = SentinelAPI.to_dataframe(products)
    assert '44517f66-9845-4792-a988-b5ae6e81fd3e' in df.index
def test_to_pandas(products):
    df = SentinelAPI.to_dataframe(products)
    assert type(df).__name__ == 'DataFrame'
    assert '44517f66-9845-4792-a988-b5ae6e81fd3e' in df.index
    assert len(products) == len(df)
class SentinelDownloader(object):
    def __init__(self, user, password,
                 api_url='https://scihub.copernicus.eu/dhus'):
        try:
            from sentinelsat import SentinelAPI
        except ImportError as e:
            gs.fatal(_("Module requires sentinelsat library: {}").format(e))
        try:
            import pandas
        except ImportError as e:
            gs.fatal(_("Module requires pandas library: {}").format(e))

        # init logger
        root = logging.getLogger()
        root.addHandler(logging.StreamHandler(sys.stderr))

        # connect SciHub via API
        self._api = SentinelAPI(user, password, api_url=api_url)
        self._products_df_sorted = None

    def filter(self, area, area_relation, clouds=None, producttype=None,
               limit=None, query={}, start=None, end=None, sortby=[], asc=True):
        args = {}
        if clouds:
            args['cloudcoverpercentage'] = (0, int(clouds))
        if producttype:
            args['producttype'] = producttype
            if producttype.startswith('S2'):
                args['platformname'] = 'Sentinel-2'
            else:
                args['platformname'] = 'Sentinel-1'
        if not start:
            start = 'NOW-60DAYS'
        else:
            start = start.replace('-', '')
        if not end:
            end = 'NOW'
        else:
            end = end.replace('-', '')
        if query:
            redefined = [value for value in args.keys() if value in query.keys()]
            if redefined:
                gs.warning("Query overrides already defined options ({})".format(
                    ','.join(redefined)))
            args.update(query)
        gs.verbose("Query: area={} area_relation={} date=({}, {}) args={}".format(
            area, area_relation, start, end, args))
        products = self._api.query(area=area, area_relation=area_relation,
                                   date=(start, end), **args)
        products_df = self._api.to_dataframe(products)
        if len(products_df) < 1:
            gs.message(_('No product found'))
            return

        # sort and limit to first sorted product
        if sortby:
            self._products_df_sorted = products_df.sort_values(
                sortby, ascending=[asc] * len(sortby))
        else:
            self._products_df_sorted = products_df

        if limit:
            self._products_df_sorted = self._products_df_sorted.head(int(limit))

        gs.message(_('{} Sentinel product(s) found').format(
            len(self._products_df_sorted)))

    def list(self):
        if self._products_df_sorted is None:
            return
        for idx in range(len(self._products_df_sorted['uuid'])):
            if 'cloudcoverpercentage' in self._products_df_sorted:
                ccp = '{0:2.0f}%'.format(
                    self._products_df_sorted['cloudcoverpercentage'][idx])
            else:
                ccp = 'cloudcover_NA'
            print('{0} {1} {2} {3} {4}'.format(
                self._products_df_sorted['uuid'][idx],
                self._products_df_sorted['identifier'][idx],
                self._products_df_sorted['beginposition'][idx].strftime(
                    "%Y-%m-%dT%H:%M:%SZ"),
                ccp,
                self._products_df_sorted['producttype'][idx],
            ))

    def download(self, output):
        if self._products_df_sorted is None:
            return
        if not os.path.exists(output):
            os.makedirs(output)
        gs.message(_('Downloading data into <{}>...').format(output))
        for idx in range(len(self._products_df_sorted['uuid'])):
            gs.message('{} -> {}.SAFE'.format(
                self._products_df_sorted['uuid'][idx],
                os.path.join(output, self._products_df_sorted['identifier'][idx])))
            # download
            self._api.download(self._products_df_sorted['uuid'][idx], output)

    def save_footprints(self, map_name):
        if self._products_df_sorted is None:
            return
        try:
            from osgeo import ogr, osr
        except ImportError as e:
            gs.fatal(_("Option <footprints> requires GDAL library: {}").format(e))

        gs.message(_("Writing footprints into <{}>...").format(map_name))
        driver = ogr.GetDriverByName("GPKG")
        tmp_name = gs.tempfile() + '.gpkg'
        data_source = driver.CreateDataSource(tmp_name)

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)

        # features can be polygons or multi-polygons
        layer = data_source.CreateLayer(str(map_name), srs, ogr.wkbMultiPolygon)

        # attributes
        attrs = OrderedDict([("uuid", ogr.OFTString),
                             ("ingestiondate", ogr.OFTString),
                             ("cloudcoverpercentage", ogr.OFTInteger),
                             ("producttype", ogr.OFTString),
                             ("identifier", ogr.OFTString)])
        for key in attrs.keys():
            field = ogr.FieldDefn(key, attrs[key])
            layer.CreateField(field)

        # features
        for idx in range(len(self._products_df_sorted['uuid'])):
            wkt = self._products_df_sorted['footprint'][idx]
            feature = ogr.Feature(layer.GetLayerDefn())
            newgeom = ogr.CreateGeometryFromWkt(wkt)
            # convert polygons to multi-polygons
            newgeomtype = ogr.GT_Flatten(newgeom.GetGeometryType())
            if newgeomtype == ogr.wkbPolygon:
                multigeom = ogr.Geometry(ogr.wkbMultiPolygon)
                multigeom.AddGeometryDirectly(newgeom)
                feature.SetGeometry(multigeom)
            else:
                feature.SetGeometry(newgeom)
            for key in attrs.keys():
                if key == 'ingestiondate':
                    value = self._products_df_sorted[key][idx].strftime(
                        "%Y-%m-%dT%H:%M:%SZ")
                else:
                    value = self._products_df_sorted[key][idx]
                feature.SetField(key, value)
            layer.CreateFeature(feature)
            feature = None
        data_source = None

        # coordinates of footprints are in WKT -> fp precision issues -> snap
        gs.run_command('v.import', input=tmp_name, output=map_name,
                       layer=map_name, snap=1e-10, quiet=True)

    def set_uuid(self, uuid_list):
        """Set products by uuid.

        TODO: Find better implementation

        :param uuid: uuid to download
        """
        from sentinelsat.sentinel import SentinelAPIError

        self._products_df_sorted = {'uuid': []}
        for uuid in uuid_list:
            try:
                odata = self._api.get_product_odata(uuid, full=True)
            except SentinelAPIError as e:
                gs.error('{0}. UUID {1} skipped'.format(e, uuid))
                continue

            for k, v in odata.items():
                if k == 'id':
                    k = 'uuid'
                elif k == 'Sensing start':
                    k = 'beginposition'
                elif k == 'Product type':
                    k = 'producttype'
                elif k == 'Cloud cover percentage':
                    k = 'cloudcoverpercentage'
                elif k == 'Identifier':
                    k = 'identifier'
                elif k == 'Ingestion Date':
                    k = 'ingestiondate'
                elif k == 'footprint':
                    pass
                else:
                    continue
                if k not in self._products_df_sorted:
                    self._products_df_sorted[k] = []
                self._products_df_sorted[k].append(v)
def test_missing_dependency_dataframe(monkeypatch):
    with pytest.raises(ImportError):
        monkeypatch.setitem(sys.modules, "pandas", None)
        SentinelAPI.to_dataframe({"test": "test"})
class Sentinel2Loader:
    def __init__(self, dataPath, user, password,
                 apiUrl='https://apihub.copernicus.eu/apihub/',
                 showProgressbars=True, dateToleranceDays=5,
                 cloudCoverage=(0, 80), deriveResolutions=True,
                 cacheApiCalls=True, cacheTilesData=True,
                 loglevel=logging.DEBUG, nirBand='B08'):
        logging.basicConfig(level=loglevel)
        self.api = SentinelAPI(user, password, apiUrl,
                               show_progressbars=showProgressbars)
        self.dataPath = dataPath
        self.user = user
        self.password = password
        self.dateToleranceDays = dateToleranceDays
        self.cloudCoverage = cloudCoverage
        self.deriveResolutions = deriveResolutions
        self.cacheApiCalls = cacheApiCalls
        self.cacheTilesData = cacheTilesData
        self.nirBand = nirBand

    def getProductBandTiles(self, geoPolygon, bandName, resolution, dateReference):
        """Downloads and returns file names with Sentinel2 tiles that best fit the
           polygon area at the desired date reference. It will perform
           up/downsampling if deriveResolutions is True and the desired resolution
           is not available for the required band."""
        logger.debug("Getting contents. band=%s, resolution=%s, date=%s",
                     bandName, resolution, dateReference)

        # find tiles that intersect geoPolygon within date-dateTolerance and the reference date
        dateTolerance = timedelta(days=self.dateToleranceDays)
        dateObj = datetime.now()
        if dateReference != 'now':
            dateObj = datetime.strptime(dateReference, '%Y-%m-%d')
        dateFrom = dateObj - dateTolerance
        dateTo = dateObj

        dateL2A = datetime.strptime('2018-12-18', '%Y-%m-%d')
        productLevel = '2A'
        if dateObj < dateL2A:
            logger.debug('Reference date %s before 2018-12-18. Will use Level1C tiles (no atmospheric correction)' % (dateObj))
            productLevel = '1C'

        resolutionDownload = resolution
        if self.deriveResolutions:
            if productLevel == '2A':
                if resolution == '10m':
                    if bandName in ['B01', 'B09']:
                        resolutionDownload = '60m'
                    elif bandName in ['B05', 'B06', 'B07', 'B11', 'B12', 'B8A', 'SCL']:
                        resolutionDownload = '20m'
                elif resolution == '20m':
                    if bandName in ['B08']:
                        resolutionDownload = '10m'
                    elif bandName in ['B01', 'B09']:
                        resolutionDownload = '60m'
                elif resolution == '60m':
                    if bandName in ['B08']:
                        resolutionDownload = '10m'
            elif productLevel == '1C':
                resolutionDownload = '10m'

        logger.debug("Querying API for candidate tiles")
        bbox = rasterio.features.bounds(geoPolygon)
        geoPolygon = [(bbox[0], bbox[3]), (bbox[0], bbox[1]),
                      (bbox[2], bbox[1]), (bbox[2], bbox[3])]
        area = Polygon(geoPolygon).wkt

        # query cache key
        area_hash = hashlib.md5(area.encode()).hexdigest()
        apicache_file = self.dataPath + "/apiquery/Sentinel-2-S2MSI%s-%s-%s-%s-%s-%s.csv" % (
            productLevel, area_hash, dateFrom.strftime("%Y%m%d"),
            dateTo.strftime("%Y%m%d"), self.cloudCoverage[0], self.cloudCoverage[1])

        products_df = None
        if self.cacheApiCalls and os.path.isfile(apicache_file):
            logger.debug("Using cached API query contents")
            products_df = pd.read_csv(apicache_file)
            os.system("touch -c %s" % apicache_file)
        else:
            logger.debug("Querying remote API")
            productType = 'S2MSI%s' % productLevel
            products = self.api.query(area,
                                      date=(dateFrom.strftime("%Y%m%d"),
                                            dateTo.strftime("%Y%m%d")),
                                      platformname='Sentinel-2',
                                      producttype=productType,
                                      cloudcoverpercentage=self.cloudCoverage)
            products_df = self.api.to_dataframe(products)
            logger.debug("Caching API query results for later usage")
            saveFile(apicache_file, products_df.to_csv(index=True))

        logger.debug("Found %d products", len(products_df))
        if len(products_df) == 0:
            raise Exception('Could not find any tiles for the specified parameters')
        products_df_sorted = products_df.sort_values(
            ['ingestiondate', 'cloudcoverpercentage'], ascending=[False, False])

        # select the best product; if geoPolygon spans multiple tiles, select the best of them
        missing = Polygon(geoPolygon)
        desiredRegion = Polygon(geoPolygon)
        selectedTiles = []
        footprints = [desiredRegion]
        for index, pf in products_df_sorted.iterrows():
            # osgeo.ogr.Geometry
            footprint = gmlToPolygon(pf['gmlfootprint'])
            if missing.area > 0:
                if missing.intersects(footprint) == True:
                    missing = (missing.symmetric_difference(footprint)).difference(footprint)
                    selectedTiles.append(index)
                    footprints.append(footprint)

        if missing.area > 0:
            raise Exception('Could not find tiles for the whole selected area at date range')

        logger.debug("Tiles selected for covering the entire desired area: %s", selectedTiles)
        # g = gpd.GeoSeries(footprints)
        # g.plot(cmap=plt.get_cmap('jet'), alpha=0.5)

        # download tiles data
        tileFiles = []
        for index, sp in products_df.loc[selectedTiles].iterrows():
            url = "https://apihub.copernicus.eu/apihub/odata/v1/Products('%s')/Nodes('%s.SAFE')/Nodes('MTD_MSIL%s.xml')/$value" % (sp['uuid'], sp['title'], productLevel)
            meta_cache_file = self.dataPath + "/products/%s-MTD_MSIL%s.xml" % (sp['uuid'], productLevel)

            mcontents = ''
            if self.cacheTilesData and os.path.isfile(meta_cache_file):
                logger.debug('Reusing cached metadata info for tile \'%s\'', sp['uuid'])
                mcontents = loadFile(meta_cache_file)
                os.system("touch -c %s" % meta_cache_file)
            else:
                logger.debug('Getting metadata info for tile \'%s\' remotely', sp['uuid'])
                r = requests.get(url, auth=(self.user, self.password))
                if r.status_code != 200:
                    raise Exception("Could not get metadata info. status=%s" % r.status_code)
                mcontents = r.content.decode("utf-8")
                saveFile(meta_cache_file, mcontents)

            rexp = "<IMAGE_FILE>GRANULE\/([0-9A-Z_]+)\/IMG_DATA\/([0-9A-Z_]+_%s)<\/IMAGE_FILE>" % (bandName)
            if productLevel == '2A':
                rexp = "<IMAGE_FILE>GRANULE\/([0-9A-Z_]+)\/IMG_DATA\/R%s\/([0-9A-Z_]+_%s_%s)<\/IMAGE_FILE>" % (resolutionDownload, bandName, resolutionDownload)
            m = re.search(rexp, mcontents)
            if m == None:
                raise Exception("Could not find image metadata. uuid=%s, resolution=%s, band=%s" % (sp['uuid'], resolutionDownload, bandName))

            rexp1 = "<PRODUCT_START_TIME>([\-0-9]+)T[0-9\:\.]+Z<\/PRODUCT_START_TIME>"
            m1 = re.search(rexp1, mcontents)
            if m1 == None:
                raise Exception("Could not find product date from metadata")
            date1 = m1.group(1)

            downloadFilename = self.dataPath + "/products/%s/%s/%s.tiff" % (date1, sp['uuid'], m.group(2))
            if not os.path.exists(os.path.dirname(downloadFilename)):
                os.makedirs(os.path.dirname(downloadFilename))

            if not self.cacheTilesData or not os.path.isfile(downloadFilename):
                tmp_tile_filejp2 = "%s/tmp/%s.jp2" % (self.dataPath, uuid.uuid4().hex)
                tmp_tile_filetiff = "%s/tmp/%s.tiff" % (self.dataPath, uuid.uuid4().hex)
                if not os.path.exists(os.path.dirname(tmp_tile_filejp2)):
                    os.makedirs(os.path.dirname(tmp_tile_filejp2))

                if productLevel == '2A':
                    url = "https://apihub.copernicus.eu/apihub/odata/v1/Products('%s')/Nodes('%s.SAFE')/Nodes('GRANULE')/Nodes('%s')/Nodes('IMG_DATA')/Nodes('R%s')/Nodes('%s.jp2')/$value" % (sp['uuid'], sp['title'], m.group(1), resolutionDownload, m.group(2))
                elif productLevel == '1C':
                    url = "https://apihub.copernicus.eu/apihub/odata/v1/Products('%s')/Nodes('%s.SAFE')/Nodes('GRANULE')/Nodes('%s')/Nodes('IMG_DATA')/Nodes('%s.jp2')/$value" % (sp['uuid'], sp['title'], m.group(1), m.group(2))

                logger.info('Downloading tile uuid=\'%s\', resolution=\'%s\', band=\'%s\', date=\'%s\'', sp['uuid'], resolutionDownload, bandName, date1)
                downloadFile(url, tmp_tile_filejp2, self.user, self.password)

                # remove near-black features on the image border due to compression
                # artifacts; if not removed, some black pixels will be present in the
                # final image, especially when there is an inclined crop in source images
                if bandName == 'TCI':
                    logger.debug('Removing near black compression artifacts')
                    ret = os.system("which nearblack")
                    if ret != 0:
                        raise Exception("gdal nearblack utility was not found in the system. install it")
                    ret = os.system("which gdal_translate")
                    if ret != 0:
                        raise Exception("gdal gdal_translate utility was not found in the system. install it")
                    ret = os.system("nearblack -o %s %s" % (tmp_tile_filetiff, tmp_tile_filejp2))
                    if ret != 0:
                        raise Exception("Error during 'nearblack' execution. code=%d" % ret)
                    ret = os.system("gdal_translate %s %s" % (tmp_tile_filetiff, downloadFilename))
                    if ret != 0:
                        raise Exception("Error during 'gdal_translate' execution. code=%d" % ret)
                    os.remove(tmp_tile_filetiff)
                else:
                    os.system("gdal_translate %s %s" % (tmp_tile_filejp2, downloadFilename))
                os.remove(tmp_tile_filejp2)
            else:
                logger.debug('Reusing tile data from cache')
                os.system("touch -c %s" % downloadFilename)

            filename = downloadFilename
            if resolution != resolutionDownload:
                filename = self.dataPath + "/products/%s/%s/%s-%s.tiff" % (m1.group(1), sp['uuid'], m.group(2), resolution)
                logger.debug("Resampling band %s originally in resolution %s to %s" % (bandName, resolutionDownload, resolution))
                rexp = "([0-9]+).*"
                rnumber = re.search(rexp, resolution)
                if not self.cacheTilesData or not os.path.isfile(filename):
                    os.system("gdalwarp -tr %s %s %s %s" % (rnumber.group(1), rnumber.group(1), downloadFilename, filename))

            tileFiles.append(filename)

        return tileFiles

    def cropRegion(self, geoPolygon, sourceGeoTiffs):
        """Returns an image file with contents from a bunch of GeoTiff files cropped
           to the specified geoPolygon. Pay attention to the fact that a new file is
           created at each request and you should delete it after using it"""
        logger.debug("Cropping polygon from %d files" % (len(sourceGeoTiffs)))
        desiredRegion = Polygon(geoPolygon)

        # #show tile images
        # for fn in tilesData:
        #     ds = gdal.Open(fn).ReadAsArray()
        #     plt.figure(figsize=(10,10))
        #     plt.imshow(ds[0])

        source_tiles = ' '.join(sourceGeoTiffs)
        tmp_file = "%s/tmp/%s.tiff" % (self.dataPath, uuid.uuid4().hex)
        if not os.path.exists(os.path.dirname(tmp_file)):
            os.makedirs(os.path.dirname(tmp_file))

        # define output bounds in destination srs reference
        bounds = desiredRegion.bounds
        s1 = convertWGS84To3857(bounds[0], bounds[1])
        s2 = convertWGS84To3857(bounds[2], bounds[3])

        logger.debug('Combining tiles into a single image. sources=%s tmpfile=%s' % (source_tiles, tmp_file))
        os.system("gdalwarp -multi -srcnodata 0 -t_srs EPSG:3857 -te %s %s %s %s %s %s" % (s1[0], s1[1], s2[0], s2[1], source_tiles, tmp_file))

        return tmp_file

    def getRegionHistory(self, geoPolygon, bandOrIndexName, resolution, dateFrom,
                         dateTo, daysStep=5, ignoreMissing=True, minVisibleLand=0,
                         visibleLandPolygon=None, keepVisibleWithCirrus=False,
                         interpolateMissingDates=False):
        """Gets a series of GeoTIFF files for a region for a specific band and
           resolution in a date range"""
        logger.info("Getting region history for band %s from %s to %s at %s" %
                    (bandOrIndexName, dateFrom, dateTo, resolution))
        dateFromObj = datetime.strptime(dateFrom, '%Y-%m-%d')
        dateToObj = datetime.strptime(dateTo, '%Y-%m-%d')
        dateRef = dateFromObj
        regionHistoryFiles = []

        if visibleLandPolygon is None:
            visibleLandPolygon = geoPolygon

        lastSuccessfulFile = None
        pendingInterpolations = 0

        while dateRef <= dateToObj:
            logger.debug(dateRef)
            dateRefStr = dateRef.strftime("%Y-%m-%d")
            regionFile = None
            try:
                cirrus = 0
                if keepVisibleWithCirrus:
                    cirrus = 1
                if minVisibleLand > 0:
                    try:
                        # use the SCL band to estimate how much land is actually
                        # visible (not cloud, shadow, water, etc.)
                        labelsFile = self.getRegionBand(visibleLandPolygon, "SCL", resolution, dateRefStr)
                        ldata = gdal.Open(labelsFile).ReadAsArray()
                        ldata[ldata == 1] = 0
                        ldata[ldata == 2] = 0
                        ldata[ldata == 3] = 0
                        ldata[ldata == 4] = 1
                        ldata[ldata == 5] = 1
                        ldata[ldata == 6] = 1
                        ldata[ldata == 7] = 0
                        ldata[ldata == 8] = 0
                        ldata[ldata == 9] = 0
                        ldata[ldata == 10] = cirrus
                        ldata[ldata == 11] = 1
                        os.remove(labelsFile)
                        s = np.shape(ldata)
                        visibleLandRatio = np.sum(ldata) / (s[0] * s[1])
                        if visibleLandRatio < minVisibleLand:
                            raise Exception("Too little land shown in image. visible ratio=%s" % visibleLandRatio)
                        else:
                            logger.debug('Minimum visible land detected. visible ratio=%s' % visibleLandRatio)
                    except Exception as exp:
                        logger.warning('Could not filter minimum visible land using SCL band. dateRef=%s err=%s' % (dateRefStr, exp))

                if bandOrIndexName in ['NDVI', 'NDWI', 'NDWI_MacFeeters', 'NDMI']:
                    regionFile = self.getRegionIndex(geoPolygon, bandOrIndexName, resolution, dateRefStr)
                else:
                    regionFile = self.getRegionBand(geoPolygon, bandOrIndexName, resolution, dateRefStr)

                tmp_tile_file = "%s/tmp/%s-%s-%s-%s.tiff" % (self.dataPath, dateRefStr, bandOrIndexName, resolution, uuid.uuid4().hex)

                useImage = True
                if pendingInterpolations > 0:
                    previousData = gdal.Open(lastSuccessfulFile).ReadAsArray()
                    nextData = gdal.Open(regionFile).ReadAsArray()
                    na = np.empty(np.shape(previousData))
                    # FIXME NOT WORKING. PERFORM 2D TIME INTERPOLATION
                    raise Exception('Interpolation not yet implemented')
                    logger.info("Calculating %s interpolated images" % pendingInterpolations)
                    series = pd.Series([previousData])
                    for i in range(0, pendingInterpolations):
                        series.add([na])
                    series.add([nextData])
                    idata = series.interpolate()
                    pendingInterpolations = 0

                # add good image
                os.system("mv %s %s" % (regionFile, tmp_tile_file))
                regionHistoryFiles.append(tmp_tile_file)
                lastSuccessfulFile = tmp_tile_file

            except Exception as e:
                if ignoreMissing:
                    logger.info("Couldn't get data for %s using the specified filter. err=%s" % (dateRefStr, e))
                else:
                    if interpolateMissingDates:
                        if lastSuccessfulFile != None:
                            pendingInterpolations = pendingInterpolations + 1
                    else:
                        raise e

            dateRef = dateRef + timedelta(days=daysStep)

        return regionHistoryFiles

    def getRegionBand(self, geoPolygon, bandName, resolution, dateReference):
        regionTileFiles = self.getProductBandTiles(geoPolygon, bandName, resolution, dateReference)
        return self.cropRegion(geoPolygon, regionTileFiles)

    def _getBandDataFloat(self, geoPolygon, bandName, resolution, dateReference):
        bandFile = self.getRegionBand(geoPolygon, bandName, resolution, dateReference)
        gdalBand = gdal.Open(bandFile)
        geoTransform = gdalBand.GetGeoTransform()
        projection = gdalBand.GetProjection()
        data = gdalBand.ReadAsArray().astype(float)  # np.float is deprecated
        os.remove(bandFile)
        return data, geoTransform, projection

    def getRegionIndex(self, geoPolygon, indexName, resolution, dateReference):
        if indexName == 'NDVI':
            # get band 04 (red) and the NIR band
            red, geoTransform, projection = self._getBandDataFloat(geoPolygon, 'B04', resolution, dateReference)
            nir, _, _ = self._getBandDataFloat(geoPolygon, self.nirBand, resolution, dateReference)
            # calculate ndvi
            ndvi = ((nir - red) / (nir + red))
            # save file
            tmp_file = "%s/tmp/ndvi-%s.tiff" % (self.dataPath, uuid.uuid4().hex)
            saveGeoTiff(ndvi, tmp_file, geoTransform, projection)
            return tmp_file
        elif indexName == 'NDWI':
            # get the NIR band and band 11 (SWIR)
            b08, geoTransform, projection = self._getBandDataFloat(geoPolygon, self.nirBand, resolution, dateReference)
            b11, _, _ = self._getBandDataFloat(geoPolygon, 'B11', resolution, dateReference)
            # calculate
            ndwi = ((b08 - b11) / (b08 + b11))
            # save file
            tmp_file = "%s/tmp/ndwi-%s.tiff" % (self.dataPath, uuid.uuid4().hex)
            saveGeoTiff(ndwi, tmp_file, geoTransform, projection)
            return tmp_file
        elif indexName == 'NDWI_MacFeeters':
            # get band 03 (green) and the NIR band
            b03, geoTransform, projection = self._getBandDataFloat(geoPolygon, 'B03', resolution, dateReference)
            b08, _, _ = self._getBandDataFloat(geoPolygon, self.nirBand, resolution, dateReference)
            # calculate
            ndwi = ((b03 - b08) / (b03 + b08))
            # save file
            tmp_file = "%s/tmp/ndwi-%s.tiff" % (self.dataPath, uuid.uuid4().hex)
            saveGeoTiff(ndwi, tmp_file, geoTransform, projection)
            return tmp_file
        elif indexName == 'NDMI':
            # get band 03 and band 10, as used by this implementation
            nir, geoTransform, projection = self._getBandDataFloat(geoPolygon, 'B03', resolution, dateReference)
            swir, _, _ = self._getBandDataFloat(geoPolygon, 'B10', resolution, dateReference)
            # calculate
            ndmi = ((nir - swir) / (nir + swir))
            # save file
            tmp_file = "%s/tmp/ndmi-%s.tiff" % (self.dataPath, uuid.uuid4().hex)
            saveGeoTiff(ndmi, tmp_file, geoTransform, projection)
            return tmp_file
        elif indexName == 'EVI':
            # https://github.com/sentinel-hub/custom-scripts/tree/master/sentinel-2
            # index = 2.5 * (B08 - B04) / ((B08 + 6.0 * B04 - 7.5 * B02) + 1.0)
            b04, geoTransform, projection = self._getBandDataFloat(geoPolygon, 'B04', resolution, dateReference)
            b08, _, _ = self._getBandDataFloat(geoPolygon, self.nirBand, resolution, dateReference)
            b02, _, _ = self._getBandDataFloat(geoPolygon, 'B02', resolution, dateReference)
            # calculate
            evi = 2.5 * (b08 - b04) / ((b08 + (6.0 * b04) - (7.5 * b02)) + 1.0)
            # save file (the original reused the ndmi- prefix here)
            tmp_file = "%s/tmp/evi-%s.tiff" % (self.dataPath, uuid.uuid4().hex)
            saveGeoTiff(evi, tmp_file, geoTransform, projection)
            return tmp_file
        else:
            raise Exception('\'indexName\' must be NDVI, NDWI, NDWI_MacFeeters, NDMI, or EVI')

    def cleanupCache(self, filesNotUsedDays):
        os.system("find %s -type f -name '*' -mtime +%s -exec rm {} \;" % (self.dataPath, filesNotUsedDays))
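# A minimal usage sketch for Sentinel2Loader (area and credentials are
# hypothetical; helper functions such as saveGeoTiff/saveFile/loadFile must be
# in scope, as in the class above):
loader = Sentinel2Loader('/data/sentinel2', 'user', 'password')
area = [(-47.9, -15.8), (-47.8, -15.8), (-47.8, -15.7), (-47.9, -15.7)]
# tci_files = loader.getRegionHistory(area, 'TCI', '60m', '2020-06-01', '2020-06-30', daysStep=10)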
def badana(self): global kappa kappa=1 self.deneme="marker string showing we entered badana" print(self.deneme) print(self.adana) api = SentinelAPI('flavves', 'şifre', 'https://scihub.copernicus.eu/dhus') footprint = geojson_to_wkt(read_geojson('media/map.geojson')) print(footprint) self.products = api.query(footprint,date=('20191219', date(2019, 12, 29)),platformname='Sentinel-2') products_df = api.to_dataframe(self.products) print("seems to be working") self.products_df_sorted = products_df.sort_values(['cloudcoverpercentage', 'ingestiondate'], ascending=[True, True]) self.products_df_sorted = self.products_df_sorted.head(1) self.df=self.products_df_sorted self.NotDefteriKaydi = self.df.values.tolist() self.str_denemesi=str(self.NotDefteriKaydi) self.Kaydetmeye_basla=list(self.str_denemesi.split(",")) self.yerler=[0,7,8,9,12,14,18,19,20] self.isimler=["File name:","Satellite name","File size","Acquisition date","Orbit number","Cloud percentage","Vegetation percentage","Water percentage","Non-vegetation percentage"] self.i=0 with open("media/books/txt/deneme.txt", "w") as self.dosya: for self.sira in self.yerler: print(self.isimler[self.i]+":"+self.Kaydetmeye_basla[self.sira]) self.yaz=(self.isimler[self.i]+":"+self.Kaydetmeye_basla[self.sira]) self.i=self.i+1 self.dosya.write(self.yaz) self.dosya.write("\n") # the with-block closes the file; the old explicit close() was redundant print(self.products_df_sorted) print("download started") #commented out because it downloads a ~1 GB archive #api.download_all(self.products_df_sorted.index) print("download finished") self.veri_cekme=self.products_df_sorted.index self.veri_cekme1=self.veri_cekme[0] """ This step is needed for unpacking: the archive name is the title of the product we downloaded """ self.arsiv_adi=api.get_product_odata(self.veri_cekme1) self.arsiv_adi=self.arsiv_adi["title"] self.arsiv_adi=str(self.arsiv_adi) print(self.arsiv_adi) self.a = Archive(self.arsiv_adi+'.zip') self.a.extract() self.img_data_klasor_ismi=os.listdir((self.arsiv_adi+".SAFE"+'/GRANULE')) self.img_data_klasor_ismi=self.img_data_klasor_ismi[0] self.img_data_klasor_ismi=str(self.img_data_klasor_ismi) self.dosya_yer_=(self.arsiv_adi+".SAFE"+'/GRANULE/'+self.img_data_klasor_ismi+'/IMG_DATA') self.resim_isim=os.listdir(self.dosya_yer_) print(self.dosya_yer_) """ Now let's use the images with rasterio """ if any(res in self.resim_isim for res in ("R10m", "R20m", "R60m")): # the original comparison 'self.resim_isim == "R10m" or "R20m" or "R60m"' was always true; test list membership instead self.dosya_yer_=(self.arsiv_adi+".SAFE"+'/GRANULE/'+self.img_data_klasor_ismi+'/IMG_DATA/R60m') self.resim_isim=os.listdir(self.dosya_yer_) self.jp2ler = [self.resim_isim[2],self.resim_isim[3]] self.bands = [] #read our jp2 files here for self.jp2 in self.jp2ler: with rasterio.open(self.dosya_yer_+"/"+self.jp2) as self.f: self.bands.append(self.f.read(1)) #split the image into the bands we need self.band_red=self.bands[0] self.band_nir=self.bands[1] print("band values loaded") print(self.bands[0],self.bands[1]) else: self.jp2ler = [self.resim_isim[2],self.resim_isim[3]] self.bands = [] #read our jp2 files here for self.jp2 in self.jp2ler: with rasterio.open(self.dosya_yer_+"/"+self.jp2) as f: self.bands.append(f.read(1)) # was self.f.read(1), a bug in this branch #split the image into the bands we need self.band_red=self.bands[0] self.band_nir=self.bands[1] print("band values loaded") print(self.bands[0],self.bands[1]) # Compute with the classic NDVI equation print("computing NDVI") np.seterr(divide='ignore', invalid='ignore') # Calculate NDVI.
This is the equation at the top of this guide expressed in code self.ndvi = (self.band_nir.astype(float) - self.band_red.astype(float)) / (self.band_nir + self.band_red) #variant used for water analysis: ### self.ndvi=(self.band_red.astype(float) - self.band_nir.astype(float)) / (self.band_red + self.band_nir) ### print("NDVI value range") print(np.nanmin(self.ndvi), np.nanmax(self.ndvi)) # Let's look at the image; it is split into bands and we will plot it # As is well known, NDVI values fall between -1 and 1 # We want to show these values as colours, so we map the numbers onto a colour spectrum to get a colour-coded NDVI image # We pick a midpoint and colour towards its left and right class RenkNormalizasyonu(colors.Normalize): def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False): self.midpoint = midpoint colors.Normalize.__init__(self, vmin, vmax, clip) def __call__(self, value, clip=None): x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1] return np.ma.masked_array(np.interp(value, x, y), np.isnan(value)) self.min=np.nanmin(self.ndvi) self.max=np.nanmax(self.ndvi) self.mid=0.1 print("almost done") print(self.min,self.max) self.fig = plt.figure(figsize=(20,10)) self.ax = self.fig.add_subplot(111) self.cmap = plt.cm.RdYlGn self.cax = self.ax.imshow(self.ndvi, cmap=self.cmap, clim=(self.min, self.max), norm=RenkNormalizasyonu(midpoint=self.mid,vmin=self.min, vmax=self.max)) self.ax.axis('off') self.ax.set_title('NDVI image', fontsize=18, fontweight='bold') self.cbar = self.fig.colorbar(self.cax, orientation='horizontal', shrink=0.65) #normally it should save as in the commented line below, but for now use this fixed path #self.fig_kaydet="resimler/"+self.resim_isim[2]+".tif" self.fig_kaydet="media/books/covers/denemeresmi.png" self.fig.savefig(self.fig_kaydet, dpi=200, bbox_inches='tight', pad_inches=0.7) self.fig_kaydet_tif="media/books/covers/denemeresmi.tif" self.fig.savefig(self.fig_kaydet_tif, dpi=200, bbox_inches='tight', pad_inches=0.7)
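Where georeferencing matters, it can be preferable to write the NDVI array back out with rasterio so the source raster's CRS and transform survive, rather than rasterising a matplotlib figure. A minimal sketch under that assumption; red_path and nir_path are hypothetical paths to two same-sized JP2 bands, not files from the snippet above:

import numpy as np
import rasterio

def ndvi_to_geotiff(red_path, nir_path, out_path):
    with rasterio.open(red_path) as red_src, rasterio.open(nir_path) as nir_src:
        red = red_src.read(1).astype(float)
        nir = nir_src.read(1).astype(float)
        profile = red_src.profile  # carries CRS, transform and raster size
    with np.errstate(divide='ignore', invalid='ignore'):
        ndvi = (nir - red) / (nir + red)
    profile.update(driver='GTiff', dtype='float32', count=1)
    with rasterio.open(out_path, 'w', **profile) as dst:
        dst.write(ndvi.astype('float32'), 1)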
class Downloader(object): def __init__(self, username, password, satellite, order_id, directory=Path('/data/')): # The connection to ESA scihub self.api = SentinelAPI(username, password, 'https://scihub.copernicus.eu/dhus', timeout=500.00) # Sentinel-5p currently has its own pre-operations hub self.api_s5p = SentinelAPI(user='******', password='******', api_url='https://s5phub.copernicus.eu/dhus') # Use the current datetime to name the download order self.order_id = order_id # Use ordered dict to store the metadata of the queries products self.products = OrderedDict() self.satellite = satellite self.directory = directory # if not self.directory.exists(): # Create directory if it does not exist # os.makedirs(self.directory) def query(self, footprint, startdate, enddate): if self.satellite == 's1' or self.satellite == 'all': self.query_s1(footprint, startdate, enddate) if self.satellite == 's2' or self.satellite == 'all': self.query_s2(footprint, startdate, enddate) if self.satellite == 's3' or self.satellite == 'all': self.query_s3(footprint, startdate, enddate) if self.satellite == 's5p' or self.satellite == 'all': self.query_s5p(footprint, startdate, enddate) def query_s1(self, footprint, startdate, enddate): # Define producttypes (here it is Sentinel-1 GRDH products) producttypes = ['GRD'] # Loop over producttypes and update the query dictionary # TODO: Fix this inefficient way of querying the relative orbits print(str(footprint)) if FLAGS.s2_intersection: for producttype in producttypes: queried_products = self.api.query( area=footprint, date=(startdate, enddate), platformname='Sentinel-1', #area_relation='Contains', producttype=producttype, sensoroperationalmode='IW', polarisationmode='VV VH') self.products.update(queried_products) self.intersect_products() elif FLAGS.s1_relative_orbit == [0]: for producttype in producttypes: queried_products = self.api.query( area=footprint, date=(startdate, enddate), platformname='Sentinel-1', #area_relation='Contains', producttype=producttype, sensoroperationalmode='IW', polarisationmode='VV VH') self.products.update(queried_products) else: for producttype in producttypes: for relative_orbit in FLAGS.s1_relative_orbit: queried_products = self.api.query( area=footprint, date=(startdate, enddate), platformname='Sentinel-1', producttype=producttype, #area_relation='Contains', sensoroperationalmode='IW', relativeorbitnumber=relative_orbit) self.products.update(queried_products) def query_s2(self, footprint, startdate, enddate): # Load parameters from FLAGS max_cloudcoverage = FLAGS.s2_max_cloudcoverage # Define producttypes (here it is Sentinel-2 L2A products) producttypes = [ 'S2MSI2Ap', 'S2MSI2A' ] # Producttype names differ depending on the year they were published # Loop over producttypes and update the query dictionary # TODO: Fix this inefficient way of querying the relative orbits if FLAGS.s2_relative_orbit == [0]: for producttype in producttypes: queried_products = self.api.query( footprint, date=(startdate, enddate), platformname='Sentinel-2', producttype=producttype, cloudcoverpercentage=(0, max_cloudcoverage), order_by='-ingestiondate') self.only_complete_tile(queried_products) self.products.update(queried_products) else: for producttype in producttypes: for relative_orbit in FLAGS.s2_relative_orbit: queried_products = self.api.query( footprint, date=(startdate, enddate), platformname='Sentinel-2', relativeorbitnumber=relative_orbit, producttype=producttype, cloudcoverpercentage=(0, max_cloudcoverage)) 
self.only_complete_tile(queried_products) self.products.update(queried_products) def query_s3(self, footprint, startdate, enddate): queried_products = self.api.query(footprint, date=(startdate, enddate), platformname='Sentinel-3', producttype='SL_2_LST___', productlevel='L2') self.products.update(queried_products) def query_s5p(self, footprint, startdate, enddate): kwargs = {} producttypedescriptions = [ 'Ozone', 'Sulphur Dioxide', 'Nitrogen Dioxide', 'Methane', 'Formaldehyde', 'Carbon Monoxide', 'Aerosol Index', 'Aerosol Layer Height', 'Cloud' ] # producttypedescriptions = ['Ozone'] # Loop over producttypes and update the query dictionary for producttypedescription in producttypedescriptions: queried_products = self.api_s5p.query( footprint, date=(startdate, enddate), platformname='Sentinel-5 Precursor', processinglevel='L2', producttypedescription=producttypedescription, **kwargs) # Remove any 'Suomi-NPP VIIRS Clouds' products which are returned as 'Cloud' (they shouldn't have been) # https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-5p/products-algorithms if producttypedescription == 'Cloud': temp_queried_products = queried_products.copy() for key in queried_products.keys(): if queried_products[key][ 'producttypedescription'] != 'Cloud': del temp_queried_products[key] queried_products = temp_queried_products self.products.update(queried_products) def print_num_and_size_of_products(self): logging.info('Number of products = ' + str(len(list(self.products)))) logging.info('Total size [GB] = ' + str(self.api.get_products_size(self.products))) # https://sentinelsat.readthedocs.io/en/master/api.html#lta-products # TODO: Get LTA retrieval to work properly (install of newest sentinelsat version is in dockerfile) # Retry every 30 min (+10 second buffer time) to request LTA products. @tenacity.retry(stop=tenacity.stop_after_attempt(200), wait=tenacity.wait_fixed(1810)) def download_zipfiles(self): zipfiles_directory = self.directory / 'zipfiles' if len(self.products) == 0: logging.info('Unable to find any products for the selected biome') sys.exit(0) if not zipfiles_directory.exists(): # Create directory if it does not exist os.makedirs(zipfiles_directory) # Get the products to be downloaded. The sample() function permutes the dataframe, such that a new LTA product # is requested at every retry. The optimal solution would have been to rearrange the dataframe by rotating the # index at every retry, but this is a quick and dirty way to achieve something similar. # (https://stackoverflow.com/a/34879805/12045808). products_df = self.queried_products_as_df().sample(frac=1) # NOTE: The code below is only useful while Sentinel-5p has a different API than the others.
After this has # been fixed, the code should be reduced to the following single line: # Download all zipfiles (it automatically checks if zipfiles already exist) # self.api.download_all(self.products, directory_path=zipfiles_directory) # Download all zipfiles # But for now, use the following code: non_s5p_products = products_df[ products_df['platformname'] != 'Sentinel-5 Precursor'] s5p_products = products_df[products_df['platformname'] == 'Sentinel-5 Precursor'] if len(non_s5p_products): logging.info("Downloading Sentinel-1/2/3 products") try: downloaded, triggered, failed = self.api.download_all( non_s5p_products.to_dict(into=OrderedDict, orient='index'), directory_path=zipfiles_directory) logging.info("Downloaded: " + str(downloaded)) logging.info("Triggered: " + str(triggered)) logging.info("Failed: " + str(failed)) except InvalidChecksumError: logging.info("Error downloading products due to InvalidChecksumError") except Exception: logging.info("Error downloading products due to unknown error") else: logging.info("No Sentinel-1/2/3 products found in query") if len(s5p_products): logging.info("Downloading Sentinel-5p products") self.api_s5p.download_all(s5p_products.to_dict(into=OrderedDict, orient='index'), directory_path=zipfiles_directory) else: logging.info("No Sentinel-5p products found in query") # The Sentinel-5p data has wrongly been given the filetype .zip, but it should be .nc, so make a copy with # .nc extension. A copy is made instead of renaming so sentinelsat doesn't re-download the file every time # it is run. s5p_downloaded_files = zipfiles_directory.glob('S5P*.zip') logging.debug( "Copying downloaded Sentinel-5p files from .zip to .nc (due to bug in SentinelSat)" ) for file in s5p_downloaded_files: if not file.with_suffix('.nc').exists(): shutil.copy(str(file), str(file.with_suffix('.nc'))) def queried_products_as_geojson(self): return self.api.to_geojson(self.products) def only_complete_tile(self, products): found_one = False delete_list = [] for i in products: local_footprint = products.get(i).get('footprint') elements = local_footprint.split(',') if len(elements) == 5 and found_one == False: found_one = True continue else: delete_list.append(i) for i in delete_list: del products[i] def intersect_products(self): print('Found ' + str(len(self.products)) + ' products') S2_geojson_path = (self.directory / 'orders' / FLAGS.s2_order_id).with_suffix('.geojson') ground_geojsons = read_geojson(S2_geojson_path) products_geojsons = self.queried_products_as_geojson() ground_polygon = ground_geojsons.get('features')[0].get( 'geometry').get('coordinates') ground_polygon = geometry.Polygon(ground_polygon[0][0]) titles = [] ids = [] for item in products_geojsons.get('features'): id = item.get('properties').get('id') item = item.get('properties').get('title') item = (item[17:25] + item[48:55]) titles.append(item) ids.append([item, id]) unique = list(set(titles)) unique.sort() union_list = [] for i, element in enumerate(unique): local_polygon = Polygon() for j in range(len(titles)): if titles[j] == element: item = products_geojsons.get('features')[j] item = item.get('geometry').get('coordinates') item = geometry.Polygon(item[0][0]) item = affinity.scale(item, xfact=1.01, yfact=1.01) polygons = [item, local_polygon] local_polygons = unary_union(polygons) local_polygon = item union_list.append([local_polygons, element]) found_id = None # initialise so the loop below cannot hit an undefined name when nothing matches for index, element in enumerate(union_list): wkt = element[0].wkt if ground_polygon.within(element[0]): found_id = element[1] break for i in ids: if found_id != 
i[0]: del self.products[i[1]] print('Reduced the products to ' + str(len(self.products)) + ' products') def queried_products_as_df(self): return self.api.to_dataframe(self.products) def save_queried_products(self): orders_directory = self.directory / 'orders' if not orders_directory.exists(): os.makedirs(orders_directory) # Save the queried products to a geojson file (e.g. to be loaded into QGIS) geojson_path = (self.directory / 'orders' / self.order_id).with_suffix('.geojson') with geojson_path.open('w') as geojson_file: geojson_data = self.api.to_geojson(self.products) geojson_file.write(str(geojson_data)) # Save the queried products as pandas df in a pkl file (preferred format when working in Python) df_path = (self.directory / 'orders' / self.order_id).with_suffix('.pkl') df = self.api.to_dataframe(self.products) df.to_pickle(df_path) def save_queried_products_location(self, path): path = Path(path) path = path.parent.absolute() path = path / 'log' # Save the queried products to a geojson file (e.g. to be loaded into QGIS) geojson_path = (path / self.order_id).with_suffix('.geojson') with geojson_path.open('w') as geojson_file: geojson_data = self.api.to_geojson(self.products) geojson_file.write(str(geojson_data)) # Save the queried products as pandas df in a pkl file (preferred format when working in Python) df_path = (path / self.order_id).with_suffix('.pkl') df = self.api.to_dataframe(self.products) df.to_pickle(df_path)
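A hedged usage sketch for the Downloader class above; the credentials, footprint and dates are placeholders, and it assumes the absl-style FLAGS the query methods read (s1_relative_orbit, s2_relative_orbit, s2_intersection, ...) are defined elsewhere in the project:

from datetime import datetime
from pathlib import Path

footprint = 'POLYGON((10.0 55.0, 10.5 55.0, 10.5 55.5, 10.0 55.5, 10.0 55.0))'  # placeholder WKT
downloader = Downloader(username='user', password='pass', satellite='s2',
                        order_id=datetime.now().strftime('%Y%m%dT%H%M%S'),
                        directory=Path('/data/'))
downloader.query(footprint, startdate='20200101', enddate='20200201')
downloader.print_num_and_size_of_products()
downloader.save_queried_products()  # writes orders/<order_id>.geojson and .pkl
downloader.download_zipfiles()      # retried by tenacity while LTA products come online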
def test_to_pandas_empty(products): df = SentinelAPI.to_dataframe({}) assert type(df).__name__ == 'DataFrame' assert len(df) == 0
def test_to_pandas_empty(): df = SentinelAPI.to_dataframe({}) assert type(df).__name__ == "DataFrame" assert len(df) == 0
class Downloader(object): def __init__(self, username, password, satellite, order_id, directory=Path('/data/')): # The connection to ESA scihub self.api = SentinelAPI(username, password, 'https://scihub.copernicus.eu/dhus') # Sentinel-5p currently has its own pre-operations hub self.api_s5p = SentinelAPI(user='******', password='******', api_url='https://s5phub.copernicus.eu/dhus') # Use the current datetime to name the download order self.order_id = order_id # Use ordered dict to store the metadata of the queries products self.products = OrderedDict() self.satellite = satellite self.directory = directory # if not self.directory.exists(): # Create directory if it does not exist # os.makedirs(self.directory) def query(self, footprint, startdate, enddate): if self.satellite == 's1' or self.satellite == 'all': self.query_s1(footprint, startdate, enddate) if self.satellite == 's2' or self.satellite == 'all': self.query_s2(footprint, startdate, enddate) if self.satellite == 's3' or self.satellite == 'all': self.query_s3(footprint, startdate, enddate) if self.satellite == 's5p' or self.satellite == 'all': self.query_s5p(footprint, startdate, enddate) def query_s1(self, footprint, startdate, enddate): # Define producttypes (here it is Sentinel-1 GRDH products) producttypes = ['GRD'] # Loop over producttypes and update the query dictionary # TODO: Fix this inefficient way of querying the relative orbits if FLAGS.s1_relative_orbit == [0]: for producttype in producttypes: queried_products = self.api.query(footprint, date=(startdate, enddate), platformname='Sentinel-1', producttype=producttype, sensoroperationalmode='IW') self.products.update(queried_products) else: for producttype in producttypes: for relative_orbit in FLAGS.s1_relative_orbit: queried_products = self.api.query( footprint, date=(startdate, enddate), platformname='Sentinel-1', producttype=producttype, sensoroperationalmode='IW', relativeorbitnumber=relative_orbit) self.products.update(queried_products) def query_s2(self, footprint, startdate, enddate): # Load parameters from FLAGS max_cloudcoverage = FLAGS.s2_max_cloudcoverage # Define producttypes (here it is Sentinel-2 L2A products) producttypes = [ 'S2MSI2Ap', 'S2MSI2A' ] # Producttype names differ depending on the year they were published # Loop over producttypes and update the query dictionary # TODO: Fix this inefficient way of querying the relative orbits if FLAGS.s2_relative_orbit == [0]: for producttype in producttypes: queried_products = self.api.query( footprint, date=(startdate, enddate), platformname='Sentinel-2', producttype=producttype, cloudcoverpercentage=(0, max_cloudcoverage)) self.products.update(queried_products) else: for producttype in producttypes: for relative_orbit in FLAGS.s2_relative_orbit: queried_products = self.api.query( footprint, date=(startdate, enddate), platformname='Sentinel-2', relativeorbitnumber=relative_orbit, producttype=producttype, cloudcoverpercentage=(0, max_cloudcoverage)) self.products.update(queried_products) def query_s3(self, footprint, startdate, enddate): queried_products = self.api.query(footprint, date=(startdate, enddate), platformname='Sentinel-3', producttype='SL_2_LST___', productlevel='L2') self.products.update(queried_products) def query_s5p(self, footprint, startdate, enddate): kwargs = {} producttypedescriptions = [ 'Ozone', 'Sulphur Dioxide', 'Nitrogen Dioxide', 'Methane', 'Formaldehyde', 'Carbon Monoxide', 'Aerosol Index', 'Aerosol Layer Height', 'Cloud' ] # producttypedescriptions = ['Ozone'] # Loop over producttypes 
and update the query dictionary for producttypedescription in producttypedescriptions: queried_products = self.api_s5p.query( footprint, date=(startdate, enddate), platformname='Sentinel-5 Precursor', processinglevel='L2', producttypedescription=producttypedescription, **kwargs) # Remove any 'Suomi-NPP VIIRS Clouds' products which are returned as 'Cloud' (they shouldn't have been) # https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-5p/products-algorithms if producttypedescription == 'Cloud': temp_queried_products = queried_products.copy() for key in queried_products.keys(): if queried_products[key][ 'producttypedescription'] != 'Cloud': del temp_queried_products[key] queried_products = temp_queried_products self.products.update(queried_products) def print_num_and_size_of_products(self): logging.info('Number of products = ' + str(len(list(self.products)))) logging.info('Total size [GB] = ' + str(self.api.get_products_size(self.products))) # https://sentinelsat.readthedocs.io/en/master/api.html#lta-products # TODO: Get LTA retrieval to work properly (install of newest sentinelsat version is in dockerfile) # Retry every 30 min (+10 second buffer time) to request LTA products. @tenacity.retry(stop=tenacity.stop_after_attempt(200), wait=tenacity.wait_fixed(1810)) def download_zipfiles(self): zipfiles_directory = self.directory / 'zipfiles' if not zipfiles_directory.exists(): # Create directory if it does not exist os.makedirs(zipfiles_directory) # Get the products to be downloaded. The sample() function permutes the dataframe, such that a new LTA product # is requested at every retry. The optimal solution would have been to rearrange the dataframe by rotating the # index at every retry, but this is a quick and dirty way to achieve something similar. # (https://stackoverflow.com/a/34879805/12045808). products_df = self.queried_products_as_df().sample(frac=1) # NOTE: The code below is only useful while Sentinel-5p has a different API than the others. After this has # been fixed, the code should be reduced to the following single line: # Download all zipfiles (it automatically checks if zipfiles already exist) # self.api.download_all(self.products, directory_path=zipfiles_directory) # Download all zipfiles # But for now, use the following code: non_s5p_products = products_df[ products_df['platformname'] != 'Sentinel-5 Precursor'] s5p_products = products_df[products_df['platformname'] == 'Sentinel-5 Precursor'] if len(non_s5p_products): logging.info("Downloading Sentinel-1/2/3 products") self.api.download_all(non_s5p_products.to_dict(into=OrderedDict, orient='index'), directory_path=zipfiles_directory) else: logging.info("No Sentinel-1/2/3 products found in query") if len(s5p_products): logging.info("Downloading Sentinel-5p products") self.api_s5p.download_all(s5p_products.to_dict(into=OrderedDict, orient='index'), directory_path=zipfiles_directory) else: logging.info("No Sentinel-5p products found in query") # The Sentinel-5p data has wrongly been given the filetype .zip, but it should be .nc, so make a copy with # .nc extension. A copy is made instead of renaming so sentinelsat doesn't re-download the file every time # it is run.
s5p_downloaded_files = zipfiles_directory.glob('S5P*.zip') logging.debug( "Copying downloaded Sentinel-5p files from .zip to .nc (due to bug in SentinelSat)" ) for file in s5p_downloaded_files: if not file.with_suffix('.nc').exists(): shutil.copy(str(file), str(file.with_suffix('.nc'))) def queried_products_as_geojson(self): return self.api.to_geojson(self.products) def queried_products_as_df(self): return self.api.to_dataframe(self.products) def save_queried_products(self): orders_directory = self.directory / 'orders' if not orders_directory.exists(): os.makedirs(orders_directory) # Save the queried products to a geojson file (e.g. to be loaded into QGIS) geojson_path = (self.directory / 'orders' / self.order_id).with_suffix('.geojson') with geojson_path.open('w') as geojson_file: geojson_data = self.api.to_geojson(self.products) geojson_file.write(str(geojson_data)) # Save the queried products as pandas df in a pkl file (preferred format when working in Python) df_path = (self.directory / 'orders' / self.order_id).with_suffix('.pkl') df = self.api.to_dataframe(self.products) df.to_pickle(df_path)
def test_to_pandas(products): df = SentinelAPI.to_dataframe(products) assert type(df).__name__ == 'DataFrame' assert len(products) == len(df) assert set(products) == set(df.index)
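The set(products) == set(df.index) assertion reflects how the DataFrame is used downstream: its index holds the product UUIDs, so a filtered frame can be handed straight back to the download machinery. A small sketch under that assumption (credentials, footprint and dates are placeholders):

from sentinelsat import SentinelAPI

api = SentinelAPI('user', 'pass')
footprint = 'POLYGON((10.0 55.0, 10.5 55.0, 10.5 55.5, 10.0 55.5, 10.0 55.0))'
products = api.query(footprint, date=('20200101', '20200201'), platformname='Sentinel-2')
df = api.to_dataframe(products)
best = df.sort_values('cloudcoverpercentage').head(3)
api.download_all(best.index)  # the index entries are the UUIDs from `products`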
class SentinelDownloader(object): def __init__(self, user, password, api_url='https://scihub.copernicus.eu/apihub'): self._apiname = api_url self._user = user self._password = password # init logger root = logging.getLogger() root.addHandler(logging.StreamHandler( sys.stderr )) if self._apiname == 'https://scihub.copernicus.eu/apihub': try: from sentinelsat import SentinelAPI except ImportError as e: gs.fatal(_("Module requires sentinelsat library: {}").format(e)) # connect SciHub via API self._api = SentinelAPI(self._user, self._password, api_url=self._apiname ) elif self._apiname == 'USGS_EE': try: import landsatxplore.api from landsatxplore.errors import EarthExplorerError except ImportError as e: gs.fatal(_("Module requires landsatxplore library: {}").format(e)) api_login = False while api_login is False: # avoid login conflict in possible parallel execution try: self._api = landsatxplore.api.API(self._user, self._password) api_login = True except EarthExplorerError as e: time.sleep(1) self._products_df_sorted = None def filter(self, area, area_relation, clouds=None, producttype=None, limit=None, query={}, start=None, end=None, sortby=[], asc=True, relativeorbitnumber=None): args = {} if clouds: args['cloudcoverpercentage'] = (0, int(clouds)) if relativeorbitnumber: args['relativeorbitnumber'] = relativeorbitnumber if producttype.startswith('S2') and int(relativeorbitnumber) > 143: gs.warning("This relative orbit number is out of range") elif int(relativeorbitnumber) > 175: gs.warning(_("This relative orbit number is out of range")) if producttype: args['producttype'] = producttype if producttype.startswith('S2'): args['platformname'] = 'Sentinel-2' else: args['platformname'] = 'Sentinel-1' if not start: start = 'NOW-60DAYS' else: start = start.replace('-', '') if not end: end = 'NOW' else: end = end.replace('-', '') if query: redefined = [value for value in args.keys() if value in query.keys()] if redefined: gs.warning(_("Query overrides already defined options ({})").format( ','.join(redefined) )) args.update(query) gs.verbose(_("Query: area={} area_relation={} date=({}, {}) args={}").format( area, area_relation, start, end, args )) products = self._api.query( area=area, area_relation=area_relation, date=(start, end), **args ) products_df = self._api.to_dataframe(products) if len(products_df) < 1: gs.message(_("No product found")) return # sort and limit to first sorted product if sortby: self._products_df_sorted = products_df.sort_values( sortby, ascending=[asc] * len(sortby) ) else: self._products_df_sorted = products_df if limit: self._products_df_sorted = self._products_df_sorted.head(int(limit)) gs.message(_("{} Sentinel product(s) found").format(len(self._products_df_sorted))) def list(self): if self._products_df_sorted is None: return id_kw = ('uuid', 'entity_id') identifier_kw = ('identifier', 'display_id') cloud_kw = ('cloudcoverpercentage', 'cloud_cover') time_kw = ('beginposition', 'acquisition_date') kw_idx = 1 if self._apiname == 'USGS_EE' else 0 for idx in range(len(self._products_df_sorted[id_kw[kw_idx]])): if cloud_kw[kw_idx] in self._products_df_sorted: ccp = '{0:2.0f}%'.format( float(self._products_df_sorted[cloud_kw[kw_idx]][idx])) else: ccp = 'cloudcover_NA' print_str = '{0} {1}'.format( self._products_df_sorted[id_kw[kw_idx]][idx], self._products_df_sorted[identifier_kw[kw_idx]][idx]) if kw_idx == 1: time_string = self._products_df_sorted[time_kw[kw_idx]][idx] else: time_string = self._products_df_sorted[ time_kw[kw_idx]][idx].strftime("%Y-%m-%dT%H:%M:%SZ") 
print_str += ' {0} {1}'.format(time_string, ccp) if kw_idx == 0: print_str += ' {0}'.format( self._products_df_sorted['producttype'][idx]) print(print_str) def download(self, output, sleep=False, maxretry=False, datasource='ESA_COAH'): if self._products_df_sorted is None: return create_dir(output) gs.message(_("Downloading data into <{}>...").format(output)) if datasource == 'USGS_EE': from landsatxplore.earthexplorer import EarthExplorer from landsatxplore.errors import EarthExplorerError from zipfile import ZipFile ee_login = False while ee_login is False: # avoid login conflict in possible parallel execution try: ee = EarthExplorer(self._user, self._password) ee_login = True except EarthExplorerError as e: time.sleep(1) for idx in range(len(self._products_df_sorted['entity_id'])): scene = self._products_df_sorted['entity_id'][idx] identifier = self._products_df_sorted['display_id'][idx] zip_file = os.path.join(output, '{}.zip'.format(identifier)) gs.message(_("Downloading {}...").format(identifier)) try: ee.download(identifier=identifier, output_dir=output, timeout=600) except EarthExplorerError as e: gs.fatal(_(e)) ee.logout() # extract .zip to get "usual" .SAFE with ZipFile(zip_file, 'r') as zip: safe_name = zip.namelist()[0].split('/')[0] outpath = os.path.join(output, safe_name) zip.extractall(path=output) gs.message(_("Downloaded to <{}>").format(outpath)) try: os.remove(zip_file) except Exception as e: gs.warning(_("Unable to remove {0}:{1}").format( zip_file, e)) elif datasource == "ESA_COAH": for idx in range(len(self._products_df_sorted['uuid'])): gs.message('{} -> {}.SAFE'.format( self._products_df_sorted['uuid'][idx], os.path.join(output, self._products_df_sorted['identifier'][idx]) )) # download out = self._api.download(self._products_df_sorted['uuid'][idx], output) if sleep: x = 1 online = out['Online'] while not online: # sleep is in minutes so multiply by 60 time.sleep(int(sleep) * 60) out = self._api.download(self._products_df_sorted['uuid'][idx], output) x += 1 if x > maxretry: online = True elif datasource == 'GCS': for scene_id in self._products_df_sorted['identifier']: gs.message(_("Downloading {}...").format(scene_id)) dl_code = download_gcs(scene_id, output) if dl_code == 0: gs.message(_("Downloaded to {}").format( os.path.join(output, '{}.SAFE'.format(scene_id)))) else: # remove incomplete file del_folder = os.path.join(output, '{}.SAFE'.format(scene_id)) try: shutil.rmtree(del_folder) except Exception as e: gs.warning(_("Unable to remove unfinished " "download {}".format(del_folder))) def save_footprints(self, map_name): if self._products_df_sorted is None: return if self._apiname == 'USGS_EE': gs.fatal(_( "USGS Earth Explorer does not support footprint download.")) try: from osgeo import ogr, osr except ImportError as e: gs.fatal(_("Option <footprints> requires GDAL library: {}").format(e)) gs.message(_("Writing footprints into <{}>...").format(map_name)) driver = ogr.GetDriverByName("GPKG") tmp_name = gs.tempfile() + '.gpkg' data_source = driver.CreateDataSource(tmp_name) srs = osr.SpatialReference() srs.ImportFromEPSG(4326) # features can be polygons or multi-polygons layer = data_source.CreateLayer(str(map_name), srs, ogr.wkbMultiPolygon) # attributes attrs = OrderedDict([ ("uuid", ogr.OFTString), ("ingestiondate", ogr.OFTString), ("cloudcoverpercentage", ogr.OFTInteger), ("producttype", ogr.OFTString), ("identifier", ogr.OFTString) ]) # Sentinel-1 data does not have cloudcoverpercentage prod_types = [type for type in self._products_df_sorted["producttype"]]
s1_types = ["SLC", "GRD"] if any(type in prod_types for type in s1_types): del attrs["cloudcoverpercentage"] for key in attrs.keys(): field = ogr.FieldDefn(key, attrs[key]) layer.CreateField(field) # features for idx in range(len(self._products_df_sorted['uuid'])): wkt = self._products_df_sorted['footprint'][idx] feature = ogr.Feature(layer.GetLayerDefn()) newgeom = ogr.CreateGeometryFromWkt(wkt) # convert polygons to multi-polygons newgeomtype = ogr.GT_Flatten(newgeom.GetGeometryType()) if newgeomtype == ogr.wkbPolygon: multigeom = ogr.Geometry(ogr.wkbMultiPolygon) multigeom.AddGeometryDirectly(newgeom) feature.SetGeometry(multigeom) else: feature.SetGeometry(newgeom) for key in attrs.keys(): if key == 'ingestiondate': value = self._products_df_sorted[key][idx].strftime("%Y-%m-%dT%H:%M:%SZ") else: value = self._products_df_sorted[key][idx] feature.SetField(key, value) layer.CreateFeature(feature) feature = None data_source = None # coordinates of footprints are in WKT -> fp precision issues # -> snap gs.run_command('v.import', input=tmp_name, output=map_name, layer=map_name, snap=1e-10, quiet=True ) def get_products_from_uuid_usgs(self, uuid_list): scenes = [] for uuid in uuid_list: metadata = self._api.metadata(uuid, 'SENTINEL_2A') scenes.append(metadata) scenes_df = pandas.DataFrame.from_dict(scenes) self._products_df_sorted = scenes_df gs.message(_("{} Sentinel product(s) found").format( len(self._products_df_sorted))) def set_uuid(self, uuid_list): """Set products by uuid. TODO: Find better implementation :param uuid: uuid to download """ if self._apiname == 'USGS_EE': self.get_products_from_uuid_usgs(uuid_list) else: from sentinelsat.sentinel import SentinelAPIError self._products_df_sorted = {'uuid': []} for uuid in uuid_list: try: odata = self._api.get_product_odata(uuid, full=True) except SentinelAPIError as e: gs.error(_("{0}. 
UUID {1} skipped".format(e, uuid))) continue for k, v in odata.items(): if k == 'id': k = 'uuid' elif k == 'Sensing start': k = 'beginposition' elif k == 'Product type': k = 'producttype' elif k == 'Cloud cover percentage': k = 'cloudcoverpercentage' elif k == 'Identifier': k = 'identifier' elif k == 'Ingestion Date': k = 'ingestiondate' elif k == 'footprint': pass else: continue if k not in self._products_df_sorted: self._products_df_sorted[k] = [] self._products_df_sorted[k].append(v) def filter_USGS(self, area, area_relation, clouds=None, producttype=None, limit=None, query={}, start=None, end=None, sortby=[], asc=True, relativeorbitnumber=None): if area_relation != 'Intersects': gs.fatal(_( "USGS Earth Explorer only supports area_relation" " 'Intersects'")) if relativeorbitnumber: gs.fatal(_( "USGS Earth Explorer does not support 'relativeorbitnumber'" " option.")) if producttype and producttype != 'S2MSI1C': gs.fatal(_( "USGS Earth Explorer only supports producttype S2MSI1C")) if query: if not any(key in query for key in ['identifier', 'filename', 'usgs_identifier']): gs.fatal(_( "USGS Earth Explorer only supports query options" " 'filename', 'identifier' or 'usgs_identifier'.")) if 'usgs_identifier' in query: # get entityId from usgs identifier and directly save results usgs_id = query['usgs_identifier'] check_s2l1c_identifier(usgs_id, source='usgs') # entity_id = self._api.lookup('SENTINEL_2A', [usgs_id], # inverse=True) entity_id = self._api.get_entity_id([usgs_id], 'SENTINEL_2A') self.get_products_from_uuid_usgs(entity_id) return else: if "filename" in query: esa_id = query['filename'].replace('.SAFE', '') else: esa_id = query['identifier'] check_s2l1c_identifier(esa_id, source='esa') esa_prod_id = esa_id.split('_')[-1] utm_tile = esa_id.split('_')[-2] acq_date = esa_id.split('_')[2].split('T')[0] acq_date_string = '{0}-{1}-{2}'.format( acq_date[:4], acq_date[4:6], acq_date[6:]) start_date = end_date = acq_date_string # build the USGS style S2-identifier if utm_tile.startswith('T'): utm_tile_base = utm_tile[1:] bbox = get_bbox_from_S2_UTMtile(utm_tile_base) else: # get coordinate pairs from wkt string str_1 = 'POLYGON((' str_2 = '))' coords = area[area.find(str_1)+len(str_1):area.rfind(str_2)].split(',') # add one space to first pair for consistency coords[0] = ' ' + coords[0] lons = [float(pair.split(' ')[1]) for pair in coords] lats = [float(pair.split(' ')[2]) for pair in coords] bbox = (min(lons), min(lats), max(lons), max(lats)) start_date = start end_date = end usgs_args = { 'dataset': 'SENTINEL_2A', 'bbox': bbox, 'start_date': start_date, 'end_date': end_date } if clouds: usgs_args['max_cloud_cover'] = clouds if limit: usgs_args['max_results'] = limit scenes = self._api.search(**usgs_args) self._api.logout() if query: # check if the UTM-Tile is correct, remove otherwise for scene in scenes: if scene['display_id'].split('_')[1] != utm_tile: scenes.remove(scene) # remove redundant scene if len(scenes) == 2: for scene in scenes: prod_id = scene['display_id'].split('_')[-1] if prod_id != esa_prod_id: scenes.remove(scene) if len(scenes) < 1: gs.message(_("No product found")) return scenes_df = pandas.DataFrame.from_dict(scenes) if sortby: # replace sortby keywords with USGS keywords for idx, keyword in enumerate(sortby): if keyword == 'cloudcoverpercentage': sortby[idx] = 'cloud_cover' # turn cloudcover to float to make it sortable scenes_df['cloud_cover'] = pandas.to_numeric( scenes_df['cloud_cover']) elif keyword == 'ingestiondate': sortby[idx] = 'acquisition_date' # what does 
sorting by footprint mean elif keyword == 'footprint': sortby[idx] = 'display_id' self._products_df_sorted = scenes_df.sort_values( sortby, ascending=[asc] * len(sortby), ignore_index=True ) else: self._products_df_sorted = scenes_df gs.message(_("{} Sentinel product(s) found").format( len(self._products_df_sorted)))
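A hedged end-to-end sketch of the SentinelDownloader above. It has to run inside a GRASS session (gs.message and friends come from grass.script), the credentials and WKT area are placeholders, and the default api_url is kept because __init__ only builds self._api for that URL or for USGS_EE:

dl = SentinelDownloader('user', 'pass')
dl.filter(area='POLYGON((10.0 55.0, 10.5 55.0, 10.5 55.5, 10.0 55.5, 10.0 55.0))',
          area_relation='Intersects', producttype='S2MSI2A', clouds=30,
          start='2020-01-01', end='2020-02-01',
          sortby=['cloudcoverpercentage'], limit=5)
dl.list()                            # uuid, identifier, timestamp, cloud cover, product type
dl.download('/tmp/sentinel')         # datasource defaults to 'ESA_COAH'
dl.save_footprints('s2_footprints')  # needs GDAL/OGR available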
class SentinelDownloader(object): def __init__( self, user, password, api_url="https://apihub.copernicus.eu/apihub", cred_req=True, ): self._apiname = api_url self._user = user self._password = password self._cred_req = cred_req # init logger root = logging.getLogger() root.addHandler(logging.StreamHandler(sys.stderr)) if self._apiname not in ["USGS_EE", "GCS"]: try: from sentinelsat import SentinelAPI except ImportError as e: gs.fatal( _("Module requires sentinelsat library: {}").format(e)) # connect SciHub via API self._api = SentinelAPI(self._user, self._password, api_url=self._apiname) elif self._apiname == "USGS_EE": api_login = False while api_login is False: # avoid login conflict in possible parallel execution try: self._api = landsatxplore.api.API(self._user, self._password) api_login = True except EarthExplorerError as e: time.sleep(1) self._products_df_sorted = None def filter( self, area, area_relation, clouds=None, producttype=None, limit=None, query={}, start=None, end=None, sortby=[], asc=True, relativeorbitnumber=None, ): # Dict to identify platforms from requested product platforms = { "SL": "Sentinel-1", "GR": "Sentinel-1", "OC": "Sentinel-1", "S2": "Sentinel-2", "S3": "Sentinel-3", } args = {} if clouds: args["cloudcoverpercentage"] = (0, int(clouds)) if relativeorbitnumber: args["relativeorbitnumber"] = relativeorbitnumber if producttype and producttype.startswith("S2") and int(relativeorbitnumber) > 143: # producttype can be None here gs.warning("This relative orbit number is out of range") elif int(relativeorbitnumber) > 175: gs.warning(_("This relative orbit number is out of range")) if producttype: if producttype.startswith("S3"): # Using custom product names for Sentinel-3 products that look less cryptic split = [0, 2, 4, 5, 8] args["producttype"] = "_".join([ producttype[i:j] for i, j in zip(split, split[1:] + [None]) ][1:]).ljust(11, "_") else: args["producttype"] = producttype args["platformname"] = platforms[producttype[0:2]] if not start: start = "NOW-60DAYS" else: start = start.replace("-", "") if not end: end = "NOW" else: end = end.replace("-", "") if query: redefined = [ value for value in args.keys() if value in query.keys() ] if redefined: gs.warning( _("Query overrides already defined options ({})").format( ",".join(redefined))) args.update(query) gs.verbose( _("Query: area={} area_relation={} date=({}, {}) args={}").format( area, area_relation, start, end, args)) if self._cred_req is False: # in the main function it is ensured that there is an "identifier" query self._products_df_sorted = pandas.DataFrame( {"identifier": [query["identifier"]]}) return products = self._api.query(area=area, area_relation=area_relation, date=(start, end), **args) products_df = self._api.to_dataframe(products) if len(products_df) < 1: gs.message(_("No product found")) return # sort and limit to first sorted product if sortby: self._products_df_sorted = products_df.sort_values( sortby, ascending=[asc] * len(sortby)) else: self._products_df_sorted = products_df if limit: self._products_df_sorted = self._products_df_sorted.head( int(limit)) gs.message( _("{} Sentinel product(s) found").format( len(self._products_df_sorted))) def list(self): if self._products_df_sorted is None: return id_kw = ("uuid", "entity_id") identifier_kw = ("identifier", "display_id") cloud_kw = ("cloudcoverpercentage", "cloud_cover") time_kw = ("beginposition", "acquisition_date") kw_idx = 1 if self._apiname == "USGS_EE" else 0 for idx in range(len(self._products_df_sorted[id_kw[kw_idx]])): if cloud_kw[kw_idx] in self._products_df_sorted: ccp = 
"{0:2.0f}%".format( float(self._products_df_sorted[cloud_kw[kw_idx]][idx])) else: ccp = "cloudcover_NA" print_str = "{0} {1}".format( self._products_df_sorted[id_kw[kw_idx]][idx], self._products_df_sorted[identifier_kw[kw_idx]][idx], ) if kw_idx == 1: time_string = self._products_df_sorted[time_kw[kw_idx]][idx] else: time_string = self._products_df_sorted[ time_kw[kw_idx]][idx].strftime("%Y-%m-%dT%H:%M:%SZ") print_str += " {0} {1}".format(time_string, ccp) if kw_idx == 0: print_str += " {0}".format( self._products_df_sorted["producttype"][idx]) print_str += " {0}".format( self._products_df_sorted["size"][idx]) print(print_str) def skip_existing(self, output, pattern_file): prod_df_type = type(self._products_df_sorted) # Check if skipping is possible/required if prod_df_type != dict: if self._products_df_sorted.empty: return elif not self._products_df_sorted or os.path.exists(output) == False: return # Check if ingestion date is returned by API if "ingestiondate" not in self._products_df_sorted: gs.warning( _("Ingestiondate not returned. Cannot filter previously downloaded scenes" )) return # Check for previously downloaded scenes existing_files = [ f for f in os.listdir(output) if re.search(r".zip$|.safe$|.ZIP$|.SAFE$", f) ] if len(existing_files) <= 1: return # Filter by ingestion date skiprows = [] for idx, display_id in enumerate( self._products_df_sorted["identifier"]): existing_file = [ sfile for sfile in existing_files if display_id in sfile ] if existing_file: creation_time = datetime.fromtimestamp( os.path.getctime(existing_file[0])) if self._products_df_sorted["ingestiondate"][ idx] <= creation_time: gs.message( _("Skipping scene: {} which is already downloaded.". format(self._products_df_sorted["identifier"][idx]))) skiprows.append(display_id) if prod_df_type == dict: for scene in skiprows: idx = self._products_df_sorted["identifier"].index(scene) for key in self._products_df_sorted: self._products_df_sorted[key].pop(idx) else: self._products_df_sorted = self._products_df_sorted[ ~self._products_df_sorted["identifier"].isin(skiprows)] def download(self, output, sleep=False, maxretry=False, datasource="ESA_COAH"): if self._products_df_sorted is None: return create_dir(output) gs.message(_("Downloading data into <{}>...").format(output)) if datasource == "USGS_EE": from landsatxplore.earthexplorer import EarthExplorer from landsatxplore.errors import EarthExplorerError from zipfile import ZipFile ee_login = False while ee_login is False: # avoid login conflict in possible parallel execution try: ee = EarthExplorer(self._user, self._password) ee_login = True except EarthExplorerError as e: time.sleep(1) for idx in range(len(self._products_df_sorted["entity_id"])): scene = self._products_df_sorted["entity_id"][idx] identifier = self._products_df_sorted["display_id"][idx] zip_file = os.path.join(output, "{}.zip".format(identifier)) gs.message(_("Downloading {}...").format(identifier)) try: ee.download(identifier=identifier, output_dir=output, timeout=600) except EarthExplorerError as e: gs.fatal(_(e)) ee.logout() # extract .zip to get "usual" .SAFE with ZipFile(zip_file, "r") as zip: safe_name = zip.namelist()[0].split("/")[0] outpath = os.path.join(output, safe_name) zip.extractall(path=output) gs.message(_("Downloaded to <{}>").format(outpath)) try: os.remove(zip_file) except Exception as e: gs.warning( _("Unable to remove {0}:{1}").format(zip_file, e)) elif datasource == "ESA_COAH": for idx in range(len(self._products_df_sorted["uuid"])): gs.message("{} -> {}.SAFE".format( 
self._products_df_sorted["uuid"][idx], os.path.join(output, self._products_df_sorted["identifier"][idx]), )) # download out = self._api.download(self._products_df_sorted["uuid"][idx], output) if sleep: x = 1 online = out["Online"] while not online: # sleep is in minutes so multiply by 60 time.sleep(int(sleep) * 60) out = self._api.download( self._products_df_sorted["uuid"][idx], output) x += 1 if x > maxretry: online = True elif datasource == "GCS": for scene_id in self._products_df_sorted["identifier"]: gs.message(_("Downloading {}...").format(scene_id)) dl_code = download_gcs(scene_id, output) if dl_code == 0: gs.message( _("Downloaded to {}").format( os.path.join(output, "{}.SAFE".format(scene_id)))) else: # remove incomplete file del_folder = os.path.join(output, "{}.SAFE".format(scene_id)) try: shutil.rmtree(del_folder) except Exception as e: gs.warning( _("Unable to remove unfinished " "download {}".format(del_folder))) def save_footprints(self, map_name): if self._products_df_sorted is None: return if self._apiname == "USGS_EE": gs.fatal( _("USGS Earth Explorer does not support footprint download.")) try: from osgeo import ogr, osr except ImportError as e: gs.fatal( _("Option <footprints> requires GDAL library: {}").format(e)) gs.message(_("Writing footprints into <{}>...").format(map_name)) driver = ogr.GetDriverByName("GPKG") tmp_name = gs.tempfile() + ".gpkg" data_source = driver.CreateDataSource(tmp_name) srs = osr.SpatialReference() srs.ImportFromEPSG(4326) # features can be polygons or multi-polygons layer = data_source.CreateLayer(str(map_name), srs, ogr.wkbMultiPolygon) # attributes attrs = OrderedDict([ ("uuid", ogr.OFTString), ("ingestiondate", ogr.OFTString), ("cloudcoverpercentage", ogr.OFTInteger), ("producttype", ogr.OFTString), ("identifier", ogr.OFTString), ]) # Sentinel-1 data does not have cloudcoverpercentage prod_types = [type for type in self._products_df_sorted["producttype"]] if not any(type in prod_types for type in cloudcover_products): del attrs["cloudcoverpercentage"] for key in attrs.keys(): field = ogr.FieldDefn(key, attrs[key]) layer.CreateField(field) # features for idx in range(len(self._products_df_sorted["uuid"])): wkt = self._products_df_sorted["footprint"][idx] feature = ogr.Feature(layer.GetLayerDefn()) newgeom = ogr.CreateGeometryFromWkt(wkt) # convert polygons to multi-polygons newgeomtype = ogr.GT_Flatten(newgeom.GetGeometryType()) if newgeomtype == ogr.wkbPolygon: multigeom = ogr.Geometry(ogr.wkbMultiPolygon) multigeom.AddGeometryDirectly(newgeom) feature.SetGeometry(multigeom) else: feature.SetGeometry(newgeom) for key in attrs.keys(): if key == "ingestiondate": value = self._products_df_sorted[key][idx].strftime( "%Y-%m-%dT%H:%M:%SZ") else: value = self._products_df_sorted[key][idx] feature.SetField(key, value) layer.CreateFeature(feature) feature = None data_source = None # coordinates of footprints are in WKT -> fp precision issues # -> snap gs.run_command( "v.import", input=tmp_name, output=map_name, layer=map_name, snap=1e-10, quiet=True, ) def get_products_from_uuid_usgs(self, uuid_list): scenes = [] for uuid in uuid_list: metadata = self._api.metadata(uuid, "SENTINEL_2A") scenes.append(metadata) scenes_df = pandas.DataFrame.from_dict(scenes) self._products_df_sorted = scenes_df gs.message( _("{} Sentinel product(s) found").format( len(self._products_df_sorted))) def set_uuid(self, uuid_list): """Set products by uuid. 
TODO: Find better implementation :param uuid: uuid to download """ if self._apiname == "USGS_EE": self.get_products_from_uuid_usgs(uuid_list) else: from sentinelsat.sentinel import SentinelAPIError self._products_df_sorted = {"uuid": []} for uuid in uuid_list: try: odata = self._api.get_product_odata(uuid, full=True) except SentinelAPIError as e: gs.error(_("{0}. UUID {1} skipped".format(e, uuid))) continue for k, v in odata.items(): if k == "id": k = "uuid" elif k == "Sensing start": k = "beginposition" elif k == "Product type": k = "producttype" elif k == "Cloud cover percentage": k = "cloudcoverpercentage" elif k == "Identifier": k = "identifier" elif k == "Ingestion Date": k = "ingestiondate" elif k == "footprint": pass else: continue if k not in self._products_df_sorted: self._products_df_sorted[k] = [] self._products_df_sorted[k].append(v) def filter_USGS( self, area, area_relation, clouds=None, producttype=None, limit=None, query={}, start=None, end=None, sortby=[], asc=True, relativeorbitnumber=None, ): if area_relation != "Intersects": gs.fatal( _("USGS Earth Explorer only supports area_relation" " 'Intersects'")) if relativeorbitnumber: gs.fatal( _("USGS Earth Explorer does not support 'relativeorbitnumber'" " option.")) if producttype and producttype != "S2MSI1C": gs.fatal( _("USGS Earth Explorer only supports producttype S2MSI1C")) if query: if not any( key in query for key in ["identifier", "filename", "usgs_identifier"]): gs.fatal( _("USGS Earth Explorer only supports query options" " 'filename', 'identifier' or 'usgs_identifier'.")) if "usgs_identifier" in query: # get entityId from usgs identifier and directly save results usgs_id = query["usgs_identifier"] check_s2l1c_identifier(usgs_id, source="usgs") # entity_id = self._api.lookup('SENTINEL_2A', [usgs_id], # inverse=True) entity_id = self._api.get_entity_id([usgs_id], "SENTINEL_2A") self.get_products_from_uuid_usgs(entity_id) return else: if "filename" in query: esa_id = query["filename"].replace(".SAFE", "") else: esa_id = query["identifier"] check_s2l1c_identifier(esa_id, source="esa") esa_prod_id = esa_id.split("_")[-1] utm_tile = esa_id.split("_")[-2] acq_date = esa_id.split("_")[2].split("T")[0] acq_date_string = "{0}-{1}-{2}".format(acq_date[:4], acq_date[4:6], acq_date[6:]) start_date = end_date = acq_date_string # build the USGS style S2-identifier if utm_tile.startswith("T"): utm_tile_base = utm_tile[1:] bbox = get_bbox_from_S2_UTMtile(utm_tile_base) else: # get coordinate pairs from wkt string str_1 = "POLYGON((" str_2 = "))" coords = area[area.find(str_1) + len(str_1):area.rfind(str_2)].split(",") # add one space to first pair for consistency coords[0] = " " + coords[0] lons = [float(pair.split(" ")[1]) for pair in coords] lats = [float(pair.split(" ")[2]) for pair in coords] bbox = (min(lons), min(lats), max(lons), max(lats)) start_date = start end_date = end usgs_args = { "dataset": "SENTINEL_2A", "bbox": bbox, "start_date": start_date, "end_date": end_date, } if clouds: usgs_args["max_cloud_cover"] = clouds if limit: usgs_args["max_results"] = limit scenes = self._api.search(**usgs_args) self._api.logout() if query: # check if the UTM-Tile is correct, remove otherwise for scene in scenes: if scene["display_id"].split("_")[1] != utm_tile: scenes.remove(scene) # remove redundant scene if len(scenes) == 2: for scene in scenes: prod_id = scene["display_id"].split("_")[-1] if prod_id != esa_prod_id: scenes.remove(scene) if len(scenes) < 1: gs.message(_("No product found")) return scenes_df = 
pandas.DataFrame.from_dict(scenes) if sortby: # replace sortby keywords with USGS keywords for idx, keyword in enumerate(sortby): if keyword == "cloudcoverpercentage": sortby[idx] = "cloud_cover" # turn cloudcover to float to make it sortable scenes_df["cloud_cover"] = pandas.to_numeric( scenes_df["cloud_cover"]) elif keyword == "ingestiondate": sortby[idx] = "acquisition_date" # what does sorting by footprint mean elif keyword == "footprint": sortby[idx] = "display_id" self._products_df_sorted = scenes_df.sort_values(sortby, ascending=[asc] * len(sortby), ignore_index=True) else: self._products_df_sorted = scenes_df gs.message( _("{} Sentinel product(s) found").format( len(self._products_df_sorted)))
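The Sentinel-3 branch of filter() above builds the SciHub producttype by re-slicing a compact code; a worked example of that transformation, assuming an input such as "S3SL2LST" (a hypothetical value chosen to illustrate the slicing):

producttype = "S3SL2LST"
split = [0, 2, 4, 5, 8]
parts = [producttype[i:j] for i, j in zip(split, split[1:] + [None])]
# parts == ['S3', 'SL', '2', 'LST', '']
name = "_".join(parts[1:]).ljust(11, "_")
print(name)  # SL_2_LST___  (matches the producttype used for the Sentinel-3 query)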
# from pysal.contrib.viz import mapping as maps # connect to the API api = SentinelAPI('phillr', 'testme2019', 'https://scihub.copernicus.eu/dhus') # search by polygon, time, and SciHub query keywords footprint = geojson_to_wkt(read_geojson('sample-polygone.geojson')) products = api.query(footprint, date=('20190101', date(2019, 2, 27)), platformname='Sentinel-2', cloudcoverpercentage=(0, 10), processinglevel='Level-1C') # todo check for processed data len(products) # convert to Pandas DataFrame products_df = api.to_dataframe(products) # sort for most recent and lowest cloud cover products_df_sorted = products_df.sort_values(['ingestiondate', 'cloudcoverpercentage'], ascending=[True, True]) test = products_df_sorted.head(1) test['cloudcoverpercentage'] test['ingestiondate'] set(products_df['processinglevel']) # GeoJSON FeatureCollection containing footprints and metadata of the scenes geojson_products = api.to_geojson(products) # GeoPandas GeoDataFrame with the metadata of the scenes and the footprints as geometries geodata_products = api.to_geodataframe(products)
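If geopandas and matplotlib are installed, the GeoDataFrame from to_geodataframe can be plotted directly to eyeball the queried footprints; a minimal sketch reusing the geodata_products variable from the snippet above:

import matplotlib.pyplot as plt

ax = geodata_products.plot(column='cloudcoverpercentage', legend=True, figsize=(10, 10))
ax.set_title('Queried Sentinel-2 scene footprints')
plt.show()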
def sent2_amazon(user, passwd, geojsonfile, start_date, end_date, output_folder, tile=None, cloud='100'): """ Query the ESA catalogue then download S2 from AWS with correct renaming of the products Uses joblib to parallelise multiple files from AWS. Much quicker than ESA-based download Notes: ------------------------ Credit to sentinelsat for the query aspect of this function, and sentinelhub for the AWS aspect. Parameters ---------- user : string username for esa hub passwd : string password for hub geojsonfile : string AOI polygon of interest start_date : string date of beginning of search end_date : string date of end of search output_folder : string where you intend to download the imagery tile : string S2 tile cloud : string (optional) include a cloud filter in the search """ # Examples of sentinelhub usage: #download_safe_format('S2A_OPER_PRD_MSIL1C_PDMC_20160121T043931_R069_V20160103T171947_20160103T171947') #download_safe_format('S2A_MSIL1C_20170414T003551_N0204_R016_T54HVH_20170414T003551') #download_safe_format(tile=('T38TML','2015-12-19'), entire_product=True) #entire_product really means the whole tile in the old format! Avoid! #download_safe_format(tile=('T54HVH','2017-04-14')) # Use sentinelsat to query api = SentinelAPI(user, passwd) # if oldsat is True: # footprint = get_coordinates(geojsonfile) # else: footprint = geojson_to_wkt(read_geojson(geojsonfile)) products = api.query(footprint, (start_date, end_date), platformname="Sentinel-2", cloudcoverpercentage="[0 TO " + cloud + "]") #,producttype="GRD") products_df = api.to_dataframe(products) # If using an AOI shape this is the option to follow at present until I write a native function if tile is None: Parallel(n_jobs=-1, verbose=2)( delayed(download_safe_format)(i, folder=output_folder) for i in products_df.identifier) # If the tile id is known then use this - likely handy for the old format else: # A kludge for now until I spend more than two mins writing this func dateList = [] for prod in products_df['ingestiondate']: date1 = prod.strftime('%Y-%m-%d') dateList.append(date1) Parallel(n_jobs=-1, verbose=2)( delayed(download_safe_format)(tile=(tile, i), folder=output_folder) for i in dateList) return products_df, products
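A hedged call sketch for sent2_amazon; the credentials, paths and dates are placeholders, the tile is taken from the function's own examples, and it assumes sentinelhub's download_safe_format and joblib's Parallel/delayed are importable as in the function above:

products_df, products = sent2_amazon(
    'user', 'pass',
    geojsonfile='aoi.geojson',
    start_date='20190101',
    end_date='20190227',
    output_folder='/tmp/s2_downloads',
    tile='T54HVH',   # optional; omit to download by product identifier instead
    cloud='20')
print(len(products_df), 'products fetched via AWS')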