def search_api(sname, bbox, time, collection=None):
    """
    API search of the different satellite granules and return metadata dictionary

    :param sname: short name satellite product, ex: 'MOD03'
    :param bbox: polygon with the search bounding box
    :param time: time interval as datetime (init_time_datetime,final_time_datetime)
    :param collection: id of the collection to filter on (optional)
    :return metas: a dictionary with all the metadata for the API search
    """
    logging.info('search_api - CMR API search for {0} collection {1}'.format(sname, collection))
    maxg = 1000
    time_esmf = (dt_to_esmf(time[0]), dt_to_esmf(time[1]))
    api = GranuleQuery()
    search = api.parameters(
        short_name=sname,
        downloadable=True,
        polygon=bbox,
        temporal=time_esmf
    )
    sh = search.hits()
    if sh > maxg:
        logging.warning('search_api - the number of hits {0} is larger than the limit {1}'.format(sh, maxg))
        logging.warning('search_api - no satellite data with prefix {} used'.format(sname))
        logging.warning('search_api - use a reduced bounding box or a reduced time interval')
        metas = []
    else:
        metas = api.get(sh)
        if collection:
            metas = [m for m in metas if m['collection_concept_id'] == collection]
    logging.info('search_api - {} hits in this range'.format(len(metas)))
    return metas
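# Usage sketch for search_api (hedged): assumes this module's imports,
# i.e. `logging`, `GranuleQuery` from python-cmr, and the `dt_to_esmf`
# helper. The polygon is a closed ring of (lon, lat) tuples over Colorado.
from datetime import datetime

colorado = [(-109.05, 41.0), (-109.05, 37.0), (-102.05, 37.0),
            (-102.05, 41.0), (-109.05, 41.0)]
metas = search_api('MOD03', colorado,
                   (datetime(2020, 8, 1), datetime(2020, 8, 2)))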
def query(self, start_date: str, end_date: str, product: Dict,
          provider: str = 'LPDAAC_ECS', bbox: List[float] = None) -> List[Dict]:
    """
    Search the CMR database for spectral MODIS tiles matching a temporal range,
    defined by a start date and an end date. Returns metadata containing the URL
    of each image.

    Parameters
    ----------
    start_date: string
        Start date yyyy-mm-dd
    end_date: string
        End date yyyy-mm-dd
    product: Dict
        Product description; `product['products'][0]` must be a
        '<short_name>.<version>' string, and an optional 'day_offset'
        key shifts the nominal granule date
    provider: string
        Provider (default is 'LPDAAC_ECS')
    bbox: List[float]
        Bounding box [lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat]

    Returns
    ----------
    List[Dict]
        List of granules
    """
    q = GranuleQuery()
    prod, ver = product['products'][0].split('.')
    q.short_name(prod).version(ver)
    q.temporal(f"{start_date}T00:00:00Z", f"{end_date}T23:59:59Z")
    if bbox is not None and len(bbox) >= 4:
        q.bounding_box(*bbox[:4])
    _granules = q.get_all()

    # filter dates
    day_offset = product.get('day_offset', 0)

    granules = []
    for gran in _granules:
        # CMR uses day 1 of window - correct this to be middle of window
        date = (dateparser(gran['time_start'].split('T')[0]) +
                datetime.timedelta(days=day_offset)).date()
        if dateparser(start_date).date() <= date <= dateparser(end_date).date():
            granules.append(gran)

    logger.info("%s granules found within %s - %s" % (len(granules), start_date, end_date))
    return granules
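# Hedged example of the `product` argument this method expects: the code
# reads `product['products'][0]` as a '<short_name>.<version>' string plus
# an optional 'day_offset'. The exact schema comes from the surrounding
# project, so this dict and the commented call are illustrative only.
mcd43a4 = {
    'products': ['MCD43A4.006'],  # short name and version, dot-separated
    'day_offset': 8,              # shifts the granule date into the window
}
# granules = catalog.query('2020-01-01', '2020-01-31', mcd43a4,
#                          bbox=[-105.0, 39.0, -104.0, 40.0])  # hypothetical instance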
def search_api_sat(self, sname, bounds, time, collection=None):
    """
    API search of the different satellite granules and return metadata dictionary

    :param sname: short name satellite product, ex: 'MOD03'
    :param bounds: bounding box as (lon_min,lon_max,lat_min,lat_max)
    :param time: time interval (init_time_iso,final_time_iso)
    :param collection: id of the collection to specify
    :return metas: a dictionary with all the metadata for the API search
    """
    maxg = 1000
    # create the search polygon from the bounding box, as a closed
    # counterclockwise ring of (lon, lat) tuples
    lonmin, lonmax, latmin, latmax = bounds
    bbox = [(lonmin, latmax), (lonmin, latmin), (lonmax, latmin),
            (lonmax, latmax), (lonmin, latmax)]
    time_utcf = (utc_to_utcf(time[0]), utc_to_utcf(time[1]))
    api = GranuleQuery()
    search = api.parameters(short_name=sname,
                            downloadable=True,
                            polygon=bbox,
                            temporal=time_utcf)
    sh = search.hits()
    if sh > maxg:
        logging.warning("The number of hits %s is larger than the limit %s." % (sh, maxg))
        logging.warning("No satellite data with prefix %s will be used." % self.prefix)
        logging.warning("Use a reduced bounding box or a reduced time interval.")
        metas = []
    else:
        metas = api.get(sh)
        if collection:
            metas = [m for m in metas if m['collection_concept_id'] == collection]
    logging.info('search_api_sat - CMR API gives {0} hits for {1} of collection {2}'
                 .format(len(metas), sname, collection))
    return metas
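# Hedged illustration of the bounds-to-polygon conversion above: CMR wants
# a counterclockwise closed ring of (lon, lat) tuples, so the first and
# last vertex must coincide.
lonmin, lonmax, latmin, latmax = (-109.05, -102.05, 37.0, 41.0)
ring = [(lonmin, latmax), (lonmin, latmin), (lonmax, latmin),
        (lonmax, latmax), (lonmin, latmax)]
assert ring[0] == ring[-1]  # the ring is closed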
def query_cmr(self, params=None):
    """
    Queries CMR for one or more data set short names using the
    spatio-temporal constraints defined in params.
    Returns a json list of CMR records.
    """
    if params is None:
        return None
    self.granules = {}
    datasets = self._expand_datasets(params)
    for d in datasets:
        cmr_api = GranuleQuery()
        g = cmr_api.parameters(short_name=d['name'],
                               version=d['version'],
                               temporal=d['temporal'],
                               bounding_box=d['bounding_box']).get_all()
        self.granules[d['name']] = g
    self.cmr_download_size(self.granules)
    return self.granules
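# Hedged sketch of the record shape the loop above consumes: each entry
# produced by `_expand_datasets` must carry the four keys used in the
# query; the values here are illustrative only.
example_dataset = {
    'name': 'ATL03',
    'version': '005',
    'temporal': ('2020-01-01T00:00:00Z', '2020-01-31T23:59:59Z'),
    'bounding_box': (-109.05, 37.0, -102.05, 41.0),  # xmin, ymin, xmax, ymax
}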
def fetch_datasets(self, download_type='ROI_polygon', roi_polygon=None,
                   startdate=None, enddate=None, cloudcover_max=5,
                   product_shortname="SPL3SMP", max_products=-1,
                   inverse_polygon_order=False):
    """Query the NASA CMR API for products.

    See: https://modis.ornl.gov/data/modis_webservice.html
    """
    from cmr import GranuleQuery

    self.product_shortname = product_shortname
    self.roi_polygon = roi_polygon

    if download_type == 'ROI_polygon':
        if roi_polygon.split('.')[-1] == 'geojson':
            list_coords = from_geojson_to_list_coords(self.roi_polygon)
    else:
        raise RuntimeError("Unknown download type")

    if inverse_polygon_order:
        list_coords = list_coords[::-1]

    api = (GranuleQuery()
           .polygon(list_coords)
           .short_name(self.product_shortname)
           .temporal(startdate, enddate))
    n_products = api.hits()
    if max_products == -1:
        max_products = n_products
    self.list_products = api.get(limit=max_products)
    self.list_products_id = [f["producer_granule_id"] for f in self.list_products]
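# `from_geojson_to_list_coords` is defined elsewhere in this project; below
# is a minimal sketch of what it plausibly returns (an assumption, not the
# project's actual code): the exterior ring of the first polygon feature as
# (lon, lat) tuples, which is what GranuleQuery().polygon() expects.
import json

def from_geojson_to_list_coords_sketch(path):
    # hypothetical stand-in, named with a _sketch suffix to avoid clashing
    with open(path) as fh:
        geojson = json.load(fh)
    geometry = geojson["features"][0]["geometry"]
    return [tuple(point) for point in geometry["coordinates"][0]]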
def search_api_sat(self, sname, bbox, time, version=None):
    """
    API search of the different satellite granules and return metadata dictionary

    :param sname: short name satellite product, ex: 'MOD03'
    :param bbox: polygon with the search bounding box
    :param time: time interval (init_time_iso,final_time_iso)
    :param version: version of the collection (optional)
    :return metas: a dictionary with all the metadata for the API search
    """
    maxg = 100
    time_utcf = (utc_to_utcf(time[0]), utc_to_utcf(time[1]))
    api = GranuleQuery()
    # build the query parameters, adding the version only when given
    params = dict(short_name=sname,
                  downloadable=True,
                  polygon=bbox,
                  temporal=time_utcf)
    if version:
        params['version'] = version
    search = api.parameters(**params)
    sh = search.hits()
    logging.info("CMR API: %s gets %s hits in this range" % (sname, sh))
    if sh > maxg:
        logging.warning("The number of hits %s is larger than the limit %s." % (sh, maxg))
        logging.warning("No satellite data with prefix %s will be used." % self.prefix)
        logging.warning("Use a reduced bounding box or a reduced time interval.")
        metas = []
    else:
        metas = api.get(sh)
    return metas
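# Hedged, standalone illustration of the version pinning done above, using
# python-cmr directly; '061' (MODIS collection 6.1) is an example value.
from cmr import GranuleQuery

hits_all = GranuleQuery().short_name('MOD03').hits()
hits_061 = GranuleQuery().short_name('MOD03').version('061').hits()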
def search_api(sname, bbox, time, maxg=50, platform="", version=""):
    """
    API search of the different satellite granules

    :param sname: short name
    :param bbox: polygon with the search bounding box
    :param time: time interval (init_time,final_time)
    :param maxg: max number of granules to process
    :param platform: string with the platform
    :param version: string with the version
    :return granules: dictionary with the metadata of the search

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell ([email protected]), 2018-09-17
    """
    api = GranuleQuery()
    # build the query parameters, adding platform and version only when given
    params = dict(short_name=sname, downloadable=True, polygon=bbox, temporal=time)
    if platform:
        params['platform'] = platform
    if version:
        params['version'] = version
    search = api.parameters(**params)
    sh = search.hits()
    print "%s gets %s hits in this range" % (sname, sh)
    if sh > maxg:
        print "The number of hits %s is larger than the limit %s." % (sh, maxg)
        print "Use a reduced bounding box or a reduced time interval."
        granules = []
    else:
        granules = api.get(sh)
    return granules
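# Usage sketch (hedged), in this snippet's Python 2 register; assumes
# `from cmr import GranuleQuery` at module level. Polygon and dates are
# placeholders (Colorado, first week of 2017).
granules = search_api('MOD14',
                      [(-109.05, 41.0), (-109.05, 37.0), (-102.05, 37.0),
                       (-102.05, 41.0), (-109.05, 41.0)],
                      ("2017-01-01T00:00:00Z", "2017-01-07T00:00:00Z"),
                      platform="Terra")
print "retrieved %s granules" % len(granules)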
def query(start_date, end_date, product='MCD43A4.006', provider='LPDAAC_ECS'):
    """
    Search the CMR database for spectral MODIS tiles matching a temporal range,
    defined by a start date and an end date. Returns metadata containing the URL
    of each image.
    """
    q = GranuleQuery()
    prod, ver = product.split('.')
    q.short_name(prod).version(ver)
    q.temporal('%sT00:00:00Z' % str(start_date), '%sT23:59:59Z' % str(end_date))
    _granules = q.get_all()

    # filter dates
    day_offset = products[product]['day_offset']
    granules = []
    for gran in _granules:
        # CMR uses day 1 of window - correct this to be middle of window
        date = (dateparser(gran['time_start'].split('T')[0]) +
                datetime.timedelta(days=day_offset)).date()
        if start_date <= date <= end_date:
            granules.append(gran)

    logger.info("%s granules found within %s - %s" % (len(granules), start_date, end_date))
    return granules
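# Usage sketch (hedged): unlike the string-dated variant earlier in this
# collection, this function compares `start_date`/`end_date` directly
# against `datetime.date` objects, so pass dates, not strings; the
# module-level `products` dict must carry a 'day_offset' for the product.
import datetime

granules = query(datetime.date(2020, 1, 1), datetime.date(2020, 1, 31),
                 product='MCD43A4.006')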
def get_modis(destination, date, search_window, footprint, choose, clip, reproject):
    """
    Retrieves MODIS scenes. Scenes must entirely contain <footprint>
    within date +/- search_window.
    """
    workdir = path.join(destination, "modis")
    makedirs(workdir, exist_ok=True)

    footprint_path = path.join(destination, footprint)
    searchFootprint = shape(fiona.open(footprint_path)[0]["geometry"])

    date_window = timedelta(days=search_window)
    date_range = (date - date_window, date + date_window)

    cmrAPI = GranuleQuery()
    results = (cmrAPI.short_name(MODIS_SNOW_CMR_SHORT_NAME)
               .bounding_box(*searchFootprint.bounds)
               .temporal(*date_range)
               .get())

    # rank results by distance in time from the target date
    results = json_normalize(results)
    results["time_start"] = to_datetime(results["time_start"]).dt.tz_localize(None)
    results["timedeltas"] = (date - results.time_start).abs()
    results = results.sort_values(by="timedeltas", ascending=True)
    results["browse"] = [
        [_i["href"] for _i in image_links if _i.get("title", "") == "(BROWSE)"][0]
        for image_links in results.links
    ]

    image_iloc = 0
    if choose:
        print(tabulate(
            results[["cloud_cover", "day_night_flag", "timedeltas", "browse"]]
            .reset_index(drop=True),
            headers="keys",
        ))
        image_iloc = int(input("Choose image ID [0-{}]: ".format(len(results) - 1)))

    image_url = results.iloc[image_iloc].links[0]["href"]
    MODIS_DL_COMMAND = ("wget "
                        "--http-user=$EARTHDATA_USERNAME "
                        "--http-password=$EARTHDATA_PASSWORD "
                        "--no-check-certificate --auth-no-challenge "
                        '-r --reject "index.html*" -np -e robots=off -nH -nd '
                        "--directory-prefix={destination} "
                        "{image_url}".format(destination=workdir, image_url=image_url))
    _sh = Popen(MODIS_DL_COMMAND, shell=True).communicate()

    ## have to do some gdal magic with MODIS data:
    ## turn it into a GeoTIFF with the right projection.
    MODISfile = glob(path.join(workdir, "MOD10A1*.hdf*"))[0]
    output_file = path.join(workdir, "MODIS_reproj.tif")
    MODIS_CONVERT_COMMAND = (
        "gdalwarp "
        "HDF4_EOS:EOS_GRID:{path}:MOD_Grid_Snow_500m:NDSI_Snow_Cover "
        "-cutline {footprint} -crop_to_cutline -dstnodata 9999 "
        "-t_srs {projection} -r cubic "
        "-s_srs '+proj=sinu +R=6371007.181 +nadgrids=@null +wktext' "
        "{output_tif} ".format(
            path=MODISfile,
            footprint=footprint_path,
            projection=reproject,
            output_tif=output_file,
        ))
    _sh = Popen(MODIS_CONVERT_COMMAND, shell=True).communicate()
    print(MODISfile, output_file)
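# Hedged invocation sketch: assumes a GeoJSON footprint file inside
# <destination>, EARTHDATA_USERNAME/EARTHDATA_PASSWORD in the environment,
# and wget plus gdalwarp on PATH; the target projection is illustrative.
from datetime import datetime

get_modis("/tmp/snow", datetime(2021, 1, 15), 3, "footprint.geojson",
          choose=False, clip=True, reproject="EPSG:32613")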
## Search/Download - sandbox
from cmr import CollectionQuery, GranuleQuery
import json
import requests
import urlparse
import os
import sys

api = GranuleQuery()
fire = GranuleQuery()

# MOD14: bounding box = Colorado (corner coordinates as (lon, lat) pairs,
# counterclockwise, ring closed by repeating the first point)
MOD14granules = api.parameters(
    short_name="MOD14",
    platform="Terra",
    downloadable=True,
    polygon=[(-109.0507527, 40.99898), (-109.0698568, 37.0124375),
             (-102.0868788, 36.9799819), (-102.0560592, 40.999126),
             (-109.0507527, 40.99898)],
    temporal=("2017-01-01T00:00:00Z", "2017-01-07T00:00:00Z")  # time start,end
)
print "MOD14 gets %s hits in this range" % MOD14granules.hits()
MOD14granules = api.get(10)

# MOD03: geolocation data for MOD14
MOD03granules = api.parameters(
    short_name="MOD03",
    platform="Terra",
    downloadable=True,
    polygon=[(-109.0507527, 40.99898), (-109.0698568, 37.0124375),
             (-102.0868788, 36.9799819), (-102.0560592, 40.999126),
             (-109.0507527, 40.99898)],
    temporal=("2017-01-01T00:00:00Z", "2017-01-07T00:00:00Z")  # time start,end
)
print "MOD03 gets %s hits in this range" % MOD03granules.hits()
class ModisQuery(object):
    """Class for querying and downloading MODIS data.

    The user can create a query and send it to NASA's CMR servers.
    The response can be either just printed to console or passed to
    the `download` method, to fetch the resulting HDF files to local disk.
    """

    def __init__(self,
                 products: List[str],
                 aoi: List[Union[float, int]] = None,
                 begindate: datetime = None,
                 enddate: datetime = None,
                 tile_filter: List[str] = None,
                 version: str = "006") -> None:
        """Initialize an instance of the ModisQuery class.

        This creates an instance of `ModisQuery` with the basic query parameters.
        The `aoi` needs to be a list of either 2 or 4 coordinates as `float` or `int`,
        depending on whether a point or a bounding box is requested.
        The `tile_filter` needs to be specified as a list of MODIS tile IDs
        in the format hXXvXX (e.g. `h20v08`).

        Args:
            products (List[str]): List of product codes to be queried / downloaded.
            aoi (List[Union[float, int]]): Area of interest (point as lon/lat or
                bounding box as xmin, ymin, xmax, ymax).
            begindate (datetime): Start date for query.
            enddate (datetime): End date for query.
            tile_filter (List[str]): List of tiles to be queried / downloaded
                (refines initial results).
            version (str): MODIS collection version.

        Raises:
            AssertionError: If no product code is supplied.
            ValueError: If `len(aoi) not in [2, 4]`.
        """
        assert products, "No product IDs supplied!"

        self.begin = begindate
        self.end = enddate
        self.tile_filter = tile_filter
        self.api = GranuleQuery()

        # construct query
        self.api.parameters(
            short_name=products,
            version=version,
            temporal=(begindate, enddate)
        )

        if aoi is not None:
            if len(aoi) == 2:
                self.api.point(*aoi)
            elif len(aoi) == 4:
                self.api.bounding_box(*aoi)
            else:
                raise ValueError("Expected point or bounding box as AOI")

    def search(self, match_begin: bool = True) -> None:
        """Send query to MODIS CMR servers.

        Constructs the query from parameters passed to `__init__`
        and sends the query to the NASA servers. The returned results
        will be stored in a class variable.

        To deal with overlapping date ranges of composite products,
        the specified start date can be matched to the MODIS native timestamp.

        Args:
            match_begin (bool): Flag to match begin date with native MODIS timestamp
                (no data with timestamp earlier than begindate is allowed).
        """
        # init results dict
        self.results = {}

        # if no dates are supplied, we can't match the begin date
        if self.begin is None and self.end is None:
            match_begin = False

        log.debug("Starting query")

        # get all results
        results_all = self.api.get_all()

        log.debug("Query complete, filtering results")

        for result in self._parse_response(results_all):

            # skip tiles outside of filter
            if self.tile_filter and result["tile"]:
                if result["tile"] not in self.tile_filter:
                    continue

            # enforce dates if required
            if match_begin:
                if self.begin is not None:
                    if result["time_start"] < self.begin.date():
                        continue
                if self.end is not None:
                    if result["time_start"] > self.end.date():
                        continue

            filename = result["filename"]
            del result["filename"]
            self.results.update({filename: result})

        # final results
        self.nresults = len(self.results)

        log.debug("Search complete. Total results: %s, filtered: %s",
                  len(results_all), self.nresults)

    @staticmethod
    def _parse_response(query: List[dict]) -> dict:
        """Generator for parsing API response.

        Args:
            query (List[dict]): Query returned by CMR API.

        Returns:
            dict: Parsed query as dict.
""" tile_regxp = re.compile(r".+(h\d+v\d+).+") for entry in query: entry_parsed = dict( filename=entry["producer_granule_id"], time_start=pd.Timestamp(entry["time_start"]).date(), time_end=pd.Timestamp(entry["time_end"]).date(), updated=entry["updated"], link=entry["links"][0]["href"], ) try: tile = tile_regxp.search(entry_parsed["filename"]).group(1) except AttributeError: tile = None entry_parsed.update({"tile": tile}) yield entry_parsed @staticmethod def _parse_hdfxml(response): result = {} tree = ElementTree.fromstring(response.content) for entry in tree.iter(tag='GranuleURMetaData'): for datafile in entry.iter(tag="DataFiles"): for datafilecont in datafile.iter(tag="DataFileContainer"): for content in datafilecont: if content.tag in ["Checksum", "FileSize"]: result.update({content.tag: int(content.text)}) return result def _fetch(self, session: SessionWithHeaderRedirection, url: str, destination: Path, overwrite: bool, check: bool, ) -> Tuple[str, Union[None, Exception]]: """Helper function to fetch HDF files Args: session (SessionWithHeaderRedirection): requests session to fetch file. url (str): URL for file. destination (Path): Target directory. overwrite (bool): Overwrite existing. check (bool): Check file size and checksum. Returns: Tuple[str, Union[None, Exception]]: Returns tuple with either (filename, None) for success and (URL, Exception) for error. """ filename = url.split("/")[-1] filename_full = destination.joinpath(filename) if not exists(filename_full) or overwrite: filename_temp = filename_full.with_suffix(".modapedl") try: with session.get(url, stream=True, allow_redirects=True) as response: response.raise_for_status() with open(filename_temp, "wb") as openfile: shutil.copyfileobj(response.raw, openfile, length=16*1024*1024) if check: with session.get(url + ".xml", allow_redirects=True) as response: response.raise_for_status() file_metadata = self._parse_hdfxml(response) # check filesize assert filename_temp.stat().st_size == file_metadata["FileSize"] with open(filename_temp, "rb") as openfile: checksum = cksum(openfile) # check checksum assert checksum == file_metadata["Checksum"] shutil.move(filename_temp, filename_full) except (HTTPError, ConnectionError, AssertionError, FileNotFoundError) as e: try: filename_temp.unlink() except FileNotFoundError: pass return (filename, e) else: log.info("%s exists in target. Please set overwrite to True.", filename_full) return (filename, None) def download(self, targetdir: Path, username: str, password: str, overwrite: bool = False, multithread: bool = False, nthreads: int = 4, max_retries: int = -1, robust: bool = False, ) -> List: """Download MODIS HDF files. This method downloads the MODIS HDF files contained in the server response to the `search` call. This requires NASA earthdata credentials. To speed up download, multiple threads can be used. Args: targetdir (Path): Target directory for files being downloaded. username (str): Earthdata username. password (str): Earthdata password. overwrite (bool): Replace existing files. multithread (bool): Use multiple threads for downloading. nthreads (int): Number of threads. max_retries (int): Maximum number of retries for failed downloads (for no max, set -1). robust (bool): Perform robust downloading (checks file size and checksum). Raises: DownloadError: If one or more errors were encountered during downloading. Returns: List of downloaded MODIS HDF filenames. 
""" # make sure target directory is dir and exists assert targetdir.is_dir() assert targetdir.exists() retry_count = 0 to_download = self.results.copy() downloaded = [] while True: with SessionWithHeaderRedirection(username, password) as session: backoff = min(450, 2**retry_count) retries = Retry(total=5, backoff_factor=backoff, status_forcelist=[502, 503, 504]) session.mount( "https://", HTTPAdapter(pool_connections=nthreads, pool_maxsize=nthreads*2, max_retries=retries) ) if multithread: log.debug("Multithreaded download using %s threads. Warming up connection pool.", nthreads) # warm up pool _ = session.get(list(to_download.values())[0]["link"], stream=True, allow_redirects=True) with ThreadPoolExecutor(nthreads) as executor: futures = [executor.submit(self._fetch, session, values["link"], targetdir, overwrite, robust) for key, values in to_download.items()] downloaded_temp = [x.result() for x in futures] else: log.debug("Serial download") downloaded_temp = [] for _, values in to_download.items(): downloaded_temp.append( self._fetch(session, values["link"], targetdir, overwrite, robust) ) # check if downloads are OK for fid, err in downloaded_temp: if err is None: del to_download[fid] downloaded.append(fid) if to_download: if retry_count < max_retries or max_retries == -1: retry_count += 1 log.debug("Retrying downloads! Files left: %s", len(to_download)) if max_retries > 0: log.debug("Try %s of %s", retry_count, max_retries) continue raise DownloadError(list(to_download.keys())) break return downloaded
from cmr import CollectionQuery, GranuleQuery
import json

api = GranuleQuery()

# MOD14: fire data - auto-sorted by start_time
MOD14granules = api.parameters(
    short_name="MOD14",
    platform="Terra",
    downloadable=True)
print MOD14granules.hits()
#MOD14granules = api.get_all()
MOD14granules = api.get(10)

for granule in MOD14granules:
    print granule["title"], granule["time_start"], granule["time_end"], granule["polygons"]

# MOD03: geolocation data - auto-sorted by start_time
MOD03granules = api.parameters(
    short_name="MOD03",
    platform="Terra",
    downloadable=True)
print MOD03granules.hits()
MOD03granules = api.get(10)

for granule in MOD03granules:
    print json.dumps(granule, indent=4, separators=(',', ': '))
    #print granule["title"], granule["time_start"], granule["time_end"], granule["longitude"]

# "polygons" gives lon/lat data in pairs; the first and last pair are the
# same (the ring is closed). Note: it appears that all granules are
# quadrilaterals.