Ejemplo n.º 1
0
def search_api(sname, bbox, time, maxg=50, platform="", version=""):
    """
    API search of the different satellite granules

    :param sname: short name
    :param bbox: polygon with the search bounding box
    :param time: time interval (init_time,final_time)
    :param maxg: max number of granules to process
    :param platform: string with the platform (empty: no platform filter)
    :param version: string with the version (empty: no version filter)
    :return granules: result of the API query for all the hits, or an empty
                      list when the number of hits is larger than maxg

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell ([email protected]), 2018-09-17
    """
    # Criteria that are always part of the search.
    criteria = dict(short_name=sname,
                    downloadable=True,
                    polygon=bbox,
                    temporal=time)
    # Optional filters are only sent when the caller provided a value.
    if platform:
        criteria['platform'] = platform
    if version:
        criteria['version'] = version

    api = GranuleQuery()
    search = api.parameters(**criteria)
    sh = search.hits()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("%s gets %s hits in this range" % (sname, sh))
    if sh > maxg:
        print("The number of hits %s is larger than the limit %s." % (sh, maxg))
        print("Use a reduced bounding box or a reduced time interval.")
        return []
    return api.get(sh)
Ejemplo n.º 2
0
    def search_api(sname, bbox, time, collection=None):
        """
        Search the CMR API for satellite granules and return their metadata

        :param sname: short name satellite product, ex: 'MOD03'
        :param bbox: polygon with the search bounding box
        :param time: time interval as datetime (init_time_datetime,final_time_datetime)
        :param collection: optional collection concept id; when given, only the
                           granules whose 'collection_concept_id' equals it are kept

        :return metas: metadata entries found by the API search (empty list
                       when the number of hits is larger than the internal limit)
        """
        logging.info('search_api - CMR API search for {0} collection {1}'.format(sname,collection))
        hit_limit = 1000
        # both ends of the interval go through dt_to_esmf before the query
        interval = tuple(dt_to_esmf(time[i]) for i in (0, 1))
        query = GranuleQuery()
        nhits = query.parameters(short_name=sname,
                                 downloadable=True,
                                 polygon=bbox,
                                 temporal=interval).hits()
        if nhits <= hit_limit:
            metas = query.get(nhits)
        else:
            # too many granules: report it and continue with no data
            logging.warning('search_api - the number of hits {0} is larger than the limit {1}'.format(nhits,hit_limit))
            logging.warning('search_api - any satellite data with prefix {} used'.format(sname))
            logging.warning('search_api - use a reduced bounding box or a reduced time interval')
            metas = []
        if collection:
            metas = [meta for meta in metas
                     if meta['collection_concept_id'] == collection]
        logging.info('search_api - {} hits in this range'.format(len(metas)))
        return metas
Ejemplo n.º 3
0
	def search_api_sat(self, sname, bbox, time, version=None):
		"""
		Search the CMR API for satellite granules and return their metadata

		:param sname: short name satellite product, ex: 'MOD03'
		:param bbox: polygon with the search bounding box
		:param time: time interval (init_time_iso,final_time_iso)
		:param version: optional product version; only sent to the API when set

		:return metas: metadata found by the API search (empty list when the
		               number of hits is larger than the internal limit)
		"""
		hit_limit = 100
		# both ends of the interval go through utc_to_utcf before the query
		interval = tuple(utc_to_utcf(time[i]) for i in (0, 1))
		criteria = dict(
			short_name=sname,
			downloadable=True,
			polygon=bbox,
			temporal=interval,
		)
		if version:
			criteria['version'] = version
		query = GranuleQuery()
		nhits = query.parameters(**criteria).hits()
		logging.info("CMR API: %s gets %s hits in this range" % (sname,nhits))
		if nhits <= hit_limit:
			return query.get(nhits)
		# too many granules: report it and return no data
		logging.warning("The number of hits %s is larger than the limit %s." % (nhits,hit_limit))
		logging.warning("Any satellite data with prefix %s used." % self.prefix)
		logging.warning("Use a reduced bounding box or a reduced time interval.")
		return []
Ejemplo n.º 4
0
    def search_api_sat(self, sname, bounds, time, collection=None):
        """
        Search the CMR API for satellite granules and return their metadata

        :param sname: short name satellite product, ex: 'MOD03'
        :param bounds: bounding box as (lon_min,lon_max,lat_min,lat_max)
        :param time: time interval (init_time_iso,final_time_iso)
        :param collection: id of the collection to specify; when given, only
                           granules with that 'collection_concept_id' are kept

        :return metas: metadata entries found by the API search (empty list
                       when the number of hits is larger than the internal limit)
        """
        hit_limit = 1000
        # closed polygon ring around the box, starting and ending at the
        # (lon_min, lat_max) corner
        west, east, south, north = bounds
        ring = [(west, north), (west, south), (east, south), (east, north)]
        ring.append(ring[0])
        interval = tuple(utc_to_utcf(time[i]) for i in (0, 1))
        query = GranuleQuery()
        nhits = query.parameters(short_name=sname,
                                 downloadable=True,
                                 polygon=ring,
                                 temporal=interval).hits()
        if nhits <= hit_limit:
            metas = query.get(nhits)
        else:
            # too many granules: report it and continue with no data
            logging.warning("The number of hits %s is larger than the limit %s." % (nhits, hit_limit))
            logging.warning("Any satellite data with prefix %s used." % self.prefix)
            logging.warning("Use a reduced bounding box or a reduced time interval.")
            metas = []
        if collection:
            metas = [meta for meta in metas
                     if meta['collection_concept_id'] == collection]
        summary = 'search_api_sat - CMR API gives {0} hits for {1} of collection {2}'
        logging.info(summary.format(len(metas), sname, collection))
        return metas
Ejemplo n.º 5
0
    def query_cmr(self, params=None):
        """
        Run one CMR granule query per data set obtained from params.

        The data sets come from self._expand_datasets(params); each one is
        queried by short name, version, temporal range and bounding box.
        The results are stored in self.granules, keyed by data set name,
        passed to self.cmr_download_size and returned. Returns None when
        params is None.
        """
        if params is None:
            return None
        self.granules = {}
        for dataset in self._expand_datasets(params):
            query = GranuleQuery()
            criteria = {'short_name': dataset['name']}
            for key in ('version', 'temporal', 'bounding_box'):
                criteria[key] = dataset[key]
            self.granules[dataset['name']] = query.parameters(**criteria).get_all()

        self.cmr_download_size(self.granules)
        return self.granules
Ejemplo n.º 6
0
## Search/Download - sandbox
from cmr import CollectionQuery, GranuleQuery
import json
import requests
import urlparse
import os
import sys

api = GranuleQuery()
fire = GranuleQuery()

# Search area: Colorado. Vertices are (lon, lat) pairs listed counterclockwise;
# the ring is closed by repeating the first vertex at the end.
COLORADO_POLYGON = [(-109.0507527, 40.99898),
                    (-109.0698568, 37.0124375),
                    (-102.0868788, 36.9799819),
                    (-102.0560592, 40.999126),
                    (-109.0507527, 40.99898)]
# Search window as (time start, time end)
SEARCH_WINDOW = ("2017-01-01T00:00:00Z", "2017-01-07T00:00:00Z")

# MOD14 over the search area and window
MOD14granules = api.parameters(
                        short_name="MOD14",
                        platform="Terra",
                        downloadable=True,
                        polygon=COLORADO_POLYGON,
                        temporal=SEARCH_WINDOW
                        )
# Single-argument print(...) produces the same output on Python 2 and 3.
print("MOD14 gets %s hits in this range" % MOD14granules.hits())
MOD14granules = api.get(10)

# MOD03: geoloc data for MOD14, same search area and window
MOD03granules = api.parameters(
                        short_name="MOD03",
                        platform="Terra",
                        downloadable=True,
                        polygon=COLORADO_POLYGON,
                        temporal=SEARCH_WINDOW
                        )
print("MOD03 gets %s hits in this range" % MOD03granules.hits())
Ejemplo n.º 7
0
class ModisQuery(object):
    """Class for querying and downloading MODIS data.

    The user can create a query and send it to NASA's CMR servers.
    The response can be either just printed to console or passed to
    the `download` method, to fetch the resulting HDF files to local disk.
    """

    def __init__(self,
                 products: List[str],
                 aoi: List[Union[float, int]] = None,
                 begindate: datetime = None,
                 enddate: datetime = None,
                 tile_filter: List[str] = None,
                 version: str = "006") -> None:
        """Initialize instance ModisQuery class.

        This creates an instance of `ModisQuery` with the basic query parameters.
        The `aoi` needs to be a list of either 2 or 4 coordinates as `float` or `int`, depending
        on if a point or bounding box is requested.
        The tile_filter needs to be specified as list of MODIS tile IDs in format hXXvXX (e.g. `h20v08`).


        Args:
            products (List[str]): List of product codes to be queried / downloaded.
            aoi (List[Union[float, int]]): Area of interest (point as lon/lat or bounding box as xmin, ymin, xmax, ymax).
            begindate (datetime): Start date for query.
            enddate (datetime): End date for query.
            tile_filter (List[str]): List of tiles to be queried / downloaded (refines initial results).
            version (str): MODIS collection version.

        Raises:
            AssertionError: If no product code is supplied.
            ValueError: If `aoi` is given and `len(aoi) not in [2, 4]`.
        """

        assert products, "No product IDs supplied!"

        self.begin = begindate
        self.end = enddate
        self.tile_filter = tile_filter
        self.api = GranuleQuery()

        # construct query
        self.api.parameters(
            short_name=products,
            version=version,
            temporal=(begindate, enddate)
        )

        if aoi is not None:
            if len(aoi) == 2:
                self.api.point(*aoi)
            elif len(aoi) == 4:
                self.api.bounding_box(*aoi)
            else:
                raise ValueError("Expected point or bounding box as AOI")

    def search(self, match_begin: bool = True) -> None:
        """Send query to MODIS CMR servers.

        Constructs the query from parameters passed to `__init__`
        and sends the query to the NASA servers. The returned results
        are stored on the instance as `self.results` (dict keyed by
        filename) and their count as `self.nresults`.
        To deal with overlapping date ranges of composite products,
        the specified start date can be matched to the MODIS native timestamp.

        Args:
            match_begin (bool): Flag to match begin date with native MODIS timestamp (no data with timestamp earlier than begindate is allowed).
        """

        # init results dict
        self.results = {}

        # if no dates were supplied, there is nothing to match against
        if self.begin is None and self.end is None:
            match_begin = False

        log.debug("Starting query")

        # get all results
        results_all = self.api.get_all()

        log.debug("Query complete, filtering results")

        for result in self._parse_response(results_all):

            # skip tiles outside of filter (entries without a tile ID are kept)
            if self.tile_filter and result["tile"]:
                if result["tile"] not in self.tile_filter:
                    continue

            # enforce dates if required: the granule start date has to fall
            # within [begin, end]

            if match_begin:
                if self.begin is not None:
                    if result["time_start"] < self.begin.date():
                        continue

                if self.end is not None:
                    if result["time_start"] > self.end.date():
                        continue

            filename = result["filename"]
            del result["filename"]

            self.results.update({filename: result})

        # final results
        self.nresults = len(self.results)

        log.debug("Search complete. Total results: %s, filtered: %s", len(results_all), self.nresults)


    @staticmethod
    def _parse_response(query: List[dict]) -> dict:
        """Generator for parsing API response.

        Args:
            query (List[dict]): Query returned by CMR API.

        Yields:
            dict: One parsed entry per item in `query`, with the keys
                `filename`, `time_start`, `time_end`, `updated`, `link`
                and `tile` (`None` if no hXXvXX pattern is found in the
                filename).

        """

        tile_regxp = re.compile(r".+(h\d+v\d+).+")

        for entry in query:

            entry_parsed = dict(
                filename=entry["producer_granule_id"],
                time_start=pd.Timestamp(entry["time_start"]).date(),
                time_end=pd.Timestamp(entry["time_end"]).date(),
                updated=entry["updated"],
                link=entry["links"][0]["href"],
            )

            try:
                tile = tile_regxp.search(entry_parsed["filename"]).group(1)
            except AttributeError:
                # search() returned None: the filename carries no tile ID
                tile = None

            entry_parsed.update({"tile": tile})

            yield entry_parsed

    @staticmethod
    def _parse_hdfxml(response):
        """Extract checksum and file size from an HDF metadata XML response.

        Args:
            response: Object whose `content` attribute holds the XML document.

        Returns:
            dict: The `Checksum` and `FileSize` values (as `int`) found
                inside `GranuleURMetaData/DataFiles/DataFileContainer`;
                keys that are not present in the XML are omitted.
        """
        result = {}
        tree = ElementTree.fromstring(response.content)
        for entry in tree.iter(tag='GranuleURMetaData'):
            for datafile in entry.iter(tag="DataFiles"):
                for datafilecont in datafile.iter(tag="DataFileContainer"):
                    for content in datafilecont:
                        if content.tag in ["Checksum", "FileSize"]:
                            result.update({content.tag: int(content.text)})
        return result


    def _fetch(self,
               session: "SessionWithHeaderRedirection",
               url: str,
               destination: Path,
               overwrite: bool,
               check: bool,
               ) -> Tuple[str, Union[None, Exception]]:
        """Helper function to fetch HDF files

        The file is first written to a temporary file (suffix `.modapedl`)
        and only moved to its final name once the download, and the
        optional size / checksum verification, have succeeded.

        Args:
            session (SessionWithHeaderRedirection): requests session to fetch file.
            url (str): URL for file.
            destination (Path): Target directory.
            overwrite (bool): Overwrite existing.
            check (bool): Check file size and checksum.

        Returns:
            Tuple[str, Union[None, Exception]]: Returns tuple with
                either (filename, None) for success and (filename, Exception) for error.

        """
        filename = url.split("/")[-1]
        filename_full = destination.joinpath(filename)

        if not exists(filename_full) or overwrite:

            filename_temp = filename_full.with_suffix(".modapedl")

            try:

                with session.get(url, stream=True, allow_redirects=True) as response:
                    response.raise_for_status()
                    with open(filename_temp, "wb") as openfile:
                        shutil.copyfileobj(response.raw, openfile, length=16*1024*1024)

                if check:

                    # the metadata is requested from the file URL with ".xml" appended
                    with session.get(url + ".xml", allow_redirects=True) as response:
                        response.raise_for_status()
                        file_metadata = self._parse_hdfxml(response)

                    # check filesize
                    assert filename_temp.stat().st_size == file_metadata["FileSize"]
                    with open(filename_temp, "rb") as openfile:
                        checksum = cksum(openfile)
                    # check checksum
                    assert checksum == file_metadata["Checksum"]

                shutil.move(filename_temp, filename_full)

            except (HTTPError, ConnectionError, AssertionError, FileNotFoundError) as e:
                # remove the partial download, then report the failure to the caller
                try:
                    filename_temp.unlink()
                except FileNotFoundError:
                    pass
                return (filename, e)
        else:
            log.info("%s exists in target. Please set overwrite to True.", filename_full)

        return (filename, None)

    def download(self,
                 targetdir: Path,
                 username: str,
                 password: str,
                 overwrite: bool = False,
                 multithread: bool = False,
                 nthreads: int = 4,
                 max_retries: int = -1,
                 robust: bool = False,
                ) -> List:
        """Download MODIS HDF files.

        This method downloads the MODIS HDF files contained in the
        server response to the `search` call. This requires
        NASA earthdata credentials. To speed up download, multiple
        threads can be used.

        Args:
            targetdir (Path): Target directory for files being downloaded.
            username (str): Earthdata username.
            password (str): Earthdata password.
            overwrite (bool): Replace existing files.
            multithread (bool): Use multiple threads for downloading.
            nthreads (int): Number of threads.
            max_retries (int): Maximum number of retries for failed downloads (for no max, set -1).
            robust (bool): Perform robust downloading (checks file size and checksum).

        Raises:
            AssertionError: If `targetdir` is not an existing directory.
            DownloadError: If one or more errors were encountered during downloading.
        Returns:
            List of downloaded MODIS HDF filenames (empty if the search
            produced no results).
        """

        # make sure target directory is dir and exists
        assert targetdir.is_dir()
        assert targetdir.exists()

        retry_count = 0
        to_download = self.results.copy()
        downloaded = []

        if not to_download:
            # Nothing to fetch: skip opening a session, and avoid indexing
            # into an empty result set during the connection pool warm-up.
            return downloaded

        while True:

            with SessionWithHeaderRedirection(username, password) as session:

                # backoff factor doubles with every retry round, capped at 450
                backoff = min(450, 2**retry_count)

                retries = Retry(total=5, backoff_factor=backoff, status_forcelist=[502, 503, 504])
                session.mount(
                    "https://",
                    HTTPAdapter(pool_connections=nthreads, pool_maxsize=nthreads*2, max_retries=retries)
                )

                if multithread:
                    log.debug("Multithreaded download using %s threads. Warming up connection pool.", nthreads)
                    # warm up pool; the streamed response is closed right away
                    # so it does not keep a connection checked out
                    first_link = next(iter(to_download.values()))["link"]
                    with session.get(first_link, stream=True, allow_redirects=True):
                        pass

                    with ThreadPoolExecutor(nthreads) as executor:

                        futures = [executor.submit(self._fetch, session, values["link"], targetdir, overwrite, robust)
                                   for key, values in to_download.items()]

                    downloaded_temp = [x.result() for x in futures]

                else:
                    log.debug("Serial download")
                    downloaded_temp = []

                    for _, values in to_download.items():

                        downloaded_temp.append(
                            self._fetch(session, values["link"], targetdir, overwrite, robust)
                        )

            # check if downloads are OK; failed files stay in to_download
            for fid, err in downloaded_temp:
                if err is None:
                    del to_download[fid]
                    downloaded.append(fid)

            if to_download:
                if retry_count < max_retries or max_retries == -1:
                    retry_count += 1
                    log.debug("Retrying downloads! Files left: %s", len(to_download))
                    if max_retries > 0:
                        log.debug("Try %s of %s", retry_count, max_retries)
                    continue

                raise DownloadError(list(to_download.keys()))

            break

        return downloaded
Ejemplo n.º 8
0
from cmr import CollectionQuery, GranuleQuery
import json
api = GranuleQuery()

# MOD14: data - auto-sorted by start_time
MOD14granules = api.parameters(
                        short_name="MOD14",
                        platform="Terra",
                        downloadable=True)

# Single-argument print(...) produces the same output on Python 2 and 3.
print(MOD14granules.hits())
# Only the first 10 granules are fetched; api.get_all() would fetch every hit.
MOD14granules = api.get(10)

for granule in MOD14granules:
    # One pre-formatted string keeps the space-separated output on both
    # Python 2 and 3 (print(a, b) would print a tuple on Python 2).
    print("%s %s %s %s" % (granule["title"], granule["time_start"],
                           granule["time_end"], granule["polygons"]))

# MOD03: geolocation data - auto-sorted by start_time
MOD03granules = api.parameters(
                        short_name="MOD03",
                        platform="Terra",
                        downloadable=True)

print(MOD03granules.hits())
MOD03granules = api.get(10)

for granule in MOD03granules:
    print(json.dumps(granule, indent=4, separators=(',', ': ')))
    # "polygons" gives long/lat data in pairs - first/last pair are the same
    # note - it appears that all granules are quadrilaterals