def _get_login(self, head=True):
    '''
    Fetch this URL through an authenticated requests session.

    Credentials are taken from self.username / self.password when both
    are set. Otherwise they are requested from Cylog(self.anchor).login()
    and the two items returned are decoded from UTF-8 bytes.

    A GET of the URL is attempted first. If that does not give status
    200, a second request is sent to the URL the first response ended
    up at (r1.url): HEAD when head is True, GET otherwise.

    Arguments:
      head : Bool : use HEAD (True) or GET (False) for the second request

    Returns:
      The response of the first request when its status code is 200,
      otherwise the response of the second request, or None when no
      login is available or a request raised an exception.
    '''
    u = self
    with requests.Session() as session:
        if u.username and u.password:
            session.auth = u.username, u.password
        else:
            uinfo = Cylog(u.anchor).login()
            if uinfo == (None, None):
                # no stored login available for this site
                return None
            session.auth = (uinfo[0].decode('utf-8'),
                            uinfo[1].decode('utf-8'))

        u.msg(f'logging in to {u.anchor}')
        try:
            r1 = session.request('get', u)
            if r1.status_code == 200:
                u.msg(f'data read from {u.anchor}')
                return r1

            # try encoded login: repeat the request against the URL
            # that the first response was redirected to
            if head:
                r2 = session.head(r1.url)
            else:
                r2 = session.get(r1.url)
            if r2.status_code == 200:
                u.msg(f'data read from {u.anchor}')
            # NOTE(review): the second response is handed back whatever
            # its status code, so callers need to check status_code
            # themselves - confirm that this is the intended contract.
            if isinstance(r2, requests.models.Response):
                return r2
        except Exception:
            # best effort: any request failure is reported as None, but
            # KeyboardInterrupt / SystemExit are no longer swallowed
            u.msg(f'failure reading data from {u.anchor}')
            return None

    u.msg(f'failure reading data from {u.anchor}')
    return None
def modisFile(year=2020, month=1, day=1, tile='h08v06',
              product='MCD15A3H', timeout=None,
              version='006', no_cache=False, cache=None,
              verbose=False, force=False,
              altcache='/shared/groups/jrole001/geog0111'):
    '''
    Get the filename associated with a MODIS product file
    for a certain date and version.

    modisFile : return the (local) filename for a MODIS product, e.g.

        <cache>/.modis_cache/e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2020.01.05/MCD15A3H.A2020005.h08v06.006.2020010210940.hdf

    N.B. You need to have a username and password to access the data.
    These are available at https://urs.earthdata.nasa.gov

    Example of use:

        from geog0111.modisUtils import modisURL, modisFile

        modinfo = {
            'product' : 'MCD15A3H',
            'year'    : 2020,
            'month'   : 1,
            'day'     : 5,
            'tile'    : 'h08v06'
        }

        url = modisURL(**modinfo,verbose=False)
        print(f'-> {url}')
        filename = modisFile(**modinfo,verbose=False)
        print(f'-> {filename}')

        -> https://e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2020.01.05/MCD15A3H.A2020005.h08v06.006.2020010210940.hdf
        -> /Users/plewis/.modis_cache/e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2020.01.05/MCD15A3H.A2020005.h08v06.006.2020010210940.hdf

    Returns:

      filename : Path object of the MODIS data product file in a cache,
                 i.e. a local copy of what you would find at e.g.
                 https://e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2020.01.05/MCD15A3H.A2020005.h08v06.006.2020010210940.hdf
                 None is returned if no URL is found for the request,
                 if the download does not give status code 200, or if
                 the cache file could not be written completely.

    Control options:

      year    : int of year (2000+ for Terra, 2002+ for Aqua products)
                (year=2020)
      month   : int of month (1-12) (month=1)
      day     : int of day (1-31, as appropriate) (day=1)
      tile    : string of tile (tile='h08v06')
      product : string of MODIS product name (product='MCD15A3H')
      version : int or string of version (version='006')
      timeout : timeout in seconds, passed to the download requests
      verbose : verbosity (verbose=False)

    Cache options:

      no_cache : Set True if you don't want to use the cache
                 (no_cache=False). This is common for most functions,
                 but modisFile() will write to the cache in any case,
                 as it has to store the file somewhere. If you don't
                 want to keep that, then you can delete it after use.
      cache    : Specify personal cache root (str or Path), e.g.
                 cache='/home/somewhere/else', somewhere you have write
                 permission (ie somewhere in your filespace).
                 If None, the current working directory is used.
                 The sub-directory .modis_cache of this root is always
                 used, and is created if it does not exist.
      altcache : Specify system cache root.
                 Use altcache='/home/notme/somewhere' to specify a
                 system cache location with read permission (ie
                 somewhere not necessarily in your filespace)
      force    : Bool : Use force=True to override information in
                 the cache

    The URL of the file is found with modisURL(), which is passed the
    same cache options (see modisURL() and modisHTML()).

    The caching is done to avoid repeated calls to expensive URL
    downloads. The idea is that there will be a system cache, where
    shared files will be set up (where you have read permission), and a
    personal cache where you can read and write your own files.

    Unless you use force=True or disable the cache with no_cache=True,
    the code will look in (i) the personal cache; (ii) the system cache
    before attempting to download any file from a URL. In each of
    these, a file of the same name with '.store' appended is also
    accepted, for backwards compatibility.

    The cached files are stored in the same structure as the URL, i.e

      https://e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2020.01.05/MCD15A3H.A2020005.h08v06.006.2020010210940.hdf

    will be stored (personal cache) as:

      <cache>/.modis_cache/e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2020.01.05/MCD15A3H.A2020005.h08v06.006.2020010210940.hdf

    The html cache is what is returned from e.g.
    https://e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2020.01.05
    and is stored as eg

      <cache>/.modis_cache/e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006/2020.01.05/index.html
    '''
    def report_request():
        # summary of the request: used for verbose output and for the
        # diagnostic printed when no URL is found. Only called before
        # `cache` is re-bound below, so it shows the caller's value.
        print('modisFile')
        print(f'year : {year}\nmonth : {month}\nday : {day}\ntile : {tile}')
        print(f'product : {product}\nversion : {version}\nforce : {force}')
        print(f'altcache : {altcache}')
        print(f'cache : {cache}')

    if verbose:
        report_request()

    # get the URL for the file of interest
    url = modisURL(year, month, day, tile, product=product, version=version,
                   no_cache=no_cache, cache=cache, altcache=altcache,
                   force=force, verbose=verbose)

    if url is None:
        print('No dataset URL found for conditions requested.')
        print(
            f'Check the date and tile for the dataset URL that you have requested: {modisServer(product=product,version=version)}'
        )
        report_request()
        print('If you think that is ok, then first try to rerun')
        print('If you still have problems, perhaps use: force=True')
        print('and try setting timeout, e.g. timeout=200 (it is in seconds)')
        return None

    # personal cache root: accept a str as well as a Path, since
    # e.g. cache='/home/somewhere/else' is the documented usage
    cache_root = Path.cwd() if cache is None else Path(cache)
    cache = cache_root / ".modis_cache"
    cache.mkdir(parents=True, exist_ok=True)

    if not no_cache:
        if verbose:
            print(f'cache {cache}')

    # generate the Path where the local cache would go:
    # same structure as the URL, relative to the cache root
    cache_part = Path(url.hostinfo, '/'.join(url.parts[1:]))

    if (not no_cache) and (not force):
        # '.store' names are accepted for backwards compatibility
        store_part = cache_part.as_posix() + '.store'
        candidates = [
            (Path(cache, cache_part),
             'getting from cache'),
            (Path(cache, store_part),
             'getting from cache in backward compatibility mode'),
        ]
        if altcache:
            candidates += [
                (Path(altcache, cache_part),
                 'getting from altcache'),
                (Path(altcache, store_part),
                 'getting from altcache in backward compatibility mode'),
            ]
        # first hit wins: personal cache is searched before system cache
        for candidate, message in candidates:
            if candidate.exists():
                if verbose:
                    print(message)
                return candidate

    # else pull the file
    # first try a get : we have to do this twice because of
    # auth redirect
    if verbose:
        print(f'logging in to {url.anchor}')
    url2 = url.with_userinfo(*Cylog(url.anchor).login())
    if verbose:
        print(f'get info from {url.anchor}')
    r = url2.get(timeout=timeout)
    if verbose:
        print(f'get data from {url.anchor}')
    # repeat the request, with login, on the URL we were redirected to
    url3 = URL(r.url).with_userinfo(*Cylog(url.anchor).login())
    r2 = url3.get(timeout=timeout)
    if verbose:
        print(f'done - status code {r2.status_code}')

    if r2.status_code != 200:
        if verbose:
            print(f'failed to pull data from {url.anchor}')
        return None

    data = r2.content
    if verbose:
        print(f"received {len(data)} bytes")

    # write to cache file
    cache_file = cache / cache_part
    cache_file.parent.mkdir(parents=True, exist_ok=True)
    nbytes_written = cache_file.write_bytes(data)
    if nbytes_written != len(data):
        if verbose:
            print(
                f'error writing cache file {cache_file}: {len(data)} bytes expected but {nbytes_written} bytes written'
            )
        return None

    print(f'cached data to file {cache_file}: {len(data)}')
    return cache_file
def preamble():
    '''
    Append the Earthdata login to ~/.netrc and make that file private.

    The username and password are obtained from
    Cylog('https://n5eil01u.ecs.nsidc.org').login() and a line

        machine urs.earthdata.nasa.gov login <uid> password <password>

    is appended to ~/.netrc, which is then set to mode 0600.

    The file is written directly from Python: running the equivalent
    'echo ... >> ~/.netrc && chmod 0600 ~/.netrc' through
    subprocess.run() with a split argument list does not work, because
    without a shell the '>>', '&&' and '~' are passed to echo as plain
    arguments and nothing is written.

    Returns:
      None. Nothing is written if no login is available.
    '''
    import os

    uid, password = Cylog('https://n5eil01u.ecs.nsidc.org').login()
    if uid is None or password is None:
        # login() gave no credentials: do not write a bogus entry
        return

    # NOTE(review): login() appears to return bytes (other callers
    # decode its result as UTF-8); str is accepted too - confirm.
    if isinstance(uid, bytes):
        uid = uid.decode('utf-8')
    if isinstance(password, bytes):
        password = password.decode('utf-8')

    entry = f'machine urs.earthdata.nasa.gov login {uid} password {password}\n'

    netrc_path = os.path.expanduser('~/.netrc')
    # create with owner-only permissions so that the password is never
    # readable by others, even briefly, when the file is new
    fd = os.open(netrc_path, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o600)
    with os.fdopen(fd, 'a') as netrc_file:
        netrc_file.write(entry)
    # tighten permissions on a file that already existed
    os.chmod(netrc_path, 0o600)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from geog0111.cylog import Cylog

# Sites for which a login is requested: passed together, as one list,
# to a single Cylog object.
sites = [
    'https://n5eil01u.ecs.nsidc.org',
    'https://urs.earthdata.nasa.gov',
    'https://e4ftl01.cr.usgs.gov',
]

# Request the login when the script is run; the result is kept in `test`.
l = Cylog(sites)
test = l.login()