# License along with Frommle; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# Author Roelof Rietbroek ([email protected]), 2018

from geoslurp.config.catalogue import geoslurpCatalogue
from geoslurp.dataset.motuGridsBase import MotuGridsBase


class mss_cls2015(MotuGridsBase):
    """Downloads the mean sea surface height data as netcdf"""
    scheme = 'altim'
    variables = ["mss", "mss_err"]
    bandname = variables[0]
    # tiles=[1000,1000]
    # Example motuclient call for this product:
    # python motuclient.py -u [email protected] -p your_password -m https://motu.aviso.altimetry.fr/motu-web/Motu -s AvisoMSS -d dataset-mss-cnes-cls15-global -x 20 -X 120 -y -75 -Y 30 -t "2015-01-01" -T "2015-01-01" --outputWritten netcdf4 -v sea_surface_height_above_reference_ellipsoid -v mss_err -o your_output_directory -f your_output_file_name --proxy-server=your_proxy_server_url:your_proxy_port_number --proxy-user=your_proxy_user_login --proxy-pwd=your_proxy_user_password
    authalias = "avisoftp"
    moturoot = "https://motu.aviso.altimetry.fr/motu-web/Motu"
    motuservice = "AvisoMSS"
    motuproduct = "dataset-mss-cnes-cls15-global"
    # regularblocking = True
    # overviews=[8]
    # [ulx,xres,xskew,uly,yskew,yres]
    # geotransform=[0,0,1/60,84,-1/60,0]

    def __init__(self, dbconn):
        super().__init__(dbconn)


geoslurpCatalogue.addDataset(mss_cls2015)
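# --- Added illustration (not part of the original module) ---
# A minimal usage sketch, assuming MotuGridsBase provides pull() and
# register() (as the other dataset classes in this repo suggest) and that
# `dbconn` is an already-established geoslurp database connection; the
# function name below is hypothetical.
def _example_pull_mss(dbconn):
    """Sketch: download the CLS2015 MSS grids and register them."""
    ds = mss_cls2015(dbconn)
    ds.pull()      # download via the motu service configured above
    ds.register()  # register the downloaded grids in the database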
            gzip=True, maxconn=10)

    def register(self):
        #create a list of files which need to be (re)registered
        crwl = UnrCrawler(catalogfile=os.path.join(self.dataDir(), 'DataHoldings.txt'))
        for uri in crwl.uris(refresh=False):
            if not self.uriNeedsUpdate(uri["statname"], uri["lastupdate"]):
                continue
            localfile = os.path.join(self.dataDir(), os.path.basename(uri["uri"] + ".gz"))
            if not os.path.exists(localfile):
                slurplogger().info("skipping %s" % (localfile))
                continue
            slurplogger().info("Registering %s" % (localfile))
            meta = uri.dict
            meta = enhancetenv3Meta(meta, localfile)
            self.addEntry(meta)

        self._dbinvent.data["citation"] = "Blewitt, G., W. C. Hammond, and C. Kreemer (2018), " \
            "Harnessing the GPS data explosion for interdisciplinary science, Eos, 99, https://doi.org/10.1029/2018EO104623."
        self.updateInvent()


geoslurpCatalogue.addDataset(UNRfinal)
        self.updated.append(tmp)

    def register(self, pattern=None):
        """Register static gravity fields downloaded in the data directory

        :param pattern: only register files whose filename obeys this regular expression
        """
        if not pattern:
            pattern = r'.*\.gz'

        #create a list of files which need to be (re)registered
        if self.updated:
            files = self.updated
        else:
            files = [UriFile(file) for file in findFiles(self.dataDir(), pattern)]

        #loop over files
        for uri in files:
            urilike = os.path.basename(uri.url)
            if not self.uriNeedsUpdate(urilike, uri.lastmod):
                continue
            meta = icgemMetaExtractor(uri)
            self.addEntry(meta)

        self.updateInvent()


geoslurpCatalogue.addDataset(ICGEM_static)
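# --- Added illustration (not part of the original module) ---
# Standalone sketch of what the default pattern r'.*\.gz' selects; pure
# stdlib, with made-up file names, independent of findFiles/UriFile.
def _example_pattern_filter():
    import re
    candidates = ["GOCO06s.gfc.gz", "README.txt", "EIGEN-6C4.gfc.gz"]
    return [f for f in candidates if re.match(r'.*\.gz', f)]
    # -> ['GOCO06s.gfc.gz', 'EIGEN-6C4.gfc.gz']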
        super().__init__(dbconn)
        # Create table if it doesn't exist
        self.table.metadata.create_all(self.db.dbeng, checkfirst=True)

    def pull(self):
        """Pulls known geocenter motion estimates from the internet and stores them in the cache"""
        for gsource in self.dsources:
            gsource(self.cacheDir()).download()

    def register(self):
        """Registers the downloaded geocenter estimates in the database"""
        for gsource in self.dsources:
            try:
                src = gsource(self.cacheDir())
                metadicts = src.extract()
                slurplogger().info("registering %s" % (src.meta["name"]))
                for meta in metadicts:
                    if self.entryNeedsUpdate(meta['name'], lastmod=src.meta['lastupdate'], col=self.table.name):
                        self.addEntry(meta)
            except Exception:
                #possibly not downloaded yet, which is ok
                continue

        self.updateInvent()


geoslurpCatalogue.addDataset(Deg1n2)
# Factory method to dynamically create classes
def GRDCGISClassFactory(fileName):
    splt = fileName.split(".")
    return type(splt[0], (grdc_gis_base,), {
        "filename": fileName,
        "gtype": "GEOMETRY",
        "swapxy": True
    })


def getGRDCDsets(conf):
    """Automatically create all classes contained within the GRDC tables"""
    GISshapes = [
        'GRDC_405_basins_from_mouth.shp', 'GRDC_687_rivers.shp',
        'GRDC_687_rivers_class.shp', 'GRDC_lakes_join_rivers.shp',
        'grdc_basins_smoothed.shp'
    ]

    out = [GRDCGISClassFactory(name) for name in GISshapes]

    #also add the monthly and daily datasets
    for name in ["grdc_monthly", "grdc_daily"]:
        out.append(GRDCClassFactory(name))

    return out


geoslurpCatalogue.addDatasetFactory(getGRDCDsets)
geoslurpCatalogue.addDataset(grdc_catalogue)
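# --- Added illustration (not part of the original module) ---
# Minimal standalone sketch of the three-argument type() call used by the
# factory above: type(name, bases, attrs) creates a new class at runtime,
# here with a dummy base class instead of grdc_gis_base.
def _example_class_factory():
    class DummyBase:
        pass
    cls = type("grdc_405_basins_from_mouth", (DummyBase,),
               {"filename": "GRDC_405_basins_from_mouth.shp"})
    return cls().filename  # -> 'GRDC_405_basins_from_mouth.shp'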
"n1c", "pna", "g1a", "j2b", "3b3", "c2a", "e1b", "gsb", "e2a", "e1e", "3b2", "j2c", "6aa", "e1f", "j3a", "j2a", "j1b", "j1c", "gsa", "3b5", "3bb", "3b0", "txn", "j1a", "e1a", "3b4", "3aa", "j2d", "e1g", "gsd", "6a1", "saa", "txb", "3b1", "sab", "3ba", "n1b", "e1c", "e1d", "txa" ] # tphases={"j1":["a","b","c"],"j2":["a","b","c"],"j3":["a"],"3a":["a"],"c2":["a"],"n1":["b","c"],"sa":["a","b"],"tx":["a","b","n"],"3a":["a"]} out = [] for sat in satnph: clname = "rads_" + sat[0:2] + "_" + sat[2:3] out.append(radsclassFactory(clname)) return out geoslurpCatalogue.addDatasetFactory(getRADSDsets) geoslurpCatalogue.addDataset(RadsCycles) #### RADS REFERENCE ORBITS (DEPENDS ON ABOVE dataset classes) #### RadsRefOrbitTBase = declarative_base(metadata=MetaData(schema=scheme)) class RadsRefT(RadsRefOrbitTBase): __tablename__ = "radsreforbits" id = Column(Integer, primary_key=True) lastupdate = Column(TIMESTAMP) missionid = Column(String, index=True) refcycle = Column(Integer) apass = Column(Integer) geom = Column(geotracktype)
from geoslurp.dataset import OGRBase
from geoslurp.datapull.http import Uri as http
from geoslurp.config.catalogue import geoslurpCatalogue
import urllib.request
from zipfile import ZipFile
import os


class WriBasin(OGRBase):
    """Base class for Wribasin watersheds"""
    scheme = 'globalgis'
    swapxy = True

    def __init__(self, dbconn):
        super().__init__(dbconn)
        self.ogrfile = os.path.join(self.cacheDir(), "wribasin.shp")

    def pull(self):
        """Pulls the wribasin data from the internet and unpacks it in the cache directory"""
        fzip = os.path.join(self.cacheDir(), "wri_basin.zip")
        urllib.request.urlretrieve(
            "http://www.fao.org/geonetwork/srv/en/resources.get?id=30914&fname=wri_basins.zip&access=private",
            fzip)
        with ZipFile(fzip, 'r') as zp:
            zp.extractall(self.cacheDir())


geoslurpCatalogue.addDataset(WriBasin)
class gleam_monthly(XarrayBase):
    outofdb = True
    scheme = "prec_evap"
    groupby = "time"
    writeoutofdb = False

    def pull(self):
        auth = self.conf.authCred("gleam", qryfields=["user", "passw", "url"])
        # note: url should be of the form sftp://server:port
        crwl = crawler(url=auth.url + "/data/v3.6b/monthly", auth=auth)
        downdir = self.cacheDir()
        for uri in crwl.uris():
            uri.download(downdir, check=True)

    def convert2zarr(self):
        slurplog.info("Converting data to zarr %s" % (self.xarfile))
        #open all datasets together
        ds = xr.open_mfdataset(os.path.join(self.cacheDir(), "*.nc"))
        #save to zarr format
        ds.to_zarr(self.xarfile)

    def register(self):
        self.xarfile = os.path.join(self.dataDir(), "2003-2021_GLEAM_v3.6b_MO.zarr")
        if not os.path.isdir(self.xarfile):
            self.convert2zarr()
        super().register()


geoslurpCatalogue.addDataset(gleam_monthly)
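# --- Added illustration (not part of the original module) ---
# Standalone sketch of the NetCDF -> zarr conversion pattern used in
# convert2zarr() above, with a tiny synthetic dataset instead of GLEAM
# files; all paths are placeholders.
def _example_nc_to_zarr(tmpdir):
    import os
    import numpy as np
    import xarray as xr
    ds = xr.Dataset({"E": ("time", np.arange(3.0))}, coords={"time": np.arange(3)})
    ds.to_netcdf(os.path.join(tmpdir, "part1.nc"))
    merged = xr.open_mfdataset(os.path.join(tmpdir, "part*.nc"))  # lazy concat over matching files
    merged.to_zarr(os.path.join(tmpdir, "example.zarr"), mode="w")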
                #check if the last file is already extracted
                if os.path.exists(succesfile):
                    slurplogger().info(f"{tarf.url} is already extracted, skipping")
                else:
                    with tarfile.open(tarf.url, "r:gz") as tf:
                        slurplogger().info(f"Extracting trajectory files from {tarf.url}")
                        tf.extractall(datadir)
                    #touch the success file to indicate this archive has been successfully extracted
                    Path(succesfile).touch()
        except tarfile.ReadError as exc:
            raise exc

    def register(self, pattern=r'.*\.nc$'):
        """Register downloaded trajectory files from CORA

        :param pattern: (string) file pattern to look for (defaults to all files ending with .nc)
        """
        #create a list of files which need to be (re)registered
        newfiles = self.retainnewUris([UriFile(file) for file in findFiles(self.dataDir(), pattern)])
        for uri in newfiles:
            meta = coraMetaExtractor(uri)
            if not meta:
                #don't register empty entries
                continue
            self.addEntry(meta)

        self._dbinvent.data["Description"] = "EasyCora output data table"
        self._dbinvent.data["CORAversion"] = "5.2"
        self.updateInvent()


geoslurpCatalogue.addDataset(EasyCora)
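# --- Added illustration (not part of the original module) ---
# Standalone sketch of the marker-file idiom above: extract an archive once
# and touch a sentinel so later runs can skip it; paths are placeholders.
def _example_extract_once(archive, datadir):
    import tarfile
    from pathlib import Path
    marker = Path(datadir) / (Path(archive).name + ".extracted")
    if not marker.exists():
        with tarfile.open(archive, "r:gz") as tf:
            tf.extractall(datadir)
        marker.touch()  # sentinel: archive successfully extracted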
    latmax = ds.latitude.max().values
    lonmin = ds.longitude.min().values
    lonmax = ds.longitude.max().values
    bbox = Polygon([(lonmin, latmin), (lonmin, latmax), (lonmax, latmax),
                    (lonmax, latmin)])
    return {
        "name": name,
        "lastupdate": uri.lastmod,
        "tstart": tstart,
        "tend": tend,
        "uri": uri.url,
        "data": data,
        "geom": wktdumps(bbox)
    }


class GloFASUpArea(RasterBase):
    """Class which downloads and registers the auxiliary uparea file"""
    regularblocking = True
    scheme = "hydro"

    def pull(self):
        upsrc = http(
            "https://confluence.ecmwf.int/download/attachments/143039724/upArea.nc",
            lastmod=datetime(2021, 11, 17))
        #download to cache only (will be in db raster)
        urif, upd = upsrc.download(self.srcdir, check=True)


geoslurpCatalogue.addDataset(GloFASUpArea)
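# --- Added illustration (not part of the original module) ---
# Standalone sketch of the bounding-box construction above, with dummy
# extents instead of values taken from a dataset.
def _example_bbox_wkt():
    from shapely.geometry import Polygon
    from shapely.wkt import dumps as wktdumps
    lonmin, lonmax, latmin, latmax = -10.0, 10.0, 40.0, 60.0
    bbox = Polygon([(lonmin, latmin), (lonmin, latmax),
                    (lonmax, latmax), (lonmax, latmin)])
    return wktdumps(bbox)  # WKT polygon tracing the region's corners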
    def columnsFromOgrFeat(self, feat):
        cols = super().columnsFromOgrFeat(feat)
        cols.append(Column('orbit', Integer))
        cols.append(Column('missionids', ARRAY(String)))
        return cols

    def pull(self):
        """Pulls the Google Earth KML files from the Copernicus server"""
        rooturl = 'https://sentinel.esa.int/documents/247904/685098/Sentinel-3-Absolute-Ground-Tracks.zip'
        cache = self.cacheDir()
        httpserv = http(rooturl, lastmod=datetime(2021, 11, 29))
        uri, upd = httpserv.download(cache, check=True)
        if upd:
            with ZipFile(uri.url, 'r') as zp:
                zp.extractall(cache)


class s3a_reforbit(S3ABRefOrbitsBase):
    ogrfile = 'S3A_rel_orbit_ground_track_10sec_v1_4.kml'
    missionids = ["s3a"]


class s3b_reforbit(S3ABRefOrbitsBase):
    ogrfile = 'S3B_rel_orbit_ground_track_10sec_v1_4.kml'
    missionids = ["s3b"]


geoslurpCatalogue.addDataset(s3a_reforbit)
geoslurpCatalogue.addDataset(s3b_reforbit)
"""Class for registering SH filters (downloads from github) """ scheme = schema version = (0, 0, 0) def __init__(self, dbconn): super().__init__(dbconn) self.pdfile = os.path.join(self.cacheDir(), 'inventory_upd.csv') def pull(self): """Pulls the dataset from github and unpacks it in the cache directory""" #download the inventory file lastchanged = datetime(2021, 11, 5) inventory = "https://github.com/strawpants/GRACE-filter/raw/master/inventory.xlsx" uri, upd = http(inventory, lastmod=lastchanged).download(self.cacheDir(), check=True) pdinvent = pd.read_excel(uri.url, engine="openpyxl") #download all the files ddir = self.dataDir() for idx, row in pdinvent.iterrows(): ffile, upd = http(row.uri, lastmod=lastchanged).download(ddir, check=True) #update file with newly downloaded file pdinvent.at[idx, 'uri'] = self.conf.generalize_path(ffile.url) #write updated excel file pdinvent.to_csv(os.path.join(self.pdfile)) geoslurpCatalogue.addDataset(GRACEfilter)
            self.addEntry(meta)

        self.updateInvent()

    # def halt(self):
    #     slurplogger().error("Stopping update")
    #     self._killUpdate = True
    #     # indicate a done task in the queue in order to allow the pullWorker thread to stop gracefully
    #     # empty queue
    #     while not self._uriqueue.empty():
    #         self._uriqueue.get()
    #         self._uriqueue.task_done()
    #     # also synchronize inventory info (e.g. to allow a resume)
    #     self.updateInvent(False)
    #     raise RuntimeWarning("Argo dataset processing stopped")

    # def pullWorker(self, conn):
    #     """Pulls valid opendap URI's from a thredds server and queues them"""
    #     for uri in conn.uris():
    #         slurplogger().info("queuing %s", uri.url)
    #         self._uriqueue.put(uri)
    #         if self._killUpdate:
    #             slurplogger().warning("Pulling of Argo URI's stopped")
    #             return
    #     # signal the end of the queue by adding a None
    #     self._uriqueue.put(None)


geoslurpCatalogue.addDataset(Argo2)
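# --- Added illustration (not part of the original module) ---
# Standalone sketch of the producer/consumer idiom in the commented-out
# pullWorker above: a worker thread queues items and signals completion
# with a None sentinel that the consumer uses to stop.
def _example_queue_sentinel(items):
    import queue
    import threading
    q = queue.Queue()

    def producer():
        for it in items:
            q.put(it)
        q.put(None)  # sentinel: end of queue

    threading.Thread(target=producer).start()
    out = []
    while True:
        it = q.get()
        if it is None:
            break
        out.append(it)
    return out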
        with ZipFile(uri.url, 'r') as zp:
            zp.extractall(self.cacheDir())

    def register(self):
        """Register all downloaded fronts (in text files)"""
        slurplogger().info("Building file list..")
        files = [
            UriFile(file)
            for file in findFiles(self.cacheDir(), '.*txt', self._dbinvent.lastupdate)
        ]

        if len(files) == 0:
            slurplogger().info("Orsifronts: No new files found since last update")
            return

        #possibly empty table
        self.truncateTable()

        #loop over files
        for uri in files:
            slurplogger().info("adding %s" % (uri.url))
            self.addEntry(orsiMetaExtractor(uri))

        self.updateInvent()


#register dataset
geoslurpCatalogue.addDataset(Orsifronts)
ds["ETm"] = ds.ETm * mmmon_kgsecm2 #add CF atributes cfadd_global( ds, title="SEBSv2 Evapotranspiration estimates", references= "https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032873", source=f"Geoslurp class {self.__class__.__name__}") cfadd_standard_name(ds.ETm, "water_evapotranspiration_flux") # cfencode_time(ds.time) cfadd_coord(ds.lon, 'X', standard_name='longitude') cfadd_coord(ds.lat, 'Y', standard_name='latitude') if appdim: ds.to_zarr(self.xarfile, append_dim=appdim) else: ds.to_zarr(self.xarfile, mode='w') appdim = "time" def register(self): self.xarfile = os.path.join(self.dataDir(), "Global_land_monthly_ET_V2.zarr") tarar = os.path.join(self.cacheDir(), "Global_land_monthly_ET_V2.rar") if not os.path.isdir(self.xarfile): self.convert2zarr(tarar) super().register() geoslurpCatalogue.addDataset(SEBS_monthly)
            te=tend)

        #retrieve the appropriate index range
        halfres = 0.1 / 2
        trange = self.getRange(bbox.ts, bbox.te, self.dscoords.time)
        latrange = self.getRange(bbox.s - halfres, bbox.n + halfres, self.dscoords.lat)
        lonrange = self.getRange(bbox.w - halfres, bbox.e + halfres, self.dscoords.lon)
        if trange is None or latrange is None or lonrange is None:
            slurplog.warning("refusing to queue empty dataset, skipping")
            return

        qrystr = f"?time{trange},lat{latrange},lon{lonrange},precipitation{trange}{lonrange}{latrange}"
        self.pullqueue[name] = qrystr

    @staticmethod
    def getRange(start, end, within):
        idx = np.where((within >= start) & (within <= end))
        if len(idx[0]) < 1:
            return None
        return f"[{idx[0][0]}:1:{idx[0][-1]}]"

    def rastExtract(self, uri):
        meta = super().rastExtract(uri)
        meta["name"] = os.path.basename(uri.url)[0:-3]
        return meta


geoslurpCatalogue.addDataset(imerg_monthly)
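# --- Added illustration (not part of the original module) ---
# Standalone sketch of what getRange() above produces: an OPeNDAP-style
# index slice "[first:stride:last]" for a coordinate interval, here on a
# made-up 0.1-degree latitude grid.
def _example_getrange():
    import numpy as np
    lat = np.arange(-89.95, 90.0, 0.1)
    idx = np.where((lat >= -5.0) & (lat <= 5.0))
    return f"[{idx[0][0]}:1:{idx[0][-1]}]"  # e.g. '[850:1:949]'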
        self._dbinvent.data["Description"] = "ArcticDEM raster table"

    def pull(self, intersect=None):
        #download the entire mosaic domain in one tif
        if self.res in ['1km', '500m', '100m']:
            rasteruri = http(
                "http://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/mosaic/v3.0/"
                + self.res + "/" + self.rasterfile,
                lastmod=datetime(2018, 9, 26))
            rasterfileuri, upd = rasteruri.download(self.srcdir, check=False)
        #download only those tiles which are needed


def getArcticDems(conf):
    out = []
    for res in ['1km', '500m', '100m']:
        out.append(
            type("arcticdem_mosaic_" + res + "_v3", (ArcticDemRasterBase,), {
                "res": res,
                "tiles": [100, 100]
            }))
        # out.append(type("arcticdem_mosaic_"+res+"_v3", (ArcticDemRasterBase,), {"res":res}))
    return out


#register datasets
geoslurpCatalogue.addDataset(Arcticdemindex)
geoslurpCatalogue.addDatasetFactory(getArcticDems)
class awipies(DataSet):
    """Class which downloads/registers the AWI South Atlantic PIES"""
    scheme = scheme
    table = PIESTable
    obpfile = 'OBPv3withtau.mat'

    def __init__(self, dbconn):
        super().__init__(dbconn)
        PIESTBase.metadata.create_all(self.db.dbeng, checkfirst=True)

    def pull(self):
        """Pulls the OBP matlab file from the cloud"""
        cred = self.conf.authCred("awipies", ['url', 'user', 'passw'])
        obpsource = http(cred.url, auth=cred)
        obpsource.download(self.dataDir(), outfile=self.obpfile)

    def register(self):
        obpfile = os.path.join(self.dataDir(), self.obpfile)
        for meta in extractMetaPies(obpfile):
            self.addEntry(meta)
        self.updateInvent()


geoslurpCatalogue.addDataset(awipies)
        except Exception:
            token = None

        ghcrawler = ghCrawler(reponame, commitsha=commitsha,
                              filter=ghfilter({"type": "blob", "path": r"\.love"}),
                              followfilt=ghfilter({"type": "tree", "path": "Love"}),
                              oauthtoken=token)

        #download all datasets
        ghcrawler.parallelDownload(self.dataDir(), check=True, maxconn=3, gzip=True)

    def register(self):
        slurplogger().info("Building file list..")
        files = [UriFile(file) for file in findFiles(self.dataDir(), '.*love', self._dbinvent.lastupdate)]
        if len(files) == 0:
            slurplogger().info("LLove: No new files found since last update")
            return

        filesnew = self.retainnewUris(files)
        if len(filesnew) == 0:
            slurplogger().info("LLove: No database update needed")
            return

        #loop over files
        for uri in filesnew:
            self.addEntry(lloveMetaExtractor(uri))

        self.updateInvent()


geoslurpCatalogue.addDataset(LLove)
meta={"type":file.split('_')[-1][:-4],"time":tcent,"tstart":tstart,"tend":tend,"lastupdate":lastupdate,"nmax":1,"omax":1,"origin":"CF","format":"JSONB","uri":"self:data","gm":0.3986004415e+15,"re":0.6378136460e+07 } for el,val in zip(order,lnspl[1:4]): # import pdb;pdb.set_trace() shar["cnm"][shar.idx(el)]=float(val)/self.sqrt3timesRE #also add sigmas for el,val in zip(order,lnspl[4:7]): shar["sigcnm"][shar.idx(el)]=float(val)/self.sqrt3timesRE meta["data"]=shar.dict self.addEntry(meta) self.updateInvent() geoslurpCatalogue.addDataset(geocenter_Rietbroeketal2016upd) def parseGSMDate(dtstr): """Parse datestr as found in GSM files (yyyymmdd.00000)""" return datetime(int(dtstr[0:4]),int(dtstr[4:6]),int(dtstr[6:8])) class geocenter_GRCRL06_TN13(DataSet): scheme=scheme rooturl="https://podaac-tools.jpl.nasa.gov/drive/files/allData/grace/docs/" # fout="TN-13_GEOC_CSR_RL06.txt" def __init__(self,dbconn): self.table=type(self.__class__.__name__.lower().replace('-',"_")+"Table", (GravitySHinDBTBase,), {}) super().__init__(dbconn) def pull(self): """Pulls the geocenter ascii files in the cache"""
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with Frommle; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# Author Roelof Rietbroek ([email protected]), 2018

from geoslurp.config.catalogue import geoslurpCatalogue
from geoslurp.dataset.motuGridsBase import MotuGridsBase


class Duacs(MotuGridsBase):
    """Downloads subsets of the DUACS gridded multimission altimeter datasets for given regions"""
    scheme = 'altim'
    variables = ["sla", "adt"]
    bandname = variables[0]
    authalias = "cmems"
    # Example motuclient call for this product:
    # http://my.cmems-du.eu/motu-web/Motu --service-id SEALEVEL_GLO_PHY_L4_REP_OBSERVATIONS_008_047-TDS --product-id dataset-duacs-rep-global-merged-allsat-phy-l4 --longitude-min 0.125 --longitude-max -0.125 --latitude-min -89.875 --latitude-max 89.875 --date-min "2019-01-12 00:00:00" --date-max "2019-01-12 00:00:00" --variable sla --variable adt --variable ugos --variable vgos --variable ugosa --variable vgosa --variable err --out-dir <OUTPUT_DIRECTORY> --out-name <OUTPUT_FILENAME> --user <USERNAME> --pwd <PASSWORD>
    moturoot = "http://my.cmems-du.eu/motu-web/Motu"
    motuservice = "SEALEVEL_GLO_PHY_L4_REP_OBSERVATIONS_008_047-TDS"
    motuproduct = "dataset-duacs-rep-global-merged-allsat-phy-l4"

    def __init__(self, dbconn):
        super().__init__(dbconn)


geoslurpCatalogue.addDataset(Duacs)