Example #1
# License along with Frommle; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

# Author Roelof Rietbroek ([email protected]), 2018

from geoslurp.config.catalogue import geoslurpCatalogue
from geoslurp.dataset.motuGridsBase import MotuGridsBase


class mss_cls2015(MotuGridsBase):
    """Downloads the mean sea surface heigth data as netcdf"""
    scheme = 'altim'
    variables = ["mss", "mss_err"]
    bandname = variables[0]
    # tiles=[1000,1000]
    # python motuclient.py -u [email protected] -p your_password(1) -m https://motu.aviso.altimetry.fr/motu-web/Motu -s AvisoMSS -d dataset-mss-cnes-cls15-global -x 20 -X 120 -y -75 -Y 30 -t "2015-01-01" -T "2015-01-01" --outputWritten netcdf4 -v sea_surface_height_above_reference_ellipsoid -v mss_err -o your_output_directory(1) -f your_output_file_name(1) --proxy-server=your_proxy_server_url:your_proxy_port_number(2) --proxy-user=your_proxy_user_login(3) --proxy-pwd=your_proxy_user_password(3)
    authalias = "avisoftp"
    moturoot = "https://motu.aviso.altimetry.fr/motu-web/Motu"
    motuservice = "AvisoMSS"
    motuproduct = "dataset-mss-cnes-cls15-global"

    # regularblocking = True
    # overviews=[8]
    #[ulx,xres,xskew,uly,yskew,yres]
    # geotransform=[0,0,1/60,84,-1/60,0]
    def __init__(self, dbconn):
        super().__init__(dbconn)


geoslurpCatalogue.addDataset(mss_cls2015)
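# Hedged sketch (not part of geoslurp): the class attributes above correspond to the
# flags of the motuclient call quoted in the comment. A hypothetical helper that
# assembles such a command line from a MotuGridsBase-style class could look like this:
def _motu_command_sketch(cls, user, password, outdir, outfile):
    varflags = " ".join(f"-v {v}" for v in cls.variables)
    return (f"python motuclient.py -u {user} -p {password} -m {cls.moturoot} "
            f"-s {cls.motuservice} -d {cls.motuproduct} {varflags} "
            f"-o {outdir} -f {outfile}")

# e.g. _motu_command_sketch(mss_cls2015, "me@example.com", "secret", ".", "mss.nc")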
Example #2
                                             gzip=True,
                                             maxconn=10)

    def register(self):
        #create a list of files which need to be (re)registered

        crwl = UnrCrawler(
            catalogfile=os.path.join(self.dataDir(), 'DataHoldings.txt'))

        for uri in crwl.uris(refresh=False):

            if not self.uriNeedsUpdate(uri["statname"], uri["lastupdate"]):
                continue

            localfile = os.path.join(self.dataDir(),
                                     os.path.basename(uri["uri"] + ".gz"))
            if not os.path.exists(localfile):
                slurplogger().info("skipping %s" % (localfile))
                continue
            slurplogger().info("Registering %s" % (localfile))
            meta = uri.dict
            meta = enhancetenv3Meta(meta, localfile)
            self.addEntry(meta)

        self._dbinvent.data["citation"]="Blewitt, G., W. C. Hammond, and C. Kreemer (2018), " \
                                     "Harnessing the GPS data explosion for interdisciplinary science, Eos, 99, https://doi.org/10.1029/2018EO104623."
        self.updateInvent()


geoslurpCatalogue.addDataset(UNRfinal)
Example #3
                    self.updated.append(tmp)

    def register(self, pattern=None):
        """Register static gravity fields donwloaded in the data director
        :param pattern: only register files whose filename obeys this regular expression
        """
        if not pattern:
            pattern = '.*\.gz'
        #create a list of files which need to be (re)registered
        if self.updated:
            files = self.updated
        else:
            files = [
                UriFile(file) for file in findFiles(self.dataDir(), pattern)
            ]

        #loop over files
        for uri in files:
            urilike = os.path.basename(uri.url)

            if not self.uriNeedsUpdate(urilike, uri.lastmod):
                continue

            meta = icgemMetaExtractor(uri)
            self.addEntry(meta)

        self.updateInvent()


geoslurpCatalogue.addDataset(ICGEM_static)
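# Hedged sketch (standard library only, not geoslurp's actual implementation): findFiles
# above is assumed to yield paths under dataDir() whose names match the given regular
# expression; a roughly equivalent walk would be:
import os
import re

def _find_files_sketch(rootdir, pattern=r'.*\.gz'):
    rx = re.compile(pattern)
    for dirpath, _, fnames in os.walk(rootdir):
        for fname in fnames:
            if rx.match(fname):
                yield os.path.join(dirpath, fname)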
Example #4
        super().__init__(dbconn)
        # Create table if it doesn't exist
        self.table.metadata.create_all(self.db.dbeng, checkfirst=True)

    def pull(self):
        """Pulls known geocenter motion estimates from the internet and stores them in the cache"""
        for gsource in self.dsources:
            gsource(self.cacheDir()).download()



    def register(self):
        """Registers the downloaded geocenter motion estimates in the database"""
        for gsource in self.dsources:
            try:
                src = gsource(self.cacheDir())
                metadicts = src.extract()
                slurplogger().info("registering %s" % (src.meta["name"]))
                for meta in metadicts:
                    if self.entryNeedsUpdate(meta['name'], lastmod=src.meta['lastupdate'], col=self.table.name):
                        self.addEntry(meta)
            except Exception:
                #possibly not downloaded, but that is ok
                continue

        self.updateInvent()


geoslurpCatalogue.addDataset(Deg1n2)

Example #5
# Factory method to dynamically create classes
def GRDCGISClassFactory(fileName):
    splt = fileName.split(".")
    return type(splt[0], (grdc_gis_base, ), {
        "filename": fileName,
        "gtype": "GEOMETRY",
        "swapxy": True
    })
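# Side note (illustration only, not part of the original module): GRDCGISClassFactory
# relies on the three-argument form of type(name, bases, namespace) to build a
# subclass at runtime, e.g.
#
#     RiversCls = GRDCGISClassFactory("GRDC_687_rivers.shp")
#     RiversCls.__name__   # "GRDC_687_rivers"
#     RiversCls.filename   # "GRDC_687_rivers.shp"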


def getGRDCDsets(conf):
    """Automatically create all classes contained within the GRDC tables"""
    GISshapes = [
        'GRDC_405_basins_from_mouth.shp', 'GRDC_687_rivers.shp',
        'GRDC_687_rivers_class.shp', 'GRDC_lakes_join_rivers.shp',
        'grdc_basins_smoothed.shp'
    ]

    out = [GRDCGISClassFactory(name) for name in GISshapes]

    #also add the monthly and daily datasets
    for name in ["grdc_monthly", "grdc_daily"]:
        out.append(GRDCClassFactory(name))

    return out


geoslurpCatalogue.addDatasetFactory(getGRDCDsets)
geoslurpCatalogue.addDataset(grdc_catalogue)
Example #6
        "n1c", "pna", "g1a", "j2b", "3b3", "c2a", "e1b", "gsb", "e2a", "e1e",
        "3b2", "j2c", "6aa", "e1f", "j3a", "j2a", "j1b", "j1c", "gsa", "3b5",
        "3bb", "3b0", "txn", "j1a", "e1a", "3b4", "3aa", "j2d", "e1g", "gsd",
        "6a1", "saa", "txb", "3b1", "sab", "3ba", "n1b", "e1c", "e1d", "txa"
    ]

    # tphases={"j1":["a","b","c"],"j2":["a","b","c"],"j3":["a"],"3a":["a"],"c2":["a"],"n1":["b","c"],"sa":["a","b"],"tx":["a","b","n"],"3a":["a"]}
    out = []
    for sat in satnph:
        clname = "rads_" + sat[0:2] + "_" + sat[2:3]
        out.append(radsclassFactory(clname))
    return out


geoslurpCatalogue.addDatasetFactory(getRADSDsets)
geoslurpCatalogue.addDataset(RadsCycles)

#### RADS REFERENCE ORBITS (DEPENDS ON ABOVE dataset classes) ####
RadsRefOrbitTBase = declarative_base(metadata=MetaData(schema=scheme))


class RadsRefT(RadsRefOrbitTBase):
    __tablename__ = "radsreforbits"
    id = Column(Integer, primary_key=True)
    lastupdate = Column(TIMESTAMP)
    missionid = Column(String, index=True)
    refcycle = Column(Integer)
    apass = Column(Integer)
    geom = Column(geotracktype)
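
# Hedged note: such a declarative table is typically materialised in the database with
# SQLAlchemy's create_all, mirroring the pattern used for PIESTBase in the awipies
# example further down, e.g.
#
#     RadsRefOrbitTBase.metadata.create_all(engine, checkfirst=True)
#
# where engine is assumed to be the geoslurp SQLAlchemy engine (self.db.dbeng in a DataSet).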

Example #7
from geoslurp.dataset import OGRBase
from geoslurp.datapull.http import Uri as http
from geoslurp.config.catalogue import geoslurpCatalogue
import urllib.request
from zipfile import ZipFile
import os


class WriBasin(OGRBase):
    """Base class for Wribasin watersheds """
    scheme = 'globalgis'
    swapxy = True

    def __init__(self, dbconn):
        super().__init__(dbconn)
        self.ogrfile = os.path.join(self.cacheDir(), "wribasin.shp")

    def pull(self):
        """Pulls the wribasin data from the internet and unpacks it in the cache directory"""
        fzip = os.path.join(self.cacheDir(), "wri_basin.zip")
        urllib.request.urlretrieve(
            "http://www.fao.org/geonetwork/srv/en/resources.get?id=30914&fname=wri_basins.zip&access=private",
            fzip)

        with ZipFile(fzip, 'r') as zp:
            zp.extractall(self.cacheDir())


geoslurpCatalogue.addDataset(WriBasin)
Example #8
class gleam_monthly(XarrayBase):
    outofdb=True
    scheme="prec_evap"
    groupby="time"
    writeoutofdb=False
    def pull(self):
        auth=self.conf.authCred("gleam",qryfields=["user","passw","url"])
        # note url should be of the form  sftp://server:port
        
        crwl=crawler(url=auth.url+"/data/v3.6b/monthly",auth=auth)
        downdir=self.cacheDir()
        for uri in crwl.uris():
            uri.download(downdir,check=True)

    def convert2zarr(self):
        slurplog.info("Converting data to zarr %s"%(self.xarfile))
        #open all datasets together
        ds=xr.open_mfdataset(os.path.join(self.cacheDir(),"*.nc"))
        #save to zarr format
        ds.to_zarr(self.xarfile)

    def register(self):
        self.xarfile=os.path.join(self.dataDir(),"2003-2021_GLEAM_v3.6b_MO.zarr")
        if not os.path.isdir(self.xarfile):
            self.convert2zarr()
        
        super().register()


geoslurpCatalogue.addDataset(gleam_monthly)
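# Usage note (plain xarray, not geoslurp-specific): once convert2zarr has written the
# store, it can be reopened lazily with xr.open_zarr, e.g.
#
#     ds = xr.open_zarr("2003-2021_GLEAM_v3.6b_MO.zarr")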
Example #9
                    #check if the last file is already extracted
                    if os.path.exists(succesfile):
                        slurplogger().info(f"{tarf.url} is already extracted, skipping")
                    else:
                        with tarfile.open(tarf.url,"r:gz") as tf:
                            slurplogger().info(f"Extracting trajectory files from {tarf.url}")
                            tf.extractall(datadir)
                        #touch the successfile to indicate this archive has been successfully extracted
                        Path(succesfile).touch()
            except tarfile.ReadError as exc:
                raise exc

    def register(self,pattern=r'.*\.nc$'):
        """Register downloaded trajectory files from CORA
        :param pattern: (string) file pattern to look for (defaults to all files ending with .nc)
        """
        #create a list of files which need to be (re)registered
        newfiles=self.retainnewUris([UriFile(file) for file in findFiles(self.dataDir(),pattern)])
        for uri in newfiles:
            meta=coraMetaExtractor(uri)
            if not meta:
                #don't register empty entries
                continue

            self.addEntry(meta)
        self._dbinvent.data["Description"]="EasyCora output data table"
        self._dbinvent.data["CORAversion"] = "5.2"
        self.updateInvent()

geoslurpCatalogue.addDataset(EasyCora)
Example #10
        latmax = ds.latitude.max().values
        lonmin = ds.longitude.min().values
        lonmax = ds.longitude.max().values
        bbox = Polygon([(lonmin, latmin), (lonmin, latmax), (lonmax, latmax),
                        (lonmax, latmin)])
        return {
            "name": name,
            "lastupdate": uri.lastmod,
            "tstart": tstart,
            "tend": tend,
            "uri": uri.url,
            "data": data,
            "geom": wktdumps(bbox)
        }
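
# Standalone illustration (the import paths are an assumption, as implied by Polygon and
# wktdumps above): the bounding box is serialised to WKT so it can be stored in a
# geometry column, e.g.
#
#     from shapely.geometry import Polygon
#     from shapely.wkt import dumps as wktdumps
#     bbox = Polygon([(lonmin, latmin), (lonmin, latmax), (lonmax, latmax), (lonmax, latmin)])
#     wkt = wktdumps(bbox)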


class GloFASUpArea(RasterBase):
    """Class which downloads and registers the auxiliary uparea file"""
    regularblocking = True
    scheme = "hydro"

    def pull(self):
        upsrc = http(
            "https://confluence.ecmwf.int/download/attachments/143039724/upArea.nc",
            lastmod=datetime(2021, 11, 17))
        #download to cache only (will be in db raster)
        urif, upd = upsrc.download(self.srcdir, check=True)


geoslurpCatalogue.addDataset(GloFASUpArea)
Example #11
    def columnsFromOgrFeat(self, feat):
        cols = super().columnsFromOgrFeat(feat)
        cols.append(Column('orbit', Integer))
        cols.append(Column('missionids', ARRAY(String)))
        return cols

    def pull(self):
        """Pulls the google kml files from the copernicus server"""
        rooturl = 'https://sentinel.esa.int/documents/247904/685098/Sentinel-3-Absolute-Ground-Tracks.zip'
        cache = self.cacheDir()
        httpserv = http(rooturl, lastmod=datetime(2021, 11, 29))
        uri, upd = httpserv.download(cache, check=True)

        if upd:
            with ZipFile(uri.url, 'r') as zp:
                zp.extractall(cache)


class s3a_reforbit(S3ABRefOrbitsBase):
    ogrfile = 'S3A_rel_orbit_ground_track_10sec_v1_4.kml'
    missionids = ["s3a"]


class s3b_reforbit(S3ABRefOrbitsBase):
    ogrfile = 'S3B_rel_orbit_ground_track_10sec_v1_4.kml'
    missionids = ["s3b"]


geoslurpCatalogue.addDataset(s3a_reforbit)
geoslurpCatalogue.addDataset(s3b_reforbit)
Example #12
    """Class for registering SH filters (downloads from github) """
    scheme = schema
    version = (0, 0, 0)

    def __init__(self, dbconn):
        super().__init__(dbconn)
        self.pdfile = os.path.join(self.cacheDir(), 'inventory_upd.csv')

    def pull(self):
        """Pulls the dataset from github and unpacks it in the cache directory"""
        #download the inventory file
        lastchanged = datetime(2021, 11, 5)
        inventory = "https://github.com/strawpants/GRACE-filter/raw/master/inventory.xlsx"
        uri, upd = http(inventory,
                        lastmod=lastchanged).download(self.cacheDir(),
                                                      check=True)
        pdinvent = pd.read_excel(uri.url, engine="openpyxl")
        #download all the files
        ddir = self.dataDir()
        for idx, row in pdinvent.iterrows():
            ffile, upd = http(row.uri,
                              lastmod=lastchanged).download(ddir, check=True)
            #update file with newly downloaded file
            pdinvent.at[idx, 'uri'] = self.conf.generalize_path(ffile.url)

        #write updated inventory to csv
        pdinvent.to_csv(self.pdfile)


geoslurpCatalogue.addDataset(GRACEfilter)
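# Follow-up note (plain pandas): the inventory written by pull() can be read back in a
# register step with something like pdinvent = pd.read_csv(self.pdfile).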
Example #13
                self.addEntry(meta)

        self.updateInvent()

    # def halt(self):
    # slurplogger().error("Stopping update")
    # self._killUpdate=True
    # # indicate a done task in the queue in order to allow the pullWorker thread to stop gracefully
    # #empty queue
    # while not self._uriqueue.empty():
    # self._uriqueue.get()
    # self._uriqueue.task_done()
    # #also synchronize inventory info (e.g. resume)
    # self.updateInvent(False)
    # raise RuntimeWarning("Argo dataset processing stopped")

    # def pullWorker(self,conn):
    # """ Pulls valid opendap URI's from a thredds server and queue them"""

    # for uri in conn.uris():
    # slurplogger().info("queuing %s",uri.url)
    # self._uriqueue.put(uri)
    # if self._killUpdate:
    # slurplogger().warning("Pulling of Argo URI's stopped")
    # return
    # #signal the end of the queue by adding a none
    # self._uriqueue.put(None)


geoslurpCatalogue.addDataset(Argo2)
Example #14
            with ZipFile(uri.url, 'r') as zp:
                zp.extractall(self.cacheDir())

    def register(self):
        """ Register all downloaded fronts (in text files)"""

        slurplogger().info("Building file list..")
        files = [
            UriFile(file) for file in findFiles(self.cacheDir(), '.*txt',
                                                self._dbinvent.lastupdate)
        ]

        if len(files) == 0:
            slurplogger().info(
                "Orsifronts: No new files found since last update")
            return

        #possibly empty table
        self.truncateTable()

        #loop over files
        for uri in files:
            slurplogger().info("adding %s" % (uri.url))
            self.addEntry(orsiMetaExtractor(uri))

        self.updateInvent()


#register dataset
geoslurpCatalogue.addDataset(Orsifronts)
Example #15
            ds["ETm"] = ds.ETm * mmmon_kgsecm2
            #add CF attributes
            cfadd_global(
                ds,
                title="SEBSv2 Evapotranspiration estimates",
                references=
                "https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2020JD032873",
                source=f"Geoslurp class {self.__class__.__name__}")
            cfadd_standard_name(ds.ETm, "water_evapotranspiration_flux")
            # cfencode_time(ds.time)
            cfadd_coord(ds.lon, 'X', standard_name='longitude')
            cfadd_coord(ds.lat, 'Y', standard_name='latitude')
            if appdim:
                ds.to_zarr(self.xarfile, append_dim=appdim)
            else:
                ds.to_zarr(self.xarfile, mode='w')
                appdim = "time"

    def register(self):
        self.xarfile = os.path.join(self.dataDir(),
                                    "Global_land_monthly_ET_V2.zarr")
        tarar = os.path.join(self.cacheDir(), "Global_land_monthly_ET_V2.rar")
        if not os.path.isdir(self.xarfile):
            self.convert2zarr(tarar)

        super().register()


geoslurpCatalogue.addDataset(SEBS_monthly)
Example #16
                      te=tend)

        #retrieve the appropriate index range
        halfres = 0.1 / 2
        trange = self.getRange(bbox.ts, bbox.te, self.dscoords.time)
        latrange = self.getRange(bbox.s - halfres, bbox.n + halfres,
                                 self.dscoords.lat)
        lonrange = self.getRange(bbox.w - halfres, bbox.e + halfres,
                                 self.dscoords.lon)
        if trange is None or latrange is None or lonrange is None:
            slurplog.warning("refusing to queue empty dataset, skipping")
            return
        qrystr = f"?time{trange},lat{latrange},lon{lonrange},precipitation{trange}{lonrange}{latrange}"
        self.pullqueue[name] = qrystr

    @staticmethod
    def getRange(start, end, within):

        idx = np.where((within >= start) & (within <= end))
        if len(idx[0]) < 1:
            return None
        return f"[{idx[0][0]}:1:{idx[0][-1]}]"

    def rastExtract(self, uri):
        meta = super().rastExtract(uri)
        meta["name"] = os.path.basename(uri.url)[0:-3]
        return meta


geoslurpCatalogue.addDataset(imerg_monthly)
Example #17
        self._dbinvent.data["Description"] = "ArcticDEM raster table"

    def pull(self, intersect=None):
        # download the entire mosaic domain in one tif
        if self.res in ['1km', '500m', '100m']:
            rasteruri = http(
                "http://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/mosaic/v3.0/"
                + self.res + "/" + self.rasterfile,
                lastmod=datetime(2018, 9, 26))
            rasterfileuri, upd = rasteruri.download(self.srcdir, check=False)

        #download only those tiles which are needed


def getArcticDems(conf):
    out = []
    for res in ['1km', '500m', '100m']:
        out.append(
            type("arcticdem_mosaic_" + res + "_v3", (ArcticDemRasterBase, ), {
                "res": res,
                "tiles": [100, 100]
            }))
        # out.append(type("arcticdem_mosaic_"+res+"_v3", (ArcticDemRasterBase,), {"res":res}))

    return out


#register datasets
geoslurpCatalogue.addDataset(Arcticdemindex)
geoslurpCatalogue.addDatasetFactory(getArcticDems)
Example #18



class awipies(DataSet):
    """Class whichs downloads/register athe AWI South Atlantic PIES"""
    scheme=scheme
    table=PIESTable
    obpfile='OBPv3withtau.mat'
    def __init__(self,dbconn):
        super().__init__(dbconn)
        PIESTBase.metadata.create_all(self.db.dbeng, checkfirst=True)

    def pull(self):
        """Pulls the OBP matlab file from the cloud"""
        cred=self.conf.authCred("awipies",['url','user','passw'])
        obpsource=http(cred.url,auth=cred)
        obpsource.download(self.dataDir(),outfile=self.obpfile)

    def register(self):

        obpfile=os.path.join(self.dataDir(),self.obpfile)

        for meta in extractMetaPies(obpfile):
            self.addEntry(meta)

        self.updateInvent()


geoslurpCatalogue.addDataset(awipies)
Example #19
        except:
            token=None
        # import pdb;pdb.set_trace() 
        ghcrawler=ghCrawler(reponame,commitsha=commitsha,
                           filter=ghfilter({"type":"blob","path":r"\.love"}),
                           followfilt=ghfilter({"type":"tree","path":"Love"}),
                           oauthtoken=token)
        
        #download all datasets
        ghcrawler.parallelDownload(self.dataDir(),check=True,maxconn=3,gzip=True)

    def register(self):
        slurplogger().info("Building file list..")
        files=[UriFile(file) for file in findFiles(self.dataDir(),'.*love',self._dbinvent.lastupdate)]

        if len(files) == 0:
            slurplogger().info("LLove: No new files found since last update")
            return

        filesnew=self.retainnewUris(files)
        if len(filesnew) == 0:
            slurplogger().info("LLove: No database update needed")
            return
        #loop over files
        for uri in filesnew:
            self.addEntry(lloveMetaExtractor(uri))
        self.updateInvent()


geoslurpCatalogue.addDataset(LLove)
Example #20
                        meta={"type":file.split('_')[-1][:-4],"time":tcent,"tstart":tstart,"tend":tend,"lastupdate":lastupdate,"nmax":1,"omax":1,"origin":"CF","format":"JSONB","uri":"self:data","gm":0.3986004415e+15,"re":0.6378136460e+07
}
                        
                        for el,val in zip(order,lnspl[1:4]):
                            # import pdb;pdb.set_trace()
                            shar["cnm"][shar.idx(el)]=float(val)/self.sqrt3timesRE

                        #also add sigmas 
                        for el,val in zip(order,lnspl[4:7]):
                            shar["sigcnm"][shar.idx(el)]=float(val)/self.sqrt3timesRE
                        meta["data"]=shar.dict
                        self.addEntry(meta)
            self.updateInvent()


geoslurpCatalogue.addDataset(geocenter_Rietbroeketal2016upd)

def parseGSMDate(dtstr):
    """Parse datestr as found in GSM files (yyyymmdd.00000)"""
    return datetime(int(dtstr[0:4]),int(dtstr[4:6]),int(dtstr[6:8]))  
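
# e.g. parseGSMDate("20080131.00000") returns datetime(2008, 1, 31)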

class geocenter_GRCRL06_TN13(DataSet):
    scheme=scheme
    rooturl="https://podaac-tools.jpl.nasa.gov/drive/files/allData/grace/docs/"
    # fout="TN-13_GEOC_CSR_RL06.txt"
    def __init__(self,dbconn):
        self.table=type(self.__class__.__name__.lower().replace('-',"_")+"Table", (GravitySHinDBTBase,), {})
        super().__init__(dbconn)
    
    def pull(self):
        """Pulls the geocenter ascii files in the cache"""
Example #21
# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with Frommle; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

# Author Roelof Rietbroek ([email protected]), 2018

from geoslurp.config.catalogue import geoslurpCatalogue
from geoslurp.dataset.motuGridsBase import MotuGridsBase


class Duacs(MotuGridsBase):
    """Downloads subsets of the ducacs gridded multimission altimeter datasets for given regions"""
    scheme = 'altim'
    variables = ["sla", "adt"]
    bandname = variables[0]
    authalias = "cmems"
    #http://my.cmems-du.eu/motu-web/Motu --service-id SEALEVEL_GLO_PHY_L4_REP_OBSERVATIONS_008_047-TDS --product-id dataset-duacs-rep-global-merged-allsat-phy-l4 --longitude-min 0.125 --longitude-max -0.125 --latitude-min -89.875 --latitude-max 89.875 --date-min "2019-01-12 00:00:00" --date-max "2019-01-12 00:00:00" --variable sla --variable adt --variable ugos --variable vgos --variable ugosa --variable vgosa --variable err --out-dir <OUTPUT_DIRECTORY> --out-name <OUTPUT_FILENAME> --user <USERNAME> --pwd <PASSWORD>

    moturoot = "http://my.cmems-du.eu/motu-web/Motu"
    # moturoot="http://my.cmems-du.eu/motu-web/Motu"
    motuservice = "SEALEVEL_GLO_PHY_L4_REP_OBSERVATIONS_008_047-TDS"
    motuproduct = "dataset-duacs-rep-global-merged-allsat-phy-l4"

    def __init__(self, dbconn):
        super().__init__(dbconn)


geoslurpCatalogue.addDataset(Duacs)