Example #1
def pullHydro(hytype, downloaddir):
    # see https://www.dropbox.com/sh/hmpwobbz9qixxpe/AAAI_jasMJPZl_6wX6d3vEOla for the 'root' of the hydrosheds data
    hysource = {
        "hybas_af":
        "https://www.dropbox.com/sh/hmpwobbz9qixxpe/AADoPLdVZNd2JG-KaJNY0zT1a/HydroBASINS/standard/af/hybas_af_lev01-06_v1c.zip",
        "hybas_eu":
        "https://www.dropbox.com/sh/hmpwobbz9qixxpe/AABz1Pym5esD6GUJcnzaaqpEa/HydroBASINS/standard/eu/hybas_eu_lev01-06_v1c.zip",
        "af_riv_30s":
        "https://www.dropbox.com/sh/hmpwobbz9qixxpe/AAC9imuUajl_1bS0tKWqPE8Ya/HydroSHEDS_RIV/RIV_30s/af_riv_30s.zip",
        "eu_riv_30s":
        "https://www.dropbox.com/sh/hmpwobbz9qixxpe/AAD68vqkhRNJd5qK3NVvM7TSa/HydroSHEDS_RIV/RIV_30s/eu_riv_30s.zip"
    }
    httpserv = http(hysource[hytype], lastmod=datetime(2021, 2, 8))
    #Newest version which is supported by this plugin
    uri, upd = httpserv.download(downloaddir, check=True)
    if upd:
        #unzip all the goodies
        zipd = os.path.join(downloaddir, 'extract')
        with ZipFile(uri.url, 'r') as zp:
            zp.extractall(zipd)
    else:
        slurplogger().info(
            "This component of hydrosheds is already downloaded")

    return upd
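
A minimal usage sketch (not part of the original example): pullHydro and its geoslurp dependencies (http, slurplogger) are assumed to be importable, and the download directory is a hypothetical local path.

import os

downloaddir = "/tmp/hydrosheds"  # hypothetical cache location
os.makedirs(downloaddir, exist_ok=True)
# download and extract the African HydroBASINS levels 1-6 when needed
if pullHydro("hybas_af", downloaddir):
    print("new HydroBASINS data extracted under", os.path.join(downloaddir, "extract"))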
Example #2
 def purgeentry(self, filter):
     """Delete pgfunction entry in the database"""
     slurplogger().info("Deleting %s entry" % (self.name))
     self._ses.delete(self._dbinvent)
     self._ses.commit()
     # function names are identifiers and cannot be passed as bound parameters
     dropexec = text("DROP FUNCTION IF EXISTS %s;" % (self.name))
     self.db.dbeng.execute(dropexec)
Example #3
    def entryNeedsUpdate(self, likestr, lastmod, col=None):
        """Query a column of the table with a like-string and delete the matching entries when they are older than lastmod"""
        needsupdate = True
        try:
            if not col:
                col = self.table.uri
            qResults = self._ses.query(self.table).filter(
                col.like('%' + likestr + '%'))
            if qResults.count() == 0:
                return True
            needsupdate = False
            #check if at least one needs updating
            for qres in qResults:
                if qres.lastupdate < lastmod:
                    needsupdate = True
                    break

            if needsupdate:
                for qres in qResults:
                    #delete the entries which need updating
                    self._ses.delete(qres)
                    self._ses.commit()
            else:
                slurplogger().info("No Update needed, skipping %s" % (likestr))

        except Exception as e:
            # Fine no entries found
            pass
        return needsupdate
Example #4
 def pull(self,pattern='.*'):
     """Pulls the Easy CORA files from the Copernicus FTP server and unpacks them
     :param pattern (string): only download data whose file names obey this regular expression (e.g. 20[0-9][0-9] to download from 2000 onward)
     """
     ftproot="ftp://my.cmems-du.eu/Core/INSITU_GLO_TS_REP_OBSERVATIONS_013_001_b/CORIOLIS-GLOBAL-EasyCORA-OBS/global"
     
     #get  cmems authentication details from database
     cred=self.conf.authCred("cmems")
     ftpcr=ftpCrawler(ftproot,auth=cred, pattern=pattern)
     
     updated=ftpcr.parallelDownload(self.cacheDir(),check=True,maxconn=10,continueonError=True)
     
     #unpack the downloaded files in the data directory
     datadir=self.dataDir()
     for tarf in [UriFile(f) for f in findFiles(self.cacheDir(),".*tgz$")]:
         succesfile=os.path.join(datadir,os.path.basename(tarf.url)+".isextracted")
         try:
             #check if the files need unpacking (only unpack when needed)
             #check whether the last file is already extracted
             if os.path.exists(succesfile):
                 slurplogger().info(f"{tarf.url} is already extracted, skipping")
             else:
                 with tarfile.open(tarf.url,"r:gz") as tf:
                     slurplogger().info(f"Extracting trajectory files from {tarf.url}")
                     tf.extractall(datadir)
                 #touch the success file to indicate this archive has been successfully extracted
                 Path(succesfile).touch()
         except tarfile.ReadError as exc:
             raise exc
Example #5
def fillCSVTable(filename, tablename, lookup, scheme, hskip=0):
    """Update/populate a database table from a CSV file.
    This function reads all rows from an open CSV file. The first line is expected to hold the column names, which are mapped to types in the lookup dictionary
    """

    ses = scheme.db.Session()
    # currently we can only cope with updating the entire table as a whole
    scheme.dropTable(tablename)

    # if self.dbeng.has_table(tablename,schema=schema):
    slurplogger().info("Filling CSV table %s:%s " %
                       (scheme._schema, tablename))
    with open(filename, 'r') as fid:
        for i in range(hskip):
            next(fid)
        names, cols = columnsFromCSV(fid.readline(), lookup)
        table = Table(tablename, scheme.db.mdata, *cols, schema=scheme._schema)
        table.create(checkfirst=True)
        tableMap = tableMapFactory(tablename, table)

        for ln in fid:
            values = valuesFromCSV(ln, names)
            ses.add(tableMap(**values))

    ses.commit()
    # self.vacuumAnalyze(tablename,schema)
    # ses.commit()
    ses.close()
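
A hedged call sketch: the CSV file, table name and lookup mapping below are placeholders, and the exact values expected in lookup depend on columnsFromCSV (not shown here); sqlalchemy column types are assumed.

from sqlalchemy import String, Float, Integer

# hypothetical mapping of CSV header names to column types
lookup = {"station": String, "lat": Float, "lon": Float, "nobs": Integer}
# scheme is assumed to be an already initialized geoslurp scheme object
fillCSVTable("stations.csv", "stations", lookup, scheme, hskip=0)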
Example #6
    def pull(self, missionRegex=None):
        """Pulls the necessary tables and data from the rads server
        :param missionRegex: only register the missions obeying this regular expression
        """

        #pulls and registers the radsCycle table if it needs updating
        radsCycles = geoslurpCatalogue.getDatasets(
            self.conf, f"{scheme}.RadsCycles")[0](self.db)
        if radsCycles.isExpired():
            radsCycles.pull()
            radsCycles.register()

        #determine the reference cycles and download rads 1hz data files to get the orbit
        self.registerRefCycles()

        #the query will take the first cycle which has the maximum amount of passes for each mission/-phase combination

        # Download appropriate cycles
        for mission, entry in self._dbinvent.data["missions"].items():
            if missionRegex:
                if not re.search(missionRegex, mission):
                    slurplogger().info(f"Skipping mission {mission}")
                    continue
            #Download  rads data for this specific cycle
            sat = mission[0:2]
            ph = mission[2:3]
            alttbl = f"{scheme}.rads_{sat}_{ph}"
            slurplogger().info(
                f"Getting reference cycle {entry['refcycle']} for {alttbl}")
            radsOrbit = geoslurpCatalogue.getDatasets(self.conf,
                                                      f"{alttbl}")[0](self.db)
            radsOrbit.pull(cycle=entry["refcycle"])
            radsOrbit.register()
Example #7
    def register(self):
        """Update/populate a database table (creates one if it doesn't exist)
        This function reads a shapefile and puts it in a single table.
        :param ogrfile: gdal dataset (e.g. shapefile)
        :param forceGType (optional): a geometry type to be used as the "geom" column
        :returns nothing (but sets the internal sqlalchemy table)
        """
        # currently we can only cope with updating the entire table as a whole
        self.db.dropTable(self.name, self.scheme)

        slurplogger().info("Filling POSTGIS table %s.%s with data from %s" %
                           (self.scheme, self.name, self.ogrfile))

        #open shapefile directory or ogr file
        if self.ogrfile.endswith(
                '.kmz') and not gdal.GetDriverByName('LIBKML'):
            #unzip the kmz file
            cache = self.cacheDir()
            with ZipFile(self.ogrfile, 'r') as zp:
                kmlf = zp.namelist()[0]  #take the first zip file only
                zp.extract(kmlf, cache)
            kmlfile = os.path.join(cache, kmlf)
            shpf = gdal.OpenEx(kmlfile, 0)

        else:
            shpf = gdal.OpenEx(self.ogrfile, 0)

        count = 0
        for ithlayer in range(shpf.GetLayerCount()):
            shpflayer = shpf.GetLayer(ithlayer)
            if self.layerregex:
                if not re.search(self.layerregex, shpflayer.GetName()):
                    continue
            sourceprj = shpflayer.GetSpatialRef()
            if sourceprj.IsSame(self.targetprj):
                transform = None
            else:
                transform = osr.CoordinateTransformation(
                    sourceprj, self.targetprj)
            # print(sourceprj)

            # print(self.targetprj)

            # print(sourceprj.IsSame(self.targetprj))
            for feat in shpflayer:
                count += 1
                if self.table is None:
                    cols = self.columnsFromOgrFeat(feat)
                    self.createTable(cols)
                values = self.valuesFromOgrFeat(feat, transform)
                # import pdb;pdb.set_trace()
                try:
                    self.addEntry(values)
                except Exception:
                    #skip entries which fail to be added
                    pass
                #commit every X times

        #also update entry in the inventory table
        self.updateInvent()
Example #8
def cachedGithubCatalogue(reponame,
                          cachedir=".",
                          commitsha=None,
                          gfilter=GithubFilter(),
                          gfollowfilter=GithubFilter({"type": "tree"}),
                          depth=2,
                          ghtoken=None):
    """Caches the result of a github crawl for later reuse"""

    cachedCatalog = os.path.join(cachedir,
                                 reponame.replace("/", "_") + ".yaml")
    catalog = {}
    if os.path.exists(cachedCatalog):
        #check whether the commit sha agrees when explicitly specified
        if commitsha:
            #read catalog from yaml file
            with open(cachedCatalog, 'r') as fid:
                catalog = yaml.safe_load(fid)
            # import pdb;pdb.set_trace()
            if catalog["commitsha"] != commitsha:
                #trigger a new download
                catalog = {}
        else:
            #always download a newer version
            catalog = {}

    if catalog:
        slurplogger().info("using cached github catalogue %s" %
                           (cachedCatalog))
    else:
        slurplogger().info("downloading github catalogue to cache %s" %
                           (cachedCatalog))
        #retrieve from github and store for later use
        crwl = Crawler(reponame,
                       commitsha=commitsha,
                       filter=gfilter,
                       followfilt=gfollowfilter,
                       oauthtoken=ghtoken)

        catalog = {
            "Description": "Cached github crawler results",
            "rooturl": crwl.rooturl,
            "commitsha": commitsha,
            "datasets": []
        }
        # import pdb;pdb.set_trace()
        for item in crwl.treeitems(depth=depth):
            catalog["datasets"].append({
                "path":
                os.path.join(item["dirpath"], item["path"]),
                "url":
                item["url"]
            })

        #save the results to a cached file
        with open(cachedCatalog, 'w') as fid:
            yaml.dump(catalog, fid, default_flow_style=False)

    return catalog
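
A brief usage sketch; the repository name and cache directory are placeholders, and no OAuth token is passed, so this would only work for a public repository.

# build (or reuse) a cached catalogue of a hypothetical repository
catalog = cachedGithubCatalogue("someorg/somerepo", cachedir="/tmp/ghcache", depth=2)
for ds in catalog["datasets"]:
    print(ds["path"], ds["url"])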
Example #9
def lloveMetaExtractor(uri):
    """Extract some meta info from a load Love number file"""
    #extract the maximum degree from the file and heuristically derive the loadtype from the filename
    if re.search("body", uri.url):
        ltype = "body"
    else:
        ltype = "surface"

    nmax = 0
    reentry = re.compile('^ *[0-9]')
    hn = []
    ln = []
    kn = []
    deg = []
    slurplogger().info(f"Processing {uri.url}")
    descr = ""
    ref = None
    with gzip.open(uri.url, 'rt') as fid:
        for line in fid:
            if reentry.search(line):
                linespl = line.split()
                n = int(linespl[0])
                if n == 1:
                    #look for CF degree 1 coefficients only
                    ref = "CF"
                    if linespl[4] != ref:
                        #only use the degree 1 numbers of the chosen reference system
                        continue
                deg.append(n)
                hln = [float(el.replace('D', 'E')) for el in linespl[1:4]]
                #possibly replace infinity values with None
                hln = [None if np.isinf(el) else el for el in hln]

                hn.append(hln[0])
                ln.append(hln[1])
                kn.append(hln[2])
            else:
                #append comment to description
                descr += line

    #create an xarray dataset
    dslove = xr.Dataset(data_vars=dict(kn=(["degree"], kn),
                                       hn=(["degree"], hn),
                                       ln=(["degree"], ln)),
                        coords=dict(degree=(["degree"], deg)))

    #extract the maximum degree
    nmax = dslove.degree.max().data.item()
    meta = {
        "name": os.path.basename(uri.url).replace(".love.gz", ""),
        "lastupdate": uri.lastmod,
        "descr": descr,
        "loadtype": ltype,
        "nmax": nmax,
        "ref": ref,
        "data": dslove
    }

    return meta
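
A small sketch of how the extractor might be called, assuming UriFile behaves as in the other examples (exposing url and lastmod) and that the path points to an existing gzipped Love-number file.

uri = UriFile("/data/llove/prem.love.gz")  # hypothetical input file
meta = lloveMetaExtractor(uri)
print(meta["name"], meta["loadtype"], meta["nmax"])
print(meta["data"])  # xarray Dataset holding hn, ln, kn per degree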
Example #10
    def register(self):

        #currently deletes all entries in the table
        self.truncateTable()

        #open main index file and read
        zipdir=self.cacheDir()+"/"+self.typ+"_"+self.freq

        with open(os.path.join(zipdir,'filelist.txt'),'r') as fid:
            for ln in fid:
                lnspl=ln.split(";")
                lat=float(lnspl[1])
                lon=float(lnspl[2])
                id=int(lnspl[0])
                slurplogger().info("Indexing %s"%(lnspl[3]))

                geoLoc=ogr.Geometry(ogr.wkbPoint)
                geoLoc.AddPoint(lon,lat)
                meta={
                    "id":id,
                    "statname":lnspl[3],
                    "countrycode":lnspl[4],
                    "formerid":lnspl[5],
                    "geom":geoLoc.ExportToWkt(),
                    # "geom":WKBElement(geoLoc.ExportToWkb(),srid=4326,extended=True),
                }
                #also open data file
                data={"time":[],"sl":[]}
                tmin=datetime.max
                tmax=datetime.min
                with open(os.path.join(zipdir,'data',"%d.%sdata"%(id,self.typ))) as dfid:
                    for dln in dfid:
                        tyear,valmm,dum1,dum2=dln.split(";")
                        dt=decyear2dt(float(tyear))
                        if self.freq == 'monthly':
                            dstart,dend=dt2monthlyinterval(dt)
                        else:
                            #yearly
                            dstart,dend=dt2yearlyinterval(dt)
                        tmin=min(dt,tmin)
                        tmax=max(dt,tmax)

                        data["time"].append(dt.isoformat())
                        data["sl"].append(1e3*int(valmm))

                #open documentation files
                with open(os.path.join(zipdir,'docu',"%d.txt"%(id))) as docid:
                    data["doc"]=docid.readlines()
                #open auth file
                with open(os.path.join(zipdir,'docu',"%d_auth.txt"%(id))) as docid:
                    data["auth"]=docid.readlines()

                meta['tstart']=tmin
                meta["tend"]=tmax
                meta["data"]=data

                self.addEntry(meta)
            self.updateInvent()
Example #11
    def purgeentry(self):
        """Delete pgfunction entry in the database"""
        slurplogger().info("Deleting %s function entry" % (self.name))
        self._ses.delete(self._dbinvent)
        self._ses.commit()

        dropexec = text("DROP FUNCTION IF EXISTS %s.%s;" %
                        (self.scheme, self.name))
        self.db.dbeng.execute(dropexec)
Example #12
    def register(self):
        #truncate table
        self.truncateTable()

        for cyclefile in glob(self.dataDir() + '/*.cyc'):
            slurplogger().info("extracting cycle catalogue from %s" %
                               (cyclefile))
            cycleinfo = extractCycleInfo(cyclefile)
            self.bulkInsert(cycleinfo)
        self.updateInvent()
Example #13
    def register(self):
        """ Register the drainage divides"""
        slurplogger().info("Registering %s" % self.name)
        #possibly empty table
        self.truncateTable()
        fname = os.path.join(self.cacheDir(), self.fbase + ".gz")
        #loop over  polygon entries
        for dicentry in IceSatPolygons(fname):
            self.addEntry(dicentry)

        self.updateInvent()
Example #14
 def addUser(self, name, passw, readonly=False):
     """Adds a user to the database (note: executing this function requires appropriate database rights)"""
     slurplogger().info("Adding new user: %s" % (name))
     if readonly:
         self.dbeng.execute(
             "CREATE USER %s WITH ENCRYPTED PASSWORD '%s' IN ROLE geobrowse;"
             % (name, passw))
     else:
         self.dbeng.execute(
             "CREATE USER %s WITH ENCRYPTED PASSWORD '%s' IN ROLE geoslurp,geobrowse;"
             % (name, passw))
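
A usage sketch, assuming db is an instance of the connector class that defines addUser; the user names and passwords are placeholders.

db.addUser("jdoe", "changeme", readonly=True)  # read-only member of the geobrowse role
db.addUser("jane", "changeme")                 # read/write member of geoslurp and geobrowse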
Example #15
    def requestInfo(self):
        """Request info (modification time, size, datacoverage) on this specific query from the server"""
        if self.info:
            #quick return when already done
            return

        self.opts.describe = True
        oldd = self.opts.out_dir
        oldnm = self.opts.out_name
        self.opts.out_dir = self.opts.cache
        self.opts.out_name = self.opts.out_name.replace('.nc', '_descr.xml')
        # import pdb;pdb.set_trace()
        try:
            execute_request(self.opts)
        except Exception as e:
            slurplogger().error("failed to request info on query")
            raise

        self.opts.describe = False
        self.opts.out_dir = oldd

        #read and parse xml
        xml = XMLTree.parse(
            os.path.join(self.opts.cache,
                         self.opts.out_name.replace('.nc', '.xml')))
        trange = xml.find('timeCoverage')
        self.lastmod = isoParser(trange.attrib['end']).replace(tzinfo=None)

        #also retrieve datacoverage
        covdict = {}
        for axis in xml.iterfind('dataGeospatialCoverage/axis'):
            if axis.attrib['axisType'] == 'Lat':
                covdict['s'] = float(axis.attrib['lower'])
                covdict['n'] = float(axis.attrib['upper'])
            if axis.attrib['axisType'] == 'Lon':
                covdict['w'] = float(axis.attrib['lower'])
                covdict['e'] = float(axis.attrib['upper'])
            if axis.attrib['axisType'] == 'Time':
                covdict['ts'] = num2date(float(axis.attrib['lower']),
                                         axis.attrib['units'])
                covdict['te'] = num2date(float(axis.attrib['upper']),
                                         axis.attrib['units'])

        self.maxbtdbox = BtdBox(**covdict)

        #Crop/Synchronize the requested bounding box with that what is available
        self.opts.btdbox.crop(self.maxbtdbox)
        self.opts.syncbtdbox()

        #hack (change outname back to nc suffix)
        self.opts.out_name = oldnm

        self.info = True
Example #16
    def xmlitems(self, xmlcatalog=None, url=None, depth=10):
        """Generator which returns xml nodes which obey a certain filter
        Nodes which obey the followFilter will be recursively searched"""

        if depth == 0:
            # signals a stopiteration
            return
        else:
            depth -= 1

        if xmlcatalog is None:
            xmlcatalog = self._rootxml
        if url is None:
            url = self._catalogurl

        for xelem in xmlcatalog:

            if self._filt.isValid(xelem):

                #special check whether we're considering a resume here
                if self.resuming:
                    self.unsetResumePoint()
                    # we're going to continue with the element after this one
                    continue

                # Allright we can return this entry straight away
                yield xelem
                # Also continue with the loop after yielding
                continue

            if not self._followFilt:
                # continue with the next element if no element should be followed
                continue

            if self._followFilt.isValid(xelem):
                # If this is the case we may need a recursive search, either in a referenced subcatalog or in the children of the current element
                if xelem.tag.endswith("catalogRef"):

                    # We treat CatalogRefs in a special way by retrieving the subcatalog from the thredds server
                    suburl = os.path.dirname(url) + "/" + gethref(xelem)
                    try:
                        subxml = self.getCatalog(suburl)
                    except:
                        # Just ignore this catalog entry upon exceptions
                        slurplogger().warning("Ignoring failed CatalogRef %s" %
                                              (suburl))
                        continue
                else:
                    # Otherwise we're just going to look in the children of the current element
                    suburl = url
                    subxml = xelem

                yield from self.xmlitems(subxml, suburl, depth)
Example #17
    def register(self):
        """Update/populate a database table (creates one if it doesn't exist)
        This function reads a shapefile and puts it in a single table.
        :param ogrfile: gdal dataset (e.g. shapefile)
        :param forceGType (optional): a geometry type to be used as the "geom" column
        :returns nothing (but sets the internal sqlalchemy table)
        """
        # currently we can only cope with updating the entire table as a whole
        self.db.dropTable(self.name, self.scheme)

        slurplogger().info("Filling POSTGIS table %s.%s with data from %s" %
                           (self.scheme, self.name, self.ogrfile))

        #open shapefile directory

        shpf = gdal.OpenEx(self.ogrfile, 0)

        count = 0
        for ithlayer in range(shpf.GetLayerCount()):
            shpflayer = shpf.GetLayer(ithlayer)
            if self.layerregex:
                if not re.search(self.layerregex, shpflayer.GetName()):
                    continue
            sourceprj = shpflayer.GetSpatialRef()
            if sourceprj.IsSame(self.targetprj):
                transform = None
            else:
                transform = osr.CoordinateTransformation(
                    sourceprj, self.targetprj)
            # print(sourceprj)

            # print(self.targetprj)

            # print(sourceprj.IsSame(self.targetprj))
            for feat in shpflayer:
                count += 1
                if self.table is None:
                    cols = columnsFromOgrFeat(feat,
                                              forceGType=self.gtype,
                                              targetsrid=self.targetsrid)
                    self.createTable(cols)
                values = valuesFromOgrFeat(feat, self.encoding, transform,
                                           self.targetsrid, self.swapxy)
                try:
                    self.addEntry(values)
                except Exception:
                    #skip entries which fail to be added
                    pass
                #commit every X times

        #also update entry in the inventory table
        self.updateInvent()
Example #18
    def download(self):
        """Download file"""
        muri = Uri(self.mopts)

        #check if download is needed
        muri.requestInfo()
        uristacked = UriFile(self.mopts.fullname())
        if uristacked.lastmod:
            if muri.lastmod <= uristacked.lastmod:
                slurplogger().info("Already downloaded %s" % (uristacked.url))
                #quick return when there is no need to merge/download
                return uristacked, False

        #check if download is allowed
        kb, maxkb = muri.updateSize()
        if kb > maxkb:
            #split up request and try again

            #create 2 bounding boxes split on time
            Abbox, Bbbox = muri.opts.btdbox.timeSplit()

            AmotuRec = MotuRecursive(copy.deepcopy(self.mopts))
            AmotuRec.mopts.syncbtdbox(Abbox)
            AmotuRec.mopts.out_name = self.mopts.out_name.replace(
                '.nc', '_A.nc')
            AmotuRec.mopts.out_dir = AmotuRec.mopts.cache

            BmotuRec = MotuRecursive(copy.deepcopy(self.mopts))
            BmotuRec.mopts.syncbtdbox(Bbbox)
            BmotuRec.mopts.out_name = self.mopts.out_name.replace(
                '.nc', '_B.nc')
            BmotuRec.mopts.out_dir = BmotuRec.mopts.cache

            Auri, Aupd = AmotuRec.download()
            Buri, Bupd = BmotuRec.download()

            #possible improvement: split the dataset along an unlimited dimension and append the second part to the first
            #patch files together (if updated)
            if Aupd or Bupd or not os.path.exists(self.mopts.fullname()):
                uristacked, upd = stackNcFiles(self.mopts.fullname(), Auri.url,
                                               Buri.url, 'time')
                if not self.keepfiles:
                    #remove the partial files
                    os.remove(AmotuRec.mopts.fullname())
                    os.remove(BmotuRec.mopts.fullname())
            else:
                uristacked = UriFile(self.mopts.fullname())
                upd = False
            return uristacked, upd
        else:
            return muri.download(self.mopts.out_dir, check=True)
Example #19
    def queueRequest(self,fout,requestDict):
        
        if os.path.exists(fout):
            slurplogger().info(f"Already downloaded file {fout}, skipping request")
            return
            
        req_id=None
        #possibly get the request id from a previously queued job
        if fout in self.jobqueue:
            req_id=self.jobqueue[fout]
            
            
        if req_id:
            #try to get an existing job
            slurplogger().info(f"Trying to retrieve previously queued job for {fout}")
            try:
                req=cdsapi.api.Result(self.client,dict(request_id=req_id))
                req.update()
            except Exception:
                #Job cannot be found anymore
                slurplogger().info(f"Job cannot be found anymore for {fout}, requeuing")
                req_id=None
                del self.jobqueue[fout]

        if not req_id:
            #start a new request
            slurplogger().info(f"Queuing new CDS request for {fout}")
            req=self.client.retrieve(self.resource,requestDict)
            req.update()
            req_id=req.reply["request_id"]
            #add an entry to the inventory
            self.jobqueue[fout]=req_id
        
        self.requests.append((req,fout,req.reply["state"]))
Example #20
    def rastExtract(self, uri):
        """How things are extracted from the raster file (this may be overridden in derived classes for more granular access)"""
        slurplogger().info("Extracting info from raster: %s" % (uri.url))
        #check file type
        if uri.url.endswith(".nc"):
            raw = False
        else:
            raw = True

        if self.preview or not raw:
            meta = self.rastFromRio(uri)
        else:
            meta = self.rastFromGDAL(uri)

        return meta
Example #21
 def register(self):
     slurplogger().info("Building file list..")
     files=[UriFile(file) for file in findFiles(self.dataDir(),'.*gz',self._dbinvent.lastupdate)]
     # import pdb;pdb.set_trace() 
     filesnew=self.retainnewUris(files)
     if len(filesnew) == 0:
         slurplogger().info("GRDC: No database update needed")
         return
     # filesnew=[UriFile(os.path.join(self.dataDir(),"4208270_Q_Month.txt.gz"))]
     #loop over files
     for uri in filesnew:
         meta=GRDCmetaExtractor(uri)
         self.addEntry(meta)
     
     self.updateInvent()
Example #22
    def encryptAuth(self):
        """Encrypt the authentication credentials to store in the database"""
        salt = os.urandom(16)
        cyph = self.genCypher(salt, self.db.passw.encode('utf-8'))

        conf = json.dumps(self.auth).encode('utf-8')

        if self.authver == "ENCRV1":
            slurplogger().warning(
                "Replacing the authentication details with a safer encryption (not compatible with older geoslurp versions)"
            )
            self.authver = "ENCRV2"
        self.userentry.auth = self.authver.encode(
            'utf-8') + salt + cyph.encrypt(conf)
        return
Example #23
    def register(self):
        """"""
        for gsource in self.dsources:
            try:
                src=gsource(self.cacheDir())
                metadicts=src.extract()
                slurplogger().info("registering %s"%(src.meta["name"]))
                for meta in metadicts:
                    if self.entryNeedsUpdate(meta['name'],lastmod=src.meta['lastupdate'],col=self.table.name):
                        self.addEntry(meta)
            except Exception as e:
                #possibly not downloaded but that is ok
                continue

        self.updateInvent()
Example #24
def icgemMetaExtractor(uri):
    """Extract meta information from a gzipped icgem file"""

    #first extract the icgem header
    hdr = {}
    with gz.open(uri.url, 'rt') as fid:
        slurplogger().info("Extracting info from %s" % (uri.url))
        for ln in fid:
            #stop reading once the header section has ended
            if 'end_of_head' in ln:
                break

            #header entries consist of a keyword/value pair
            spl = ln.split()
            if len(spl) == 2:
                hdr[spl[0]] = spl[1]

    try:
        meta = {
            "nmax": int(hdr["max_degree"]),
            "lastupdate": uri.lastmod,
            "format": "icgem",
            "gm": float(hdr["earth_gravity_constant"].replace('D', 'E')),
            "re": float(hdr["radius"].replace('D', 'E')),
            "uri": uri.url,
            "type": "GSM",
            "data": {
                "name": hdr["modelname"]
            }
        }
    except Exception as e:
        pass

    #add tide system
    try:
        tmp = hdr["tide_system"]
        if re.search('zero_tide', tmp):
            meta["tidesystem"] = "zero-tide"
        elif re.search('tide_free', tmp):
            meta["tidesystem"] = "tide-free"
    except:
        pass

    return meta
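
A sketch of calling the extractor on a single file, assuming UriFile from the other examples and a gzipped icgem .gfc file at a hypothetical path.

uri = UriFile("/data/icgem/somemodel.gfc.gz")  # hypothetical gravity field model
meta = icgemMetaExtractor(uri)
print(meta["nmax"], meta["gm"], meta["re"], meta.get("tidesystem"))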
Example #25
    def register(self):
        slurplogger().info("Building file list..")
        files = [
            UriFile(file) for file in findFiles(self.cacheDir(), '.*love',
                                                self._dbinvent.lastupdate)
        ]

        if len(files) == 0:
            slurplogger().info("LLove: No new files found since last update")
            return

        self.truncateTable()
        #loop over files
        for uri in files:
            self.addEntry(lloveMetaExtractor(uri))
        self.updateInvent()
Example #26
    def pull(self):
        """Pulls the shapefile layers from the server"""
        zipf = http(
            "http://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/indexes/" +
            self.filebase + ".zip",
            lastmod=datetime(2018, 9, 26))

        #download the zip shapefiles
        downloaddir = self.cacheDir()
        uri, upd = zipf.download(downloaddir, check=True)
        zipd = os.path.join(downloaddir, 'extract')
        if not os.path.exists(zipd):
            #unzip the goodies
            with ZipFile(uri.url, 'r') as zp:
                slurplogger().info("Unzipping %s" % (uri.url))
                zp.extractall(zipd)
Example #27
    def pull(self):
        try:
            cred=self.conf.authCred("grdcgis")
        except Exception:
            raise RuntimeError("No authentication data found. The GRDC data is unfortunately only available after agreeing with the GRDC user policy, please visit https://www.bafg.de/GRDC/EN/04_spcldtbss/43_GRfN/refDataset_node.html")
        #pull the data but rezip it with gzip to save space 
        pullGRDC(self.cacheDir(),cred,pattern=self.zipname,unzip=False)

        datadir=self.dataDir()
        #rezip data in the datadirectory
        with ZipFile(os.path.join(self.cacheDir(),self.zipname),'r') as zp:
            for member in zp.namelist():
                #open file and gzip it into the datadir
                with zp.open(member) as fid:
                    slurplogger().info("re-gzipping file %s"%member)
                    with gzip.open(os.path.join(datadir,member+".gz"),'wb') as gzid:
                        gzid.write(fid.read())
Example #28
    def register(self):

        #create a list of files which need to be (re)registered
        if self.updated:
            files=self.updated
        else:
            files=[UriFile(file) for file in findFiles(self._dbinvent.datadir,'.*gfc.gz',since=self._dbinvent.lastupdate)]

        newfiles=self.retainnewUris(files)
        #loop over files
        for uri in newfiles:
            slurplogger().info("extracting meta info from %s"%(uri.url))
            meta=icgemMetaExtractor(uri)
            meta=enhanceMeta(meta)
            self.addEntry(meta)

        self.updateInvent()
Example #29
    def register(self):
        if not self.table:
            #create a new table on the fly
            self.createTable(self.columns)

        #create a list of files which need to be (re)registered
        newfiles = self.retainnewUris([
            UriFile(file)
            for file in findFiles(self.dataDir(), f".*\{self.app}$")
        ])
        for uri in newfiles:
            meta = self.metaExtractor(uri)
            if not meta:
                #don't register empty entries
                continue
            slurplogger().info(f"Adding metadata from {uri.url}")
            self.addEntry(meta)
        self._dbinvent.data["Description"] = self.description
        self.updateInvent()
Example #30
    def updateSize(self):
        """Request information about the size of the query"""
        self.opts.size = True
        oldd = self.opts.out_dir
        self.opts.out_dir = self.opts.cache
        try:
            execute_request(self.opts)
        except Exception as e:
            slurplogger().error("failed to request size: %s", e)
            raise
        # self.opts.out_name=self.opts.out_name.replace('.nc','.xml')

        self.opts.size = False
        self.opts.out_dir = oldd

        xml = XMLTree.parse(os.path.join(self.opts.cache, self.opts.out_name))
        self.kbsize = float(xml.getroot().attrib['size'])
        self.maxkbsize = float(xml.getroot().attrib['maxAllowedSize'])
        self.opts.out_name = self.opts.out_name.replace('.xml', '.nc')

        return self.kbsize, self.maxkbsize