def pull(self, pattern='.*'):
    """Pull the EasyCORA files from the Copernicus FTP server and unpack them.

    :param pattern: (string) only download files whose names obey this regular
        expression (e.g. 20[0-9][0-9] to download data from 2000 onward)
    """
    ftproot = "ftp://my.cmems-du.eu/Core/INSITU_GLO_TS_REP_OBSERVATIONS_013_001_b/CORIOLIS-GLOBAL-EasyCORA-OBS/global"
    # get CMEMS authentication details from the database
    cred = self.conf.authCred("cmems")
    ftpcr = ftpCrawler(ftproot, auth=cred, pattern=pattern)
    updated = ftpcr.parallelDownload(self.cacheDir(), check=True, maxconn=10, continueonError=True)

    # unpack the downloaded archives in the data directory
    datadir = self.dataDir()
    for tarf in [UriFile(f) for f in findFiles(self.cacheDir(), ".*tgz$")]:
        successfile = os.path.join(datadir, os.path.basename(tarf.url) + ".isextracted")
        try:
            # only unpack when needed: skip archives which were already extracted
            if os.path.exists(successfile):
                slurplogger().info(f"{tarf.url} is already extracted, skipping")
            else:
                with tarfile.open(tarf.url, "r:gz") as tf:
                    slurplogger().info(f"Extracting trajectory files from {tarf.url}")
                    tf.extractall(datadir)
                # touch the success file to indicate this archive has been successfully extracted
                Path(successfile).touch()
        except tarfile.ReadError:
            raise
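# Hedged usage sketch (illustrative only, not from the source): assuming this
# method lives on a geoslurp dataset class, hypothetically named EasyCora,
# restricting the crawl to archives from 2010 onward could look like:
#
#     ds = EasyCora(dbcon)          # hypothetical class/constructor
#     ds.pull(pattern="201[0-9]")   # regex is matched against remote file names
#     ds.register()                 # then register the extracted .nc files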
def register(self, center=None):
    """Register downloaded combined prof files.

    :param center: (string) optionally restrict registration to URIs matching
        this regular expression
    """
    # create a list of files which need to be (re)registered
    if self.updated:
        files = self.updated
    else:
        slurplogger().info("Building file list..")
        files = [UriFile(file) for file in findFiles(self.dataDir(), '.*nc', self._dbinvent.lastupdate)]

    if len(files) == 0:
        slurplogger().info("Argo: No new files found since last update")
        return

    filesnew = self.retainnewUris(files)
    if len(filesnew) == 0:
        slurplogger().info("Argo: No database update needed")
        return

    # loop over files
    for uri in filesnew:
        if center and not re.search(center, uri.url):
            continue
        meta = argoMetaExtractor(uri)
        if meta:
            self.addEntry(meta)

    self.updateInvent()
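# Hedged usage sketch (illustrative, names are assumptions): because ``center``
# is applied with re.search against each file URI, any substring of the path
# works as a filter. If the Argo file paths contain the data-center name,
# something like the following would register only that center's files:
#
#     ds.register(center="coriolis")   # hypothetical; matches URIs mentioning coriolis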
def register(self):
    slurplogger().info("Building file list..")
    files = [UriFile(file) for file in findFiles(self.dataDir(), '.*gz', self._dbinvent.lastupdate)]
    filesnew = self.retainnewUris(files)
    if len(filesnew) == 0:
        slurplogger().info("GRDC: No database update needed")
        return

    # loop over files
    for uri in filesnew:
        meta = GRDCmetaExtractor(uri)
        self.addEntry(meta)

    self.updateInvent()
def register(self):
    slurplogger().info("Building file list..")
    files = [UriFile(file) for file in findFiles(self.dataDir(), '.*love', self._dbinvent.lastupdate)]
    if len(files) == 0:
        slurplogger().info("LLove: No new files found since last update")
        return

    filesnew = self.retainnewUris(files)
    if len(filesnew) == 0:
        slurplogger().info("LLove: No database update needed")
        return

    # loop over files
    for uri in filesnew:
        self.addEntry(lloveMetaExtractor(uri))

    self.updateInvent()
def register(self, pattern=r'.*\.nc$'):
    """Register downloaded trajectory files from CORA.

    :param pattern: (string) file pattern to look for (defaults to all files
        ending in .nc)
    """
    # create a list of files which need to be (re)registered
    newfiles = self.retainnewUris([UriFile(file) for file in findFiles(self.dataDir(), pattern)])
    for uri in newfiles:
        meta = coraMetaExtractor(uri)
        if not meta:
            # don't register empty entries
            continue
        self.addEntry(meta)

    self._dbinvent.data["Description"] = "EasyCora output data table"
    self._dbinvent.data["CORAversion"] = "5.2"
    self.updateInvent()
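# Hedged usage sketch (illustrative only): the ``pattern`` argument is passed
# straight to findFiles, so registration can be limited to a subset of the
# extracted files. Assuming the file names embed a year (an assumption about
# the CORA naming scheme), one might write:
#
#     ds.register(pattern=r'.*2019.*\.nc$')   # only register files mentioning 2019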
def register(self):
    slurplogger().info("Building file list..")
    files = [UriFile(file) for file in findFiles(self.cacheDir(), '.*love', self._dbinvent.lastupdate)]
    if len(files) == 0:
        slurplogger().info("LLove: No new files found since last update")
        return

    # start from an empty table and re-register all found files
    self.truncateTable()
    # loop over files
    for uri in files:
        self.addEntry(lloveMetaExtractor(uri))

    self.updateInvent()
def register(self):
    # create a list of files which need to be (re)registered
    if self.updated:
        files = self.updated
    else:
        files = [UriFile(file) for file in findFiles(self.dataDir(), r'G.*\.gz', self._dbinvent.lastupdate)]

    filesnew = self.retainnewUris(files)
    # loop over the newer files
    for uri in filesnew:
        meta = graceMetaExtractor(uri)
        self.addEntry(meta)

    self.updateInvent()
def register(self):
    if not self.table:
        # create a new table on the fly
        self.createTable(self.columns)

    # create a list of files which need to be (re)registered
    newfiles = self.retainnewUris([UriFile(file) for file in findFiles(self.dataDir(), rf".*\{self.app}$")])
    for uri in newfiles:
        meta = self.metaExtractor(uri)
        if not meta:
            # don't register empty entries
            continue
        slurplogger().info(f"Adding metadata from {uri.url}")
        self.addEntry(meta)

    self._dbinvent.data["Description"] = self.description
    self.updateInvent()
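# Hedged sketch of the contract this generic register() relies on (the
# attribute names are taken from the method body above; the subclass name and
# values are hypothetical):
#
#     class MyDataset(...):               # hypothetical subclass
#         app = ".txt"                    # file extension matched by register()
#         columns = [...]                 # column definitions for createTable()
#         description = "my data table"
#
#         @staticmethod
#         def metaExtractor(uri):
#             ...                         # return metadata, or something falsy to skip the file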
def register(self):
    """Register all downloaded fronts (in text files)."""
    slurplogger().info("Building file list..")
    files = [UriFile(file) for file in findFiles(self.cacheDir(), '.*txt', self._dbinvent.lastupdate)]
    if len(files) == 0:
        slurplogger().info("Orsifronts: No new files found since last update")
        return

    # empty the (possibly already filled) table before re-registering
    self.truncateTable()
    # loop over files
    for uri in files:
        slurplogger().info(f"adding {uri.url}")
        self.addEntry(orsiMetaExtractor(uri))

    self.updateInvent()