#Shared imports for the snippets below; slurplog/slurplogger, nccopyAtt, ncStr,
#flag4_isonLand and UriFile are assumed to be repo-internal geoslurp helpers
#imported elsewhere in the package
from datetime import datetime, timedelta
import re
import numpy as np
from netCDF4 import Dataset as ncDset, num2date
from osgeo import ogr


def FESOMMetaExtractor(uri):
    """Extract meta information from a FESOM output file"""
    slurplog.info("extracting data from %s" % (uri.url))

    try:
        ncFESOM = ncDset(uri.url)
    except OSError:
        slurplog.error("Cannot open netcdf file, skipping")
        return None
    tvar = ncFESOM["time"]

    if tvar.shape[0] == 0:
        #quick return
        return None

    if tvar.calendar == "noleap":
        slurplog.warning("Found 'noleap' calendar string but assuming 'standard'")
        cal = 'standard'
    else:
        cal = tvar.calendar

    #parse time
    time = num2date(tvar[:], tvar.units, cal, only_use_cftime_datetimes=False)
    #try to estimate the time step from the median of the time differences
    deltamedian = np.median(np.diff(time))
    if 28 <= deltamedian.days <= 32:
        freq = 'monthly'
        #set tstart to the beginning of the month
        tstart = datetime(time[0].year, time[0].month, 1)
    elif 1 <= deltamedian.days < 28:
        freq = "%ddaily" % (deltamedian.days)
        #remove the median time interval from the first time
        tstart = time[0] - deltamedian
    elif deltamedian.days < 1:
        freq = "%dhourly" % (deltamedian.seconds // 3600)
        #remove the median time interval from the first time
        tstart = time[0] - deltamedian
    else:
        #time steps longer than a month are unexpected; skip the file rather
        #than leave freq and tstart unset
        slurplog.error("Cannot determine the time step of %s, skipping" % (uri.url))
        return None

    data = {"variables": {}}
    for ky, var in ncFESOM.variables.items():
        try:
            data["variables"][ky] = var.description
        except AttributeError:
            data["variables"][ky] = ky

    meta = {"tstart": tstart,
            "tend": time[-1] + deltamedian,
            "lastupdate": uri.lastmod,
            "interval": freq,
            "uri": uri.url,
            "data": data}
    ncFESOM.close()
    return meta
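
#A minimal, self-contained sketch (not part of the original module) of the
#median-based frequency inference used in FESOMMetaExtractor, run on a
#hypothetical monthly time series:
def _demoFrequencyInference():
    times = [datetime(2000, m, 15) for m in range(1, 13)]
    deltamedian = np.median(np.diff(times))
    #a median step of 28..32 days is classified as 'monthly'
    assert 28 <= deltamedian.days <= 32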
def rastExtract(self, uri):
    """extract raster and other meta info from the downloaded files"""
    meta = super().rastExtract(uri)
    meta["lastupdate"] = uri.lastmod

    ncid = ncDset(uri.url)
    time = ncid.variables["time"]
    meta["time"] = num2date(time[:], time.units)
    ncid.close()

    return meta
def ncSwapLongitude(ncinout, longitudevar='longitude'):
    """Swap the longitude representation to span 0..360 or -180..180"""
    ncid = ncDset(ncinout, 'r+')
    ncid[longitudevar].set_auto_mask(False)
    if max(ncid[longitudevar][:]) > 180:
        #data spans 0..360: convert to -180..180
        ncid[longitudevar].valid_min = -180
        ncid[longitudevar].valid_max = 180
        ncid[longitudevar][ncid[longitudevar][:] > 180] -= 360
    elif min(ncid[longitudevar][:]) < 0:
        #data spans -180..180: convert to 0..360
        ncid[longitudevar].valid_min = 0
        ncid[longitudevar].valid_max = 360
        ncid[longitudevar][ncid[longitudevar][:] < 0] += 360
    ncid[longitudevar].set_auto_mask(True)
    ncid.close()
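
#Hedged usage sketch for ncSwapLongitude (the file name and values are
#hypothetical): build a small netcdf file with a 0..360 longitude axis and
#swap it to the -180..180 convention.
def _demoSwapLongitude(path="demo_lon.nc"):
    with ncDset(path, 'w') as nc:
        nc.createDimension('longitude', 4)
        lonvar = nc.createVariable('longitude', 'f8', ('longitude',))
        lonvar[:] = [0.0, 90.0, 180.0, 270.0]
    ncSwapLongitude(path)
    #the 270 degree entry now reads -90; the others are unchanged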
def orasMetaExtractor(uri):
    """Extract meta information from an ORAS output file"""
    slurplog.info("extracting data from %s" % (uri.url))

    try:
        nc_id = ncDset(uri.url)
    except OSError:
        slurplog.error("Cannot open netcdf file, skipping")
        return None
    tvar = nc_id["time_counter"]

    if tvar.shape[0] == 0:
        #quick return
        return None

    if tvar.calendar == "noleap":
        slurplog.warning("Found 'noleap' calendar string but assuming 'standard'")
        cal = 'standard'
    else:
        cal = tvar.calendar

    #parse time
    time = num2date(tvar[:], tvar.units, cal, only_use_cftime_datetimes=False)

    data = {"variables": {}}
    for ky, var in nc_id.variables.items():
        try:
            data["variables"][ky] = var.description
        except AttributeError:
            data["variables"][ky] = ky

    meta = {"tstart": datetime(time[0].year, time[0].month, 1),
            "lastupdate": uri.lastmod,
            "uri": uri.url,
            "data": data}
    nc_id.close()
    return meta
def stackNcFiles(ncout, ncA, ncB, dimension):
    """Append netcdf file B after file A along the dimension specified"""
    slurplogger().info("Patching files %s %s", ncA, ncB)
    #open the three netcdf files
    outid = ncDset(ncout, 'w', clobber=True)
    aid = ncDset(ncA, 'r')
    bid = ncDset(ncB, 'r')
    #copy arrays in parts when they are larger than the chop limit
    choplimit = 1024 * 1024 * 1024

    #copy global attributes
    nccopyAtt(aid, outid)
    #dimension to be excluded from the dimension copy
    dexcl = [dimension]
    #make a list of variables which need to be appended and cannot be copied straight away
    vapp = [var for var in aid.variables.keys()
            if dimension in aid.variables[var].dimensions]

    #copy dimensions (excluding the specified one)
    for nm, dim in aid.dimensions.items():
        if nm in dexcl:
            continue
        if dim.isunlimited():
            outid.createDimension(nm, None)
        else:
            outid.createDimension(nm, len(dim))

    #copy all variables and attributes which don't require appending
    for nm, var in aid.variables.items():
        if nm in vapp:
            continue
        outid.createVariable(nm, var.datatype, var.dimensions)
        outid[nm].set_auto_mask(False)
        outid[nm][:] = aid[nm][:]
        nccopyAtt(aid[nm], outid[nm], ['_FillValue'])

    #create the new (stacked) dimension
    outid.createDimension(dimension,
                          aid.dimensions[dimension].size + bid.dimensions[dimension].size)

    #create the appended variables
    for var in vapp:
        outid.createVariable(var, aid[var].datatype, bid[var].dimensions)
        outid[var].set_auto_mask(False)
        nccopyAtt(aid[var], outid[var], ['_FillValue'])
        #find out which axis corresponds to the dimension being appended
        dimax = aid[var].dimensions.index(dimension)
        idxA = [slice(0, outid.dimensions[dim].size) for dim in outid[var].dimensions]
        idxA[dimax] = slice(0, aid.dimensions[dimension].size)
        if aid[var][:].nbytes < choplimit:
            outid[var][tuple(idxA)] = aid[var][:]
        else:
            #matrix is too big: loop over the first dimension
            trail = [slice(None)] * (aid[var].ndim - 1)
            for ia, i in enumerate(range(idxA[0].start, idxA[0].stop)):
                outid[var][tuple([i] + idxA[1:])] = aid[var][tuple([ia] + trail)]

        idxB = idxA.copy()
        idxB[dimax] = slice(aid.dimensions[dimension].size,
                            outid.dimensions[dimension].size)
        if bid[var][:].nbytes < choplimit:
            outid[var][tuple(idxB)] = bid[var][:]
        else:
            #matrix is too big: loop over the first dimension
            trail = [slice(None)] * (bid[var].ndim - 1)
            for ib, i in enumerate(range(idxB[0].start, idxB[0].stop)):
                outid[var][tuple([i] + idxB[1:])] = bid[var][tuple([ib] + trail)]

    #register the modification in the History attribute (create it when absent)
    history = outid.getncattr('History') if 'History' in outid.ncattrs() else ''
    outid.setncattr('History', history +
                    '\n Modified at %s by Geoslurp: Merge two netcdf files along dimension %s'
                    % (datetime.now(), dimension))
    outid.close()
    aid.close()
    bid.close()
    return UriFile(ncout), True
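
#Hedged usage sketch for stackNcFiles (file and variable names are
#hypothetical; the repo-internal nccopyAtt and UriFile helpers are assumed
#available): create two small slabs sharing all dimensions except 'time'
#and stack them.
def _demoStackNcFiles():
    def makeslab(path, tvals):
        with ncDset(path, 'w') as nc:
            nc.createDimension('time', len(tvals))
            nc.createDimension('lat', 2)
            nc.createDimension('lon', 2)
            nc.createVariable('time', 'f8', ('time',))[:] = tvals
            nc.createVariable('sst', 'f4', ('time', 'lat', 'lon'))[:] = 0.0
            nc.History = "created by the stacking sketch"

    makeslab("slabA.nc", [0.0, 1.0])
    makeslab("slabB.nc", [2.0, 3.0])
    #the result holds a 4-element time dimension
    uri, updated = stackNcFiles("stacked.nc", "slabA.nc", "slabB.nc", "time")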
def coraMetaExtractor(uri):
    """Extract meta information (tracks etc.) as a dictionary from an Argo profile file"""
    try:
        url = uri.url
        ncArgo = ncDset(url)

        #check for a minimum amount of profiles
        minProfs = 3
        if ncArgo.dimensions['N_PROF'].size < minProfs:
            slurplogger().info("amount of profiles in %s is less than %d, skipping" % (url, minProfs))
            return {}

        slurplogger().info("Extracting meta info from: %s" % (url))

        #get the reference time
        t0 = num2date(0.0, units=ncArgo['JULD'].units, only_use_cftime_datetimes=False)

        #the datacenter tag encodes the file type: mooring, profiler, ...
        datacenter = url.split('_')[-2] + '_' + url.split('_')[-1][0:2]

        #get modes for each profile
        mode = np.array([x for x in ncStr(ncArgo['DATA_MODE'])])
        #get cycles for each profile
        cycle = [int(x) for x in ncArgo['CYCLE_NUMBER'][:]]

        tlocation = []
        geoMpoints = ogr.Geometry(ogr.wkbMultiPoint)
        iprof = []
        for i, (t, lon, lat) in enumerate(zip(ncArgo["JULD"][:], ncArgo["LONGITUDE"][:], ncArgo["LATITUDE"][:])):
            if lon > 180:
                #make sure longitude goes from -180 to 180
                lon -= 360
            #we don't want nan positions or timetags in the database
            if np.ma.is_masked(lon) or np.ma.is_masked(lat) or np.ma.is_masked(t):
                continue
            tdt = t0 + timedelta(days=float(t))
            tlocation.append(tdt)
            point = ogr.Geometry(ogr.wkbPoint)
            point.AddPoint(float(lon), float(lat), 0)
            geoMpoints.AddGeometry(point)
            iprof.append(i)

        ncArgo.close()

        if not tlocation:
            #return an empty dictionary when no valid profiles have been found
            return {}

        tstart = np.min(tlocation)
        tend = np.max(tlocation)

        meta = {"uri": url, "lastupdate": uri.lastmod, "datacenter": datacenter,
                "tstart": tstart, "tend": tend, "mode": mode,
                "tlocation": tlocation, "cycle": cycle, "iprof": iprof,
                "geom": geoMpoints.ExportToIsoWkb()}
    except Exception as e:
        raise RuntimeWarning("Cannot extract meta information from %s: %s" % (url, e))

    return meta
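
#A minimal, self-contained sketch of the OGR multipoint handling used in
#coraMetaExtractor (the coordinates are made up):
def _demoMultiPoint():
    pts = ogr.Geometry(ogr.wkbMultiPoint)
    point = ogr.Geometry(ogr.wkbPoint)
    point.AddPoint(5.0, 52.0, 0)
    pts.AddGeometry(point)
    #ExportToIsoWkb yields bytes suitable for a (PostGIS) geometry column
    return pts.ExportToIsoWkb()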
def radsMetaDataExtractor(uri):
    """Extract a dictionary with rads entries for the database"""
    slurplogger().info("extracting data from %s" % (uri.url))
    ncrads = ncDset(uri.url)
    track = ogr.Geometry(ogr.wkbMultiLineString)
    data = {"segments": []}

    if ncrads.dimensions['time'].size < 3:
        #no point trying to index empty files
        ncrads.close()
        return {}

    #reference time
    t0 = datetime(1985, 1, 1)

    #we need to compare some values from the previous loop iteration, which we store in the following variables
    lonprev = ncrads["lon"][0]
    if lonprev > 180:
        lonprev -= 360
    tprev = ncrads["time"][0]
    onlandprev = flag4_isonLand(ncrads["flags"][0])

    #initiate the first linestring segment
    trackseg = ogr.Geometry(ogr.wkbLineString)
    #we also store some bookkeeping information on each track segment
    segment = {"tstart": (t0 + timedelta(seconds=float(tprev))).isoformat(),
               "tend": None,
               "istart": 0,
               "iend": 0,
               "land": int(flag4_isonLand(ncrads["flags"][0]))}

    for i, (t, lon, lat, flag) in enumerate(zip(ncrads["time"][:], ncrads["lon"][:],
                                                ncrads["lat"][:], ncrads['flags'][:])):
        dt = t0 + timedelta(seconds=float(t))
        onland = flag4_isonLand(flag)
        if lon > 180:
            #make sure longitude goes from -180 to 180
            lon -= 360

        #start a new segment when (a) crossing the 180 degree meridian or (b) the ocean/land flag changes
        if abs(lonprev - lon) > 180 or onlandprev != onland:
            #segments which have more than a single point will be added
            if trackseg.GetPointCount() > 1:
                #gather some end bookkeeping on the previous segment
                segment["tend"] = dt.isoformat()
                segment["iend"] = i
                #append segment and bookkeeping data
                data["segments"].append(segment.copy())
                track.AddGeometry(trackseg)
            #initialize a new segment
            segment["tstart"] = dt.isoformat()
            segment["istart"] = i
            segment["land"] = int(onland)
            trackseg = ogr.Geometry(ogr.wkbLineString)

        trackseg.AddPoint(float(lon), float(lat), 0)
        lonprev = lon
        tprev = t
        onlandprev = onland

    #also add the last segment
    if trackseg.GetPointCount() > 1:
        #gather some end bookkeeping on the previous segment
        segment["tend"] = dt.isoformat()
        segment["iend"] = i
        #append segment and bookkeeping data
        data["segments"].append(segment)
        track.AddGeometry(trackseg)

    if not data["segments"]:
        #return an empty dict when no segments are found
        ncrads.close()
        return {}

    #extract pass and cycle number from the file name (p<pass>c<cycle>.nc)
    mtch = re.search("p([0-9]+)c([0-9]+).nc", uri.url)
    meta = {"lastupdate": uri.lastmod,
            "tstart": t0 + timedelta(seconds=float(ncrads['time'][0])),
            "tend": t0 + timedelta(seconds=float(ncrads['time'][-1])),
            "cycle": int(mtch.group(2)),
            "apass": int(mtch.group(1)),
            "uri": uri.url,
            "data": data,
            "geom": track.ExportToIsoWkb()}
    ncrads.close()
    return meta
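
#A small sketch of the pass/cycle parsing used above: RADS file names are
#assumed to end in p<pass>c<cycle>.nc (the example name is hypothetical).
def _demoPassCycleParsing():
    mtch = re.search("p([0-9]+)c([0-9]+).nc", "ers1_p0042c013.nc")
    apass, cycle = int(mtch.group(1)), int(mtch.group(2))
    assert (apass, cycle) == (42, 13)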