Example #1
File: FESOM.py Project: whigg/geoslurp
def FESOMMetaExtractor(uri):
    """Extract meta information from a FESOM output file"""
    slurplog.info("extracting data from %s" % (uri.url))

    try:
        ncFESOM = ncDset(uri.url)
    except OSError:
        slurplog.error("Cannot open netcdf file, skipping")
        return None
    tvar = ncFESOM["time"]

    if tvar.shape[0] == 0:
        #quick return
        return None

    if tvar.calendar == "noleap":
        slurplog.warning(
            "Found 'noleap' calendar string, but assuming 'standard'")
        cal = 'standard'
    else:
        cal = tvar.calendar

    #parse time
    time = num2date(tvar[:], tvar.units, cal, only_use_cftime_datetimes=False)
    # try to estimate the time step from the median
    deltamedian = np.median(np.diff(time))
    if deltamedian.days > 28 and deltamedian.days <= 32:
        freq = 'monthly'
        #set tstart to the beginning of the month
        tstart = datetime(time[0].year, time[0].month, 1)
    elif deltamedian.days >= 1 and deltamedian.days < 28:
        freq = "%ddaily" % (deltamedian.days)
        #remove the median time interval from the first time
        tstart = time[0] - deltamedian
    elif deltamedian.days < 1:
        freq = "%dhourly" % (deltamedian.seconds / 3600)
        #remove the median time interval from the first time
        tstart = time[0] - deltamedian

    data = {"variables": {}}

    for ky, var in ncFESOM.variables.items():
        try:
            data["variables"][ky] = var.description
        except AttributeError:
            data["variables"][ky] = ky

    meta = {
        "tstart": tstart,
        "tend": time[-1] + deltamedian,
        "lastupdate": uri.lastmod,
        "interval": freq,
        "uri": uri.url,
        "data": data
    }
    ncFESOM.close()
    return meta
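
The frequency classification above hinges on the median spacing of the time axis. Below is a minimal, file-free sketch of that heuristic, assuming the time stamps are plain datetime objects as returned by num2date with only_use_cftime_datetimes=False (the sample dates are invented):

import numpy as np
from datetime import datetime

# invented monthly time axis standing in for the FESOM 'time' variable
time = [datetime(2000, m, 15) for m in range(1, 7)]

# median spacing between consecutive stamps; numpy keeps this as a timedelta
deltamedian = np.median(np.diff(time))

if 28 < deltamedian.days <= 32:
    freq = 'monthly'
    tstart = datetime(time[0].year, time[0].month, 1)
elif 1 <= deltamedian.days < 28:
    freq = "%ddaily" % deltamedian.days
    tstart = time[0] - deltamedian
else:
    freq = "%dhourly" % (deltamedian.seconds / 3600)
    tstart = time[0] - deltamedian

print(freq, tstart)   # -> monthly 2000-01-01 00:00:00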
Example #2
def rastExtract(self, uri):
    """extract raster and other meta info from the downloaded files"""
    meta = super().rastExtract(uri)
    meta["lastupdate"] = uri.lastmod
    ncid = ncDset(uri.url)
    time = ncid.variables["time"]
    #convert the numeric time values into datetimes using the variable's units
    meta["time"] = num2date(time[:], time.units)
    return meta
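
For context, num2date (from the netCDF4/cftime stack) converts the numeric values stored in a CF-style time variable into datetime objects using its units string. A small stand-alone sketch with made-up values and units:

from netCDF4 import num2date

# hypothetical CF time values and units, not read from a real file
values = [0, 31, 59]
units = "days since 1950-01-01 00:00:00"

dates = num2date(values, units, only_use_cftime_datetimes=False)
print(dates)   # -> 1950-01-01, 1950-02-01, 1950-03-01 as datetime objects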
Example #3
def ncSwapLongitude(ncinout, longitudevar='longitude'):
    """swap the longitude representation to span 0..360 or -180..180"""
    ncid = ncDset(ncinout, 'r+')

    ncid[longitudevar].set_auto_mask(False)
    #find the longitude variable

    if max(ncid[longitudevar][:]) > 180:
        ncid[longitudevar].valid_min = -180
        ncid[longitudevar].valid_max = 180
        ncid[longitudevar][ncid[longitudevar][:] > 180] -= 360
    elif min(ncid[longitudevar][:]) < 0:
        ncid[longitudevar].valid_min = 0
        ncid[longitudevar].valid_max = 360
        ncid[longitudevar][ncid[longitudevar][:] < 0] += 360

    ncid[longitudevar].set_auto_mask(True)

    ncid.close()
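
The same swap can be written against a bare numpy array; a minimal sketch of the two conventions handled above (array values are illustrative):

import numpy as np

lon = np.array([0.0, 90.0, 185.0, 359.0])   # currently in the 0..360 convention

if lon.max() > 180:
    # wrap to -180..180
    lon[lon > 180] -= 360
elif lon.min() < 0:
    # wrap to 0..360
    lon[lon < 0] += 360

print(lon)   # now spans -180..180: 0, 90, -175, -1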
Example #4
File: ORAS5.py Project: whigg/geoslurp
def orasMetaExtractor(uri):
    """Extract meta information from a output file"""
    slurplog.info("extracting data from %s"%(uri.url))

    try:
        nc_id=ncDset(uri.url)
    except OSError:
        slurplog.error("Cannot open netcdf file, skipping")
        return None
    tvar=nc_id["time_counter"]

    if tvar.shape[0] == 0:
        #quick return 
        return None
    
    if tvar.calendar == "noleap":
        slurplog.warning("Note found 'noleap' calendar string but assuming 'standard'")
        cal='standard'
    else:
        cal=tvar.calendar

    #parse time
    time=num2date(tvar[:], tvar.units,cal,only_use_cftime_datetimes=False)

    data={"variables":{}}

    for ky,var in nc_id.variables.items():
        try:
            data["variables"][ky]=var.description
        except AttributeError:
            data["variables"][ky]=ky


    meta={"tstart":datetime(time[0].year,time[0].month,1),
          "lastupdate":uri.lastmod,
          "uri":uri.url,
          "data":data
          }
    nc_id.close()
    return meta
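
The variables loop in these extractors falls back to the variable name when a netCDF variable carries no description attribute. The same pattern expressed with getattr on plain stand-in objects (SimpleNamespace is used here purely for illustration):

from types import SimpleNamespace

# stand-ins for netCDF variables: one with, one without a 'description' attribute
variables = {
    "sst": SimpleNamespace(description="sea surface temperature"),
    "time_counter": SimpleNamespace(),
}

data = {"variables": {}}
for ky, var in variables.items():
    # equivalent to the try/except AttributeError fallback above
    data["variables"][ky] = getattr(var, "description", ky)

print(data["variables"])
# {'sst': 'sea surface temperature', 'time_counter': 'time_counter'}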
Example #5
def stackNcFiles(ncout, ncA, ncB, dimension):
    """Append netcdf file B after file A along the dimension specified"""
    slurplogger().info("Patching files %s %s", ncA, ncB)
    #open the three netcdf files
    outid = ncDset(ncout, 'w', clobber=True)
    aid = ncDset(ncA, 'r')
    bid = ncDset(ncB, 'r')

    #copy arrays in parts when larger than the choplimit
    choplimit = 1024 * 1024 * 1024

    #copy global attributes
    nccopyAtt(aid, outid)

    # dimension to be excluded from dimension copy
    dexcl = [dimension]

    # make a list of variables which need to be appended and cannot be copied straight away
    vapp = [
        var for var in aid.variables.keys()
        if dimension in aid.variables[var].dimensions
    ]

    #copy dimensions (excluding the specified one)
    for nm, dim in aid.dimensions.items():
        if nm in dexcl:
            continue
        if dim.isunlimited():
            outid.createDimension(nm, None)
        else:
            outid.createDimension(nm, len(dim))

    # copy all variables and attributes which don't require appending
    for nm, var in aid.variables.items():
        if nm in vapp:
            continue
        outid.createVariable(nm, var.datatype, var.dimensions)
        outid[nm].set_auto_mask(False)
        outid[nm][:] = aid[nm][:]
        nccopyAtt(aid[nm], outid[nm], ['_FillValue'])

    #create new dimension
    outid.createDimension(
        dimension,
        aid.dimensions[dimension].size + bid.dimensions[dimension].size)

    #create new appended variables
    for var in vapp:
        outid.createVariable(var, aid[var].datatype, bid[var].dimensions)
        outid[var].set_auto_mask(False)
        nccopyAtt(aid[var], outid[var], ['_FillValue'])

        #find out which axis is the to be appended dimension
        dimax = aid[var].dimensions.index(dimension)

        idxA = []
        for dim in outid[var].dimensions:
            idxA.append(slice(0, outid.dimensions[dim].size))

        idxA[dimax] = slice(0, aid.dimensions[dimension].size)

        if aid[var][:].nbytes < choplimit:
            outid[var][idxA] = aid[var][:]
        else:
            #loop over the first dimension (matrix is too big)
            ia = 0
            for i in range(idxA[0].start, idxA[0].stop):
                outid[var][[i] +
                           idxA[1:]] = aid[var][[ia,
                                                 slice(None),
                                                 slice(None)]]
                ia += 1

        idxB = idxA.copy()
        idxB[dimax] = slice(aid.dimensions[dimension].size,
                            outid.dimensions[dimension].size)
        if bid[var][:].nbytes < choplimit:
            outid[var][idxB] = bid[var][:]
        else:
            #loop over the first dimension (matrix is too big)
            ib = 0
            for i in range(idxB[0].start, idxB[0].stop):
                outid[var][[i] +
                           idxB[1:]] = bid[var][[ib,
                                                 slice(None),
                                                 slice(None)]]
                ib += 1

    outid.setncattr(
        'History',
        outid.getncattr('History') +
        '\n Modified at %s by Geoslurp: Merge two netcdf files along dimension %s'
        % (datetime.now(), dimension))
    return UriFile(ncout), True
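
The append step above boils down to building one slice per axis so that the variable from file A fills the first part of the stacked dimension and the variable from file B the rest. A numpy-only sketch of that indexing pattern (shapes are invented):

import numpy as np

dimax = 0                        # axis along which the two arrays are stacked
a = np.arange(6).reshape(2, 3)   # stand-in for the variable from file A
b = np.arange(9).reshape(3, 3)   # stand-in for the variable from file B

out = np.empty((a.shape[dimax] + b.shape[dimax], 3), dtype=a.dtype)

# slices covering the full output array, then restrict the stacked axis
idxA = [slice(0, n) for n in out.shape]
idxA[dimax] = slice(0, a.shape[dimax])
idxB = idxA.copy()
idxB[dimax] = slice(a.shape[dimax], out.shape[dimax])

out[tuple(idxA)] = a
out[tuple(idxB)] = b
print(np.array_equal(out, np.concatenate((a, b), axis=dimax)))   # True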
Example #6
def coraMetaExtractor(uri):
    """Extract meta information (tracks, etc) as a dictionary from an argo prof file  floats"""

    try:
        url=uri.url
        ncArgo=ncDset(url)

        #check for minimum amount of profiles
        minProfs=3
        if ncArgo.dimensions['N_PROF'].size < minProfs:
            slurplogger().info("amount of profiles in %s is less then %d, skipping"%(url,minProfs))
            return {}



        slurplogger().info("Extracting meta info from: %s"%(url))

        # Get reference time
        
        # t0=datetime.strptime(ncStr(ncArgo["REFERENCE_DATE_TIME"][:]),"%Y%m%d%H%M%S")
        t0=num2date(0.0, units=ncArgo['JULD'].units,only_use_cftime_datetimes=False)

        #get the start end end time


        #wmoid's should be the same for all entries, so take the first one
        # wmoid=int(ncStr(ncArgo["PLATFORM_NUMBER"][0]))
        wmoid=[]
        
        # this is the file type: mooring, profiler, ...
        datacenter=url.split('_')[-2]+'_'+url.split('_')[-1][0:2]
        

        #get modes for each profile
        mode=np.array([x for x in ncStr(ncArgo['DATA_MODE'])])

        #get cycles for each profile
        cycle=[int(x) for x in ncArgo['CYCLE_NUMBER'][:]]

        #
        tlocation=[]
        # which profile is ascending ?
        # ascend=ncArgo['DIRECTION'][:]== b"A"

        geoMpoints=ogr.Geometry(ogr.wkbMultiPoint)
        iprof=[]
        for i,(t,lon,lat) in enumerate(zip(ncArgo["JULD"][:],ncArgo["LONGITUDE"][:],ncArgo["LATITUDE"][:])):
            if lon > 180:
                #make sure longitude goes from -180 to 180
                lon-=360
            #we don't want nan positions or timetags in the database
            if np.ma.is_masked(lon) or np.ma.is_masked(lat) or np.ma.is_masked(t):
                continue

            tdt=t0+timedelta(days=float(t))
            tlocation.append(tdt)
            point = ogr.Geometry(ogr.wkbPoint)
            point.AddPoint(float(lon),float(lat),0)
            geoMpoints.AddGeometry(point)
            iprof.append(i)

        if not tlocation:
            #return an empty dictionary when no valid profiles have been found
            return {}

        tstart=np.min(tlocation)
        tend=np.max(tlocation)
        meta={"uri":url,"lastupdate":uri.lastmod,"datacenter":datacenter,"tstart":tstart,"tend":tend,
              "mode":mode,"tlocation":tlocation,"cycle":cycle,"iprof":iprof,
              "geom":geoMpoints.ExportToIsoWkb()}
    except Exception as e:
        raise RuntimeWarning("Cannot extract meta information from " + url + ": " + str(e))

    return meta
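
The per-profile loop above does two small things that are easy to test in isolation: it skips masked positions or time tags, and it converts JULD day offsets into datetimes relative to the reference epoch t0. A file-free sketch of both steps (the epoch and sample values are made up):

import numpy as np
from datetime import datetime, timedelta

t0 = datetime(1950, 1, 1)   # hypothetical reference epoch for the JULD values
juld = np.ma.array([0.5, 1.25, 3.0], mask=[False, True, False])
lon = np.ma.array([190.0, 10.0, -20.0], mask=[False, False, False])

tlocation = []
for t, ln in zip(juld, lon):
    if np.ma.is_masked(t) or np.ma.is_masked(ln):
        continue                 # drop profiles with missing data
    if ln > 180:
        ln -= 360                # keep longitudes in -180..180
    tlocation.append(t0 + timedelta(days=float(t)))

print(tlocation)   # -> 1950-01-01 12:00 and 1950-01-04 00:00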
Example #7
def radsMetaDataExtractor(uri):
    """Extract a dictionary with rads entries for the database"""
    slurplogger().info("extracting data from %s" % (uri.url))
    ncrads = ncDset(uri.url)
    track = ogr.Geometry(ogr.wkbMultiLineString)
    data = {"segments": []}

    if ncrads.dimensions['time'].size < 3:
        #no point trying to index empty files
        return {}
    #reference time
    t0 = datetime(1985, 1, 1)

    # We need to compare some values from the previous loop, which we store in the following variables
    lonprev = ncrads["lon"][0]
    if lonprev > 180:
        lonprev -= 360

    tprev = ncrads["time"][0]
    onlandprev = flag4_isonLand(ncrads["flags"][0])

    #initiate first linestring segment
    trackseg = ogr.Geometry(ogr.wkbLineString)
    #we also store some bookkeeping information on each track segment
    segment = {
        "tstart": (t0 + timedelta(seconds=float(tprev))).isoformat(),
        "tend": None,
        "istart": 0,
        "iend": 0,
        "land": int(flag4_isonLand(ncrads["flags"][0]))
    }

    for i, (t, lon, lat, flag) in enumerate(
            zip(ncrads["time"][:], ncrads["lon"][:], ncrads["lat"][:],
                ncrads['flags'][:])):
        dt = t0 + timedelta(seconds=float(t))
        onland = flag4_isonLand(flag)
        if lon > 180:
            #make sure longitude goes from -180 to 180
            lon -= 360
        #create a new segment when: (a) crossing the 180 degree line, or (b) when the ocean/land flag changes
        if abs(lonprev - lon) > 180 or (onlandprev != onland):
            #start a new segment upon crossing the 180 line, when a time gap occurred, or when crossing from land to ocean or lake
            #Segments which have more than a single point will be added:
            if trackseg.GetPointCount() > 1:
                #gather some end bookkeeping on the previous segment
                segment["tend"] = dt.isoformat()
                segment["iend"] = i

                #append segment and bookkeeping data
                data["segments"].append(segment.copy())
                track.AddGeometry(trackseg)
            #initialize new segment
            segment["tstart"] = dt.isoformat()
            segment["istart"] = i
            segment["land"] = int(onland)
            trackseg = ogr.Geometry(ogr.wkbLineString)

        trackseg.AddPoint(float(lon), float(lat), 0)
        lonprev = lon
        tprev = t
        onlandprev = onland

    #also add the last segment
    if trackseg.GetPointCount() > 1:
        #gather some end bookkeeping on the previous segment
        segment["tend"] = dt.isoformat()
        segment["iend"] = i

        #append segment and bookkeeping data
        data["segments"].append(segment)
        track.AddGeometry(trackseg)

    if not data["segments"]:
        #return an empty dict when no segments are found
        return {}

    #reference time for rads
    mtch = re.search("p([0-9]+)c([0-9]+).nc", uri.url)
    meta = {
        "lastupdate": uri.lastmod,
        "tstart": t0 + timedelta(seconds=float(ncrads['time'][0])),
        "tend": t0 + timedelta(seconds=float(ncrads['time'][-1])),
        "cycle": int(mtch.group(2)),
        "apass": int(mtch.group(1)),
        "uri": uri.url,
        "data": data,
        "geom": track.ExportToIsoWkb()
    }

    return meta
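
The segmentation logic above reduces to starting a new linestring whenever consecutive longitudes jump by more than 180 degrees (a dateline crossing) or the land flag flips, and keeping only segments with more than one point. A minimal sketch with plain Python lists (the sample track is invented):

# invented longitudes (already wrapped to -180..180) and land flags
lons = [178.0, 179.5, -179.0, -178.0, -177.5]
land = [False, False, False, True, True]

segments = []
current = [0]                    # indices belonging to the open segment
for i in range(1, len(lons)):
    if abs(lons[i - 1] - lons[i]) > 180 or land[i - 1] != land[i]:
        if len(current) > 1:     # only keep segments with more than one point
            segments.append(current)
        current = []
    current.append(i)
if len(current) > 1:
    segments.append(current)     # do not forget the last open segment

print(segments)   # [[0, 1], [3, 4]] (the single-point segment at index 2 is dropped)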