Example #1
def read_local_ncfiles(**kwargs):
    """
    Wrapping function to read satellite netcdf files.

    param:
        pathlst - list of paths to be parsed
        product - product as specified in satellite_specs.yaml
        varalias
        sd - start date (datetime object)
        ed - start date (datetime object)
        twin - time window (temporal constraint) in minutes

    return:
        dictionary of variables for the satellite_class object
    """
    pathlst = kwargs.get('pathlst')
    product = kwargs.get('product')
    varalias = kwargs.get('varalias')
    sdate = kwargs.get('sdate')
    edate = kwargs.get('edate')
    twin = kwargs.get('twin')

    # adjust start and end
    sdate = sdate - timedelta(minutes=twin)
    edate = edate + timedelta(minutes=twin)
    # get meta data
    ncmeta = ncdumpMeta(pathlst[0])
    ncvar = get_filevarname(varalias, variable_info, satellite_dict[product],
                            ncmeta)
    # retrieve sliced data
    ds = read_netcdfs(pathlst)
    ds_sort = ds.sortby('time')
    ds_sliced = ds_sort.sel(time=slice(sdate, edate))
    # make dict and start with stdvarname for varalias
    stdvarname = variable_info[varalias]['standard_name']
    var_sliced = ds_sliced[ncvar]
    vardict = {}
    vardict[stdvarname] = list(var_sliced.values)
    # add coords to vardict
    # 1. retrieve list of coordinates
    coords_lst = list(var_sliced.coords.keys())
    # 2. iterate over coords_lst
    for varname in coords_lst:
        stdcoordname = ds_sliced[varname].attrs['standard_name']
        if stdcoordname == 'longitude':
            # map longitudes to the range [-180, 180)
            vardict[stdcoordname] = \
                list(((ds_sliced[varname].values - 180) % 360) - 180)
        elif stdcoordname == 'time':
            # convert to unixtime (seconds since 1970-01-01)
            df_time = ds_sliced[varname].to_dataframe()
            unxt = (pd.to_datetime(df_time[varname]).astype('int64') / 10**9)
            vardict[stdcoordname] = unxt.values
            vardict['time_unit'] = variable_info[stdcoordname]['units']
        else:
            vardict[stdcoordname] = list(ds_sliced[varname].values)
    return vardict
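The two steps worth flagging above are the longitude wrap and the unixtime conversion. A minimal self-contained sketch of both (sample values are made up):

import numpy as np
import pandas as pd

# map arbitrary longitudes into [-180, 180)
lons = np.array([0., 190., 360., -190.])
print(((lons - 180) % 360) - 180)  # [   0. -170.    0.  170.]

# convert datetime64 values to unixtime (seconds since 1970-01-01)
times = pd.to_datetime(['2021-01-01 00:00', '2021-01-01 01:00'])
print((times.astype('int64') / 10**9).values)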
Example #2
def get_model_fc_mode(filestr, model, fc_date, varalias=None, **kwargs):
    """
    fct to retrieve model data for correct time
    """
    vardict = {}
    print("Get model data according to selected date(s) ....")
    print(filestr)
    meta = ncdumpMeta(filestr)
    stdvarname = variable_info[varalias]['standard_name']
    lonsname = get_filevarname('lons', variable_info,
                               model_dict[model], meta)
    latsname = get_filevarname('lats', variable_info,
                               model_dict[model], meta)
    timename = get_filevarname('time', variable_info,
                               model_dict[model], meta)
    # get other variables e.g. Hs [time,lat,lon]
    filevarname = get_filevarname(varalias, variable_info,
                                  model_dict[model], meta)
    try:
        model_lons, model_lats, model_time_dt = \
            read_model_nc_output_lru(filestr, lonsname, latsname, timename)
    except Exception as e:
        print(e)
        print('continue with uncached retrieval')
        model_lons, model_lats, model_time_dt = \
            read_model_nc_output(filestr, lonsname, latsname, timename)

    vardict[variable_info['lons']['standard_name']] = model_lons
    vardict[variable_info['lats']['standard_name']] = model_lats

    # remove escape character because netCDF4 handles white spaces
    # but cannot handle escape characters (apparently)
    filestr = filestr.replace('\\', '')
    f = netCDF4.Dataset(filestr, 'r')
    model_time = f.variables[timename]
    lvl = kwargs.get('vertical_level', 0)
    if isinstance(filevarname, dict):
        print(
            'Target variable can be computed from vector \n'
            'components with the following aliases: ', filevarname)
        tidx = model_time_dt.index(fc_date)
        model_time_dt_valid = model_time_dt[tidx]
        model_time_valid = float(model_time[tidx])
        model_time_unit = model_time.units
        vardict[variable_info['time']['standard_name']] = model_time_valid
        vardict['datetime'] = model_time_dt_valid
        vardict['time_unit'] = model_time_unit
        for key in filevarname.keys():
            filevarname_dummy = get_filevarname(
                filevarname[key][0], variable_info,
                model_dict[model], meta)
            if filevarname_dummy is not None:
                print(filevarname[key][0], 'exists')
                break
        print('Use aliases:', filevarname[key])
        model_var_dummy = f.variables[filevarname_dummy]
        if len(model_var_dummy.dimensions) == 4:
            model_var_dummy = model_var_dummy[:, lvl, :, :].squeeze()
        if len(model_var_dummy.shape) == 3:  # for multiple time steps
            model_var_valid_tmp = model_var_dummy[tidx, :, :].squeeze()**2
            for i in range(1, len(filevarname[key])):
                filevarname_dummy = get_filevarname(
                    filevarname[key][i], variable_info,
                    model_dict[model], meta)
                if len(f.variables[filevarname_dummy].dimensions) == 4:
                    model_var_valid_tmp += \
                        f.variables[filevarname_dummy][
                            tidx, lvl, :, :].squeeze()**2
                elif len(f.variables[filevarname_dummy].dimensions) == 3:
                    model_var_valid_tmp += \
                        f.variables[filevarname_dummy][
                            tidx, :, :].squeeze()**2
            model_var_valid = np.sqrt(model_var_valid_tmp)
        elif len(model_var_dummy.shape) == 2:
            model_var_valid_tmp = model_var_dummy[:, :]**2
            for i in range(1, len(filevarname[key])):
                filevarname_dummy = get_filevarname(
                    filevarname[key][i], variable_info,
                    model_dict[model], meta)
                model_var_valid_tmp += \
                    f.variables[filevarname_dummy][:, :]**2
            model_var_valid = np.sqrt(model_var_valid_tmp)
        else:
            print('Dimension mismatch!')
        vardict[stdvarname] = model_var_valid
    else:
        tidx = model_time_dt.index(fc_date)
        model_time_dt_valid = model_time_dt[tidx]
        model_time_valid = float(model_time[tidx])
        model_time_unit = model_time.units
        vardict[variable_info['time']['standard_name']] = model_time_valid
        vardict['datetime'] = model_time_dt_valid
        vardict['time_unit'] = model_time_unit
        model_var_link = f.variables[filevarname]
        if len(model_var_link.dimensions) == 4:
            model_var_link = model_var_link[:, lvl, :, :].squeeze()
        if len(model_var_link.shape) == 3:  # for multiple time steps
            model_var_valid = model_var_link[tidx, :, :].squeeze()
        elif len(model_var_link.shape) == 2:
            model_var_valid = model_var_link[:, :].squeeze()
        else:
            print('Dimension mismatch!')
        vardict[variable_info[varalias]['standard_name']] = model_var_valid
    f.close()
    # transform masked arrays to numpy arrays with NaNs
    vardict['longitude'] = vardict['longitude'].filled(np.nan)
    vardict['latitude'] = vardict['latitude'].filled(np.nan)
    vardict[variable_info[varalias]['standard_name']] = \
        vardict[variable_info[varalias]['standard_name']].filled(np.nan)
    vardict['meta'] = meta
    # make lats,lons 2D if only 1D (regular grid)
    if len(vardict['longitude'].shape) == 1:
        LATS, LONS = np.meshgrid(vardict['latitude'],
                                 vardict['longitude'])
        vardict['longitude'] = np.transpose(LONS)
        vardict['latitude'] = np.transpose(LATS)
    return vardict, filevarname
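When filevarname is a dict, the code above rebuilds a magnitude from vector components as sqrt of the summed squares, and the final .filled(np.nan) calls turn masked values into NaNs. A toy sketch of both steps (made-up 2x2 field):

import numpy as np

u = np.ma.masked_array([[3., 0.], [1., 2.]], mask=[[0, 0], [0, 1]])
v = np.ma.masked_array([[4., 1.], [1., 2.]], mask=[[0, 0], [0, 1]])
magnitude = np.sqrt(u**2 + v**2)
# masked entries become NaN, mirroring the .filled(np.nan) calls above
print(magnitude.filled(np.nan))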
Example #3
def collocate_poi_ts(indict, model=None, distlim=None,
                     leadtime=None, date_incr=None, varalias=None,
                     twin=None):
    """
    indict: mandatory - lons, lats, time, values
            optional - leadtime, distlim, date_incr
    """
    # get stdvarname
    stdvarname = variable_info[varalias]['standard_name']
    # datetime or str
    if isinstance(indict['time'][0], str):
        poi_dtimes = [parse_date(t) for t in indict['time']]
    elif isinstance(indict['time'][0], datetime):
        poi_dtimes = indict['time']
    else:
        raise ValueError('no valid time/datetime format for poi; '
                         'use either str or datetime')
    fc_date = make_fc_dates(poi_dtimes[0], poi_dtimes[-1], date_incr)
    # get coinciding date between fc_date and dates in obs_obj
    idx1 = collocate_times(unfiltered_t=poi_dtimes,
                           target_t=fc_date,
                           twin=twin)
    # find valid/coinciding fc_dates
    if len(idx1) > len(fc_date):
        print('Multiple assignments within given time window')
        print('--> only closest to time stamp is chosen')
        idx_closest = get_closest_date(
            list(np.array(poi_dtimes)[idx1]), fc_date)
        idx1 = list(np.array(idx1)[idx_closest])
    # adjust obs_obj according to valid dates
    for key in indict.keys():
        if key not in ('time_unit', 'meta', 'nID'):
            indict[key] = list(np.array(indict[key])[idx1])
    poi_dtimes = list(np.array(poi_dtimes)[idx1])
    del idx1
    # find valid dates for given leadtime and model
    fc_date = find_valid_fc_dates_for_model_and_leadtime(
        fc_date, model, leadtime)
    # adjust fc_date according to obs date
    idx2 = collocate_times(unfiltered_t=fc_date,
                           target_t=poi_dtimes,
                           twin=twin)
    fc_date = list(np.array(fc_date)[idx2])
    del idx2
    # compute time based on time unit from variable definition
    time_unit = variable_info['time']['units']
    time = netCDF4.date2num(poi_dtimes, time_unit)
    # check if file exists and if it includes desired time and append
    model_vals = []
    model_lons = []
    model_lats = []
    obs_vals = []
    obs_lons = []
    obs_lats = []
    collocation_idx_x = []
    collocation_idx_y = []
    distance = []
    time_lst = []
    dtimes = []
    switch = 0
    for d in tqdm(range(len(fc_date))):
        with NoStdStreams():
            check = check_if_file_is_valid(fc_date[d], model, leadtime)
            if check is True:
                # retrieve model
                fname = make_model_filename_wrapper(model, fc_date[d],
                                                    leadtime)
                # get hold of variable names (done only once)
                if switch == 0:
                    meta = ncdumpMeta(fname)
                    lonsname = get_filevarname('lons', variable_info,
                                               model_dict[model], meta)
                    latsname = get_filevarname('lats', variable_info,
                                               model_dict[model], meta)
                    timename = get_filevarname('time', variable_info,
                                               model_dict[model], meta)
                    filevarname = get_filevarname(varalias, variable_info,
                                                  model_dict[model], meta)
                    mlons = xr.open_dataset(fname)[lonsname].values
                    # secure lons from -180 to 180
                    mlons = ((mlons - 180) % 360) - 180
                    mlats = xr.open_dataset(fname)[latsname].values
                    # ensure matching dimension
                    if len(mlons.shape) == 1:
                        Mlons, Mlats = np.meshgrid(mlons, mlats)
                    else:
                        Mlons, Mlats = mlons, mlats
                    switch = 1
                plon = [indict['longitude'][d]]
                plat = [indict['latitude'][d]]
                index_array_2d, distance_array, _ = \
                    collocation_fct(plon, plat, Mlons, Mlats)
                dst = xr.open_dataset(fname)[timename].values
                tidx = list(dst).index(np.datetime64(fc_date[d]))
                # impose distlim
                if distance_array[0] < distlim * 1000:
                    idx_x = index_array_2d[0][0]
                    idx_y = index_array_2d[1][0]
                    model_lons.append(Mlons[idx_x, idx_y])
                    model_lats.append(Mlats[idx_x, idx_y])
                    vals = xr.open_dataset(fname)[filevarname][
                        tidx, idx_x, idx_y].values
                    model_vals.append(vals.item())
                    obs_vals.append(indict['obs'][d])
                    obs_lons.append(indict['longitude'][d])
                    obs_lats.append(indict['latitude'][d])
                    collocation_idx_x.append(idx_x)
                    collocation_idx_y.append(idx_y)
                    distance.append(distance_array[0])
                    time_lst.append(time[d])
                    dtimes.append(poi_dtimes[d])
    results_dict = {
        'valid_date': dtimes,
        'time': time_lst,
        'time_unit': time_unit,
        'datetime': dtimes,
        'distance': distance,
        'model_values': model_vals,
        'model_lons': model_lons,
        'model_lats': model_lats,
        'obs_values': obs_vals,
        'obs_lons': obs_lons,
        'obs_lats': obs_lats,
        'collocation_idx_x': collocation_idx_x,
        'collocation_idx_y': collocation_idx_y
    }
    return results_dict
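collocation_fct itself is not shown here; below is a self-contained stand-in for its nearest-gridpoint-plus-distlim role, using a plain haversine distance (grid and point are made up):

import numpy as np

def nearest_gridpoint(plon, plat, Mlons, Mlats):
    # great-circle distance (m) from one point to every grid node
    R = 6371000.
    lon1, lat1, lon2, lat2 = map(np.radians, (plon, plat, Mlons, Mlats))
    a = (np.sin((lat2 - lat1) / 2)**2
         + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2)**2)
    d = 2 * R * np.arcsin(np.sqrt(a))
    idx = np.unravel_index(np.argmin(d), d.shape)
    return idx, d[idx]

Mlons, Mlats = np.meshgrid(np.arange(0., 10.), np.arange(50., 60.))
(idx_x, idx_y), dist = nearest_gridpoint(4.02, 54.98, Mlons, Mlats)
distlim = 6  # km, like the distlim argument above
if dist < distlim * 1000:
    print('match at', (idx_x, idx_y), 'distance', int(dist), 'm')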
Example #4
def match_region_poly(LATS, LONS, region, grid_date):
    """
    Takes care of region defined as polygon
    """
    from matplotlib.patches import Polygon
    from matplotlib.path import Path
    import numpy as np
    if isinstance(region, dict):
        print("Manually specified region: \n" + " --> Bounds: " + str(region))
        poly = Polygon(list(zip(region['lons'], region['lats'])), closed=True)
    elif (region not in region_dict['poly']
          and region not in model_dict):
        sys.exit("Region polygon is not defined")
    elif isinstance(region, str) and region in model_dict:
        try:
            print('Use date for retrieving grid: ', grid_date)
            filestr = make_model_filename_wrapper(
                region, grid_date, 'best')
            meta = ncdumpMeta(filestr)
            flon = get_filevarname('lons', variable_info,
                                   model_dict[region], meta)
            flat = get_filevarname('lats', variable_info,
                                   model_dict[region], meta)
            time = get_filevarname('time', variable_info,
                                   model_dict[region], meta)
            model_lons, model_lats, _ = \
                read_model_nc_output_lru(filestr, flon, flat, time)
        except (KeyError, IOError, ValueError) as e:
            print(e)
            if 'grid_date' in model_dict[region]:
                grid_date = model_dict[region]['grid_date']
                print('Trying default date ', grid_date)
            else:
                grid_date = datetime(datetime.now().year,
                                     datetime.now().month,
                                     datetime.now().day)
            filestr = make_model_filename_wrapper(
                region, grid_date, 'best')
            meta = ncdumpMeta(filestr)
            flon = get_filevarname('lons', variable_info,
                                   model_dict[region], meta)
            flat = get_filevarname('lats', variable_info,
                                   model_dict[region], meta)
            time = get_filevarname('time', variable_info,
                                   model_dict[region], meta)
            model_lons, model_lats, _ = \
                read_model_nc_output_lru(filestr, flon, flat, time)
        if len(model_lons.shape) == 1:
            model_lons, model_lats = np.meshgrid(model_lons, model_lats)
        print('Check if footprints fall within the chosen domain')
        ncdict = ncdumpMeta(filestr)
        try:
            proj4 = find_attr_in_nc('proj', ncdict=ncdict, subattrstr='proj4')
        except IndexError:
            print('proj4 not defined in netcdf-file')
            print('Using proj4 from model config file')
            proj4 = model_dict[region]['proj4']
        proj_model = pyproj.Proj(proj4)
        Mx, My = proj_model(model_lons, model_lats, inverse=False)
        Vx, Vy = proj_model(LONS, LATS, inverse=False)
        xmax, xmin = np.max(Mx), np.min(Mx)
        ymax, ymin = np.max(My), np.min(My)
        ridx = list(
            np.where((Vx > xmin) & (Vx < xmax) & (Vy > ymin) & (Vy < ymax))[0])
    elif isinstance(region, str):
        print("Specified region: " + region + "\n" +
              " --> Bounded by polygon: \n" + "lons: " +
              str(region_dict['poly'][region]['lons']) + "\n" + "lats: " +
              str(region_dict['poly'][region]['lats']))
        poly = Polygon(list(
            zip(region_dict['poly'][region]['lons'],
                region_dict['poly'][region]['lats'])),
                       closed=True)
        # check if coords are in region
        lats = np.array(LATS).ravel()
        lons = np.array(LONS).ravel()
        points = np.c_[lons, lats]
        # a small radius is needed to define the polygon correctly,
        # see discussion here:
        # https://github.com/matplotlib/matplotlib/issues/9704
        hits = Path(poly.xy).contains_points(points, radius=1e-9)
        ridx = list(np.arange(len(lons))[hits])
    if not ridx:
        print("No values for chosen region and time frame!!!")
    else:
        print("Values found for chosen region and time frame.")
    return ridx
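The predefined-polygon branch reduces to a matplotlib point-in-polygon test. A self-contained sketch with a made-up box:

import numpy as np
from matplotlib.path import Path

poly_lons = [0., 10., 10., 0.]
poly_lats = [50., 50., 60., 60.]
path = Path(list(zip(poly_lons, poly_lats)))
lons = np.array([5., 20., 9.9])
lats = np.array([55., 55., 50.1])
# small radius for deterministic edge handling, see
# https://github.com/matplotlib/matplotlib/issues/9704
hits = path.contains_points(np.c_[lons, lats], radius=1e-9)
print(list(np.arange(len(lons))[hits]))  # [0, 2]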
Example #5
 def __init__(self,
              sdate=None,
              mission='s3a',
              product='cmems_L3_NRT',
              edate=None,
              twin=30,
              download=False,
              path_local=None,
              region='mwam4',
              nproc=1,
              varalias='Hs',
              api_url=None,
              filterData=False,
              poi=None,
              distlim=None,
              **kwargs):
     print('# ----- ')
     print(" ### Initializing satellite_class object ###")
     print(" ")
     # parse and translate date input
     sdate = parse_date(sdate)
     edate = parse_date(edate)
     # check settings
     if (sdate is None and edate is None and poi is not None):
         sdate = poi['datetime'][0]
         edate = poi['datetime'][-1]
     elif (edate is None and sdate is not None):
         print("Requested time: ", str(sdate))
         edate = sdate
     elif (edate is None and sdate is None):
         now = datetime.now()
         sdate = datetime(now.year, now.month, now.day, now.hour)
         edate = sdate
         print("Requested time: ", str(sdate))
     else:
         print("Requested time frame: " + str(sdate) + " - " + str(edate))
     stdname = variable_info[varalias].get('standard_name')
     units = variable_info[varalias].get('units')
     # define some class variables
     self.sdate = sdate
     self.edate = edate
     self.varalias = varalias
     self.units = units
     self.stdvarname = stdname
     self.twin = twin
     self.region = region
     self.mission = mission
     self.obstype = 'satellite_altimeter'
     self.product = product
     self.provider = satellite_dict[product].get('provider')
     self.processing_level = \
             satellite_dict[product].get('processing_level')
     print('Chosen time window is:', twin, 'min')
     # make satpaths
     if path_local is None:
         path_template = satellite_dict[product]['dst']['path_template']
         self.path_local = path_template
     else:
         self.path_local = path_local
     # retrieve files
     if download is False:
         print("No download initialized, checking local files")
     else:
         print("Downloading necessary files ...")
         get_remote_files(path_local=path_local,
                          sdate=sdate, edate=edate,
                          twin=twin, nproc=nproc,
                          product=product,
                          api_url=api_url,
                          mission=mission,
                          dict_for_sub=vars(self))
     print(" ")
     print(" ## Find files ...")
     t0 = time.time()
     pathlst, _ = get_local_files(sdate,
                                  edate,
                                  twin,
                                  product,
                                  vars(self),
                                  path_local=path_local)
     print(" ")
     print(" ## Read files ...")
     if len(pathlst) > 0:
         try:
              if filterData is True:
                 # extend time period due to filter
                 if 'stwin' not in kwargs.keys():
                     kwargs['stwin'] = 1  # needs to be changed
                 if 'etwin' not in kwargs.keys():
                     kwargs['etwin'] = 1
                 twin_tmp = twin + kwargs['stwin'] + kwargs['etwin']
                 # retrieve data
                 rvardict = get_sat_ts(sdate, edate, twin_tmp, region,
                                       product, pathlst, varalias, poi,
                                       distlim)
                 # filter data
                 rvardict = filter_main(rvardict,
                                        varalias=varalias,
                                        **kwargs)
                 # crop to original time period
                 sdate_tmp = sdate - timedelta(minutes=twin)
                  edate_tmp = edate + timedelta(minutes=twin)
                 rvardict = crop_vardict_to_period(rvardict, sdate_tmp,
                                                   edate_tmp)
                 self.filter = True
                 self.filterSpecs = kwargs
             else:
                 rvardict = get_sat_ts(sdate, edate, twin, region, product,
                                       pathlst, varalias, poi, distlim)
                 # make ts in vardict unique
                 rvardict = vardict_unique(rvardict)
                 # rm NaNs
                 rvardict = rm_nan_from_vardict(varalias, rvardict)
             # find variable name as defined in file
              if product in ('cmems_L3_NRT', 'cmems_L3_MY', 'cmems_L3_s6a',
                             'cci_L2P', 'cci_L3'):
                  ncdict = ncdumpMeta(pathlst[0])
              elif product == 'eumetsat_L2':
                 tmpdir = tempfile.TemporaryDirectory()
                 zipped = zipfile.ZipFile(pathlst[0])
                 enhanced_measurement = zipped.namelist()[-1]
                 extracted = zipped.extract(enhanced_measurement,
                                            path=tmpdir.name)
                 ncdict = ncdumpMeta(extracted)
                 tmpdir.cleanup()
             with NoStdStreams():
                 filevarname = get_filevarname(varalias, variable_info,
                                               satellite_dict[product],
                                               ncdict)
             rvardict['meta'] = ncdict
             # define more class variables
             self.vars = rvardict
             self.varname = filevarname
             t1 = time.time()
             print(" ")
             print('## Summary:')
             print(str(len(self.vars['time'])) + " footprints retrieved.")
             print("Time used for retrieving satellite data:",\
                     round(t1-t0,2),"seconds")
             print(" ")
             print("### Satellite object initialized ###")
             print('# ----- ')
         except Exception as e:
             print(e)
             print('Error encountered')
             print('No satellite_class object initialized')
     else:
         print('No satellite data found')
         print('No satellite_class object initialized')
         print('# ----- ')
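A minimal usage sketch for this constructor (dates are hypothetical; assumes a configured wavy installation with satellite_class in scope and local or downloadable s3a data):

from datetime import datetime

sco = satellite_class(sdate=datetime(2021, 1, 1),
                      edate=datetime(2021, 1, 2),
                      mission='s3a',
                      product='cmems_L3_NRT',
                      region='mwam4',
                      varalias='Hs',
                      twin=30)
print(len(sco.vars['time']), 'footprints')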
Example #6
def get_sat_ts(sdate, edate, twin, region, product, pathlst, varalias, poi,
               distlim):
    """
    Main function to obtain data from satellite missions.
    reads files, apply region and temporal filter

    return: adjusted dictionary according to spatial and
            temporal contarinst
    """
    cvardict = read_local_files(pathlst=pathlst,
                                product=product,
                                varalias=varalias,
                                sdate=sdate,
                                edate=edate,
                                twin=twin)
    print('Total: ', len(cvardict['time']), ' footprints found')
    print('Apply region mask')
    ridx = match_region(cvardict['latitude'],
                        cvardict['longitude'],
                        region=region,
                        grid_date=sdate)
    print('Region mask applied')
    rvardict = {}
    for element in cvardict:
        if element != 'time_unit':
            rvardict[element] = list(np.array(cvardict[element])[ridx])
        else:
            rvardict[element] = cvardict[element]
    del cvardict, ridx
    if len(rvardict['time']) > 0:
        rvardict['datetime'] = netCDF4.num2date(rvardict['time'],
                                                rvardict['time_unit'])
        print('For chosen region and time: ', len(rvardict['time']),
              'footprints found')
        # convert to datetime object
        timedt = rvardict['datetime']
        rvardict['datetime'] = [datetime(t.year, t.month, t.day,
                                         t.hour, t.minute, t.second,
                                         t.microsecond)
                                for t in timedt]
    else:
        print('For chosen region and time: 0 footprints found!')
    if poi is not None:
        pvardict = {}
        pidx = match_poi(rvardict, twin, distlim, poi)
        for element in rvardict:
            if element != 'time_unit':
                pvardict[element] = list(np.array(rvardict[element])[pidx])
            else:
                pvardict[element] = rvardict[element]
        rvardict = pvardict
        print('For chosen poi: ', len(rvardict['time']), 'footprints found')
    # find variable name as defined in file
    if product in ('cmems_L3_NRT', 'cmems_L3_MY', 'cmems_L3_s6a',
                   'cci_L2P', 'cci_L3'):
        ncdict = ncdumpMeta(pathlst[0])
    elif product == 'eumetsat_L2':
        tmpdir = tempfile.TemporaryDirectory()
        zipped = zipfile.ZipFile(pathlst[0])
        enhanced_measurement = zipped.namelist()[-1]
        extracted = zipped.extract(enhanced_measurement, path=tmpdir.name)
        ncdict = ncdumpMeta(extracted)
        tmpdir.cleanup()
    rvardict['meta'] = ncdict
    return rvardict
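The region mask above subsets every list in the vardict by an index list and then decodes time via netCDF4.num2date. A self-contained sketch of that pattern (toy values):

import numpy as np
import netCDF4

cvardict = {'time': [0., 3600., 7200.],
            'longitude': [1., 2., 3.],
            'time_unit': 'seconds since 1970-01-01 00:00:00 UTC'}
ridx = [0, 2]
rvardict = {k: (list(np.array(v)[ridx]) if k != 'time_unit' else v)
            for k, v in cvardict.items()}
rvardict['datetime'] = netCDF4.num2date(rvardict['time'],
                                        rvardict['time_unit'])
print(rvardict['datetime'])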
Example #7
def read_local_files_eumetsat(**kwargs):
    '''
    Read and concatenate all data to one time series for each variable.
    Function is tailored to EUMETSAT files.
    '''
    pathlst = kwargs.get('pathlst')
    product = kwargs.get('product')
    varalias = kwargs.get('varalias')
    sdate = kwargs.get('sdate')
    edate = kwargs.get('edate')
    twin = kwargs.get('twin')
    # adjust start and end
    sdate = sdate - timedelta(minutes=twin)
    edate = edate + timedelta(minutes=twin)
    # --- find variable cf names --- #
    print("Processing " + str(int(len(pathlst))) + " files")
    print(pathlst[0])
    print(pathlst[-1])
    # --- find ncvar cf names --- #
    tmpdir = tempfile.TemporaryDirectory()
    zipped = zipfile.ZipFile(pathlst[0])
    enhanced_measurement = zipped.namelist()[-1]
    extracted = zipped.extract(enhanced_measurement, path=tmpdir.name)
    stdvarname = variable_info[varalias]['standard_name']
    ncmeta = ncdumpMeta(extracted)
    ncvar = get_filevarname(varalias, variable_info, satellite_dict[product],
                            ncmeta)
    latname = get_filevarname('lats', variable_info, satellite_dict[product],
                              ncmeta)
    lonname = get_filevarname('lons', variable_info, satellite_dict[product],
                              ncmeta)
    timename = get_filevarname('time', variable_info, satellite_dict[product],
                               ncmeta)
    tmpdir.cleanup()
    # --- create vardict --- #
    vardict = {}
    ds = read_netcdfs_zipped_lru(pathlst, ncvar, dim=timename)
    ds_sort = ds.sortby(timename)
    ds_sliced = ds_sort.sel({timename: slice(sdate, edate)})
    # make dict and start with stdvarname for varalias
    var_sliced = ds_sliced[ncvar]
    vardict[stdvarname] = list(var_sliced.values)
    # add coords to vardict
    # 1. retrieve list of coordinates
    coords_lst = list(var_sliced.coords.keys())
    # 2. iterate over coords_lst
    for varname in coords_lst:
        stdcoordname = ds_sliced[varname].attrs['standard_name']
        if stdcoordname == 'longitude':
            # map longitudes to the range [-180, 180)
            vardict[stdcoordname] = \
                list(((ds_sliced[varname].values - 180) % 360) - 180)
        elif stdcoordname == 'time':
            # convert to unixtime (seconds since 1970-01-01)
            df_time = ds_sliced[varname].to_dataframe()
            unxt = (pd.to_datetime(df_time[varname]).astype('int64') / 10**9)
            vardict[stdcoordname] = unxt.values
            vardict['time_unit'] = variable_info[stdcoordname]['units']
        else:
            vardict[stdcoordname] = list(ds_sliced[varname].values)
    return vardict
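The zip handling above is an extract-then-clean-up pattern. A self-contained sketch that builds its own throwaway archive:

import os
import tempfile
import zipfile

tmpdir = tempfile.TemporaryDirectory()
archive = os.path.join(tmpdir.name, 'demo.zip')
with zipfile.ZipFile(archive, 'w') as z:
    z.writestr('measurement.nc', b'dummy bytes')
with zipfile.ZipFile(archive) as zipped:
    member = zipped.namelist()[-1]  # last member, as above
    extracted = zipped.extract(member, path=tmpdir.name)
print(extracted)
tmpdir.cleanup()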
Example #8
def get_nc_dict(**kwargs):
    """
    Collect 1D netcdf variables file by file from sdate to edate
    and concatenate them into one time series dictionary.
    """
    sdate = kwargs.get('sdate')
    edate = kwargs.get('edate')
    nID = kwargs.get('nID')
    varalias = kwargs.get('varalias')
    pathlst = kwargs.get('pathlst')
    strsublst = kwargs.get('strsublst')
    dict_for_sub = kwargs.get('dict_for_sub')
    # loop from sdate to edate with dateincr
    stdvarname = variable_info[varalias]['standard_name']
    tmpdate = deepcopy(sdate)
    varlst = []
    lonlst = []
    latlst = []
    timelst = []
    dtimelst = []
    # make subdict
    subdict = make_subdict(strsublst, class_object_dict=dict_for_sub)
    while datetime(tmpdate.year, tmpdate.month, 1) \
            <= datetime(edate.year, edate.month, 1):
        # get pathtofile
        pathtofile = get_pathtofile(pathlst, strsublst,
                                    subdict, tmpdate)
        # get ncdump
        ncdict = ncdumpMeta(pathtofile)
        # retrieve filevarname for varalias
        filevarname = get_filevarname(varalias,
                                      variable_info,
                                      insitu_dict[nID],
                                      ncdict)
        varstrlst = [filevarname, 'longitude', 'latitude', 'time']
        # query
        vardict = get_varlst_from_nc_1D(pathtofile,
                                        varstrlst,
                                        sdate, edate)
        varlst.append(list(vardict[filevarname]))
        lonlst.append(list(vardict['longitude']))
        latlst.append(list(vardict['latitude']))
        timelst.append(list(vardict['time']))
        dtimelst.append(list(vardict['dtime']))
        # determine date increment
        file_date_incr = insitu_dict[nID]['src'].get('file_date_incr', 'm')
        if file_date_incr == 'm':
            tmpdate += relativedelta(months=+1)
        elif file_date_incr == 'Y':
            tmpdate += relativedelta(years=+1)
        elif file_date_incr == 'd':
            tmpdate += timedelta(days=+1)
    varlst = flatten(varlst)
    lonlst = flatten(lonlst)
    latlst = flatten(latlst)
    timelst = flatten(timelst)
    dtimelst = flatten(dtimelst)
    vardict = {
        stdvarname: varlst,
        'time': timelst,
        'datetime': dtimelst,
        'time_unit': variable_info['time']['units'],
        'longitude': lonlst,
        'latitude': latlst
    }
    return vardict
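The while loop above advances one file per month (or day/year). A self-contained sketch of the month-stepping case with made-up dates:

from datetime import datetime
from dateutil.relativedelta import relativedelta

sdate = datetime(2020, 11, 15)
edate = datetime(2021, 2, 3)
tmpdate = datetime(sdate.year, sdate.month, 1)
while tmpdate <= datetime(edate.year, edate.month, 1):
    print(tmpdate.strftime('%Y-%m'))  # one path lookup per month
    tmpdate += relativedelta(months=+1)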
Example #9
    def __init__(self, nID, sensor, sdate, edate, varalias='Hs',
                 filterData=False, **kwargs):
        # parse and translate date input
        sdate = parse_date(sdate)
        edate = parse_date(edate)
        print('# ----- ')
        print(" ### Initializing insitu_class object ###")
        print(" ")
        print('Chosen period: ' + str(sdate) + ' - ' + str(edate))
        stdvarname = variable_info[varalias]['standard_name']
        try:
            self.stdvarname = stdvarname
            self.varalias = varalias
            self.units = variable_info[varalias].get('units')
            self.sdate = sdate
            self.edate = edate
            self.nID = nID
            self.sensor = sensor
            self.obstype = 'insitu'
            if ('tags' in insitu_dict[nID].keys() and
                    len(insitu_dict[nID]['tags']) > 0):
                self.tags = insitu_dict[nID]['tags']
            print(" ")
            print(" ## Read files ...")
            t0 = time.time()
            if filterData is False:
                vardict, fifo, pathtofile = \
                    get_insitu_ts(nID=nID,
                                  sensor=sensor,
                                  sdate=sdate,
                                  edate=edate,
                                  varalias=varalias,
                                  basedate=self.basedate,
                                  dict_for_sub=vars(self),
                                  **kwargs)
            elif filterData is True:
                # determine start and end date
                if 'stwin' not in kwargs.keys():
                    kwargs['stwin'] = 3
                if 'etwin' not in kwargs.keys():
                    kwargs['etwin'] = 0
                sdate_new = sdate - timedelta(hours=kwargs['stwin'])
                edate_new = edate + timedelta(hours=kwargs['etwin'])
                tmp_vardict, fifo, pathtofile = \
                    get_insitu_ts(nID=nID,
                                  sensor=sensor,
                                  sdate=sdate_new,
                                  edate=edate_new,
                                  varalias=varalias,
                                  basedate=self.basedate,
                                  dict_for_sub=vars(self),
                                  **kwargs)
                vardict = filter_main(tmp_vardict,
                                      varalias=varalias,
                                      **kwargs)
                # cut to original sdate and edate
                dtime_arr = np.array(vardict['datetime'])
                mask = ((dtime_arr >= sdate) & (dtime_arr <= edate))
                vardict['time'] = list(np.array(vardict['time'])[mask])
                vardict[stdvarname] = \
                    list(np.array(vardict[stdvarname])[mask])
                vardict['longitude'] = \
                    list(np.array(vardict['longitude'])[mask])
                vardict['latitude'] = \
                    list(np.array(vardict['latitude'])[mask])
                vardict['datetime'] = list(dtime_arr[mask])
                self.filter = True
                self.filterSpecs = kwargs
            self.vars = vardict
            self.lat = np.nanmean(vardict['latitude'])
            self.lon = np.nanmean(vardict['longitude'])
            if fifo == 'nc':
                print(pathtofile)
                meta = ncdumpMeta(sdate.strftime(pathtofile))
                self.vars['meta'] = meta
                varname = get_filevarname(varalias,
                                          variable_info,
                                          insitu_dict[nID],
                                          meta)
                self.varname = varname
            else:
                self.varname = varalias
            if fifo == 'frost':
                self.sensor = sensor
            t1 = time.time()
            print(" ")
            print('## Summary:')
            print(str(len(self.vars['time'])) + " values retrieved.")
            print("Time used for retrieving insitu data:",
                  round(t1 - t0, 2), "seconds")
            print(" ")
            print(" ### insitu_class object initialized ### ")
        except Exception as e:
            print(e)
            self.error = e
            print ("! No insitu_class object initialized !")
        print ('# ----- ')
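A minimal usage sketch for this constructor (nID, sensor, and dates are hypothetical and must match entries in the insitu config; assumes a configured wavy installation with insitu_class in scope):

from datetime import datetime

ico = insitu_class(nID='D_Breisundet_wave',
                   sensor='wavescan',
                   sdate=datetime(2021, 1, 1),
                   edate=datetime(2021, 1, 2),
                   varalias='Hs')
print(len(ico.vars['time']), 'values')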