Example #1
def preproc_ocn_wv(ds):
    """
    read and preprocess data for training/usage of the model
    :param ds:
    :return:
    """
    filee = ds.encoding["source"]
    logging.debug('filee %s', os.path.basename(filee))
    fdatedt = datetime.datetime.strptime(
        os.path.basename(filee).split('-')[4], '%Y%m%dt%H%M%S')
    logging.debug('fdatedt : %s %s', fdatedt, type(fdatedt))
    #ds['time'] = xarray.DataArray([fdatedt],dims=['time']) # works with recent xarray versions, not with old ones
    logging.debug('raw ds: %s', ds)
    try:
        ds['time'] = xarray.DataArray(np.array([fdatedt]),
                                      dims=['time'],
                                      coords={'time': [0]})
        ds = ds.sortby('time', ascending=True)
    except Exception:
        logging.debug('could not set/sort the time coordinate, keeping the dataset as-is')
    newds = xarray.Dataset()
    #format data for CWAVE 22 params computation
    cspcRe = ds['oswQualityCrossSpectraRe'].values.squeeze()
    cspcIm = ds['oswQualityCrossSpectraIm'].values.squeeze()
    ths1 = np.arange(0, 360, 5)
    ks1 = patch_oswK(ds['oswK'].values.squeeze(),
                     ipfvesion=None,
                     datedtsar=fdatedt)
    if cspcRe.shape == (36, 30):
        logging.debug('put zero matrix X spectra')
        cspcRe = np.zeros((72, 60))
        cspcIm = np.zeros((72, 60))

        #ks1 = reference_oswK_1145m_60pts
    else:
        pass
        #ths1 = ds['oswPhi'].values.squeeze()
        #ks1 = ds['oswK'].values.squeeze()
    ta = ds['oswHeading'].values.squeeze()
    incidenceangle = ds['oswIncidenceAngle'].values.squeeze()
    s0 = ds['oswNrcs'].values.squeeze()
    nv = ds['oswNv'].values.squeeze()
    lonSAR = ds['oswLon'].values.squeeze()
    latSAR = ds['oswLat'].values.squeeze()
    #lonSAR = ds['rvlLon'].values.squeeze() #test
    #latSAR = ds['rvlLat'].values.squeeze()
    satellite = os.path.basename(filee)[0:3]
    # if True :  # save a pickle for debug/test
    #     import pickle
    #     savings = {'cspcRe' : cspcRe,'cspcIm' : cspcIm,'ta' : ta,'incidenceangle' : incidenceangle,'nv' : nv,'s0' : s0,
    #                'datedt' : fdatedt,
    #                'lonsar' : lonSAR,'latsar' : latSAR}
    #     outputpl = '/tmp/hs_sar_ocn_dataset_vars_before_cwave_compute_%s.pkl' % (fdatedt.strftime('%Y%m%dT%H%M%S'))
    #     fifi = open(outputpl,'wb')
    #     pickle.dump(savings,fifi)
    #     fifi.close()
    #     logging.info('pickle: %s',outputpl)
    (subset_ok, flagKcorrupted, cspcReX, cspcImX, _, ks1, ths1, kx, ky,
     cspcReX_not_conservativ, S) = format_input_CWAVE_vector_from_OCN(
         cspcRe=cspcRe.T, cspcIm=cspcIm.T, ths1=ths1, ta=ta,
         incidenceangle=incidenceangle, s0=s0, nv=nv, ks1=ks1, datedt=fdatedt,
         lonSAR=lonSAR, latSAR=latSAR, satellite=satellite)
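    # 'S' holds the 20 CWAVE parameters returned by format_input_CWAVE_vector_from_OCN;
    # together with sigma0 (s0) and the normalized variance (nv) they form the
    # 22-element 'cwave' predictor assembled in the loop below.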
    varstoadd = [
        'S', 'cwave', 'dxdt', 'latlonSARcossin', 'todSAR', 'incidence',
        'incidence_angle', 'satellite', 'oswQualityCrossSpectraRe',
        'oswQualityCrossSpectraIm'
    ]
    additional_vars_for_validation = [
        'oswLon', 'oswLat', 'oswLandFlag', 'oswIncidenceAngle', 'oswWindSpeed',
        'platformName', 'nrcs', 'nv', 'heading', 'oswK', 'oswNrcs'
    ]
    varstoadd += additional_vars_for_validation
    logging.debug('varstoadd : %s', varstoadd)
    if 'time' in ds:
        newds['time'] = ds['time']
    else:
        newds['time'] = xarray.DataArray(np.array([fdatedt]),
                                         dims=['time'],
                                         coords={'time': [0]})
    for vv in varstoadd:
        logging.debug('start format variable :%s', vv)
        if vv in ['cwave']:
            dimszi = ['time', 'cwavedim']
            coordi = {'time': [fdatedt], 'cwavedim': np.arange(22)}
            cwave = np.hstack([S.T, s0.reshape(-1, 1),
                               nv.reshape(-1, 1)])  #found L77 in preprocess.py
            cwave = preprocess.conv_cwave(cwave)
            newds[vv] = xarray.DataArray(data=cwave,
                                         dims=dimszi,
                                         coords=coordi)
        elif vv == 'S':  #to ease the comparison with Justin files
            dimszi = ['time', 'Sdim']
            coordi = {'time': [fdatedt], 'Sdim': np.arange(20)}
            newds[vv] = xarray.DataArray(data=S.T, dims=dimszi, coords=coordi)
        elif vv in [
                'dxdt'
        ]:  #dx and dt and delta from coloc with alti see /home/cercache/users/jstopa/sar/empHs/cwaveV5, I can put zeros here at this stage
            dx = np.array([0])
            dt = np.array([1])
            dxdt = np.column_stack([dx, dt])
            dimszi = ['time', 'dxdtdim']
            coordi = {'time': [fdatedt], 'dxdtdim': np.arange(2)}
            #print('dxdt')
            newds[vv] = xarray.DataArray(data=dxdt, dims=dimszi, coords=coordi)
        elif vv in ['latlonSARcossin']:
            latSARcossin = preprocess.conv_position(
                subset_ok['latSAR'])  # Gets cos and sin
            lonSARcossin = preprocess.conv_position(subset_ok['lonSAR'])
            latlonSARcossin = np.hstack([latSARcossin, lonSARcossin])
            dimszi = ['time', 'latlondim']
            coordi = {'time': [fdatedt], 'latlondim': np.arange(4)}
            newds[vv] = xarray.DataArray(data=latlonSARcossin,
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['todSAR']:
            dimszi = ['time']
            coordi = {'time': [fdatedt]}
            newds[vv] = xarray.DataArray(data=subset_ok['todSAR'],
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['oswK']:
            dimszi = ['time', 'oswWavenumberBinSize']
            coordi = {
                'time': [fdatedt],
                'oswWavenumberBinSize': np.arange(len(ks1))
            }
            newds[vv] = xarray.DataArray(data=ks1.reshape((1, len(ks1))),
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in [
                'incidence',
        ]:
            dimszi = ['time', 'incdim']
            coordi = {'time': [fdatedt], 'incdim': np.arange(2)}
            incidence = preprocess.conv_incidence(
                ds['oswIncidenceAngle'].values.squeeze())
            newds[vv] = xarray.DataArray(data=incidence,
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['incidence_angle']:
            dimszi = ['time']
            olddims = [
                x for x in ds['oswIncidenceAngle'].dims
                if x not in ['oswAzSize', 'oswRaSize']
            ]
            coordi = {}
            for didi in olddims:
                coordi[didi] = ds['oswIncidenceAngle'].coords[didi].values
            coordi['time'] = [fdatedt]
            incidence = np.array([ds['oswIncidenceAngle'].values.squeeze()])
            newds[vv] = xarray.DataArray(data=incidence,
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['satellite']:
            dimszi = ['time']
            coordi = {'time': [fdatedt]}
            satellite_int = np.array([satellite[2] == 'a']).astype(int)
            newds[vv] = xarray.DataArray(data=satellite_int,
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['platformName']:
            dimszi = ['time']
            coordi = {'time': [fdatedt]}
            satellite_int = np.array([satellite])
            newds[vv] = xarray.DataArray(data=satellite_int,
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['nrcs']:
            dimszi = ['time']
            coordi = {'time': [fdatedt]}
            newds[vv] = xarray.DataArray(data=s0.reshape((1, )),
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['heading']:
            dimszi = ['time']
            coordi = {'time': [fdatedt]}
            newds[vv] = xarray.DataArray(data=ds['oswHeading'].values.reshape(
                (1, )),
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['nv']:
            dimszi = ['time']
            coordi = {'time': [fdatedt]}
            newds[vv] = xarray.DataArray(data=nv.reshape((1, )),
                                         dims=dimszi,
                                         coords=coordi)
        elif vv in ['oswQualityCrossSpectraRe', 'oswQualityCrossSpectraIm']:
            if vv == 'oswQualityCrossSpectraRe':
                datatmp = cspcRe
            elif vv == 'oswQualityCrossSpectraIm':
                datatmp = cspcIm
            else:
                raise Exception()
            #datatmp = ds[vv].values.squeeze()
            #olddims = [x for x in ds[vv].dims if x not in ['oswAzSize','oswRaSize']]
            coordi = {}
            #for didi in olddims:
            #    coordi[didi] = ds[vv].coords[didi].values
            coordi['time'] = [fdatedt]
            coordi['oswAngularBinSize'] = np.arange(len(ths1))
            coordi['oswWavenumberBinSize'] = np.arange(len(ks1))
            dimsadd = ['time', 'oswAngularBinSize', 'oswWavenumberBinSize']
            if datatmp.shape == (72, 60):  # case of a single spectrum
                datatmp = datatmp.reshape((1, 72, 60))

            newds[vv] = xarray.DataArray(data=datatmp,
                                         dims=dimsadd,
                                         coords=coordi)
        else:
            datatmp = ds[vv].values.squeeze()
            olddims = [
                x for x in ds[vv].dims if x not in ['oswAzSize', 'oswRaSize']
            ]
            coordi = {}
            for didi in olddims:
                coordi[didi] = ds[vv].coords[didi].values
            coordi['time'] = [fdatedt]
            dimsadd = ['time']
            newds[vv] = xarray.DataArray(data=[datatmp],
                                         dims=dimsadd,
                                         coords=coordi)
        #logging.debug('field xarray : %s %s',vv,newds[vv])
    logging.debug('newds: %s', newds)
    return newds
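
A minimal usage sketch for preproc_ocn_wv, not part of the original example: it mirrors the commented-out open_mfdataset call visible in prepare_training_dataset_core further down. The glob pattern is an illustrative placeholder, and combine='nested' is assumed for a recent xarray version.

import glob
import xarray

# assumption: Sentinel-1 OCN WV netCDF files under an illustrative pattern
pattern_path = '/tmp/ocn_wv/s1?-wv*-ocn-*.nc'
files = sorted(glob.glob(pattern_path))
# each file is preprocessed by preproc_ocn_wv and the results are concatenated along 'time'
ocn_wv_ds = xarray.open_mfdataset(files,
                                  preprocess=preproc_ocn_wv,
                                  concat_dim='time',
                                  combine='nested')
print(ocn_wv_ds)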
Example #2
def preproc_ref_input(ds):
    """

    :param ds:
    :return:
    """
    filee = ds.encoding["source"]
    logging.debug('filee %s',os.path.basename(filee))
    fdate = ds['timeSAR'].values
    try:
        fdatedt = netCDF4.num2date(fdate, ds['timeSAR'].units)
    except Exception:
        fdatedt = fdate
    logging.debug('fdatedt : %s',fdatedt)

    real_dates = []
    if 'fileNameL2' in ds:
        # Stopa reference input/output version
        filesL2 = ds['fileNameL2'].values
    else:
        # ifr training dataset version
        filesL2 = ds['fileNameFull'].values
    for tt in range(len(ds['timeSAR'])):
        if tt % 10000 == 0:
            print(tt, '/', len(ds['timeSAR']))
        fileL2 = filesL2[tt]
        if not isinstance(fileL2, str):
            fileL2 = fileL2.decode()
        dt = datetime.datetime.strptime(os.path.basename(fileL2).split('-')[4], '%Y%m%dt%H%M%S')
        # print(dt)
        real_dates.append(dt)

    #fdate = datetime.datetime.strptime(os.path.basename(filee).split('-')[4],'%Y%m%dt%H%M%S')
    #ds['time'] = xarray.DataArray([fdate],dims=['time'])
    #ds = ds.sortby('time',ascending=True)
    newds = xarray.Dataset()

    #format data for CWAVE 22 params computation
    #cspcRe = ds['oswQualityCrossSpectraRe'].values.squeeze().T
    #cspcIm = ds['oswQualityCrossSpectraIm'].values.squeeze().T
    #ths1 = ds['oswPhi'].values.squeeze()
    #ks1 = ds['oswK'].values.squeeze()
    #ta = ds['oswHeading'].values.squeeze()
    #incidenceangle =ds['oswIncidenceAngle'].values.squeeze()
    s0 = ds['sigma0'].values.squeeze()
    nv = ds['normalizedVariance'].values.squeeze()
    nv = nv.reshape((len(nv), 1))  # to allow concatenation with the 2D S variable
    s0 = s0.reshape((len(s0), 1))
    logging.debug('s0: %s', s0.shape)
    if 'S' in ds:
        varname_20CWAVEparam = 'S'
        varstoadd = ['cwave', 'dxdt', 'latlonSARcossin', 'todSAR', 'incidence', 'satellite', 'cspcRe', 'cspcIm',
                     'hsNN', 'hsNNSTD']
    else:
        varname_20CWAVEparam = 'py_S'
        varstoadd = ['cwave', 'dxdt', 'latlonSARcossin', 'todSAR', 'incidence', 'satellite', 'cspcRe', 'cspcIm',
                     'hsALT', 'hsALTmin', 'hsALTmax', 'hsWW3', 'hsSM', 'hsWW3v2']
    logging.debug('ds[S] %s %s',ds[varname_20CWAVEparam].shape,ds[varname_20CWAVEparam])
    #ds['S'] = ds['S'].astype('float32',casting='unsafe')
    #nc = netCDF4.Dataset(filee) #patch because S params saved by JStopa are not readable with xarray (different dtypes)
    #S = nc.variables['S'][:,0].astype('float32')
    #logging.info('S from nc: %s',S.shape)
    #lonSAR = ds['oswLon'].values.squeeze()
    #latSAR = ds['oswLat'].values.squeeze()
    #satellite = os.path.basename(filee)[0:3]
    # subset_ok,flagKcorrupted,cspcReX,cspcImX,cspcRe,ks1,ths1,kx,ky,cspcReX_not_conservativ,S = compute_hs_total_SAR_v2.format_input_CWAVE_vector_from_OCN(cspcRe,
    #                                                                         cspcIm,ths1,ta,incidenceangle,s0,nv,ks1,fdate,lonSAR,latSAR,satellite)


    #additional_vars_for_validation = ['oswLon','oswLat','oswLandFlag','oswIncidenceAngle']
    #varstoadd += additional_vars_for_validation
    newds['timeSAR'] = xarray.DataArray(fdate,dims=['time'],coords={'time':fdate})
    #newds['timeSARdt'] = xarray.DataArray(fdatedt,dims=['time'],coords={'time':fdate})
    newds['timeSARdt'] = xarray.DataArray(real_dates,dims='time',coords={'time':fdate})
    if 'S1A' in filee:
        satellite = 0
    else:
        satellite = 1
    logging.debug('newds with only time: %s',newds)
    for vv in varstoadd:
        logging.debug('vv : %s',vv)
        if vv in ['cwave']:
            dimszi = ['time','cwavedim']
            coordi= {'time':fdate,'cwavedim':np.arange(22)}
            #tmptmp = ds['S'].astype('float32',casting='unsafe').values[:,1]
            tmptmp = ds[varname_20CWAVEparam].values
            logging.debug('tmptmp : %s %s %s',tmptmp.shape,type(tmptmp),tmptmp.dtype)
            logging.debug('s0 %s',s0.shape)
            logging.debug('nV : %s',nv.shape)
            cwave = np.hstack([tmptmp, s0, nv]) #found L77 in preprocess.py
            logging.debug('cwave : %s',cwave.shape)
            cwave = preprocess.conv_cwave(cwave)
            logging.debug('cwave after normalization : %s,%s',cwave.shape,type(cwave))
            newds[vv] = xarray.DataArray(cwave,coords=coordi,dims=dimszi)
        elif vv in ['dxdt']: #dx and dt and delta from coloc with alti see /home/cercache/users/jstopa/sar/empHs/cwaveV5, I can put zeros here at this stage
            #dx = preprocess.conv_dx(fs['dx'][indices])
            #dt = preprocess.conv_dt(fs['dt'][indices])
            #dx = np.array([0])
            #dt = np.array([1])
            dx = np.zeros(len(fdate))
            dt = np.zeros(len(fdate))
            dxdt = np.column_stack([dx, dt])
            logging.debug('dxdt: %s %s',dxdt.shape,dxdt)
            dimszi = ['time','dxdtdim']
            coordi= {'time':fdate,'dxdtdim':np.arange(2)}
            #print('dxdt')
            newds[vv] = xarray.DataArray(data=dxdt,dims=dimszi,coords=coordi)
        elif vv in ['latlonSARcossin']:
            latSARcossin = preprocess.conv_position(ds['latSAR']) # Gets cos and sin
            lonSARcossin = preprocess.conv_position(ds['lonSAR'])
            latlonSARcossin = np.hstack([latSARcossin, lonSARcossin])
            dimszi = ['time','latlondim']
            coordi= {'time':fdate,'latlondim':np.arange(4)}
            newds[vv] = xarray.DataArray(data=latlonSARcossin,dims=dimszi,coords=coordi)
        elif vv in ['todSAR']:
            dimszi = ['time']
            coordi= {'time':fdate}
            todSAR = preprocess.conv_time(fdate)
            logging.debug('todSAR : %s',todSAR)
            newds[vv] = xarray.DataArray(data=todSAR,dims=dimszi,coords=coordi)
        elif vv in ['incidence',]:
            dimszi = ['time','incdim']
            coordi= {'time':fdate,'incdim':np.arange(2)}
            incidence = preprocess.conv_incidence(ds['incidenceAngle'].values.squeeze())
            newds[vv] = xarray.DataArray(data=incidence,dims=dimszi,coords=coordi)
        elif vv in ['satellite']:
            dimszi = ['time']
            coordi= {'time':fdate}
            #satellite_int = np.array([satellite[2] == 'a']).astype(int)
            #satellite_int = np.repeat(satellite_int,len(fdate))
            satellite_int = np.ones((ds['timeSAR'].shape[0], ), dtype=float) * satellite
            logging.debug('satellite_int = %s',satellite_int.shape)
            newds[vv] = xarray.DataArray(data=satellite_int,dims=dimszi,coords=coordi)
        elif vv in ['cspcRe','cspcIm']:
            datatmp = ds[vv].values.squeeze()
            logging.debug('vv: %s shape : %s',vv,datatmp.shape)
            olddims = [x for x in ds[vv].dims if x not in ['oswAzSize','oswRaSize']]
            coordi = {}
            for didi in olddims:
                coordi[didi] = ds[vv].coords[didi].values
            coordi['time'] = fdate
            dimsadd= ['time','directions','wavenumbers']
            #datatmp = datatmp.reshape((1,72,60))
            newds[vv] = xarray.DataArray(data=datatmp,dims=dimsadd,coords=coordi)
        else:
            datatmp = ds[vv].values.squeeze()
            olddims = [x for x in ds[vv].dims if x not in ['oswAzSize','oswRaSize']]
            coordi = {}
            for didi in olddims :
                coordi[didi] = ds[vv].coords[didi].values
            coordi['time'] = fdate
            dimsadd = ['time']
            newds[vv] = xarray.DataArray(data=datatmp,dims=dimsadd,coords=coordi)

    return newds
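
# A minimal usage sketch for preproc_ref_input (not part of the original example);
# the file path below is an illustrative placeholder. Opening with decode_times=False
# keeps 'timeSAR' numeric with its units attribute, which is what the num2date call
# in the function expects.
ref_file = '/tmp/ref_input_output_cwave.nc'  # assumption: illustrative path
raw = xarray.open_dataset(ref_file, decode_times=False)
formatted = preproc_ref_input(raw)
print(formatted)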
def prepare_training_dataset_core(ds_train_raw, validation_dataset=False):
    """
    this method I used for building training dataset and also to do the validation dataset
    :param ds_train_raw:
    :return:
    """
    # except: #for py2.7 version
    #    ocn_wv_ds = xarray.open_mfdataset(pattern_path,concat_dim='time',preprocess=preproc_ocn_wv)
    logging.info('Nb pts in dataset: %s', ds_train_raw['timeSAR'].size)
    varstoadd = [
        'S', 'cwave', 'dxdt', 'latlonSARcossin', 'todSAR', 'incidence',
        'satellite', 'oswQualityCrossSpectraRe', 'oswQualityCrossSpectraIm'
    ]
    # additional_vars_for_validation = ['oswLon','oswLat','oswLandFlag','oswIncidenceAngle','oswWindSpeed','platformName',
    #                                  'nrcs','nv','heading','oswK','oswNrcs']
    # varstoadd += additional_vars_for_validation
    if validation_dataset:
        varstoadd.append('py_cspcImX')
        varstoadd.append('py_cspcReX')
        varstoadd.append('fileNameL2')
    if 'hsSM' in ds_train_raw:
        varstoadd += ['hsSM']
    S = ds_train_raw['py_S'].values
    s0 = ds_train_raw['sigma0']
    nv = ds_train_raw['normalizedVariance'].values
    ds_training_normalized = xarray.Dataset()
    timeSAR_vals = ds_train_raw['timeSAR'].values  # hours since ....
    #apath = ('').join([ddc.decode() for ddc in filenames_L2[iiu,:]])
    timeSAR_seconds = np.array([
        datetime.datetime.strptime(
            os.path.basename(fup.decode()).split('-')[4], '%Y%m%dt%H%M%S')
        for fup in ds_train_raw['fileNameL2'].values
    ])
    ths1 = ds_train_raw['th'].values
    ks1 = ds_train_raw['k'].values
    if 'fileNameFull' in ds_train_raw:
        fpaths = ds_train_raw['fileNameFull'].values
        #varstoadd.append('fileNameFull')
    else:
        fpaths = ds_train_raw[
            'fileNameL2'].values  # the 2019 dataset is a bit different
        #varstoadd.append('fileNameL2')
    satellites = np.array([os.path.basename(hhy)[0:3] for hhy in fpaths])
    satellites_int = np.array([
        threelettersat[2] == 'a' for threelettersat in satellites
    ]).astype(int)
    cspcRe = ds_train_raw['cspcRe'].values
    cspcIm = ds_train_raw['cspcIm'].values
    for vv in varstoadd:
        logging.info('start format variable :%s', vv)
        if vv in ['cwave']:
            dimszi = ['time', 'cwavedim']
            coordi = {'time': timeSAR_seconds, 'cwavedim': np.arange(22)}
            logging.debug('S %s s0: %s nv: %s', S.shape, s0.shape, nv.shape)
            cwave = np.vstack([S.T, s0, nv]).T  # found L77 in preprocess.py
            logging.debug('cwave vals: %s', cwave.shape)
            cwave = preprocess.conv_cwave(cwave)
            ds_training_normalized[vv] = xarray.DataArray(data=cwave,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['fileNameFull', 'fileNameL2']:
            # dimszi = ['time','pathnchar']
            # coordi = {'time' : timeSAR_seconds,'pathnchar' : len(fpaths[0])}
            dimszi = ['time']
            coordi = {'time': timeSAR_seconds}
            ds_training_normalized[vv] = xarray.DataArray(data=fpaths,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv == 'S':  # to ease the comparison with Justin files
            dimszi = ['time', 'Sdim']
            coordi = {'time': timeSAR_seconds, 'Sdim': np.arange(20)}
            ds_training_normalized[vv] = xarray.DataArray(data=S,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in [
                'dxdt'
        ]:  # dx and dt and delta from coloc with alti see /home/cercache/users/jstopa/sar/empHs/cwaveV5, I can put zeros here at this stage
            #dxdt = np.column_stack([ds_train_raw['dx'].values,ds_train_raw['dt'].values])
            dxdt = np.column_stack([np.zeros(s0.shape), np.ones(s0.shape)])
            dimszi = ['time', 'dxdtdim']
            coordi = {'time': timeSAR_seconds, 'dxdtdim': np.arange(2)}
            ds_training_normalized[vv] = xarray.DataArray(data=dxdt,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['latlonSARcossin']:
            latSARcossin = preprocess.conv_position(
                ds_train_raw['latSAR'].values)  # Gets cos and sin
            lonSARcossin = preprocess.conv_position(
                ds_train_raw['lonSAR'].values)
            latlonSARcossin = np.hstack([latSARcossin, lonSARcossin])
            dimszi = ['time', 'latlondim']
            coordi = {'time': timeSAR_seconds, 'latlondim': np.arange(4)}
            ds_training_normalized[vv] = xarray.DataArray(data=latlonSARcossin,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['todSAR']:
            dimszi = ['time']
            new_dates_dt = np.array(
                [from_np64_to_dt(dt64) for dt64 in timeSAR_vals])
            unit = "hours since 2010-01-01T00:00:00Z UTC"  # see https://github.com/grouny/sar_hs_nn/blob/c05322e6635c6d77409e36537d7c3b58788e7322/sarhspredictor/lib/sarhs/preprocess.py#L11
            new_dates_num = np.array(
                [netCDF4.date2num(dfg, unit) for dfg in new_dates_dt])
            coordi = {'time': timeSAR_seconds}
            todSAR = conv_time(new_dates_num)
            ds_training_normalized[vv] = xarray.DataArray(data=todSAR,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['oswK']:
            dimszi = ['time', 'oswWavenumberBinSize']
            coordi = {
                'time': timeSAR_seconds,
                'oswWavenumberBinSize': np.arange(len(ks1))
            }
            ds_training_normalized[vv] = xarray.DataArray(data=ks1,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in [
                'incidence',
        ]:
            dimszi = ['time', 'incdim']
            coordi = {'time': timeSAR_seconds, 'incdim': np.arange(2)}
            incidence = preprocess.conv_incidence(
                ds_train_raw['incidenceAngle'].values.squeeze())
            ds_training_normalized[vv] = xarray.DataArray(data=incidence,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['incidence_angle']:
            dimszi = ['time']
            olddims = [
                x for x in ds_train_raw['incidenceAngle'].dims
                if x not in ['oswAzSize', 'oswRaSize']
            ]
            coordi = {}
            for didi in olddims:
                coordi[didi] = ds_train_raw['incidenceAngle'].coords[
                    didi].values
            coordi['time'] = timeSAR_seconds
            incidence = np.array(
                [ds_train_raw['incidenceAngle'].values.squeeze()])
            ds_training_normalized[vv] = xarray.DataArray(data=incidence,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['satellite']:
            dimszi = ['time']
            coordi = {'time': timeSAR_seconds}
            # satellite_int = np.array([satellite[2] == 'a']).astype(int)
            ds_training_normalized[vv] = xarray.DataArray(data=satellites_int,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['platformName']:
            dimszi = ['time']
            coordi = {'time': timeSAR_seconds}
            satellite_int = satellites
            ds_training_normalized[vv] = xarray.DataArray(data=satellite_int,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['nrcs']:
            dimszi = ['time']
            coordi = {'time': timeSAR_seconds}
            ds_training_normalized[vv] = xarray.DataArray(data=s0,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['heading']:
            dimszi = ['time']
            coordi = {'time': timeSAR_seconds}
            ds_training_normalized[vv] = xarray.DataArray(
                data=ds_train_raw['trackAngle'].values,
                dims=dimszi,
                coords=coordi)
        elif vv in ['nv']:
            dimszi = ['time']
            coordi = {'time': timeSAR_seconds}
            ds_training_normalized[vv] = xarray.DataArray(data=nv,
                                                          dims=dimszi,
                                                          coords=coordi)
        elif vv in ['oswQualityCrossSpectraRe', 'oswQualityCrossSpectraIm']:
            if vv == 'oswQualityCrossSpectraRe':
                datatmp = cspcRe
            elif vv == 'oswQualityCrossSpectraIm':
                datatmp = cspcIm
            else:
                raise Exception()
            # datatmp = ds[vv].values.squeeze()
            # olddims = [x for x in ds[vv].dims if x not in ['oswAzSize','oswRaSize']]
            coordi = {}
            # for didi in olddims:
            #    coordi[didi] = ds[vv].coords[didi].values
            coordi['time'] = timeSAR_seconds
            coordi['oswAngularBinSize'] = np.arange(len(ths1))
            coordi['oswWavenumberBinSize'] = np.arange(len(ks1))
            dimsadd = ['time', 'oswAngularBinSize', 'oswWavenumberBinSize']
            # if datatmp.shape == (72,60) :  # case only one spectra
            #    datatmp = datatmp.reshape((1,72,60))

            ds_training_normalized[vv] = xarray.DataArray(data=datatmp,
                                                          dims=dimsadd,
                                                          coords=coordi)
        elif vv in ['py_cspcImX', 'py_cspcReX']:
            datatmp = ds_train_raw[vv].values
            coordi = ds_train_raw[vv].coords
            coordi['time'] = timeSAR_seconds
            dimsadd = ds_train_raw[vv].dims
            ds_training_normalized[vv] = xarray.DataArray(data=datatmp,
                                                          dims=dimsadd,
                                                          coords=coordi)
        else:
            datatmp = ds_train_raw[vv].values.squeeze()
            olddims = [
                x for x in ds_train_raw[vv].dims
                if x not in ['oswAzSize', 'oswRaSize']
            ]
            coordi = {}
            for didi in olddims:
                coordi[didi] = ds_train_raw[vv].coords[didi].values
            coordi['time'] = timeSAR_seconds
            dimsadd = ['time']
            logging.info('data: %s', datatmp.shape)
            ds_training_normalized[vv] = xarray.DataArray(data=datatmp,
                                                          dims=dimsadd,
                                                          coords=coordi)
        # logging.debug('field xarray : %s %s',vv,newds[vv])
    logging.debug('newds: %s', ds_training_normalized)
    logging.info('SAR data ready to be used')
    # cspcRe = ds_train_raw['oswQualityCrossSpectraRe'].values
    # cspcIm = ds_train_raw['oswQualityCrossSpectraIm'].values
    re = preprocess.conv_real(cspcRe)
    im = preprocess.conv_imaginary(cspcIm)
    logging.info('re : %s', re.shape)
    logging.info('im : %s', im.shape)
    spectrum = np.stack((re, im), axis=3)
    logging.info('spectrum shape : %s', spectrum.shape)
    return spectrum, ds_training_normalized
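
A minimal usage sketch for prepare_training_dataset_core, not part of the original example; the input path is an illustrative placeholder and the file is assumed to already contain the raw variables read above (timeSAR, py_S, sigma0, normalizedVariance, cspcRe, cspcIm, fileNameL2, th, k, ...).

import xarray

raw_training_file = '/tmp/aggregated_training_dataset_raw.nc'  # assumption: illustrative path
ds_train_raw = xarray.open_dataset(raw_training_file, decode_times=False)
spectrum, ds_training_normalized = prepare_training_dataset_core(ds_train_raw,
                                                                 validation_dataset=False)
# spectrum stacks the normalized real/imaginary cross spectra on the last axis,
# ds_training_normalized holds the cwave and auxiliary predictors indexed by time
print(spectrum.shape)
print(ds_training_normalized)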
Example #4
def split_aggregated_ds(file_src, file_dest):
    """

    :param file_src:
    :param file_dest:
    :return:
    """
    groups = {'2015_2016': [2015, 2016], '2017': [2017], '2018': [2018]}
    # Print fields of source file.
    with h5py.File(file_src, 'r') as f:
        for k in f.keys():
            print(f'{k}: {f[k].dtype}')

    # Create h5.
    with h5py.File(file_src, 'r') as fs, h5py.File(file_dest, 'w') as fd:
        for group_name, years in groups.items():
            grp = fd.create_group(group_name)

            # Find examples of the specified years.
            indices = np.zeros_like(fs['year'][:], dtype='bool')
            for year in years:
                indices = np.logical_or(fs['year'][:] == year, indices)
            # Find examples that don't have nans.
            indices[np.any(np.isnan(fs['py_S'][:]), axis=1)] = 0
            indices[np.isnan(fs['sigma0'][:])] = 0
            indices[np.isnan(fs['normalizedVariance'][:])] = 0
            # Done
            num_examples = indices.sum()
            print(f'Found {num_examples} events from years: ', years)

            # Write data from this year.
            # print(fs['year'][indices].shape)
            grp.create_dataset('year', data=fs['year'][indices])

            # Get 22 CWAVE features.
            cwave = np.hstack([
                fs['py_S'][indices, ...], fs['sigma0'][indices].reshape(-1, 1),
                fs['normalizedVariance'][indices].reshape(-1, 1)
            ])
            cwave = preprocess.conv_cwave(
                cwave
            )  # Remove extrema, then standardize with hardcoded mean,vars.
            grp.create_dataset('cwave', data=cwave)

            # Additional features.
            dx = preprocess.conv_dx(fs['dx'][indices])
            dt = preprocess.conv_dt(fs['dt'][indices])
            grp.create_dataset('dxdt', data=np.column_stack([dx, dt]))

            latSAR = fs['latSAR'][indices]
            lonSAR = fs['lonSAR'][indices]
            latSARcossin = preprocess.conv_position(latSAR)  # Gets cos and sin
            lonSARcossin = preprocess.conv_position(lonSAR)
            grp.create_dataset('latlonSAR',
                               data=np.column_stack([latSAR, lonSAR]))
            grp.create_dataset('latlonSARcossin',
                               data=np.hstack([latSARcossin, lonSARcossin]))

            timeSAR = fs['timeSAR'][indices]
            todSAR = preprocess.conv_time(timeSAR)
            grp.create_dataset('timeSAR',
                               data=timeSAR,
                               shape=(timeSAR.shape[0], 1))
            grp.create_dataset('todSAR',
                               data=todSAR,
                               shape=(todSAR.shape[0], 1))

            incidence = preprocess.conv_incidence(
                fs['incidenceAngle'][indices])  # Separates into 2 var.
            grp.create_dataset('incidence', data=incidence)

            satellite = fs['satellite'][indices]
            grp.create_dataset('satellite',
                               data=satellite,
                               shape=(satellite.shape[0], 1))

            # Altimeter
            hsALT = fs['hsALT'][indices]
            grp.create_dataset('hsALT', data=hsALT, shape=(hsALT.shape[0], 1))

            # Get spectral data.
            x = np.stack((
                preprocess.conv_real(fs['cspcRe'][indices, ...]),
                preprocess.conv_imaginary(fs['cspcIm'][indices, ...]),
            ),
                         axis=3)
            grp.create_dataset('spectrum', data=x)
            print(f'Done with {years}')
    print('Done')
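
A minimal usage sketch for split_aggregated_ds, not part of the original example; the HDF5 paths are illustrative placeholders and the source file is assumed to expose the fields read above (year, py_S, sigma0, normalizedVariance, dx, dt, latSAR, lonSAR, timeSAR, incidenceAngle, satellite, hsALT, cspcRe, cspcIm).

# assumption: illustrative HDF5 paths
aggregated_h5 = '/tmp/alt_sar_colocs_aggregated.h5'
per_year_h5 = '/tmp/alt_sar_colocs_by_years.h5'
split_aggregated_ds(aggregated_h5, per_year_h5)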