# Imports inferred from usage throughout this module. find_files, get_template,
# s, b, paths, EASE2, and LDAS_io are project-internal helpers assumed to be
# defined in (or importable from) elsewhere in this repository.
import os
import platform
import re
import tarfile
from collections import OrderedDict

import numpy as np
import pandas as pd
import xarray as xr
from netCDF4 import Dataset, date2num


def replace_orbit_field():
    """ Reset the orbit field (second big-endian int32) of all descending-orbit ('_D_p') scaling files. """
    root = r'C:\Users\u0116961\Documents\VSC\vsc_data_copies\scratch_TEST_RUNS\US_M36_SMOS_noDA_unscaled\obs_scaling'
    for f in find_files(root, '_D_p'):
        data = np.fromfile(f, '>i4')
        data[1] = 0
        data.tofile(f)
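# --- Assumed helper (a minimal sketch, not the authoritative implementation) ---
# All routines in this module rely on a project-internal find_files(root, pattern).
# Its behavior is inferred from the call sites: recursively collect files under
# 'root' whose name contains 'pattern'; return None when nothing matches (see
# generate_cell_files), a single path when exactly one file matches (hence the
# np.atleast_1d() guard in the MSWEP reader below), and an array of paths otherwise.
def find_files(root, pattern):
    matches = np.array(sorted(os.path.join(path, f)
                              for path, _, files in os.walk(root)
                              for f in files if pattern in f))
    if len(matches) == 0:
        return None
    return matches[0] if len(matches) == 1 else matches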
def __init__(self, root=None, cellfiles=True):
    if root is None:
        if platform.system() == 'Windows':
            root = os.path.join('D:', 'data_sets', 'MSWEP_v21')
        elif platform.system() == 'Linux':
            root = os.path.join('/', 'data', 'leuven', '320', 'vsc32046', 'data_sets', 'MSWEP')
        else:
            root = os.path.join('~', 'data_sets', 'MSWEP_v21')

    if cellfiles is True:
        self.root = os.path.join(root, 'cellfiles')
        self.loaded_cell = None
        self.ds = None
    else:
        self.ds = Dataset(np.atleast_1d(find_files(root, '.nc4'))[0])

    self.grid = pd.read_csv(find_files(root, 'grid.csv'), index_col=0)
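# Usage sketch (the class name MSWEP_io is an assumption; only the __init__
# above is shown in this module):
#     io = MSWEP_io()                 # platform-specific default root, cell-file access
#     io = MSWEP_io(cellfiles=False)  # open the first gridded .nc4 stack directly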
def reshuffle(self):
    timeunit = 'hours since 2000-01-01 00:00'

    for version in self.versions:
        for mode in self.modes:
            files = find_files(os.path.join(self.root, mode, version), '.nc')
            # file names carry the date at a fixed position from the end
            dates = pd.DatetimeIndex([f[-24:-16] for f in files])
            meta = pd.Series(files, index=dates)
            meta = meta[self.date_range[0]:self.date_range[1]]

            fname = os.path.join(self.root, '_reshuffled', mode + '_' + version + '.nc')
            ds = Dataset(fname, mode='w')

            dates = date2num(meta.index.to_pydatetime(), timeunit).astype('int32')
            dimensions = OrderedDict([('time', dates), ('lat', self.lats), ('lon', self.lons)])

            # image-optimized chunking: one full spatial field per time step
            chunksizes = []
            for key, values in dimensions.items():
                chunksize = 1 if key == 'time' else len(values)
                chunksizes.append(chunksize)
                ds.createDimension(key, len(values))
                ds.createVariable(key, values.dtype, dimensions=(key,),
                                  chunksizes=(chunksize,), zlib=True)
                ds[key][:] = values
            ds.variables['time'].setncattr('units', timeunit)

            ds.createVariable('sm', 'float32', dimensions=list(dimensions.keys()),
                              chunksizes=chunksizes, fill_value=-9999., zlib=True)
            for i, f in enumerate(meta.values):
                tmp_ds = Dataset(f)
                ds['sm'][i, :, :] = tmp_ds.variables['sm'][0, :, :].data
                tmp_ds.close()

            ds.close()
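# Usage sketch (illustrative assumption; the file name is a placeholder): with
# the (time=1, lat=full, lon=full) chunking chosen above, reading one daily
# image from a reshuffled stack is a cheap, contiguous read, e.g.:
#     ds = xr.open_dataset(os.path.join(root, '_reshuffled', 'mode_version.nc'))
#     img = ds['sm'].sel(time='2015-06-01')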
def extract_L3_tar_files():
    root = r"D:\data_sets\SMOS_L3\raw"
    files = find_files(root, '.tgz')
    for cnt, f in enumerate(files):
        print('%i / %i' % (cnt, len(files)))

        # map raw/asc and raw/dsc onto a common unzipped/ascdsc directory; the
        # single-pass regex substitution prevents the 'dsc' inside a freshly
        # written 'ascdsc' from being substituted a second time
        out_path = re.sub('asc|dsc', 'ascdsc',
                          os.path.dirname(f).replace('raw', 'unzipped'))
        if not os.path.exists(out_path):
            os.makedirs(out_path)

        # extract only the .nc payload of the archive
        tmp = tarfile.open(f)
        member = [x for x in tmp.getnames() if x.find('.nc') != -1][0]
        tmp.extract(tmp.getmember(member), out_path)
        tmp.close()
def __init__(self, root=None):
    if root is None:
        if platform.system() == 'Windows':
            root = os.path.join('D:', 'data_sets', 'SMOS_L3')
        elif platform.system() == 'Linux':
            root = os.path.join('/', 'data', 'leuven', '320', 'vsc32046', 'data_sets', 'SMOS')
        else:
            root = os.path.join('~', 'data_sets', 'SMOS_L3')

    self.loaded_cell = None
    self.ds = None
    self.grid = pd.read_csv(find_files(root, 'grid.csv'), index_col=0)
    self.root = os.path.join(root, 'cellfiles')
def read_params(self, param, fname=None):
    """ Read parameter files (tilegrids, tilecoord, RTMparam, catparam). """
    if fname is None:
        fname = find_files(self.paths.rc_out, param)

    # 'tilegrids' files are the only ones without regular fortran tags
    reg_ftags = False if param == 'tilegrids' else True

    dtype, hdr, length = get_template(param)
    data = self.read_fortran_binary(fname, dtype, hdr=hdr, length=length, reg_ftags=reg_ftags)
    data.replace(-9999., np.nan, inplace=True)

    if param == 'tilegrids':
        data.index = ['global', 'domain']
    else:
        # index equals the 'tilenum', which starts at 1!
        data.index += 1

    return data
def generate_grid_file():
    files = find_files(r'D:\data_sets\SMOS_L3\cellfiles', '.nc')
    dgg = pd.read_csv(r"D:\data_sets\ASCAT\warp5_grid\pointlist_warp_conus.csv", index_col=0)
    ease_grid = LDAS_io(exp='US_M36_SMOS_DA_cal_scaled_yearly').grid

    grid = pd.DataFrame()
    for cnt, f in enumerate(files):
        print('%i / %i' % (cnt, len(files)))

        # coordinates of the current cell file
        tmp = Dataset(f)
        lats = tmp.variables['lat'][:]
        lons = tmp.variables['lon'][:]
        tmp.close()

        # continue the running point index across cell files
        offset = grid.index.values[-1] + 1 if len(grid) > 0 else 0
        idx = np.arange(offset, len(lats) * len(lons) + offset)
        tmp_grid = pd.DataFrame(columns=['lat', 'lon', 'row', 'col', 'ease_row', 'ease_col',
                                         'dgg_cell', 'dgg_gpi'], index=idx)

        for row, lat in enumerate(lats):
            for col, lon in enumerate(lons):
                tmp_grid.loc[offset, 'lat'] = lat
                tmp_grid.loc[offset, 'lon'] = lon
                tmp_grid.loc[offset, 'row'] = row
                tmp_grid.loc[offset, 'col'] = col

                # matching EASE2 grid cell
                ease_col, ease_row = ease_grid.lonlat2colrow(lon, lat, domain=True)
                tmp_grid.loc[offset, 'ease_row'] = ease_row
                tmp_grid.loc[offset, 'ease_col'] = ease_col

                # nearest WARP5 DGG point (euclidean distance in lat/lon space)
                tmp_grid.loc[offset, 'dgg_cell'] = int(os.path.basename(f)[0:4])
                r = np.sqrt((dgg.lon - lon)**2 + (dgg.lat - lat)**2)
                tmp_grid.loc[offset, 'dgg_gpi'] = dgg.iloc[np.where(abs(r - r.min()) < 0.0001)[0][0], 0]

                offset += 1

        grid = pd.concat((grid, tmp_grid))

    grid.to_csv(r'D:\data_sets\SMOS_L3\grid.csv')
def __init__(self, param=None, exp=None, domain=None):
    self.paths = paths(exp=exp, domain=domain)

    self.obsparam = self.read_obsparam()
    self.tilecoord = self.read_params('tilecoord')
    self.tilegrids = self.read_params('tilegrids')
    self.grid = EASE2(tilecoord=self.tilecoord, tilegrids=self.tilegrids)

    self.param = param
    if param is not None:
        if param == 'xhourly':
            path = getattr(self.paths, 'cat')
        else:
            path = getattr(self.paths, 'exp_root')
        self.files = find_files(path, param)

        if self.files[0].find('images.nc') == -1:
            print('NetCDF image cube not yet created. Use method "bin2netcdf".')
            self.dates = pd.to_datetime([f[-18:-5] for f in self.files], format='%Y%m%d_%H%M').sort_values()
            # TODO: Currently valid for 3-hourly data only! Times of the END of the 3-hr periods are assigned!
            # if self.param == 'xhourly':
            #     self.dates += pd.to_timedelta('2 hours')
            self.dtype, self.hdr, self.length = get_template(self.param)
        else:
            self.images = xr.open_dataset(self.files[0])
            if self.files[1].find('timeseries.nc') == -1:
                print('NetCDF time series cube not yet created. Use the NetCDF kitchen sink.')
            else:
                self.timeseries = xr.open_dataset(self.files[1])
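# Usage sketch (illustrative; only the constructor above is shown here, and the
# attribute availability depends on whether bin2netcdf has already been run):
#     io = LDAS_io(param='xhourly', exp='US_M36_SMOS_DA_cal_scaled_yearly')
#     img = io.images   # available once the NetCDF image cube exists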
def read_obsparam(self):
    """ Read the 'obsparam' file. """
    with open(find_files(self.paths.rc_out, 'obsparam')) as fp:
        lines = fp.readlines()[1::]
    n_lines = len(lines)

    # 30 or 32 fields per species (before and after the two entries for the
    # use of uncertainty maps were introduced)
    n_fields = 32 if n_lines == 128 else 30
    n_blocks = n_lines // n_fields

    res = []
    for bl in np.arange(n_blocks) * n_fields:
        # fields shared by both file versions
        res.append({'descr': s(lines[bl + 0]),
                    'species': int(lines[bl + 1]),
                    'orbit': int(lines[bl + 2]),
                    'pol': int(lines[bl + 3]),
                    'N_ang': int(lines[bl + 4]),
                    'ang': float(lines[bl + 5]),
                    'freq': float(lines[bl + 6]),
                    'FOV': float(lines[bl + 7]),
                    'FOV_units': s(lines[bl + 8]),
                    'assim': b(lines[bl + 9]),
                    'scale': b(lines[bl + 10]),
                    'getinnov': b(lines[bl + 11]),
                    'RTM_ID': int(lines[bl + 12]),
                    'bias_Npar': int(lines[bl + 13]),
                    'bias_trel': int(lines[bl + 14]),
                    'bias_tcut': int(lines[bl + 15]),
                    'nodata': float(lines[bl + 16]),
                    'varname': s(lines[bl + 17]),
                    'units': s(lines[bl + 18]),
                    'path': s(lines[bl + 19]),
                    'name': s(lines[bl + 20]),
                    'scalepath': s(lines[bl + 21]),
                    'scalename': s(lines[bl + 22]),
                    'errstd': float(lines[bl + 23])})
        if n_fields == 32:
            # newer files carry two additional entries for uncertainty maps
            res[-1].update({'errstd_file': b(lines[bl + 24]),
                            'path_errstd': s(lines[bl + 25]),
                            'std_normal_max': float(lines[bl + 26]),
                            'zeromean': b(lines[bl + 27]),
                            'coarsen_pert': b(lines[bl + 28]),
                            'xcorr': float(lines[bl + 29]),
                            'ycorr': float(lines[bl + 30]),
                            'adapt': int(lines[bl + 31])})
        else:
            res[-1].update({'std_normal_max': float(lines[bl + 24]),
                            'zeromean': b(lines[bl + 25]),
                            'coarsen_pert': b(lines[bl + 26]),
                            'xcorr': float(lines[bl + 27]),
                            'ycorr': float(lines[bl + 28]),
                            'adapt': int(lines[bl + 29])})

    return pd.DataFrame(res)
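# --- Assumed parsing helpers (minimal sketches, not the authoritative code) ---
# read_obsparam() parses entries with s() and b(), project-internal helpers not
# shown in this module; their behavior is inferred from the obsparam file format.
def s(line):
    """ Strip whitespace and surrounding quotes from a fortran string entry. """
    return line.strip().strip("'\"")

def b(line):
    """ Parse a fortran logical (.true. / .false.) into a Python bool. """
    return line.strip().lower() in ('.true.', 'true', 't')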
def generate_cell_files():
    path_in = r'D:\data_sets\SMOS_L3\unzipped' + '\\'
    path_out = r'D:\data_sets\SMOS_L3\cellfiles' + '\\'
    if not os.path.exists(path_out):
        os.makedirs(path_out)

    # SMOS image coordinates
    tmp = Dataset(r"D:\data_sets\SMOS_L3\unzipped\2010\015\SM_RE04_MIR_CLF31A_20100115T000000_20100115T235959_300_001_7.DBL.nc")
    lats = tmp.variables['lat'][:]
    lons = tmp.variables['lon'][:]
    tmp.close()

    # WARP cells and coordinates
    dgg_grid = Dataset(r"D:\data_sets\ASCAT\warp5_grid\TUW_WARP5_grid_info_2_2.nc")
    dgg_lats = dgg_grid['lat'][:]
    dgg_lons = dgg_grid['lon'][:]
    dgg_cells = dgg_grid['cell'][:]
    dgg_grid.close()

    # Cell list
    conus_gpis = pd.read_csv(r"D:\data_sets\ASCAT\warp5_grid\pointlist_warp_conus.csv", index_col=0)
    cells = np.unique(conus_gpis['cell'])

    # NC parameters
    timeunit = 'hours since 2000-01-01 00:00'
    smunit = 'm3/m3'

    # Date range
    dates = pd.date_range('2010-01-15', '2015-05-06').to_pydatetime()
    num_dates = date2num(dates, timeunit).astype('int32')

    for cell in cells:
        print(cell)

        # bounding box of the current WARP cell
        latmin = dgg_lats[dgg_cells == cell].min()
        latmax = dgg_lats[dgg_cells == cell].max()
        lonmin = dgg_lons[dgg_cells == cell].min()
        lonmax = dgg_lons[dgg_cells == cell].max()

        ind_lats = np.where((lats >= latmin) & (lats <= latmax))[0]
        ind_lons = np.where((lons >= lonmin) & (lons <= lonmax))[0]
        tmp_lats = lats[ind_lats]
        tmp_lons = lons[ind_lons]

        res_arr = np.full((len(dates), len(tmp_lats), len(tmp_lons)), np.nan)

        # Read in SMOS native files (at most two images per day, which get averaged)
        for idx, date in enumerate(dates):
            print('%i / %i' % (idx, len(dates)))

            files = find_files(os.path.join(path_in, date.strftime('%Y')), date.strftime('%Y%m%d'))
            if files is None:
                continue

            tmp_res = np.full((len(tmp_lats), len(tmp_lons), 2), np.nan)
            for i, f in enumerate(files):
                ds = Dataset(f)
                data = ds.variables['Soil_Moisture'][ind_lats, ind_lons]
                tmp_res[:, :, i] = data
                if hasattr(data, 'fill_value'):
                    tmp_res[tmp_res == data.fill_value] = np.nan
                ds.close()
            res_arr[idx, :, :] = np.nanmean(tmp_res, axis=2)

        # store to NetCDF cell file
        fname = os.path.join(path_out, '%04i.nc' % cell)
        ds = Dataset(fname, mode='w')
        dimensions = OrderedDict([('time', num_dates), ('lat', tmp_lats), ('lon', tmp_lons)])

        # Create/Write dimensions (time-series optimized chunking: full time extent, single pixel)
        chunksizes = []
        for key, values in dimensions.items():
            chunksize = 1 if key in ['lon', 'lat'] else len(values)
            chunksizes.append(chunksize)
            ds.createDimension(key, len(values))
            ds.createVariable(key, values.dtype, dimensions=(key,),
                              chunksizes=(chunksize,), zlib=True)
            ds[key][:] = values

        # Create/Write data
        ds.createVariable('soil_moisture', 'float32', dimensions=list(dimensions.keys()),
                          chunksizes=chunksizes, fill_value=-9999., zlib=True)
        ds['soil_moisture'][:, :, :] = res_arr
        ds.variables['time'].setncattr('units', timeunit)
        ds.variables['soil_moisture'].setncattr('units', smunit)
        ds.close()
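# Usage sketch (illustrative assumption, not part of the original module): with
# the per-pixel chunking chosen above, reading back a single pixel's full time
# series from a cell file is cheap. The pd.to_datetime origin matches the
# 'hours since 2000-01-01 00:00' time unit used when writing.
def read_cell_pixel(cell, row, col, path_out=r'D:\data_sets\SMOS_L3\cellfiles'):
    ds = Dataset(os.path.join(path_out, '%04i.nc' % cell))
    time = pd.to_datetime(np.asarray(ds['time'][:]), unit='h', origin='2000-01-01')
    ts = pd.Series(np.ma.filled(ds['soil_moisture'][:, row, col], np.nan), index=time)
    ds.close()
    return ts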