# Imports inferred from usage throughout this module. find_files, get_template,
# s, b, paths, EASE2, and LDAS_io are project-internal helpers assumed to be
# defined in (or importable from) elsewhere in this repository.
import os
import platform
import re
import tarfile
from collections import OrderedDict

import numpy as np
import pandas as pd
import xarray as xr
from netCDF4 import Dataset, date2num


def replace_orbit_field():
    """ Reset the orbit field (second big-endian int32) of all descending-orbit ('_D_p') scaling files. """
    root = r'C:\Users\u0116961\Documents\VSC\vsc_data_copies\scratch_TEST_RUNS\US_M36_SMOS_noDA_unscaled\obs_scaling'
    for f in find_files(root, '_D_p'):
        data = np.fromfile(f, '>i4')
        data[1] = 0
        data.tofile(f)
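# --- Assumed helper (a minimal sketch, not the authoritative implementation) ---
# All routines in this module rely on a project-internal find_files(root, pattern).
# Its behavior is inferred from the call sites: recursively collect files under
# 'root' whose name contains 'pattern'; return None when nothing matches (see
# generate_cell_files), a single path when exactly one file matches (hence the
# np.atleast_1d() guard in the MSWEP reader below), and an array of paths otherwise.
def find_files(root, pattern):
    matches = np.array(sorted(os.path.join(path, f)
                              for path, _, files in os.walk(root)
                              for f in files if pattern in f))
    if len(matches) == 0:
        return None
    return matches[0] if len(matches) == 1 else matches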
def __init__(self, root=None, cellfiles=True):
    if root is None:
        if platform.system() == 'Windows':
            root = os.path.join('D:', 'data_sets', 'MSWEP_v21')
        elif platform.system() == 'Linux':
            root = os.path.join('/', 'data', 'leuven', '320', 'vsc32046', 'data_sets', 'MSWEP')
        else:
            root = os.path.join('~', 'data_sets', 'MSWEP_v21')

    if cellfiles is True:
        self.root = os.path.join(root, 'cellfiles')
        self.loaded_cell = None
        self.ds = None
    else:
        self.ds = Dataset(np.atleast_1d(find_files(root, '.nc4'))[0])

    self.grid = pd.read_csv(find_files(root, 'grid.csv'), index_col=0)
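# Usage sketch (the class name MSWEP_io is an assumption; only the __init__
# above is shown in this module):
#     io = MSWEP_io()                 # platform-specific default root, cell-file access
#     io = MSWEP_io(cellfiles=False)  # open the first gridded .nc4 stack directly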
def reshuffle(self):
    timeunit = 'hours since 2000-01-01 00:00'

    for version in self.versions:
        for mode in self.modes:
            files = find_files(os.path.join(self.root, mode, version), '.nc')
            # file names carry the date at a fixed position from the end
            dates = pd.DatetimeIndex([f[-24:-16] for f in files])
            meta = pd.Series(files, index=dates)
            meta = meta[self.date_range[0]:self.date_range[1]]

            fname = os.path.join(self.root, '_reshuffled', mode + '_' + version + '.nc')
            ds = Dataset(fname, mode='w')

            dates = date2num(meta.index.to_pydatetime(), timeunit).astype('int32')
            dimensions = OrderedDict([('time', dates), ('lat', self.lats), ('lon', self.lons)])

            # image-optimized chunking: one full spatial field per time step
            chunksizes = []
            for key, values in dimensions.items():
                chunksize = 1 if key == 'time' else len(values)
                chunksizes.append(chunksize)
                ds.createDimension(key, len(values))
                ds.createVariable(key, values.dtype, dimensions=(key,),
                                  chunksizes=(chunksize,), zlib=True)
                ds[key][:] = values
            ds.variables['time'].setncattr('units', timeunit)

            ds.createVariable('sm', 'float32', dimensions=list(dimensions.keys()),
                              chunksizes=chunksizes, fill_value=-9999., zlib=True)
            for i, f in enumerate(meta.values):
                tmp_ds = Dataset(f)
                ds['sm'][i, :, :] = tmp_ds.variables['sm'][0, :, :].data
                tmp_ds.close()

            ds.close()
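# Usage sketch (illustrative assumption; the file name is a placeholder): with
# the (time=1, lat=full, lon=full) chunking chosen above, reading one daily
# image from a reshuffled stack is a cheap, contiguous read, e.g.:
#     ds = xr.open_dataset(os.path.join(root, '_reshuffled', 'mode_version.nc'))
#     img = ds['sm'].sel(time='2015-06-01')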
def extract_L3_tar_files():
    root = r"D:\data_sets\SMOS_L3\raw"
    files = find_files(root, '.tgz')
    for cnt, f in enumerate(files):
        print('%i / %i' % (cnt, len(files)))

        # map raw/asc and raw/dsc onto a common unzipped/ascdsc directory; the
        # single-pass regex substitution prevents the 'dsc' inside a freshly
        # written 'ascdsc' from being substituted a second time
        out_path = re.sub('asc|dsc', 'ascdsc',
                          os.path.dirname(f).replace('raw', 'unzipped'))
        if not os.path.exists(out_path):
            os.makedirs(out_path)

        # extract only the .nc payload of the archive
        tmp = tarfile.open(f)
        member = [x for x in tmp.getnames() if x.find('.nc') != -1][0]
        tmp.extract(tmp.getmember(member), out_path)
        tmp.close()
def __init__(self, root=None):
    if root is None:
        if platform.system() == 'Windows':
            root = os.path.join('D:', 'data_sets', 'SMOS_L3')
        elif platform.system() == 'Linux':
            root = os.path.join('/', 'data', 'leuven', '320', 'vsc32046', 'data_sets', 'SMOS')
        else:
            root = os.path.join('~', 'data_sets', 'SMOS_L3')

    self.loaded_cell = None
    self.ds = None
    self.grid = pd.read_csv(find_files(root, 'grid.csv'), index_col=0)
    self.root = os.path.join(root, 'cellfiles')
def read_params(self, param, fname=None):
    """ Read parameter files (tilegrids, tilecoord, RTMparam, catparam). """
    if fname is None:
        fname = find_files(self.paths.rc_out, param)

    # 'tilegrids' files are the only ones without regular fortran tags
    reg_ftags = False if param == 'tilegrids' else True

    dtype, hdr, length = get_template(param)
    data = self.read_fortran_binary(fname, dtype, hdr=hdr, length=length, reg_ftags=reg_ftags)
    data.replace(-9999., np.nan, inplace=True)

    if param == 'tilegrids':
        data.index = ['global', 'domain']
    else:
        # index equals the 'tilenum', which starts at 1!
        data.index += 1

    return data
def generate_grid_file():
    files = find_files(r'D:\data_sets\SMOS_L3\cellfiles', '.nc')
    dgg = pd.read_csv(r"D:\data_sets\ASCAT\warp5_grid\pointlist_warp_conus.csv", index_col=0)
    ease_grid = LDAS_io(exp='US_M36_SMOS_DA_cal_scaled_yearly').grid

    grid = pd.DataFrame()
    for cnt, f in enumerate(files):
        print('%i / %i' % (cnt, len(files)))

        # coordinates of the current cell file
        tmp = Dataset(f)
        lats = tmp.variables['lat'][:]
        lons = tmp.variables['lon'][:]
        tmp.close()

        # continue the running point index across cell files
        offset = grid.index.values[-1] + 1 if len(grid) > 0 else 0
        idx = np.arange(offset, len(lats) * len(lons) + offset)
        tmp_grid = pd.DataFrame(columns=['lat', 'lon', 'row', 'col', 'ease_row', 'ease_col',
                                         'dgg_cell', 'dgg_gpi'], index=idx)

        for row, lat in enumerate(lats):
            for col, lon in enumerate(lons):
                tmp_grid.loc[offset, 'lat'] = lat
                tmp_grid.loc[offset, 'lon'] = lon
                tmp_grid.loc[offset, 'row'] = row
                tmp_grid.loc[offset, 'col'] = col

                # matching EASE2 grid cell
                ease_col, ease_row = ease_grid.lonlat2colrow(lon, lat, domain=True)
                tmp_grid.loc[offset, 'ease_row'] = ease_row
                tmp_grid.loc[offset, 'ease_col'] = ease_col

                # nearest WARP5 DGG point (euclidean distance in lat/lon space)
                tmp_grid.loc[offset, 'dgg_cell'] = int(os.path.basename(f)[0:4])
                r = np.sqrt((dgg.lon - lon)**2 + (dgg.lat - lat)**2)
                tmp_grid.loc[offset, 'dgg_gpi'] = dgg.iloc[np.where(abs(r - r.min()) < 0.0001)[0][0], 0]

                offset += 1

        grid = pd.concat((grid, tmp_grid))

    grid.to_csv(r'D:\data_sets\SMOS_L3\grid.csv')
def __init__(self, param=None, exp=None, domain=None):
    self.paths = paths(exp=exp, domain=domain)

    self.obsparam = self.read_obsparam()
    self.tilecoord = self.read_params('tilecoord')
    self.tilegrids = self.read_params('tilegrids')
    self.grid = EASE2(tilecoord=self.tilecoord, tilegrids=self.tilegrids)

    self.param = param
    if param is not None:
        if param == 'xhourly':
            path = getattr(self.paths, 'cat')
        else:
            path = getattr(self.paths, 'exp_root')
        self.files = find_files(path, param)

        if self.files[0].find('images.nc') == -1:
            print('NetCDF image cube not yet created. Use method "bin2netcdf".')
            self.dates = pd.to_datetime([f[-18:-5] for f in self.files], format='%Y%m%d_%H%M').sort_values()
            # TODO: Currently valid for 3-hourly data only! Times of the END of the 3-hr periods are assigned!
            # if self.param == 'xhourly':
            #     self.dates += pd.to_timedelta('2 hours')
            self.dtype, self.hdr, self.length = get_template(self.param)
        else:
            self.images = xr.open_dataset(self.files[0])
            if self.files[1].find('timeseries.nc') == -1:
                print('NetCDF time series cube not yet created. Use the NetCDF kitchen sink.')
            else:
                self.timeseries = xr.open_dataset(self.files[1])
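# Usage sketch (illustrative; only the constructor above is shown here, and the
# attribute availability depends on whether bin2netcdf has already been run):
#     io = LDAS_io(param='xhourly', exp='US_M36_SMOS_DA_cal_scaled_yearly')
#     img = io.images   # available once the NetCDF image cube exists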
def read_obsparam(self):
    """ Read the 'obsparam' file. """
    with open(find_files(self.paths.rc_out, 'obsparam')) as fp:
        lines = fp.readlines()[1::]
    n_lines = len(lines)

    # 30 or 32 fields per species (before and after the two entries for the
    # use of uncertainty maps were introduced)
    n_fields = 32 if n_lines == 128 else 30
    n_blocks = n_lines // n_fields

    res = []
    for bl in np.arange(n_blocks) * n_fields:
        # fields shared by both file versions
        res.append({'descr': s(lines[bl + 0]),
                    'species': int(lines[bl + 1]),
                    'orbit': int(lines[bl + 2]),
                    'pol': int(lines[bl + 3]),
                    'N_ang': int(lines[bl + 4]),
                    'ang': float(lines[bl + 5]),
                    'freq': float(lines[bl + 6]),
                    'FOV': float(lines[bl + 7]),
                    'FOV_units': s(lines[bl + 8]),
                    'assim': b(lines[bl + 9]),
                    'scale': b(lines[bl + 10]),
                    'getinnov': b(lines[bl + 11]),
                    'RTM_ID': int(lines[bl + 12]),
                    'bias_Npar': int(lines[bl + 13]),
                    'bias_trel': int(lines[bl + 14]),
                    'bias_tcut': int(lines[bl + 15]),
                    'nodata': float(lines[bl + 16]),
                    'varname': s(lines[bl + 17]),
                    'units': s(lines[bl + 18]),
                    'path': s(lines[bl + 19]),
                    'name': s(lines[bl + 20]),
                    'scalepath': s(lines[bl + 21]),
                    'scalename': s(lines[bl + 22]),
                    'errstd': float(lines[bl + 23])})
        if n_fields == 32:
            # newer files carry two additional entries for uncertainty maps
            res[-1].update({'errstd_file': b(lines[bl + 24]),
                            'path_errstd': s(lines[bl + 25]),
                            'std_normal_max': float(lines[bl + 26]),
                            'zeromean': b(lines[bl + 27]),
                            'coarsen_pert': b(lines[bl + 28]),
                            'xcorr': float(lines[bl + 29]),
                            'ycorr': float(lines[bl + 30]),
                            'adapt': int(lines[bl + 31])})
        else:
            res[-1].update({'std_normal_max': float(lines[bl + 24]),
                            'zeromean': b(lines[bl + 25]),
                            'coarsen_pert': b(lines[bl + 26]),
                            'xcorr': float(lines[bl + 27]),
                            'ycorr': float(lines[bl + 28]),
                            'adapt': int(lines[bl + 29])})

    return pd.DataFrame(res)
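# --- Assumed parsing helpers (minimal sketches, not the authoritative code) ---
# read_obsparam() parses entries with s() and b(), project-internal helpers not
# shown in this module; their behavior is inferred from the obsparam file format.
def s(line):
    """ Strip whitespace and surrounding quotes from a fortran string entry. """
    return line.strip().strip("'\"")

def b(line):
    """ Parse a fortran logical (.true. / .false.) into a Python bool. """
    return line.strip().lower() in ('.true.', 'true', 't')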
def generate_cell_files():
    path_in = r'D:\data_sets\SMOS_L3\unzipped' + '\\'
    path_out = r'D:\data_sets\SMOS_L3\cellfiles' + '\\'
    if not os.path.exists(path_out):
        os.makedirs(path_out)

    # SMOS image coordinates
    tmp = Dataset(r"D:\data_sets\SMOS_L3\unzipped\2010\015\SM_RE04_MIR_CLF31A_20100115T000000_20100115T235959_300_001_7.DBL.nc")
    lats = tmp.variables['lat'][:]
    lons = tmp.variables['lon'][:]
    tmp.close()

    # WARP cells and coordinates
    dgg_grid = Dataset(r"D:\data_sets\ASCAT\warp5_grid\TUW_WARP5_grid_info_2_2.nc")
    dgg_lats = dgg_grid['lat'][:]
    dgg_lons = dgg_grid['lon'][:]
    dgg_cells = dgg_grid['cell'][:]
    dgg_grid.close()

    # Cell list
    conus_gpis = pd.read_csv(r"D:\data_sets\ASCAT\warp5_grid\pointlist_warp_conus.csv", index_col=0)
    cells = np.unique(conus_gpis['cell'])

    # NC parameters
    timeunit = 'hours since 2000-01-01 00:00'
    smunit = 'm3/m3'

    # Date range
    dates = pd.date_range('2010-01-15', '2015-05-06').to_pydatetime()
    num_dates = date2num(dates, timeunit).astype('int32')

    for cell in cells:
        print(cell)

        # bounding box of the current WARP cell
        latmin = dgg_lats[dgg_cells == cell].min()
        latmax = dgg_lats[dgg_cells == cell].max()
        lonmin = dgg_lons[dgg_cells == cell].min()
        lonmax = dgg_lons[dgg_cells == cell].max()

        ind_lats = np.where((lats >= latmin) & (lats <= latmax))[0]
        ind_lons = np.where((lons >= lonmin) & (lons <= lonmax))[0]
        tmp_lats = lats[ind_lats]
        tmp_lons = lons[ind_lons]

        res_arr = np.full((len(dates), len(tmp_lats), len(tmp_lons)), np.nan)

        # Read in SMOS native files (at most two images per day, which get averaged)
        for idx, date in enumerate(dates):
            print('%i / %i' % (idx, len(dates)))

            files = find_files(os.path.join(path_in, date.strftime('%Y')), date.strftime('%Y%m%d'))
            if files is None:
                continue

            tmp_res = np.full((len(tmp_lats), len(tmp_lons), 2), np.nan)
            for i, f in enumerate(files):
                ds = Dataset(f)
                data = ds.variables['Soil_Moisture'][ind_lats, ind_lons]
                tmp_res[:, :, i] = data
                if hasattr(data, 'fill_value'):
                    tmp_res[tmp_res == data.fill_value] = np.nan
                ds.close()
            res_arr[idx, :, :] = np.nanmean(tmp_res, axis=2)

        # store to NetCDF cell file
        fname = os.path.join(path_out, '%04i.nc' % cell)
        ds = Dataset(fname, mode='w')
        dimensions = OrderedDict([('time', num_dates), ('lat', tmp_lats), ('lon', tmp_lons)])

        # Create/Write dimensions (time-series optimized chunking: full time extent, single pixel)
        chunksizes = []
        for key, values in dimensions.items():
            chunksize = 1 if key in ['lon', 'lat'] else len(values)
            chunksizes.append(chunksize)
            ds.createDimension(key, len(values))
            ds.createVariable(key, values.dtype, dimensions=(key,),
                              chunksizes=(chunksize,), zlib=True)
            ds[key][:] = values

        # Create/Write data
        ds.createVariable('soil_moisture', 'float32', dimensions=list(dimensions.keys()),
                          chunksizes=chunksizes, fill_value=-9999., zlib=True)
        ds['soil_moisture'][:, :, :] = res_arr
        ds.variables['time'].setncattr('units', timeunit)
        ds.variables['soil_moisture'].setncattr('units', smunit)
        ds.close()
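# Usage sketch (illustrative assumption, not part of the original module): with
# the per-pixel chunking chosen above, reading back a single pixel's full time
# series from a cell file is cheap. The pd.to_datetime origin matches the
# 'hours since 2000-01-01 00:00' time unit used when writing.
def read_cell_pixel(cell, row, col, path_out=r'D:\data_sets\SMOS_L3\cellfiles'):
    ds = Dataset(os.path.join(path_out, '%04i.nc' % cell))
    time = pd.to_datetime(np.asarray(ds['time'][:]), unit='h', origin='2000-01-01')
    ts = pd.Series(np.ma.filled(ds['soil_moisture'][:, row, col], np.nan), index=time)
    ds.close()
    return ts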