def read_precip_stats_mf(reanalysis):
    """Read monthly precip_stats files into one data cube for a given reanalysis.

    Avoids xr.open_mfdataset because that runs into a "Too many open files"
    issue; instead each file is opened, loaded into memory and closed
    individually.

    Returns: an xarray dataset, sorted into time order.
    """
    def read_one_file(path):
        """Read a single file, using a context manager to ensure it gets closed."""
        with xr.open_dataset(path, drop_variables=['latitude', 'longitude']) as ds:
            ds.load()  # pull data into memory so the file handle can be released
        return ds

    fileList = globFiles(reanalysis)
    date = [util.date_from_filename(f) for f in fileList]
    datasets = [read_one_file(f) for f in fileList]
    combined = xr.concat(datasets, 'time')
    combined.coords['time'] = date
    # BUG FIX: sortby returns a new dataset rather than sorting in place.
    # The original discarded the result and returned the unsorted cube,
    # contradicting the docstring.  Return the sorted dataset instead.
    return combined.sortby('time')
def read_precip_stats(reanalysis):
    """Read monthly precip_stats files into one data cube for a given reanalysis.

    NB: sortby is used so that data are returned in time order.

    Returns: an xarray dataset
    """
    paths = globFiles(reanalysis)
    dates = [util.date_from_filename(p) for p in paths]

    # Only the statistics variables are concatenated along time.
    stat_vars = [
        'wetday_mean',
        'wetday_frequency',
        'wetday_total',
        'wetday_max',
        'prectot',
    ]
    ds = xr.open_mfdataset(paths, concat_dim='time', data_vars=stat_vars)
    ds.load()

    # Drizzle is whatever precipitation fell outside of wet days.
    ds['drizzle'] = ds['prectot'] - ds['wetday_total']
    ds.coords['time'] = dates
    return ds.sortby(ds.time)
def process_daily_precip(reanalysis, variable, start_date=None, end_date=None,
                         threshold=1., verbose=False, grid=None):
    """Process monthly precipitation statistics for a date range.

    Parameters:
      reanalysis - name of the reanalysis product
      variable   - variable to process
      start_date - first date as 'YYYYMMDD'; defaults to '19790101'
      end_date   - last date as 'YYYYMMDD'; defaults to today
      threshold  - wet-day threshold passed to get_precip_statistics
      verbose    - if True, print progress messages
      grid       - optional grid passed to util.make_fileList

    Files that cannot be read raise IOError inside the loop; these are
    reported and skipped so the remaining months are still processed.
    """
    if not start_date:
        start_date = '19790101'
    if not end_date:
        end_date = dt.datetime.today().strftime('%Y%m%d')

    if verbose:
        print('% Processing {} from {} for {} to {}'.format(
            variable, reanalysis, start_date, end_date))

    fileList = util.make_fileList(reanalysis, variable,
                                  (start_date, end_date), grid=grid)

    # One shared output encoding for every statistics variable.  The original
    # repeated the same {'zlib': True, '_FillValue': -999.} dict five times.
    stat_vars = ['wetday_mean', 'wetday_frequency', 'wetday_total',
                 'wetday_max', 'prectot']
    encoding = {v: {'zlib': True, '_FillValue': -999.} for v in stat_vars}

    for f in fileList:
        if verbose:
            print('    Generating statistics for {}'.format(
                util.date_from_filename(f).strftime('%Y%m')))
        try:
            ds = get_precip_statistics(f, reanalysis, threshold=threshold)
        except IOError:
            print('get_precip_statistics: could not read {:s}'.format(f))
        else:
            filo = util.make_outfile(f, reanalysis, variable)
            if verbose:
                print('    Writing statistics to {}'.format(filo))
            ds.to_netcdf(filo, encoding=encoding)
    return
def process_daily_snow(reanalysis, variable, start_date=None, end_date=None,
                       threshold=1., verbose=False, grid=None):
    """Process monthly snowfall statistics for a date range.

    Parameters:
      reanalysis - name of the reanalysis product
      variable   - variable to process
      start_date - first date as 'YYYYMMDD'; defaults to '19790101'
      end_date   - last date as 'YYYYMMDD'; defaults to today
      threshold  - threshold passed to get_month_snow
      verbose    - if True, print progress messages
      grid       - optional grid passed to util.make_fileList

    Mirrors process_daily_precip: files that raise IOError are reported and
    skipped so the remaining months are still processed (replaces a dead
    commented-out bare-except block that the original flagged as bad).
    """
    if not start_date:
        start_date = '19790101'
    if not end_date:
        end_date = dt.datetime.today().strftime('%Y%m%d')

    if verbose:
        print('% Processing {} from {} for {} to {}'.format(
            variable, reanalysis, start_date, end_date))

    fileList = util.make_fileList(reanalysis, variable,
                                  (start_date, end_date), grid=grid)

    for f in fileList:
        if verbose:
            print('    Generating statistics for {}'.format(
                util.date_from_filename(f).strftime('%Y%m')))
        try:
            ds = get_month_snow(f, reanalysis, threshold=threshold)
        except IOError:
            # Narrow, explicit handling — consistent with process_daily_precip.
            print('get_month_snow: could not read {:s}'.format(f))
        else:
            filo = util.make_outfile(f, reanalysis, variable)
            if verbose:
                print('    Writing statistics to {}'.format(filo))
            ds.to_netcdf(filo)
    return