def run_job(metadata, variable, transformation_name, transformation, unit,
            read_acct, rcp, pername, years, model, baseline_model, seasons,
            agglev, aggwt, weights=None):
    """Build seasonal pattern-scaled data averaged over a multi-year period.

    For each year in ``years`` and each season in ``seasons``, loads a BCSD
    pattern file, re-indexes its ``day`` dimension to a daily time index,
    adds the pattern residuals to the corresponding seasonal baseline,
    applies ``transformation`` to the concatenated seasonal data, averages
    the annual results across the period, optionally aggregates the grid to
    regions, and writes the result to a netCDF file at ``WRITE_PATH``.

    Parameters
    ----------
    metadata : dict
        Job metadata; used to format file paths and (filtered by
        ``DS_METADATA_FEILDS``) written to the output's netCDF attrs.
        Mutated in place: ``time_horizon`` is added.
    variable : str
        Name of the climate variable to read and write.
    transformation : callable
        Applied (via ``.pipe``) to the concatenated seasonal DataArray.
    years : sequence of int
        Years spanning the period to average over.
    seasons : sequence of str
        Season codes; keys of the DJF/MAM/JJA/SON month-start table.
    weights : optional
        Passed through to ``weighted_aggregate_grid_to_regions``.

    Returns
    -------
    None.  Returns early without writing if the output file already exists.
    """
    logger.debug('Beginning job\nkwargs:\t{}'.format(
        pprint.pformat(metadata, indent=2)))

    # Add to job metadata
    metadata.update(dict(time_horizon='{}-{}'.format(years[0], years[-1])))

    baseline_file = BASELINE_FILE.format(**metadata)
    pattern_file = BCSD_pattern_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # Do not duplicate work already on disk.
    if os.path.isfile(write_file):
        return

    # Running sum of transformed annual Datasets (averaged at the end).
    total = None

    # Load each season's baseline once, up front, rather than per year.
    seasonal_baselines = {}
    for season in seasons:
        basef = baseline_file.format(season=season)
        logger.debug('attempting to load baseline file: {}'.format(basef))
        seasonal_baselines[season] = load_baseline(basef, variable)

    season_month_start = {'DJF': 12, 'MAM': 3, 'JJA': 6, 'SON': 9}

    for year in years:
        seasonal = []
        for season in seasons:
            pattf = pattern_file.format(year=year, season=season)
            logger.debug('attempting to load pattern file: {}'.format(pattf))
            patt = load_bcsd(pattf, variable, broadcast_dims=('day',))

            logger.debug('{} {} {} - reindexing coords day --> time'.format(
                model, year, season))

            # DJF straddles the year boundary: its December belongs to the
            # previous calendar year, hence the `- int(season == 'DJF')`.
            patt = (patt
                    .assign_coords(time=xr.DataArray(
                        pd.period_range(
                            '{}-{}-1'.format(
                                year - int(season == 'DJF'),
                                season_month_start[season]),
                            periods=len(patt.day),
                            freq='D'),
                        coords={'day': patt.day}))
                    .swap_dims({'day': 'time'})
                    .drop('day'))

            logger.debug(
                '{} {} {} - adding pattern residuals to baseline'.format(
                    model, year, season))
            seasonal.append(patt + seasonal_baselines[season])

        logger.debug(('{} {} - concatenating seasonal data and '
                      'applying transform').format(model, year))
        annual = xr.Dataset(
            {variable: xr.concat(seasonal, dim='time').pipe(transformation)})

        if total is None:
            total = annual
        else:
            total += annual

    # Period mean across the requested years.
    ds = total / len(years)

    # Reshape to regions
    logger.debug('{} reshaping to regions'.format(model))
    if not agglev.startswith('grid'):
        ds = weighted_aggregate_grid_to_regions(
            ds, variable, aggwt, agglev, weights=weights)

    # Update netCDF metadata (stringify and keep only whitelisted keys).
    logger.debug('{} update metadata'.format(model))
    ds.attrs.update(
        **{k: str(v) for k, v in metadata.items() if k in DS_METADATA_FEILDS})

    # Write output
    logger.debug('attempting to write to file: {}'.format(write_file))
    if not os.path.isdir(os.path.dirname(write_file)):
        os.makedirs(os.path.dirname(write_file))
    ds.to_netcdf(write_file)

    logger.debug('done')
def run_job(metadata, variable, transformation_name, transformation, unit,
            rcp, pername, years, model, agglev, aggwt, weights=None):
    """Transform annual BCSD data, average over a period, and write netCDF.

    Loads one BCSD file per year in ``years``, applies ``transformation``
    to each, averages the annual results across the period, optionally
    aggregates the grid to regions, attaches whitelisted metadata, and
    writes a netCDF file at ``WRITE_PATH``.

    Parameters
    ----------
    metadata : dict
        Job metadata; used to format file paths and (filtered by
        ``DS_METADATA_FEILDS``) written to the output's netCDF attrs.
        Mutated in place: ``time_horizon`` is added.
    variable : str
        Name of the climate variable to read and write.
    transformation : callable
        Applied (via ``.pipe``) to each year's loaded DataArray.
    years : sequence of int
        Years spanning the period to average over.
    weights : optional
        Passed through to ``weighted_aggregate_grid_to_regions``.

    Returns
    -------
    None.  Returns early without writing if the output file already exists.
    """
    logger.debug('Beginning job\nkwargs:\t{}'.format(
        pprint.pformat(metadata, indent=2)))

    # Add to job metadata
    metadata.update(dict(time_horizon='{}-{}'.format(years[0], years[-1])))

    read_file = BCSD_orig_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # Do not duplicate work already on disk.
    if os.path.isfile(write_file):
        return

    # Prepare annual transformed data
    annual = []

    for y in years:
        fp = read_file.format(year=y)
        logger.debug('attempting to load BCSD file: {}'.format(fp))
        annual.append(
            load_bcsd(fp, variable,
                      broadcast_dims=('time',)).pipe(transformation))

    # Concatenate years into a single dataset and average across years.
    logger.debug('{} - concatenating annual data'.format(model))
    ds = xr.Dataset({
        variable:
            xr.concat(annual,
                      dim=pd.Index(years, name='year')).mean(dim='year')
    })

    # Reshape to regions
    logger.debug('{} reshaping to regions'.format(model))
    if not agglev.startswith('grid'):
        ds = weighted_aggregate_grid_to_regions(
            ds, variable, aggwt, agglev, weights=weights)

    # Update netCDF metadata (stringify and keep only whitelisted keys).
    logger.debug('{} update metadata'.format(model))
    ds.attrs.update(
        **{k: str(v) for k, v in metadata.items() if k in DS_METADATA_FEILDS})

    # Write output
    logger.debug('attempting to write to file: {}'.format(write_file))
    if not os.path.isdir(os.path.dirname(write_file)):
        os.makedirs(os.path.dirname(write_file))
    ds.to_netcdf(write_file)

    logger.debug('done')
def run_job(metadata, variable, transformation, source_variable, unit,
            scenario, read_acct, year, model, agglev, aggwt, weights=None):
    """Transform one year of BCSD data and write netCDF plus a metacsv header.

    Loads the BCSD file for ``year``, applies ``transformation``, optionally
    aggregates the grid to regions, attaches whitelisted metadata and file
    dependencies (with source-file versions), and writes both a netCDF file
    and a companion ``.fgh`` metacsv header at ``WRITE_PATH``.

    Parameters
    ----------
    metadata : dict
        Job metadata; used to format file paths and (filtered by
        ``INCLUDED_METADATA``) written to the output's netCDF attrs.
        Mutated in place: ``ADDITIONAL_METADATA`` is merged in.
    variable : str
        Output variable name used for regional aggregation.
    source_variable : str
        Variable name to read from the source file.
    transformation : callable
        Applied (via ``.pipe``) to the loaded DataArray.
    year : int
        Year of the source file to process.
    weights : optional
        Passed through to ``weighted_aggregate_grid_to_regions``.

    Returns
    -------
    None.  Returns early without writing if the output file already exists.
    """
    import xarray as xr
    import metacsv

    from climate_toolbox import (
        load_bcsd,
        weighted_aggregate_grid_to_regions)

    # Add to job metadata
    metadata.update(ADDITIONAL_METADATA)

    file_dependencies = {}

    read_file = BCSD_orig_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # Do not duplicate work already on disk.
    if os.path.isfile(write_file):
        return

    # Get transformed data
    fp = read_file.format(year=year)

    # Log before opening so a read failure is attributable in the log.
    logger.debug('year {} - attempting to read file "{}"'.format(year, fp))

    with xr.open_dataset(fp) as ds:
        ds.load()

    # Record the source file's version for the output's provenance header.
    file_dependencies[os.path.splitext(os.path.basename(fp))[0]] = (str(
        ds.attrs.get('version', '1.0')))

    ds = (load_bcsd(ds, source_variable,
                    broadcast_dims=('time',)).pipe(transformation))

    # Preserve per-variable attrs so they survive regional aggregation.
    varattrs = {var: dict(ds[var].attrs) for var in ds.data_vars.keys()}

    # Reshape to regions
    if not agglev.startswith('grid'):
        logger.debug('aggregating to "{}" using "{}"'.format(agglev, aggwt))
        ds = weighted_aggregate_grid_to_regions(
            ds, variable, aggwt, agglev, weights=weights)

    # Update netCDF metadata (stringify and keep only whitelisted keys).
    ds.attrs.update(
        **{k: str(v) for k, v in metadata.items() if k in INCLUDED_METADATA})
    ds.attrs.update(ADDITIONAL_METADATA)

    # Write output
    if not os.path.isdir(os.path.dirname(write_file)):
        logger.debug('attempting to create_directory "{}"'.format(
            os.path.dirname(write_file)))
        os.makedirs(os.path.dirname(write_file))

    logger.debug('attempting to write to file "{}"'.format(write_file))

    attrs = dict(ds.attrs)
    attrs['file_dependencies'] = file_dependencies

    # Restore the per-variable attrs captured before aggregation.
    for var, vattrs in varattrs.items():
        ds[var].attrs.update(vattrs)

    ds.to_netcdf(write_file)

    metacsv.to_header(
        write_file.replace('.nc', '.fgh'),
        attrs=dict(attrs),
        variables=varattrs)

    logger.debug('job done')
def run_job(metadata, variable, transformation_name, transformation, unit,
            rcp, pername, years, model, agglev, aggwt, weights=None):
    """Transform annual BCSD data, average over a period, and write netCDF.

    Loads one BCSD file per year in ``years``, applies ``transformation``
    to each, averages the annual results across the period, optionally
    aggregates the grid to regions, attaches the job metadata, and writes a
    netCDF file at ``WRITE_PATH``.

    Parameters
    ----------
    metadata : dict
        Job metadata; used to format file paths and written to the output's
        netCDF attrs.  Mutated in place: ``time_horizon`` is added.
    variable : str
        Name of the climate variable to read and write.
    transformation : callable
        Applied (via ``.pipe``) to each year's loaded DataArray.
    years : sequence of int
        Years spanning the period to average over.
    weights : optional
        Passed through to ``weighted_aggregate_grid_to_regions``.

    Returns
    -------
    None.  Returns early without writing if the output file already exists.
    """
    # Add to job metadata
    metadata.update(dict(time_horizon='{}-{}'.format(years[0], years[-1])))

    logger.debug('Beginning job:\n\tkwargs:\t{}'.format(
        pprint.pformat(metadata, indent=2)))

    read_file = BCSD_orig_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # Do not duplicate work already on disk.
    if os.path.isfile(write_file):
        return

    # Get transformed data
    annual = []

    for y in years:
        fp = read_file.format(year=y)
        # Use the module logger (not the root `logging` module) so these
        # messages respect this module's handlers and level.
        logger.debug('year {} - attempting to read file "{}"'.format(y, fp))
        annual.append(
            load_bcsd(fp, variable,
                      broadcast_dims=('time',)).pipe(transformation))

    logger.debug('concatenating & reducing annual data')
    ds = xr.Dataset({
        variable:
            xr.concat(annual,
                      dim=pd.Index(years, name='year')).mean(dim='year')
    })

    # Reshape to regions
    if not agglev.startswith('grid'):
        logger.debug('aggregating to "{}" using "{}"'.format(agglev, aggwt))
        ds = weighted_aggregate_grid_to_regions(
            ds, variable, aggwt, agglev, weights=weights)

    # Update netCDF metadata
    ds.attrs.update(**metadata)

    # Write output
    if not os.path.isdir(os.path.dirname(write_file)):
        logger.debug('attempting to create_directory "{}"'.format(
            os.path.dirname(write_file)))
        os.makedirs(os.path.dirname(write_file))

    logger.debug('attempting to write to file "{}"'.format(write_file))
    ds.to_netcdf(write_file)

    logger.debug('job done')
def run_job(metadata, variable, transformation_name, transformation, unit,
            rcp, pername, read_acct, years, model, baseline_model, season,
            agglev, aggwt, weights=None):
    """Average transformed pattern data over a period and add the baseline.

    Loads one BCSD pattern file per year in ``years``, applies
    ``transformation`` to each, averages the annual results across the
    period, adds the baseline field, optionally aggregates the grid to
    regions, attaches the job metadata, and writes a netCDF file at
    ``WRITE_PATH``.

    Parameters
    ----------
    metadata : dict
        Job metadata; used to format file paths and written to the output's
        netCDF attrs.  Mutated in place: ``time_horizon`` is added.
    variable : str
        Name of the climate variable to read and write.
    transformation : callable
        Applied (via ``.pipe``) to each year's loaded DataArray.
    years : sequence of int
        Years spanning the period to average over.
    weights : optional
        Passed through to ``weighted_aggregate_grid_to_regions``.

    Returns
    -------
    None.  Returns early without writing if the output file already exists.
    """
    logger.debug('Beginning job\nkwargs:\t{}'.format(
        pprint.pformat(metadata, indent=2)))

    # Add to job metadata
    metadata.update(dict(time_horizon='{}-{}'.format(years[0], years[-1])))

    baseline_file = BASELINE_FILE.format(**metadata)
    pattern_file = BCSD_pattern_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # Do not duplicate work already on disk.
    if os.path.isfile(write_file):
        return

    # Get transformed data
    total = []

    for year in years:
        pattf = pattern_file.format(year=year)
        logger.debug('attempting to load pattern file: {}'.format(pattf))
        annual = load_bcsd(pattf, variable, broadcast_dims=('day',))

        logger.debug('{} {} - applying transform'.format(model, year))
        annual = xr.Dataset({variable: annual.pipe(transformation)})

        logger.debug('{} {} - adding to running total'.format(model, year))
        total.append(annual)

    # Period mean across the requested years.
    ds = xr.concat(total, dim=pd.Index(years, name='year')).mean(dim='year')

    # Load baseline.  (Fixed: the original message had no `{}` placeholder,
    # so the file path was silently dropped from the log line.)
    logger.debug('attempting to load baseline file: {}'.format(baseline_file))
    base = load_baseline(baseline_file, variable)

    logger.debug('{} - adding pattern residuals to baseline'.format(model))
    ds = (ds + base)

    # Reshape to regions
    logger.debug('{} - reshaping to regions'.format(model))
    if not agglev.startswith('grid'):
        ds = weighted_aggregate_grid_to_regions(
            ds, variable, aggwt, agglev, weights=weights)

    # Update netCDF metadata
    logger.debug('{} update metadata'.format(model))
    ds.attrs.update(**metadata)

    # Write output
    logger.debug('attempting to write to file: {}'.format(write_file))
    if not os.path.isdir(os.path.dirname(write_file)):
        os.makedirs(os.path.dirname(write_file))
    ds.to_netcdf(write_file)