def add_metadata(self, metadata=None):
    """
    Add additional custom metadata to the output files

    Every ``.nc`` file below ``<output_path>/CMIP6`` whose name starts with
    one of the requested variable names followed by ``_`` is opened in
    append mode and stamped with the E3SM provenance attributes set below.

    Parameters
    ----------
        metadata (dict): The keys are the names of the metadata fields,
            the values the values for the fields. They are written after
            the built-in attributes. ``None`` (the default) writes only
            the built-in attributes.
    """
    cmip_path = os.path.join(self._output_path, 'CMIP6')
    extra_attributes = dict(metadata) if metadata else {}

    print_message('Adding additional metadata to output files', 'ok')

    # first collect the files to touch, then edit them
    filepaths = list()
    for root, _, files in os.walk(cmip_path, topdown=False):
        for name in files:
            if name[-3:] != '.nc':
                continue
            index = name.find('_')
            if index != -1 and name[:index] in self._var_list:
                print_message(
                    "Adding additional metadata to {}".format(name), 'ok')
                filepaths.append(os.path.join(root, name))

    for filepath in filepaths:
        datafile = cdms2.open(filepath, 'a')
        try:
            datafile.e3sm_source_code_doi = '10.11578/E3SM/dc.20180418.36'
            datafile.e3sm_source_code_reference = 'https://github.com/E3SM-Project/E3SM/releases/tag/v1.0.0'
            datafile.doe_acknowledgement = 'This research was supported as part of the Energy Exascale Earth System Model (E3SM) project, funded by the U.S. Department of Energy, Office of Science, Office of Biological and Environmental Research.'
            datafile.computational_acknowledgement = 'The data were produced using resources of the National Energy Research Scientific Computing Center, a DOE Office of Science User Facility supported by the Office of Science of the U.S. Department of Energy under Contract No. DE-AC02-05CH11231.'
            datafile.ncclimo_generation_command = """ncclimo --var=${var} -7 --dfl_lvl=1 --no_cll_msr --no_frm_trm --no_stg_grd --yr_srt=1 --yr_end=500 --ypf=500 --map=map_ne30np4_to_cmip6_180x360_aave.20181001.nc """
            datafile.ncclimo_version = '4.7.9-alpha04'
            for field, value in extra_attributes.items():
                setattr(datafile, field, value)
        finally:
            # always release the file handle, even if an attribute write fails
            datafile.close()
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_activeTracers_temperature into CMIP.tosga

    The top vertical level of the temperature field is masked with the 2D
    cell mask and reduced over ``nCells`` to an area-weighted mean.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the keys ``'MPAS_mesh'`` and ``'MPASO'`` are read here)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy the handler prints a message and returns
        ``None`` without doing any work

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete,
        or an empty string if writing with CMOR failed
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    timeSeriesFiles = infiles['MPASO']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, _ = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_activeTracers_temperature',
        'xtime_startMonthly',
        'xtime_endMonthly',
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        thetao = dsIn.timeMonthly_avg_activeTracers_temperature
        tos = thetao.isel(nVertLevels=0).squeeze(drop=True).where(cellMask2D)
        areaCell = dsMesh.areaCell.where(cellMask2D)
        # area-weighted mean over all (unmasked) cells
        ds[VAR_NAME] = ((tos * areaCell).sum(dim='nCells') /
                        areaCell.sum(dim='nCells'))
        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): Dataset.compute() returns a new object and its
        # result is discarded here; confirm whether `ds = ds.compute()`
        # (or `ds.load()`) was intended
        ds.compute()
    ds.compute()

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{'table_entry': 'time', 'units': ds.time.units}]

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # failure is still reported to the caller through the empty return
        # value, but the traceback is kept in the log instead of being lost
        logging.exception('%s: writing with CMOR failed', VAR_NAME)
        return ""
    return VAR_NAME
def run(self):
    """
    run all the requested CMOR handlers

    Loads the handlers, submits one asynchronous job per handler to a
    process pool, waits for every job, then shuts the pool down and stamps
    the output files with the additional metadata. Exits the interpreter
    with status 1 if no handler could be loaded.
    """
    self.load_handlers()
    if not self._handlers:
        print_message('No handlers loaded')
        sys.exit(1)

    # Setup the number of processes that will exist in the pool
    if self._proc_vars:
        # leave one core free, but never request zero workers:
        # Pool(0) raises ValueError on a single-core machine
        ncpu = max(cpu_count() - 1, 1)
        self._nproc = min(len(self._handlers), ncpu)
    print_message('running with {} processes'.format(self._nproc), 'debug')

    # create process pool and results list
    self._pool = Pool(self._nproc)
    self._pool_res = list()

    for handler in self._handlers:
        for handler_info in handler.values():
            handler_method = handler_info[0]
            handler_variables = handler_info[1]

            # find the input files this handler needs; variables without
            # a matching file are skipped
            input_files = list()
            for variable in handler_variables:
                input_file = self.find_variable_file(
                    var=variable, path=self._input_path)
                if input_file is None:
                    continue
                input_files.append(
                    os.path.join(self._input_path, input_file))

            _args = (input_files, self._tables_path, self._user_input_path)
            self._pool_res.append(
                self._pool.apply_async(handler_method, args=_args, kwds={}))

    total = len(self._pool_res)
    for idx, res in enumerate(self._pool_res):
        try:
            # NOTE(review): the very large timeout presumably keeps the
            # wait interruptible; confirm before replacing with get()
            out = res.get(9999999)
            msg = 'Finished {handler}, {done}/{total} jobs complete'.format(
                handler=out, done=idx + 1, total=total)
            logging.info(msg)
        except Exception as e:
            # one failing handler must not stop the remaining results
            # from being collected
            logging.error(e)

    self.terminate()
    self.add_metadata()
def terminate(self):
    """
    Terminates the process pool

    Does nothing beyond the optional debug message when no pool is set.
    """
    if self._debug:
        print_message('Shutting down process pool', 'debug')

    pool = self._pool
    if not pool:
        return

    # same shutdown order as before: close, terminate, then join
    pool.close()
    pool.terminate()
    pool.join()
def load_handlers(self):
    """
    load the cmor handler modules

    Every ``*.py`` file in the handlers directory (except ``__init__.py``)
    whose module name was requested, or every such file when ``'all'`` is
    in the variable list, is loaded. For each one a
    ``{module_name: (handle, RAW_VARIABLES)}`` entry is appended to
    ``self._handlers``. Modules that cannot be imported or that lack
    ``handle`` / ``RAW_VARIABLES`` are reported and skipped.
    """
    handler_names = os.listdir(self._handlers_path)

    # if the handler directory doesn't have an __init__.py file create one
    if "__init__.py" not in handler_names:
        with open(os.path.join(self._handlers_path, '__init__.py'), 'w') as fp:
            fp.write('\n')

    load_everything = 'all' in self._var_list

    for handler in handler_names:
        if not handler.endswith('.py'):
            continue
        if handler == "__init__.py":
            continue

        module_name, _ = handler.rsplit('.', 1)

        # ignore handlers for variables that weren't requested
        if not load_everything and module_name not in self._var_list:
            continue

        module_path = os.path.join(self._handlers_path, handler)

        # load the module, and extract the "handle" method and required variables
        try:
            # NOTE(review): the `imp` module is deprecated; consider
            # importlib.util when the supported Python versions allow it
            module = imp.load_source(module_name, module_path)
            method = module.handle
            raw_variables = module.RAW_VARIABLES
        except (ImportError, AttributeError) as e:
            # AttributeError: the module loaded but does not define
            # `handle` or `RAW_VARIABLES`; skip it like a failed import
            msg = format_debug(e)
            print_message(
                'Error loading handler for {}'.format(module_path))
            print_message(msg)
            logging.error(msg)
            continue

        msg = 'Loaded {}'.format(module_name)
        if self._debug:
            print_message(msg, 'debug')
        logging.info(msg)
        self._handlers.append({module_name: (method, raw_variables)})
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_layerThickness into CMIP.thkcello

    NOTE(review): after remapping, the layer thickness is multiplied by the
    destination cell area (``area_b * sphere_radius**2``), i.e. the value
    written is thickness times area. Confirm that the variable named above
    is really the one this handler produces.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the keys ``'MPAS_mesh'``, ``'MPAS_map'`` and ``'MPASO'`` are read)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy the handler prints a message and returns
        ``None`` without doing any work

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete,
        or an empty string if writing with CMOR failed
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    earth_radius = dsMesh.attrs['sphere_radius']
    _, cellMask3D = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_layerThickness',
        'xtime_startMonthly',
        'xtime_endMonthly',
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        # thickness is set to zero outside the 3D cell mask
        ds[VAR_NAME] = \
            dsIn.timeMonthly_avg_layerThickness.where(cellMask3D, 0.)
        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): Dataset.compute() returns a new object and its
        # result is discarded here; confirm whether `ds = ds.compute()`
        # was intended
        ds.compute()

    ds = mpas.add_depth(ds, dsMesh)
    ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    # set masked values (where there are no MPAS grid cells) to zero
    ds[VAR_NAME] = ds[VAR_NAME].where(
        ds[VAR_NAME] != netCDF4.default_fillvals['f4'], 0.)

    # the remapped field still has to be multiplied by the area on the
    # output grid; only plain arrays are kept, so the mapping file can be
    # closed right away
    with xarray.open_dataset(mappingFileName) as dsMap:
        area_b = dsMap.area_b.values
        dst_grid_dims = dsMap.dst_grid_dims.values
    area_b = area_b.reshape((dst_grid_dims[1], dst_grid_dims[0]))
    area_b = xarray.DataArray(data=area_b,
                              dims=('lat', 'lon'),
                              coords=(ds.coords['lat'], ds.coords['lon']))

    # area_b is in square radians, so need to multiply by the earth_radius**2
    # multiply variables in this order so they don't get transposed
    ds[VAR_NAME] = ds[VAR_NAME] * earth_radius**2 * area_b

    setup_cmor(var_name=VAR_NAME, table_path=tables, table_name=TABLE,
               user_input_path=user_input_path)

    # create axes
    axes = [
        {'table_entry': 'time',
         'units': ds.time.units},
        {'table_entry': 'depth_coord',
         'units': 'm',
         'coord_vals': ds.depth.values,
         'cell_bounds': ds.depth_bnds.values},
        {'table_entry': 'latitude',
         'units': 'degrees_north',
         'coord_vals': ds.lat.values,
         'cell_bounds': ds.lat_bnds.values},
        {'table_entry': 'longitude',
         'units': 'degrees_east',
         'coord_vals': ds.lon.values,
         'cell_bounds': ds.lon_bnds.values},
    ]

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # failure is still reported to the caller through the empty return
        # value, but the traceback is kept in the log instead of being lost
        logging.exception('%s: writing with CMOR failed', VAR_NAME)
        return ""
    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_activeTracers_salinity into CMIP.sos

    The top vertical level of the salinity field is taken, the 2D cell
    mask is attached, and the result is remapped with the mapping file.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the keys ``'MPAS_mesh'``, ``'MPAS_map'`` and ``'MPASO'`` are read)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy the handler prints a message and returns
        ``None`` without doing any work

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete,
        or an empty string if writing with CMOR failed
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, _ = mpas.get_cell_masks(dsMesh)

    variableList = ['timeMonthly_avg_activeTracers_salinity',
                    'xtime_startMonthly', 'xtime_endMonthly']

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        salinity = dsIn.timeMonthly_avg_activeTracers_salinity
        ds[VAR_NAME] = salinity.isel(nVertLevels=0).squeeze(drop=True)
        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): Dataset.compute() returns a new object and its
        # result is discarded here; confirm whether `ds = ds.compute()`
        # was intended
        ds.compute()

    ds = mpas.add_mask(ds, cellMask2D)
    ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [
        {'table_entry': 'time',
         'units': ds.time.units},
        {'table_entry': 'latitude',
         'units': 'degrees_north',
         'coord_vals': ds.lat.values,
         'cell_bounds': ds.lat_bnds.values},
        {'table_entry': 'longitude',
         'units': 'degrees_east',
         'coord_vals': ds.lon.values,
         'cell_bounds': ds.lon_bnds.values},
    ]

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # failure is still reported to the caller through the empty return
        # value, but the traceback is kept in the log instead of being lost
        logging.exception('%s: writing with CMOR failed', VAR_NAME)
        return ""
    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Write the ``PHIS`` field divided by ``GRAV`` out through CMOR as orog

    Parameters
    ----------
    infiles : dict
        maps each raw variable name to a list of input file paths; only
        the first ``'PHIS'`` file is read
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file
    **kwargs
        ``simple`` : if truthy, delegate to ``handle_simple`` instead of
        using CMOR
        ``logdir`` : directory for the CMOR log; when missing, a
        ``cmor_logs`` directory next to the first logging handler's file
        is used

    Returns
    -------
    str or None
        ``None`` if any raw variable has no input files, ``VAR_NAME`` for
        the simple path, ``'orog'`` after a CMOR write

    Raises
    ------
    IOError
        if the first ``PHIS`` input file does not exist
    """
    simple = kwargs.get('simple')
    logdir = kwargs.get('logdir')

    logger = logging.getLogger()
    msg = f'{VAR_NAME}: Starting'
    logger.info(msg)

    # check that we have some input files for every variable
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = f'{VAR_NAME}: Unable to find input files for {variable}'
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    if simple:
        handle_simple(infiles)
        return VAR_NAME

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # assumes the first root-logger handler is file based (it must
        # expose `baseFilename`)
        outpath, _ = os.path.split(logger.handlers[0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(
        inpath=tables,
        netcdf_file_action=cmor.CMOR_REPLACE,
        logfile=logfile)
    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logging.info(msg)

    # extract data from the input file
    msg = 'orog: loading PHIS'
    logger.info(msg)

    filename = infiles['PHIS'][0]
    if not os.path.exists(filename):
        raise IOError("File not found: {}".format(filename))

    # everything needed is pulled out as plain arrays while the file is
    # open, so the dataset can be closed before CMOR runs
    with xr.open_dataset(filename, decode_times=False) as ds:
        msg = f'{VAR_NAME}: loading axes'
        logger.info(msg)

        axes = [{
            str('table_entry'): str('latitude'),
            str('units'): ds['lat'].units,
            str('coord_vals'): ds['lat'].values,
            str('cell_bounds'): ds['lat_bnds'].values
        }, {
            str('table_entry'): str('longitude'),
            str('units'): ds['lon'].units,
            str('coord_vals'): ds['lon'].values,
            str('cell_bounds'): ds['lon_bnds'].values
        }]
        outdata = ds['PHIS'].values / GRAV

    msg = 'orog: running CMOR'
    logging.info(msg)

    axis_ids = [cmor.axis(**axis) for axis in axes]
    varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

    cmor.write(varid, outdata)

    msg = '{}: write complete, closing'.format(VAR_NAME)
    logger.debug(msg)
    cmor.close()

    msg = '{}: file close complete'.format(VAR_NAME)
    logger.debug(msg)

    return 'orog'
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_seaIceFreshWaterFlux,
    timeMonthly_avg_riverRunoffFlux, timeMonthly_avg_iceRunoffFlux,
    timeMonthly_avg_rainFlux, and timeMonthly_avg_snowFlux into CMIP.wfo

    The five fluxes are summed and the sum is remapped with the mapping
    file.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the keys ``'MPAS_map'`` and ``'MPASO'`` are read here)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy the handler prints a message and returns
        ``None`` without doing any work

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete,
        or an empty string if writing with CMOR failed
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    variableList = [
        'timeMonthly_avg_seaIceFreshWaterFlux',
        'timeMonthly_avg_riverRunoffFlux',
        'timeMonthly_avg_iceRunoffFlux',
        'timeMonthly_avg_rainFlux',
        'timeMonthly_avg_snowFlux',
        'xtime_startMonthly',
        'xtime_endMonthly',
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds[VAR_NAME] = \
            dsIn.timeMonthly_avg_seaIceFreshWaterFlux + \
            dsIn.timeMonthly_avg_riverRunoffFlux + \
            dsIn.timeMonthly_avg_iceRunoffFlux + \
            dsIn.timeMonthly_avg_rainFlux + \
            dsIn.timeMonthly_avg_snowFlux
        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): Dataset.compute() returns a new object and its
        # result is discarded here; confirm whether `ds = ds.compute()`
        # was intended
        ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [
        {'table_entry': 'time',
         'units': ds.time.units},
        {'table_entry': 'latitude',
         'units': 'degrees_north',
         'coord_vals': ds.lat.values,
         'cell_bounds': ds.lat_bnds.values},
        {'table_entry': 'longitude',
         'units': 'degrees_east',
         'coord_vals': ds.lon.values,
         'cell_bounds': ds.lon_bnds.values},
    ]

    # every fragment but the last ends in a space so the implicitly
    # concatenated pieces do not run words together
    comment = ('Computed as the water flux into the ocean '
               'divided by the area of the ocean portion of '
               'the grid cell. This is the sum of sea-ice '
               'freshwater, river runoff, ice runoff, rain, '
               'and snow fluxes.')

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS, comment=comment)
    except Exception:
        # failure is still reported to the caller through the empty return
        # value, but the traceback is kept in the log instead of being lost
        logging.exception('%s: writing with CMOR failed', VAR_NAME)
        return ""
    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform E3SM.FISCCP1_COSP into the CMIP variable named by VAR_NAME

    Each input file is read in sorted order and written through CMOR on
    the axes (time, plev7c, tau, latitude, longitude).

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of input file
            names; the ``'FISCCP1_COSP'`` list is sorted in place
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        **kwargs: ``logdir`` (directory for the CMOR log, defaults to
            ``./logs``) and ``serial`` (show a progress bar when truthy)
    Returns
    -------
        var name (str): the name of the processed variable after processing
            is complete, or None if any raw variable has no input files
    """
    msg = '{}: Starting'.format(VAR_NAME)
    logger.info(msg)

    missing_input = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            missing_input = True
    if missing_input:
        return None

    msg = '{}: running with input files: {}'.format(VAR_NAME, infiles)
    logger.debug(msg)

    # setup cmor
    logdir = kwargs.get('logdir')
    if logdir:
        logfile = os.path.join(logdir, VAR_NAME + '.log')
    else:
        logfile = os.path.join(os.getcwd(), 'logs')
        if not os.path.exists(logfile):
            os.makedirs(logfile)
        logfile = os.path.join(logfile, VAR_NAME + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)
    cmor.dataset_json(user_input_path)
    cmor.load_table(TABLE)

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logger.info(msg)

    serial = kwargs.get('serial')

    # sort the input files for each variable
    infiles['FISCCP1_COSP'].sort()

    for filename in infiles['FISCCP1_COSP']:
        f = cdms2.open(filename)
        try:
            # load the data for each variable
            variable_data = f('FISCCP1_COSP')

            # the last tau coordinate value and the last tau bounds pair
            # are overwritten with fixed values before being given to CMOR
            tau = variable_data.getAxis(2)[:]
            tau[-1] = 100.0
            tau_bnds = f.variables['cosp_tau_bnds'][:]
            tau_bnds[-1] = [60.0, 100000.0]

            # load; pressure axis values and bounds are scaled by 100
            # and declared to CMOR in Pa
            data = {
                'FISCCP1_COSP': variable_data,
                'lat': variable_data.getLatitude(),
                'lon': variable_data.getLongitude(),
                'lat_bnds': f('lat_bnds'),
                'lon_bnds': f('lon_bnds'),
                'time': variable_data.getTime(),
                'time_bnds': f('time_bnds'),
                'plev7c': variable_data.getAxis(1)[:] * 100.0,
                'plev7c_bnds': f.variables['cosp_prs_bnds'][:] * 100.0,
                'tau': tau,
                'tau_bnds': tau_bnds
            }

            # create the cmor variable and axis
            axes = [{
                str('table_entry'): str('time'),
                str('units'): data['time'].units
            }, {
                str('table_entry'): str('plev7c'),
                str('units'): str('Pa'),
                str('coord_vals'): data['plev7c'],
                str('cell_bounds'): data['plev7c_bnds']
            }, {
                str('table_entry'): str('tau'),
                str('units'): str('1'),
                str('coord_vals'): data['tau'],
                str('cell_bounds'): data['tau_bnds']
            }, {
                str('table_entry'): str('latitude'),
                str('units'): data['lat'].units,
                str('coord_vals'): data['lat'][:],
                str('cell_bounds'): data['lat_bnds'][:]
            }, {
                str('table_entry'): str('longitude'),
                str('units'): data['lon'].units,
                str('coord_vals'): data['lon'][:],
                str('cell_bounds'): data['lon_bnds'][:]
            }]

            axis_ids = [cmor.axis(**axis) for axis in axes]
            varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

            # write out the data
            msg = "{}: time {:1.1f} - {:1.1f}".format(
                VAR_NAME,
                data['time_bnds'][0][0],
                data['time_bnds'][-1][-1])
            logger.info(msg)

            if serial:
                widgets = [
                    progressbar.DynamicMessage('running'), ' [',
                    progressbar.Timer(), '] ',
                    progressbar.Bar(),
                    ' (', progressbar.ETA(), ') '
                ]
                # the message renderer is replaced on the class itself
                progressbar.DynamicMessage.__call__ = my_dynamic_message
                pbar = progressbar.ProgressBar(
                    maxval=len(data['time']), widgets=widgets)
                pbar.start()

            for step, val in enumerate(data['time']):
                if serial:
                    pbar.update(step, running=msg)
                write_data(
                    varid=varid,
                    data=data,
                    timeval=val,
                    timebnds=[data['time_bnds'][step, :]],
                    index=step)
            if serial:
                pbar.finish()
        finally:
            # release the input file once its time steps are written
            f.close()

    msg = '{}: write complete, closing'.format(VAR_NAME)
    logger.info(msg)
    cmor.close()
    msg = '{}: file close complete'.format(VAR_NAME)
    logger.info(msg)

    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform E3SM.TSOI into CMIP.tsl

    With ``simple`` the first raw variable is copied into a new dataset
    (renamed to VAR_NAME, with CMIP6_Lmon attributes attached) and written
    with ``write_netcdf``. Otherwise each ``TSOI`` input file is written
    through CMOR on the axes (time, sdepth, latitude, longitude).

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of input file
            names; the ``'TSOI'`` list is sorted in place on the CMOR path
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        **kwargs: ``logdir`` (directory for the CMOR log, defaults to
            ``./logs``), ``simple``, ``outpath`` (required when ``simple``
            is truthy) and ``serial`` (show a progress bar when truthy)
    Returns
    -------
        var name (str): None if any raw variable has no input files,
            ``RAW_VARIABLES[0]`` on the simple path, otherwise VAR_NAME
    """
    msg = f'{VAR_NAME}: Starting'
    logger.info(msg)

    missing_input = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = f'{VAR_NAME}: Unable to find input files for {variable}'
            print_message(msg)
            logging.error(msg)
            missing_input = True
    if missing_input:
        return None

    msg = f'{VAR_NAME}: running with input files: {infiles}'
    logger.debug(msg)

    # setup cmor
    logdir = kwargs.get('logdir')
    if logdir:
        logfile = os.path.join(logdir, f"{VAR_NAME}.log")
    else:
        logfile = os.path.join(os.getcwd(), 'logs')
        # exist_ok: several handlers may create this directory at once
        os.makedirs(logfile, exist_ok=True)
        logfile = os.path.join(logfile, f"{VAR_NAME}.log")

    simple = kwargs.get('simple')
    if simple:
        outpath = kwargs['outpath']
        raw_name = RAW_VARIABLES[0]

        # the years are taken from the name of the first (sorted) input
        # file: the text after "<raw variable>_" split on underscores
        _, inputfile = os.path.split(sorted(infiles[raw_name])[0])
        name_parts = inputfile[len(raw_name) + 1:].split('_')
        start_year = name_parts[0]
        end_year = name_parts[1]

        outds = xr.Dataset()
        # the input stays open until the output has been written
        with xr.open_mfdataset(infiles[raw_name],
                               decode_times=False) as inputds:
            for dim in inputds.coords:
                if dim == 'levgrnd':
                    outds['levgrnd'] = inputds[dim]
                    outds['levgrnd_bnds'] = get_levgrnd_bnds()
                else:
                    outds[dim] = inputds[dim]

            for var in inputds.data_vars:
                if var == raw_name:
                    outds[VAR_NAME] = inputds[raw_name]
                elif var == 'time_bounds':
                    outds['time_bnds'] = inputds['time_bounds']
                else:
                    outds[var] = inputds[var]

            for attr, val in inputds.attrs.items():
                outds.attrs[attr] = val

            outds = outds.rename_dims({
                'levgrnd': 'depth',
                'levgrnd_bnds': 'depth_bnds'
            })
            outds = outds.rename_vars({
                'levgrnd': 'depth',
                'levgrnd_bnds': 'depth_bnds'
            })

            resource_path, _ = os.path.split(
                os.path.abspath(resources.__file__))
            table_path = os.path.join(resource_path, 'CMIP6_Lmon.json')
            with open(table_path, 'r') as ip:
                table_data = json.load(ip)

            variable_attrs = ['standard_name', 'long_name',
                              'comment', 'cell_methods',
                              'cell_measures', 'units']
            for attr in variable_attrs:
                outds[VAR_NAME].attrs[attr] = \
                    table_data['variable_entry'][VAR_NAME][attr]

            output_file_path = os.path.join(
                outpath, f'{VAR_NAME}_{start_year}_{end_year}.nc')
            msg = f'writing out variable to file {output_file_path}'
            print_message(msg, 'ok')
            write_netcdf(outds, output_file_path, unlimited=['time'])

        return RAW_VARIABLES[0]

    cmor.setup(
        inpath=tables,
        netcdf_file_action=cmor.CMOR_REPLACE,
        logfile=logfile)
    cmor.dataset_json(user_input_path)
    cmor.load_table(TABLE)

    msg = f'{VAR_NAME}: CMOR setup complete'
    logger.info(msg)

    serial = kwargs.get('serial')

    # sort the input files for each variable
    infiles['TSOI'].sort()

    for filename in infiles['TSOI']:
        f = cdms2.open(filename)
        try:
            # load the data for each variable
            variable_data = f('TSOI')

            levgrnd = variable_data.getAxis(1)[:]
            levgrnd_bnds = get_levgrnd_bnds()

            # load
            data = {
                'TSOI': variable_data,
                'lat': variable_data.getLatitude(),
                'lon': variable_data.getLongitude(),
                'lat_bnds': f('lat_bnds'),
                'lon_bnds': f('lon_bnds'),
                'time': variable_data.getTime(),
                'time_bnds': f('time_bounds'),
                'levgrnd': levgrnd,
                'levgrnd_bnds': levgrnd_bnds
            }

            # create the cmor variable and axis
            axes = [{
                str('table_entry'): str('time'),
                str('units'): data['time'].units
            }, {
                str('table_entry'): str('sdepth'),
                str('units'): str('m'),
                str('coord_vals'): levgrnd,
                str('cell_bounds'): levgrnd_bnds
            }, {
                str('table_entry'): str('latitude'),
                str('units'): data['lat'].units,
                str('coord_vals'): data['lat'][:],
                str('cell_bounds'): data['lat_bnds'][:]
            }, {
                str('table_entry'): str('longitude'),
                str('units'): data['lon'].units,
                str('coord_vals'): data['lon'][:],
                str('cell_bounds'): data['lon_bnds'][:]
            }]

            axis_ids = [cmor.axis(**axis) for axis in axes]
            varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

            # write out the data
            msg = f"{VAR_NAME}: writing {data['time_bnds'][0][0]} - {data['time_bnds'][-1][-1]}"
            logger.info(msg)

            if serial:
                pbar = tqdm(total=len(data['time']))
                pbar.set_description(msg)

            for step, val in enumerate(data['time']):
                cmor.write(
                    varid,
                    data['TSOI'][step, :],
                    time_vals=val,
                    time_bnds=[data['time_bnds'][step, :]])
                if serial:
                    pbar.update(1)
            if serial:
                pbar.close()
        finally:
            # release the input file once its time steps are written
            f.close()

    msg = f'{VAR_NAME}: write complete, closing'
    logger.info(msg)
    cmor.close()

    msg = f'{VAR_NAME}: file close complete'
    logger.info(msg)

    return VAR_NAME
def timeout_exit():
    """
    Report that the timeout limit was hit and send SIGINT to this process
    """
    print_message("Hit timeout limit, exiting")
    own_pid = os.getpid()
    os.kill(own_pid, signal.SIGINT)
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_layerThickness into CMIP.zhalfo

    The surface height is computed as the summed layer thickness minus the
    bottom depth; each layer's thickness is then subtracted in turn to get
    the height of every layer bottom. The ``nVertLevels + 1`` slices are
    stacked along ``olevhalf`` and remapped.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the keys ``'MPAS_mesh'``, ``'MPAS_map'`` and ``'MPASO'`` are read)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy the handler prints a message and returns
        ``None`` without doing any work

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete,
        or an empty string if writing with CMOR failed
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    _, cellMask3D = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_layerThickness',
        'xtime_startMonthly',
        'xtime_endMonthly',
    ]

    nVertLevels = dsMesh.sizes['nVertLevels']

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        layerThickness = dsIn.timeMonthly_avg_layerThickness
        layerThickness = layerThickness.where(cellMask3D)
        thicknessSum = layerThickness.sum(dim='nVertLevels')

        mask = cellMask3D.isel(nVertLevels=0)
        zSurface = (-dsMesh.bottomDepth + thicknessSum).where(mask)
        # NOTE(review): compute() returns a new object; the results of the
        # compute() calls in this function are discarded. Confirm whether
        # reassignment was intended
        zSurface.compute()

        # first slice is the surface, then one slice per layer bottom
        slices = [zSurface]
        maskSlices = [mask]
        zLayerBot = zSurface
        for zIndex in range(nVertLevels):
            mask = cellMask3D.isel(nVertLevels=zIndex)
            zLayerBot = (zLayerBot -
                         layerThickness.isel(nVertLevels=zIndex)).where(mask)
            zLayerBot.compute()
            slices.append(zLayerBot)
            maskSlices.append(mask)

        ds[VAR_NAME] = xarray.concat(slices, dim='olevhalf')
        mask = xarray.concat(maskSlices, dim='olevhalf')
        ds = mpas.add_mask(ds, mask)
        ds = ds.transpose('Time', 'olevhalf', 'nCells')
        ds = mpas.add_time(ds, dsIn)
        ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    # half-level coordinate: zero followed by the reference bottom depths
    depth_coord_half = numpy.zeros(nVertLevels + 1)
    depth_coord_half[1:] = dsMesh.refBottomDepth.values

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [
        {'table_entry': 'time',
         'units': ds.time.units},
        {'table_entry': 'depth_coord_half',
         'units': 'm',
         'coord_vals': depth_coord_half},
        {'table_entry': 'latitude',
         'units': 'degrees_north',
         'coord_vals': ds.lat.values,
         'cell_bounds': ds.lat_bnds.values},
        {'table_entry': 'longitude',
         'units': 'degrees_east',
         'coord_vals': ds.lon.values,
         'cell_bounds': ds.lon_bnds.values},
    ]

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # failure is still reported to the caller through the empty return
        # value, but the traceback is kept in the log instead of being lost
        logging.exception('%s: writing with CMOR failed', VAR_NAME)
        return ""
    return VAR_NAME
def run_serial(handlers, input_path, tables_path, metadata_path, map_path=None, mode='atm', logdir=None, simple=False, outpath=None, freq="mon"):
    """
    Run each of the handlers one at a time on the main process

    Params:
    -------
        handlers: an iterable of dicts; each provides 'method' and
            'raw_variables' and optionally 'name', 'units', 'table',
            'positive' and 'unit_conversion'
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        map_path (str): mapping file handed to find_mpas_files
        mode (str): what type of files to work with ('atm', 'lnd', 'fx',
            anything else is treated as MPAS)
        logdir (str): forwarded to every handler
        simple (bool): forwarded to every handler
        outpath (str): forwarded to every handler
        freq (str): forwarded to every handler
    Returns:
    --------
        returns 1 if an error occurs outside of a handler call, else 0;
        a handler that raises or returns None is reported and counted as
        failed but does not change the return value
    """
    try:
        num_handlers = len(handlers)
        num_success = 0

        show_progress = mode != 'atm'
        if show_progress:
            pbar = tqdm(total=len(handlers))

        for handler in handlers:
            handler_method = handler['method']
            handler_variables = handler['raw_variables']
            unit_conversion = handler.get('unit_conversion')

            # find the input files this handler needs
            if mode in ['atm', 'lnd']:
                input_paths = {
                    var: [os.path.join(input_path, x)
                          for x in find_atm_files(var, input_path)]
                    for var in handler_variables}
            elif mode == 'fx':
                input_paths = {
                    var: [os.path.join(input_path, x)
                          for x in os.listdir(input_path)
                          if x[-3:] == '.nc']
                    for var in handler_variables}
            else:
                input_paths = {
                    var: find_mpas_files(var, input_path, map_path)
                    for var in handler_variables}

            # reset for every handler: otherwise a handler that raises
            # would be counted as a success under the previous handler's
            # name
            name = None
            try:
                name = handler_method(
                    input_paths,
                    tables_path,
                    metadata_path,
                    raw_variables=handler.get('raw_variables'),
                    units=handler.get('units'),
                    name=handler.get('name'),
                    table=handler.get('table'),
                    positive=handler.get('positive'),
                    serial=True,
                    logdir=logdir,
                    simple=simple,
                    outpath=outpath,
                    unit_conversion=unit_conversion,
                    freq=freq)
            except Exception as e:
                print_debug(e)

            if name is not None:
                num_success += 1
                msg = f'Finished {name}, {num_success}/{num_handlers} jobs complete'
            else:
                msg = f'Error running handler {handler.get("name")}'
                print_message(msg, status='error')
            logger.info(msg)

            if show_progress:
                pbar.update(1)
        if show_progress:
            pbar.close()

    except Exception as error:
        print_debug(error)
        return 1
    else:
        print_message(
            f"{num_success} of {num_handlers} handlers complete", 'ok')
        return 0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Write the ``area`` field scaled by ``r**2`` out as areacella

    Parameters
    ----------
    infiles : dict
        maps each raw variable name to a list of input file paths; only
        the first file of the relevant variable is read
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file
    **kwargs
        ``simple`` : if truthy, write ``<VAR_NAME>_fx.nc`` directly with
        xarray instead of using CMOR
        ``outpath`` : directory for the simple output file; the current
        working directory is used when it is not given
        ``logdir`` : directory for the CMOR log; when missing, a
        ``cmor_logs`` directory next to the first logging handler's file
        is used

    Returns
    -------
    str or None
        ``None`` if any raw variable has no input files, ``VAR_NAME`` for
        the simple path, ``'areacella'`` after a CMOR write

    Raises
    ------
    IOError
        if the input file does not exist or the ``area`` variable holds
        no non-zero value
    """
    simple = kwargs.get('simple')
    logdir = kwargs.get('logdir')

    # sphere radius used to turn `area` into an absolute area
    # NOTE(review): presumably Earth's radius in metres, with `area` in
    # steradians; confirm
    r = 6.37122e6

    logger = logging.getLogger()
    msg = '{}: Starting'.format(VAR_NAME)
    logger.info(msg)

    # check that we have some input files for every variable
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    if simple:
        resource_path, _ = os.path.split(os.path.abspath(resources.__file__))
        table_path = os.path.join(resource_path, TABLE)
        with open(table_path, 'r') as ip:
            table_data = json.load(ip)

        # honour the output directory the runners pass in; fall back to
        # the working directory when none was given
        outname = f'{VAR_NAME}_fx.nc'
        outpath = kwargs.get('outpath')
        if outpath:
            outname = os.path.join(outpath, outname)

        ds = xr.Dataset()
        # the input stays open until the output has been written
        with xr.open_dataset(infiles[RAW_VARIABLES[0]][0]) as inputds:
            ds['lat'] = inputds['lat']
            ds['lat_bnds'] = inputds['lat_bnds']
            ds['lon'] = inputds['lon']
            ds['lon_bnds'] = inputds['lon_bnds']
            outdata = inputds['area'] * pow(r, 2)

            for attr, val in inputds.attrs.items():
                ds.attrs[attr] = val

            ds[VAR_NAME] = outdata
            for attr in ['standard_name', 'cell_methods',
                         'long_name', 'comment', 'units']:
                ds[VAR_NAME].attrs[attr] = \
                    table_data["variable_entry"][VAR_NAME][attr]

            ds.to_netcdf(outname)
        return VAR_NAME

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # assumes the first root-logger handler is file based (it must
        # expose `baseFilename`)
        outpath, _ = os.path.split(logger.handlers[0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)
    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logging.info(msg)

    # extract data from the input file
    msg = 'areacella: loading area'
    logger.info(msg)

    filename = infiles['area'][0]
    if not os.path.exists(filename):
        raise IOError("File not found: {}".format(filename))

    f = cdms2.open(filename)
    try:
        # load the data for each variable
        variable_data = f('area')
        if not variable_data.any():
            raise IOError("Variable data not found: {}".format('area'))

        # load the lon and lat info & bounds
        lat = variable_data.getLatitude()
        lon = variable_data.getLongitude()
        lat_bnds = f('lat_bnds')
        lon_bnds = f('lon_bnds')

        msg = '{name}: loading axes'.format(name=VAR_NAME)
        logger.info(msg)

        axes = [{
            str('table_entry'): str('latitude'),
            str('units'): lat.units,
            str('coord_vals'): lat[:],
            str('cell_bounds'): lat_bnds[:]
        }, {
            str('table_entry'): str('longitude'),
            str('units'): lon.units,
            str('coord_vals'): lon[:],
            str('cell_bounds'): lon_bnds[:]
        }]

        msg = 'areacella: running CMOR'
        logging.info(msg)

        axis_ids = [cmor.axis(**axis) for axis in axes]
        varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

        outdata = variable_data * pow(r, 2)
        cmor.write(varid, outdata)
    finally:
        # release the input file whether or not the write succeeded
        f.close()

    msg = '{}: write complete, closing'.format(VAR_NAME)
    logger.debug(msg)

    cmor.close()

    msg = '{}: file close complete'.format(VAR_NAME)
    logger.debug(msg)

    return 'areacella'
def run_parallel(pool, handlers, input_path, tables_path, metadata_path,
                 map_path=None, mode='atm', nproc=6, **kwargs):
    """
    Run all the handlers in parallel
    Params:
    -------
        pool: an executor exposing ``submit`` whose results expose ``result()``
        handlers: a list of dicts, each with at least the keys 'method',
            'raw_variables' and 'name'
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        map_path (str): passed to find_mpas_files when mode is not 'atm'/'lnd'
        mode (str): what type of files to work with
        nproc (int): accepted for interface compatibility, not used here
        kwargs: 'logdir', 'simple' and 'outpath' are forwarded to each handler
    Returns:
    --------
        always 0; handlers that fail are reported through print_message and
        the logger but do not change the return value
    """
    pool_res = list()
    will_run = []
    for handler in handlers:
        handler_method = handler['method']
        handler_variables = handler['raw_variables']

        # find the input files this handler needs
        if mode in ['atm', 'lnd']:
            input_paths = {
                var: [os.path.join(input_path, x)
                      for x in find_atm_files(var, input_path)]
                for var in handler_variables
            }
        else:
            input_paths = {
                var: find_mpas_files(var, input_path, map_path)
                for var in handler_variables
            }

        # setup the input args for the handler
        _kwargs = {
            'table': handler.get('table'),
            'raw_variables': handler.get('raw_variables'),
            'units': handler.get('units'),
            'positive': handler.get('positive'),
            'name': handler.get('name'),
            'logdir': kwargs.get('logdir'),
            'unit_conversion': handler.get('unit_conversion'),
            'simple': kwargs.get('simple'),
            'outpath': kwargs.get('outpath')
        }
        will_run.append(handler.get('name'))

        pool_res.append(
            pool.submit(
                handler_method,
                input_paths,
                tables_path,
                metadata_path,
                **_kwargs))

    # wait for each result to complete
    pbar = tqdm(total=len(pool_res))
    num_success = 0
    num_handlers = len(handlers)
    finished_success = []
    try:
        for idx, res in enumerate(pool_res):
            try:
                out = res.result()
                finished_success.append(out)
                if out:
                    num_success += 1
                    msg = f'Finished {out}, {idx + 1}/{num_handlers} jobs complete'
                else:
                    msg = f'Error running handler {handlers[idx]["name"]}'
                    print_message(msg, 'error')
                logger.info(msg)
            except Exception as e:
                print_debug(e)
            pbar.update(1)
    finally:
        # release the progress bar and the pool even if we are interrupted
        pbar.close()
        terminate(pool)

    print_message(f"{num_success} of {num_handlers} handlers complete", 'ok')

    # anything submitted that did not hand back its own name has failed
    failed = set(will_run) - set(finished_success)
    if failed:
        # str() guards against handlers without a name; sorting keeps the
        # report stable between runs
        failed_names = ', '.join(sorted(str(name) for name in failed))
        print_message(f"{failed_names} failed to complete")

    return 0
def handle_simple(infiles, raw_variables, write_data, outvar_name,
                  outvar_units, serial=None, positive=None, levels=None,
                  axis=None, logdir=None, outpath=None, table='Amon',
                  has_time=True):
    """
    Convert raw variables into a CMIP-style netCDF file without using CMOR.

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of input file paths
        raw_variables (list): names of the raw variables needed for the output
        write_data (callable): called once per time step with keyword arguments
            varid, data, timeval, timebnds, index, raw_variables and simple=True;
            its return value is stored as that time step of the output
        outvar_name (str): name of the output variable
        outvar_units (str): accepted for interface compatibility, not used here
        serial (bool): when truthy a tqdm progress bar is shown
        positive, axis: accepted for interface compatibility, not used here
        levels: forwarded to get_dimension_data
        logdir (str): directory for logs; when not given, a 'cmor_logs'
            directory next to the root logger's first log file is used
        outpath (str): directory for the output file; when None and no logdir
            is given, the directory of the root logger's first log file is used
        table (str): file name of the CMIP table (expected to end in '.json')
        has_time (bool): whether the output variable has a time dimension

    Returns
    -------
        str: outvar_name, or None when input files are missing for any
        raw variable
    """
    from e3sm_to_cmip.util import print_message
    logger = logging.getLogger()

    logger.info(f'{outvar_name}: Starting')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in raw_variables:
        if len(infiles[variable]) == 0:
            msg = f'{outvar_name}: Unable to find input files for {variable}'
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory
    if logdir:
        logpath = logdir
    else:
        log_root, _ = os.path.split(
            logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(log_root, 'cmor_logs')
        # Previously this fallback overwrote ``outpath`` unconditionally, so a
        # caller-supplied output directory was ignored. It is now only used
        # when the caller did not supply one.
        if outpath is None:
            outpath = log_root
    os.makedirs(logpath, exist_ok=True)

    _, inputfile = os.path.split(sorted(infiles[raw_variables[0]])[0])
    # skip the "<variable>_" prefix by length, since the variable name itself
    # might contain a _, then read the two year fields that follow it
    year_fields = inputfile[len(raw_variables[0]) + 1:].split('_')
    start_year = year_fields[0]
    end_year = year_fields[1]

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[raw_variables[0]])

    # sort the input files for each variable
    for var_name in raw_variables:
        infiles[var_name].sort()

    for file_index in range(num_files_per_variable):
        loaded = False

        # reload the dimensions for each time slice
        get_dims = True

        # load data for each variable
        for var_name in raw_variables:
            # extract data from the input file
            logger.info(f'{outvar_name}: loading {var_name}')

            new_data = get_dimension_data(
                filename=infiles[var_name][file_index],
                variable=var_name,
                levels=levels,
                get_dims=get_dims)
            data.update(new_data)
            get_dims = False

            if not loaded:
                loaded = True

                # the first raw variable defines the shape of the new data set
                ds = xr.Dataset()
                dims = ['time', 'lat', 'lon'] if has_time else ['lat', 'lon']
                for depth_dim in ['lev', 'plev', 'levgrnd']:
                    if depth_dim in new_data.keys():
                        dims.insert(1, depth_dim)

                ds[outvar_name] = (tuple(dims), new_data[var_name])
                for d in dims:
                    ds.coords[d] = new_data[d][:]

        # write out the data
        msg = f"{outvar_name}: time {data['time_bnds'][0][0]:1.1f} - {data['time_bnds'][-1][-1]:1.1f}"
        logger.info(msg)

        if serial:
            pbar = tqdm(total=len(data['time']))
            pbar.set_description(msg)

        for time_index, val in enumerate(data['time']):
            outdata = write_data(
                varid=0,
                data=data,
                timeval=val,
                timebnds=[data['time_bnds'][time_index, :]],
                index=time_index,
                raw_variables=raw_variables,
                simple=True)
            ds[outvar_name][time_index] = outdata
            if serial:
                pbar.update(1)

        if serial:
            pbar.close()

        # The input stays open until the output has been written so that
        # variables copied from it can still be read.
        with xr.open_dataset(infiles[raw_variables[0]][0],
                             decode_cf=False,
                             decode_times=False) as inputds:
            for attr, val in inputds.attrs.items():
                ds.attrs[attr] = val

            ds['lat_bnds'] = inputds['lat_bnds']
            ds['lon_bnds'] = inputds['lon_bnds']

            # check for and change the bounds name for lnd files since
            # "time_bounds" is different from every other bounds name in the
            # entire E3SM project
            if 'time_bnds' in inputds.data_vars:
                time_bounds_name = 'time_bnds'
            else:
                time_bounds_name = 'time_bounds'
            ds['time_bnds'] = inputds[time_bounds_name]
            ds['time'] = inputds['time']
            ds['time'].attrs['bounds'] = 'time_bnds'

            resource_path, _ = os.path.split(
                os.path.abspath(resources.__file__))
            table_path = os.path.join(resource_path, table)
            with open(table_path, 'r') as ip:
                table_data = json.load(ip)

            variable_attrs = ['standard_name', 'long_name', 'comment',
                              'cell_methods', 'cell_measures', 'units']
            for attr in variable_attrs:
                ds[outvar_name].attrs[attr] = table_data['variable_entry'][outvar_name][attr]

            # table[:-5] drops the trailing '.json' from the table file name
            output_file_path = os.path.join(
                outpath,
                f'{outvar_name}_{table[:-5]}_{start_year}-{end_year}')
            msg = f'writing out variable to file {output_file_path}'
            print_message(msg, 'ok')
            fillVals = {
                np.dtype('float32'): 1e20,
                np.dtype('float64'): 1e20,
            }
            write_netcdf(ds, output_file_path, fillValues=fillVals,
                         unlimited=['time'])

    msg = f'{outvar_name}: file close complete'
    logger.debug(msg)

    return outvar_name
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_pressureAdjustedSSH,
    timeMonthly_avg_ssh, timeMonthly_avg_density,
    timeMonthly_avg_layerThickness, and EAM PSL into CMIP.pbo

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete;
        None when simple conversion is requested (not supported) and an
        empty string when the CMOR write fails
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    namelistFileName = infiles['MPASO_namelist']
    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']
    pslFileNames = infiles['PSL']

    namelist = mpas.convert_namelist_to_dict(namelistFileName)
    config_density0 = float(namelist['config_density0'])
    gravity = 9.80616

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, cellMask3D = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_pressureAdjustedSSH',
        'timeMonthly_avg_ssh',
        'timeMonthly_avg_layerThickness',
        'timeMonthly_avg_density',
        'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        sshDifference = (dsIn.timeMonthly_avg_pressureAdjustedSSH -
                         dsIn.timeMonthly_avg_ssh)
        seaIcePressure = config_density0 * gravity * sshDifference

        # weight of the water column: density times thickness, summed over
        # the valid vertical levels
        columnMass = (dsIn.timeMonthly_avg_density *
                      dsIn.timeMonthly_avg_layerThickness)
        columnMass = columnMass.where(cellMask3D).sum(dim='nVertLevels')

        ds[VAR_NAME] = seaIcePressure.where(cellMask2D) + gravity * columnMass
        ds = mpas.add_time(ds, dsIn)
        # compute() returns a new, loaded dataset and leaves the original
        # untouched, so the result has to be kept; this loads the data while
        # the input files are still open
        ds = ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    with xarray.open_mfdataset(pslFileNames, concat_dim='time') as dsIn:
        ds[VAR_NAME] = ds[VAR_NAME] + dsIn.PSL.values

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # callers treat an empty name as failure; record why it failed
        logging.exception(f'{VAR_NAME}: CMOR write failed')
        return ""
    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_windStressMeridional into CMIP.tauvo

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete;
        None when simple conversion is requested (not supported) and an
        empty string when the CMOR write fails
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    variableList = [
        'timeMonthly_avg_windStressMeridional',
        'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds[VAR_NAME] = dsIn.timeMonthly_avg_windStressMeridional
        ds = mpas.add_time(ds, dsIn)
        # compute() returns a new, loaded dataset and leaves the original
        # untouched, so the result has to be kept; this loads the data while
        # the input files are still open
        ds = ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS, positive='down')
    except Exception:
        # callers treat an empty name as failure; record why it failed
        logging.exception(f'{VAR_NAME}: CMOR write failed')
        return ""
    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_layerThickness into CMIP.masscello

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete;
        None when simple conversion is requested (not supported) and an
        empty string when the CMOR write fails
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    namelistFileName = infiles['MPASO_namelist']
    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    namelist = mpas.convert_namelist_to_dict(namelistFileName)
    config_density0 = float(namelist['config_density0'])

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    _, cellMask3D = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_layerThickness',
        'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        # invalid cells contribute zero thickness
        layerThickness = dsIn.timeMonthly_avg_layerThickness.where(
            cellMask3D, 0.)
        ds[VAR_NAME] = config_density0 * layerThickness
        ds = mpas.add_time(ds, dsIn)
        # compute() returns a new, loaded dataset and leaves the original
        # untouched, so the result has to be kept; this loads the data while
        # the input files are still open
        ds = ds.compute()

    ds = mpas.add_depth(ds, dsMesh)
    ds = ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    # set masked values (where there are no MPAS grid cells) to zero
    ds[VAR_NAME] = ds[VAR_NAME].where(
        ds[VAR_NAME] != netCDF4.default_fillvals['f4'], 0.)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'depth_coord',
        'units': 'm',
        'coord_vals': ds.depth.values,
        'cell_bounds': ds.depth_bnds.values
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # callers treat an empty name as failure; record why it failed
        logging.exception(f'{VAR_NAME}: CMOR write failed')
        return ""
    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Convert the raw ``PHIS`` field into the CMIP ``orog`` variable.

    The output is ``PHIS`` divided by the gravitational constant used below
    (9.80616).

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of input file paths
        tables (str): path to the CMOR tables directory
        user_input_path (str): path to the CMOR user input json file
        kwargs: optional settings; this handler reads
            logdir (str): directory in which to place the CMOR log file
            serial (bool): when truthy a progress bar is displayed

    Returns
    -------
        str: the name of the processed variable, or None when input files
        are missing for any raw variable

    Raises
    ------
        IOError: the input file does not exist or the ``PHIS`` data is empty
    """
    logger = logging.getLogger()
    logger.info('{}: Starting'.format(VAR_NAME))

    logdir = kwargs.get('logdir')
    serial = kwargs.get('serial')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # fall back to a directory next to the root logger's first log file
        outpath, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')
    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)
    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))
    logging.info('{}: CMOR setup complete'.format(VAR_NAME))

    # extract data from the input file
    logger.info('orog: loading PHIS')
    filename = infiles['PHIS'][0]
    if not os.path.exists(filename):
        raise IOError("File not found: {}".format(filename))

    f = cdms2.open(filename)
    try:
        # load the data for the variable (read once and reused below)
        variable_data = f('PHIS')
        if not variable_data.any():
            raise IOError("Variable data not found: {}".format('PHIS'))

        # load the lon and lat info & bounds
        data = {
            'lat': variable_data.getLatitude(),
            'lon': variable_data.getLongitude(),
            'lat_bnds': f('lat_bnds'),
            'lon_bnds': f('lon_bnds'),
            'PHIS': variable_data
        }

        logger.info('{name}: loading axes'.format(name=VAR_NAME))
        axes = [{
            str('table_entry'): str('latitude'),
            str('units'): data['lat'].units,
            str('coord_vals'): data['lat'][:],
            str('cell_bounds'): data['lat_bnds'][:]
        }, {
            str('table_entry'): str('longitude'),
            str('units'): data['lon'].units,
            str('coord_vals'): data['lon'][:],
            str('cell_bounds'): data['lon_bnds'][:]
        }]

        # this message is also shown by the progress bar below
        msg = 'orog: running CMOR'
        logging.info(msg)

        axis_ids = [cmor.axis(**axis) for axis in axes]
        varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

        if serial:
            widgets = [
                progressbar.DynamicMessage('running'), ' [',
                progressbar.Timer(), '] ',
                progressbar.Bar(), ' (',
                progressbar.ETA(), ') '
            ]
            # the custom renderer is installed on the class itself; Python
            # looks __call__ up on the type, not on individual instances
            progressbar.DynamicMessage.__call__ = my_dynamic_message
            pbar = progressbar.ProgressBar(maxval=1, widgets=widgets)
            pbar.start()

        g = 9.80616
        outdata = data['PHIS'] / g
        cmor.write(varid, outdata)

        if serial:
            pbar.update(1, running=msg)
            pbar.finish()

        logger.debug('{}: write complete, closing'.format(VAR_NAME))
        cmor.close()
        logger.debug('{}: file close complete'.format(VAR_NAME))
    finally:
        # the input file handle was previously leaked
        f.close()

    return 'orog'
def handle_variables(infiles, raw_variables, write_data, outvar_name,
                     outvar_units, table, tables, metadata_path, serial=None,
                     positive=None, levels=None, axis=None, logdir=None,
                     simple=False, outpath=None):
    """
    Convert raw variables into a CMIP variable, using CMOR unless ``simple``
    is set, in which case the work is delegated to handle_simple.

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of input file paths
        raw_variables (list): names of the raw variables needed for the output
        write_data (callable): called with keyword arguments varid, data,
            raw_variables and simple=False, plus timeval, timebnds and index
            when the variable has a time axis
        outvar_name (str): name of the output variable
        outvar_units (str): units passed to cmor.variable
        table (str): file name of the CMIP table
        tables (str): path to the CMOR tables directory
        metadata_path (str): path to the CMOR user input json file
        serial (bool): when truthy a tqdm progress bar is shown
        positive (str): forwarded to cmor.variable when truthy
        levels: forwarded to get_dimension_data and load_axis
        axis: forwarded to handle_simple only
        logdir (str): directory for the CMOR log; when not given, a
            'cmor_logs' directory next to the root logger's first log file
        simple (bool): delegate to handle_simple instead of using CMOR
        outpath (str): forwarded to handle_simple only

    Returns
    -------
        str: outvar_name, or None when input files are missing for any
        raw variable
    """
    timename = var_has_time(os.path.join(tables, table), outvar_name)

    if simple:
        return handle_simple(
            infiles,
            raw_variables,
            write_data,
            outvar_name,
            outvar_units,
            serial=serial,
            table=table,
            positive=positive,
            levels=levels,
            axis=axis,
            logdir=logdir,
            outpath=outpath,
            has_time=timename)

    from e3sm_to_cmip.util import print_message
    logger = logging.getLogger()

    logger.info(f'{outvar_name}: Starting')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in raw_variables:
        if len(infiles[variable]) == 0:
            msg = f'{outvar_name}: Unable to find input files for {variable}'
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # kept in its own name so the ``outpath`` argument is not overwritten
        log_root, _ = os.path.split(
            logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(log_root, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, outvar_name + '.log')

    cmor.setup(
        inpath=tables,
        netcdf_file_action=cmor.CMOR_REPLACE,
        logfile=logfile)

    cmor.dataset_json(str(metadata_path))
    cmor.load_table(str(table))

    msg = f'{outvar_name}: CMOR setup complete'
    logging.info(msg)

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[raw_variables[0]])

    # sort the input files for each variable
    for var_name in raw_variables:
        infiles[var_name].sort()

    for file_index in range(num_files_per_variable):

        # reload the dimensions for each time slice
        get_dims = True

        # load data for each variable
        for var_name in raw_variables:
            # extract data from the input file
            logger.info(f'{outvar_name}: loading {var_name}')

            new_data = get_dimension_data(
                filename=infiles[var_name][file_index],
                variable=var_name,
                levels=levels,
                get_dims=get_dims)
            data.update(new_data)
            get_dims = False
            # The old "simple and not loaded_one" branch lived here; it could
            # never run because simple mode returns at the top of the function
            # (and loaded_one was never defined), so it has been removed.

        logger.info(f'{outvar_name}: loading axes')

        # create the cmor variable and axis
        axis_ids, ips = load_axis(data=data, levels=levels, has_time=timename)

        if ips:
            data['ips'] = ips

        if positive:
            varid = cmor.variable(outvar_name, outvar_units,
                                  axis_ids, positive=positive)
        else:
            varid = cmor.variable(outvar_name, outvar_units, axis_ids)

        # write out the data
        msg = f"{outvar_name}: time {data['time_bnds'][0][0]:1.1f} - {data['time_bnds'][-1][-1]:1.1f}"
        logger.info(msg)

        if serial:
            pbar = tqdm(total=len(data['time']))
            pbar.set_description(msg)

        if timename:
            # one write per time step
            for time_index, val in enumerate(data['time']):
                write_data(
                    varid=varid,
                    data=data,
                    timeval=val,
                    timebnds=[data['time_bnds'][time_index, :]],
                    index=time_index,
                    raw_variables=raw_variables,
                    simple=False)
                if serial:
                    pbar.update(1)
        else:
            write_data(
                varid=varid,
                data=data,
                raw_variables=raw_variables,
                simple=False)

        if serial:
            pbar.close()

    msg = f'{outvar_name}: write complete, closing'
    logger.debug(msg)

    cmor.close()

    msg = f'{outvar_name}: file close complete'
    logger.debug(msg)

    return outvar_name
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Write the ``phalf`` variable on the 'standard_hybrid_sigma_half' axis
    through CMOR, using the raw variables listed in RAW_VARIABLES.

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of input file paths
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        kwargs: optional settings; this handler reads
            serial (bool): when truthy a tqdm progress bar is shown
            logdir (str): directory in which to place the CMOR log file
            simple (bool): not supported, the handler returns immediately
    Returns
    -------
        var name (str): the name of the processed variable after processing
        is complete; None when simple conversion is requested or when input
        files are missing for any raw variable
    Raises
    ------
        IOError: an input file does not exist or a variable's data is empty
    """
    logger = logging.getLogger()
    logger.info('{}: Starting'.format(VAR_NAME))

    serial = kwargs.get('serial')
    logdir = kwargs.get('logdir')
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    # check that we have some input files for every variable
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # fall back to a directory next to the root logger's first log file
        outpath, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)
    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    logging.info('{}: CMOR setup complete'.format(VAR_NAME))

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[RAW_VARIABLES[0]])

    # sort the input files for each variable
    for var_name in RAW_VARIABLES:
        infiles[var_name].sort()

    for file_index in range(num_files_per_variable):
        # every file opened for this slice is closed once it has been written
        open_files = []
        try:
            # load data for each variable
            for var_name in RAW_VARIABLES:
                # extract data from the input file
                msg = '{name}: loading {variable}'.format(name=VAR_NAME,
                                                          variable=var_name)
                logger.info(msg)

                filename = infiles[var_name][file_index]
                if not os.path.exists(filename):
                    raise IOError("File not found: {}".format(filename))

                f = cdms2.open(filename)
                open_files.append(f)

                # load the data for each variable
                variable_data = f(var_name)
                if not variable_data.any():
                    raise IOError(
                        "Variable data not found: {}".format(var_name))

                # keyed by the variable being loaded; this used to reuse the
                # leftover name from the input-file check above
                data.update({var_name: variable_data})

                # load the lon and lat info & bounds
                # load time & time bounds
                if var_name == 'PS':
                    data.update({
                        'ps': f('PS'),
                        'lat': variable_data.getLatitude(),
                        'lon': variable_data.getLongitude(),
                        'lat_bnds': f('lat_bnds'),
                        'lon_bnds': f('lon_bnds'),
                        'time2': variable_data.getTime(),
                        'time_bnds': f('time_bnds')
                    })

                if 'lev' in f.listdimension() and 'ilev' in f.listdimension():
                    data.update({
                        'lev': f.getAxis('lev')[:] / 1000,
                        'ilev': f.getAxis('ilev')[:] / 1000
                    })

                # hybrid coefficients, when the file provides them
                new_data = {
                    i: f(i)
                    for i in ['hyai', 'hybi', 'hyam', 'hybm']
                    if i in f.variables
                }
                data.update(new_data)

            logger.info('{name}: loading axes'.format(name=VAR_NAME))

            # create the cmor variable and axis
            axes = [{
                str('table_entry'): 'time2',
                str('units'): data['time2'].units
            }, {
                str('table_entry'): str('standard_hybrid_sigma_half'),
                str('units'): str('1'),
                str('coord_vals'): data['lev'][:],
                str('cell_bounds'): data['ilev'][:]
            }, {
                str('table_entry'): str('latitude'),
                str('units'): data['lat'].units,
                str('coord_vals'): data['lat'][:],
                str('cell_bounds'): data['lat_bnds'][:]
            }, {
                str('table_entry'): str('longitude'),
                str('units'): data['lon'].units,
                str('coord_vals'): data['lon'][:],
                str('cell_bounds'): data['lon_bnds'][:]
            }]

            axis_ids = [cmor.axis(**axis) for axis in axes]
            time_id, lev_id, lat_id, lon_id = axis_ids[:4]

            # add hybrid level formula terms
            cmor.zfactor(zaxis_id=lev_id,
                         zfactor_name='a_half',
                         axis_ids=[lev_id, ],
                         zfactor_values=data['hyam'][:])
            cmor.zfactor(zaxis_id=lev_id,
                         zfactor_name='b_half',
                         axis_ids=[lev_id, ],
                         zfactor_values=data['hybm'][:])
            cmor.zfactor(zaxis_id=lev_id,
                         zfactor_name='p0',
                         units='Pa',
                         zfactor_values=100000)
            # use the ids CMOR actually returned instead of assuming that the
            # time/lat/lon axes are numbered 0, 2 and 3
            ips = cmor.zfactor(zaxis_id=lev_id,
                               zfactor_name='ps2',
                               axis_ids=[time_id, lat_id, lon_id],
                               units='Pa')

            data['ips'] = ips

            varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids[:4])

            # write out the data
            msg = "{}: time {:1.1f} - {:1.1f}".format(VAR_NAME,
                                                      data['time_bnds'][0][0],
                                                      data['time_bnds'][-1][-1])
            logger.info(msg)

            if serial:
                pbar = tqdm(total=len(data['time2']))

            for time_index, val in enumerate(data['time2']):
                if serial:
                    pbar.update(1)
                write_data(varid=varid,
                           data=data,
                           timeval=val,
                           timebnds=[data['time_bnds'][time_index, :]],
                           index=time_index,
                           RAW_VARIABLES=RAW_VARIABLES)
            if serial:
                pbar.close()
        finally:
            for open_file in open_files:
                open_file.close()

    logger.debug('{}: write complete, closing'.format(VAR_NAME))
    cmor.close()
    logger.debug('{}: file close complete'.format(VAR_NAME))

    return 'phalf'
def main():
    """
    Command line entry point: parse the arguments, load the requested
    variable handlers, run them in serial or in parallel and optionally add
    extra metadata to the output files.

    Returns
    -------
        int: 0 on success (including when precheck finds nothing to do),
        1 when the handlers raise, are interrupted or report a non-zero status

    The process exits with status 1 through sys.exit when no handlers could
    be loaded. When a timeout is requested, the timer is cancelled on every
    way out of this function.
    """
    # parse the command line arguments
    _args = parse_argsuments().__dict__

    # a single quoted, space separated string is accepted as a variable list
    if len(_args.get('var_list')) == 1 and " " in _args.get('var_list')[0]:
        var_list = _args.get('var_list')[0].split()
    else:
        var_list = _args.get('var_list')
    var_list = [x.strip(',') for x in var_list]

    input_path = _args.get('input_path')
    output_path = _args.get('output_path')
    tables_path = _args.get('tables_path')
    user_metadata = _args.get('user_metadata')
    no_metadata = _args.get('no_metadata') or False
    only_metadata = _args.get('only_metadata') or False
    nproc = _args.get('num_proc') or 6
    serial = _args.get('serial') or False
    mode = _args.get('mode') or 'atm'
    debug = True if _args.get('debug') else False
    map_path = _args.get('map') or None
    cmor_log_dir = _args.get('logdir') or None
    timeout = int(_args['timeout']) if _args.get('timeout') else None
    should_precheck = _args.get('precheck')

    timer = None
    if timeout:
        timer = threading.Timer(timeout, timeout_exit)
        timer.start()

    try:
        if _args.get('handlers'):
            handlers_path = os.path.abspath(_args.get('handlers'))
        else:
            handlers_path, _ = os.path.split(
                os.path.abspath(cmor_handlers.__file__))

        if should_precheck:
            new_var_list = precheck(input_path, output_path, var_list, mode)
            if not new_var_list:
                print("All variables previously computed")
                return 0
            print("Setting up conversion for {}".format(
                " ".join(new_var_list)))
            var_list = new_var_list

        # add additional optional metadata to the output files
        if only_metadata:
            print_message('Updating file metadata and exiting', 'ok')
            add_metadata(file_path=output_path, var_list=var_list)
            return 0

        new_metadata_path = os.path.join(output_path, 'user_metadata.json')

        # create the output dir if it doesnt exist
        os.makedirs(output_path, exist_ok=True)

        # setup temp storage directory
        temp_path = os.environ.get('TMPDIR')
        if temp_path is None:
            temp_path = '{}/tmp'.format(output_path)
            os.makedirs(temp_path, exist_ok=True)

        tempfile.tempdir = temp_path

        logging_path = os.path.join(output_path, 'converter.log')
        print_message("Writing log output to: {}".format(logging_path),
                      'debug')

        # setup logging
        logging.basicConfig(format='%(asctime)s:%(levelname)s: %(message)s',
                            datefmt='%m/%d/%Y %I:%M:%S %p',
                            filename=logging_path,
                            filemode='w',
                            level=logging.INFO)

        # copy the users metadata json file with the updated output directory
        copy_user_metadata(user_metadata, output_path)

        # load variable handlers
        handlers = load_handlers(handlers_path, var_list, debug)
        if len(handlers) == 0:
            print_message('No handlers loaded')
            sys.exit(1)

        # run in the user-selected mode
        if serial:
            print_message('Running CMOR handlers in serial', 'ok')
            try:
                status = run_serial(handlers=handlers,
                                    input_path=input_path,
                                    tables_path=tables_path,
                                    metadata_path=new_metadata_path,
                                    map_path=map_path,
                                    mode=mode,
                                    logdir=cmor_log_dir)
            except KeyboardInterrupt:
                print_message(' -- keyboard interrupt -- ', 'error')
                return 1
            except Exception as e:
                print_debug(e)
                return 1
        else:
            print_message('Running CMOR handlers in parallel', 'ok')
            try:
                pool = Pool(nproc)
                status = run_parallel(pool=pool,
                                      handlers=handlers,
                                      input_path=input_path,
                                      tables_path=tables_path,
                                      metadata_path=new_metadata_path,
                                      map_path=map_path,
                                      mode=mode,
                                      logdir=cmor_log_dir)
            except KeyboardInterrupt:
                print_message(' -- keyboard interrupt -- ', 'error')
                return 1
            except Exception as error:
                print_debug(error)
                return 1

        if status != 0:
            print_message("Error running handlers: {}".format(" ".join(
                [x['name'] for x in handlers])))
            return 1

        # add additional optional metadata to the output files
        if no_metadata:
            print_message('Not adding additional metadata', 'ok')
        else:
            add_metadata(file_path=output_path, var_list=var_list)

        return 0
    finally:
        # Previously the timer was only cancelled on two of the exit paths,
        # so any other return left it running after main was done.
        if timer:
            timer.cancel()
def main():
    """
    Command line entry point: parse the arguments, load the requested
    variable handlers, run them in serial or in parallel and optionally add
    custom metadata to the output files.

    Returns
    -------
        int: 0 on success (including when precheck finds nothing to do),
        1 when the handlers raise, are interrupted or report a non-zero status

    The process exits through sys.exit with status 1 when no handlers could
    be loaded and with status 0 after printing variable info when 'info' is
    requested. When a timeout is requested, the timer is cancelled on every
    way out of this function.
    """
    # parse the command line arguments
    _args = parse_arguments().__dict__

    # a single quoted, space separated string is accepted as a variable list
    if len(_args.get('var_list')) == 1 and " " in _args.get('var_list')[0]:
        var_list = _args.get('var_list')[0].split()
    else:
        var_list = _args.get('var_list')
    var_list = [x.strip(',') for x in var_list]

    input_path = _args.get('input_path')
    output_path = _args.get('output_path')
    tables_path = _args.get('tables_path')
    user_metadata = _args.get('user_metadata')
    custom_metadata = _args.get('custom_metadata')
    nproc = _args.get('num_proc')
    serial = _args.get('serial')
    realm = _args.get('realm')
    debug = True if _args.get('debug') else False
    map_path = _args.get('map')
    cmor_log_dir = _args.get('logdir')
    timeout = int(_args.get('timeout')) if _args.get('timeout') else False
    simple = _args.get('simple', False)
    precheck_path = _args.get('precheck', False)
    freq = _args.get('freq')

    logger = _setup_custom_logger(f"{cmor_log_dir}/e3sm_to_cmip.log", True)
    logger.info(f"input_path = {input_path}")
    logger.info(f"output_path = {output_path}")
    logger.info(f"precheck_path = {precheck_path}")

    # in simple mode the tables bundled with the package are the default
    if simple and not tables_path:
        resource_path, _ = os.path.split(os.path.abspath(resources.__file__))
        tables_path = resource_path

    timer = None
    if timeout:
        timer = threading.Timer(timeout, timeout_exit)
        timer.start()

    try:
        if _args.get('handlers'):
            handlers_path = os.path.abspath(_args.get('handlers'))
        else:
            handlers_path, _ = os.path.split(
                os.path.abspath(cmor_handlers.__file__))

        if precheck_path:
            new_var_list = precheck(input_path, precheck_path, var_list, realm)
            if not new_var_list:
                print("All variables previously computed")
                # makedirs tolerates a missing output directory as well as an
                # already existing CMIP6 directory, both of which made the
                # previous os.mkdir call raise
                os.makedirs(os.path.join(output_path, 'CMIP6'), exist_ok=True)
                return 0
            print_message(
                f"Setting up conversion for {' '.join(new_var_list)}", 'ok')
            var_list = new_var_list

        # load variable handlers
        handlers = _load_handlers(
            handlers_path=handlers_path,
            tables_path=tables_path,
            var_list=var_list,
            freq=freq,
            realm=realm)

        if len(handlers) == 0:
            print_message('No handlers loaded')
            sys.exit(1)

        if _args.get('info'):
            print_var_info(
                handlers,
                freq,
                input_path,
                tables_path,
                _args.get('info_out'))
            sys.exit(0)

        new_metadata_path = os.path.join(
            output_path,
            'user_metadata.json')

        # create the output dir if it doesnt exist
        os.makedirs(output_path, exist_ok=True)

        # setup temp storage directory
        temp_path = os.environ.get('TMPDIR')
        if temp_path is None:
            temp_path = f'{output_path}/tmp'
            os.makedirs(temp_path, exist_ok=True)

        tempfile.tempdir = temp_path

        logging_path = os.path.join(output_path, 'converter.log')
        print_message(f"Writing log output to: {logging_path}", 'debug')

        # setup logging
        logging.basicConfig(
            format='%(asctime)s:%(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S %p',
            filename=logging_path,
            filemode='w',
            level=logging.INFO)

        # copy the users metadata json file with the updated output directory
        if not simple:
            copy_user_metadata(
                user_metadata, output_path)

        # run in the user-selected mode
        if serial:
            print_message('Running CMOR handlers in serial', 'ok')
            try:
                status = run_serial(
                    handlers=handlers,
                    input_path=input_path,
                    tables_path=tables_path,
                    metadata_path=new_metadata_path,
                    map_path=map_path,
                    realm=realm,
                    logdir=cmor_log_dir,
                    simple=simple,
                    outpath=output_path,
                    freq=freq)
            except KeyboardInterrupt:
                print_message(' -- keyboard interrupt -- ', 'error')
                return 1
            except Exception as e:
                print_debug(e)
                return 1
        else:
            print_message('Running CMOR handlers in parallel', 'ok')
            try:
                pool = Pool(max_workers=nproc)
                status = run_parallel(
                    pool=pool,
                    handlers=handlers,
                    input_path=input_path,
                    tables_path=tables_path,
                    metadata_path=new_metadata_path,
                    map_path=map_path,
                    realm=realm,
                    logdir=cmor_log_dir,
                    simple=simple,
                    outpath=output_path,
                    freq=freq)
            except KeyboardInterrupt:
                print_message(' -- keyboard interrupt -- ', 'error')
                return 1
            except Exception as error:
                print_debug(error)
                return 1

        if status != 0:
            handler_names = ' '.join([x['name'] for x in handlers])
            print_message(f"Error running handlers: {handler_names}")
            return 1

        if custom_metadata:
            add_metadata(
                file_path=output_path,
                var_list=var_list,
                metadata=custom_metadata)

        return 0
    finally:
        # Previously the timer was only cancelled on two of the exit paths,
        # so any other return left it running after main was done.
        if timer:
            timer.cancel()
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_seaIceFreshWaterFlux into CMIP.fsitherm

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete;
        None when simple conversion is requested (not supported) and an
        empty string when the CMOR write fails
    """
    if kwargs.get('simple'):
        print_message(f'Simple CMOR output not supported for {VAR_NAME}', 'error')
        return None

    logging.info(f'Starting {VAR_NAME}')

    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    variableList = [
        'timeMonthly_avg_seaIceFreshWaterFlux',
        'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds[VAR_NAME] = dsIn.timeMonthly_avg_seaIceFreshWaterFlux
        ds = mpas.add_time(ds, dsIn)
        # compute() returns a new, loaded dataset and leaves the original
        # untouched, so the result has to be kept; this loads the data while
        # the input files are still open
        ds = ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # callers treat an empty name as failure; record why it failed
        logging.exception(f'{VAR_NAME}: CMOR write failed')
        return ""
    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Load the RAW_VARIABLES input files and write them through CMOR as pfull

    Parameters
    ----------
    infiles : dict
        maps every name in RAW_VARIABLES to a list of input file paths;
        each list is sorted in place
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file
    **kwargs
        serial : when truthy, a progress bar is shown while writing
        logdir : directory for the CMOR log file; when not given, a
            'cmor_logs' directory is created next to the file of the root
            logger's first handler

    Returns
    -------
    varname : str or None
        'pfull' once all data has been written, or None if any raw
        variable has no input files

    Raises
    ------
    IOError
        if an input file does not exist, or if ``.any()`` is false for the
        data loaded for a variable
    """
    logger = logging.getLogger()
    msg = '{}: Starting'.format(VAR_NAME)
    logger.info(msg)

    serial = kwargs.get('serial')
    logdir = kwargs.get('logdir')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # fall back to a directory beside the root logger's log file
        outpath, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(
        inpath=tables,
        netcdf_file_action=cmor.CMOR_REPLACE,
        logfile=logfile)

    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logging.info(msg)

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[RAW_VARIABLES[0]])

    # sort the input files for each variable
    for var_name in RAW_VARIABLES:
        infiles[var_name].sort()

    for index in range(num_files_per_variable):

        # load data for each variable
        for var_name in RAW_VARIABLES:

            # extract data from the input file
            msg = '{name}: loading {variable}'.format(
                name=VAR_NAME,
                variable=var_name)
            logger.info(msg)

            filename = infiles[var_name][index]

            if not os.path.exists(filename):
                raise IOError("File not found: {}".format(filename))

            f = cdms2.open(filename)

            # load the data for each variable
            variable_data = f(var_name)

            # Key the data and the error message on the variable being
            # loaded now (var_name), not on the name left over from the
            # input-file check loop above.
            if not variable_data.any():
                raise IOError("Variable data not found: {}".format(var_name))

            data[var_name] = variable_data

            # load the lon and lat info & bounds
            # load time & time bounds
            if var_name == 'PS':
                data.update({
                    'ps': f('PS'),
                    'lat': variable_data.getLatitude(),
                    'lon': variable_data.getLongitude(),
                    'lat_bnds': f('lat_bnds'),
                    'lon_bnds': f('lon_bnds'),
                    'time': variable_data.getTime(),
                    'time2': variable_data.getTime(),
                    'time_bnds': f('time_bnds')
                })

            if 'lev' in f.listdimension() and 'ilev' in f.listdimension():
                data.update({
                    'lev': f.getAxis('lev')[:]/1000,
                    'ilev': f.getAxis('ilev')[:]/1000
                })

            # pick up whichever hybrid coefficients this file provides
            new_data = {i: f(i) for i in [
                'hyam', 'hybm', 'hyai', 'hybi'] if i in f.variables}
            data.update(new_data)

        msg = '{name}: loading axes'.format(name=VAR_NAME)
        logger.info(msg)

        axes = [{
            str('table_entry'): 'time2',
            str('units'): data['time2'].units
        }, {
            str('table_entry'): str('standard_hybrid_sigma'),
            str('units'): str('1'),
            str('coord_vals'): data['lev'][:],
            str('cell_bounds'): data['ilev'][:]
        }, {
            str('table_entry'): str('latitude'),
            str('units'): data['lat'].units,
            str('coord_vals'): data['lat'][:],
            str('cell_bounds'): data['lat_bnds'][:]
        }, {
            str('table_entry'): str('longitude'),
            str('units'): data['lon'].units,
            str('coord_vals'): data['lon'][:],
            str('cell_bounds'): data['lon_bnds'][:]
        }]

        axis_ids = [cmor.axis(**axis) for axis in axes]

        # add hybrid level formula terms
        cmor.zfactor(
            zaxis_id=axis_ids[1],
            zfactor_name=str('a'),
            axis_ids=[axis_ids[1], ],
            zfactor_values=data['hyam'][:],
            zfactor_bounds=data['hyai'][:])
        cmor.zfactor(
            zaxis_id=axis_ids[1],
            zfactor_name=str('b'),
            axis_ids=[axis_ids[1], ],
            zfactor_values=data['hybm'][:],
            zfactor_bounds=data['hybi'][:])
        cmor.zfactor(
            zaxis_id=axis_ids[1],
            zfactor_name=str('p0'),
            units=str('Pa'),
            zfactor_values=100000)
        # NOTE(review): literal axis ids are passed here rather than entries
        # of axis_ids -- presumably they equal axis_ids[0], [2] and [3] on
        # the first pass; confirm this holds when several files are looped.
        ips = cmor.zfactor(
            zaxis_id=axis_ids[1],
            zfactor_name=str('ps2'),
            axis_ids=[0, 2, 3],
            units=str('Pa'))

        data['ips'] = ips

        varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids[:4])

        # write out the data
        msg = "{}: time {:1.1f} - {:1.1f}".format(
            VAR_NAME,
            data['time_bnds'][0][0],
            data['time_bnds'][-1][-1])
        logger.info(msg)

        if serial:
            widgets = [
                progressbar.DynamicMessage('running'), ' [',
                progressbar.Timer(), '] ',
                progressbar.Bar(),
                ' (', progressbar.ETA(), ') '
            ]
            # patch the class so the 'running' widget renders our message
            progressbar.DynamicMessage.__call__ = my_dynamic_message
            pbar = progressbar.ProgressBar(
                maxval=len(data['time']), widgets=widgets)
            pbar.start()

        # separate loop name so the file index above is not shadowed
        for time_index, val in enumerate(data['time']):
            if serial:
                pbar.update(time_index, running=msg)
            write_data(
                varid=varid,
                data=data,
                timeval=val,
                timebnds=[data['time_bnds'][time_index, :]],
                index=time_index,
                RAW_VARIABLES=RAW_VARIABLES)
        if serial:
            pbar.finish()

    msg = '{}: write complete, closing'.format(VAR_NAME)
    logger.debug(msg)
    cmor.close()
    msg = '{}: file close complete'.format(VAR_NAME)
    logger.debug(msg)

    return 'pfull'
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASSI timeMonthly_avg_vVelocityGeo into CMIP.siv

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; the
        'MPAS_mesh', 'MPAS_map' and 'MPASSI' entries are read here
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file
    **kwargs
        only 'simple' is read; when truthy the conversion is refused

    Returns
    -------
    varname : str or None
        the name of the processed variable after processing is complete,
        None when simple conversion was requested, or an empty string when
        writing the CMOR output raised an exception
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASSI']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, _ = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_iceAreaCell',
        'timeMonthly_avg_vVelocityGeo',
        'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds['siconc'] = dsIn.timeMonthly_avg_iceAreaCell
        # vertex velocity moved to cells, multiplied by the ice area field
        ds[VAR_NAME] = ds['siconc'] * mpas.interp_vertex_to_cell(
            dsIn.timeMonthly_avg_vVelocityGeo, dsMesh)

        ds = mpas.add_time(ds, dsIn)
        ds = ds.chunk(chunks={'nCells': None, 'time': 6})
        # NOTE(review): assuming ds is an xarray.Dataset here, compute()
        # returns a new object and the result is discarded, so ds itself is
        # not loaded -- confirm whether `ds = ds.compute()` was intended.
        ds.compute()

    ds = mpas.add_si_mask(ds, cellMask2D, ds.siconc)
    ds['cellMask'] = ds.siconc * ds.cellMask
    # NOTE(review): same discarded compute() result as above.
    ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='seaice')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # Keep the "" failure return, but record the traceback so the
        # cause of the failure is not lost.
        logging.exception('%s: failed to write CMOR output', VAR_NAME)
        return ""
    return VAR_NAME
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_normalVelocity,
    timeMonthly_avg_normalGMBolusVelocity, timeMonthly_avg_vertVelocityTop,
    timeMonthly_avg_vertGMBolusVelocityTop, and timeMonthly_avg_layerThickness
    into CMIP.msftmz

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; the
        'MPAS_mesh', 'MPASO', 'MPASO_MOC_regions' and 'MPASO_namelist'
        entries are read here
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file
    **kwargs
        simple : when truthy the conversion is refused
        serial : passed on as the showProgress flag of the streamfunction
            computation

    Returns
    -------
    varname : str or None
        the name of the processed variable after processing is complete,
        None when simple conversion was requested, or an empty string when
        writing the CMOR output raised an exception
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    timeSeriesFiles = infiles['MPASO']
    regionMaskFileName = infiles['MPASO_MOC_regions']
    namelistFileName = infiles['MPASO_namelist']

    namelist = mpas.convert_namelist_to_dict(namelistFileName)
    config_density0 = float(namelist['config_density0'])

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    dsMesh = dsMesh.isel(Time=0)

    dsMasks = xarray.open_dataset(regionMaskFileName, mask_and_scale=False)

    variableList = [
        'timeMonthly_avg_normalVelocity',
        'timeMonthly_avg_normalGMBolusVelocity',
        'timeMonthly_avg_vertVelocityTop',
        'timeMonthly_avg_vertGMBolusVelocityTop',
        'timeMonthly_avg_layerThickness',
        'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        # False when 'serial' was not passed, otherwise its value as given
        showProgress = kwargs.get('serial', False)
        # scale the streamfunction by the namelist's config_density0
        ds = config_density0 * mpas.compute_moc_streamfunction(
            dsIn, dsMesh, dsMasks, showProgress=showProgress)

    ds = ds.rename({'moc': VAR_NAME})

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    region = ['global_ocean', 'atlantic_arctic_ocean']

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'basin',
        'units': '',
        'coord_vals': region
    }, {
        'table_entry': 'depth_coord',
        'units': 'm',
        'coord_vals': ds.depth.values,
        'cell_bounds': ds.depth_bnds.values
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }]

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # Keep the "" failure return, but record the traceback so the
        # cause of the failure is not lost.
        logging.exception('%s: failed to write CMOR output', VAR_NAME)
        return ""
    return VAR_NAME
def handle_variables(infiles, raw_variables, write_data, outvar_name, outvar_units, table, tables, metadata_path, serial=None, positive=None, levels=None, axis=None, logdir=None):
    """
    Load the raw input variables file by file and write them out with CMOR

    Parameters
    ----------
    infiles : dict
        maps every name in raw_variables to a list of input file paths;
        each list is sorted in place
    raw_variables : list
        names of the input variables to load
    write_data : callable
        called once per time step with the keywords varid, data, timeval,
        timebnds, index and raw_variables
    outvar_name : str
        name of the output variable, also used for the log file name
    outvar_units : str
        units handed to cmor.variable
    table : str
        CMOR table handed to cmor.load_table
    tables : str
        path to CMOR tables, handed to cmor.setup as inpath
    metadata_path : str
        path to the user metadata json file, handed to cmor.dataset_json
    serial : optional
        when truthy, a progress bar is shown while writing
    positive : optional
        when truthy, forwarded to cmor.variable as its positive argument
    levels : optional
        forwarded to get_dimension_data and load_axis
    axis : optional
        not used inside this function
    logdir : optional
        directory for the CMOR log file; when not given, a 'cmor_logs'
        directory is created next to the file of the root logger's first
        handler

    Returns
    -------
    varname : str or None
        outvar_name once all data has been written, or None if any raw
        variable has no input files
    """
    from e3sm_to_cmip.util import print_message

    logger = logging.getLogger()

    msg = '{}: Starting'.format(outvar_name)
    logger.info(msg)

    # check that we have some input files for every variable
    zerofiles = False
    for variable in raw_variables:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                outvar_name, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # fall back to a directory beside the root logger's log file
        outpath, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, outvar_name + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)

    cmor.dataset_json(str(metadata_path))
    cmor.load_table(str(table))

    msg = '{}: CMOR setup complete'.format(outvar_name)
    logging.info(msg)

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[raw_variables[0]])

    # sort the input files for each variable
    for var_name in raw_variables:
        infiles[var_name].sort()

    # only pass `positive` to cmor.variable when it was actually given
    variable_kwargs = {'positive': positive} if positive else {}

    for index in range(num_files_per_variable):

        # reload the dimensions for each time slice
        get_dims = True

        # load data for each variable
        for var_name in raw_variables:

            # extract data from the input file
            msg = '{name}: loading {variable}'.format(name=outvar_name,
                                                      variable=var_name)
            logger.info(msg)

            new_data = get_dimension_data(filename=infiles[var_name][index],
                                          variable=var_name,
                                          levels=levels,
                                          get_dims=get_dims)
            data.update(new_data)
            # dimensions are requested for the first variable of a file only
            get_dims = False

        msg = '{name}: loading axes'.format(name=outvar_name)
        logger.info(msg)

        # create the cmor variable and axis
        axis_ids, ips = load_axis(data=data, levels=levels)

        if ips:
            data['ips'] = ips

        varid = cmor.variable(outvar_name, outvar_units, axis_ids,
                              **variable_kwargs)

        # write out the data
        msg = "{}: time {:1.1f} - {:1.1f}".format(outvar_name,
                                                  data['time_bnds'][0][0],
                                                  data['time_bnds'][-1][-1])
        logger.info(msg)

        if serial:
            widgets = [
                progressbar.DynamicMessage('running'), ' [',
                progressbar.Timer(), '] ',
                progressbar.Bar(),
                ' (', progressbar.ETA(), ') '
            ]
            # patch the class so the 'running' widget renders our message
            progressbar.DynamicMessage.__call__ = my_dynamic_message
            pbar = progressbar.ProgressBar(maxval=len(data['time']),
                                           widgets=widgets)
            pbar.start()

        # separate loop name so the file index above is not shadowed
        for time_index, val in enumerate(data['time']):
            if serial:
                pbar.update(time_index, running=msg)
            write_data(varid=varid,
                       data=data,
                       timeval=val,
                       timebnds=[data['time_bnds'][time_index, :]],
                       index=time_index,
                       raw_variables=raw_variables)
        if serial:
            pbar.finish()

    msg = '{}: write complete, closing'.format(outvar_name)
    logger.debug(msg)

    cmor.close()

    msg = '{}: file close complete'.format(outvar_name)
    logger.debug(msg)

    return outvar_name