예제 #1
0
    def add_metadata(self, metadata=None):
        """
        Add additional custom metadata to the output files

        Walks the ``CMIP6`` directory under ``self._output_path`` and, for every
        ``.nc`` file whose name prefix (the text before the first underscore)
        is in ``self._var_list``, opens the file in append mode and sets a
        fixed set of E3SM provenance attributes on it.

        Parameters
        ----------
            metadata (dict): The keys are the names of the metadata fields, the values the values for the fields.
                NOTE(review): this argument is not read by the current
                implementation; only the hard-coded attributes are written.
        """
        cmip_path = os.path.join(self._output_path, 'CMIP6')

        print_message('Adding additional metadata to output files', 'ok')

        # collect the matching files first, then modify them
        filepaths = list()
        for root, _, files in os.walk(cmip_path, topdown=False):
            for name in files:
                if not name.endswith('.nc'):
                    continue
                index = name.find('_')
                if index != -1 and name[:index] in self._var_list:
                    print_message(
                        "Adding additional metadata to {}".format(name), 'ok')
                    filepaths.append(os.path.join(root, name))

        for filepath in filepaths:
            datafile = cdms2.open(filepath, 'a')
            try:
                datafile.e3sm_source_code_doi = '10.11578/E3SM/dc.20180418.36'
                datafile.e3sm_source_code_reference = 'https://github.com/E3SM-Project/E3SM/releases/tag/v1.0.0'
                datafile.doe_acknowledgement = 'This research was supported as part of the Energy Exascale Earth System Model (E3SM) project, funded by the U.S. Department of Energy, Office of Science, Office of Biological and Environmental Research.'
                datafile.computational_acknowledgement = 'The data were produced using resources of the National Energy Research Scientific Computing Center, a DOE Office of Science User Facility supported by the Office of Science of the U.S. Department of Energy under Contract No. DE-AC02-05CH11231.'
                datafile.ncclimo_generation_command = """ncclimo --var=${var} -7 --dfl_lvl=1 --no_cll_msr --no_frm_trm --no_stg_grd --yr_srt=1 --yr_end=500 --ypf=500 --map=map_ne30np4_to_cmip6_180x360_aave.20181001.nc """
                datafile.ncclimo_version = '4.7.9-alpha04'
            finally:
                # always release the file handle, even if an attribute
                # assignment fails
                datafile.close()
예제 #2
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_activeTracers_temperature into CMIP.tosga

    The top vertical level of the temperature field is masked with the 2D
    cell mask and averaged over ``nCells`` using ``areaCell`` as weights.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the ``'MPAS_mesh'`` and ``'MPASO'`` entries are read here)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy, the conversion is skipped

    Returns
    -------
    varname : str or None
        the name of the processed variable after processing is complete,
        an empty string if writing the CMOR output failed, or None when
        ``simple`` conversion was requested (it is not supported)
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    timeSeriesFiles = infiles['MPASO']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, _ = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_activeTracers_temperature', 'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        thetao = dsIn.timeMonthly_avg_activeTracers_temperature
        tos = thetao.isel(nVertLevels=0).squeeze(drop=True).where(cellMask2D)
        areaCell = dsMesh.areaCell.where(cellMask2D)
        # area-weighted mean over all cells
        ds[VAR_NAME] = ((tos * areaCell).sum(dim='nCells') /
                        areaCell.sum(dim='nCells'))
        ds = mpas.add_time(ds, dsIn)
        # xarray's compute() returns a new object instead of loading in
        # place, so keep the result; this also means the values are in
        # memory before the input files are closed
        ds = ds.compute()

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{'table_entry': 'time', 'units': ds.time.units}]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string on failure) but leave
        # a trace of what went wrong
        logging.exception(f"{VAR_NAME}: failed to write CMOR output")
        return ""
    return VAR_NAME
예제 #3
0
    def run(self):
        """
        Run all the requested CMOR handlers.

        Loads the handlers, submits one job per handler to a process pool,
        waits for every job, shuts the pool down and finally adds the extra
        metadata to the output files. Exits the interpreter with status 1
        when no handlers could be loaded.
        """
        self.load_handlers()
        if len(self._handlers) == 0:
            print_message('No handlers loaded')
            sys.exit(1)

        # Setup the number of processes that will exist in the pool
        len_handlers = len(self._handlers)
        if self._proc_vars:
            # leave one core free, but never ask for zero workers:
            # Pool(0) raises ValueError on a single-core machine
            ncpu = max(1, cpu_count() - 1)
            self._nproc = min(len_handlers, ncpu)

        print_message('running with {} processes'.format(self._nproc), 'debug')

        # create process pool and results list
        self._pool = Pool(self._nproc)
        self._pool_res = list()

        try:
            for handler in self._handlers:
                for handler_info in handler.values():

                    handler_method = handler_info[0]
                    handler_variables = handler_info[1]
                    # find the input files this handler needs
                    input_files = list()
                    for variable in handler_variables:
                        input_file = self.find_variable_file(
                            var=variable, path=self._input_path)
                        if input_file is None:
                            continue
                        var_path = os.path.join(self._input_path, input_file)
                        input_files.append(var_path)

                    _args = (input_files, self._tables_path,
                             self._user_input_path)
                    self._pool_res.append(
                        self._pool.apply_async(handler_method, args=_args,
                                               kwds={}))

            for idx, res in enumerate(self._pool_res):
                try:
                    out = res.get(9999999)
                    msg = 'Finished {handler}, {done}/{total} jobs complete'.format(
                        handler=out, done=idx + 1, total=len(self._pool_res))
                    logging.info(msg)
                except Exception as e:
                    # a failing handler must not stop the remaining jobs
                    logging.error(e)
        finally:
            # shut the workers down even if submitting or waiting was
            # interrupted
            self.terminate()

        self.add_metadata()
예제 #4
0
 def terminate(self):
     """
     Shut down the process pool, if there is one.

     Prints a debug notice first when debugging is enabled, then closes,
     terminates and joins the pool.
     """
     if self._debug:
         print_message('Shutting down process pool', 'debug')

     pool = self._pool
     if not pool:
         # nothing to shut down
         return

     pool.close()
     pool.terminate()
     pool.join()
예제 #5
0
    def load_handlers(self):
        """
        Load the cmor handler modules.

        Every ``*.py`` file (other than ``__init__.py``) in
        ``self._handlers_path`` whose module name was requested through
        ``self._var_list`` (or all of them when ``'all'`` is listed) is
        loaded. For each one, a ``{module_name: (handle, RAW_VARIABLES)}``
        dict is appended to ``self._handlers``. Modules that fail to import,
        or that lack ``handle`` / ``RAW_VARIABLES``, are reported and skipped.
        """

        handler_names = os.listdir(self._handlers_path)

        # if the handler directory doesn't have an __init__.py file create one
        if "__init__.py" not in handler_names:
            with open(os.path.join(self._handlers_path, '__init__.py'),
                      'w') as fp:
                fp.write('\n')

        load_all = 'all' in self._var_list

        # sorted so handlers are loaded in a reproducible order
        for handler in sorted(handler_names):
            if not handler.endswith('.py') or handler == "__init__.py":
                continue

            module_name, _ = handler.rsplit('.', 1)

            # ignore handlers for variables that weren't requested
            if not load_all and module_name not in self._var_list:
                continue

            module_path = os.path.join(self._handlers_path, handler)

            # load the module, and extract the "handle" method and required variables
            # NOTE(review): the imp module is deprecated and was removed in
            # Python 3.12; migrating to importlib should be considered.
            try:
                module = imp.load_source(module_name, module_path)
                method = module.handle
                raw_variables = module.RAW_VARIABLES
            except (ImportError, AttributeError) as e:
                # AttributeError covers a module without handle/RAW_VARIABLES,
                # which would otherwise abort loading all remaining handlers
                msg = format_debug(e)
                print_message(
                    'Error loading handler for {}'.format(module_path))
                print_message(msg)
                logging.error(msg)
                continue
            else:
                msg = 'Loaded {}'.format(module_name)
                if self._debug:
                    print_message(msg, 'debug')
                logging.info(msg)
            self._handlers.append({module_name: (method, raw_variables)})
예제 #6
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_layerThickness into CMIP.thkcello

    The layer thickness is masked with the 3D cell mask, remapped with the
    mapping file and then multiplied by the destination cell area
    (``area_b`` from the mapping file times ``sphere_radius**2``).
    NOTE(review): multiplying a thickness by a cell area yields a
    volume-like quantity; confirm that this matches the variable named
    above.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the ``'MPAS_mesh'``, ``'MPAS_map'`` and ``'MPASO'`` entries are
        read here)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy, the conversion is skipped

    Returns
    -------
    varname : str or None
        the name of the processed variable after processing is complete,
        an empty string if writing the CMOR output failed, or None when
        ``simple`` conversion was requested (it is not supported)
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    earth_radius = dsMesh.attrs['sphere_radius']
    _, cellMask3D = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_layerThickness', 'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds[VAR_NAME] = \
            dsIn.timeMonthly_avg_layerThickness.where(cellMask3D, 0.)
        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): xarray's compute() returns a new object and leaves
        # ds untouched; the result is discarded here as in the original
        ds.compute()

    ds = mpas.add_depth(ds, dsMesh)
    ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    # set masked values (where there are no MPAS grid cells) to zero
    ds[VAR_NAME] = ds[VAR_NAME].where(
        ds[VAR_NAME] != netCDF4.default_fillvals['f4'], 0.)

    # read the destination-grid cell areas and grid shape from the mapping
    # file; the values are pulled into memory so the file can be closed
    with xarray.open_dataset(mappingFileName) as dsMap:
        area_b = dsMap.area_b.values
        dst_grid_dims = dsMap.dst_grid_dims.values
    area_b = area_b.reshape((dst_grid_dims[1], dst_grid_dims[0]))
    area_b = xarray.DataArray(data=area_b,
                              dims=('lat', 'lon'),
                              coords=(ds.coords['lat'], ds.coords['lon']))

    # area_b is in square radians, so need to multiply by the earth_radius**2
    # multiply variables in this order so they don't get transposed
    ds[VAR_NAME] = ds[VAR_NAME] * earth_radius**2 * area_b

    setup_cmor(var_name=VAR_NAME,
               table_path=tables,
               table_name=TABLE,
               user_input_path=user_input_path)

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'depth_coord',
        'units': 'm',
        'coord_vals': ds.depth.values,
        'cell_bounds': ds.depth_bnds.values
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string on failure) but leave
        # a trace of what went wrong
        logging.exception(f"{VAR_NAME}: failed to write CMOR output")
        return ""
    return VAR_NAME
예제 #7
0
파일: sos.py 프로젝트: JGCRI/e3sm_to_cmip
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_activeTracers_salinity into CMIP.sos

    The top vertical level of the salinity field is taken, the 2D cell mask
    is added, and the result is remapped with the mapping file before being
    written through CMOR.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the ``'MPAS_mesh'``, ``'MPAS_map'`` and ``'MPASO'`` entries are
        read here)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy, the conversion is skipped

    Returns
    -------
    varname : str or None
        the name of the processed variable after processing is complete,
        an empty string if writing the CMOR output failed, or None when
        ``simple`` conversion was requested (it is not supported)
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, _ = mpas.get_cell_masks(dsMesh)

    variableList = ['timeMonthly_avg_activeTracers_salinity',
                    'xtime_startMonthly', 'xtime_endMonthly']

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        salinity = dsIn.timeMonthly_avg_activeTracers_salinity
        # surface value: first vertical level only
        ds[VAR_NAME] = salinity.isel(nVertLevels=0).squeeze(drop=True)
        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): xarray's compute() returns a new object and leaves
        # ds untouched; the result is discarded here as in the original
        ds.compute()
    ds = mpas.add_mask(ds, cellMask2D)
    ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{'table_entry': 'time',
             'units': ds.time.units},
            {'table_entry': 'latitude',
             'units': 'degrees_north',
             'coord_vals': ds.lat.values,
             'cell_bounds': ds.lat_bnds.values},
            {'table_entry': 'longitude',
             'units': 'degrees_east',
             'coord_vals': ds.lon.values,
             'cell_bounds': ds.lon_bnds.values}]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string on failure) but leave
        # a trace of what went wrong
        logging.exception(f"{VAR_NAME}: failed to write CMOR output")
        return ""
    return VAR_NAME
예제 #8
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Convert the PHIS input field to the output variable by dividing it by
    GRAV and writing the result on a latitude/longitude grid through CMOR.

    Parameters
    ----------
    infiles : dict
        maps each raw variable name to a list of input file paths; only the
        first ``'PHIS'`` file is read
    tables : str
        path to CMOR tables
    user_input_path : str
        path to user input json file
    **kwargs
        ``simple`` : if truthy, ``handle_simple(infiles)`` is used instead
        of CMOR
        ``logdir`` : directory for the CMOR log file; when absent a
        ``cmor_logs`` directory is created next to the root logger's log
        file (or under the current working directory when the root logger
        has no file-backed handler)

    Returns
    -------
    str or None
        None when input files are missing for any raw variable, VAR_NAME
        for a simple conversion, otherwise ``'orog'``

    Raises
    ------
    IOError
        if the first PHIS input file does not exist
    """
    simple = kwargs.get('simple')
    logger = logging.getLogger()
    msg = f'{VAR_NAME}: Starting'
    logger.info(msg)

    logdir = kwargs.get('logdir')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = f'{VAR_NAME}: Unable to find input files for {variable}'
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    if simple:
        handle_simple(infiles)
        return VAR_NAME

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # place the cmor logs next to the first file-backed log handler;
        # fall back to the working directory when there is none instead of
        # failing with IndexError/AttributeError
        log_files = [
            log_handler.baseFilename for log_handler in logger.handlers
            if hasattr(log_handler, 'baseFilename')
        ]
        if log_files:
            outpath, _ = os.path.split(log_files[0])
        else:
            outpath = os.getcwd()
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(
        inpath=tables,
        netcdf_file_action=cmor.CMOR_REPLACE,
        logfile=logfile)

    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logging.info(msg)

    # extract data from the input file
    msg = 'orog: loading PHIS'
    logger.info(msg)

    filename = infiles['PHIS'][0]

    if not os.path.exists(filename):
        raise IOError("File not found: {}".format(filename))

    # pull everything that is needed into memory so the input file can be
    # closed before CMOR runs
    with xr.open_dataset(filename, decode_times=False) as ds:
        lat_units = ds['lat'].units
        lon_units = ds['lon'].units
        lat_vals = ds['lat'].values
        lon_vals = ds['lon'].values
        lat_bnds = ds['lat_bnds'].values
        lon_bnds = ds['lon_bnds'].values
        outdata = ds['PHIS'].values / GRAV

    msg = f'{VAR_NAME}: loading axes'
    logger.info(msg)

    axes = [{
        str('table_entry'): str('latitude'),
        str('units'): lat_units,
        str('coord_vals'): lat_vals,
        str('cell_bounds'): lat_bnds
    }, {
        str('table_entry'): str('longitude'),
        str('units'): lon_units,
        str('coord_vals'): lon_vals,
        str('cell_bounds'): lon_bnds
    }]

    msg = 'orog: running CMOR'
    logging.info(msg)

    axis_ids = list()
    for axis in axes:
        axis_id = cmor.axis(**axis)
        axis_ids.append(axis_id)

    varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

    cmor.write(
        varid,
        outdata)

    msg = '{}: write complete, closing'.format(VAR_NAME)
    logger.debug(msg)

    cmor.close()

    msg = '{}: file close complete'.format(VAR_NAME)
    logger.debug(msg)

    return 'orog'
예제 #9
0
파일: wfo.py 프로젝트: JGCRI/e3sm_to_cmip
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_seaIceFreshWaterFlux,
    timeMonthly_avg_riverRunoffFlux, timeMonthly_avg_iceRunoffFlux,
    timeMonthly_avg_rainFlux, and timeMonthly_avg_snowFlux into CMIP.wfo

    The five fluxes are summed, remapped with the mapping file and written
    through CMOR together with an explanatory comment.

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names
        (the ``'MPAS_map'`` and ``'MPASO'`` entries are read here)

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        ``simple`` : if truthy, the conversion is skipped

    Returns
    -------
    varname : str or None
        the name of the processed variable after processing is complete,
        an empty string if writing the CMOR output failed, or None when
        ``simple`` conversion was requested (it is not supported)
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    variableList = [
        'timeMonthly_avg_seaIceFreshWaterFlux',
        'timeMonthly_avg_riverRunoffFlux', 'timeMonthly_avg_iceRunoffFlux',
        'timeMonthly_avg_rainFlux', 'timeMonthly_avg_snowFlux',
        'xtime_startMonthly', 'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds[VAR_NAME] = \
            dsIn.timeMonthly_avg_seaIceFreshWaterFlux + \
            dsIn.timeMonthly_avg_riverRunoffFlux + \
            dsIn.timeMonthly_avg_iceRunoffFlux + \
            dsIn.timeMonthly_avg_rainFlux + \
            dsIn.timeMonthly_avg_snowFlux

        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): xarray's compute() returns a new object and leaves
        # ds untouched; the result is discarded here as in the original
        ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        # adjacent literals are concatenated verbatim, so each piece must
        # carry its own trailing space
        mpas.write_cmor(axes,
                        ds,
                        VAR_NAME,
                        VAR_UNITS,
                        comment='Computed as the water flux into the ocean '
                        'divided by the area of the ocean portion of '
                        'the grid cell. This is the sum of sea-ice '
                        'freshwater, river runoff, ice runoff, rain, '
                        'and snow fluxes.')
    except Exception:
        # keep the best-effort contract (empty string on failure) but leave
        # a trace of what went wrong
        logging.exception(f"{VAR_NAME}: failed to write CMOR output")
        return ""
    return VAR_NAME
예제 #10
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform E3SM.FISCCP1_COSP into the CMIP variable named by VAR_NAME

    For every FISCCP1_COSP input file (in sorted order) the time, plev7c,
    tau, latitude and longitude axes are registered with CMOR and the data
    are written one time step at a time.

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of file names for the raw input data
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        **kwargs: ``logdir`` (directory for the CMOR log file, defaults to ``./logs``)
            and ``serial`` (if truthy, show a progress bar)
    Returns
    -------
        var name (str): the name of the processed variable after processing is complete,
            or None when input files are missing for any raw variable
    """

    msg = '{}: Starting'.format(VAR_NAME)
    logger.info(msg)

    missing_input = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            missing_input = True
    if missing_input:
        return None

    msg = '{}: running with input files: {}'.format(VAR_NAME, infiles)
    logger.debug(msg)

    # setup cmor
    logdir = kwargs.get('logdir')
    if logdir:
        logfile = os.path.join(logdir, VAR_NAME + '.log')
    else:
        logfile = os.path.join(os.getcwd(), 'logs')
        if not os.path.exists(logfile):
            os.makedirs(logfile)
        logfile = os.path.join(logfile, VAR_NAME + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)
    cmor.dataset_json(user_input_path)
    cmor.load_table(TABLE)

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logger.info(msg)

    serial = kwargs.get('serial')

    # sort the input files so they are processed in order
    infiles['FISCCP1_COSP'].sort()

    for input_path in infiles['FISCCP1_COSP']:

        f = cdms2.open(input_path)
        try:
            # load the data for each variable
            variable_data = f('FISCCP1_COSP')

            # overwrite the last tau value and its bounds
            tau = variable_data.getAxis(2)[:]
            tau[-1] = 100.0
            tau_bnds = f.variables['cosp_tau_bnds'][:]
            tau_bnds[-1] = [60.0, 100000.0]

            # load
            data = {
                'FISCCP1_COSP': variable_data,
                'lat': variable_data.getLatitude(),
                'lon': variable_data.getLongitude(),
                'lat_bnds': f('lat_bnds'),
                'lon_bnds': f('lon_bnds'),
                'time': variable_data.getTime(),
                'time_bnds': f('time_bnds'),
                # scaled by 100 to match the 'Pa' units declared on the axis
                'plev7c': variable_data.getAxis(1)[:] * 100.0,
                'plev7c_bnds': f.variables['cosp_prs_bnds'][:] * 100.0,
                'tau': tau,
                'tau_bnds': tau_bnds
            }

            # create the cmor variable and axis
            axes = [{
                str('table_entry'): str('time'),
                str('units'): data['time'].units
            }, {
                str('table_entry'): str('plev7c'),
                str('units'): str('Pa'),
                str('coord_vals'): data['plev7c'],
                str('cell_bounds'): data['plev7c_bnds']
            }, {
                str('table_entry'): str('tau'),
                str('units'): str('1'),
                str('coord_vals'): data['tau'],
                str('cell_bounds'): data['tau_bnds']
            }, {
                str('table_entry'): str('latitude'),
                str('units'): data['lat'].units,
                str('coord_vals'): data['lat'][:],
                str('cell_bounds'): data['lat_bnds'][:]
            }, {
                str('table_entry'): str('longitude'),
                str('units'): data['lon'].units,
                str('coord_vals'): data['lon'][:],
                str('cell_bounds'): data['lon_bnds'][:]
            }]

            axis_ids = list()
            for axis in axes:
                axis_id = cmor.axis(**axis)
                axis_ids.append(axis_id)

            varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

            # write out the data
            msg = "{}: time {:1.1f} - {:1.1f}".format(VAR_NAME,
                                                      data['time_bnds'][0][0],
                                                      data['time_bnds'][-1][-1])
            logger.info(msg)

            if serial:
                widgets = [
                    progressbar.DynamicMessage('running'), ' [',
                    progressbar.Timer(), '] ',
                    progressbar.Bar(), ' (',
                    progressbar.ETA(), ') '
                ]
                progressbar.DynamicMessage.__call__ = my_dynamic_message
                pbar = progressbar.ProgressBar(maxval=len(data['time']),
                                               widgets=widgets)
                pbar.start()

            for time_index, val in enumerate(data['time']):
                if serial:
                    pbar.update(time_index, running=msg)
                write_data(varid=varid,
                           data=data,
                           timeval=val,
                           timebnds=[data['time_bnds'][time_index, :]],
                           index=time_index)
            # finish the bar once, after every time step has been written
            if serial:
                pbar.finish()
        finally:
            # release the input file even if writing fails
            f.close()

    msg = '{}: write complete, closing'.format(VAR_NAME)
    logger.info(msg)

    cmor.close()
    msg = '{}: file close complete'.format(VAR_NAME)
    logger.info(msg)

    return VAR_NAME
예제 #11
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform E3SM.TSOI into CMIP.tsl

    In ``simple`` mode the inputs are copied into a new dataset with renamed
    variables/dimensions and written directly to a netCDF file; otherwise
    each TSOI input file (in sorted order) is written through CMOR one time
    step at a time.

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of file names for the raw input data
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        **kwargs: ``logdir`` (directory for the CMOR log file, defaults to ``./logs``),
            ``simple`` (if truthy, write with xarray instead of CMOR; requires ``outpath``),
            ``outpath`` (output directory for simple mode) and
            ``serial`` (if truthy, show a progress bar)
    Returns
    -------
        var name (str): the name of the processed variable after processing is complete;
            None when input files are missing for any raw variable;
            ``RAW_VARIABLES[0]`` in simple mode
    """
    msg = f'{VAR_NAME}: Starting'
    logger.info(msg)

    missing_input = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = f'{VAR_NAME}: Unable to find input files for {variable}'
            print_message(msg)
            logging.error(msg)
            missing_input = True
    if missing_input:
        return None

    msg = f'{VAR_NAME}: running with input files: {infiles}'
    logger.debug(msg)

    # setup cmor
    logdir = kwargs.get('logdir')
    if logdir:
        logfile = os.path.join(logdir, f"{VAR_NAME}.log")
    else:
        logfile = os.path.join(os.getcwd(), 'logs')
        # exist_ok avoids a failure when another process creates the
        # directory between a check and the creation
        os.makedirs(logfile, exist_ok=True)
        logfile = os.path.join(logfile, f"{VAR_NAME}.log")

    simple = kwargs.get('simple')
    if simple:
        outpath = kwargs['outpath']
        _, inputfile = os.path.split(sorted(infiles[RAW_VARIABLES[0]])[0])
        # the text after "<raw variable>_" in the file name starts with
        # "<start year>_<end year>"
        name_parts = inputfile[len(RAW_VARIABLES[0]) + 1:].split('_')
        start_year = name_parts[0]
        end_year = name_parts[1]
        outds = xr.Dataset()
        with xr.open_mfdataset(infiles[RAW_VARIABLES[0]], decode_times=False) as inputds:
            for dim in inputds.coords:
                if dim == 'levgrnd':
                    outds['levgrnd'] = inputds[dim]
                    outds['levgrnd_bnds'] = get_levgrnd_bnds()
                else:
                    outds[dim] = inputds[dim]

            for var in inputds.data_vars:
                if var == RAW_VARIABLES[0]:
                    outds[VAR_NAME] = inputds[RAW_VARIABLES[0]]
                elif var == 'time_bounds':
                    outds['time_bnds'] = inputds['time_bounds']
                else:
                    outds[var] = inputds[var]

            for attr, val in inputds.attrs.items():
                outds.attrs[attr] = val

        outds = outds.rename_dims({
            'levgrnd': 'depth',
            'levgrnd_bnds': 'depth_bnds'
        })
        outds = outds.rename_vars({
            'levgrnd': 'depth',
            'levgrnd_bnds': 'depth_bnds'
        })

        # copy the variable attributes from the bundled CMIP6 Lmon table
        resource_path, _ = os.path.split(os.path.abspath(resources.__file__))
        table_path = os.path.join(resource_path, 'CMIP6_Lmon.json')
        with open(table_path, 'r') as ip:
            table_data = json.load(ip)

        variable_attrs = ['standard_name', 'long_name',
                          'comment', 'cell_methods', 'cell_measures', 'units']
        for attr in variable_attrs:
            outds[VAR_NAME].attrs[attr] = table_data['variable_entry'][VAR_NAME][attr]

        output_file_path = os.path.join(
            outpath, f'{VAR_NAME}_{start_year}_{end_year}.nc')
        msg = f'writing out variable to file {output_file_path}'
        print_message(msg, 'ok')
        write_netcdf(outds, output_file_path, unlimited=['time'])
        return RAW_VARIABLES[0]

    cmor.setup(
        inpath=tables,
        netcdf_file_action=cmor.CMOR_REPLACE,
        logfile=logfile)
    cmor.dataset_json(user_input_path)
    cmor.load_table(TABLE)

    msg = f'{VAR_NAME}: CMOR setup complete'
    logger.info(msg)

    serial = kwargs.get('serial')

    # sort the input files so they are processed in order
    infiles['TSOI'].sort()

    for input_path in infiles['TSOI']:

        f = cdms2.open(input_path)
        try:
            # load the data for each variable
            variable_data = f('TSOI')

            levgrnd = variable_data.getAxis(1)[:]
            levgrnd_bnds = get_levgrnd_bnds()

            # load
            data = {
                'TSOI': variable_data,
                'lat': variable_data.getLatitude(),
                'lon': variable_data.getLongitude(),
                'lat_bnds': f('lat_bnds'),
                'lon_bnds': f('lon_bnds'),
                'time': variable_data.getTime(),
                'time_bnds': f('time_bounds'),
                'levgrnd': levgrnd,
                'levgrnd_bnds': levgrnd_bnds
            }

            # create the cmor variable and axis
            axes = [{
                str('table_entry'): str('time'),
                str('units'): data['time'].units
            }, {
                str('table_entry'): str('sdepth'),
                str('units'): str('m'),
                str('coord_vals'): levgrnd,
                str('cell_bounds'): levgrnd_bnds
            }, {
                str('table_entry'): str('latitude'),
                str('units'): data['lat'].units,
                str('coord_vals'): data['lat'][:],
                str('cell_bounds'): data['lat_bnds'][:]
            }, {
                str('table_entry'): str('longitude'),
                str('units'): data['lon'].units,
                str('coord_vals'): data['lon'][:],
                str('cell_bounds'): data['lon_bnds'][:]
            }]

            axis_ids = list()
            for axis in axes:
                axis_id = cmor.axis(**axis)
                axis_ids.append(axis_id)

            varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

            # write out the data
            msg = f"{VAR_NAME}: writing {data['time_bnds'][0][0]} - {data['time_bnds'][-1][-1]}"
            logger.info(msg)

            if serial:
                pbar = tqdm(total=len(data['time']))
                pbar.set_description(msg)

            for time_index, val in enumerate(data['time']):
                cmor.write(
                    varid,
                    data['TSOI'][time_index, :],
                    time_vals=val,
                    time_bnds=[data['time_bnds'][time_index, :]])
                if serial:
                    pbar.update(1)
            if serial:
                pbar.close()
        finally:
            # release the input file even if writing fails
            f.close()

    msg = f'{VAR_NAME}: write complete, closing'
    logger.info(msg)

    cmor.close()
    msg = f'{VAR_NAME}: file close complete'
    logger.info(msg)

    return VAR_NAME
예제 #12
0
def timeout_exit():
    """
    Report that the timeout limit was hit and interrupt the current process.

    Prints a notice, then sends SIGINT to this process's own pid.
    """
    print_message("Hit timeout limit, exiting")
    own_pid = os.getpid()
    os.kill(own_pid, signal.SIGINT)
예제 #13
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_layerThickness into CMIP.zhalfo

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; the
        keys 'MPAS_mesh', 'MPAS_map' and 'MPASO' are read here

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        only 'simple' is read; a truthy value makes the handler print a
        message and return without doing any work

    Returns
    -------
    varname : str or None
        VAR_NAME once the variable has been written, an empty string if
        writing the CMOR output raised, None when simple conversion was
        requested
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    _, cellMask3D = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_layerThickness', 'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    nVertLevels = dsMesh.sizes['nVertLevels']

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        layerThickness = dsIn.timeMonthly_avg_layerThickness
        layerThickness = layerThickness.where(cellMask3D)
        thicknessSum = layerThickness.sum(dim='nVertLevels')
        mask = cellMask3D.isel(nVertLevels=0)
        # height of the top interface: total column thickness minus the
        # bottom depth, masked with the first vertical level's cell mask
        zSurface = (-dsMesh.bottomDepth + thicknessSum).where(mask)
        # NOTE(review): the return value of compute() is discarded here and
        # below; if these are xarray objects, compute() returns a new object
        # instead of loading in place -- confirm this is intended
        zSurface.compute()
        slices = [zSurface]
        maskSlices = [mask]
        zLayerBot = zSurface
        # walk down the column, subtracting one layer thickness per level, so
        # that nVertLevels + 1 interface slices are collected in total
        for zIndex in range(nVertLevels):
            mask = cellMask3D.isel(nVertLevels=zIndex)
            zLayerBot = (zLayerBot -
                         layerThickness.isel(nVertLevels=zIndex)).where(mask)
            zLayerBot.compute()
            slices.append(zLayerBot)
            maskSlices.append(mask)
        ds[VAR_NAME] = xarray.concat(slices, dim='olevhalf')
        mask = xarray.concat(maskSlices, dim='olevhalf')
        ds = mpas.add_mask(ds, mask)
        ds = ds.transpose('Time', 'olevhalf', 'nCells')
        ds = mpas.add_time(ds, dsIn)
        ds.compute()

    ds = mpas.remap(ds, mappingFileName)
    # half-level coordinate: 0 at the top followed by the reference bottom
    # depth of every level
    depth_coord_half = numpy.zeros(nVertLevels + 1)
    depth_coord_half[1:] = dsMesh.refBottomDepth.values

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'depth_coord_half',
        'units': 'm',
        'coord_vals': depth_coord_half
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string on failure) but leave a
        # traceback in the log instead of discarding the error
        logging.exception(f'{VAR_NAME}: error writing CMOR output')
        return ""
    return VAR_NAME
예제 #14
0
파일: lib.py 프로젝트: JGCRI/e3sm_to_cmip
def run_serial(handlers, input_path, tables_path, metadata_path, map_path=None,
               mode='atm', logdir=None, simple=False, outpath=None, freq="mon"):
    """
    Run each of the handlers one at a time on the main process

    Params:
    -------
        handlers: an iterable of dicts; each one must have the keys 'method'
            (the function to call) and 'raw_variables' (list(str)), and may
            have 'unit_conversion', 'units', 'name', 'table' and 'positive'
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        map_path (str): passed to find_mpas_files for modes other than
            'atm', 'lnd' and 'fx'
        mode (str): what type of files to work with
        logdir (str): forwarded to every handler
        simple (bool): forwarded to every handler
        outpath (str): forwarded to every handler
        freq (str): forwarded to every handler
    Returns:
    --------
        returns 1 if an error occurs, else 0
    """
    try:

        num_handlers = len(handlers)
        num_success = 0

        if mode != 'atm':
            pbar = tqdm(total=len(handlers))

        for handler in handlers:

            handler_method = handler['method']
            handler_variables = handler['raw_variables']
            unit_conversion = handler.get('unit_conversion')

            # reset for every handler so that a handler which raises is not
            # reported under the name returned by the previous one
            name = None

            # find the input files this handler needs
            if mode in ['atm', 'lnd']:

                input_paths = {var: [os.path.join(input_path, x) for x in
                                     find_atm_files(var, input_path)]
                               for var in handler_variables}
            elif mode == 'fx':
                input_paths = {var: [os.path.join(input_path, x) for x in os.listdir(input_path) if x[-3:] == '.nc']
                               for var in handler_variables}
            else:
                input_paths = {var: find_mpas_files(var, input_path,
                                                    map_path)
                               for var in handler_variables}

            try:
                name = handler_method(
                    input_paths,
                    tables_path,
                    metadata_path,
                    raw_variables=handler.get('raw_variables'),
                    units=handler.get('units'),
                    name=handler.get('name'),
                    table=handler.get('table'),
                    positive=handler.get('positive'),
                    serial=True,
                    logdir=logdir,
                    simple=simple,
                    outpath=outpath,
                    unit_conversion=unit_conversion,
                    freq=freq)
            except Exception as e:
                print_debug(e)

            # handlers signal failure with None or an empty string, so test
            # truthiness (the same check run_parallel applies to its results)
            if name:
                num_success += 1
                msg = f'Finished {name}, {num_success}/{num_handlers} jobs complete'
            else:
                msg = f'Error running handler {handler["name"]}'
                print_message(msg, status='error')
            logger.info(msg)

            if mode != 'atm':
                pbar.update(1)
        if mode != 'atm':
            pbar.close()

    except Exception as error:
        print_debug(error)
        return 1
    else:
        print_message(
            f"{num_success} of {num_handlers} handlers complete", 'ok')
        return 0
예제 #15
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Convert the raw 'area' field into the areacella output variable

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of input file paths
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        kwargs: 'simple' selects a plain netCDF output instead of CMOR,
            'logdir' overrides the directory the CMOR log is written to
    Returns
    -------
        var name (str): the name of the processed variable, or None if any
            raw variable has no input files
    """

    simple = kwargs.get('simple')
    # the area field is multiplied by r squared before it is written
    # NOTE(review): presumably the Earth radius in metres -- confirm
    r = 6.37122e6

    logger = logging.getLogger()
    msg = '{}: Starting'.format(VAR_NAME)
    logger.info(msg)

    logdir = kwargs.get('logdir')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    if simple:
        # simple mode: copy the coordinates and global attributes from the
        # first input file and write the result with xarray, bypassing CMOR
        resource_path, _ = os.path.split(os.path.abspath(resources.__file__))
        table_path = os.path.join(resource_path, TABLE)
        with open(table_path, 'r') as ip:
            table_data = json.load(ip)

        ds = xr.Dataset()
        outname = f'{VAR_NAME}_fx.nc'
        with xr.open_dataset(infiles[RAW_VARIABLES[0]][0]) as inputds:

            ds['lat'] = inputds['lat']
            ds['lat_bnds'] = inputds['lat_bnds']
            ds['lon'] = inputds['lon']
            ds['lon_bnds'] = inputds['lon_bnds']
            outdata = inputds['area'] * pow(r, 2)

            for attr, val in inputds.attrs.items():
                ds.attrs[attr] = val

        ds[VAR_NAME] = outdata
        for attr in [
                'standard_name', 'cell_methods', 'long_name', 'comment',
                'units'
        ]:
            ds[VAR_NAME].attrs[attr] = table_data["variable_entry"][VAR_NAME][
                attr]

        # the file name is relative, so it lands in the current working directory
        ds.to_netcdf(outname)
        return VAR_NAME

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # fall back to a 'cmor_logs' directory next to the file of the root
        # logger's first handler
        outpath, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)

    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logging.info(msg)

    # extract data from the input file
    msg = 'areacella: loading area'
    logger.info(msg)

    filename = infiles['area'][0]

    if not os.path.exists(filename):
        raise IOError("File not found: {}".format(filename))

    f = cdms2.open(filename)
    try:
        # load the data for each variable
        variable_data = f('area')

        if not variable_data.any():
            # name the variable that was actually read rather than whatever
            # the input-file check loop above happened to leave behind
            raise IOError("Variable data not found: {}".format('area'))

        # load the lon and lat info & bounds
        data = {
            'lat': variable_data.getLatitude(),
            'lon': variable_data.getLongitude(),
            'lat_bnds': f('lat_bnds'),
            'lon_bnds': f('lon_bnds'),
            # reuse the field read above instead of reading it a second time
            'area': variable_data
        }

        msg = '{name}: loading axes'.format(name=VAR_NAME)
        logger.info(msg)

        axes = [{
            str('table_entry'): str('latitude'),
            str('units'): data['lat'].units,
            str('coord_vals'): data['lat'][:],
            str('cell_bounds'): data['lat_bnds'][:]
        }, {
            str('table_entry'): str('longitude'),
            str('units'): data['lon'].units,
            str('coord_vals'): data['lon'][:],
            str('cell_bounds'): data['lon_bnds'][:]
        }]

        msg = 'areacella: running CMOR'
        logging.info(msg)

        axis_ids = list()
        for axis in axes:
            axis_id = cmor.axis(**axis)
            axis_ids.append(axis_id)

        varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

        outdata = data['area'] * pow(r, 2)
        cmor.write(varid, outdata)

        msg = '{}: write complete, closing'.format(VAR_NAME)
        logger.debug(msg)

        cmor.close()

        msg = '{}: file close complete'.format(VAR_NAME)
        logger.debug(msg)
    finally:
        # release the input file whether or not the conversion succeeded
        f.close()

    return 'areacella'
예제 #16
0
파일: lib.py 프로젝트: JGCRI/e3sm_to_cmip
def run_parallel(pool, handlers, input_path, tables_path, metadata_path,
                 map_path=None, mode='atm', nproc=6, **kwargs):
    """
    Submit every handler to the pool, then collect the results in order
    Params:
    -------
        pool: an executor-style object; jobs are started with pool.submit and
            the returned objects are waited on with .result()
        handlers: a list of dicts with the keys 'method' and 'raw_variables',
            optionally 'table', 'units', 'positive', 'name', 'unit_conversion'
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        map_path (str): passed to find_mpas_files for modes other than 'atm' and 'lnd'
        mode (str): what type of files to work with
        nproc (int): not used by this function
        kwargs: 'logdir', 'simple' and 'outpath' are forwarded to the handlers
    Returns:
    --------
        always returns 0
    """

    futures = []
    submitted_names = []
    for handler in handlers:
        method = handler['method']
        raw_vars = handler['raw_variables']

        # find the input files this handler needs
        input_paths = {}
        if mode in ['atm', 'lnd']:
            for var in raw_vars:
                input_paths[var] = [
                    os.path.join(input_path, x)
                    for x in find_atm_files(var, input_path)]
        else:
            for var in raw_vars:
                input_paths[var] = find_mpas_files(var, input_path, map_path)

        # keyword arguments handed to the handler
        handler_kwargs = dict(
            table=handler.get('table'),
            raw_variables=handler.get('raw_variables'),
            units=handler.get('units'),
            positive=handler.get('positive'),
            name=handler.get('name'),
            logdir=kwargs.get('logdir'),
            unit_conversion=handler.get('unit_conversion'),
            simple=kwargs.get('simple'),
            outpath=kwargs.get('outpath'))
        submitted_names.append(handler.get('name'))

        future = pool.submit(
            method, input_paths, tables_path, metadata_path, **handler_kwargs)
        futures.append(future)

    # wait for each result to complete, in submission order
    progress = tqdm(total=len(futures))
    num_success = 0
    num_handlers = len(handlers)
    returned = []
    for position, future in enumerate(futures):
        try:
            outcome = future.result()
            returned.append(outcome)
            if not outcome:
                msg = f'Error running handler {handlers[position]["name"]}'
                print_message(msg, 'error')
            else:
                num_success += 1
                msg = f'Finished {outcome}, {position + 1}/{num_handlers} jobs complete'

            logger.info(msg)
        except Exception as e:
            print_debug(e)
        progress.update(1)

    progress.close()
    terminate(pool)
    print_message(f"{num_success} of {num_handlers} handlers complete", 'ok')

    # every submitted name that did not come back as a return value
    failed = set(submitted_names) - set(returned)
    if failed:
        print_message(f"{', '.join(failed)} failed to complete")
    return 0
예제 #17
0
파일: lib.py 프로젝트: JGCRI/e3sm_to_cmip
def handle_simple(infiles, raw_variables, write_data, outvar_name, outvar_units, serial=None, positive=None, levels=None, axis=None, logdir=None, outpath=None, table='Amon', has_time=True):
    """
    Convert raw variables into a plain netCDF file without going through CMOR

    Params:
    -------
        infiles (dict): maps each raw variable name to a list of input file paths
        raw_variables (list(str)): names of the raw input variables
        write_data (function): called once per time step; its return value is
            stored as that time step of the output variable
        outvar_name (str): name of the output variable
        outvar_units (str): not used by this function
        serial (bool): if truthy, show a progress bar while converting
        positive: not used by this function
        levels: forwarded to get_dimension_data
        axis: not used by this function
        logdir (str): directory for logs; when not given, a 'cmor_logs'
            directory next to the file of the root logger's first handler is used
        outpath (str): directory the output file is written to; when neither
            outpath nor logdir is given, the directory of the root logger's
            first handler file is used
        table (str): file name of the table, looked up in the directory of the
            resources package
        has_time: truthy if the output variable has a time dimension
    Returns:
    --------
        outvar_name, or None if any raw variable has no input files
    """
    from e3sm_to_cmip.util import print_message
    logger = logging.getLogger()

    logger.info(f'{outvar_name}: Starting')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in raw_variables:
        if len(infiles[variable]) == 0:
            msg = f'{outvar_name}: Unable to find input files for {variable}'
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory
    if logdir:
        logpath = logdir
    else:
        log_root, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(log_root, 'cmor_logs')
        # only fall back to the log location when the caller gave no output
        # directory; a caller-supplied outpath must not be overwritten
        if outpath is None:
            outpath = log_root
    os.makedirs(logpath, exist_ok=True)

    _, inputfile = os.path.split(sorted(infiles[raw_variables[0]])[0])
    # skip the variable name and the '_' after it, then split what is left, so
    # that a '_' inside the variable name does not shift the fields
    year_range = inputfile[len(raw_variables[0]) + 1:].split('_')
    start_year = year_range[0]
    end_year = year_range[1]

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[raw_variables[0]])

    # sort the input files for each variable
    for var_name in raw_variables:
        infiles[var_name].sort()

    for file_index in range(num_files_per_variable):
        loaded = False

        # reload the dimensions for each time slice
        get_dims = True

        # load data for each variables
        for var_name in raw_variables:

            # extract data from the input file
            logger.info(f'{outvar_name}: loading {var_name}')

            new_data = get_dimension_data(
                filename=infiles[var_name][file_index],
                variable=var_name,
                levels=levels,
                get_dims=get_dims)
            data.update(new_data)
            get_dims = False
            if not loaded:
                loaded = True

                # new data set, shaped after the first raw variable of this file
                # NOTE(review): ds is rebuilt for every input file, so only the
                # last file's dataset reaches the write below -- confirm
                ds = xr.Dataset()
                if has_time:
                    dims = ['time', 'lat', 'lon']
                else:
                    dims = ['lat', 'lon']

                # a vertical dimension, when present, is inserted at index 1
                for depth_dim in ['lev', 'plev', 'levgrnd']:
                    if depth_dim in new_data.keys():
                        dims.insert(1, depth_dim)

                ds[outvar_name] = (tuple(dims), new_data[var_name])
                for d in dims:
                    ds.coords[d] = new_data[d][:]

        # write out the data
        msg = f"{outvar_name}: time {data['time_bnds'][0][0]:1.1f} - {data['time_bnds'][-1][-1]:1.1f}"
        logger.info(msg)

        if serial:
            pbar = tqdm(total=len(data['time']))
            pbar.set_description(msg)

        for time_index, val in enumerate(data['time']):

            # varid is a placeholder here; nothing is registered with CMOR
            outdata = write_data(
                varid=0,
                data=data,
                timeval=val,
                timebnds=[data['time_bnds'][time_index, :]],
                index=time_index,
                raw_variables=raw_variables,
                simple=True)
            ds[outvar_name][time_index] = outdata
            if serial:
                pbar.update(1)

        if serial:
            pbar.close()

    # copy global attributes, bounds and the time axis from the first input file
    with xr.open_dataset(infiles[raw_variables[0]][0], decode_cf=False, decode_times=False) as inputds:
        for attr, val in inputds.attrs.items():
            ds.attrs[attr] = val

        ds['lat_bnds'] = inputds['lat_bnds']
        ds['lon_bnds'] = inputds['lon_bnds']

        # check for and change the bounds name for lnd files since "time_bounds" is different
        # from every other bounds name in the entire E3SM project
        time_bounds_name = 'time_bnds' if 'time_bnds' in inputds.data_vars else 'time_bounds'
        ds['time_bnds'] = inputds[time_bounds_name]
        ds['time'] = inputds['time']
        ds['time'].attrs['bounds'] = 'time_bnds'

    # the variable attributes come from the table file found in the directory
    # of the resources package
    resource_path, _ = os.path.split(os.path.abspath(resources.__file__))
    table_path = os.path.join(resource_path, table)
    with open(table_path, 'r') as ip:
        table_data = json.load(ip)

    variable_attrs = ['standard_name', 'long_name',
                      'comment', 'cell_methods', 'cell_measures', 'units']
    for attr in variable_attrs:
        ds[outvar_name].attrs[attr] = table_data['variable_entry'][outvar_name][attr]

    # table[:-5] drops the last five characters of the table file name
    output_file_path = os.path.join(
        outpath, f'{outvar_name}_{table[:-5]}_{start_year}-{end_year}')
    msg = f'writing out variable to file {output_file_path}'
    print_message(msg, 'ok')
    fillVals = {
        np.dtype('float32'): 1e20,
        np.dtype('float64'): 1e20,
    }
    write_netcdf(ds, output_file_path, fillValues=fillVals, unlimited=['time'])

    msg = f'{outvar_name}: file close complete'
    logger.debug(msg)

    return outvar_name
예제 #18
0
파일: pbo.py 프로젝트: JGCRI/e3sm_to_cmip
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_pressureAdjustedSSH, timeMonthly_avg_ssh,
    timeMonthly_avg_density, timeMonthly_avg_layerThickness, and EAM PSL into
    CMIP.pbo

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; the
        keys 'MPASO_namelist', 'MPAS_mesh', 'MPAS_map', 'MPASO' and 'PSL'
        are read here

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        only 'simple' is read; a truthy value makes the handler print a
        message and return without doing any work

    Returns
    -------
    varname : str or None
        VAR_NAME once the variable has been written, an empty string if
        writing the CMOR output raised, None when simple conversion was
        requested
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    namelistFileName = infiles['MPASO_namelist']
    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']
    pslFileNames = infiles['PSL']

    namelist = mpas.convert_namelist_to_dict(namelistFileName)
    config_density0 = float(namelist['config_density0'])
    gravity = 9.80616

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, cellMask3D = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_pressureAdjustedSSH', 'timeMonthly_avg_ssh',
        'timeMonthly_avg_layerThickness', 'timeMonthly_avg_density',
        'xtime_startMonthly', 'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        # pressure term from the difference between the pressure-adjusted and
        # the plain sea surface height
        seaIcePressure = config_density0 * gravity * \
            (dsIn.timeMonthly_avg_pressureAdjustedSSH -
             dsIn.timeMonthly_avg_ssh)
        # add gravity times the column sum of density * layer thickness
        ds[VAR_NAME] = seaIcePressure.where(cellMask2D) + gravity * \
            (dsIn.timeMonthly_avg_density *
             dsIn.timeMonthly_avg_layerThickness).where(cellMask3D).sum(
                dim='nVertLevels')

        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): the return value of compute() is discarded; if ds is
        # an xarray Dataset, compute() returns a new object instead of
        # loading in place -- confirm this is intended
        ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    # PSL is added as a raw array, i.e. without coordinate alignment
    # NOTE(review): assumes the PSL files share the remapped grid and time
    # axis of ds -- confirm
    with xarray.open_mfdataset(pslFileNames, concat_dim='time') as dsIn:
        ds[VAR_NAME] = ds[VAR_NAME] + dsIn.PSL.values

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string on failure) but leave a
        # traceback in the log instead of discarding the error
        logging.exception(f'{VAR_NAME}: error writing CMOR output')
        return ""
    return VAR_NAME
예제 #19
0
파일: tauvo.py 프로젝트: JGCRI/e3sm_to_cmip
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_windStressMeridional into CMIP.tauvo

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; the
        keys 'MPAS_map' and 'MPASO' are read here

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        only 'simple' is read; a truthy value makes the handler print a
        message and return without doing any work

    Returns
    -------
    varname : str or None
        VAR_NAME once the variable has been written, an empty string if
        writing the CMOR output raised, None when simple conversion was
        requested
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    variableList = [
        'timeMonthly_avg_windStressMeridional', 'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        # the raw field is passed through without any arithmetic
        ds[VAR_NAME] = dsIn.timeMonthly_avg_windStressMeridional

        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): the return value of compute() is discarded; if ds is
        # an xarray Dataset, compute() returns a new object instead of
        # loading in place -- confirm this is intended
        ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS, positive='down')
    except Exception:
        # keep the best-effort contract (empty string on failure) but leave a
        # traceback in the log instead of discarding the error
        logging.exception(f'{VAR_NAME}: error writing CMOR output')
        return ""
    return VAR_NAME
예제 #20
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_layerThickness into CMIP.masscello

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; the
        keys 'MPASO_namelist', 'MPAS_mesh', 'MPAS_map' and 'MPASO' are read
        here

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    **kwargs
        only 'simple' is read; a truthy value makes the handler print a
        message and return without doing any work

    Returns
    -------
    varname : str or None
        VAR_NAME once the variable has been written, an empty string if
        writing the CMOR output raised, None when simple conversion was
        requested
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    namelistFileName = infiles['MPASO_namelist']
    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    namelist = mpas.convert_namelist_to_dict(namelistFileName)
    config_density0 = float(namelist['config_density0'])

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    _, cellMask3D = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_layerThickness', 'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        # config_density0 times the layer thickness, with cells outside the
        # 3D mask set to zero
        ds[VAR_NAME] = config_density0 * \
            dsIn.timeMonthly_avg_layerThickness.where(cellMask3D, 0.)
        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): the return value of compute() is discarded here and
        # below; if ds is an xarray Dataset, compute() returns a new object
        # instead of loading in place -- confirm this is intended
        ds.compute()

    ds = mpas.add_depth(ds, dsMesh)
    ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    # set masked values (where there are no MPAS grid cells) to zero
    ds[VAR_NAME] = ds[VAR_NAME].where(
        ds[VAR_NAME] != netCDF4.default_fillvals['f4'], 0.)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'depth_coord',
        'units': 'm',
        'coord_vals': ds.depth.values,
        'cell_bounds': ds.depth_bnds.values
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string on failure) but leave a
        # traceback in the log instead of discarding the error
        logging.exception(f'{VAR_NAME}: error writing CMOR output')
        return ""
    return VAR_NAME
예제 #21
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Convert the raw PHIS field into the orog output variable (PHIS / g)

    Parameters
    ----------
        infiles (dict): maps each raw variable name to a list of input file paths
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        kwargs: 'logdir' overrides the directory the CMOR log is written to,
            'serial' turns on a progress bar
    Returns
    -------
        var name (str): the name of the processed variable, or None if any
            raw variable has no input files
    """
    logger = logging.getLogger()
    msg = '{}: Starting'.format(VAR_NAME)
    logger.info(msg)

    logdir = kwargs.get('logdir')
    serial = kwargs.get('serial')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # fall back to a 'cmor_logs' directory next to the file of the root
        # logger's first handler
        outpath, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)

    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logging.info(msg)

    # extract data from the input file
    msg = 'orog: loading PHIS'
    logger.info(msg)

    filename = infiles['PHIS'][0]

    if not os.path.exists(filename):
        raise IOError("File not found: {}".format(filename))

    f = cdms2.open(filename)
    try:
        # load the data for each variable
        variable_data = f('PHIS')

        if not variable_data.any():
            # name the variable that was actually read rather than whatever
            # the input-file check loop above happened to leave behind
            raise IOError("Variable data not found: {}".format('PHIS'))

        # load the lon and lat info & bounds
        data = {
            'lat': variable_data.getLatitude(),
            'lon': variable_data.getLongitude(),
            'lat_bnds': f('lat_bnds'),
            'lon_bnds': f('lon_bnds'),
            # reuse the field read above instead of reading it a second time
            'PHIS': variable_data
        }

        msg = '{name}: loading axes'.format(name=VAR_NAME)
        logger.info(msg)

        axes = [{
            str('table_entry'): str('latitude'),
            str('units'): data['lat'].units,
            str('coord_vals'): data['lat'][:],
            str('cell_bounds'): data['lat_bnds'][:]
        }, {
            str('table_entry'): str('longitude'),
            str('units'): data['lon'].units,
            str('coord_vals'): data['lon'][:],
            str('cell_bounds'): data['lon_bnds'][:]
        }]

        msg = 'orog: running CMOR'
        logging.info(msg)

        axis_ids = list()
        for axis in axes:
            axis_id = cmor.axis(**axis)
            axis_ids.append(axis_id)

        varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids)

        if serial:
            widgets = [
                progressbar.DynamicMessage('running'), ' [',
                progressbar.Timer(), '] ',
                progressbar.Bar(), ' (',
                progressbar.ETA(), ') '
            ]
            # replaces DynamicMessage.__call__ on the class itself, so every
            # DynamicMessage in the process is affected, not only this bar
            progressbar.DynamicMessage.__call__ = my_dynamic_message
            pbar = progressbar.ProgressBar(maxval=1, widgets=widgets)
            pbar.start()

        g = 9.80616

        # PHIS is divided by g before it is written
        outdata = data['PHIS'] / g
        cmor.write(varid, outdata)

        if serial:
            pbar.update(1, running=msg)
            pbar.finish()

        msg = '{}: write complete, closing'.format(VAR_NAME)
        logger.debug(msg)

        cmor.close()

        msg = '{}: file close complete'.format(VAR_NAME)
        logger.debug(msg)
    finally:
        # release the input file whether or not the conversion succeeded
        f.close()

    return 'orog'
예제 #22
0
파일: lib.py 프로젝트: JGCRI/e3sm_to_cmip
def handle_variables(infiles, raw_variables, write_data, outvar_name, outvar_units, table, tables, metadata_path, serial=None, positive=None, levels=None, axis=None, logdir=None, simple=False, outpath=None):
    """
    Convert raw variables into one output variable, through CMOR unless
    simple conversion is requested

    Params:
    -------
        infiles (dict): maps each raw variable name to a list of input file paths
        raw_variables (list(str)): names of the raw input variables
        write_data (function): called to write the data; once per time step
            when the variable has a time axis, otherwise once per input file
        outvar_name (str): name of the output variable
        outvar_units (str): units of the output variable
        table (str): file name of the table to load
        tables (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        serial (bool): if truthy, show a progress bar while writing
        positive (str): passed to cmor.variable when truthy
        levels: forwarded to get_dimension_data and load_axis
        axis: only forwarded to handle_simple
        logdir (str): directory for the CMOR log; when not given, a
            'cmor_logs' directory next to the file of the root logger's first
            handler is used
        simple (bool): if truthy, delegate everything to handle_simple
        outpath (str): only forwarded to handle_simple
    Returns:
    --------
        outvar_name, or None if any raw variable has no input files; in
        simple mode, whatever handle_simple returns
    """

    # truthy when the output variable has a time axis
    timename = var_has_time(os.path.join(tables, table), outvar_name)
    if simple:
        return handle_simple(
            infiles,
            raw_variables,
            write_data,
            outvar_name,
            outvar_units,
            serial=serial,
            table=table,
            positive=positive,
            levels=levels,
            axis=axis,
            logdir=logdir,
            outpath=outpath,
            has_time=timename)

    from e3sm_to_cmip.util import print_message
    logger = logging.getLogger()

    logger.info(f'{outvar_name}: Starting')

    # check that we have some input files for every variable
    zerofiles = False
    for variable in raw_variables:
        if len(infiles[variable]) == 0:
            msg = f'{outvar_name}: Unable to find input files for {variable}'
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        log_root, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(log_root, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, outvar_name + '.log')

    cmor.setup(
        inpath=tables,
        netcdf_file_action=cmor.CMOR_REPLACE,
        logfile=logfile)

    cmor.dataset_json(str(metadata_path))
    cmor.load_table(str(table))

    msg = f'{outvar_name}: CMOR setup complete'
    logging.info(msg)

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[raw_variables[0]])

    # sort the input files for each variable
    for var_name in raw_variables:
        infiles[var_name].sort()

    for index in range(num_files_per_variable):

        # reload the dimensions for each time slice
        get_dims = True

        # load data for each variable
        for var_name in raw_variables:

            # extract data from the input file
            logger.info(f'{outvar_name}: loading {var_name}')

            new_data = get_dimension_data(
                filename=infiles[var_name][index],
                variable=var_name,
                levels=levels,
                get_dims=get_dims)
            data.update(new_data)
            # only the first raw variable of each file loads the dimensions
            get_dims = False

        logger.info(f'{outvar_name}: loading axes')

        # create the cmor variable and axis
        axis_ids, ips = load_axis(data=data, levels=levels, has_time=timename)

        if ips:
            data['ips'] = ips

        if positive:
            varid = cmor.variable(outvar_name, outvar_units,
                                  axis_ids, positive=positive)
        else:
            varid = cmor.variable(outvar_name, outvar_units, axis_ids)

        # write out the data
        msg = f"{outvar_name}: time {data['time_bnds'][0][0]:1.1f} - {data['time_bnds'][-1][-1]:1.1f}"
        logger.info(msg)

        if serial:
            pbar = tqdm(total=len(data['time']))
            pbar.set_description(msg)

        if timename:
            # a separate name keeps the file loop's index intact
            for time_index, val in enumerate(data['time']):
                write_data(
                    varid=varid,
                    data=data,
                    timeval=val,
                    timebnds=[data['time_bnds'][time_index, :]],
                    index=time_index,
                    raw_variables=raw_variables,
                    simple=False)
                if serial:
                    pbar.update(1)
        else:
            write_data(
                varid=varid,
                data=data,
                raw_variables=raw_variables,
                simple=False)
        if serial:
            pbar.close()

    msg = f'{outvar_name}: write complete, closing'
    logger.debug(msg)
    cmor.close()

    msg = f'{outvar_name}: file close complete'
    logger.debug(msg)

    return outvar_name
예제 #23
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Load the raw input variables and write them out through CMOR as 'phalf'.

    Parameters
    ----------
        infiles (dict): maps each name in RAW_VARIABLES to a list of input file paths
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        **kwargs: optional settings read by this handler
            serial: if truthy, show a tqdm progress bar while writing
            logdir: directory for the CMOR log file; if unset, a 'cmor_logs'
                directory next to the root logger's first handler file is used
            simple: if truthy, print a message and return without converting
    Returns
    -------
        var name (str): 'phalf' after processing is complete, or None when
        simple conversion was requested or input files are missing
    Raises
    ------
        IOError: if an input file does not exist, or if ``any()`` on the
        loaded variable data is false
    """
    logger = logging.getLogger()
    msg = '{}: Starting'.format(VAR_NAME)
    logger.info(msg)

    serial = kwargs.get('serial')
    logdir = kwargs.get('logdir')
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return None

    # check that we have some input files for every variable; report all
    # missing variables before giving up
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        outpath, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)

    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logging.info(msg)

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[RAW_VARIABLES[0]])

    # sort the input files for each variable
    for var_name in RAW_VARIABLES:
        infiles[var_name].sort()

    for index in range(num_files_per_variable):

        # load data for each variable
        for var_name in RAW_VARIABLES:

            # extract data from the input file
            msg = '{name}: loading {variable}'.format(name=VAR_NAME,
                                                      variable=var_name)
            logger.info(msg)

            filename = infiles[var_name][index]

            if not os.path.exists(filename):
                raise IOError("File not found: {}".format(filename))

            # NOTE(review): the file handle is never closed explicitly;
            # confirm whether the loaded objects stay valid after close()
            f = cdms2.open(filename)

            # load the data for each variable
            variable_data = f(var_name)

            if not variable_data.any():
                raise IOError("Variable data not found: {}".format(var_name))

            # key the data by the variable being loaded in THIS iteration
            # (not by the leftover loop variable of the missing-file check)
            data.update({var_name: variable_data})

            # load the lon and lat info & bounds
            # load time & time bounds
            if var_name == 'PS':
                data.update({
                    'ps': f('PS'),
                    'lat': variable_data.getLatitude(),
                    'lon': variable_data.getLongitude(),
                    'lat_bnds': f('lat_bnds'),
                    'lon_bnds': f('lon_bnds'),
                    'time2': variable_data.getTime(),
                    'time_bnds': f('time_bnds')
                })

            if 'lev' in f.listdimension() and 'ilev' in f.listdimension():
                data.update({
                    'lev': f.getAxis('lev')[:] / 1000,
                    'ilev': f.getAxis('ilev')[:] / 1000
                })

            # pick up whichever hybrid coefficients this file provides
            data.update({
                i: f(i)
                for i in ['hyai', 'hybi', 'hyam', 'hybm'] if i in f.variables
            })

        msg = '{name}: loading axes'.format(name=VAR_NAME)
        logger.info(msg)

        # create the cmor variable and axis
        # NOTE(review): the half-level axis is built from 'lev' with 'ilev'
        # as bounds and the a_half/b_half terms below use hyam/hybm; confirm
        # that the interface values (ilev, hyai, hybi) are not intended here
        axes = [{
            str('table_entry'): 'time2',
            str('units'): data['time2'].units
        }, {
            str('table_entry'): str('standard_hybrid_sigma_half'),
            str('units'): str('1'),
            str('coord_vals'): data['lev'][:],
            str('cell_bounds'): data['ilev'][:]
        }, {
            str('table_entry'): str('latitude'),
            str('units'): data['lat'].units,
            str('coord_vals'): data['lat'][:],
            str('cell_bounds'): data['lat_bnds'][:]
        }, {
            str('table_entry'): str('longitude'),
            str('units'): data['lon'].units,
            str('coord_vals'): data['lon'][:],
            str('cell_bounds'): data['lon_bnds'][:]
        }]

        axis_ids = [cmor.axis(**axis) for axis in axes]

        # add hybrid level formula terms
        cmor.zfactor(zaxis_id=axis_ids[1],
                     zfactor_name='a_half',
                     axis_ids=[
                         axis_ids[1],
                     ],
                     zfactor_values=data['hyam'][:])
        cmor.zfactor(zaxis_id=axis_ids[1],
                     zfactor_name='b_half',
                     axis_ids=[
                         axis_ids[1],
                     ],
                     zfactor_values=data['hybm'][:])
        cmor.zfactor(zaxis_id=axis_ids[1],
                     zfactor_name='p0',
                     units='Pa',
                     zfactor_values=100000)
        # NOTE(review): the axis ids are hard-coded as [0, 2, 3] rather than
        # taken from axis_ids; presumably this only matches the axes created
        # on the first pass of the file loop -- confirm for multi-file input
        ips = cmor.zfactor(zaxis_id=axis_ids[1],
                           zfactor_name='ps2',
                           axis_ids=[0, 2, 3],
                           units='Pa')

        data['ips'] = ips

        varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids[:4])

        # write out the data
        msg = "{}: time {:1.1f} - {:1.1f}".format(VAR_NAME,
                                                  data['time_bnds'][0][0],
                                                  data['time_bnds'][-1][-1])
        logger.info(msg)

        if serial:
            pbar = tqdm(total=len(data['time2']))

        # a distinct name keeps the file-loop index from being shadowed
        for time_index, val in enumerate(data['time2']):
            if serial:
                pbar.update(1)
            write_data(varid=varid,
                       data=data,
                       timeval=val,
                       timebnds=[data['time_bnds'][time_index, :]],
                       index=time_index,
                       RAW_VARIABLES=RAW_VARIABLES)
        if serial:
            pbar.close()

    msg = '{}: write complete, closing'.format(VAR_NAME)
    logger.debug(msg)

    cmor.close()

    msg = '{}: file close complete'.format(VAR_NAME)
    logger.debug(msg)

    return 'phalf'
예제 #24
0
def main():
    """
    Command line entry point: run the CMOR handlers for the requested variables.

    Parses the command line arguments, optionally filters the variable list
    through ``precheck``, loads the handlers, runs them either serially or
    with a ``Pool`` of ``nproc`` workers, and finally adds metadata to the
    output files unless that was disabled.

    If a timeout was requested, a ``threading.Timer`` calling ``timeout_exit``
    is started; it is cancelled on every exit path of this function so a
    finished (or failed) run is not kept alive by the pending timer.

    Returns
    -------
        int: 0 on success (including the precheck and only-metadata early
        exits), 1 if the handlers were interrupted, raised an exception, or
        reported a non-zero status

    Raises
    ------
        SystemExit: with code 1 if no handlers were loaded
    """

    # parse the command line arguments
    _args = parse_argsuments().__dict__

    # the variable list may arrive as a single space-separated string
    raw_var_list = _args.get('var_list')
    if len(raw_var_list) == 1 and " " in raw_var_list[0]:
        var_list = raw_var_list[0].split()
    else:
        var_list = raw_var_list
    var_list = [x.strip(',') for x in var_list]
    input_path = _args.get('input_path')
    output_path = _args.get('output_path')
    tables_path = _args.get('tables_path')
    user_metadata = _args.get('user_metadata')
    no_metadata = _args.get('no_metadata') or False
    only_metadata = _args.get('only_metadata') or False
    nproc = _args.get('num_proc') or 6
    serial = _args.get('serial') or False
    mode = _args.get('mode') or 'atm'
    debug = bool(_args.get('debug'))
    map_path = _args.get('map') or None
    cmor_log_dir = _args.get('logdir') or None
    timeout = int(_args['timeout']) if _args.get('timeout') else None
    should_precheck = _args.get('precheck')

    timer = None
    if timeout:
        timer = threading.Timer(timeout, timeout_exit)
        timer.start()

    try:
        if _args.get('handlers'):
            handlers_path = os.path.abspath(_args.get('handlers'))
        else:
            handlers_path, _ = os.path.split(
                os.path.abspath(cmor_handlers.__file__))

        if should_precheck:
            new_var_list = precheck(input_path, output_path, var_list, mode)
            if not new_var_list:
                print("All variables previously computed")
                return 0
            print("Setting up conversion for {}".format(
                " ".join(new_var_list)))
            var_list = new_var_list

        # add additional optional metadata to the output files
        if only_metadata:
            print_message('Updating file metadata and exiting', 'ok')
            add_metadata(file_path=output_path, var_list=var_list)
            return 0

        new_metadata_path = os.path.join(output_path, 'user_metadata.json')

        # create the output dir if it doesnt exist
        os.makedirs(output_path, exist_ok=True)

        # setup temp storage directory
        temp_path = os.environ.get('TMPDIR')
        if temp_path is None:
            temp_path = '{}/tmp'.format(output_path)
            os.makedirs(temp_path, exist_ok=True)

        tempfile.tempdir = temp_path

        logging_path = os.path.join(output_path, 'converter.log')
        print_message("Writing log output to: {}".format(logging_path),
                      'debug')

        # setup logging
        logging.basicConfig(format='%(asctime)s:%(levelname)s: %(message)s',
                            datefmt='%m/%d/%Y %I:%M:%S %p',
                            filename=logging_path,
                            filemode='w',
                            level=logging.INFO)

        # copy the users metadata json file with the updated output directory
        copy_user_metadata(user_metadata, output_path)

        # load variable handlers
        handlers = load_handlers(handlers_path, var_list, debug)
        if len(handlers) == 0:
            print_message('No handlers loaded')
            sys.exit(1)

        # run in the user-selected mode
        if serial:
            print_message('Running CMOR handlers in serial', 'ok')
            try:
                status = run_serial(handlers=handlers,
                                    input_path=input_path,
                                    tables_path=tables_path,
                                    metadata_path=new_metadata_path,
                                    map_path=map_path,
                                    mode=mode,
                                    logdir=cmor_log_dir)
            except KeyboardInterrupt:
                print_message(' -- keyboard interrupt -- ', 'error')
                return 1
            except Exception as error:
                print_debug(error)
                return 1
        else:
            print_message('Running CMOR handlers in parallel', 'ok')
            try:
                pool = Pool(nproc)
                status = run_parallel(pool=pool,
                                      handlers=handlers,
                                      input_path=input_path,
                                      tables_path=tables_path,
                                      metadata_path=new_metadata_path,
                                      map_path=map_path,
                                      mode=mode,
                                      logdir=cmor_log_dir)
            except KeyboardInterrupt:
                print_message(' -- keyboard interrupt -- ', 'error')
                return 1
            except Exception as error:
                print_debug(error)
                return 1
        if status != 0:
            print_message("Error running handlers: {}".format(" ".join(
                [x['name'] for x in handlers])))
            return 1

        # add additional optional metadata to the output files
        if no_metadata:
            print_message('Not adding additional metadata', 'ok')
        else:
            add_metadata(file_path=output_path, var_list=var_list)

        return 0
    finally:
        # always stop the watchdog, including on error returns and sys.exit
        if timer:
            timer.cancel()
예제 #25
0
def main():
    """
    Command line entry point: run the CMOR handlers for the requested variables.

    Parses the command line arguments, optionally filters the variable list
    through ``precheck``, loads the handlers, optionally prints variable
    info and exits, runs the handlers serially or with a ``Pool`` of
    ``nproc`` workers, and adds custom metadata to the output when requested.

    If a timeout was requested, a ``threading.Timer`` calling ``timeout_exit``
    is started; it is cancelled on every exit path of this function so a
    finished (or failed) run is not kept alive by the pending timer.

    Returns
    -------
        int: 0 on success (including the precheck early exit), 1 if the
        handlers were interrupted, raised an exception, or reported a
        non-zero status

    Raises
    ------
        SystemExit: with code 1 if no handlers were loaded, or code 0 after
        printing variable info when the 'info' argument is set
    """

    # parse the command line arguments
    _args = parse_arguments().__dict__

    # the variable list may arrive as a single space-separated string
    raw_var_list = _args.get('var_list')
    if len(raw_var_list) == 1 and " " in raw_var_list[0]:
        var_list = raw_var_list[0].split()
    else:
        var_list = raw_var_list
    var_list = [x.strip(',') for x in var_list]
    input_path = _args.get('input_path')
    output_path = _args.get('output_path')
    tables_path = _args.get('tables_path')
    user_metadata = _args.get('user_metadata')
    custom_metadata = _args.get('custom_metadata')
    nproc = _args.get('num_proc')
    serial = _args.get('serial')
    realm = _args.get('realm')
    map_path = _args.get('map')
    cmor_log_dir = _args.get('logdir')
    timeout = int(_args.get('timeout')) if _args.get('timeout') else False
    simple = _args.get('simple', False)
    precheck_path = _args.get('precheck', False)
    freq = _args.get('freq')

    # NOTE(review): if no logdir was given this builds the path
    # 'None/e3sm_to_cmip.log' -- confirm the argument parser sets a default
    logger = _setup_custom_logger(f"{cmor_log_dir}/e3sm_to_cmip.log", True)
    logger.info(f"input_path = {input_path}")
    logger.info(f"output_path = {output_path}")
    logger.info(f"precheck_path = {precheck_path}")

    # simple mode falls back to the packaged resources as the tables path
    if simple and not tables_path:
        resource_path, _ = os.path.split(os.path.abspath(resources.__file__))
        tables_path = resource_path

    timer = None
    if timeout:
        timer = threading.Timer(timeout, timeout_exit)
        timer.start()

    try:
        if _args.get('handlers'):
            handlers_path = os.path.abspath(_args.get('handlers'))
        else:
            handlers_path, _ = os.path.split(
                os.path.abspath(cmor_handlers.__file__))

        if precheck_path:
            new_var_list = precheck(input_path, precheck_path, var_list, realm)
            if not new_var_list:
                print("All variables previously computed")
                # makedirs/exist_ok: output_path may not exist yet at this
                # point and CMIP6 may be left over from an earlier run
                os.makedirs(os.path.join(output_path, 'CMIP6'), exist_ok=True)
                return 0
            print_message(
                f"Setting up conversion for {' '.join(new_var_list)}", 'ok')
            var_list = new_var_list

        # load variable handlers
        handlers = _load_handlers(
            handlers_path=handlers_path,
            tables_path=tables_path,
            var_list=var_list,
            freq=freq,
            realm=realm)

        if len(handlers) == 0:
            print_message('No handlers loaded')
            sys.exit(1)
        if _args.get('info'):
            print_var_info(
                handlers,
                freq,
                input_path,
                tables_path,
                _args.get('info_out'))
            sys.exit(0)

        new_metadata_path = os.path.join(
            output_path,
            'user_metadata.json')

        # create the output dir if it doesnt exist
        os.makedirs(output_path, exist_ok=True)

        # setup temp storage directory
        temp_path = os.environ.get('TMPDIR')
        if temp_path is None:
            temp_path = f'{output_path}/tmp'
            os.makedirs(temp_path, exist_ok=True)

        tempfile.tempdir = temp_path

        logging_path = os.path.join(output_path, 'converter.log')
        print_message(f"Writing log output to: {logging_path}", 'debug')

        # setup logging
        logging.basicConfig(
            format='%(asctime)s:%(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S %p',
            filename=logging_path,
            filemode='w',
            level=logging.INFO)

        # copy the users metadata json file with the updated output directory
        if not simple:
            copy_user_metadata(
                user_metadata, output_path)

        # run in the user-selected mode
        if serial:
            print_message('Running CMOR handlers in serial', 'ok')
            try:
                status = run_serial(
                    handlers=handlers,
                    input_path=input_path,
                    tables_path=tables_path,
                    metadata_path=new_metadata_path,
                    map_path=map_path,
                    realm=realm,
                    logdir=cmor_log_dir,
                    simple=simple,
                    outpath=output_path,
                    freq=freq)
            except KeyboardInterrupt:
                print_message(' -- keyboard interrupt -- ', 'error')
                return 1
            except Exception as error:
                print_debug(error)
                return 1
        else:
            print_message('Running CMOR handlers in parallel', 'ok')
            try:
                pool = Pool(max_workers=nproc)
                status = run_parallel(
                    pool=pool,
                    handlers=handlers,
                    input_path=input_path,
                    tables_path=tables_path,
                    metadata_path=new_metadata_path,
                    map_path=map_path,
                    realm=realm,
                    logdir=cmor_log_dir,
                    simple=simple,
                    outpath=output_path,
                    freq=freq)
            except KeyboardInterrupt:
                print_message(' -- keyboard interrupt -- ', 'error')
                return 1
            except Exception as error:
                print_debug(error)
                return 1
        if status != 0:
            handler_names = ' '.join([x['name'] for x in handlers])
            print_message(f"Error running handlers: {handler_names}")
            return 1

        if custom_metadata:
            add_metadata(
                file_path=output_path,
                var_list=var_list,
                metadata=custom_metadata)

        return 0
    finally:
        # always stop the watchdog, including on error returns and sys.exit
        if timer:
            timer.cancel()
예제 #26
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_seaIceFreshWaterFlux into CMIP.fsitherm

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; this
        handler reads the 'MPAS_map' and 'MPASO' entries

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete,
        None if simple conversion was requested, or an empty string if
        writing the CMOR output failed (the failure is logged)
    """
    if kwargs.get('simple'):
        print_message(f'Simple CMOR output not supported for {VAR_NAME}',
                      'error')
        return None

    logging.info(f'Starting {VAR_NAME}')

    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASO']

    variableList = [
        'timeMonthly_avg_seaIceFreshWaterFlux', 'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds[VAR_NAME] = dsIn.timeMonthly_avg_seaIceFreshWaterFlux

        ds = mpas.add_time(ds, dsIn)
        # NOTE(review): the result of compute() is discarded; confirm
        # whether ``ds = ds.compute()`` or ``ds.load()`` was intended
        ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string signals failure) but
        # record the traceback instead of discarding it
        logging.exception(f'{VAR_NAME}: failed to write CMOR output')
        return ""
    return VAR_NAME
예제 #27
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Load the raw input variables and write them out through CMOR as 'pfull'.

    Parameters
    ----------
        infiles (dict): maps each name in RAW_VARIABLES to a list of input file paths
        tables (str): path to CMOR tables
        user_input_path (str): path to user input json file
        **kwargs: optional settings read by this handler
            serial: if truthy, show a progress bar while writing
            logdir: directory for the CMOR log file; if unset, a 'cmor_logs'
                directory next to the root logger's first handler file is used
    Returns
    -------
        var name (str): 'pfull' after processing is complete, or None when
        input files are missing
    Raises
    ------
        IOError: if an input file does not exist, or if ``any()`` on the
        loaded variable data is false
    """

    logger = logging.getLogger()
    msg = '{}: Starting'.format(VAR_NAME)
    logger.info(msg)

    serial = kwargs.get('serial')
    logdir = kwargs.get('logdir')

    # check that we have some input files for every variable; report all
    # missing variables before giving up
    zerofiles = False
    for variable in RAW_VARIABLES:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                VAR_NAME, variable)
            print_message(msg)
            logging.error(msg)
            zerofiles = True
    if zerofiles:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        outpath, _ = os.path.split(logger.__dict__['handlers'][0].baseFilename)
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, VAR_NAME + '.log')

    cmor.setup(
        inpath=tables,
        netcdf_file_action=cmor.CMOR_REPLACE,
        logfile=logfile)

    cmor.dataset_json(str(user_input_path))
    cmor.load_table(str(TABLE))

    msg = '{}: CMOR setup complete'.format(VAR_NAME)
    logging.info(msg)

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[RAW_VARIABLES[0]])

    # sort the input files for each variable
    for var_name in RAW_VARIABLES:
        infiles[var_name].sort()

    for index in range(num_files_per_variable):

        # load data for each variable
        for var_name in RAW_VARIABLES:

            # extract data from the input file
            msg = '{name}: loading {variable}'.format(
                name=VAR_NAME,
                variable=var_name)
            logger.info(msg)

            filename = infiles[var_name][index]

            if not os.path.exists(filename):
                raise IOError("File not found: {}".format(filename))

            # NOTE(review): the file handle is never closed explicitly;
            # confirm whether the loaded objects stay valid after close()
            f = cdms2.open(filename)

            # load the data for each variable
            variable_data = f(var_name)

            if not variable_data.any():
                raise IOError("Variable data not found: {}".format(var_name))

            # key the data by the variable being loaded in THIS iteration
            # (not by the leftover loop variable of the missing-file check)
            data.update({
                var_name: variable_data
            })

            # load the lon and lat info & bounds
            # load time & time bounds
            if var_name == 'PS':
                data.update({
                    'ps': f('PS'),
                    'lat': variable_data.getLatitude(),
                    'lon': variable_data.getLongitude(),
                    'lat_bnds': f('lat_bnds'),
                    'lon_bnds': f('lon_bnds'),
                    'time': variable_data.getTime(),
                    'time2': variable_data.getTime(),
                    'time_bnds': f('time_bnds')
                })

            if 'lev' in f.listdimension() and 'ilev' in f.listdimension():
                data.update({
                    'lev': f.getAxis('lev')[:]/1000,
                    'ilev': f.getAxis('ilev')[:]/1000
                })

            # pick up whichever hybrid coefficients this file provides
            data.update({i: f(i) for i in [
                'hyam', 'hybm', 'hyai', 'hybi'] if i in f.variables})

        msg = '{name}: loading axes'.format(name=VAR_NAME)
        logger.info(msg)

        axes = [{
            str('table_entry'): 'time2',
            str('units'): data['time2'].units
        }, {
            str('table_entry'): str('standard_hybrid_sigma'),
            str('units'): str('1'),
            str('coord_vals'): data['lev'][:],
            str('cell_bounds'): data['ilev'][:]
        }, {
            str('table_entry'): str('latitude'),
            str('units'): data['lat'].units,
            str('coord_vals'): data['lat'][:],
            str('cell_bounds'): data['lat_bnds'][:]
        }, {
            str('table_entry'): str('longitude'),
            str('units'): data['lon'].units,
            str('coord_vals'): data['lon'][:],
            str('cell_bounds'): data['lon_bnds'][:]
        }]

        axis_ids = [cmor.axis(**axis) for axis in axes]

        # add hybrid level formula terms
        cmor.zfactor(
            zaxis_id=axis_ids[1],
            zfactor_name=str('a'),
            axis_ids=[axis_ids[1], ],
            zfactor_values=data['hyam'][:],
            zfactor_bounds=data['hyai'][:])
        cmor.zfactor(
            zaxis_id=axis_ids[1],
            zfactor_name=str('b'),
            axis_ids=[axis_ids[1], ],
            zfactor_values=data['hybm'][:],
            zfactor_bounds=data['hybi'][:])
        cmor.zfactor(
            zaxis_id=axis_ids[1],
            zfactor_name=str('p0'),
            units=str('Pa'),
            zfactor_values=100000)
        # NOTE(review): the axis ids are hard-coded as [0, 2, 3] rather than
        # taken from axis_ids; presumably this only matches the axes created
        # on the first pass of the file loop -- confirm for multi-file input
        ips = cmor.zfactor(
            zaxis_id=axis_ids[1],
            zfactor_name=str('ps2'),
            axis_ids=[0, 2, 3],
            units=str('Pa'))

        data['ips'] = ips

        varid = cmor.variable(VAR_NAME, VAR_UNITS, axis_ids[:4])

        # write out the data
        msg = "{}: time {:1.1f} - {:1.1f}".format(
            VAR_NAME,
            data['time_bnds'][0][0],
            data['time_bnds'][-1][-1])
        logger.info(msg)

        if serial:
            widgets = [
                progressbar.DynamicMessage('running'), ' [',
                progressbar.Timer(), '] ',
                progressbar.Bar(),
                ' (', progressbar.ETA(), ') '
            ]
            # the class-level patch replaces how DynamicMessage widgets are
            # rendered with the project's my_dynamic_message
            progressbar.DynamicMessage.__call__ = my_dynamic_message
            pbar = progressbar.ProgressBar(
                maxval=len(data['time']), widgets=widgets)
            pbar.start()

        # a distinct name keeps the file-loop index from being shadowed
        for time_index, val in enumerate(data['time']):
            if serial:
                pbar.update(time_index, running=msg)
            write_data(
                varid=varid,
                data=data,
                timeval=val,
                timebnds=[data['time_bnds'][time_index, :]],
                index=time_index,
                RAW_VARIABLES=RAW_VARIABLES)
        if serial:
            pbar.finish()

    msg = '{}: write complete, closing'.format(VAR_NAME)
    logger.debug(msg)

    cmor.close()

    msg = '{}: file close complete'.format(VAR_NAME)
    logger.debug(msg)

    return 'pfull'
예제 #28
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASSI timeMonthly_avg_vVelocityGeo into CMIP.siv

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; this
        handler reads the 'MPAS_mesh', 'MPAS_map' and 'MPASSI' entries

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete,
        None if simple conversion was requested, or an empty string if
        writing the CMOR output failed (the failure is logged)
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return None

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASSI']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, _ = mpas.get_cell_masks(dsMesh)

    variableList = [
        'timeMonthly_avg_iceAreaCell', 'timeMonthly_avg_vVelocityGeo',
        'xtime_startMonthly', 'xtime_endMonthly'
    ]

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds['siconc'] = dsIn.timeMonthly_avg_iceAreaCell
        # the velocity is interpolated to cells and multiplied by the ice
        # area field stored as 'siconc'
        ds[VAR_NAME] = ds['siconc'] * mpas.interp_vertex_to_cell(
            dsIn.timeMonthly_avg_vVelocityGeo, dsMesh)
        ds = mpas.add_time(ds, dsIn)
        ds = ds.chunk(chunks={'nCells': None, 'time': 6})
        # NOTE(review): the result of compute() is discarded here and below;
        # confirm whether ``ds = ds.compute()`` or ``ds.load()`` was intended
        ds.compute()

    ds = mpas.add_si_mask(ds, cellMask2D, ds.siconc)
    ds['cellMask'] = ds.siconc * ds.cellMask
    ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='seaice')

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }, {
        'table_entry': 'longitude',
        'units': 'degrees_east',
        'coord_vals': ds.lon.values,
        'cell_bounds': ds.lon_bnds.values
    }]

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string signals failure) but
        # record the traceback instead of discarding it
        logging.exception(f'{VAR_NAME}: failed to write CMOR output')
        return ""
    return VAR_NAME
예제 #29
0
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASO timeMonthly_avg_normalVelocity,
    timeMonthly_avg_normalGMBolusVelocity, timeMonthly_avg_vertVelocityTop,
    timeMonthly_avg_vertGMBolusVelocityTop, and timeMonthly_avg_layerThickness
    into CMIP.msftmz

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names; this
        handler reads the 'MPAS_mesh', 'MPASO', 'MPASO_MOC_regions' and
        'MPASO_namelist' entries

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete,
        None if simple conversion was requested, or an empty string if
        writing the CMOR output failed (the failure is logged)
    """
    if kwargs.get('simple'):
        msg = f"{VAR_NAME} is not supported for simple conversion"
        print_message(msg)
        return None

    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    timeSeriesFiles = infiles['MPASO']
    regionMaskFileName = infiles['MPASO_MOC_regions']
    namelistFileName = infiles['MPASO_namelist']

    namelist = mpas.convert_namelist_to_dict(namelistFileName)
    config_density0 = float(namelist['config_density0'])

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    dsMesh = dsMesh.isel(Time=0)

    dsMasks = xarray.open_dataset(regionMaskFileName, mask_and_scale=False)

    variableList = [
        'timeMonthly_avg_normalVelocity',
        'timeMonthly_avg_normalGMBolusVelocity',
        'timeMonthly_avg_vertVelocityTop',
        'timeMonthly_avg_vertGMBolusVelocityTop',
        'timeMonthly_avg_layerThickness', 'xtime_startMonthly',
        'xtime_endMonthly'
    ]

    # progress output is only requested when the 'serial' option is set
    showProgress = kwargs.get('serial', False)

    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        # the streamfunction is scaled by the namelist's config_density0
        ds = config_density0 * mpas.compute_moc_streamfunction(
            dsIn, dsMesh, dsMasks, showProgress=showProgress)

    ds = ds.rename({'moc': VAR_NAME})

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='ocean')

    region = ['global_ocean', 'atlantic_arctic_ocean']

    # create axes
    axes = [{
        'table_entry': 'time',
        'units': ds.time.units
    }, {
        'table_entry': 'basin',
        'units': '',
        'coord_vals': region
    }, {
        'table_entry': 'depth_coord',
        'units': 'm',
        'coord_vals': ds.depth.values,
        'cell_bounds': ds.depth_bnds.values
    }, {
        'table_entry': 'latitude',
        'units': 'degrees_north',
        'coord_vals': ds.lat.values,
        'cell_bounds': ds.lat_bnds.values
    }]
    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        # keep the best-effort contract (empty string signals failure) but
        # record the traceback instead of discarding it
        logging.exception(f'{VAR_NAME}: failed to write CMOR output')
        return ""
    return VAR_NAME
예제 #30
0
def handle_variables(infiles,
                     raw_variables,
                     write_data,
                     outvar_name,
                     outvar_units,
                     table,
                     tables,
                     metadata_path,
                     serial=None,
                     positive=None,
                     levels=None,
                     axis=None,
                     logdir=None):
    """
    Load the raw input variables file by file and write them out through CMOR.

    Parameters
    ----------
        infiles (dict): maps each raw variable name to its list of input
            files; every list is sorted in place by this function
        raw_variables (list): names of the raw variables to load
        write_data (callable): invoked once per time step with the keyword
            arguments varid, data, timeval, timebnds, index and raw_variables
        outvar_name (str): name of the output variable, passed to cmor.variable
        outvar_units (str): units of the output variable, passed to
            cmor.variable
        table: the CMOR table to load (converted with str() for
            cmor.load_table)
        tables: the CMOR tables location, passed as inpath to cmor.setup
        metadata_path: the metadata file handed to cmor.dataset_json
        serial: when truthy, a progress bar is displayed while writing
        positive: when truthy, forwarded to cmor.variable as ``positive``
        levels: forwarded unchanged to get_dimension_data and load_axis
        axis: accepted for interface compatibility; not used here
        logdir (str): directory for the CMOR log file; when not given, a
            ``cmor_logs`` directory next to the root logger's log file is used

    Returns
    -------
        outvar_name once everything has been written, or None if any raw
        variable has no input files
    """
    from e3sm_to_cmip.util import print_message
    logger = logging.getLogger()

    logger.info('{}: Starting'.format(outvar_name))

    # check that we have some input files for every variable; keep looping
    # after the first failure so that every missing variable gets reported
    missing_input = False
    for variable in raw_variables:
        if len(infiles[variable]) == 0:
            msg = '{}: Unable to find input files for {}'.format(
                outvar_name, variable)
            print_message(msg)
            logger.error(msg)
            missing_input = True
    if missing_input:
        return None

    # Create the logging directory and setup cmor
    if logdir:
        logpath = logdir
    else:
        # Use the first file-backed handler of the root logger to locate the
        # log directory. Handlers without a file (e.g. a StreamHandler) are
        # skipped, and the current directory is the fallback when the root
        # logger is not writing to any file.
        log_filename = next(
            (handler.baseFilename for handler in logger.handlers
             if hasattr(handler, 'baseFilename')),
            None)
        if log_filename:
            outpath = os.path.dirname(log_filename)
        else:
            outpath = os.getcwd()
        logpath = os.path.join(outpath, 'cmor_logs')
    os.makedirs(logpath, exist_ok=True)

    logfile = os.path.join(logpath, outvar_name + '.log')

    cmor.setup(inpath=tables,
               netcdf_file_action=cmor.CMOR_REPLACE,
               logfile=logfile)

    cmor.dataset_json(str(metadata_path))
    cmor.load_table(str(table))

    logger.info('{}: CMOR setup complete'.format(outvar_name))

    data = {}

    # assuming all year ranges are the same for every variable
    num_files_per_variable = len(infiles[raw_variables[0]])

    # sort the input files for each variable
    for var_name in raw_variables:
        infiles[var_name].sort()

    for file_index in range(num_files_per_variable):

        # reload the dimensions for each time slice; only the first variable
        # of each slice is asked for them
        get_dims = True

        # load data for each variable
        for var_name in raw_variables:

            # extract data from the input file
            msg = '{name}: loading {variable}'.format(name=outvar_name,
                                                      variable=var_name)
            logger.info(msg)

            new_data = get_dimension_data(
                filename=infiles[var_name][file_index],
                variable=var_name,
                levels=levels,
                get_dims=get_dims)
            data.update(new_data)
            get_dims = False

        logger.info('{name}: loading axes'.format(name=outvar_name))

        # create the cmor variable and axis
        axis_ids, ips = load_axis(data=data, levels=levels)

        if ips:
            data['ips'] = ips

        if positive:
            varid = cmor.variable(outvar_name,
                                  outvar_units,
                                  axis_ids,
                                  positive=positive)
        else:
            varid = cmor.variable(outvar_name, outvar_units, axis_ids)

        # write out the data; this message is also shown by the progress bar
        msg = "{}: time {:1.1f} - {:1.1f}".format(outvar_name,
                                                  data['time_bnds'][0][0],
                                                  data['time_bnds'][-1][-1])
        logger.info(msg)

        pbar = None
        if serial:
            widgets = [
                progressbar.DynamicMessage('running'), ' [',
                progressbar.Timer(), '] ',
                progressbar.Bar(), ' (',
                progressbar.ETA(), ') '
            ]
            # The formatter has to be installed on the class: special methods
            # such as __call__ are looked up on the type, not the instance.
            progressbar.DynamicMessage.__call__ = my_dynamic_message
            pbar = progressbar.ProgressBar(maxval=len(data['time']),
                                           widgets=widgets)
            pbar.start()

        for time_index, timeval in enumerate(data['time']):
            if pbar is not None:
                pbar.update(time_index, running=msg)
            write_data(varid=varid,
                       data=data,
                       timeval=timeval,
                       timebnds=[data['time_bnds'][time_index, :]],
                       index=time_index,
                       raw_variables=raw_variables)
        if pbar is not None:
            pbar.finish()

    logger.debug('{}: write complete, closing'.format(outvar_name))

    cmor.close()

    logger.debug('{}: file close complete'.format(outvar_name))

    return outvar_name