Esempio n. 1
0
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type,
                esmf_dst_type, genweights, esmf_regrid_method, spatial_subset,
                src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate, data_variables, spatial_subset_path,
                verbose, loglvl, weightfilemode):

    # Used for creating the history string.
    the_locals = locals()

    if verbose:
        ocgis_lh.configure(to_stream=True, level=getattr(logging, loglvl))
    ocgis_lh(msg="Starting Chunked Regrid Weight Generation",
             level=logging.INFO,
             logger=CRWG_LOG)

    if not ocgis.env.USE_NETCDF4_MPI:
        msg = (
            'env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
            'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger=CRWG_LOG, force=True)

    if data_variables is not None:
        data_variables = data_variables.split(',')

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory is one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge only operation.
    should_create_wd = (nchunks_dst is None
                        or not all([ii == 1
                                    for ii in nchunks_dst])) or spatial_subset
    if should_create_wd:
        if wd is None:
            if ocgis.vm.rank == 0:
                wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
            wd = ocgis.vm.bcast(wd)
        else:
            exc = None
            if ocgis.vm.rank == 0:
                # The working directory must not exist to proceed.
                if nchunks_dst is not None:
                    if os.path.exists(wd):
                        exc = ValueError(
                            "Working directory {} must not exist.".format(wd))
                    else:
                        # Make the working directory nesting as needed.
                        os.makedirs(wd)
            exc = ocgis.vm.bcast(exc)
            if exc is not None:
                raise exc

        if merge and not spatial_subset or (spatial_subset and genweights):
            if _is_subdir_(wd, weight):
                raise ValueError(
                    'Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.'
                )

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source,
                                      esmf_src_type,
                                      data_variables=data_variables)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        if spatial_subset_path is None:
            spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        msg = "Executing spatial subset. Output path is: {}".format(
            spatial_subset_path)
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        _write_spatial_subset_(rd_src,
                               rd_dst,
                               spatial_subset_path,
                               src_resmax=src_resolution)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {
        'regrid_method': esmf_regrid_method,
        'ignore_degenerate': ignore_degenerate
    }

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src,
                     rd_dst,
                     nchunks_dst=nchunks_dst,
                     src_grid_resolution=src_resolution,
                     paths=paths,
                     dst_grid_resolution=dst_resolution,
                     buffer_value=buffer_distance,
                     redistribute=True,
                     genweights=genweights,
                     esmf_kwargs=esmf_kwargs,
                     use_spatial_decomp='auto',
                     eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None and not gs.is_one_chunk:
        msg = "Starting main chunking loop..."
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            msg = "Writing ESMF weights..."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            handle_weight_file_check(weight)
            gs.write_esmf_weights(source,
                                  destination,
                                  weight,
                                  filemode=weightfilemode)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset and not gs.is_one_chunk:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                msg = "Merging chunked weight files to global file. Output global weight file is: {}".format(
                    weight)
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                handle_weight_file_check(weight)
                gs.create_merged_weight_file(weight)
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

        ocgis.vm.barrier()

    # Append the history string if there is an output weight file.
    if weight and ocgis.vm.rank == 0:
        if os.path.exists(weight):
            # Add some additional stuff for record keeping
            import getpass
            import socket
            import datetime

            with nc.Dataset(weight, 'a') as ds:
                ds.setncattr('created_by_user', getpass.getuser())
                ds.setncattr('created_on_hostname', socket.getfqdn())
                ds.setncattr('history', create_history_string(the_locals))
    ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            msg = "Removing working directory since persist is False."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    ocgis_lh(msg="Success!", level=logging.INFO, logger=CRWG_LOG)
    return 0
Esempio n. 2
0
File: ocli.py Progetto: huard/ocgis
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate):
    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger='ocli.chunked_rwg', force=True)

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory is one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge only operation.
    if wd is None:
        if ocgis.vm.rank == 0:
            wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
        wd = ocgis.vm.bcast(wd)
    else:
        if ocgis.vm.rank == 0:
            # The working directory must not exist to proceed.
            if os.path.exists(wd):
                raise ValueError("Working directory 'wd' must not exist.")
            else:
                # Make the working directory nesting as needed.
                os.makedirs(wd)
        ocgis.vm.barrier()

    if merge and not spatial_subset or (spatial_subset and genweights):
        if _is_subdir_(wd, weight):
            raise ValueError(
                'Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        # TODO: This path should be customizable.
        spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method,
                   'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None:
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            gs.write_esmf_weights(source, destination, weight)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                gs.create_merged_weight_file(weight)
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

        ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    return 0
Esempio n. 3
0
File: ocli.py Progetto: NCPP/ocgis
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate, data_variables, spatial_subset_path, verbose, loglvl):
    if verbose:
        ocgis_lh.configure(to_stream=True, level=getattr(logging, loglvl))
    ocgis_lh(msg="Starting Chunked Regrid Weight Generation", level=logging.INFO, logger=CRWG_LOG)

    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger=CRWG_LOG, force=True)

    if data_variables is not None:
        data_variables = data_variables.split(',')

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory is one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge only operation.
    if wd is None:
        if ocgis.vm.rank == 0:
            wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
        wd = ocgis.vm.bcast(wd)
    else:
        exc = None
        if ocgis.vm.rank == 0:
            # The working directory must not exist to proceed.
            if os.path.exists(wd):
                exc = ValueError("Working directory {} must not exist.".format(wd))
            else:
                # Make the working directory nesting as needed.
                os.makedirs(wd)
        exc = ocgis.vm.bcast(exc)
        if exc is not None:
            raise exc

    if merge and not spatial_subset or (spatial_subset and genweights):
        if _is_subdir_(wd, weight):
            raise ValueError(
                'Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type, data_variables=data_variables)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        if spatial_subset_path is None:
            spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        msg = "Executing spatial subset. Output path is: {}".format(spatial_subset_path)
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path, src_resmax=src_resolution)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method,
                   'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None:
        msg = "Starting main chunking loop..."
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            msg = "Writing ESMF weights..."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            gs.write_esmf_weights(source, destination, weight)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                msg = "Merging chunked weight files to global file. Output global weight file is: {}".format(weight)
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                gs.create_merged_weight_file(weight)
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

        ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            msg = "Removing working directory since persist is False."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    ocgis_lh(msg="Success!", level=logging.INFO, logger=CRWG_LOG)
    return 0
Esempio n. 4
0
    def test_system_negative_values_in_spherical_grid(self):
        original_dir = os.getcwd()
        try:
            xcn = np.arange(-10, 350, step=10, dtype=float)
            xc = np.arange(0, 360, step=10, dtype=float)
            yc = np.arange(-90, 100, step=10, dtype=float)

            xvn = Variable("lon", xcn, dimensions=["lon"])
            xv = Variable("lon", xc, dimensions=["lon"])
            yv = Variable("lat", yc, dimensions=["lat"])

            gridn = Grid(x=xvn.copy(), y=yv.copy(), crs=Spherical())
            gridu = Grid(x=xv.copy(), y=yv.copy(), crs=Spherical())
            gridw = create_gridxy_global(5, with_bounds=False, crs=Spherical())
            grids = [gridn, gridu, gridw]
            for ctr, (src, dst) in enumerate(itertools.product(grids, grids)):
                os.chdir(self.current_dir_output)
                gdirname = "grid-ctr-{}".format(ctr)
                self.dprint(gdirname)
                griddir = os.path.join(self.current_dir_output, gdirname)
                os.mkdir(gdirname)
                os.chdir(gdirname)

                srcgridname = "gridn.nc"
                src.parent.write(srcgridname)
                dstgridname = "grid.nc"
                dst.parent.write(dstgridname)

                nchunks_dst = [(4, 1), (3, 1), (2, 1), (1, 1)]
                for ctr, n in enumerate(nchunks_dst):
                    os.chdir(griddir)
                    dirname = 'ctr-{}'.format(ctr)
                    os.mkdir(dirname)
                    os.chdir(dirname)
                    wd = os.getcwd()
                    self.dprint("current chunks", n)
                    g = GridChunker(src,
                                    dst,
                                    nchunks_dst=n,
                                    genweights=True,
                                    paths={'wd': wd},
                                    esmf_kwargs={'regrid_method': 'BILINEAR'})
                    if not g.is_one_chunk:
                        g.write_chunks()
                        g.create_merged_weight_file(
                            os.path.join(griddir, "ctr-{}".format(ctr),
                                         "merged-weights.nc"))
                    else:
                        g.write_esmf_weights(
                            os.path.join(griddir, srcgridname),
                            os.path.join(griddir, dstgridname),
                            os.path.join(griddir, "global-weights.nc"))

                os.chdir(griddir)
                for ctr in range(0, len(nchunks_dst) - 1):
                    src_filename = os.path.join(griddir, "ctr-{}".format(ctr),
                                                "merged-weights.nc")
                    dst_filename = os.path.join(griddir, "global-weights.nc")
                    self.assertWeightFilesEquivalent(src_filename,
                                                     dst_filename)
        finally:
            os.chdir(original_dir)