Beispiel #1
0
    def test_get_dist_default_distribution(self):
        """Test using default distributions defined by drivers."""

        with vm.scoped('write', [0]):
            if not vm.is_null:
                path = self.get_temporary_file_path('foo.nc')
                varx = Variable('x',
                                np.arange(5),
                                dimensions='five',
                                attrs={'axis': 'X'})
                vary = Variable('y',
                                np.arange(7) + 10,
                                dimensions='seven',
                                attrs={'axis': 'Y'})
                vc = VariableCollection(variables=[varx, vary])
                vc.write(path)
            else:
                path = None
        path = MPI_COMM.bcast(path)

        rd = RequestDataset(path)
        dist = rd.driver.dist

        distributed_dimension = dist.get_dimension('seven')
        self.assertTrue(distributed_dimension.dist)
Beispiel #2
0
    def test_write_variable_collection_object_arrays(self):
        """Test writing variable length arrays in parallel."""

        with vm.scoped('write', [0]):
            if not vm.is_null:
                path_actual = self.get_temporary_file_path('in.nc')
                path_desired = self.get_temporary_file_path('out.nc')

                value = [[1, 3, 5], [7, 9], [11]]
                v = Variable(name='objects',
                             value=value,
                             fill_value=4,
                             dtype=ObjectType(int),
                             dimensions='values')
                v.write(path_desired)
            else:
                v, path_actual, path_desired = [None] * 3
        path_actual = MPI_COMM.bcast(path_actual)
        path_desired = MPI_COMM.bcast(path_desired)

        dest_mpi = OcgDist()
        dest_mpi.create_dimension('values', 3, dist=True)
        dest_mpi.update_dimension_bounds()

        scattered = variable_scatter(v, dest_mpi)
        outvc = VariableCollection(variables=[scattered])

        with vm.scoped_by_emptyable('write', outvc):
            if not vm.is_null:
                outvc.write(path_actual)

        if MPI_RANK == 0:
            self.assertNcEqual(path_actual, path_desired)
Beispiel #3
0
    def test_system_changing_field_name(self):
        path1 = self.get_temporary_file_path('foo1.nc')
        path2 = self.get_temporary_file_path('foo2.nc')

        vc1 = VariableCollection(name='vc1')
        var1 = Variable('var1', value=[1, 2, 3], dimensions='three', parent=vc1)

        vc2 = VariableCollection(name='vc2')
        vc1.add_child(vc2)
        var2 = Variable('var2', value=[4, 5, 6, 7], dimensions='four', parent=vc2)

        vc1.write(path1)

        rd = RequestDataset(path1)
        # rd.inspect()
        nvc = rd.create_raw_field()
        nvc2 = nvc.children['vc2']
        self.assertIsNone(nvc2['var2']._value)
        self.assertEqual(nvc2.name, 'vc2')
        nvc2.set_name('extraordinary')
        self.assertIsNotNone(nvc2['var2'].get_value())
        self.assertEqual(nvc2['var2'].get_value().tolist(), [4, 5, 6, 7])

        nvc.write(path2)
        rd2 = RequestDataset(path2)
        # rd2.inspect()
        n2vc = rd2.create_raw_field()
        self.assertEqual(n2vc.children[nvc2.name].name, nvc2.name)
Beispiel #4
0
    def test_write_variable_collection_object_arrays(self):
        """Test writing variable length arrays in parallel."""

        with vm.scoped('write', [0]):
            if not vm.is_null:
                path_actual = self.get_temporary_file_path('in.nc')
                path_desired = self.get_temporary_file_path('out.nc')

                value = [[1, 3, 5],
                         [7, 9],
                         [11]]
                v = Variable(name='objects', value=value, fill_value=4, dtype=ObjectType(int), dimensions='values')
                v.write(path_desired)
            else:
                v, path_actual, path_desired = [None] * 3
        path_actual = MPI_COMM.bcast(path_actual)
        path_desired = MPI_COMM.bcast(path_desired)

        dest_mpi = OcgDist()
        dest_mpi.create_dimension('values', 3, dist=True)
        dest_mpi.update_dimension_bounds()

        scattered = variable_scatter(v, dest_mpi)
        outvc = VariableCollection(variables=[scattered])

        with vm.scoped_by_emptyable('write', outvc):
            if not vm.is_null:
                outvc.write(path_actual)

        if MPI_RANK == 0:
            self.assertNcEqual(path_actual, path_desired)
Beispiel #5
0
    def test_system_parallel_write_ndvariable(self):
        """Test a parallel CSV write with a n-dimensional variable."""

        ompi = OcgDist()
        ompi.create_dimension('time', 3)
        ompi.create_dimension('extra', 2)
        ompi.create_dimension('x', 4)
        ompi.create_dimension('y', 7, dist=True)
        ompi.update_dimension_bounds()

        if MPI_RANK == 0:
            path = self.get_temporary_file_path('foo.csv')

            t = TemporalVariable(name='time',
                                 value=[1, 2, 3],
                                 dtype=float,
                                 dimensions='time')
            t.set_extrapolated_bounds('the_time_bounds', 'bounds')

            extra = Variable(name='extra', value=[7, 8], dimensions='extra')

            x = Variable(name='x',
                         value=[9, 10, 11, 12],
                         dimensions='x',
                         dtype=float)
            x.set_extrapolated_bounds('x_bounds', 'bounds')

            # This will have the distributed dimension.
            y = Variable(name='y',
                         value=[13, 14, 15, 16, 17, 18, 19],
                         dimensions='y',
                         dtype=float)
            y.set_extrapolated_bounds('y_bounds', 'bounds')

            data = Variable(name='data',
                            value=np.random.rand(3, 2, 7, 4),
                            dimensions=['time', 'extra', 'y', 'x'])

            vc = VariableCollection(variables=[t, extra, x, y, data])
        else:
            path, vc = [None] * 2

        path = MPI_COMM.bcast(path)
        vc = variable_collection_scatter(vc, ompi)

        with vm.scoped_by_emptyable('write', vc):
            if not vm.is_null:
                vc.write(path,
                         iter_kwargs={
                             'variable': 'data',
                             'followers': ['time', 'extra', 'y', 'x']
                         },
                         driver=DriverCSV)

        if MPI_RANK == 0:
            desired = 169
            with open(path, 'r') as f:
                lines = f.readlines()
            self.assertEqual(len(lines), desired)
Beispiel #6
0
 def get_variable_collection(self, **kwargs):
     parent = VariableCollection(**kwargs)
     for n, v in list(self.metadata_source['variables'].items()):
         SourcedVariable(name=n, request_dataset=self.rd, parent=parent)
     GeometryVariable(name=DimensionName.GEOMETRY_DIMENSION,
                      request_dataset=self.rd,
                      parent=parent)
     crs = self.get_crs(self.metadata_source)
     if crs is not None:
         parent.add_variable(crs)
     return parent
Beispiel #7
0
 def test_renamed_dimensions_on_variables(self):
     vc = VariableCollection()
     var1 = Variable(name='ugid', value=[1, 2, 3], dimensions='ocgis_geom')
     var2 = Variable(name='state', value=[20, 30, 40], dimensions='ocgis_geom')
     vc.add_variable(var1)
     vc.add_variable(var2)
     with renamed_dimensions_on_variables(vc, {'geom': ['ocgis_geom']}):
         for var in list(vc.values()):
             self.assertEqual(var.dimensions[0].name, 'geom')
     for var in list(vc.values()):
         self.assertEqual(var.dimensions[0].name, 'ocgis_geom')
Beispiel #8
0
    def get_variable_collection(self, **kwargs):
        """
        :rtype: :class:`ocgis.new_interface.variable.VariableCollection`
        """

        if KeywordArgument.DRIVER not in kwargs:
            kwargs[KeywordArgument.DRIVER] = self
        dimension = list(self.dist.get_group(rank=vm.rank)['dimensions'].values())[0]
        ret = VariableCollection(**kwargs)
        for v in list(self.metadata_source['variables'].values()):
            nvar = SourcedVariable(name=v['name'], dimensions=dimension, dtype=v['dtype'], request_dataset=self.rd)
            ret.add_variable(nvar)
        return ret
Beispiel #9
0
    def __init__(self,
                 alias=None,
                 dtype=None,
                 field=None,
                 file_only=False,
                 vc=None,
                 parms=None,
                 tgd=None,
                 calc_sample_size=False,
                 fill_value=None,
                 meta_attrs=None,
                 tag=TagName.DATA_VARIABLES,
                 spatial_aggregation=False):

        self._curr_variable = None
        self._current_conformed_array = None
        self._dtype = dtype

        self.alias = alias or self.key
        self.fill_value = fill_value
        self.vc = vc or VariableCollection()
        self.field = field
        self.file_only = file_only
        self.parms = get_default_or_apply(parms,
                                          self._format_parms_,
                                          default={})
        self.tgd = tgd
        self.calc_sample_size = calc_sample_size
        self.meta_attrs = deepcopy(meta_attrs)
        self.tag = tag
        self.spatial_aggregation = spatial_aggregation
Beispiel #10
0
    def test_system_changing_field_name(self):
        path1 = self.get_temporary_file_path('foo1.nc')
        path2 = self.get_temporary_file_path('foo2.nc')

        vc1 = VariableCollection(name='vc1')
        var1 = Variable('var1',
                        value=[1, 2, 3],
                        dimensions='three',
                        parent=vc1)

        vc2 = VariableCollection(name='vc2')
        vc1.add_child(vc2)
        var2 = Variable('var2',
                        value=[4, 5, 6, 7],
                        dimensions='four',
                        parent=vc2)

        vc1.write(path1)

        rd = RequestDataset(path1)
        # rd.inspect()
        nvc = rd.get_variable_collection()
        nvc2 = nvc.children['vc2']
        self.assertIsNone(nvc2['var2']._value)
        self.assertEqual(nvc2.name, 'vc2')
        nvc2.set_name('extraordinary')
        self.assertIsNotNone(nvc2['var2'].get_value())
        self.assertEqual(nvc2['var2'].get_value().tolist(), [4, 5, 6, 7])

        nvc.write(path2)
        rd2 = RequestDataset(path2)
        # rd2.inspect()
        n2vc = rd2.get_variable_collection()
        self.assertEqual(n2vc.children[nvc2.name].name, nvc2.name)
Beispiel #11
0
    def test_system_parallel_write_ndvariable(self):
        """Test a parallel CSV write with a n-dimensional variable."""

        ompi = OcgDist()
        ompi.create_dimension('time', 3)
        ompi.create_dimension('extra', 2)
        ompi.create_dimension('x', 4)
        ompi.create_dimension('y', 7, dist=True)
        ompi.update_dimension_bounds()

        if MPI_RANK == 0:
            path = self.get_temporary_file_path('foo.csv')

            t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
            t.set_extrapolated_bounds('the_time_bounds', 'bounds')

            extra = Variable(name='extra', value=[7, 8], dimensions='extra')

            x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
            x.set_extrapolated_bounds('x_bounds', 'bounds')

            # This will have the distributed dimension.
            y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
            y.set_extrapolated_bounds('y_bounds', 'bounds')

            data = Variable(name='data', value=np.random.rand(3, 2, 7, 4), dimensions=['time', 'extra', 'y', 'x'])

            vc = VariableCollection(variables=[t, extra, x, y, data])
        else:
            path, vc = [None] * 2

        path = MPI_COMM.bcast(path)
        vc = variable_collection_scatter(vc, ompi)

        with vm.scoped_by_emptyable('write', vc):
            if not vm.is_null:
                vc.write(path, iter_kwargs={'variable': 'data', 'followers': ['time', 'extra', 'y', 'x']},
                         driver=DriverCSV)

        if MPI_RANK == 0:
            desired = 169
            with open(path, 'r') as f:
                lines = f.readlines()
            self.assertEqual(len(lines), desired)
Beispiel #12
0
    def test_get_dist_default_distribution(self):
        """Test using default distributions defined by drivers."""

        with vm.scoped('write', [0]):
            if not vm.is_null:
                path = self.get_temporary_file_path('foo.nc')
                varx = Variable('x', np.arange(5), dimensions='five', attrs={'axis': 'X'})
                vary = Variable('y', np.arange(7) + 10, dimensions='seven', attrs={'axis': 'Y'})
                vc = VariableCollection(variables=[varx, vary])
                vc.write(path)
            else:
                path = None
        path = MPI_COMM.bcast(path)

        rd = RequestDataset(path)
        dist = rd.driver.dist

        distributed_dimension = dist.get_dimension('seven')
        self.assertTrue(distributed_dimension.dist)
Beispiel #13
0
 def get_geometryvariable_with_parent():
     vpa = np.array([None, None, None])
     vpa[:] = [Point(1, 2), Point(3, 4), Point(5, 6)]
     value = np.arange(0, 30).reshape(10, 3)
     tas = Variable(name='tas', value=value, dimensions=['time', 'ngeom'])
     backref = VariableCollection(variables=[tas])
     pa = GeometryVariable(value=vpa,
                           parent=backref,
                           name='point',
                           dimensions='ngeom')
     backref[pa.name] = pa
     return pa
Beispiel #14
0
def read_from_collection(target,
                         request_dataset,
                         parent=None,
                         name=None,
                         source_name=constants.UNINITIALIZED,
                         uid=None):
    # Allow an empty variable renaming map. This occurs when there are no visible data variables to the metadata
    # parser.
    try:
        rename_variable_map = request_dataset.rename_variable_map
    except NoDataVariablesFound:
        rename_variable_map = {}

    ret = VariableCollection(attrs=get_netcdf_attributes(target),
                             parent=parent,
                             name=name,
                             source_name=source_name,
                             uid=uid)
    pred = request_dataset.predicate
    for varname, ncvar in target.variables.items():
        if pred is not None and not pred(varname):
            continue
        source_name = varname
        name = rename_variable_map.get(varname, varname)
        sv = SourcedVariable(name=name,
                             request_dataset=request_dataset,
                             parent=ret,
                             source_name=source_name)
        ret[name] = sv

    for group_name, ncgroup in list(target.groups.items()):
        child = read_from_collection(ncgroup,
                                     request_dataset,
                                     parent=ret,
                                     name=group_name,
                                     uid=uid)
        ret.add_child(child)
    return ret
Beispiel #15
0
    def test_system_renaming_dimensions_on_variables(self):
        var1 = Variable(name='var1', value=[1, 2], dimensions='dim')
        var2 = Variable(name='var2', value=[3, 4], dimensions='dim')
        vc = VariableCollection(variables=[var1, var2])
        path = self.get_temporary_file_path('out.nc')
        vc.write(path)

        rd = RequestDataset(path)
        meta = rd.metadata
        meta['dimensions']['new_dim'] = {'size': 2}
        meta['variables']['var2']['dimensions'] = ('new_dim',)

        field = rd.get()
        self.assertEqual(field['var2'].dimensions[0].name, 'new_dim')
        self.assertEqual(field['var2'].get_value().tolist(), [3, 4])

        path2 = self.get_temporary_file_path('out2.nc')
        field.write(path2)

        rd2 = RequestDataset(path2)
        field2 = rd2.get()
        self.assertEqual(field2['var2'].dimensions[0].name, 'new_dim')
        self.assertEqual(field2['var2'].get_value().tolist(), [3, 4])
Beispiel #16
0
    def test_crs(self):
        crs = WGS84()
        gvar = GeometryVariable(crs=crs, name='var')
        self.assertEqual(gvar.crs, crs)
        self.assertIn(crs.name, gvar.parent)
        gvar.crs = None
        self.assertIsNone(gvar.crs)
        self.assertEqual(len(gvar.parent), 1)
        self.assertNotIn(crs.name, gvar.parent)

        # Test coordinate system is maintained.
        gvar = GeometryVariable(crs=crs, name='var')
        vc = VariableCollection(variables=gvar)
        self.assertIn(crs.name, vc)
Beispiel #17
0
    def test_variable_collection_scatter(self):
        dest_mpi = OcgDist()
        five = dest_mpi.create_dimension('five', 5, dist=True)
        ten = dest_mpi.create_dimension('ten', 10)
        dest_mpi.create_variable(name='five', dimensions=five)
        dest_mpi.create_variable(name='all_in', dimensions=ten)
        dest_mpi.create_variable(name='i_could_be_a_coordinate_system')
        dest_mpi.update_dimension_bounds()

        if MPI_RANK == 0:
            var = Variable('holds_five', np.arange(5), dimensions='five')
            var_empty = Variable('i_could_be_a_coordinate_system',
                                 attrs={'reality': 'im_not'})
            var_not_dist = Variable('all_in',
                                    value=np.arange(10) + 10,
                                    dimensions='ten')
            vc = VariableCollection(variables=[var, var_empty, var_not_dist])
        else:
            vc = None

        svc = variable_collection_scatter(vc, dest_mpi)

        self.assertEqual(
            svc['i_could_be_a_coordinate_system'].attrs['reality'], 'im_not')

        if MPI_RANK < 2:
            self.assertFalse(svc['all_in'].is_empty)
            self.assertNumpyAll(svc['all_in'].get_value(), np.arange(10) + 10)
            self.assertFalse(svc.is_empty)
            self.assertFalse(svc['i_could_be_a_coordinate_system'].is_empty)
        else:
            self.assertTrue(svc['all_in'].is_empty)
            self.assertTrue(svc.is_empty)
            self.assertTrue(svc['i_could_be_a_coordinate_system'].is_empty)

        if MPI_RANK == 0:
            self.assertNumpyAll(var.get_value(), vc[var.name].get_value())

        actual = svc['holds_five'].get_value()
        if MPI_SIZE == 2:
            desired = {0: np.arange(3), 1: np.arange(3, 5)}
            self.assertNumpyAll(actual, desired[MPI_RANK])

        actual = svc['holds_five'].is_empty
        if MPI_RANK > 1:
            self.assertTrue(actual)
        else:
            self.assertFalse(actual)
Beispiel #18
0
 def test_renamed_dimensions_on_variables(self):
     vc = VariableCollection()
     var1 = Variable(name='ugid', value=[1, 2, 3], dimensions='ocgis_geom')
     var2 = Variable(name='state',
                     value=[20, 30, 40],
                     dimensions='ocgis_geom')
     vc.add_variable(var1)
     vc.add_variable(var2)
     with renamed_dimensions_on_variables(vc, {'geom': ['ocgis_geom']}):
         for var in list(vc.values()):
             self.assertEqual(var.dimensions[0].name, 'geom')
     for var in list(vc.values()):
         self.assertEqual(var.dimensions[0].name, 'ocgis_geom')
Beispiel #19
0
    def create_merged_weight_file(self, merged_weight_filename, strict=False):
        """
        Merge weight file chunks to a single, global weight file.

        :param str merged_weight_filename: Path to the merged weight file.
        :param bool strict: If ``False``, allow "missing" files where the iterator index cannot create a found file.
         It is best to leave these ``False`` as not all source and destinations are mapped. If ``True``, raise an
        """

        if vm.size > 1:
            raise ValueError("'create_merged_weight_file' does not work in parallel")

        index_filename = self.create_full_path_from_template('index_file')
        ifile = RequestDataset(uri=index_filename).get()
        ifile.load()
        ifc = GridChunkerConstants.IndexFile
        gidx = ifile[ifc.NAME_INDEX_VARIABLE].attrs

        src_global_shape = gidx[ifc.NAME_SRC_GRID_SHAPE]
        dst_global_shape = gidx[ifc.NAME_DST_GRID_SHAPE]

        # Get the global weight dimension size.
        n_s_size = 0
        weight_filename = ifile[gidx[ifc.NAME_WEIGHTS_VARIABLE]]
        wv = weight_filename.join_string_value()
        split_weight_file_directory = self.paths['wd']
        for wfn in map(lambda x: os.path.join(split_weight_file_directory, os.path.split(x)[1]), wv):
            if not os.path.exists(wfn):
                if strict:
                    raise IOError(wfn)
                else:
                    continue
            n_s_size += RequestDataset(wfn).get().dimensions['n_s'].size

        # Create output weight file.
        wf_varnames = ['row', 'col', 'S']
        wf_dtypes = [np.int32, np.int32, np.float64]
        vc = VariableCollection()
        dim = Dimension('n_s', n_s_size)
        for w, wd in zip(wf_varnames, wf_dtypes):
            var = Variable(name=w, dimensions=dim, dtype=wd)
            vc.add_variable(var)
        vc.write(merged_weight_filename)

        # Transfer weights to the merged file.
        sidx = 0
        src_indices = self.src_grid._gc_create_global_indices_(src_global_shape)
        dst_indices = self.dst_grid._gc_create_global_indices_(dst_global_shape)

        out_wds = nc.Dataset(merged_weight_filename, 'a')
        for ii, wfn in enumerate(map(lambda x: os.path.join(split_weight_file_directory, x), wv)):
            if not os.path.exists(wfn):
                if strict:
                    raise IOError(wfn)
                else:
                    continue
            wdata = RequestDataset(wfn).get()
            for wvn in wf_varnames:
                odata = wdata[wvn].get_value()
                try:
                    split_grids_directory = self.paths['wd']
                    odata = self._gc_remap_weight_variable_(ii, wvn, odata, src_indices, dst_indices, ifile, gidx,
                                                            split_grids_directory=split_grids_directory)
                except IndexError as e:
                    msg = "Weight filename: '{}'; Weight Variable Name: '{}'. {}".format(wfn, wvn, str(e))
                    raise IndexError(msg)
                out_wds[wvn][sidx:sidx + odata.size] = odata
                out_wds.sync()
            sidx += odata.size
        out_wds.close()
Beispiel #20
0
    def write_chunks(self):
        """
        Write grid subsets to netCDF files using the provided filename templates. This will also generate ESMF
        regridding weights for each subset if requested.
        """
        src_filenames = []
        dst_filenames = []
        wgt_filenames = []
        dst_slices = []
        src_slices = []
        index_path = self.create_full_path_from_template('index_file')

        # nzeros = len(str(reduce(lambda x, y: x * y, self.nchunks_dst)))

        ctr = 1
        ocgis_lh(logger='grid_chunker', msg='starting self.iter_src_grid_subsets', level=logging.DEBUG)
        for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(yield_dst=True):
            ocgis_lh(logger='grid_chunker', msg='finished iteration {} for self.iter_src_grid_subsets'.format(ctr),
                     level=logging.DEBUG)

            src_path = self.create_full_path_from_template('src_template', index=ctr)
            dst_path = self.create_full_path_from_template('dst_template', index=ctr)
            wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

            src_filenames.append(os.path.split(src_path)[1])
            dst_filenames.append(os.path.split(dst_path)[1])
            wgt_filenames.append(wgt_path)
            dst_slices.append(dst_slc)
            src_slices.append(src_slc)

            # Only write destinations if an iterator is not provided.
            if self.iter_dst is None:
                zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
            else:
                zip_args = [[sub_src], [src_path]]

            cc = 1
            for target, path in zip(*zip_args):
                with vm.scoped_by_emptyable('field.write' + str(cc), target):
                    if not vm.is_null:
                        ocgis_lh(logger='grid_chunker', msg='write_chunks:writing: {}'.format(path),
                                 level=logging.DEBUG)
                        field = Field(grid=target)
                        field.write(path)
                        ocgis_lh(logger='grid_chunker', msg='write_chunks:finished writing: {}'.format(path),
                                 level=logging.DEBUG)
                cc += 1

            # Increment the counter outside of the loop to avoid counting empty subsets.
            ctr += 1

            # Generate an ESMF weights file if requested and at least one rank has data on it.
            if self.genweights and len(vm.get_live_ranks_from_object(sub_src)) > 0:
                vm.barrier()
                self.write_esmf_weights(src_path, dst_path, wgt_path, src_grid=sub_src, dst_grid=sub_dst)
                vm.barrier()

        # Global shapes require a VM global scope to collect.
        src_global_shape = global_grid_shape(self.src_grid)
        dst_global_shape = global_grid_shape(self.dst_grid)

        # Gather and collapse source slices as some may be empty and we write on rank 0.
        gathered_src_grid_slice = vm.gather(src_slices)
        if vm.rank == 0:
            len_src_slices = len(src_slices)
            new_src_grid_slice = [None] * len_src_slices
            for idx in range(len_src_slices):
                for rank_src_grid_slice in gathered_src_grid_slice:
                    if rank_src_grid_slice[idx] is not None:
                        new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                        break
            src_slices = new_src_grid_slice

        with vm.scoped('index write', [0]):
            if not vm.is_null:
                dim = Dimension('nfiles', len(src_filenames))
                vname = ['source_filename', 'destination_filename', 'weights_filename']
                values = [src_filenames, dst_filenames, wgt_filenames]
                grid_chunker_destination = GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE
                attrs = [{'esmf_role': 'grid_chunker_source'},
                         {'esmf_role': grid_chunker_destination},
                         {'esmf_role': 'grid_chunker_weights'}]

                vc = VariableCollection()

                grid_chunker_index = GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE
                vidx = Variable(name=grid_chunker_index)
                vidx.attrs['esmf_role'] = grid_chunker_index
                vidx.attrs['grid_chunker_source'] = 'source_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
                vidx.attrs['grid_chunker_weights'] = 'weights_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.NAME_SRC_GRID_SHAPE] = src_global_shape
                vidx.attrs[GridChunkerConstants.IndexFile.NAME_DST_GRID_SHAPE] = dst_global_shape

                vc.add_variable(vidx)

                for idx in range(len(vname)):
                    v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx], attrs=attrs[idx])
                    vc.add_variable(v)

                bounds_dimension = Dimension(name='bounds', size=2)
                # TODO: This needs to work with four dimensions.
                # Source -----------------------------------------------------------------------------------------------
                self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                       bounds_dimension)

                # Destination ------------------------------------------------------------------------------------------
                self.dst_grid._gc_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                                                       bounds_dimension)

                vc.write(index_path)

        vm.barrier()
Beispiel #21
0
    def _convert_to_ugrid_(field):
        """
        Takes field data out of the OCGIS unstructured format (similar to UGRID) converting to the format expected
        by ESMF Unstructured metadata.
        """

        # The driver for the current field must be NetCDF UGRID to ensure interpretability.
        assert field.dimension_map.get_driver() == DriverKey.NETCDF_UGRID
        grid = field.grid
        # Three-dimensional data is not supported.
        assert not grid.has_z
        # Number of coordinate dimension. This will be 3 for three-dimensional data.
        coord_dim = Dimension('coordDim', 2)

        # Transform ragged array to one-dimensional array. #############################################################

        cindex = grid.cindex
        elements = cindex.get_value()
        num_element_conn_data = [e.shape[0] for e in elements.flat]
        length_connection_count = sum(num_element_conn_data)
        esmf_element_conn = np.zeros(length_connection_count, dtype=elements[0].dtype)
        start = 0

        tag_start_index = MPITag.START_INDEX

        # Collapse the ragged element index array into a single dimensioned vector. This communication block finds the
        # size for the new array. ######################################################################################

        if vm.size > 1:
            max_index = max([ii.max() for ii in elements.flat])
            if vm.rank == 0:
                vm.comm.isend(max_index + 1, dest=1, tag=tag_start_index)
                adjust = 0
            else:
                adjust = vm.comm.irecv(source=vm.rank - 1, tag=tag_start_index)
                adjust = adjust.wait()
                if vm.rank != vm.size - 1:
                    vm.comm.isend(max_index + 1 + adjust, dest=vm.rank + 1, tag=tag_start_index)

        # Fill the new vector for the element connectivity. ############################################################

        for ii in elements.flat:
            if vm.size > 1:
                if grid.archetype.has_multi:
                    mbv = cindex.attrs[OcgisConvention.Name.MULTI_BREAK_VALUE]
                    replace_breaks = np.where(ii == mbv)[0]
                else:
                    replace_breaks = []
                ii = ii + adjust
                if len(replace_breaks) > 0:
                    ii[replace_breaks] = mbv

            esmf_element_conn[start: start + ii.shape[0]] = ii
            start += ii.shape[0]

        # Create the new data representation. ##########################################################################

        connection_count = create_distributed_dimension(esmf_element_conn.size, name='connectionCount')
        esmf_element_conn_var = Variable(name='elementConn', value=esmf_element_conn, dimensions=connection_count,
                                         dtype=np.int32)
        esmf_element_conn_var.attrs[CFName.LONG_NAME] = 'Node indices that define the element connectivity.'
        mbv = cindex.attrs.get(OcgisConvention.Name.MULTI_BREAK_VALUE)
        if mbv is not None:
            esmf_element_conn_var.attrs['polygon_break_value'] = mbv
        esmf_element_conn_var.attrs['start_index'] = grid.start_index
        ret = VariableCollection(variables=field.copy().values(), force=True)

        # Rename the element count dimension.
        original_name = ret[cindex.name].dimensions[0].name
        ret.rename_dimension(original_name, 'elementCount')

        # Add the element-node connectivity variable to the collection.
        ret.add_variable(esmf_element_conn_var)

        num_element_conn = Variable(name='numElementConn',
                                    value=num_element_conn_data,
                                    dimensions=cindex.dimensions[0],
                                    attrs={CFName.LONG_NAME: 'Number of nodes per element.'},
                                    dtype=np.int32)
        ret.add_variable(num_element_conn)

        # Check that the node count dimension is appropriately named.
        gn_name = grid.node_dim.name
        if gn_name != 'nodeCount':
            ret.dimensions[gn_name] = ret.dimensions[gn_name].copy()
            ret.rename_dimension(gn_name, 'nodeCount')

        node_coords = Variable(name='nodeCoords', dimensions=(ret.dimensions['nodeCount'], coord_dim))
        node_coords.units = 'degrees'
        node_coords.attrs[CFName.LONG_NAME] = 'Node coordinate values indexed by element connectivity.'
        node_coords.attrs['coordinates'] = 'x y'
        fill = node_coords.get_value()
        fill[:, 0] = grid.x.get_value()
        fill[:, 1] = grid.y.get_value()
        ret.pop(grid.x.name)
        ret.pop(grid.y.name)
        ret.add_variable(node_coords)

        ret.attrs['gridType'] = 'unstructured'
        ret.attrs['version'] = '0.9'

        # Remove the coordinate index, this does not matter.
        if field.grid.cindex is not None:
            ret.remove_variable(field.grid.cindex.name)

        return ret
Beispiel #22
0
def get_ugrid_data_structure():
    x = Variable(name='node_x', value=[10, 20, 30], dtype=float, dimensions='x')
    y = Variable(name='node_y', value=[-60, -55, -50, -45, -40], dimensions='y')
    grid = Grid(x, y)
    grid.set_extrapolated_bounds('x_bounds', 'y_bounds', 'bounds')
    grid.expand()

    cindex = np.zeros((grid.archetype.size, 4), dtype=int)
    xc = grid.x.bounds.get_value().flatten()
    yc = grid.y.bounds.get_value().flatten()

    for eidx, (ridx, cidx) in enumerate(itertools.product(*[range(ii) for ii in grid.shape])):
        curr_element = grid[ridx, cidx]
        curr_xc = curr_element.x.bounds.get_value().flatten()
        curr_yc = curr_element.y.bounds.get_value().flatten()
        for element_node_idx in range(curr_xc.shape[0]):
            found_idx = find_index([xc, yc], [curr_xc[element_node_idx], curr_yc[element_node_idx]])
            cindex[eidx, element_node_idx] = found_idx

    new_cindex, uindices = reduce_reindex_coordinate_index(cindex.flatten(), start_index=0)
    new_cindex = new_cindex.reshape(*cindex.shape)
    xc = xc[uindices]
    yc = yc[uindices]

    centers = grid.get_value_stacked()
    center_xc = centers[1].flatten()
    center_yc = centers[0].flatten()

    longitude_attrs = {'standard_name': 'longitude', 'units': 'degrees_east'}
    latitude_attrs = {'standard_name': 'latitude', 'units': 'degrees_north'}

    vc = VariableCollection(attrs={'conventions': 'CF-1.6, UGRID-1.0'})
    face_center_x = Variable(name='face_center_x', value=center_xc, dimensions='n_face', parent=vc,
                             attrs=longitude_attrs, dtype=float)
    face_center_y = Variable(name='face_center_y', value=center_yc, dimensions='n_face', parent=vc,
                             attrs=latitude_attrs, dtype=float)

    face_node_index = Variable(name='face_node_index', value=new_cindex, dimensions=['n_face', 'max_nodes'],
                               parent=vc,
                               attrs={'standard_name': 'face_node_connectivity', 'order': 'counterclockwise'})

    face_node_x = Variable(name='face_node_x', value=xc, dimensions='n_node', parent=vc, attrs=longitude_attrs,
                           dtype=float)
    face_node_y = Variable(name='face_node_y', value=yc, dimensions='n_node', parent=vc, attrs=latitude_attrs,
                           dtype=float)

    mesh = Variable(name='mesh',
                    attrs={'standard_name': 'mesh_topology', 'cf_role': 'mesh_topology', 'dimension': 2,
                           'locations': 'face node', 'node_coordinates': 'face_node_x face_node_y',
                           'face_coordinates': 'face_center_x face_center_y',
                           'face_node_connectivity': 'face_node_index'},
                    parent=vc)

    # path = self.get_temporary_file_path('foo.nc')
    # vc.write(path)
    # self.ncdump(path)
    #

    # ==============================================================================================================
    # import matplotlib.pyplot as plt
    # from descartes import PolygonPatch
    # from shapely.geometry import Polygon, MultiPolygon
    #
    # BLUE = '#6699cc'
    # GRAY = '#999999'
    #
    # fig = plt.figure(num=1)
    # ax = fig.add_subplot(111)
    #
    # polys = []
    #
    # for face_idx in range(face_node_index.shape[0]):
    #     sub = face_node_index[face_idx, :].parent
    #     curr_cindex = sub[face_node_index.name].get_value().flatten()
    #     fcx = sub[face_node_x.name].get_value()[curr_cindex]
    #     fcy = sub[face_node_y.name].get_value()[curr_cindex]
    #
    #     coords = np.zeros((4, 2))
    #     coords[:, 0] = fcx
    #     coords[:, 1] = fcy
    #
    #     poly = Polygon(coords)
    #     polys.append(poly)
    #     patch = PolygonPatch(poly, fc=BLUE, ec=GRAY, alpha=0.5, zorder=2)
    #     ax.add_patch(patch)
    #
    # minx, miny, maxx, maxy = MultiPolygon(polys).bounds
    # w, h = maxx - minx, maxy - miny
    # ax.set_xlim(minx - 0.2 * w, maxx + 0.2 * w)
    # ax.set_ylim(miny - 0.2 * h, maxy + 0.2 * h)
    # ax.set_aspect(1)
    #
    # plt.scatter(center_xc, center_yc, zorder=1)
    #
    # plt.show()
    # ===============================================================================================================

    return vc
Beispiel #23
0
    def write_subsets(self, src_template, dst_template, wgt_template, index_path):
        """
        Write grid subsets to netCDF files using the provided filename templates. The template must contain the full
        file path with a single curly-bracer pair to insert the combination counter. ``wgt_template`` should not be a
        full path. This name is used when generating weight files.

        >>> template_example = '/path/to/data_{}.nc'

        :param str src_template: The template for the source subset file.
        :param str dst_template: The template for the destination subset file.
        :param str wgt_template: The template for the weight filename.

        >>> wgt_template = 'esmf_weights_{}.nc'

        :param index_path: Path to the output indexing netCDF.
        """

        src_filenames = []
        dst_filenames = []
        wgt_filenames = []
        dst_slices = []

        # nzeros = len(str(reduce(lambda x, y: x * y, self.nsplits_dst)))

        for ctr, (sub_src, sub_dst, dst_slc) in enumerate(self.iter_src_grid_subsets(yield_dst=True), start=1):
            # padded = create_zero_padded_integer(ctr, nzeros)

            src_path = src_template.format(ctr)
            dst_path = dst_template.format(ctr)
            wgt_filename = wgt_template.format(ctr)

            src_filenames.append(os.path.split(src_path)[1])
            dst_filenames.append(os.path.split(dst_path)[1])
            wgt_filenames.append(wgt_filename)
            dst_slices.append(dst_slc)

            for target, path in zip([sub_src, sub_dst], [src_path, dst_path]):
                if target.is_empty:
                    is_empty = True
                    target = None
                else:
                    is_empty = False
                field = Field(grid=target, is_empty=is_empty)
                ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
                with vm.scoped_by_emptyable('field.write', field):
                    if not vm.is_null:
                        field.write(path)
                ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

        with vm.scoped('index write', [0]):
            if not vm.is_null:
                dim = Dimension('nfiles', len(src_filenames))
                vname = ['source_filename', 'destination_filename', 'weights_filename']
                values = [src_filenames, dst_filenames, wgt_filenames]
                grid_splitter_destination = GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE
                attrs = [{'esmf_role': 'grid_splitter_source'},
                         {'esmf_role': grid_splitter_destination},
                         {'esmf_role': 'grid_splitter_weights'}]

                vc = VariableCollection()

                grid_splitter_index = GridSplitterConstants.IndexFile.NAME_INDEX_VARIABLE
                vidx = Variable(name=grid_splitter_index)
                vidx.attrs['esmf_role'] = grid_splitter_index
                vidx.attrs['grid_splitter_source'] = 'source_filename'
                vidx.attrs[GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
                vidx.attrs['grid_splitter_weights'] = 'weights_filename'
                x_bounds = GridSplitterConstants.IndexFile.NAME_X_BOUNDS_VARIABLE
                vidx.attrs[x_bounds] = x_bounds
                y_bounds = GridSplitterConstants.IndexFile.NAME_Y_BOUNDS_VARIABLE
                vidx.attrs[y_bounds] = y_bounds
                vc.add_variable(vidx)

                for idx in range(len(vname)):
                    v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx], attrs=attrs[idx])
                    vc.add_variable(v)

                bounds_dimension = Dimension(name='bounds', size=2)
                xb = Variable(name=x_bounds, dimensions=[dim, bounds_dimension], attrs={'esmf_role': 'x_split_bounds'},
                              dtype=int)
                yb = Variable(name=y_bounds, dimensions=[dim, bounds_dimension], attrs={'esmf_role': 'y_split_bounds'},
                              dtype=int)

                x_name = self.dst_grid.x.dimensions[0].name
                y_name = self.dst_grid.y.dimensions[0].name
                for idx, slc in enumerate(dst_slices):
                    xb.get_value()[idx, :] = slc[x_name].start, slc[x_name].stop
                    yb.get_value()[idx, :] = slc[y_name].start, slc[y_name].stop
                vc.add_variable(xb)
                vc.add_variable(yb)

                vc.write(index_path)

        vm.barrier()
Beispiel #24
0
    def create_merged_weight_file(self, merged_weight_filename, strict=False):
        """
        Merge weight file chunks to a single, global weight file.

        :param str merged_weight_filename: Path to the merged weight file.
        :param bool strict: If ``False``, allow "missing" files where the iterator index cannot create a found file.
         It is best to leave these ``False`` as not all source and destinations are mapped. If ``True``, raise an
        """

        if vm.size > 1:
            raise ValueError(
                "'create_merged_weight_file' does not work in parallel")

        index_filename = self.create_full_path_from_template('index_file')
        ifile = RequestDataset(uri=index_filename).get()
        ifile.load()
        ifc = GridChunkerConstants.IndexFile
        gidx = ifile[ifc.NAME_INDEX_VARIABLE].attrs

        src_global_shape = gidx[ifc.NAME_SRC_GRID_SHAPE]
        dst_global_shape = gidx[ifc.NAME_DST_GRID_SHAPE]

        # Get the global weight dimension size.
        n_s_size = 0
        weight_filename = ifile[gidx[ifc.NAME_WEIGHTS_VARIABLE]]
        wv = weight_filename.join_string_value()
        split_weight_file_directory = self.paths['wd']
        for wfn in map(
                lambda x: os.path.join(split_weight_file_directory,
                                       os.path.split(x)[1]), wv):
            ocgis_lh(msg="current merge weight file target: {}".format(wfn),
                     level=logging.DEBUG,
                     logger=_LOCAL_LOGGER)
            if not os.path.exists(wfn):
                if strict:
                    raise IOError(wfn)
                else:
                    continue
            curr_dimsize = RequestDataset(wfn).get().dimensions['n_s'].size
            # ESMF writes the weight file, but it may be empty if there are no generated weights.
            if curr_dimsize is not None:
                n_s_size += curr_dimsize

        # Create output weight file.
        wf_varnames = ['row', 'col', 'S']
        wf_dtypes = [np.int32, np.int32, np.float64]
        vc = VariableCollection()
        dim = Dimension('n_s', n_s_size)
        for w, wd in zip(wf_varnames, wf_dtypes):
            var = Variable(name=w, dimensions=dim, dtype=wd)
            vc.add_variable(var)
        vc.write(merged_weight_filename)

        # Transfer weights to the merged file.
        sidx = 0
        src_indices = self.src_grid._gc_create_global_indices_(
            src_global_shape)
        dst_indices = self.dst_grid._gc_create_global_indices_(
            dst_global_shape)

        out_wds = nc.Dataset(merged_weight_filename, 'a')
        for ii, wfn in enumerate(
                map(lambda x: os.path.join(split_weight_file_directory, x),
                    wv)):
            if not os.path.exists(wfn):
                if strict:
                    raise IOError(wfn)
                else:
                    continue
            wdata = RequestDataset(wfn).get()
            for wvn in wf_varnames:
                odata = wdata[wvn].get_value()
                try:
                    split_grids_directory = self.paths['wd']
                    odata = self._gc_remap_weight_variable_(
                        ii,
                        wvn,
                        odata,
                        src_indices,
                        dst_indices,
                        ifile,
                        gidx,
                        split_grids_directory=split_grids_directory)
                except IndexError as e:
                    msg = "Weight filename: '{}'; Weight Variable Name: '{}'. {}".format(
                        wfn, wvn, str(e))
                    raise IndexError(msg)
                out_wds[wvn][sidx:sidx + odata.size] = odata
                out_wds.sync()
            sidx += odata.size
        out_wds.close()
Beispiel #25
0
    def test_init(self):
        ompi = OcgDist(size=2)
        self.assertEqual(len(ompi.mapping), 2)

        dim_x = Dimension('x', 5, dist=False)
        dim_y = Dimension('y', 11, dist=True)
        var_tas = Variable('tas', value=np.arange(0, 5 * 11).reshape(5, 11), dimensions=(dim_x, dim_y))
        thing = Variable('thing', value=np.arange(11) * 10, dimensions=(dim_y,))

        vc = VariableCollection(variables=[var_tas, thing])
        child = VariableCollection(name='younger')
        vc.add_child(child)
        childer = VariableCollection(name='youngest')
        child.add_child(childer)
        dim_six = Dimension('six', 6)
        hidden = Variable('hidden', value=[6, 7, 8, 9, 0, 10], dimensions=dim_six)
        childer.add_variable(hidden)

        ompi.add_dimensions([dim_x, dim_y])
        ompi.add_dimension(dim_six, group=hidden.group)
        ompi.add_variables([var_tas, thing])
        ompi.add_variable(hidden)

        var = ompi.get_variable(hidden)
        self.assertIsInstance(var, dict)
Beispiel #26
0
    def write_chunks(self):
        """
        Write grid subsets to netCDF files using the provided filename templates. This will also generate ESMF
        regridding weights for each subset if requested.
        """
        src_filenames = []
        dst_filenames = []
        wgt_filenames = []
        dst_slices = []
        src_slices = []
        index_path = self.create_full_path_from_template('index_file')

        # nzeros = len(str(reduce(lambda x, y: x * y, self.nchunks_dst)))

        ctr = 1
        ocgis_lh(logger=_LOCAL_LOGGER,
                 msg='starting self.iter_src_grid_subsets',
                 level=logging.DEBUG)
        for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(
                yield_dst=True):
            ocgis_lh(
                logger=_LOCAL_LOGGER,
                msg='finished iteration {} for self.iter_src_grid_subsets'.
                format(ctr),
                level=logging.DEBUG)

            src_path = self.create_full_path_from_template('src_template',
                                                           index=ctr)
            dst_path = self.create_full_path_from_template('dst_template',
                                                           index=ctr)
            wgt_path = self.create_full_path_from_template('wgt_template',
                                                           index=ctr)

            src_filenames.append(os.path.split(src_path)[1])
            dst_filenames.append(os.path.split(dst_path)[1])
            wgt_filenames.append(wgt_path)
            dst_slices.append(dst_slc)
            src_slices.append(src_slc)

            # Only write destinations if an iterator is not provided.
            if self.iter_dst is None:
                zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
            else:
                zip_args = [[sub_src], [src_path]]

            cc = 1
            for target, path in zip(*zip_args):
                with vm.scoped_by_emptyable('field.write' + str(cc), target):
                    if not vm.is_null:
                        ocgis_lh(logger=_LOCAL_LOGGER,
                                 msg='write_chunks:writing: {}'.format(path),
                                 level=logging.DEBUG)
                        field = Field(grid=target)
                        field.write(path)
                        ocgis_lh(
                            logger=_LOCAL_LOGGER,
                            msg='write_chunks:finished writing: {}'.format(
                                path),
                            level=logging.DEBUG)
                cc += 1

            # Increment the counter outside of the loop to avoid counting empty subsets.
            ctr += 1

            # Generate an ESMF weights file if requested and at least one rank has data on it.
            if self.genweights and len(
                    vm.get_live_ranks_from_object(sub_src)) > 0:
                vm.barrier()
                ocgis_lh(logger=_LOCAL_LOGGER,
                         msg='write_chunks:writing esmf weights: {}'.format(
                             wgt_path),
                         level=logging.DEBUG)
                self.write_esmf_weights(src_path,
                                        dst_path,
                                        wgt_path,
                                        src_grid=sub_src,
                                        dst_grid=sub_dst)
                vm.barrier()

        # Global shapes require a VM global scope to collect.
        src_global_shape = global_grid_shape(self.src_grid)
        dst_global_shape = global_grid_shape(self.dst_grid)

        # Gather and collapse source slices as some may be empty and we write on rank 0.
        gathered_src_grid_slice = vm.gather(src_slices)
        if vm.rank == 0:
            len_src_slices = len(src_slices)
            new_src_grid_slice = [None] * len_src_slices
            for idx in range(len_src_slices):
                for rank_src_grid_slice in gathered_src_grid_slice:
                    if rank_src_grid_slice[idx] is not None:
                        new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                        break
            src_slices = new_src_grid_slice

        with vm.scoped('index write', [0]):
            if not vm.is_null:
                dim = Dimension('nfiles', len(src_filenames))
                vname = [
                    'source_filename', 'destination_filename',
                    'weights_filename'
                ]
                values = [src_filenames, dst_filenames, wgt_filenames]
                grid_chunker_destination = GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE
                attrs = [{
                    'esmf_role': 'grid_chunker_source'
                }, {
                    'esmf_role': grid_chunker_destination
                }, {
                    'esmf_role': 'grid_chunker_weights'
                }]

                vc = VariableCollection()

                grid_chunker_index = GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE
                vidx = Variable(name=grid_chunker_index)
                vidx.attrs['esmf_role'] = grid_chunker_index
                vidx.attrs['grid_chunker_source'] = 'source_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_DESTINATION_VARIABLE] = 'destination_filename'
                vidx.attrs['grid_chunker_weights'] = 'weights_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_SRC_GRID_SHAPE] = src_global_shape
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_DST_GRID_SHAPE] = dst_global_shape

                vc.add_variable(vidx)

                for idx in range(len(vname)):
                    v = Variable(name=vname[idx],
                                 dimensions=dim,
                                 dtype=str,
                                 value=values[idx],
                                 attrs=attrs[idx])
                    vc.add_variable(v)

                bounds_dimension = Dimension(name='bounds', size=2)
                # TODO: This needs to work with four dimensions.
                # Source -----------------------------------------------------------------------------------------------
                self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE,
                                                       vidx, vc, src_slices,
                                                       dim, bounds_dimension)

                # Destination ------------------------------------------------------------------------------------------
                self.dst_grid._gc_create_index_bounds_(
                    RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                    bounds_dimension)

                vc.write(index_path)

        vm.barrier()
Beispiel #27
0
    def _get_field_write_target_(cls, field):
        """
        Takes field data out of the OCGIS unstructured format (similar to UGRID) converting to the format expected
        by ESMF Unstructured metadata.
        """

        # The driver for the current field must be NetCDF UGRID to ensure interpretability.
        assert field.dimension_map.get_driver() == DriverKey.NETCDF_UGRID
        grid = field.grid
        # Three-dimensional data is not supported.
        assert not grid.has_z
        # Number of coordinate dimension. This will be 3 for three-dimensional data.
        coord_dim = Dimension('coordDim', 2)

        # Transform ragged array to one-dimensional array. #############################################################

        cindex = grid.cindex
        elements = cindex.get_value()
        num_element_conn_data = [e.shape[0] for e in elements.flat]
        length_connection_count = sum(num_element_conn_data)
        esmf_element_conn = np.zeros(length_connection_count,
                                     dtype=elements[0].dtype)
        start = 0

        tag_start_index = MPITag.START_INDEX

        # Collapse the ragged element index array into a single dimensioned vector. This communication block finds the
        # size for the new array. ######################################################################################

        if vm.size > 1:
            max_index = max([ii.max() for ii in elements.flat])
            if vm.rank == 0:
                vm.comm.isend(max_index + 1, dest=1, tag=tag_start_index)
                adjust = 0
            else:
                adjust = vm.comm.irecv(source=vm.rank - 1, tag=tag_start_index)
                adjust = adjust.wait()
                if vm.rank != vm.size - 1:
                    vm.comm.isend(max_index + 1 + adjust,
                                  dest=vm.rank + 1,
                                  tag=tag_start_index)

        # Fill the new vector for the element connectivity. ############################################################

        for ii in elements.flat:
            if vm.size > 1:
                if grid.archetype.has_multi:
                    mbv = cindex.attrs[OcgisConvention.Name.MULTI_BREAK_VALUE]
                    replace_breaks = np.where(ii == mbv)[0]
                else:
                    replace_breaks = []
                ii = ii + adjust
                if len(replace_breaks) > 0:
                    ii[replace_breaks] = mbv

            esmf_element_conn[start:start + ii.shape[0]] = ii
            start += ii.shape[0]

        # Create the new data representation. ##########################################################################

        connection_count = create_distributed_dimension(esmf_element_conn.size,
                                                        name='connectionCount')
        esmf_element_conn_var = Variable(name='elementConn',
                                         value=esmf_element_conn,
                                         dimensions=connection_count)
        esmf_element_conn_var.attrs[
            CFName.
            LONG_NAME] = 'Node indices that define the element connectivity.'
        mbv = cindex.attrs.get(OcgisConvention.Name.MULTI_BREAK_VALUE)
        if mbv is not None:
            esmf_element_conn_var.attrs['polygon_break_value'] = mbv
        esmf_element_conn_var.attrs['start_index'] = grid.start_index
        ret = VariableCollection(variables=field.copy().values(), force=True)

        # Rename the element count dimension.
        original_name = ret[cindex.name].dimensions[0].name
        ret.rename_dimension(original_name, 'elementCount')

        # Add the element-node connectivity variable to the collection.
        ret.add_variable(esmf_element_conn_var)

        num_element_conn = Variable(
            name='numElementConn',
            value=num_element_conn_data,
            dimensions=cindex.dimensions[0],
            attrs={CFName.LONG_NAME: 'Number of nodes per element.'})
        ret.add_variable(num_element_conn)

        node_coords = Variable(name='nodeCoords',
                               dimensions=(grid.node_dim, coord_dim))
        node_coords.units = 'degrees'
        node_coords.attrs[
            CFName.
            LONG_NAME] = 'Node coordinate values indexed by element connectivity.'
        node_coords.attrs['coordinates'] = 'x y'
        fill = node_coords.get_value()
        fill[:, 0] = grid.x.get_value()
        fill[:, 1] = grid.y.get_value()
        ret.pop(grid.x.name)
        ret.pop(grid.y.name)
        ret.add_variable(node_coords)

        ret.attrs['gridType'] = 'unstructured'
        ret.attrs['version'] = '0.9'

        return ret
Beispiel #28
0
    def iter_src_grid_subsets(self, yield_dst=False):
        """
        Yield source grid subsets using the extent of its associated destination grid subset.

        :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
        :return: The source grid if ``yield_dst`` is ``False``, otherwise a three-element tuple in the form
         ``(<source grid subset>, <destination grid subset>, <destination grid slice>)``.
        :rtype: :class:`ocgis.Grid` or (:class:`ocgis.Grid`, :class:`ocgis.Grid`, dict)
        """

        if yield_dst:
            yield_slice = True
        else:
            yield_slice = False

        if self.buffer_value is None:
            try:
                if self.dst_grid_resolution is None:
                    dst_grid_resolution = self.dst_grid.resolution
                else:
                    dst_grid_resolution = self.dst_grid_resolution
                if self.src_grid_resolution is None:
                    src_grid_resolution = self.src_grid.resolution
                else:
                    src_grid_resolution = self.src_grid_resolution

                if dst_grid_resolution <= src_grid_resolution:
                    target_resolution = dst_grid_resolution
                else:
                    target_resolution = src_grid_resolution
                buffer_value = 2. * target_resolution
            except NotImplementedError:
                # Unstructured grids do not have an associated resolution.
                if isinstance(self.src_grid, GridUnstruct) or isinstance(self.dst_grid, GridUnstruct):
                    buffer_value = None
                else:
                    raise
        else:
            buffer_value = self.buffer_value

        dst_grid_wrapped_state = self.dst_grid.wrapped_state
        dst_grid_crs = self.dst_grid.crs

        # Use a destination grid iterator if provided.
        if self.iter_dst is not None:
            iter_dst = self.iter_dst(self, yield_slice=yield_slice)
        else:
            iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice)

        # Loop over each destination grid subset.
        for yld in iter_dst:
            if yield_slice:
                dst_grid_subset, dst_slice = yld
            else:
                dst_grid_subset = yld

            dst_box = None
            with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
                if not vm.is_null:
                    if self.check_contains:
                        dst_box = box(*dst_grid_subset.extent_global)

                    # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                    extent_global = dst_grid_subset.parent.attrs.get('extent_global')
                    if extent_global is None:
                        extent_global = dst_grid_subset.extent_global
                    sub_box = box(*extent_global)
                    if buffer_value is not None:
                        sub_box = sub_box.buffer(buffer_value).envelope

                    ocgis_lh(msg=str(sub_box.bounds), level=logging.DEBUG)
                else:
                    sub_box, dst_box = [None, None]

            live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
            sub_box = vm.bcast(sub_box, root=live_ranks[0])

            if self.check_contains:
                dst_box = vm.bcast(dst_box, root=live_ranks[0])

            sub_box = GeometryVariable.from_shapely(sub_box, is_bbox=True, wrapped_state=dst_grid_wrapped_state,
                                                    crs=dst_grid_crs)
            src_grid_subset, src_grid_slice = self.src_grid.get_intersects(sub_box, keep_touches=False, cascade=False,
                                                                           optimized_bbox_subset=self.optimized_bbox_subset,
                                                                           return_slice=True)

            # Reload the data using a new source index distribution.
            if hasattr(src_grid_subset, 'reduce_global'):
                # Only redistribute if we have one live rank.
                if self.redistribute and len(vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                    topology = src_grid_subset.abstractions_available[Topology.POLYGON]
                    cindex = topology.cindex
                    redist_dimname = self.src_grid.abstractions_available[Topology.POLYGON].element_dim.name
                    if src_grid_subset.is_empty:
                        redist_dim = None
                    else:
                        redist_dim = topology.element_dim
                    redistribute_by_src_idx(cindex, redist_dimname, redist_dim)

            with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
                if not vm.is_null:
                    if not self.allow_masked:
                        gmask = src_grid_subset.get_mask()
                        if gmask is not None and gmask.any():
                            raise ValueError('Masked values in source grid subset.')

                    if self.check_contains:
                        src_box = box(*src_grid_subset.extent_global)
                        if not does_contain(src_box, dst_box):
                            raise ValueError('Contains check failed.')

                    # Try to reduce the coordinates in the case of unstructured grid data.
                    if hasattr(src_grid_subset, 'reduce_global'):
                        src_grid_subset = src_grid_subset.reduce_global()
                else:
                    src_grid_subset = VariableCollection(is_empty=True)

                if src_grid_subset.is_empty:
                    src_grid_slice = None
                else:
                    src_grid_slice = {src_grid_subset.dimensions[ii].name: src_grid_slice[ii] for ii in
                                      range(src_grid_subset.ndim)}

            if yield_dst:
                yld = (src_grid_subset, src_grid_slice, dst_grid_subset, dst_slice)
            else:
                yld = src_grid_subset, src_grid_slice

            yield yld