Ejemplo n.º 1
0
    def test_create_unique_global_array(self):
        """Compare ``create_unique_global_array`` against ``np.unique`` for several arrays.

        Each candidate array is built on rank 0, scattered along a distributed dimension of size 9, reduced with
        ``create_unique_global_array`` and gathered so that rank 0 can compare against the expected unique values.
        """
        dist = OcgDist()
        dist.create_dimension('dim', 9, dist=True)
        dist.update_dimension_bounds()

        candidates = (
            [4, 2, 1, 2, 1, 4, 1, 4, 2],
            [44, 25, 16, 27, 18, 49, 10, 41, 22],
            [44, 25, 16, 27, 44, 49, 10, 41, 44],
            [1, 1, 1, 1, 1, 1, 1, 1, 1],
        )

        for candidate in candidates:
            # Only rank 0 builds the full variable and the expected result; every other rank passes ``None`` into
            # the scatter.
            index = None
            if vm.rank == 0:
                index = Variable(name='cindex', value=candidate, dimensions='dim')
                desired = np.unique(index.get_value())
                desired_length = len(desired)
            index = variable_scatter(index, dist)

            with vm.scoped_by_emptyable('not empty', index):
                if not vm.is_null:
                    local_unique = create_unique_global_array(index.get_value())
                    gathered = vm.gather(local_unique)

                    # ``desired`` and ``desired_length`` only exist on rank 0, so the comparison happens there.
                    if vm.rank == 0:
                        combined = hgather(gathered)
                        self.assertEqual(len(combined), desired_length)
                        self.assertEqual(set(combined), set(desired))
Ejemplo n.º 2
0
    def test_create_unique_global_array(self):
        """Compare ``create_unique_global_array`` against ``np.unique`` for several arrays.

        Each candidate array is built on rank 0, scattered along a distributed dimension of size 9, reduced with
        ``create_unique_global_array`` and gathered so that rank 0 can compare against the expected unique values.
        """
        dist = OcgDist()
        dist.create_dimension('dim', 9, dist=True)
        dist.update_dimension_bounds()

        candidates = (
            [4, 2, 1, 2, 1, 4, 1, 4, 2],
            [44, 25, 16, 27, 18, 49, 10, 41, 22],
            [44, 25, 16, 27, 44, 49, 10, 41, 44],
            [1, 1, 1, 1, 1, 1, 1, 1, 1],
        )

        for candidate in candidates:
            # Only rank 0 builds the full variable and the expected result; every other rank passes ``None`` into
            # the scatter.
            index = None
            if vm.rank == 0:
                index = Variable(name='cindex', value=candidate, dimensions='dim')
                desired = np.unique(index.get_value())
                desired_length = len(desired)
            index = variable_scatter(index, dist)

            with vm.scoped_by_emptyable('not empty', index):
                if not vm.is_null:
                    local_unique = create_unique_global_array(index.get_value())
                    gathered = vm.gather(local_unique)

                    # ``desired`` and ``desired_length`` only exist on rank 0, so the comparison happens there.
                    if vm.rank == 0:
                        combined = hgather(gathered)
                        self.assertEqual(len(combined), desired_length)
                        self.assertEqual(set(combined), set(desired))
Ejemplo n.º 3
0
    def test_init_dimension_map(self):
        """Test initializing with a dimension map only.

        The point coordinates are registered on the dimension map's point topology, and the ``PointGC`` is then
        created from the parent field alone. Its ``x`` and ``y`` must match the original variables.
        """
        dimension_map = DimensionMap()
        x_var = Variable(value=[1, 2, 3], dimensions='elements', name='x')
        y_var = Variable(value=[4, 5, 6], dimensions='elements', name='y')

        point_topology = dimension_map.get_topology(Topology.POINT, create=True)
        for key, coordinate in ((DMK.X, x_var), (DMK.Y, y_var)):
            point_topology.set_variable(key, coordinate)

        parent_field = Field(variables=[x_var, y_var], dimension_map=dimension_map)
        point_gc = PointGC(parent=parent_field)

        self.assertNumpyAll(x_var.get_value(), point_gc.x.get_value())
        self.assertNumpyAll(y_var.get_value(), point_gc.y.get_value())
Ejemplo n.º 4
0
    def test_init_dimension_map(self):
        """Test initializing with a dimension map only.

        The point coordinates are registered on the dimension map's point topology, and the ``PointGC`` is then
        created from the parent field alone. Its ``x`` and ``y`` must match the original variables.
        """
        dimension_map = DimensionMap()
        x_var = Variable(value=[1, 2, 3], dimensions='elements', name='x')
        y_var = Variable(value=[4, 5, 6], dimensions='elements', name='y')

        point_topology = dimension_map.get_topology(Topology.POINT, create=True)
        for key, coordinate in ((DMK.X, x_var), (DMK.Y, y_var)):
            point_topology.set_variable(key, coordinate)

        parent_field = Field(variables=[x_var, y_var], dimension_map=dimension_map)
        point_gc = PointGC(parent=parent_field)

        self.assertNumpyAll(x_var.get_value(), point_gc.x.get_value())
        self.assertNumpyAll(y_var.get_value(), point_gc.y.get_value())
Ejemplo n.º 5
0
    def test_create_index_variable_global(self):
        """Placeholder test for ``create_index_variable_global``; it is always skipped.

        The unconditional ``SkipTest`` makes everything after it unreachable. The remaining body sketches the
        intended check: indexing the destination values with the computed index should reproduce the source values.
        """
        raise SkipTest('not implemented')

        src_size, dst_size = 5, 7

        src_dim = Dimension('dsrc', src_size, dist=True)
        src_dist = OcgDist()
        src_dist.add_dimension(src_dim)
        src_dist.update_dimension_bounds()

        dst_dim = Dimension('ddst', dst_size, dist=True)
        dst_dist = OcgDist()
        dst_dist.add_dimension(dst_dim)
        dst_dist.update_dimension_bounds()

        # Only rank 0 creates the full variables; every other rank passes ``None`` into the scatter.
        src = dst = None
        if vm.rank == 0:
            np.random.seed(1)
            dst_values = np.random.rand(dst_size)
            # The source values are drawn, without replacement, from the destination values.
            src_values = np.random.choice(dst_values, size=src_size, replace=False)

            src = Variable(name='src', value=src_values, dimensions=src_dim.name)
            # TODO: move create_ugid_global to create_global_index on a standard variable object
            dst = GeometryVariable(name='dst', value=dst_values, dimensions=dst_dim.name)

        src = variable_scatter(src, src_dist)
        dst = variable_scatter(dst, dst_dist)

        actual = create_index_variable_global('index_array', src, dst)

        reindexed = dst.get_value()[actual.get_value()]
        self.assertNumpyAll(reindexed, src.get_value())
Ejemplo n.º 6
0
    def test_create_index_variable_global(self):
        """Placeholder test for ``create_index_variable_global``; it is always skipped.

        The unconditional ``SkipTest`` makes everything after it unreachable. The remaining body sketches the
        intended check: indexing the destination values with the computed index should reproduce the source values.
        """
        raise SkipTest('not implemented')

        src_size, dst_size = 5, 7

        src_dim = Dimension('dsrc', src_size, dist=True)
        src_dist = OcgDist()
        src_dist.add_dimension(src_dim)
        src_dist.update_dimension_bounds()

        dst_dim = Dimension('ddst', dst_size, dist=True)
        dst_dist = OcgDist()
        dst_dist.add_dimension(dst_dim)
        dst_dist.update_dimension_bounds()

        # Only rank 0 creates the full variables; every other rank passes ``None`` into the scatter.
        src = dst = None
        if vm.rank == 0:
            np.random.seed(1)
            dst_values = np.random.rand(dst_size)
            # The source values are drawn, without replacement, from the destination values.
            src_values = np.random.choice(dst_values, size=src_size, replace=False)

            src = Variable(name='src', value=src_values, dimensions=src_dim.name)
            # TODO: move create_ugid_global to create_global_index on a standard variable object
            dst = GeometryVariable(name='dst', value=dst_values, dimensions=dst_dim.name)

        src = variable_scatter(src, src_dist)
        dst = variable_scatter(dst, dst_dist)

        actual = create_index_variable_global('index_array', src, dst)

        reindexed = dst.get_value()[actual.get_value()]
        self.assertNumpyAll(reindexed, src.get_value())
Ejemplo n.º 7
0
 def test_broadcast_variable(self):
     """Round-trip a masked variable through ``broadcast_variable``.

     The variable is reordered from (time, lat, lon) to (lon, lat, time) and back. After the first call the shape
     must be reversed and the value sum unchanged. After the second call the values and mask must equal the
     untouched copies, and the value array must pass the test case's may-share-memory assertion.
     """
     forward_names = ['time', 'lat', 'lon']
     reversed_names = ['lon', 'lat', 'time']
     reversed_shape = (5, 4, 3)

     value = np.random.rand(3, 4, 5)
     # Independent copies serve as references because ``broadcast_variable`` is applied to ``src`` in place.
     desired_value = deepcopy(value)
     mask = desired_value > 0.5
     desired_mask = deepcopy(mask)

     src = Variable(name='src', value=value, mask=mask, dimensions=forward_names)

     broadcast_variable(src, reversed_names)
     self.assertEqual(src.shape, reversed_shape)
     self.assertEqual(src.get_value().shape, reversed_shape)
     self.assertEqual(desired_value.sum(), src.get_value().sum())

     broadcast_variable(src, forward_names)
     self.assertNumpyAll(desired_value, src.get_value())
     self.assertNumpyMayShareMemory(value, src.get_value())
     self.assertNumpyAll(desired_mask, src.get_mask())
Ejemplo n.º 8
0
 def test_broadcast_variable(self):
     """Round-trip a masked variable through ``broadcast_variable``.

     The variable is reordered from (time, lat, lon) to (lon, lat, time) and back. After the first call the shape
     must be reversed and the value sum unchanged. After the second call the values and mask must equal the
     untouched copies, and the value array must pass the test case's may-share-memory assertion.
     """
     forward_names = ['time', 'lat', 'lon']
     reversed_names = ['lon', 'lat', 'time']
     reversed_shape = (5, 4, 3)

     value = np.random.rand(3, 4, 5)
     # Independent copies serve as references because ``broadcast_variable`` is applied to ``src`` in place.
     desired_value = deepcopy(value)
     mask = desired_value > 0.5
     desired_mask = deepcopy(mask)

     src = Variable(name='src', value=value, mask=mask, dimensions=forward_names)

     broadcast_variable(src, reversed_names)
     self.assertEqual(src.shape, reversed_shape)
     self.assertEqual(src.get_value().shape, reversed_shape)
     self.assertEqual(desired_value.sum(), src.get_value().sum())

     broadcast_variable(src, forward_names)
     self.assertNumpyAll(desired_value, src.get_value())
     self.assertNumpyMayShareMemory(value, src.get_value())
     self.assertNumpyAll(desired_mask, src.get_mask())
Ejemplo n.º 9
0
    def fixture_driver_scrip_netcdf_field(self):
        """Build a field of point centers laid out as a flattened x/y product for the SCRIP NetCDF driver.

        The center coordinates are the Cartesian product of five x values (10..30, step 5) and four y values
        (45..75, step 10), stored along a single ``grid_size`` dimension. A ``grid_dims`` variable holding
        ``[ny, nx]`` is added to the field.

        :return: The field wrapping an unstructured grid of the point centers.
        """
        lon_values = np.arange(10., 35., step=5)
        lat_values = np.arange(45., 85., step=10)
        n_lon = lon_values.shape[0]
        n_lat = lat_values.shape[0]

        dim_grid_size = Dimension(name='grid_size', size=n_lon * n_lat)
        x = Variable(name='grid_center_lon', dimensions=dim_grid_size)
        y = Variable(name='grid_center_lat', dimensions=dim_grid_size)

        # ``itertools.product`` varies the last iterable fastest, so y cycles within each x value.
        for position, (lon, lat) in enumerate(itertools.product(lon_values, lat_values)):
            x.get_value()[position] = lon
            y.get_value()[position] = lat

        point_centers = PointGC(x=x, y=y, crs=Spherical(), driver=DriverNetcdfSCRIP)
        field = Field(grid=GridUnstruct(geoms=[point_centers]), driver=DriverNetcdfSCRIP)

        field.add_variable(Variable(name='grid_dims', value=[n_lat, n_lon], dimensions='grid_rank'))

        return field
Ejemplo n.º 10
0
    def fixture_driver_scrip_netcdf_field(self):
        """Build a field of point centers laid out as a flattened x/y product for the SCRIP NetCDF driver.

        The center coordinates are the Cartesian product of five x values (10..30, step 5) and four y values
        (45..75, step 10), stored along a single ``grid_size`` dimension. A ``grid_dims`` variable holding
        ``[ny, nx]`` is added to the field.

        :return: The field wrapping an unstructured grid of the point centers.
        """
        lon_values = np.arange(10., 35., step=5)
        lat_values = np.arange(45., 85., step=10)
        n_lon = lon_values.shape[0]
        n_lat = lat_values.shape[0]

        dim_grid_size = Dimension(name='grid_size', size=n_lon * n_lat)
        x = Variable(name='grid_center_lon', dimensions=dim_grid_size)
        y = Variable(name='grid_center_lat', dimensions=dim_grid_size)

        # ``itertools.product`` varies the last iterable fastest, so y cycles within each x value.
        for position, (lon, lat) in enumerate(itertools.product(lon_values, lat_values)):
            x.get_value()[position] = lon
            y.get_value()[position] = lat

        point_centers = PointGC(x=x, y=y, crs=Spherical(), driver=DriverNetcdfSCRIP)
        field = Field(grid=GridUnstruct(geoms=[point_centers]), driver=DriverNetcdfSCRIP)

        field.add_variable(Variable(name='grid_dims', value=[n_lat, n_lon], dimensions='grid_rank'))

        return field
Ejemplo n.º 11
0
    def write_subsets(self, src_template, dst_template, wgt_template, index_path):
        """
        Write grid subsets to netCDF files using the provided filename templates, then write an index file
        describing them. The source and destination templates must contain the full file path with a single
        curly-brace pair to insert the combination counter. ``wgt_template`` should not be a full path. This name is
        used when generating weight files.

        >>> template_example = '/path/to/data_{}.nc'
        >>> wgt_template = 'esmf_weights_{}.nc'

        :param str src_template: The template for the source subset file.
        :param str dst_template: The template for the destination subset file.
        :param str wgt_template: The template for the weight filename.
        :param index_path: Path to the output indexing netCDF.
        """

        src_filenames, dst_filenames, wgt_filenames, dst_slices = [], [], [], []

        subsets = self.iter_src_grid_subsets(yield_dst=True)
        for ctr, (sub_src, sub_dst, dst_slc) in enumerate(subsets, start=1):
            # The one-based counter is inserted into each template as-is (no zero padding).
            src_path = src_template.format(ctr)
            dst_path = dst_template.format(ctr)
            wgt_filename = wgt_template.format(ctr)

            # Only the base filenames are recorded for the index file.
            src_filenames.append(os.path.basename(src_path))
            dst_filenames.append(os.path.basename(dst_path))
            wgt_filenames.append(wgt_filename)
            dst_slices.append(dst_slc)

            for target, path in ((sub_src, src_path), (sub_dst, dst_path)):
                # An empty subset is represented by an empty field with no grid attached.
                is_empty = bool(target.is_empty)
                field = Field(grid=None if is_empty else target, is_empty=is_empty)
                ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
                with vm.scoped_by_emptyable('field.write', field):
                    if not vm.is_null:
                        field.write(path)
                ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

        # NOTE(review): the scope below appears to restrict the index write to rank 0 - confirm against ``vm.scoped``.
        with vm.scoped('index write', [0]):
            if not vm.is_null:
                index_constants = GridSplitterConstants.IndexFile

                nfiles_dim = Dimension('nfiles', len(src_filenames))
                destination_role = index_constants.NAME_DESTINATION_VARIABLE
                # (variable name, filenames, attributes) for each filename variable in the index file.
                filename_specs = (
                    ('source_filename', src_filenames, {'esmf_role': 'grid_splitter_source'}),
                    ('destination_filename', dst_filenames, {'esmf_role': destination_role}),
                    ('weights_filename', wgt_filenames, {'esmf_role': 'grid_splitter_weights'}),
                )

                vc = VariableCollection()

                # The index variable carries no data; its attributes name the other variables in the file.
                index_name = index_constants.NAME_INDEX_VARIABLE
                vidx = Variable(name=index_name)
                vidx.attrs['esmf_role'] = index_name
                vidx.attrs['grid_splitter_source'] = 'source_filename'
                vidx.attrs[destination_role] = 'destination_filename'
                vidx.attrs['grid_splitter_weights'] = 'weights_filename'
                x_bounds = index_constants.NAME_X_BOUNDS_VARIABLE
                vidx.attrs[x_bounds] = x_bounds
                y_bounds = index_constants.NAME_Y_BOUNDS_VARIABLE
                vidx.attrs[y_bounds] = y_bounds
                vc.add_variable(vidx)

                for name, filenames, var_attrs in filename_specs:
                    filename_var = Variable(name=name, dimensions=nfiles_dim, dtype=str, value=filenames,
                                            attrs=var_attrs)
                    vc.add_variable(filename_var)

                bounds_dimension = Dimension(name='bounds', size=2)
                xb = Variable(name=x_bounds, dimensions=[nfiles_dim, bounds_dimension],
                              attrs={'esmf_role': 'x_split_bounds'}, dtype=int)
                yb = Variable(name=y_bounds, dimensions=[nfiles_dim, bounds_dimension],
                              attrs={'esmf_role': 'y_split_bounds'}, dtype=int)

                # Each destination slice is keyed by dimension name; store its start/stop per subset.
                x_name = self.dst_grid.x.dimensions[0].name
                y_name = self.dst_grid.y.dimensions[0].name
                for row, slc in enumerate(dst_slices):
                    x_slc = slc[x_name]
                    y_slc = slc[y_name]
                    xb.get_value()[row, :] = x_slc.start, x_slc.stop
                    yb.get_value()[row, :] = y_slc.start, y_slc.stop
                vc.add_variable(xb)
                vc.add_variable(yb)

                vc.write(index_path)

        vm.barrier()
Ejemplo n.º 12
0
Archivo: csv_.py Proyecto: wk1984/ocgis
    def _write_coll_(self, f, coll, add_geom_uid=True):
        """
        Write the collection through the parent converter, then write a geometry identifier shapefile.

        For non-aggregated operations, one field per container/field pair is written to ``<prefix>_gid.shp`` in the
        shapefile folder. Each carries the geometry, the optional coordinate system, the geometry identifier and,
        when the container has a geometry, the container's unique geometry identifier.

        :param f: Indexed by ``KeywordArgument.WRITE_MODE`` to get the write mode and passed through to the parent
         implementation.
        :param coll: The collection to write. Must provide ``iter_fields(yield_container=True)``.
        :param bool add_geom_uid: Passed through to the parent implementation.
        """
        debug_logger = 'csv-shp.converter'
        ocgis_lh(msg='entering _write_coll_ in {}'.format(self.__class__), logger=debug_logger,
                 level=logging.DEBUG)

        # Load the geometries. The geometry identifier is needed for the data write.
        for field, _ in coll.iter_fields(yield_container=True):
            field.set_abstraction_geom(create_ugid=True)

        # Write the output CSV file.
        ocgis_lh(msg='before CsvShapefileConverter super call in {}'.format(self.__class__), logger=debug_logger,
                 level=logging.DEBUG)
        super(CsvShapefileConverter, self)._write_coll_(f, coll, add_geom_uid=add_geom_uid)
        ocgis_lh(msg='after CsvShapefileConverter super call in {}'.format(self.__class__), logger=debug_logger,
                 level=logging.DEBUG)

        # The output geometry identifier shapefile path. Rank 0 resolves it and broadcasts it to the other ranks.
        fiona_path = None
        if vm.rank == 0:
            shp_folder = self._get_or_create_shp_folder_()
            fiona_path = os.path.join(shp_folder, self.prefix + '_gid.shp')
        fiona_path = vm.bcast(fiona_path)

        if self.ops.aggregate:
            ocgis_lh('creating a UGID-GID shapefile is not necessary for aggregated data. use UGID shapefile.',
                     'conv.csv-shp', logging.WARN)
            return

        # Write the geometries for each container/field combination.
        for field, container in coll.iter_fields(yield_container=True):
            # The container may be empty. Only add the unique geometry identifier if the container has an
            # associated geometry.
            has_container_geom = container.geom is not None
            if has_container_geom:
                ugid_var = Variable(name=container.geom.ugid.name, dimensions=field.geom.dimensions,
                                    dtype=constants.DEFAULT_NP_INT)
                # Fill the whole variable with the container's first unique geometry identifier.
                ugid_var.get_value()[:] = container.geom.ugid.get_value()[0]

            # Extract the variable components of the geometry file.
            geom = field.geom.copy().extract()
            crs = None
            if field.crs is not None:
                crs = field.crs.copy().extract()

            # Work on an extracted copy of the dataset geometry identifier.
            gid = field[HeaderName.ID_GEOMETRY].copy().extract()

            # Construct the field to write.
            field_to_write = Field(geom=geom, crs=crs, uid=field.uid)
            if has_container_geom:
                field_to_write.add_variable(ugid_var, is_data=True)
            field_to_write.add_variable(gid, is_data=True)

            # Maintain the field/dataset unique identifier if there is one.
            if field.uid is not None:
                existing_records = gid.repeat_record
                records = [] if existing_records is None else list(existing_records)
                records.append((HeaderName.DATASET_IDENTIFER, field.uid))
                gid.repeat_record = records

            # Write the field.
            field_to_write.write(fiona_path, write_mode=f[KeywordArgument.WRITE_MODE], driver=DriverKey.VECTOR)