Esempio n. 1
0
    def test(self):
        """Write grid subsets and check the written files against the source grid.

        Verifies the destination slices yielded by the splitter, that every
        written source subset spatially contains its destination subset, that
        the ``'data'`` sums over all destination subsets add up to the sum of
        the original destination grid, and that an index file is produced.
        """
        splitter = self.get_grid_splitter()

        # Rank-local sum of the destination data; gathered so rank 0 can total it.
        local_total = splitter.dst_grid.parent['data'].get_value().sum()
        gathered_totals = MPI_COMM.gather(local_total)
        if MPI_RANK == 0:
            desired_sum = np.sum(gathered_totals)

        # Two chunks along y by three chunks along x, iterated with x varying fastest.
        y_slices = (slice(0, 180, None), slice(180, 360, None))
        x_slices = (slice(0, 240, None), slice(240, 480, None), slice(480, 720, None))
        desired = [{'y': ys, 'x': xs} for ys in y_slices for xs in x_slices]
        self.assertEqual(list(splitter.iter_dst_grid_slices()), desired)

        splitter.write_subsets()

        # Only rank 0 accumulates the per-file totals.
        if MPI_RANK == 0:
            rank_sums = []

        n_subsets = splitter.nsplits_dst[0] * splitter.nsplits_dst[1]
        for index in range(1, n_subsets + 1):
            src_path = splitter.create_full_path_from_template('src_template', index=index)
            dst_path = splitter.create_full_path_from_template('dst_template', index=index)

            src_field = RequestDataset(src_path).get()
            dst_field = RequestDataset(dst_path).get()

            # The source subset's global extent must contain the destination's.
            self.assertTrue(does_contain(box(*src_field.grid.extent_global),
                                         box(*dst_field.grid.extent_global)))

            for field in (src_field, dst_field):
                self.assertIn('data', get_variable_names(field.data_variables))

            local_sum = dst_field['data'].get_value().sum()
            gathered_sums = MPI_COMM.gather(local_sum)
            if MPI_RANK == 0:
                rank_sums.append(np.sum(gathered_sums))

        if MPI_RANK == 0:
            self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
            self.assertTrue(os.path.exists(splitter.create_full_path_from_template('index_file')))

        MPI_COMM.Barrier()

        # Every rank reads the index file back after the barrier.
        index_field = RequestDataset(splitter.create_full_path_from_template('index_file')).get()
        n_keys = len(list(index_field.keys()))
        self.assertTrue(n_keys > 2)
Esempio n. 2
0
    def test(self):
        """Write grid chunks and validate the chunk files and the index file.

        The destination grid is expected to be chunked into six slices (two
        along ``y``, three along ``x``). After ``write_chunks()``, every
        source/destination file pair is read back and checked: the source
        extent must contain the destination extent, both must expose a
        ``'data'`` variable, and the destination ``'data'`` sums accumulated
        across all chunks must match the sum of the original destination grid.
        Finally the index file must exist and be readable.
        """
        gs = self.fixture_grid_chunker()

        # Rank-local sum of the destination data, gathered onto the root rank.
        desired_dst_grid_sum = gs.dst_grid.parent['data'].get_value().sum()
        desired_dst_grid_sum = MPI_COMM.gather(desired_dst_grid_sum)
        if vm.rank == 0:
            desired_sum = np.sum(desired_dst_grid_sum)

        desired = [{'y': slice(0, 180, None), 'x': slice(0, 240, None)},
                   {'y': slice(0, 180, None), 'x': slice(240, 480, None)},
                   {'y': slice(0, 180, None), 'x': slice(480, 720, None)},
                   {'y': slice(180, 360, None), 'x': slice(0, 240, None)},
                   {'y': slice(180, 360, None), 'x': slice(240, 480, None)},
                   {'y': slice(180, 360, None), 'x': slice(480, 720, None)}]
        actual = list(gs.iter_dst_grid_slices())
        self.assertEqual(actual, desired)

        gs.write_chunks()

        # Only the root rank accumulates the per-chunk totals.
        if vm.rank == 0:
            rank_sums = []

        for ctr in range(1, gs.nchunks_dst[0] * gs.nchunks_dst[1] + 1):
            src_path = gs.create_full_path_from_template('src_template', index=ctr)
            dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

            src_field = RequestDataset(src_path).get()
            dst_field = RequestDataset(dst_path).get()

            src_envelope_global = box(*src_field.grid.extent_global)
            dst_envelope_global = box(*dst_field.grid.extent_global)

            # The source chunk must spatially contain its destination chunk.
            self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

            actual = get_variable_names(src_field.data_variables)
            self.assertIn('data', actual)

            actual = get_variable_names(dst_field.data_variables)
            self.assertIn('data', actual)
            actual_data_sum = dst_field['data'].get_value().sum()
            actual_data_sum = MPI_COMM.gather(actual_data_sum)
            # Guard with vm.rank, the same predicate that created rank_sums
            # above, so the list is never appended to on a rank that did not
            # define it.
            if vm.rank == 0:
                actual_data_sum = np.sum(actual_data_sum)
                rank_sums.append(actual_data_sum)

        if vm.rank == 0:
            self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
            index_path = gs.create_full_path_from_template('index_file')
            self.assertTrue(os.path.exists(index_path))

        vm.barrier()

        # Every rank reads the index file back after the barrier.
        index_path = gs.create_full_path_from_template('index_file')
        index_field = RequestDataset(index_path).get()
        self.assertTrue(len(list(index_field.keys())) > 2)
Esempio n. 3
0
    def test_get_intersection_state_boundaries(self):
        """Intersect a global grid with the union of the state boundary geometries.

        The geometries are read from ``self.path_state_boundaries``, unioned,
        and the resulting geometry is broadcast to all ranks. The number of
        masked elements (or the total area, when bounds are requested) is then
        summed over ranks and compared to a fixed value on rank 0.
        """
        with fiona.open(self.path_state_boundaries) as source:
            geoms = [shape(record['geometry']) for record in source]

        unioned = GeometryVariable(value=geoms, dimensions='ngeom').get_unioned()

        # The unioned variable may be None on this rank; whatever rank 0 holds
        # is what every rank ends up with after the broadcast.
        subset = None if unioned is None else unioned.get_value().flatten()[0]
        subset = MPI_COMM.bcast(subset)

        for k in self.iter_product_keywords(dict(with_bounds=[False])):
            grid = self.get_gridxy_global(resolution=2.0, with_bounds=k.with_bounds)
            res = grid.get_intersection(subset)

            if res.is_empty:
                self.assertIsInstance(res, GeometryVariable)
            else:
                self.assertTrue(res.get_mask().any())

            if k.with_bounds:
                # Total intersected area across ranks; a missing area counts as zero.
                area = res.area
                local_area = 0.0 if area is None else area.sum()
                gathered_areas = MPI_COMM.gather(local_area)
                if MPI_RANK == 0:
                    self.assertAlmostEqual(sum(gathered_areas), 1096.0819224080542)
            else:
                # Total masked count across ranks; a missing mask counts as zero.
                mask = res.get_mask()
                local_masked = 0 if mask is None else mask.sum()
                gathered_masked = MPI_COMM.gather(local_masked)
                if MPI_RANK == 0:
                    self.assertEqual(sum(gathered_masked), 858)
Esempio n. 4
0
    def test_system_with_distributed_dimensions_from_file_shapefile(self):
        """Check that a distributed shapefile read reassembles to the reference values.

        The ``STATE_NAME`` values are first read inside a scope restricted to
        rank 0, then read again by all ranks, gathered, and compared on rank 0.
        """
        shp_path = self.path_state_boundaries

        # Reference values, loaded only where the scoped VM is not null.
        with vm.scoped('desired data write', [0]):
            if not vm.is_null:
                reference_var = SourcedVariable(
                    name='STATE_NAME',
                    request_dataset=RequestDataset(uri=shp_path, driver=DriverVector))
                value_desired = reference_var.get_value().tolist()
                self.assertEqual(len(value_desired), 51)

        rd = RequestDataset(uri=shp_path, driver=DriverVector)
        fvar = SourcedVariable(name='STATE_NAME', request_dataset=rd)
        group_dimensions = rd.driver.dist.get_group()['dimensions']
        self.assertEqual(len(group_dimensions), 1)

        first_dimension = fvar.dimensions[0]
        self.assertTrue(first_dimension.dist)
        self.assertIsNotNone(fvar.get_value())
        if MPI_SIZE > 1:
            self.assertLessEqual(fvar.shape[0], 26)

        gathered = MPI_COMM.gather(fvar.get_value())
        if MPI_RANK != 0:
            # Non-root ranks receive nothing from the gather.
            self.assertIsNone(gathered)
        else:
            self.assertEqual(hgather(gathered).tolist(), value_desired)
Esempio n. 5
0
    def test_system_with_distributed_dimensions_from_file_shapefile(self):
        """Read ``STATE_NAME`` from the shapefile on all ranks and compare to a rank-0 read.

        A reference copy of the values is loaded inside a VM scope limited to
        rank 0. The same variable is then loaded by every rank, gathered to
        rank 0, stacked with ``hgather`` and compared to the reference.
        """
        source_uri = self.path_state_boundaries

        def load_state_names():
            # Build a fresh request dataset and the variable sourced from it.
            request = RequestDataset(uri=source_uri, driver=DriverVector)
            return request, SourcedVariable(name='STATE_NAME', request_dataset=request)

        with vm.scoped('desired data write', [0]):
            if not vm.is_null:
                _, var_desired = load_state_names()
                value_desired = var_desired.get_value().tolist()
                self.assertEqual(len(value_desired), 51)

        rd, fvar = load_state_names()
        self.assertEqual(len(rd.driver.dist.get_group()['dimensions']), 1)

        self.assertTrue(fvar.dimensions[0].dist)
        self.assertIsNotNone(fvar.get_value())
        if MPI_SIZE > 1:
            self.assertLessEqual(fvar.shape[0], 26)

        on_root = MPI_RANK == 0
        values = MPI_COMM.gather(fvar.get_value())
        if on_root:
            stacked = hgather(values)
            self.assertEqual(stacked.tolist(), value_desired)
        else:
            # The gather result is only delivered to the root rank.
            self.assertIsNone(values)
Esempio n. 6
0
    def test_system_spatial_averaging_from_file(self):
        """Spatially average ``tas`` over the Nebraska polygon read from file.

        The polygon is located on whichever rank holds it, shared with all
        ranks, and used to subset the first ten time steps of the netCDF
        field. The summed spatial average is compared to a value keyed by the
        number of ranks in the scoped VM.
        """
        rd_nc = self.test_data.get_rd('cancm4_tas')

        field_shp = RequestDataset(self.path_state_boundaries).get()

        dmap = field_shp.dimension_map
        self.assertIsNotNone(dmap.get_variable(DMK.GEOM))
        self.assertEqual(len(dmap.get_dimension(DMK.GEOM)), 1)

        self.assertEqual(field_shp.crs, WGS84())

        try:
            is_nebraska = field_shp['STATE_NAME'].get_value() == 'Nebraska'
            index_geom = np.where(is_nebraska)[0][0]
        except IndexError:
            # Not found on rank.
            polygon_field = None
        else:
            polygon_field = field_shp.get_field_slice({'geom': index_geom})

        # Collect every rank's candidate on rank 0 and keep the first real one.
        # If no candidate exists, the gathered list itself is what gets broadcast.
        selected = MPI_COMM.gather(polygon_field)
        if MPI_RANK == 0:
            found = [candidate for candidate in selected if candidate is not None]
            if found:
                selected = found[0]
        polygon_field = MPI_COMM.bcast(selected)
        polygon_field.unwrap()
        polygon = polygon_field.geom.get_value()[0]

        field_nc = rd_nc.get()
        sub_field_nc = field_nc.get_field_slice({'time': slice(0, 10)})
        self.assertEqual(sub_field_nc['tas']._dimensions,
                         field_nc['tas']._dimensions)
        sub = sub_field_nc.grid.get_intersects(polygon)

        # The expected sum differs by process count because of floating point
        # summation order.
        desired = {1: 2734.5195, 2: 2740.4014}
        with vm.scoped_by_emptyable('grid intersects', sub):
            if not vm.is_null:
                abstraction_geometry = sub.get_abstraction_geometry()
                sub.parent.add_variable(abstraction_geometry, force=True)
                unioned = abstraction_geometry.get_unioned(spatial_average='tas')
                if unioned is not None:
                    tas = unioned.parent['tas']
                    self.assertFalse(tas.is_empty)
                    actual_sum = tas.get_value().sum()
                    self.assertAlmostEqual(actual_sum, desired[vm.size], places=4)
Esempio n. 7
0
    def test_system_parallel_write_ndvariable(self):
        """Scatter a four-dimensional field and write it in parallel with the vector driver.

        Rank 0 builds the field; it is scattered using a distribution whose
        ``y`` dimension is distributed, written by every non-empty rank, then
        read back so the total geometry count can be checked.
        """
        dist = OcgDist()
        dist.create_dimension('time', 3)
        dist.create_dimension('extra', 2)
        dist.create_dimension('x', 4)
        dist.create_dimension('y', 7, dist=True)
        dist.update_dimension_bounds()

        # Only rank 0 has real values before the broadcast and scatter.
        path, vc = None, None
        if MPI_RANK == 0:
            path = self.get_temporary_file_path('foo.shp')

            time_var = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
            time_var.set_extrapolated_bounds('the_time_bounds', 'bounds')

            extra_var = Variable(name='extra', value=[7, 8], dimensions='extra')

            x_var = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
            x_var.set_extrapolated_bounds('x_bounds', 'bounds')

            # This variable sits on the distributed dimension.
            y_var = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
            y_var.set_extrapolated_bounds('y_bounds', 'bounds')

            data_var = Variable(name='data',
                                value=np.random.rand(3, 2, 7, 4),
                                dimensions=['time', 'extra', 'y', 'x'])

            dimension_map = {
                'x': {'variable': 'x', 'bounds': 'x_bounds'},
                'y': {'variable': 'y', 'bounds': 'y_bounds'},
                'time': {'variable': 'time', 'bounds': 'the_time_bounds'},
            }

            vc = Field(variables=[time_var, extra_var, x_var, y_var, data_var],
                       dimension_map=dimension_map,
                       is_data='data')
            vc.set_abstraction_geom()

        path = MPI_COMM.bcast(path)
        vc = variable_collection_scatter(vc, dist)
        with vm.scoped_by_emptyable('write', vc):
            if not vm.is_null:
                vc.write(path, driver=DriverVector)
        MPI_COMM.Barrier()

        # 168 equals 3 * 2 * 7 * 4, the product of the dimension sizes;
        # presumably one geometry record per data element -- TODO confirm.
        written = RequestDataset(path, driver=DriverVector)
        sizes = MPI_COMM.gather(written.get().geom.shape[0])
        if MPI_RANK == 0:
            self.assertEqual(sum(sizes), 168)
Esempio n. 8
0
    def test_system_parallel_write_ndvariable(self):
        """Write an n-dimensional variable to a shapefile from multiple ranks.

        A field with ``time``, ``extra``, ``y`` and ``x`` dimensions is created
        on rank 0, scattered along the distributed ``y`` dimension, and written
        with ``DriverVector``. The per-rank geometry counts read back from the
        file must sum to 168.
        """
        ompi = OcgDist()
        for dim_name, dim_size, dim_kwargs in (('time', 3, {}),
                                               ('extra', 2, {}),
                                               ('x', 4, {}),
                                               ('y', 7, {'dist': True})):
            ompi.create_dimension(dim_name, dim_size, **dim_kwargs)
        ompi.update_dimension_bounds()

        is_root = MPI_RANK == 0
        if not is_root:
            # Non-root ranks contribute nothing to the broadcast or scatter.
            path = None
            vc = None
        else:
            path = self.get_temporary_file_path('foo.shp')

            t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
            t.set_extrapolated_bounds('the_time_bounds', 'bounds')

            extra = Variable(name='extra', value=[7, 8], dimensions='extra')

            x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
            x.set_extrapolated_bounds('x_bounds', 'bounds')

            # 'y' is the dimension created with dist=True above.
            y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
            y.set_extrapolated_bounds('y_bounds', 'bounds')

            random_values = np.random.rand(3, 2, 7, 4)
            data = Variable(name='data', value=random_values, dimensions=['time', 'extra', 'y', 'x'])

            dimension_map = {}
            dimension_map['x'] = {'variable': 'x', 'bounds': 'x_bounds'}
            dimension_map['y'] = {'variable': 'y', 'bounds': 'y_bounds'}
            dimension_map['time'] = {'variable': 'time', 'bounds': 'the_time_bounds'}

            vc = Field(variables=[t, extra, x, y, data], dimension_map=dimension_map, is_data='data')
            vc.set_abstraction_geom()

        path = MPI_COMM.bcast(path)
        vc = variable_collection_scatter(vc, ompi)
        with vm.scoped_by_emptyable('write', vc):
            if not vm.is_null:
                vc.write(path, driver=DriverVector)
        MPI_COMM.Barrier()

        expected_total = 168
        local_count = RequestDataset(path, driver=DriverVector).get().geom.shape[0]
        counts = MPI_COMM.gather(local_count)
        if is_root:
            self.assertEqual(sum(counts), expected_total)
Esempio n. 9
0
    def test_system_spatial_averaging_from_file(self):
        """Average ``tas`` over the Nebraska geometry loaded from the state boundaries file.

        The rank holding the ``'Nebraska'`` record slices it out, rank 0 picks
        the first available slice and broadcasts it, and the unwrapped polygon
        is used to intersect the first ten time steps of the netCDF grid.
        """
        rd_nc = self.test_data.get_rd('cancm4_tas')

        rd_shp = RequestDataset(self.path_state_boundaries)
        field_shp = rd_shp.get()

        geom_variable = field_shp.dimension_map.get_variable(DMK.GEOM)
        self.assertIsNotNone(geom_variable)
        geom_dimension = field_shp.dimension_map.get_dimension(DMK.GEOM)
        self.assertEqual(len(geom_dimension), 1)

        self.assertEqual(field_shp.crs, WGS84())

        # Indices of the matching records on this rank; empty when not found here.
        matches = np.where(field_shp['STATE_NAME'].get_value() == 'Nebraska')[0]
        if len(matches) == 0:
            polygon_field = None
        else:
            polygon_field = field_shp.get_field_slice({'geom': matches[0]})

        gathered = MPI_COMM.gather(polygon_field)
        if MPI_RANK == 0:
            # First non-None candidate; when there is none the gathered list
            # itself is kept.
            gathered = next((p for p in gathered if p is not None), gathered)
        polygon_field = MPI_COMM.bcast(gathered)
        polygon_field.unwrap()
        polygon = polygon_field.geom.get_value()[0]

        field_nc = rd_nc.get()
        sub_field_nc = field_nc.get_field_slice({'time': slice(0, 10)})
        self.assertEqual(sub_field_nc['tas']._dimensions, field_nc['tas']._dimensions)
        sub = sub_field_nc.grid.get_intersects(polygon)

        # Floating point summation differs when the work is split across two
        # processes, hence one expected value per VM size.
        desired = {1: 2734.5195, 2: 2740.4014}
        with vm.scoped_by_emptyable('grid intersects', sub):
            if not vm.is_null:
                abstraction_geometry = sub.get_abstraction_geometry()
                sub.parent.add_variable(abstraction_geometry, force=True)
                unioned = abstraction_geometry.get_unioned(spatial_average='tas')
                if unioned is not None:
                    tas = unioned.parent['tas']
                    self.assertFalse(tas.is_empty)
                    expected = desired[vm.size]
                    self.assertAlmostEqual(tas.get_value().sum(), expected, places=4)