Example #1
0
    def test_collect_multipart_gdal_raster(self):
        """Datastack: test collect multipart gdal raster."""
        from natcap.invest import datastack
        params = {
            'raster': os.path.join(DATA_DIR, 'dem'),
        }

        # Collect the raster's files into a single archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')

        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']

        self.assertEqual(len(archived_params), 1)
        # Pixel values must survive the archive round trip unchanged.
        model_array = pygeoprocessing.raster_to_numpy_array(params['raster'])
        reg_array = pygeoprocessing.raster_to_numpy_array(
            os.path.join(out_directory, archived_params['raster']))
        numpy.testing.assert_allclose(model_array, reg_array)
Example #2
0
    def test_list_of_inputs(self):
        """Datastack: a list of file paths archives every listed file."""
        from natcap.invest import datastack
        params = {
            'file_list': [
                os.path.join(self.workspace, 'foo.txt'),
                os.path.join(self.workspace, 'bar.txt'),
            ]
        }
        # Each file's content is its own path, so the two files differ.
        for filename in params['file_list']:
            with open(filename, 'w') as textfile:
                textfile.write(filename)

        src_digest = pygeoprocessing.testing.digest_file_list(
            params['file_list'])

        # Collect the file into an archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']
        dest_digest = pygeoprocessing.testing.digest_file_list(
            [os.path.join(out_directory, filename)
             for filename in archived_params['file_list']])

        self.assertEqual(len(archived_params), 1)  # sanity check
        if src_digest != dest_digest:
            self.fail('Digest mismatch: src:%s != dest:%s' % (
                src_digest, dest_digest))
Example #3
0
    def execute_model(workspace, source_parameter_set):
        """Helper function to run a model from its parameter set file.

        Args:
            workspace (str): The path to the workspace to use for the test run.
                All files will be written here.
            source_parameter_set (str): The path to the parameter set from
                which the args dict and model name should be loaded.

        Returns:
            ``None``

        Raises:
            AssertionError: If the loaded args fail the model's validation.
        """
        from natcap.invest import datastack

        source_args = datastack.extract_parameter_set(source_parameter_set)
        model_name = source_args.model_name

        # Round-trip the args through a datastack archive so the run also
        # exercises archive creation and extraction.
        datastack_archive_path = os.path.join(workspace,
                                              'datastack.invs.tar.gz')
        datastack.build_datastack_archive(source_args.args, model_name,
                                          datastack_archive_path)

        extraction_dir = os.path.join(workspace, 'archived_data')
        args = datastack.extract_datastack_archive(datastack_archive_path,
                                                   extraction_dir)
        args['workspace_dir'] = os.path.join(workspace, 'workspace')

        # validate the args for good measure
        module = importlib.import_module(name=model_name)
        errors = module.validate(args)
        if errors:  # idiomatic truthiness check instead of ``!= []``
            # Fixed typo in message: "founds" -> "found".
            raise AssertionError(f"Errors found: {pprint.pformat(errors)}")

        module.execute(args)
Example #4
0
    def test_collect_simple_parameters(self):
        """Datastack: test collect simple parameters."""
        from natcap.invest import datastack
        params = {
            'a': 1,
            'b': 'hello there',
            'c': 'plain bytestring',
            'd': '',
            'workspace_dir': os.path.join(self.workspace),
        }

        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')

        datastack.build_datastack_archive(
            params, 'test_datastack_modules.simple_parameters', archive_path)
        out_directory = os.path.join(self.workspace, 'extracted_archive')

        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        self.assertEqual(len(os.listdir(out_directory)), 3)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']

        # We expect the workspace to be excluded from the resulting args dict.
        self.assertEqual(
            archived_params, {
                'a': 1,
                'b': 'hello there',
                'c': 'plain bytestring',
                'd': ''
            })
Example #5
0
    def test_mixed_path_separators_in_archive(self):
        """Datastacks: datastack archives must handle windows, linux paths."""
        from natcap.invest import datastack

        args = {
            'windows_path': os.path.join(self.workspace,
                                         'dir1\\filepath1.txt'),
            'linux_path': os.path.join(self.workspace,
                                       'dir2/filepath2.txt'),
        }
        for filepath in args.values():
            # Normalize to the current OS's separator before touching disk.
            normalized_path = os.path.normpath(filepath.replace('\\', os.sep))
            # exist_ok replaces the old try/except OSError pattern, which
            # could also mask unrelated OS errors (e.g. permission denied).
            os.makedirs(os.path.dirname(normalized_path), exist_ok=True)

            with open(normalized_path, 'w') as open_file:
                open_file.write('the contents of this file do not matter.')

        datastack_path = os.path.join(self.workspace, 'archive.invest.tar.gz')
        datastack.build_datastack_archive(args, 'sample_model', datastack_path)

        extraction_path = os.path.join(self.workspace, 'extracted_dir')
        extracted_args = datastack.extract_datastack_archive(datastack_path,
                                                             extraction_path)

        # Both args should resolve to files in the archive's data dir,
        # regardless of which separator style the source path used.
        expected_args = {
            u'windows_path': os.path.join(extraction_path, 'data',
                                          u'filepath1.txt'),
            u'linux_path': os.path.join(extraction_path, u'data',
                                        u'filepath2.txt'),
        }
        self.maxDiff = None  # show whole exception on failure
        self.assertEqual(extracted_args, expected_args)
Example #6
0
    def test_collect_ogr_table(self):
        """Datastack: a CSV table is archived and extracted intact."""
        from natcap.invest import datastack
        params = {
            'table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
        }

        # Collect the table's files into a single archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']
        pygeoprocessing.testing.assert_csv_equal(
            params['table'], os.path.join(out_directory,
                                          archived_params['table'])
        )

        self.assertEqual(len(archived_params), 1)  # sanity check
Example #7
0
    def test_nonspatial_single_file(self):
        """Datastack: a single nonspatial text file round-trips intact."""
        from natcap.invest import datastack

        params = {
            'some_file': os.path.join(self.workspace, 'foo.txt')
        }
        with open(params['some_file'], 'w') as textfile:
            textfile.write('some text here!')

        # Collect the file into an archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']
        pygeoprocessing.testing.assert_text_equal(
            params['some_file'], os.path.join(out_directory,
                                              archived_params['some_file'])
        )

        self.assertEqual(len(archived_params), 1)  # sanity check
Example #8
0
    def test_collect_multipart_gdal_raster(self):
        """Datastack: a multipart GDAL raster archives and extracts intact."""
        from natcap.invest import datastack
        params = {
            'raster': os.path.join(FW_DATA, 'dem'),
        }

        # Collect the raster's files into a single archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')

        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']

        self.assertEqual(len(archived_params), 1)
        pygeoprocessing.testing.assert_rasters_equal(
            params['raster'],
            os.path.join(out_directory, archived_params['raster']))
Example #9
0
    def test_collect_rasters(self):
        """Datastack: test collect GDAL rasters."""
        from natcap.invest import datastack
        for raster_filename in (
                'dem',  # This is a multipart raster
                'landcover.tif'):  # This is a single-file raster

            params = {
                'raster': os.path.join(DATA_DIR, raster_filename),
            }

            # Collect the raster's files into a single archive
            archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
            datastack.build_datastack_archive(params,
                                              'test_datastack_modules.raster',
                                              archive_path)

            # extract the archive
            out_directory = os.path.join(self.workspace, 'extracted_archive')

            with tarfile.open(archive_path) as tar:
                tar.extractall(out_directory)

            # Use a context manager so the parameter file handle is closed
            # promptly instead of being leaked by json.load(open(...)).
            with open(os.path.join(
                    out_directory,
                    datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
                archived_params = json.load(params_file)['args']

            self.assertEqual(len(archived_params), 1)
            # Pixel values must survive the archive round trip unchanged.
            model_array = pygeoprocessing.raster_to_numpy_array(
                params['raster'])
            reg_array = pygeoprocessing.raster_to_numpy_array(
                os.path.join(out_directory, archived_params['raster']))
            numpy.testing.assert_allclose(model_array, reg_array)
Example #10
0
    def test_collect_simple_parameters(self):
        """Datastack: scalar and string args are stored in the archive."""
        from natcap.invest import datastack
        params = {
            'a': 1,
            'b': u'hello there',
            'c': 'plain bytestring',
            'd': '',
        }

        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')

        datastack.build_datastack_archive(params, 'sample_model', archive_path)
        out_directory = os.path.join(self.workspace, 'extracted_archive')

        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        self.assertEqual(len(os.listdir(out_directory)), 3)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']

        self.assertEqual(
            archived_params, {
                'a': 1,
                'b': u'hello there',
                'c': u'plain bytestring',
                'd': ''
            })
Example #11
0
    def test_duplicate_filepaths(self):
        """Datastack: two args pointing at one file are archived once."""
        from natcap.invest import datastack
        params = {
            'foo': os.path.join(self.workspace, 'foo.txt'),
            'bar': os.path.join(self.workspace, 'foo.txt'),
        }
        with open(params['foo'], 'w') as textfile:
            textfile.write('hello world!')

        # Collect the file into an archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']

        # Assert that the archived 'foo' and 'bar' params point to the same
        # file.
        self.assertEqual(archived_params['foo'], archived_params['bar'])

        # Assert we have the expected number of files in the archive
        self.assertEqual(len(os.listdir(os.path.join(out_directory))), 3)

        # Assert we have the expected number of files in the data dir.
        self.assertEqual(
            len(os.listdir(os.path.join(out_directory, 'data'))), 1)
Example #12
0
    def test_list_of_inputs(self):
        """Datastack: test list of inputs."""
        from natcap.invest import datastack
        params = {
            'file_list': [
                os.path.join(self.workspace, 'foo.txt'),
                os.path.join(self.workspace, 'bar.txt'),
            ]
        }
        # Each file's content is its own path, so the two files differ.
        for filename in params['file_list']:
            with open(filename, 'w') as textfile:
                textfile.write(filename)

        # Collect the file into an archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']
        archived_file_list = [
            os.path.join(out_directory, filename)
            for filename in archived_params['file_list']]

        self.assertEqual(len(archived_params), 1)  # sanity check
        for expected_file, archive_file in zip(
                params['file_list'], archived_file_list):
            if not filecmp.cmp(expected_file, archive_file, shallow=False):
                self.fail(f'File mismatch: {expected_file} != {archive_file}')
Example #13
0
    def test_archive_extraction(self):
        """Datastack: a full args dict round-trips through an archive.

        Exercises every supported parameter type at once: scalars, strings,
        single files, duplicate file paths, a file list, a data directory,
        a raster, a vector, and a CSV table.
        """
        from natcap.invest import datastack
        params = {
            'blank': '',
            'a': 1,
            'b': u'hello there',
            'c': 'plain bytestring',
            # 'foo' and 'bar' deliberately point at the same file.
            'foo': os.path.join(self.workspace, 'foo.txt'),
            'bar': os.path.join(self.workspace, 'foo.txt'),
            'file_list': [
                os.path.join(self.workspace, 'file1.txt'),
                os.path.join(self.workspace, 'file2.txt'),
            ],
            'data_dir': os.path.join(self.workspace, 'data_dir'),
            'raster': os.path.join(DATA_DIR, 'dem'),
            'vector': os.path.join(DATA_DIR, 'watersheds.shp'),
            'table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
        }
        # synthesize sample data
        os.makedirs(params['data_dir'])
        for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
            data_filepath = os.path.join(params['data_dir'], filename)
            with open(data_filepath, 'w') as textfile:
                textfile.write(filename)

        with open(params['foo'], 'w') as textfile:
            textfile.write('hello world!')

        # Each listed file's content is its own path, so the files differ.
        for filename in params['file_list']:
            with open(filename, 'w') as textfile:
                textfile.write(filename)

        # collect parameters:
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        archive_params = datastack.extract_datastack_archive(
            archive_path, out_directory)
        # Spatial data must be equivalent (not necessarily byte-identical).
        pygeoprocessing.testing.assert_rasters_equal(
            archive_params['raster'], params['raster'])
        pygeoprocessing.testing.assert_vectors_equal(
            archive_params['vector'], params['vector'], field_tolerance=1e-6)
        pygeoprocessing.testing.assert_csv_equal(
            archive_params['table'], params['table'])
        # Scalars and strings must come back exactly as provided.
        for key in ('blank', 'a', 'b', 'c'):
            self.assertEqual(archive_params[key],
                             params[key],
                             'Params differ for key %s' % key)

        for key in ('foo', 'bar'):
            pygeoprocessing.testing.assert_text_equal(
                archive_params[key], params[key])

        # File lists are compared by digest of their contents.
        self.assertEqual(
            pygeoprocessing.testing.digest_file_list(
                archive_params['file_list']),
            pygeoprocessing.testing.digest_file_list(params['file_list']))
Example #14
0
    def test_nested_args_keys(self):
        """Datastack: nested dictionaries in args survive a round trip."""
        from natcap.invest import datastack

        original_args = {'a': {'b': 1}}

        target_archive = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(
            original_args, 'sample_model', target_archive)

        unpacked_dir = os.path.join(self.workspace, 'extracted_archive')
        roundtripped_args = datastack.extract_datastack_archive(
            target_archive, unpacked_dir)
        self.assertEqual(roundtripped_args, original_args)
Example #15
0
    def test_nonspatial_files(self):
        """Datastack: test nonspatial files."""
        from natcap.invest import datastack

        params = {
            'some_file': os.path.join(self.workspace, 'foo.txt'),
            'data_dir': os.path.join(self.workspace, 'data_dir')
        }
        with open(params['some_file'], 'w') as textfile:
            textfile.write('some text here!')

        os.makedirs(params['data_dir'])
        for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
            data_filepath = os.path.join(params['data_dir'], filename)
            with open(data_filepath, 'w') as textfile:
                textfile.write(filename)

        # make a folder within the data folder.
        nested_folder = os.path.join(params['data_dir'], 'nested')
        os.makedirs(nested_folder)
        with open(os.path.join(nested_folder, 'nested.txt'), 'w') as textfile:
            textfile.write('hello, world!')

        # Collect the file into an archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(
            params, 'test_datastack_modules.nonspatial_files', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']
        self.assertTrue(
            filecmp.cmp(params['some_file'],
                        os.path.join(out_directory,
                                     archived_params['some_file']),
                        shallow=False))
        self.assertEqual(len(archived_params), 2)  # sanity check

        # The nested folder structure must survive archiving.
        common_files = ['foo.txt', 'bar.txt', 'baz.txt', 'nested/nested.txt']
        matched_files, mismatch_files, error_files = filecmp.cmpfiles(
            params['data_dir'],
            os.path.join(out_directory, archived_params['data_dir']),
            common_files,
            shallow=False)
        if mismatch_files or error_files:
            self.fail('Directory mismatch or error. The mismatches are'
                      f' {mismatch_files} ; and the errors are {error_files}')
Example #16
0
    def test_collect_geotiff(self):
        """Datastack: a single-file GeoTIFF archives and extracts cleanly."""
        # Necessary test, as this is proving to be an issue.
        from natcap.invest import datastack
        args = {'raster': os.path.join(DATA_DIR, 'landcover.tif')}

        target_archive = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(args, 'sample_model', target_archive)

        extraction_dir = os.path.join(self.workspace, 'extracted_archive')
        extracted_args = datastack.extract_datastack_archive(
            target_archive, extraction_dir)
        pygeoprocessing.testing.assert_rasters_equal(
            args['raster'],
            os.path.join(extraction_dir, 'data', extracted_args['raster']))
Example #17
0
    def test_collect_geotiff(self):
        """Datastack: test collect geotiff."""
        # Necessary test, as this is proving to be an issue.
        from natcap.invest import datastack
        args = {'raster': os.path.join(DATA_DIR, 'landcover.tif')}

        target_archive = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(args, 'sample_model', target_archive)

        extraction_dir = os.path.join(self.workspace, 'extracted_archive')
        extracted_args = datastack.extract_datastack_archive(
            target_archive, extraction_dir)
        # Compare pixel arrays of the source and the round-tripped raster.
        expected_pixels = pygeoprocessing.raster_to_numpy_array(
            args['raster'])
        actual_pixels = pygeoprocessing.raster_to_numpy_array(
            os.path.join(extraction_dir, 'data', extracted_args['raster']))
        numpy.testing.assert_allclose(expected_pixels, actual_pixels)
Example #18
0
def build_datastack_archive():
    """Writes a compressed archive of invest model input data.

    Body (JSON string):
        filepath: string - the target path to save the archive
        moduleName: string (e.g. natcap.invest.carbon) the python module name
        args: JSON string of InVEST model args keys and values

    Returns:
        A string.
    """
    body = request.get_json()
    # The 'args' value is itself a JSON string and must be decoded first.
    model_args = json.loads(body['args'])
    datastack.build_datastack_archive(
        model_args, body['moduleName'], body['filepath'])

    return 'datastack archive created'
Example #19
0
    def test_get_datastack_info_archive(self):
        """Datastacks: verify we can get info from an archive."""
        import natcap.invest
        from natcap.invest import datastack

        expected_args = {
            'a': 1,
            'b': u'hello there',
            'c': 'plain bytestring',
            'd': '',
        }

        target_archive = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(
            expected_args, 'sample_model', target_archive)

        info_type, info = datastack.get_datastack_info(target_archive)

        self.assertEqual(info_type, 'archive')
        self.assertEqual(
            info,
            datastack.ParameterSet(
                expected_args, 'sample_model', natcap.invest.__version__))
Example #20
0
    def test_collect_vectors(self):
        """Datastack: test collect ogr vector."""
        from natcap.invest import datastack
        from natcap.invest.utils import _assert_vectors_equal
        source_vector_path = os.path.join(DATA_DIR, 'watersheds.shp')
        source_vector = ogr.Open(source_vector_path)

        for format_name, extension in (('ESRI Shapefile', 'shp'), ('GeoJSON',
                                                                   'geojson')):
            dest_dir = os.path.join(self.workspace, format_name)
            os.makedirs(dest_dir)
            dest_vector_path = os.path.join(dest_dir, 'vector.%s' % extension)
            params = {
                'vector': dest_vector_path,
            }
            driver = ogr.GetDriverByName(format_name)
            driver.CopyDataSource(source_vector, dest_vector_path)

            archive_path = os.path.join(dest_dir, 'archive.invs.tar.gz')

            # Collect the vector's files into a single archive
            datastack.build_datastack_archive(params,
                                              'test_datastack_modules.vector',
                                              archive_path)

            # extract the archive
            out_directory = os.path.join(dest_dir, 'extracted_archive')
            with tarfile.open(archive_path) as tar:
                tar.extractall(out_directory)

            # Use a context manager so the parameter file handle is closed
            # promptly instead of being leaked by json.load(open(...)).
            with open(os.path.join(
                    out_directory,
                    datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
                archived_params = json.load(params_file)['args']
            _assert_vectors_equal(
                params['vector'],
                os.path.join(out_directory, archived_params['vector']))

            self.assertEqual(len(archived_params), 1)  # sanity check
Example #21
0
    def test_data_dir(self):
        """Datastack: a data directory (with nesting) round-trips intact."""
        from natcap.invest import datastack
        params = {
            'data_dir': os.path.join(self.workspace, 'data_dir')
        }
        os.makedirs(params['data_dir'])
        for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
            data_filepath = os.path.join(params['data_dir'], filename)
            with open(data_filepath, 'w') as textfile:
                textfile.write(filename)

        # make a folder within the data folder.
        nested_folder = os.path.join(params['data_dir'], 'nested')
        os.makedirs(nested_folder)
        with open(os.path.join(nested_folder, 'nested.txt'), 'w') as textfile:
            textfile.write('hello, world!')

        src_datadir_digest = pygeoprocessing.testing.digest_folder(
            params['data_dir'])

        # Collect the file into an archive
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)

        # extract the archive
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Use a context manager so the parameter file handle is closed
        # promptly instead of being leaked by json.load(open(...)).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']
        dest_datadir_digest = pygeoprocessing.testing.digest_folder(
            os.path.join(out_directory, archived_params['data_dir']))

        self.assertEqual(len(archived_params), 1)  # sanity check
        if src_datadir_digest != dest_datadir_digest:
            self.fail('Digest mismatch: src:%s != dest:%s' % (
                src_datadir_digest, dest_datadir_digest))
Example #22
0
    def test_archive_extraction(self):
        """Datastack: test archive extraction.

        Covers scalar, string, file, directory, raster, vector and CSV args,
        including a "spatial CSV" whose cells reference additional rasters
        and vectors that must also be pulled into the archive.
        """
        from natcap.invest import datastack
        from natcap.invest import utils

        params = {
            'blank': '',
            'a': 1,
            'b': 'hello there',
            'c': 'plain bytestring',
            # 'foo' and 'bar' deliberately point at the same file.
            'foo': os.path.join(self.workspace, 'foo.txt'),
            'bar': os.path.join(self.workspace, 'foo.txt'),
            'data_dir': os.path.join(self.workspace, 'data_dir'),
            'raster': os.path.join(DATA_DIR, 'dem'),
            'vector': os.path.join(DATA_DIR, 'watersheds.shp'),
            'simple_table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
            'spatial_table': os.path.join(self.workspace, 'spatial_table.csv'),
        }
        # synthesize sample data
        os.makedirs(params['data_dir'])
        for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
            data_filepath = os.path.join(params['data_dir'], filename)
            with open(data_filepath, 'w') as textfile:
                textfile.write(filename)

        with open(params['foo'], 'w') as textfile:
            textfile.write('hello world!')

        # The spatial CSV references four files by path; two already exist
        # (the DEM and watersheds), two are created below while the CSV is
        # still open for writing.
        with open(params['spatial_table'], 'w') as spatial_csv:
            # copy existing DEM
            # copy existing watersheds
            # new raster
            # new vector
            spatial_csv.write('ID,path\n')
            spatial_csv.write(f"1,{params['raster']}\n")
            spatial_csv.write(f"2,{params['vector']}\n")

            # Create a raster only referenced by the CSV
            target_csv_raster_path = os.path.join(self.workspace,
                                                  'new_raster.tif')
            pygeoprocessing.new_raster_from_base(params['raster'],
                                                 target_csv_raster_path,
                                                 gdal.GDT_UInt16, [0])
            spatial_csv.write(f'3,{target_csv_raster_path}\n')

            # Create a vector only referenced by the CSV
            target_csv_vector_path = os.path.join(self.workspace,
                                                  'new_vector.geojson')
            pygeoprocessing.shapely_geometry_to_vector(
                [shapely.geometry.Point(100, 100)],
                target_csv_vector_path,
                pygeoprocessing.get_raster_info(
                    params['raster'])['projection_wkt'],
                'GeoJSON',
                ogr_geom_type=ogr.wkbPoint)
            spatial_csv.write(f'4,{target_csv_vector_path}\n')

        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(
            params, 'test_datastack_modules.archive_extraction', archive_path)
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        archive_params = datastack.extract_datastack_archive(
            archive_path, out_directory)
        # Direct spatial args must round-trip with equal contents.
        model_array = pygeoprocessing.raster_to_numpy_array(
            archive_params['raster'])
        reg_array = pygeoprocessing.raster_to_numpy_array(params['raster'])
        numpy.testing.assert_allclose(model_array, reg_array)
        utils._assert_vectors_equal(archive_params['vector'], params['vector'])
        pandas.testing.assert_frame_equal(
            pandas.read_csv(archive_params['simple_table']),
            pandas.read_csv(params['simple_table']))
        for key in ('blank', 'a', 'b', 'c'):
            self.assertEqual(archive_params[key], params[key],
                             f'Params differ for key {key}')

        for key in ('foo', 'bar'):
            self.assertTrue(
                filecmp.cmp(archive_params[key], params[key], shallow=False))

        # Files referenced only by the spatial CSV must also have been
        # archived; their paths in the extracted CSV are relative to the
        # CSV's own directory.
        spatial_csv_dict = utils.build_lookup_from_csv(
            archive_params['spatial_table'], 'ID', to_lower=True)
        spatial_csv_dir = os.path.dirname(archive_params['spatial_table'])
        numpy.testing.assert_allclose(
            pygeoprocessing.raster_to_numpy_array(
                os.path.join(spatial_csv_dir, spatial_csv_dict[3]['path'])),
            pygeoprocessing.raster_to_numpy_array(target_csv_raster_path))
        utils._assert_vectors_equal(
            os.path.join(spatial_csv_dir, spatial_csv_dict[4]['path']),
            target_csv_vector_path)
Example #23
0
    def test_archive_extraction(self):
        """Datastack: test archive extraction."""
        from natcap.invest import datastack
        from natcap.invest.utils import _assert_vectors_equal
        params = {
            'blank': '',
            'a': 1,
            'b': 'hello there',
            'c': 'plain bytestring',
            'foo': os.path.join(self.workspace, 'foo.txt'),
            'bar': os.path.join(self.workspace, 'foo.txt'),
            'file_list': [
                os.path.join(self.workspace, 'file1.txt'),
                os.path.join(self.workspace, 'file2.txt'),
            ],
            'data_dir': os.path.join(self.workspace, 'data_dir'),
            'raster': os.path.join(DATA_DIR, 'dem'),
            'vector': os.path.join(DATA_DIR, 'watersheds.shp'),
            'table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
        }
        # synthesize sample data
        os.makedirs(params['data_dir'])
        for basename in ('foo.txt', 'bar.txt', 'baz.txt'):
            with open(os.path.join(params['data_dir'], basename), 'w') as fh:
                fh.write(basename)

        with open(params['foo'], 'w') as fh:
            fh.write('hello world!')

        for listed_path in params['file_list']:
            with open(listed_path, 'w') as fh:
                fh.write(listed_path)

        # collect parameters:
        archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(params, 'sample_model', archive_path)
        out_directory = os.path.join(self.workspace, 'extracted_archive')
        archive_params = datastack.extract_datastack_archive(
            archive_path, out_directory)

        # Raster pixels must survive the round trip unchanged.
        numpy.testing.assert_allclose(
            pygeoprocessing.raster_to_numpy_array(archive_params['raster']),
            pygeoprocessing.raster_to_numpy_array(params['raster']))
        _assert_vectors_equal(archive_params['vector'], params['vector'])
        pandas.testing.assert_frame_equal(
            pandas.read_csv(archive_params['table']),
            pandas.read_csv(params['table']))
        for key in ('blank', 'a', 'b', 'c'):
            self.assertEqual(archive_params[key], params[key],
                             f'Params differ for key {key}')

        for key in ('foo', 'bar'):
            self.assertTrue(
                filecmp.cmp(archive_params[key], params[key], shallow=False))

        for expected_file, archive_file in zip(params['file_list'],
                                               archive_params['file_list']):
            self.assertTrue(
                filecmp.cmp(expected_file, archive_file, shallow=False))