def test_collect_multipart_gdal_raster(self):
    """Datastack: test collect multipart gdal raster."""
    from natcap.invest import datastack

    params = {
        'raster': os.path.join(DATA_DIR, 'dem'),
    }

    # Collect the raster's files into a single archive
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']

    self.assertEqual(len(archived_params), 1)
    model_array = pygeoprocessing.raster_to_numpy_array(params['raster'])
    reg_array = pygeoprocessing.raster_to_numpy_array(
        os.path.join(out_directory, archived_params['raster']))
    numpy.testing.assert_allclose(model_array, reg_array)
def test_list_of_inputs(self):
    """Datastack: a list of input files round-trips with equal digests."""
    from natcap.invest import datastack

    params = {
        'file_list': [
            os.path.join(self.workspace, 'foo.txt'),
            os.path.join(self.workspace, 'bar.txt'),
        ]
    }
    # Write each file's own path as its contents so the files differ.
    for filename in params['file_list']:
        with open(filename, 'w') as textfile:
            textfile.write(filename)
    src_digest = pygeoprocessing.testing.digest_file_list(
        params['file_list'])

    # Collect the file into an archive
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']
    dest_digest = pygeoprocessing.testing.digest_file_list(
        [os.path.join(out_directory, filename)
         for filename in archived_params['file_list']])

    self.assertEqual(len(archived_params), 1)  # sanity check
    if src_digest != dest_digest:
        self.fail('Digest mismatch: src:%s != dest:%s' % (
            src_digest, dest_digest))
def execute_model(workspace, source_parameter_set):
    """Helper function to run a model from its parameter set file.

    Args:
        workspace (str): The path to the workspace to use for the test run.
            All files will be written here.
        source_parameter_set (str): The path to the parameter set from which
            the args dict and model name should be loaded.

    Returns:
        ``None``
    """
    from natcap.invest import datastack

    parameter_set = datastack.extract_parameter_set(source_parameter_set)
    model_name = parameter_set.model_name

    # Round-trip the args through a datastack archive before executing.
    archive_path = os.path.join(workspace, 'datastack.invs.tar.gz')
    datastack.build_datastack_archive(
        parameter_set.args, model_name, archive_path)

    unpacked_dir = os.path.join(workspace, 'archived_data')
    args = datastack.extract_datastack_archive(archive_path, unpacked_dir)
    args['workspace_dir'] = os.path.join(workspace, 'workspace')

    # validate the args for good measure
    model_module = importlib.import_module(name=model_name)
    validation_errors = model_module.validate(args)
    if validation_errors != []:
        raise AssertionError(
            f"Errors founds: {pprint.pformat(validation_errors)}")

    model_module.execute(args)
def test_collect_simple_parameters(self):
    """Datastack: test collect simple parameters."""
    from natcap.invest import datastack

    params = {
        'a': 1,
        'b': 'hello there',
        'c': 'plain bytestring',
        'd': '',
        'workspace_dir': os.path.join(self.workspace),
    }
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'test_datastack_modules.simple_parameters', archive_path)

    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Three entries expected in the extracted archive root.
    self.assertEqual(len(os.listdir(out_directory)), 3)

    # We expect the workspace to be excluded from the resulting args dict.
    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']
    self.assertEqual(
        archived_params,
        {'a': 1, 'b': 'hello there', 'c': 'plain bytestring', 'd': ''})
def test_mixed_path_separators_in_archive(self):
    """Datastacks: datastack archives must handle windows, linux paths."""
    from natcap.invest import datastack

    args = {
        'windows_path': os.path.join(
            self.workspace, 'dir1\\filepath1.txt'),
        'linux_path': os.path.join(
            self.workspace, 'dir2/filepath2.txt'),
    }
    for filepath in args.values():
        normalized_path = os.path.normpath(filepath.replace('\\', os.sep))
        # Fix: exist_ok=True replaces the original try/except OSError,
        # which also silently swallowed unrelated errors (e.g. permission
        # failures) rather than only "directory already exists".
        os.makedirs(os.path.dirname(normalized_path), exist_ok=True)
        with open(normalized_path, 'w') as open_file:
            open_file.write('the contents of this file do not matter.')

    datastack_path = os.path.join(
        self.workspace, 'archive.invest.tar.gz')
    datastack.build_datastack_archive(
        args, 'sample_model', datastack_path)

    extraction_path = os.path.join(self.workspace, 'extracted_dir')
    extracted_args = datastack.extract_datastack_archive(
        datastack_path, extraction_path)

    # Regardless of the source separators, extracted paths use the
    # platform convention under the archive's 'data' directory.
    expected_args = {
        'windows_path': os.path.join(
            extraction_path, 'data', 'filepath1.txt'),
        'linux_path': os.path.join(
            extraction_path, 'data', 'filepath2.txt'),
    }
    self.maxDiff = None  # show whole exception on failure
    self.assertEqual(extracted_args, expected_args)
def test_collect_ogr_table(self):
    """Datastack: a CSV table is archived and preserved."""
    from natcap.invest import datastack

    params = {
        'table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
    }

    # Collect the table into a single archive (comment fixed: this is a
    # table, not a raster).
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']

    pygeoprocessing.testing.assert_csv_equal(
        params['table'],
        os.path.join(out_directory, archived_params['table']))
    self.assertEqual(len(archived_params), 1)  # sanity check
def test_nonspatial_single_file(self):
    """Datastack: a single nonspatial file is archived unchanged."""
    from natcap.invest import datastack

    params = {
        'some_file': os.path.join(self.workspace, 'foo.txt')
    }
    with open(params['some_file'], 'w') as textfile:
        textfile.write('some text here!')

    # Collect the file into an archive
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']

    pygeoprocessing.testing.assert_text_equal(
        params['some_file'],
        os.path.join(out_directory, archived_params['some_file']))
    self.assertEqual(len(archived_params), 1)  # sanity check
def test_collect_multipart_gdal_raster(self):
    """Datastack: a multipart raster's files are archived together."""
    from natcap.invest import datastack

    params = {
        'raster': os.path.join(FW_DATA, 'dem'),
    }

    # Collect the raster's files into a single archive
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']

    self.assertEqual(len(archived_params), 1)
    pygeoprocessing.testing.assert_rasters_equal(
        params['raster'],
        os.path.join(out_directory, archived_params['raster']))
def test_collect_rasters(self):
    """Datastack: test collect GDAL rasters."""
    from natcap.invest import datastack

    for raster_filename in (
            'dem',             # This is a multipart raster
            'landcover.tif'):  # This is a single-file raster
        params = {
            'raster': os.path.join(DATA_DIR, raster_filename),
        }

        # Collect the raster's files into a single archive
        archive_path = os.path.join(
            self.workspace, 'archive.invs.tar.gz')
        datastack.build_datastack_archive(
            params, 'test_datastack_modules.raster', archive_path)

        # extract the archive
        out_directory = os.path.join(
            self.workspace, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Fix: read the parameter file inside a context manager so the
        # file handle is closed (`json.load(open(...))` leaked it).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']

        self.assertEqual(len(archived_params), 1)
        model_array = pygeoprocessing.raster_to_numpy_array(
            params['raster'])
        reg_array = pygeoprocessing.raster_to_numpy_array(
            os.path.join(out_directory, archived_params['raster']))
        numpy.testing.assert_allclose(model_array, reg_array)
def test_collect_simple_parameters(self):
    """Datastack: simple scalar/string parameters round-trip intact."""
    from natcap.invest import datastack

    params = {
        'a': 1,
        'b': u'hello there',
        'c': 'plain bytestring',
        'd': '',
    }
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Three entries expected in the extracted archive root.
    self.assertEqual(len(os.listdir(out_directory)), 3)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']
    self.assertEqual(
        archived_params,
        {'a': 1, 'b': u'hello there', 'c': u'plain bytestring', 'd': ''})
def test_duplicate_filepaths(self):
    """Datastack: duplicate input paths are stored once and shared."""
    from natcap.invest import datastack

    # Both parameters point to the same file on disk.
    params = {
        'foo': os.path.join(self.workspace, 'foo.txt'),
        'bar': os.path.join(self.workspace, 'foo.txt'),
    }
    with open(params['foo'], 'w') as textfile:
        textfile.write('hello world!')

    # Collect the file into an archive
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']

    # Assert that the archived 'foo' and 'bar' params point to the same
    # file.
    self.assertEqual(archived_params['foo'], archived_params['bar'])

    # Assert we have the expected number of files in the archive
    self.assertEqual(len(os.listdir(out_directory)), 3)

    # Assert we have the expected number of files in the data dir.
    self.assertEqual(
        len(os.listdir(os.path.join(out_directory, 'data'))), 1)
def test_list_of_inputs(self):
    """Datastack: test list of inputs."""
    from natcap.invest import datastack

    params = {
        'file_list': [
            os.path.join(self.workspace, 'foo.txt'),
            os.path.join(self.workspace, 'bar.txt'),
        ]
    }
    # Write each file's own path as its contents so the files differ.
    for filename in params['file_list']:
        with open(filename, 'w') as textfile:
            textfile.write(filename)

    # Collect the file into an archive
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']
    archived_file_list = [
        os.path.join(out_directory, filename)
        for filename in archived_params['file_list']]

    self.assertEqual(len(archived_params), 1)  # sanity check
    for expected_file, archive_file in zip(
            params['file_list'], archived_file_list):
        if not filecmp.cmp(expected_file, archive_file, shallow=False):
            self.fail(
                f'File mismatch: {expected_file} != {archive_file}')
def test_archive_extraction(self):
    """Datastack: every supported input type round-trips an archive.

    Builds an archive from a mixed args dict (strings, numbers, files,
    a file list, a directory, a raster, a vector and a table), extracts
    it, and checks each extracted input matches its source.
    """
    from natcap.invest import datastack
    # Args covering every input kind handled by the datastack code.
    # 'foo' and 'bar' intentionally reference the same file.
    params = {
        'blank': '',
        'a': 1,
        'b': u'hello there',
        'c': 'plain bytestring',
        'foo': os.path.join(self.workspace, 'foo.txt'),
        'bar': os.path.join(self.workspace, 'foo.txt'),
        'file_list': [
            os.path.join(self.workspace, 'file1.txt'),
            os.path.join(self.workspace, 'file2.txt'),
        ],
        'data_dir': os.path.join(self.workspace, 'data_dir'),
        'raster': os.path.join(DATA_DIR, 'dem'),
        'vector': os.path.join(DATA_DIR, 'watersheds.shp'),
        'table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
    }
    # synthesize sample data
    os.makedirs(params['data_dir'])
    for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
        data_filepath = os.path.join(params['data_dir'], filename)
        with open(data_filepath, 'w') as textfile:
            # Each file's contents are its own filename.
            textfile.write(filename)
    with open(params['foo'], 'w') as textfile:
        textfile.write('hello world!')
    for filename in params['file_list']:
        with open(filename, 'w') as textfile:
            textfile.write(filename)
    # collect parameters:
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    archive_params = datastack.extract_datastack_archive(
        archive_path, out_directory)
    # Spatial and tabular inputs must be equivalent after extraction.
    pygeoprocessing.testing.assert_rasters_equal(
        archive_params['raster'], params['raster'])
    pygeoprocessing.testing.assert_vectors_equal(
        archive_params['vector'], params['vector'],
        field_tolerance=1e-6)
    pygeoprocessing.testing.assert_csv_equal(
        archive_params['table'], params['table'])
    # Non-file parameters must come back unchanged.
    for key in ('blank', 'a', 'b', 'c'):
        self.assertEqual(archive_params[key], params[key],
                         'Params differ for key %s' % key)
    for key in ('foo', 'bar'):
        pygeoprocessing.testing.assert_text_equal(
            archive_params[key], params[key])
    # The file list's contents must digest identically.
    self.assertEqual(
        pygeoprocessing.testing.digest_file_list(
            archive_params['file_list']),
        pygeoprocessing.testing.digest_file_list(params['file_list']))
def test_nested_args_keys(self):
    """Datastack: a nested args dict survives an archive round-trip."""
    from natcap.invest import datastack

    nested_args = {'a': {'b': 1}}
    tarball_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        nested_args, 'sample_model', tarball_path)

    extraction_dir = os.path.join(self.workspace, 'extracted_archive')
    extracted = datastack.extract_datastack_archive(
        tarball_path, extraction_dir)
    self.assertEqual(extracted, nested_args)
def test_nonspatial_files(self):
    """Datastack: test nonspatial files."""
    from natcap.invest import datastack

    params = {
        'some_file': os.path.join(self.workspace, 'foo.txt'),
        'data_dir': os.path.join(self.workspace, 'data_dir')
    }
    with open(params['some_file'], 'w') as textfile:
        textfile.write('some text here!')

    os.makedirs(params['data_dir'])
    for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
        data_filepath = os.path.join(params['data_dir'], filename)
        with open(data_filepath, 'w') as textfile:
            textfile.write(filename)

    # make a folder within the data folder.
    nested_folder = os.path.join(params['data_dir'], 'nested')
    os.makedirs(nested_folder)
    with open(os.path.join(nested_folder, 'nested.txt'), 'w') as textfile:
        textfile.write('hello, world!')

    # Collect the file into an archive
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'test_datastack_modules.nonspatial_files', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']

    self.assertTrue(
        filecmp.cmp(
            params['some_file'],
            os.path.join(out_directory, archived_params['some_file']),
            shallow=False))
    self.assertEqual(len(archived_params), 2)  # sanity check

    # Compare the full contents of the data dir, including nested files.
    common_files = ['foo.txt', 'bar.txt', 'baz.txt', 'nested/nested.txt']
    matched_files, mismatch_files, error_files = filecmp.cmpfiles(
        params['data_dir'],
        os.path.join(out_directory, archived_params['data_dir']),
        common_files, shallow=False)
    if mismatch_files or error_files:
        self.fail('Directory mismatch or error. The mismatches are'
                  f' {mismatch_files} ; and the errors are {error_files}')
def test_collect_geotiff(self):
    """Datastack: a single-file GeoTIFF survives an archive round-trip."""
    # Necessary test, as this is proving to be an issue.
    from natcap.invest import datastack

    params = {
        'raster': os.path.join(DATA_DIR, 'landcover.tif'),
    }
    tarball_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', tarball_path)

    extraction_dir = os.path.join(self.workspace, 'extracted_archive')
    extracted_args = datastack.extract_datastack_archive(
        tarball_path, extraction_dir)
    pygeoprocessing.testing.assert_rasters_equal(
        params['raster'],
        os.path.join(extraction_dir, 'data', extracted_args['raster']))
def test_collect_geotiff(self):
    """Datastack: test collect geotiff."""
    # Necessary test, as this is proving to be an issue.
    from natcap.invest import datastack

    params = {
        'raster': os.path.join(DATA_DIR, 'landcover.tif'),
    }
    tarball_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', tarball_path)

    extraction_dir = os.path.join(self.workspace, 'extracted_archive')
    extracted_args = datastack.extract_datastack_archive(
        tarball_path, extraction_dir)

    # Pixel values of source and extracted rasters must match.
    source_pixels = pygeoprocessing.raster_to_numpy_array(
        params['raster'])
    extracted_pixels = pygeoprocessing.raster_to_numpy_array(
        os.path.join(extraction_dir, 'data', extracted_args['raster']))
    numpy.testing.assert_allclose(source_pixels, extracted_pixels)
def build_datastack_archive():
    """Writes a compressed archive of invest model input data.

    Body (JSON string):
        filepath: string - the target path to save the archive
        moduleName: string (e.g. natcap.invest.carbon) the python module name
        args: JSON string of InVEST model args keys and values

    Returns:
        A string.
    """
    payload = request.get_json()
    # The args come in as a JSON-encoded string inside the payload.
    model_args = json.loads(payload['args'])
    datastack.build_datastack_archive(
        model_args, payload['moduleName'], payload['filepath'])
    return 'datastack archive created'
def test_get_datastack_info_archive(self):
    """Datastacks: verify we can get info from an archive."""
    import natcap.invest
    from natcap.invest import datastack

    args = {
        'a': 1,
        'b': u'hello there',
        'c': 'plain bytestring',
        'd': '',
    }
    tarball_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(args, 'sample_model', tarball_path)

    stack_type, stack_info = datastack.get_datastack_info(tarball_path)

    # An archive reports its type plus a ParameterSet describing it.
    self.assertEqual(stack_type, 'archive')
    self.assertEqual(
        stack_info,
        datastack.ParameterSet(
            args, 'sample_model', natcap.invest.__version__))
def test_collect_vectors(self):
    """Datastack: test collect ogr vector."""
    from natcap.invest import datastack
    from natcap.invest.utils import _assert_vectors_equal

    source_vector_path = os.path.join(DATA_DIR, 'watersheds.shp')
    source_vector = ogr.Open(source_vector_path)

    # Exercise both a multi-file format and a single-file format.
    for format_name, extension in (('ESRI Shapefile', 'shp'),
                                   ('GeoJSON', 'geojson')):
        dest_dir = os.path.join(self.workspace, format_name)
        os.makedirs(dest_dir)
        dest_vector_path = os.path.join(
            dest_dir, 'vector.%s' % extension)
        params = {
            'vector': dest_vector_path,
        }
        driver = ogr.GetDriverByName(format_name)
        driver.CopyDataSource(source_vector, dest_vector_path)

        archive_path = os.path.join(dest_dir, 'archive.invs.tar.gz')

        # Collect the vector's files into a single archive
        datastack.build_datastack_archive(
            params, 'test_datastack_modules.vector', archive_path)

        # extract the archive
        out_directory = os.path.join(dest_dir, 'extracted_archive')
        with tarfile.open(archive_path) as tar:
            tar.extractall(out_directory)

        # Fix: read the parameter file inside a context manager so the
        # file handle is closed (`json.load(open(...))` leaked it).
        with open(os.path.join(
                out_directory,
                datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
            archived_params = json.load(params_file)['args']

        _assert_vectors_equal(
            params['vector'],
            os.path.join(out_directory, archived_params['vector']))
        self.assertEqual(len(archived_params), 1)  # sanity check
def test_data_dir(self):
    """Datastack: a data directory is archived recursively."""
    from natcap.invest import datastack

    params = {
        'data_dir': os.path.join(self.workspace, 'data_dir')
    }
    os.makedirs(params['data_dir'])
    for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
        data_filepath = os.path.join(params['data_dir'], filename)
        with open(data_filepath, 'w') as textfile:
            textfile.write(filename)

    # make a folder within the data folder.
    nested_folder = os.path.join(params['data_dir'], 'nested')
    os.makedirs(nested_folder)
    with open(os.path.join(nested_folder, 'nested.txt'), 'w') as textfile:
        textfile.write('hello, world!')

    src_datadir_digest = pygeoprocessing.testing.digest_folder(
        params['data_dir'])

    # Collect the file into an archive
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)

    # extract the archive
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    with tarfile.open(archive_path) as tar:
        tar.extractall(out_directory)

    # Fix: read the parameter file inside a context manager so the file
    # handle is closed (the original `json.load(open(...))` leaked it).
    with open(os.path.join(
            out_directory,
            datastack.DATASTACK_PARAMETER_FILENAME)) as params_file:
        archived_params = json.load(params_file)['args']
    dest_datadir_digest = pygeoprocessing.testing.digest_folder(
        os.path.join(out_directory, archived_params['data_dir']))

    self.assertEqual(len(archived_params), 1)  # sanity check
    if src_datadir_digest != dest_datadir_digest:
        self.fail('Digest mismatch: src:%s != dest:%s' % (
            src_datadir_digest, dest_datadir_digest))
def test_archive_extraction(self):
    """Datastack: test archive extraction."""
    from natcap.invest import datastack
    from natcap.invest import utils
    # Args covering scalars, strings, duplicate file paths, a data dir,
    # a raster, a vector, a plain table and a table referencing spatial
    # files by path.
    params = {
        'blank': '',
        'a': 1,
        'b': 'hello there',
        'c': 'plain bytestring',
        'foo': os.path.join(self.workspace, 'foo.txt'),
        'bar': os.path.join(self.workspace, 'foo.txt'),
        'data_dir': os.path.join(self.workspace, 'data_dir'),
        'raster': os.path.join(DATA_DIR, 'dem'),
        'vector': os.path.join(DATA_DIR, 'watersheds.shp'),
        'simple_table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
        'spatial_table': os.path.join(self.workspace, 'spatial_table.csv'),
    }
    # synthesize sample data
    os.makedirs(params['data_dir'])
    for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
        data_filepath = os.path.join(params['data_dir'], filename)
        with open(data_filepath, 'w') as textfile:
            # Each file's contents are its own filename.
            textfile.write(filename)
    with open(params['foo'], 'w') as textfile:
        textfile.write('hello world!')
    # The spatial table lists one path per row:
    with open(params['spatial_table'], 'w') as spatial_csv:
        # copy existing DEM
        # copy existing watersheds
        # new raster
        # new vector
        spatial_csv.write('ID,path\n')
        spatial_csv.write(f"1,{params['raster']}\n")
        spatial_csv.write(f"2,{params['vector']}\n")

        # Create a raster only referenced by the CSV
        target_csv_raster_path = os.path.join(
            self.workspace, 'new_raster.tif')
        pygeoprocessing.new_raster_from_base(
            params['raster'], target_csv_raster_path,
            gdal.GDT_UInt16, [0])
        spatial_csv.write(f'3,{target_csv_raster_path}\n')

        # Create a vector only referenced by the CSV
        target_csv_vector_path = os.path.join(
            self.workspace, 'new_vector.geojson')
        pygeoprocessing.shapely_geometry_to_vector(
            [shapely.geometry.Point(100, 100)],
            target_csv_vector_path,
            pygeoprocessing.get_raster_info(
                params['raster'])['projection_wkt'],
            'GeoJSON',
            ogr_geom_type=ogr.wkbPoint)
        spatial_csv.write(f'4,{target_csv_vector_path}\n')

    # Build the archive, then extract and verify each input.
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'test_datastack_modules.archive_extraction', archive_path)
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    archive_params = datastack.extract_datastack_archive(
        archive_path, out_directory)
    # Raster pixels, vector features and table contents must match.
    model_array = pygeoprocessing.raster_to_numpy_array(
        archive_params['raster'])
    reg_array = pygeoprocessing.raster_to_numpy_array(params['raster'])
    numpy.testing.assert_allclose(model_array, reg_array)
    utils._assert_vectors_equal(
        archive_params['vector'], params['vector'])
    pandas.testing.assert_frame_equal(
        pandas.read_csv(archive_params['simple_table']),
        pandas.read_csv(params['simple_table']))
    # Non-file parameters must come back unchanged.
    for key in ('blank', 'a', 'b', 'c'):
        self.assertEqual(archive_params[key], params[key],
                         f'Params differ for key {key}')
    for key in ('foo', 'bar'):
        self.assertTrue(
            filecmp.cmp(archive_params[key], params[key], shallow=False))
    # Paths inside the spatial table are rewritten relative to the
    # extracted table's directory; verify they resolve correctly.
    spatial_csv_dict = utils.build_lookup_from_csv(
        archive_params['spatial_table'], 'ID', to_lower=True)
    spatial_csv_dir = os.path.dirname(archive_params['spatial_table'])
    numpy.testing.assert_allclose(
        pygeoprocessing.raster_to_numpy_array(
            os.path.join(spatial_csv_dir, spatial_csv_dict[3]['path'])),
        pygeoprocessing.raster_to_numpy_array(target_csv_raster_path))
    utils._assert_vectors_equal(
        os.path.join(spatial_csv_dir, spatial_csv_dict[4]['path']),
        target_csv_vector_path)
def test_archive_extraction(self):
    """Datastack: test archive extraction."""
    from natcap.invest import datastack
    from natcap.invest.utils import _assert_vectors_equal
    # Args covering scalars, strings, duplicate file paths, a file list,
    # a data dir, a raster, a vector and a table.
    # 'foo' and 'bar' intentionally reference the same file.
    params = {
        'blank': '',
        'a': 1,
        'b': 'hello there',
        'c': 'plain bytestring',
        'foo': os.path.join(self.workspace, 'foo.txt'),
        'bar': os.path.join(self.workspace, 'foo.txt'),
        'file_list': [
            os.path.join(self.workspace, 'file1.txt'),
            os.path.join(self.workspace, 'file2.txt'),
        ],
        'data_dir': os.path.join(self.workspace, 'data_dir'),
        'raster': os.path.join(DATA_DIR, 'dem'),
        'vector': os.path.join(DATA_DIR, 'watersheds.shp'),
        'table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
    }
    # synthesize sample data
    os.makedirs(params['data_dir'])
    for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
        data_filepath = os.path.join(params['data_dir'], filename)
        with open(data_filepath, 'w') as textfile:
            # Each file's contents are its own filename.
            textfile.write(filename)
    with open(params['foo'], 'w') as textfile:
        textfile.write('hello world!')
    for filename in params['file_list']:
        with open(filename, 'w') as textfile:
            textfile.write(filename)
    # collect parameters:
    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'sample_model', archive_path)
    out_directory = os.path.join(self.workspace, 'extracted_archive')
    archive_params = datastack.extract_datastack_archive(
        archive_path, out_directory)
    # Raster pixels, vector features and table contents must match.
    model_array = pygeoprocessing.raster_to_numpy_array(
        archive_params['raster'])
    reg_array = pygeoprocessing.raster_to_numpy_array(params['raster'])
    numpy.testing.assert_allclose(model_array, reg_array)
    _assert_vectors_equal(archive_params['vector'], params['vector'])
    model_df = pandas.read_csv(archive_params['table'])
    reg_df = pandas.read_csv(params['table'])
    pandas.testing.assert_frame_equal(model_df, reg_df)
    # Non-file parameters must come back unchanged.
    for key in ('blank', 'a', 'b', 'c'):
        self.assertEqual(archive_params[key], params[key],
                         f'Params differ for key {key}')
    # Duplicate file params must both match the original contents.
    for key in ('foo', 'bar'):
        self.assertTrue(
            filecmp.cmp(archive_params[key], params[key],
                        shallow=False))
    # Each file in the list must match its source byte-for-byte.
    for expected_file, archive_file in zip(params['file_list'],
                                           archive_params['file_list']):
        self.assertTrue(
            filecmp.cmp(expected_file, archive_file, shallow=False))