def test_transition_table(self):
    """CBC Preprocessor: Test creation of transition table."""
    from natcap.invest.coastal_blue_carbon import preprocessor

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(3157)
    projection_wkt = srs.ExportToWkt()
    origin = (443723.127327877911739, 4956546.905980412848294)

    matrix_a = numpy.array([[0, 1], [0, 1], [0, 1]], dtype=numpy.int16)
    filename_a = os.path.join(self.workspace_dir, 'raster_a.tif')
    pygeoprocessing.numpy_array_to_raster(
        matrix_a, -1, (100, -100), origin, projection_wkt, filename_a)

    matrix_b = numpy.array([[0, 1], [1, 0], [-1, -1]], dtype=numpy.int16)
    filename_b = os.path.join(self.workspace_dir, 'raster_b.tif')
    pygeoprocessing.numpy_array_to_raster(
        matrix_b, -1, (100, -100), origin, projection_wkt, filename_b)

    landcover_table_path = os.path.join(
        self.workspace_dir, 'lulc_table.csv')
    with open(landcover_table_path, 'w') as lulc_csv:
        lulc_csv.write('code,lulc-class,is_coastal_blue_carbon_habitat\n')
        lulc_csv.write('0,mangrove,True\n')
        lulc_csv.write('1,parking lot,False\n')

    landcover_table = utils.build_lookup_from_csv(
        landcover_table_path, 'code')
    target_table_path = os.path.join(
        self.workspace_dir, 'transition_table.csv')

    # Remove landcover code 1 from the table; expect an error.
    del landcover_table[1]
    with self.assertRaises(ValueError) as context:
        preprocessor._create_transition_table(
            landcover_table, [filename_a, filename_b], target_table_path)
    self.assertIn('missing a row with the landuse code 1',
                  str(context.exception))

    # Reload the landcover table, restoring code 1.
    landcover_table = utils.build_lookup_from_csv(
        landcover_table_path, 'code')

    preprocessor._create_transition_table(
        landcover_table, [filename_a, filename_b], target_table_path)

    with open(target_table_path) as transition_table:
        self.assertEqual(transition_table.readline(),
                         'lulc-class,mangrove,parking lot\n')
        self.assertEqual(transition_table.readline(),
                         'mangrove,accum,disturb\n')
        self.assertEqual(transition_table.readline(),
                         'parking lot,accum,NCC\n')

        # After the above lines comes a blank line, then the legend.
        # Deliberately not testing the legend.
        self.assertEqual(transition_table.readline(), '\n')
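# A note on the transition codes asserted above: in the preprocessor's
# transition table, 'accum' marks a transition where carbon accumulates,
# 'disturb' marks a transition that disturbs carbon stocks, and 'NCC'
# indicates no carbon change.  (This expansion of the codes is an
# editorial gloss; the test itself only asserts the literal strings.)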
def test_key_not_in_header(self):
    """utils: test that ValueError is raised when key not in header."""
    from natcap.invest import utils

    csv_file = os.path.join(self.workspace, 'csv.csv')
    with open(csv_file, 'w') as file_obj:
        file_obj.write(
            textwrap.dedent(
                """\
                header1,header2,header3
                1,2,3
                4,FOO,bar
                """))

    with self.assertRaises(ValueError):
        utils.build_lookup_from_csv(csv_file, 'some_key')
def test_missing_key_field(self):
    """utils: test error is raised when missing key field."""
    from natcap.invest import utils

    # Note that the header deliberately misspells 'lucode' as 'luode',
    # so the requested key field is missing from the table.
    csv_text = ("luode,desc,val1,val2\n"
                "1,corn,0.5,2\n"
                "2,bread,1,4\n"
                "3,beans,0.5,4\n"
                "4,butter,9,1")
    table_path = os.path.join(self.workspace_dir, 'table.csv')
    with open(table_path, 'w') as table_file:
        table_file.write(csv_text)

    with self.assertRaises(KeyError):
        utils.build_lookup_from_csv(table_path, 'lucode', to_lower=True)
def test_non_unique_keys(self):
    """utils: test error is raised if keys are not unique."""
    from natcap.invest import utils

    # The key value 2 appears twice in the 'lucode' column.
    csv_text = ("lucode,desc,val1,val2\n"
                "1,corn,0.5,2\n"
                "2,bread,1,4\n"
                "2,beans,0.5,4\n"
                "4,butter,9,1")
    table_path = os.path.join(self.workspace_dir, 'table.csv')
    with open(table_path, 'w') as table_file:
        table_file.write(csv_text)

    with self.assertRaises(ValueError):
        utils.build_lookup_from_csv(table_path, 'lucode', to_lower=True)
def test_create_carbon_pool_transient_table_template(self):
    """Coastal Blue Carbon: Test creation of transient table template."""
    from natcap.invest.coastal_blue_carbon import preprocessor

    args = _get_preprocessor_args(1, self.workspace_dir)
    filepath = os.path.join(self.workspace_dir, 'transient_temp.csv')
    code_to_lulc_dict = {1: 'one', 2: 'two', 3: 'three'}
    preprocessor._create_carbon_pool_transient_table_template(
        filepath, code_to_lulc_dict)
    transient_dict = utils.build_lookup_from_csv(filepath, 'code')

    # Demonstrate that the output table contains all input landcover
    # classes.
    for i in [1, 2, 3]:
        self.assertIn(i, transient_dict)
def _assert_regression_results_eq(result_vector_path, agg_results_path):
    """Test output vector against expected aggregate results.

    Parameters:
        result_vector_path (string): path to the summary shapefile
            produced by the GLOBIO model.
        agg_results_path (string): path to a csv file that has the
            expected aoi_summary.shp table in the form of fid,msa_mean
            per line.

    Returns:
        None

    Raises:
        AssertionError if results are out of range by `tolerance_places`.
    """
    result_vector = gdal.OpenEx(result_vector_path, gdal.OF_VECTOR)
    result_layer = result_vector.GetLayer()

    # The tolerance of 3 digits after the decimal was determined by
    # experimentation on the application with the given range of numbers.
    # This is an apparently reasonable approach as described by ChrisF:
    # http://stackoverflow.com/a/3281371/42897
    # and even more reading about picking numerical tolerance (it's hard):
    # https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
    tolerance_places = 3
    expected_results = utils.build_lookup_from_csv(agg_results_path, 'fid')
    try:
        for feature in result_layer:
            fid = feature.GetFID()
            result_value = feature.GetField('msa_mean')
            if result_value is not None:
                # The coefficient of 1.5 here derives from when
                # `assert_almost_equal` was used, which had the parameter
                # `decimal`.  In the numpy implementation, this meant an
                # absolute tolerance of 1.5 * 10**-decimal.  In other
                # places we were able to round 1.5 down to 1, but here
                # the slightly larger tolerance is needed.
                numpy.testing.assert_allclose(
                    result_value,
                    float(expected_results[fid]['msa_mean']),
                    rtol=0, atol=1.5 * 10**-tolerance_places)
            else:
                # The out-of-bounds polygon will have no result_value.
                assert expected_results[fid]['msa_mean'] == ''
    finally:
        feature = None
        result_layer = None
        gdal.Dataset.__swig_destroy__(result_vector)
        result_vector = None
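# For reference, the tolerance above works out as follows: with
# tolerance_places = 3, atol = 1.5 * 10**-3 = 0.0015, so a result of
# 0.5014 compared against an expected 0.5000 passes, while 0.5016 fails.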
def test_existing_regression_coef(self):
    """Recreation test regression coefficients handle existing output."""
    from natcap.invest.recreation import recmodel_client

    # Initialize a TaskGraph.
    taskgraph_db_dir = os.path.join(
        self.workspace_dir, '_taskgraph_working_dir')
    n_workers = -1  # single process mode.
    task_graph = taskgraph.TaskGraph(taskgraph_db_dir, n_workers)

    response_vector_path = os.path.join(
        self.workspace_dir, 'no_grid_vector_path.shp')
    response_polygons_lookup_path = os.path.join(
        self.workspace_dir, 'response_polygons_lookup.pickle')
    recmodel_client._copy_aoi_no_grid(
        os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), response_vector_path)

    predictor_table_path = os.path.join(SAMPLE_DATA, 'predictors.csv')

    # Make outputs to be overwritten.
    predictor_dict = utils.build_lookup_from_csv(
        predictor_table_path, 'id')
    predictor_list = predictor_dict.keys()
    tmp_working_dir = tempfile.mkdtemp(dir=self.workspace_dir)
    empty_json_list = [
        os.path.join(tmp_working_dir, x + '.json')
        for x in predictor_list]
    out_coefficient_vector_path = os.path.join(
        self.workspace_dir, 'out_coefficient_vector.shp')
    _make_empty_files([out_coefficient_vector_path] + empty_json_list)

    prepare_response_polygons_task = task_graph.add_task(
        func=recmodel_client._prepare_response_polygons_lookup,
        args=(response_vector_path, response_polygons_lookup_path),
        target_path_list=[response_polygons_lookup_path],
        task_name='prepare response polygons for geoprocessing')

    # Build again to test against overwriting output.
    recmodel_client._schedule_predictor_data_processing(
        response_vector_path, response_polygons_lookup_path,
        prepare_response_polygons_task, predictor_table_path,
        out_coefficient_vector_path, tmp_working_dir, task_graph)

    expected_coeff_vector_path = os.path.join(
        REGRESSION_DATA, 'test_regression_coefficients.shp')
    pygeoprocessing.testing.assert_vectors_equal(
        out_coefficient_vector_path, expected_coeff_vector_path, 1E-6)
def test_sample_data(self):
    """CBC Preprocessor: Test on sample data."""
    from natcap.invest.coastal_blue_carbon import preprocessor

    snapshot_csv_path = os.path.join(
        REGRESSION_DATA, 'inputs', 'snapshots.csv')

    args = {
        'workspace_dir': os.path.join(self.workspace_dir, 'workspace'),
        'results_suffix': '150225',
        'lulc_lookup_table_path': os.path.join(
            REGRESSION_DATA, 'inputs', 'lulc_lookup.csv'),
        'landcover_snapshot_csv': snapshot_csv_path,
    }
    preprocessor.execute(args)

    # Walk through all files in the workspace and assert that outputs
    # have the file suffix.
    non_suffixed_files = []
    outputs_dir = os.path.join(
        args['workspace_dir'], 'outputs_preprocessor')
    for root_dir, dirnames, filenames in os.walk(outputs_dir):
        for filename in filenames:
            if not filename.lower().endswith('.txt'):  # ignore logfile
                basename, extension = os.path.splitext(filename)
                if not basename.endswith('_150225'):
                    path_rel_to_workspace = os.path.relpath(
                        os.path.join(root_dir, filename),
                        args['workspace_dir'])
                    non_suffixed_files.append(path_rel_to_workspace)

    if non_suffixed_files:
        self.fail('%s files are missing suffixes: %s' %
                  (len(non_suffixed_files),
                   pprint.pformat(non_suffixed_files)))

    expected_landcover_codes = set(range(0, 24))
    found_landcover_codes = set(
        utils.build_lookup_from_csv(
            os.path.join(
                outputs_dir,
                'carbon_biophysical_table_template_150225.csv'),
            'code').keys())
    self.assertEqual(expected_landcover_codes, found_landcover_codes)
def test_csv_latin_1_encoding(self):
    """utils: test that a CSV is read correctly with Latin-1 encoding."""
    from natcap.invest import utils

    csv_file = os.path.join(self.workspace, 'csv.csv')
    with codecs.open(csv_file, 'w', encoding='iso-8859-1') as file_obj:
        file_obj.write(
            textwrap.dedent(
                """
                header 1,HEADER 2,header 3
                1,2,bar1
                4,5,FOO
                """).strip())

    lookup_dict = utils.build_lookup_from_csv(csv_file, 'header 1')

    self.assertEqual(lookup_dict[4]['header 2'], 5)
    self.assertEqual(lookup_dict[4]['header 3'], 'foo')
    self.assertEqual(lookup_dict[1]['header 1'], 1)
def test_build_lookup_from_csv(self):
    """utils: test build_lookup_from_csv."""
    from natcap.invest import utils

    table_str = 'a,b,foo,bar,_\n0.0,x,-1,bar,apple\n'
    table_path = os.path.join(self.workspace_dir, 'table.csv')
    with open(table_path, 'w') as table_file:
        table_file.write(table_str)

    result = utils.build_lookup_from_csv(table_path, 'a', to_lower=True)
    expected_dict = {
        0.0: {
            'a': 0.0,
            'b': 'x',
            'foo': -1.0,
            'bar': 'bar',
            '_': 'apple',
        },
    }
    self.assertEqual(result, expected_dict)
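# A minimal usage sketch of the mapping that build_lookup_from_csv
# returns (the 'table.csv' path and column names here are hypothetical):
#
#     lookup = utils.build_lookup_from_csv('table.csv', 'lucode')
#     for lucode, row in lookup.items():
#         print(lucode, row['desc'], row['val1'])
#
# Each key-column value maps to a dict of that row's values keyed by
# column name (lowercased by default), as expected_dict above shows.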
def test_trailing_comma_second_line(self):
    """utils: test a trailing comma on second line is handled properly."""
    from natcap.invest import utils

    # The 'bread' row has a trailing comma, which parses as an extra
    # empty field that should be ignored.
    csv_text = ("lucode,desc,val1,val2\n"
                "1,corn,0.5,2\n"
                "2,bread,1,4,\n"
                "3,beans,0.5,4\n"
                "4,butter,9,1")
    table_path = os.path.join(self.workspace_dir, 'table.csv')
    with open(table_path, 'w') as table_file:
        table_file.write(csv_text)

    result = utils.build_lookup_from_csv(
        table_path, 'lucode', to_lower=True)
    expected_result = {
        1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
        2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2},
        3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
        4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}}
    self.assertDictEqual(result, expected_result)
def test_unique_key_not_first_column(self):
    """utils: test success when key field is not first column."""
    from natcap.invest import utils

    csv_text = ("desc,lucode,val1,val2\n"
                "corn,1,0.5,2\n"
                "bread,2,1,4\n"
                "beans,3,0.5,4\n"
                "butter,4,9,1")
    table_path = os.path.join(self.workspace_dir, 'table.csv')
    with open(table_path, 'w') as table_file:
        table_file.write(csv_text)

    result = utils.build_lookup_from_csv(
        table_path, 'lucode', to_lower=True)
    expected_result = {
        1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
        2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2},
        3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
        4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}}
    self.assertDictEqual(result, expected_result)
def test_nan_holes(self):
    """utils: test empty strings returned when missing data is present."""
    from natcap.invest import utils

    csv_text = ("lucode,desc,val1,val2\n"
                "1,corn,0.5,2\n"
                "2,,1,4\n"
                "3,beans,0.5,4\n"
                "4,butter,,1")
    table_path = os.path.join(self.workspace_dir, 'table.csv')
    with open(table_path, 'w') as table_file:
        table_file.write(csv_text)

    result = utils.build_lookup_from_csv(
        table_path, 'lucode', to_lower=True)
    expected_result = {
        1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
        2: {'desc': '', 'val1': 1, 'val2': 4, 'lucode': 2},
        3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
        4: {'desc': 'butter', 'val1': '', 'val2': 1, 'lucode': 4}}
    self.assertDictEqual(result, expected_result)
def test_results_lowercase_non_numeric(self):
    """utils: test handling of converting to lowercase."""
    from natcap.invest import utils

    csv_file = os.path.join(self.workspace, 'csv.csv')
    with open(csv_file, 'w') as file_obj:
        file_obj.write(
            textwrap.dedent(
                """
                header1,HEADER2,header3
                1,2,bar
                4,5,FOO
                """).strip())

    lookup_dict = utils.build_lookup_from_csv(
        csv_file, 'header1', to_lower=True)

    self.assertEqual(lookup_dict[4]['header3'], 'foo')
    self.assertEqual(lookup_dict[1]['header2'], 2)
def test_nan_row(self):
    """utils: test NaN row is dropped."""
    from natcap.invest import utils

    csv_text = ("lucode,desc,val1,val2\n"
                "1,corn,0.5,2\n"
                ",,,\n"
                "3,beans,0.5,4\n"
                "4,butter,9,1")
    table_path = os.path.join(self.workspace_dir, 'table.csv')
    with open(table_path, 'w') as table_file:
        table_file.write(csv_text)

    result = utils.build_lookup_from_csv(
        table_path, 'lucode', to_lower=True)
    expected_result = {
        1.0: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1.0},
        3.0: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3.0},
        4.0: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4.0}}
    self.assertDictEqual(result, expected_result)
def test_results_uppercase_numeric_cast(self):
    """utils: test handling of uppercase, numeric casting, blank values."""
    from natcap.invest import utils

    csv_file = os.path.join(self.workspace, 'csv.csv')
    with open(csv_file, 'w') as file_obj:
        file_obj.write(
            textwrap.dedent(
                """
                header1,HEADER2,header3,missing_column,
                1,2,3,
                4,FOO,bar,
                """).strip())

    lookup_dict = utils.build_lookup_from_csv(
        csv_file, 'header1', to_lower=False)

    self.assertEqual(lookup_dict[4]['HEADER2'], 'FOO')
    self.assertEqual(lookup_dict[4]['header3'], 'bar')
    self.assertEqual(lookup_dict[1]['header1'], 1)
def test_csv_dialect_detection_semicolon_delimited(self):
    """utils: test that we can parse semicolon-delimited CSVs."""
    from natcap.invest import utils

    csv_file = os.path.join(self.workspace, 'csv.csv')
    with open(csv_file, 'w') as file_obj:
        file_obj.write(
            textwrap.dedent(
                """
                header1;HEADER2;header3;
                1;2;3;
                4;FOO;bar;
                """).strip())

    lookup_dict = utils.build_lookup_from_csv(
        csv_file, 'header1', to_lower=False)

    self.assertEqual(lookup_dict[4]['HEADER2'], 'FOO')
    self.assertEqual(lookup_dict[4]['header3'], 'bar')
    self.assertEqual(lookup_dict[1]['header1'], 1)
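# Delimiter detection like this is commonly built on the standard
# library's csv.Sniffer.  Whether build_lookup_from_csv uses Sniffer
# internally is an assumption, but a standalone sketch would be:
#
#     import csv
#     with open(csv_file, newline='') as file_obj:
#         dialect = csv.Sniffer().sniff(file_obj.read(1024))
#     print(dialect.delimiter)  # ';' for the file written above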
def test_column_subset(self):
    """utils: test column subset is properly returned."""
    from natcap.invest import utils

    csv_text = ("lucode,desc,val1,val2\n"
                "1,corn,0.5,2\n"
                "2,bread,1,4\n"
                "3,beans,0.5,4\n"
                "4,butter,9,1")
    table_path = os.path.join(self.workspace_dir, 'table.csv')
    with open(table_path, 'w') as table_file:
        table_file.write(csv_text)

    # Only 'val1' and 'val2' are requested; 'desc' should be excluded,
    # while the key column is always included.
    result = utils.build_lookup_from_csv(
        table_path, 'lucode', to_lower=True,
        column_list=['val1', 'val2'])
    expected_result = {
        1: {'val1': 0.5, 'val2': 2, 'lucode': 1},
        2: {'val1': 1, 'val2': 4, 'lucode': 2},
        3: {'val1': 0.5, 'val2': 4, 'lucode': 3},
        4: {'val1': 9, 'val2': 1, 'lucode': 4}}
    self.assertDictEqual(result, expected_result)
def test_csv_utf8_bom_encoding(self):
    """utils: test that a CSV is read correctly with UTF-8 BOM encoding."""
    from natcap.invest import utils

    csv_file = os.path.join(self.workspace, 'csv.csv')
    # Writing with utf-8-sig will prepend the BOM.
    with open(csv_file, 'w', encoding='utf-8-sig') as file_obj:
        file_obj.write(
            textwrap.dedent(
                """
                header1,HEADER2,header3
                1,2,bar
                4,5,FOO
                """).strip())

    # Confirm that the file has the BOM prefix.
    with open(csv_file, 'rb') as file_obj:
        self.assertTrue(file_obj.read().startswith(codecs.BOM_UTF8))

    lookup_dict = utils.build_lookup_from_csv(csv_file, 'header1')

    # Assert the BOM prefix was correctly parsed and skipped.
    self.assertEqual(lookup_dict[4]['header2'], 5)
    self.assertEqual(lookup_dict[4]['header3'], 'foo')
    self.assertEqual(lookup_dict[1]['header1'], 1)
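# Reading the file back with encoding='utf-8-sig' is the standard way to
# strip a UTF-8 BOM transparently (a sketch, independent of however
# build_lookup_from_csv handles the BOM internally):
#
#     with open(csv_file, encoding='utf-8-sig') as file_obj:
#         header_line = file_obj.readline()
#     # header_line == 'header1,HEADER2,header3\n' -- no BOM attached.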
def test_archive_extraction(self):
    """Datastack: test archive extraction."""
    from natcap.invest import datastack
    from natcap.invest import utils

    params = {
        'blank': '',
        'a': 1,
        'b': 'hello there',
        'c': 'plain bytestring',
        'foo': os.path.join(self.workspace, 'foo.txt'),
        'bar': os.path.join(self.workspace, 'foo.txt'),
        'data_dir': os.path.join(self.workspace, 'data_dir'),
        'raster': os.path.join(DATA_DIR, 'dem'),
        'vector': os.path.join(DATA_DIR, 'watersheds.shp'),
        'simple_table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'),
        'spatial_table': os.path.join(self.workspace, 'spatial_table.csv'),
    }

    # Synthesize sample data.
    os.makedirs(params['data_dir'])
    for filename in ('foo.txt', 'bar.txt', 'baz.txt'):
        data_filepath = os.path.join(params['data_dir'], filename)
        with open(data_filepath, 'w') as textfile:
            textfile.write(filename)
    with open(params['foo'], 'w') as textfile:
        textfile.write('hello world!')

    # The spatial table references four datasets:
    #   1. a copy of the existing DEM
    #   2. a copy of the existing watersheds vector
    #   3. a new raster
    #   4. a new vector
    with open(params['spatial_table'], 'w') as spatial_csv:
        spatial_csv.write('ID,path\n')
        spatial_csv.write(f"1,{params['raster']}\n")
        spatial_csv.write(f"2,{params['vector']}\n")

        # Create a raster only referenced by the CSV.
        target_csv_raster_path = os.path.join(
            self.workspace, 'new_raster.tif')
        pygeoprocessing.new_raster_from_base(
            params['raster'], target_csv_raster_path,
            gdal.GDT_UInt16, [0])
        spatial_csv.write(f'3,{target_csv_raster_path}\n')

        # Create a vector only referenced by the CSV.
        target_csv_vector_path = os.path.join(
            self.workspace, 'new_vector.geojson')
        pygeoprocessing.shapely_geometry_to_vector(
            [shapely.geometry.Point(100, 100)],
            target_csv_vector_path,
            pygeoprocessing.get_raster_info(
                params['raster'])['projection_wkt'],
            'GeoJSON', ogr_geom_type=ogr.wkbPoint)
        spatial_csv.write(f'4,{target_csv_vector_path}\n')

    archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz')
    datastack.build_datastack_archive(
        params, 'test_datastack_modules.archive_extraction', archive_path)

    out_directory = os.path.join(self.workspace, 'extracted_archive')
    archive_params = datastack.extract_datastack_archive(
        archive_path, out_directory)

    model_array = pygeoprocessing.raster_to_numpy_array(
        archive_params['raster'])
    reg_array = pygeoprocessing.raster_to_numpy_array(params['raster'])
    numpy.testing.assert_allclose(model_array, reg_array)

    utils._assert_vectors_equal(
        archive_params['vector'], params['vector'])

    pandas.testing.assert_frame_equal(
        pandas.read_csv(archive_params['simple_table']),
        pandas.read_csv(params['simple_table']))

    for key in ('blank', 'a', 'b', 'c'):
        self.assertEqual(
            archive_params[key], params[key],
            f'Params differ for key {key}')

    for key in ('foo', 'bar'):
        self.assertTrue(
            filecmp.cmp(archive_params[key], params[key], shallow=False))

    spatial_csv_dict = utils.build_lookup_from_csv(
        archive_params['spatial_table'], 'ID', to_lower=True)
    spatial_csv_dir = os.path.dirname(archive_params['spatial_table'])
    numpy.testing.assert_allclose(
        pygeoprocessing.raster_to_numpy_array(
            os.path.join(spatial_csv_dir, spatial_csv_dict[3]['path'])),
        pygeoprocessing.raster_to_numpy_array(target_csv_raster_path))
    utils._assert_vectors_equal(
        os.path.join(spatial_csv_dir, spatial_csv_dict[4]['path']),
        target_csv_vector_path)