def test_xwalk_state_bgp2000_tr2010_unrounded(self):
    """Spot-check the unrounded 2000 BGP to 2010 tract crosswalk.

    The crosswalk is built with ``weights_precision=None`` and the
    positional slice ``1025:1029`` of its ID and weight columns is
    compared against recorded values.
    """
    expected_ids = numpy.array(
        [
            ["G10000509355299999051304R1", "G1000050051305", "10005051305"],
            ["G10000509355299999051304R1", "G1000050051306", "10005051306"],
            ["G10000509355299999051304R1", "G1000050051400", "10005051400"],
            ["G10000509355299999051304R2", "G1000050051305", "10005051305"],
        ]
    )
    expected_weights = numpy.array(
        [
            [6.80605382e-01, 6.33909150e-01, 6.57366450e-01, 6.59501671e-01],
            [3.19167389e-01, 3.65781711e-01, 3.42281879e-01, 3.40110906e-01],
            [2.27229039e-04, 3.09138740e-04, 3.51671251e-04, 3.87423412e-04],
            [8.02660754e-01, 8.17567568e-01, 8.20895522e-01, 8.36236934e-01],
        ]
    )

    crosswalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk2000_blk2010,
        source_year=_00,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_2000,
        input_var=input_vars_2000_SF1b,
        weight_var=input_var_tags,
        stfips=stfips,
        vectorized=False,
        keep_base=False,
        weights_precision=None,
    )

    # Same rows as the literal ``[1025:1029]`` slice.
    rows = slice(1025, 1029)
    label_columns = ["bgp2000gj", "tr2010gj", "tr2010ge"]
    weight_columns = ["wt_pop", "wt_fam", "wt_hh", "wt_hu"]
    observed_ids = crosswalk.xwalk[label_columns][rows].values
    observed_weights = crosswalk.xwalk[weight_columns][rows].values

    numpy.testing.assert_equal(expected_ids, observed_ids)
    numpy.testing.assert_allclose(expected_weights, observed_weights)
def test_xwalk_extract_state_failure_bgp2000_tr2010(self):
    """Expect ``extract_state`` to raise when asked for the "nan" state.

    A 2000 BGP to 2010 tract crosswalk is built with ``stfips`` supplied
    and ``keep_base=True``; extracting "nan" must raise ``RuntimeError``
    for the target and source columns of both the crosswalk and the
    base crosswalk.
    """
    crosswalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk2000_blk2010,
        source_year=_00,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_2000,
        input_var=input_vars_2000_SF1b,
        weight_var=input_var_tags,
        keep_base=True,
        stfips=stfips,
    )

    # (frame attribute, column-name attribute) pairs, looked up lazily so
    # each attribute is only read inside its own ``assertRaises`` block.
    failing_cases = (
        ("xwalk", "target"),
        ("xwalk", "source"),
        ("base", "base_target_col"),
        ("base", "base_source_col"),
    )
    for frame_attr, column_attr in failing_cases:
        with self.assertRaises(RuntimeError):
            nhgisxwalk.extract_state(
                getattr(crosswalk, frame_attr),
                "nan",
                crosswalk.xwalk_name,
                getattr(crosswalk, column_attr),
            )
def test_xwalk_source_code_co(self):
    """Expect ``AttributeError`` when ``co`` is used as the source geography."""
    constructor_kwargs = {
        "source_year": _90,
        "target_year": _10,
        "source_geo": co,
        "target_geo": tr,
        "base_source_table": tab_data_path_1990,
        "input_var": input_vars_1990,
        "weight_var": input_var_tags,
    }
    # Only the constructor call itself is expected to trigger the error.
    with self.assertRaises(AttributeError):
        nhgisxwalk.GeoCrossWalk(base_xwalk_blk1990_blk2010, **constructor_kwargs)
def test_xwalk_uneven_input(self):
    """Expect ``RuntimeError`` when ``weight_var`` is ``["one", "two"]``.

    NOTE(review): presumably the error comes from ``weight_var`` and
    ``input_var`` having different lengths; confirm against the
    constructor's validation.
    """
    constructor_kwargs = {
        "source_year": _00,
        "target_year": _10,
        "source_geo": bgp,
        "target_geo": tr,
        "base_source_table": tab_data_path_2000,
        "input_var": input_vars_2000_SF1b,
        "weight_var": ["one", "two"],
        "stfips": stfips,
    }
    # Only the constructor call itself is expected to trigger the error.
    with self.assertRaises(RuntimeError):
        nhgisxwalk.GeoCrossWalk(base_xwalk_blk2000_blk2010, **constructor_kwargs)
def test_xwalk_state_bgp1990_tr2010(self):
    """Spot-check the 1990 BGP to 2010 tract crosswalk for one state.

    Builds the crosswalk (with the 1990 supplemental table and ``stfips``
    supplied) and compares the positional slice ``13:17`` of its ID and
    weight columns against recorded values.
    """
    knw_str_vals = numpy.array(
        [
            [
                "G100001090444999990421009999999219012",
                "G1000010042100",
                "10001042100",
            ],
            [
                "G100001090444999990421009999999999921",
                "G1000010042100",
                "10001042100",
            ],
            [
                "G100001090444999990421009999999999921",
                "G1000010042201",
                "10001042201",
            ],
            [
                "G100001090444999990421009999999999922",
                "G1000010042100",
                "10001042100",
            ],
        ]
    )
    knw_num_vals = numpy.array(
        [
            [1.0, 1.0, 1.0, 1.0],
            [0.99766436, 0.99716625, 0.99714829, 0.99727768],
            [0.00233564, 0.00283375, 0.00285171, 0.00272232],
            [1.0, 1.0, 1.0, 1.0],
        ]
    )
    obs_xwalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk1990_blk2010,
        source_year=_90,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_1990,
        supp_source_table=supplement_data_path_90,
        input_var=input_vars_1990,
        weight_var=input_var_tags,
        stfips=stfips,
        vectorized=False,
        keep_base=False,
    )
    ix1, ix2 = 13, 17
    id_cols = ["bgp1990gj", "tr2010gj", "tr2010ge"]
    obs_str_vals = obs_xwalk.xwalk[id_cols][ix1:ix2].values
    wgt_cols = ["wt_pop", "wt_fam", "wt_hh", "wt_hu"]
    obs_num_vals = obs_xwalk.xwalk[wgt_cols][ix1:ix2].values
    numpy.testing.assert_equal(knw_str_vals, obs_str_vals)
    # ``atol`` is an absolute tolerance, not a number of decimals: the
    # previous ``atol=6`` accepted any weight within 6.0 of the known
    # value, which made this check vacuous. The known values are recorded
    # to 8 decimals, so 1e-6 still absorbs their rounding.
    numpy.testing.assert_allclose(knw_num_vals, obs_num_vals, atol=1e-6)
def test_xwalk_bgp1990_tr2010_no_supp_error(self):
    """Expect ``RuntimeError`` for a 1990 BGP crosswalk without a supplement.

    The constructor is called with ``supp_source_table=None``.
    """
    constructor_kwargs = {
        "source_year": _90,
        "target_year": _10,
        "source_geo": bgp,
        "target_geo": tr,
        "base_source_table": tab_data_path_1990,
        "supp_source_table": None,
        "input_var": input_vars_1990,
        "weight_var": input_var_tags,
        "stfips": stfips,
        "vectorized": False,
        "keep_base": False,
    }
    # Only the constructor call itself is expected to trigger the error.
    with self.assertRaises(RuntimeError):
        nhgisxwalk.GeoCrossWalk(base_xwalk_blk1990_blk2010, **constructor_kwargs)
def test_xwalk_write_read_csv_from_class(self):
    """Round-trip a crosswalk through CSV and compare the ``wt_pop`` column.

    The crosswalk object is written with ``xwalk_df_to_csv(cls=...)`` and
    read back by name with ``xwalk_df_from_csv``.
    """
    written = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk2000_blk2010,
        source_year=_00,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_2000,
        input_var=input_vars_2000_SF1b,
        weight_var=input_var_tags,
        keep_base=False,
        stfips=stfips,
    )

    nhgisxwalk.xwalk_df_to_csv(cls=written)
    reloaded = nhgisxwalk.xwalk_df_from_csv(written.xwalk_name)

    weight_column = "wt_pop"
    expected = written.xwalk[weight_column].values
    observed = reloaded[weight_column].values
    numpy.testing.assert_allclose(expected, observed)
def test_xwalk_extract_state_bgp1990_tr2010(self):
    """Check "nan" state extraction on the 1990 BGP to 2010 tract crosswalk.

    The crosswalk is built without ``stfips`` and with ``keep_base=True``.
    Extracting "nan" by target column must give empty results for both
    the crosswalk and the base; extracting by source column must give a
    first crosswalk row with a NaN source ID and the recorded target IDs,
    and a base result of shape ``(149, 6)``.
    """
    expected_xwalk_by_target = numpy.empty((0, 7))
    expected_xwalk_by_source = numpy.array(
        [[numpy.nan, "G1000050990000", "10005990000", 0.0, 0.0, 0.0, 0.0]],
        dtype=object,
    )
    expected_base_by_target = numpy.empty((0, 6))
    expected_base_by_source_shape = (149, 6)

    crosswalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk1990_blk2010,
        source_year=_90,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_1990,
        supp_source_table=supplement_data_path_90,
        input_var=input_vars_1990,
        weight_var=input_var_tags,
        keep_base=True,
    )

    def nan_records(frame, column):
        # Values of the rows ``extract_state`` returns for the "nan" state.
        extracted = nhgisxwalk.extract_state(
            frame, "nan", crosswalk.xwalk_name, column
        )
        return extracted.values

    observed = nan_records(crosswalk.xwalk, crosswalk.target)
    numpy.testing.assert_array_equal(expected_xwalk_by_target, observed)

    observed = nan_records(crosswalk.xwalk, crosswalk.source)
    # Only the source ID and the two target IDs of the first row are
    # compared; the weight columns are not checked here.
    numpy.testing.assert_array_equal(
        expected_xwalk_by_source[0, 0], observed[0, 0]
    )
    numpy.testing.assert_array_equal(
        expected_xwalk_by_source[0, 1:3], observed[0, 1:3]
    )

    observed = nan_records(crosswalk.base, crosswalk.base_target_col)
    numpy.testing.assert_array_equal(expected_base_by_target, observed)

    observed = nan_records(crosswalk.base, crosswalk.base_source_col)
    self.assertEqual(expected_base_by_source_shape, observed.shape)
def test_xwalk_extract_unique_stfips_df_bgp2000_tr2010(self):
    """Check ``extract_unique_stfips`` when given a dataframe and column name.

    For the 2000 BGP to 2010 tract crosswalk the target column must
    yield ``{"10"}`` and the source column ``{"10", "34"}``.
    """
    expected_target = {"10"}
    expected_source = {"10", "34"}

    crosswalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk2000_blk2010,
        source_year=_00,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_2000,
        input_var=input_vars_2000_SF1b,
        weight_var=input_var_tags,
        keep_base=False,
        stfips=stfips,
    )

    observed_target = nhgisxwalk.extract_unique_stfips(
        df=crosswalk.xwalk, endpoint="tr2010gj"
    )
    self.assertEqual(expected_target, observed_target)

    observed_source = nhgisxwalk.extract_unique_stfips(
        df=crosswalk.xwalk, endpoint="bgp2000gj"
    )
    self.assertEqual(expected_source, observed_source)
def test_xwalk_extract_unique_stfips_cls_bgp1990_tr2010(self):
    """Check ``extract_unique_stfips`` when given a crosswalk object.

    For the 1990 BGP to 2010 tract crosswalk the "target" endpoint must
    yield ``{"10"}`` and the "source" endpoint ``{"10", "34", "nan"}``.
    """
    expected_target = {"10"}
    expected_source = {"10", "34", "nan"}

    crosswalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk1990_blk2010,
        source_year=_90,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_1990,
        supp_source_table=supplement_data_path_90,
        input_var=input_vars_1990,
        weight_var=input_var_tags,
        keep_base=False,
        stfips=stfips,
    )

    observed_target = nhgisxwalk.extract_unique_stfips(
        cls=crosswalk, endpoint="target"
    )
    self.assertEqual(expected_target, observed_target)

    observed_source = nhgisxwalk.extract_unique_stfips(
        cls=crosswalk, endpoint="source"
    )
    self.assertEqual(expected_source, observed_source)
def test_xwalk_extract_state_bgp2000_tr2010(self):
    """Check "nan" state extraction on the 2000 BGP to 2010 tract crosswalk.

    The crosswalk is built without ``stfips`` and with ``keep_base=True``.
    Extracting "nan" must give an empty result for the target and source
    columns of both the crosswalk (7 columns) and the base (6 columns).
    """
    empty_xwalk = numpy.empty((0, 7))
    empty_base = numpy.empty((0, 6))

    crosswalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk2000_blk2010,
        source_year=_00,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_2000,
        input_var=input_vars_2000_SF1b,
        weight_var=input_var_tags,
        keep_base=True,
    )

    def nan_records(frame, column):
        # Values of the rows ``extract_state`` returns for the "nan" state.
        extracted = nhgisxwalk.extract_state(
            frame, "nan", crosswalk.xwalk_name, column
        )
        return extracted.values

    # Same four checks, in the same order, as separate statements would give.
    checks = (
        (empty_xwalk, "xwalk", "target"),
        (empty_xwalk, "xwalk", "source"),
        (empty_base, "base", "base_target_col"),
        (empty_base, "base", "base_source_col"),
    )
    for expected, frame_attr, column_attr in checks:
        observed = nan_records(
            getattr(crosswalk, frame_attr), getattr(crosswalk, column_attr)
        )
        numpy.testing.assert_array_equal(expected, observed)
def test_generate_data_product(self):
    """Generate the 1990 BGP to 2010 tract data product and spot-check it.

    Runs ``generate_data_product`` into ``data_dir``, rebuilds the same
    crosswalk in memory, reads the archived CSV back by crosswalk name,
    and compares the positional slice ``13:17`` of the in-memory ID and
    weight columns against recorded values.
    """
    # records known data values
    knw_str_vals = numpy.array(
        [
            [
                "G100001090444999990421009999999219012",
                "G1000010042100",
                "10001042100",
            ],
            [
                "G100001090444999990421009999999999921",
                "G1000010042100",
                "10001042100",
            ],
            [
                "G100001090444999990421009999999999921",
                "G1000010042201",
                "10001042201",
            ],
            [
                "G100001090444999990421009999999999922",
                "G1000010042100",
                "10001042100",
            ],
        ]
    )
    knw_num_vals = numpy.array(
        [
            [1.0, 1.0, 1.0, 1.0],
            [0.99766436, 0.99716625, 0.99714829, 0.99727768],
            [0.00233564, 0.00283375, 0.00285171, 0.00272232],
            [1.0, 1.0, 1.0, 1.0],
        ]
    )

    # generate the product
    xwalk_args = {
        "source_year": _90,
        "target_year": _10,
        "source_geo": bgp,
        "target_geo": tr,
        "base_source_table": tab_data_path_1990,
        "supp_source_table": supplement_data_path_90,
        "input_var": input_vars_1990,
        "weight_var": input_var_tags,
        "keep_base": False,
        "add_geoid": True,
    }
    nhgisxwalk.generate_data_product(base_xwalk_blk1990_blk2010, xwalk_args, data_dir)

    # record observed
    obs_xwalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk1990_blk2010,
        source_year=_90,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_1990,
        supp_source_table=supplement_data_path_90,
        input_var=input_vars_1990,
        weight_var=input_var_tags,
    )

    # test
    id_cols = ["bgp1990gj", "tr2010gj", "tr2010ge"]
    data_types = nhgisxwalk.str_types(id_cols)
    from_csv_kws = {
        "path": data_dir,
        "archived": True,
        "remove_unpacked": True,
    }
    read_csv_kws = {"dtype": data_types}
    # Smoke check: the archived product must be readable by crosswalk name.
    # NOTE(review): the frame read back is not compared against the known
    # values below (only the in-memory crosswalk is); consider asserting
    # on it once its row order/content is confirmed to match.
    nhgisxwalk.xwalk_df_from_csv(
        obs_xwalk.xwalk_name, **from_csv_kws, **read_csv_kws
    )

    ix1, ix2 = 13, 17
    obs_str_vals = obs_xwalk.xwalk[id_cols][ix1:ix2].values
    wgt_cols = ["wt_pop", "wt_fam", "wt_hh", "wt_hu"]
    obs_num_vals = obs_xwalk.xwalk[wgt_cols][ix1:ix2].values
    numpy.testing.assert_equal(knw_str_vals, obs_str_vals)
    # ``atol`` is an absolute tolerance, not a number of decimals: the
    # previous ``atol=6`` accepted any weight within 6.0 of the known
    # value, which made this check vacuous. The known values are recorded
    # to 8 decimals, so 1e-6 still absorbs their rounding.
    numpy.testing.assert_allclose(knw_num_vals, obs_num_vals, atol=1e-6)