def test_regenerate_blk_blk_xwalk(self):
    """Regenerate the block-to-block base crosswalk and check its leading IDs.

    The crosswalk named by ``base_xwalk_name_fmat`` is regenerated from its
    zipped archive in ``data_dir``, read back with string ID columns, and the
    ``GJOIN2010`` values returned by ``.head()`` are compared with the
    expected identifiers.
    """
    xwalk_name = base_xwalk_name_fmat % (blk, _90, blk, _10, gj)
    archive_path = data_dir + xwalk_name + ".%s" % ZIP
    id_dtypes = nhgisxwalk.str_types(nhgisxwalk.ID_COLS)
    nhgisxwalk.regenerate_blk_blk_xwalk(
        archive_path, data_dir, "GJOIN2010", id_dtypes, remove_unpacked=True
    )

    # Read the regenerated crosswalk back, forcing both GJOIN columns to str.
    gjoin = "GJOIN%s"
    source_col, target_col = gjoin % _90, gjoin % _10
    column_dtypes = nhgisxwalk.str_types([source_col, target_col])
    regenerated = nhgisxwalk.xwalk_df_from_csv(
        xwalk_name,
        path=data_dir,
        archived=True,
        remove_unpacked=True,
        dtype=column_dtypes,
    )

    expected_ids = numpy.array(
        [
            "G10000100401001000",
            "G10000100401001001",
            "G10000100401001002",
            "G10000100401001003",
            "G10000100401001003",
        ]
    )
    leading_ids = regenerated["GJOIN2010"].head().values
    numpy.testing.assert_array_equal(expected_ids, leading_ids)
def fetch_base_xwalk(sg, tg, sy, ty):
    """Load an archived base crosswalk from ``data_dir``.

    Parameters
    ----------
    sg, tg
        Values interpolated into ``base_xwalk_name_fmat`` (first and third
        slots) -- presumably the source and target geography tags.
    sy, ty
        Values interpolated into ``base_xwalk_name_fmat`` (second and fourth
        slots) and used to build the ``GJOIN<sy>`` / ``GJOIN<ty>`` column
        names that are read as strings -- presumably source and target years.

    Returns
    -------
    The object returned by ``nhgisxwalk.xwalk_df_from_csv`` for that name.
    """
    name = base_xwalk_name_fmat % (sg, sy, tg, ty, gj)
    # Both join-ID columns must be read as strings.
    gjoin_columns = ["GJOIN%s" % year for year in (sy, ty)]
    return nhgisxwalk.xwalk_df_from_csv(
        name,
        path=data_dir,
        archived=True,
        remove_unpacked=True,
        dtype=nhgisxwalk.str_types(gjoin_columns),
    )
def test_xwalk_write_read_csv_from_class(self):
    """Round-trip a ``GeoCrossWalk`` through CSV and compare ``wt_pop``.

    The crosswalk is written with ``xwalk_df_to_csv(cls=...)``, read back by
    its ``xwalk_name`` with ``xwalk_df_from_csv``, and the ``wt_pop`` column
    of the reloaded frame is compared with the in-memory one.
    """
    build_kws = {
        "source_year": _00,
        "target_year": _10,
        "source_geo": bgp,
        "target_geo": tr,
        "base_source_table": tab_data_path_2000,
        "input_var": input_vars_2000_SF1b,
        "weight_var": input_var_tags,
        "keep_base": False,
        "stfips": stfips,
    }
    crosswalk = nhgisxwalk.GeoCrossWalk(base_xwalk_blk2000_blk2010, **build_kws)

    nhgisxwalk.xwalk_df_to_csv(cls=crosswalk)
    reloaded = nhgisxwalk.xwalk_df_from_csv(crosswalk.xwalk_name)

    numpy.testing.assert_allclose(
        crosswalk.xwalk["wt_pop"].values, reloaded["wt_pop"].values
    )
def test_prepare_data_product(self):
    """Package ``self.example_df`` as a data product and check ``wt`` on reload.

    ``prepare_data_product`` is given the example frame and an output path
    under ``data_dir``; the result is then read back with
    ``xwalk_df_from_csv`` and its ``wt`` column compared with the expected
    weights.
    """
    product_name = prod_xwalk_name_fmat % (bgp, _90, co, _10)
    product_path = data_dir + product_name
    nhgisxwalk.prepare_data_product(
        self.example_df, product_name, product_path, remove=True
    )

    # Read the packaged crosswalk back in.
    reloaded = nhgisxwalk.xwalk_df_from_csv(
        product_name, path=data_dir, archived=True, remove_unpacked=True
    )

    expected_weights = numpy.array([1.0, 0.3, 0.7, 1.0, 1.0])
    numpy.testing.assert_array_equal(expected_weights, reloaded["wt"].values)
def test_split_xwalk(self):
    """Split the block-to-block base crosswalk and check one split's leading IDs.

    ``split_xwalk`` writes under ``<data_dir><xwalk_name>_state``; the file
    suffixed with ``stfips`` is then read back with string GJOIN columns and
    the ``GJOIN2010`` values returned by ``.head()`` are compared with the
    expected identifiers.
    """
    xwalk_name = base_xwalk_name_fmat % (blk, _90, blk, _10, gj)
    split_dir = data_dir + xwalk_name + "_state"
    sort_columns = nhgisxwalk.SORT_BYS[xwalk_name]
    nhgisxwalk.split_xwalk(
        base_xwalk_blk1990_blk2010,
        "GJOIN2010",
        xwalk_name,
        gj,
        fpath=split_dir,
        sort_by=sort_columns,
    )

    # Read the split crosswalk back, forcing both GJOIN columns to str.
    gjoin = "GJOIN%s"
    source_col, target_col = gjoin % _90, gjoin % _10
    column_dtypes = nhgisxwalk.str_types([source_col, target_col])
    split_frame = nhgisxwalk.xwalk_df_from_csv(
        xwalk_name + "_%s" % stfips,
        path=split_dir + "/",
        archived=True,
        remove_unpacked=True,
        dtype=column_dtypes,
    )

    expected_ids = numpy.array(
        [
            "G10000100401001000",
            "G10000100401001001",
            "G10000100401001002",
            "G10000100401001003",
            "G10000100401001003",
        ]
    )
    leading_ids = split_frame["GJOIN2010"].head().values
    numpy.testing.assert_array_equal(expected_ids, leading_ids)
def test_generate_data_product(self):
    """Generate a data product and check a slice of crosswalk IDs and weights.

    ``generate_data_product`` is run with ``xwalk_args`` to write the product
    under ``data_dir``.  A ``GeoCrossWalk`` is then built in memory, the
    written product is read back by that crosswalk's ``xwalk_name``, and rows
    13 through 16 of the in-memory crosswalk are compared with known ID
    strings and weights.
    """
    # Known ID strings for rows 13..16 (columns: bgp1990gj, tr2010gj, tr2010ge).
    knw_str_vals = numpy.array(
        [
            [
                "G100001090444999990421009999999219012",
                "G1000010042100",
                "10001042100",
            ],
            [
                "G100001090444999990421009999999999921",
                "G1000010042100",
                "10001042100",
            ],
            [
                "G100001090444999990421009999999999921",
                "G1000010042201",
                "10001042201",
            ],
            [
                "G100001090444999990421009999999999922",
                "G1000010042100",
                "10001042100",
            ],
        ]
    )
    # Known weights for the same rows (columns: wt_pop, wt_fam, wt_hh, wt_hu).
    knw_num_vals = numpy.array(
        [
            [1.0, 1.0, 1.0, 1.0],
            [0.99766436, 0.99716625, 0.99714829, 0.99727768],
            [0.00233564, 0.00283375, 0.00285171, 0.00272232],
            [1.0, 1.0, 1.0, 1.0],
        ]
    )

    # Generate the product.
    xwalk_args = {
        "source_year": _90,
        "target_year": _10,
        "source_geo": bgp,
        "target_geo": tr,
        "base_source_table": tab_data_path_1990,
        "supp_source_table": supplement_data_path_90,
        "input_var": input_vars_1990,
        "weight_var": input_var_tags,
        "keep_base": False,
        "add_geoid": True,
    }
    nhgisxwalk.generate_data_product(
        base_xwalk_blk1990_blk2010, xwalk_args, data_dir
    )

    # Build the crosswalk in memory to obtain the observed values.
    obs_xwalk = nhgisxwalk.GeoCrossWalk(
        base_xwalk_blk1990_blk2010,
        source_year=_90,
        target_year=_10,
        source_geo=bgp,
        target_geo=tr,
        base_source_table=tab_data_path_1990,
        supp_source_table=supplement_data_path_90,
        input_var=input_vars_1990,
        weight_var=input_var_tags,
    )

    id_cols = ["bgp1990gj", "tr2010gj", "tr2010ge"]
    wgt_cols = ["wt_pop", "wt_fam", "wt_hh", "wt_hu"]

    # Read the written product back (and clean up the unpacked files).
    # NOTE(review): the reloaded frame is not compared against the known
    # values; only the in-memory ``obs_xwalk`` is checked below. Confirm
    # whether the reloaded frame should be asserted on as well.
    nhgisxwalk.xwalk_df_from_csv(
        obs_xwalk.xwalk_name,
        path=data_dir,
        archived=True,
        remove_unpacked=True,
        dtype=nhgisxwalk.str_types(id_cols),
    )

    ix1, ix2 = 13, 17
    obs_str_vals = obs_xwalk.xwalk[id_cols][ix1:ix2].values
    obs_num_vals = obs_xwalk.xwalk[wgt_cols][ix1:ix2].values

    numpy.testing.assert_equal(knw_str_vals, obs_str_vals)
    # The known weights are recorded to 8 decimals, so an absolute tolerance
    # of 1e-6 is appropriate. The previous ``atol=6`` accepted any observed
    # value within 6 of the expected one, which made this check vacuous for
    # values of magnitude <= 1.
    numpy.testing.assert_allclose(knw_num_vals, obs_num_vals, atol=1e-6)