Example #1
0
    def test_regenerate_blk_blk_xwalk(self):
        """Regenerate the base crosswalk archive, read it back, and compare
        the leading ``GJOIN2010`` values against the recorded IDs.
        """
        target_col = "GJOIN2010"
        expected_head = numpy.array(
            [
                "G10000100401001000",
                "G10000100401001001",
                "G10000100401001002",
                "G10000100401001003",
                "G10000100401001003",
            ]
        )

        # ``data_dir`` is both where the input archive lives and the path
        # handed to the regeneration routine as its second argument.
        xwalk_name = base_xwalk_name_fmat % (blk, _90, blk, _10, gj)
        archive_path = data_dir + xwalk_name + ".%s" % ZIP
        id_dtypes = nhgisxwalk.str_types(nhgisxwalk.ID_COLS)
        nhgisxwalk.regenerate_blk_blk_xwalk(
            archive_path, data_dir, target_col, id_dtypes, remove_unpacked=True
        )

        # Read the regenerated crosswalk back, typing both join columns
        # through ``str_types``.
        join_cols = ["GJOIN%s" % year for year in (_90, _10)]
        regenerated = nhgisxwalk.xwalk_df_from_csv(
            xwalk_name,
            path=data_dir,
            archived=True,
            remove_unpacked=True,
            dtype=nhgisxwalk.str_types(join_cols),
        )

        observed_head = regenerated[target_col].head().values
        numpy.testing.assert_array_equal(expected_head, observed_head)
Example #2
0
def fetch_base_xwalk(sg, tg, sy, ty):
    """Read an archived base crosswalk from ``data_dir``.

    The file name is built from ``base_xwalk_name_fmat`` using
    ``(sg, sy, tg, ty, gj)``, and the ``GJOIN<sy>`` / ``GJOIN<ty>`` columns
    are typed via ``nhgisxwalk.str_types``.

    Parameters
    ----------
    sg, tg
        Presumably the source and target geography codes -- confirm
        against callers.
    sy, ty
        Presumably the source and target years; they are interpolated into
        both the file name and the ``GJOIN`` column names.

    Returns
    -------
    The object returned by ``nhgisxwalk.xwalk_df_from_csv``.
    """
    join_cols = ["GJOIN%s" % year for year in (sy, ty)]
    return nhgisxwalk.xwalk_df_from_csv(
        base_xwalk_name_fmat % (sg, sy, tg, ty, gj),
        path=data_dir,
        archived=True,
        remove_unpacked=True,
        dtype=nhgisxwalk.str_types(join_cols),
    )
Example #3
0
 def test_xwalk_write_read_csv_from_class(self):
     """Write a ``GeoCrossWalk`` to CSV via ``cls=`` and check that the
     ``wt_pop`` column read back matches the in-memory values.
     """
     build_kws = {
         "source_year": _00,
         "target_year": _10,
         "source_geo": bgp,
         "target_geo": tr,
         "base_source_table": tab_data_path_2000,
         "input_var": input_vars_2000_SF1b,
         "weight_var": input_var_tags,
         "keep_base": False,
         "stfips": stfips,
     }
     written = nhgisxwalk.GeoCrossWalk(base_xwalk_blk2000_blk2010, **build_kws)

     # Round trip: write from the class instance, then read back by name.
     nhgisxwalk.xwalk_df_to_csv(cls=written)
     reloaded = nhgisxwalk.xwalk_df_from_csv(written.xwalk_name)

     weight_col = "wt_pop"
     numpy.testing.assert_allclose(
         written.xwalk[weight_col].values, reloaded[weight_col].values
     )
Example #4
0
    def test_prepare_data_product(self):
        """Package ``self.example_df`` as a data product, read the archive
        back, and compare its ``wt`` column with the recorded weights.
        """
        product_name = prod_xwalk_name_fmat % (bgp, _90, co, _10)
        nhgisxwalk.prepare_data_product(
            self.example_df, product_name, data_dir + product_name, remove=True
        )

        # Read the archived product back from ``data_dir``.
        reloaded = nhgisxwalk.xwalk_df_from_csv(
            product_name, path=data_dir, archived=True, remove_unpacked=True
        )

        expected_wt = numpy.array([1.0, 0.3, 0.7, 1.0, 1.0])
        numpy.testing.assert_array_equal(expected_wt, reloaded["wt"].values)
Example #5
0
    def test_split_xwalk(self):
        """Split the base crosswalk on ``GJOIN2010``, read back the subset
        suffixed with ``stfips``, and compare its leading IDs with the
        recorded values.
        """
        target_col = "GJOIN2010"
        expected_head = numpy.array(
            [
                "G10000100401001000",
                "G10000100401001001",
                "G10000100401001002",
                "G10000100401001003",
                "G10000100401001003",
            ]
        )

        # The split output goes under "<data_dir><xwalk_name>_state".
        xwalk_name = base_xwalk_name_fmat % (blk, _90, blk, _10, gj)
        split_dir = data_dir + xwalk_name + "_state"
        nhgisxwalk.split_xwalk(
            base_xwalk_blk1990_blk2010,
            target_col,
            xwalk_name,
            gj,
            fpath=split_dir,
            sort_by=nhgisxwalk.SORT_BYS[xwalk_name],
        )

        # Read the per-``stfips`` file back, typing both join columns
        # through ``str_types``.
        join_cols = ["GJOIN%s" % year for year in (_90, _10)]
        subset = nhgisxwalk.xwalk_df_from_csv(
            xwalk_name + "_%s" % stfips,
            path=split_dir + "/",
            archived=True,
            remove_unpacked=True,
            dtype=nhgisxwalk.str_types(join_cols),
        )

        observed_head = subset[target_col].head().values
        numpy.testing.assert_array_equal(expected_head, observed_head)
Example #6
0
    def test_generate_data_product(self):
        """Generate a data product and check rows 13-16 of the crosswalk.

        The ID columns must match the recorded strings exactly, and the
        weight columns must match the recorded values to within an absolute
        tolerance of ``1e-6`` (the recorded weights carry 8 decimals).
        """
        # records known data values
        knw_str_vals = numpy.array([
            [
                "G100001090444999990421009999999219012",
                "G1000010042100",
                "10001042100",
            ],
            [
                "G100001090444999990421009999999999921",
                "G1000010042100",
                "10001042100",
            ],
            [
                "G100001090444999990421009999999999921",
                "G1000010042201",
                "10001042201",
            ],
            [
                "G100001090444999990421009999999999922",
                "G1000010042100",
                "10001042100",
            ],
        ])
        knw_num_vals = numpy.array([
            [1.0, 1.0, 1.0, 1.0],
            [0.99766436, 0.99716625, 0.99714829, 0.99727768],
            [0.00233564, 0.00283375, 0.00285171, 0.00272232],
            [1.0, 1.0, 1.0, 1.0],
        ])

        # generate the product
        xwalk_args = {
            "source_year": _90,
            "target_year": _10,
            "source_geo": bgp,
            "target_geo": tr,
            "base_source_table": tab_data_path_1990,
            "supp_source_table": supplement_data_path_90,
            "input_var": input_vars_1990,
            "weight_var": input_var_tags,
            "keep_base": False,
            "add_geoid": True,
        }
        nhgisxwalk.generate_data_product(base_xwalk_blk1990_blk2010,
                                         xwalk_args, data_dir)

        # record observed: an in-memory crosswalk built with the same inputs
        # (without the ``keep_base`` / ``add_geoid`` overrides used above)
        obs_xwalk = nhgisxwalk.GeoCrossWalk(
            base_xwalk_blk1990_blk2010,
            source_year=_90,
            target_year=_10,
            source_geo=bgp,
            target_geo=tr,
            base_source_table=tab_data_path_1990,
            supp_source_table=supplement_data_path_90,
            input_var=input_vars_1990,
            weight_var=input_var_tags,
        )

        # test
        id_cols = ["bgp1990gj", "tr2010gj", "tr2010ge"]
        data_types = nhgisxwalk.str_types(id_cols)
        from_csv_kws = {
            "path": data_dir,
            "archived": True,
            "remove_unpacked": True,
        }
        read_csv_kws = {"dtype": data_types}
        # NOTE(review): ``read_xwalk`` is not compared below; the assertions
        # use the in-memory ``obs_xwalk``. The read is kept because it opens
        # the archive under ``data_dir`` by the crosswalk's name -- confirm
        # whether the assertions were meant to target ``read_xwalk`` instead.
        read_xwalk = nhgisxwalk.xwalk_df_from_csv(obs_xwalk.xwalk_name,
                                                  **from_csv_kws,
                                                  **read_csv_kws)
        ix1, ix2 = 13, 17
        obs_str_vals = obs_xwalk.xwalk[id_cols][ix1:ix2].values
        wgt_cols = ["wt_pop", "wt_fam", "wt_hh", "wt_hu"]
        obs_num_vals = obs_xwalk.xwalk[wgt_cols][ix1:ix2].values
        numpy.testing.assert_equal(knw_str_vals, obs_str_vals)
        # ``atol`` is an absolute tolerance, not a number of decimals: a value
        # of 6 would accept any weights in [0, 1], making the check vacuous.
        numpy.testing.assert_allclose(knw_num_vals, obs_num_vals, atol=1e-6)