def __init__(self, key, controls_csv, tazset=None, puma_data_dir=None, fips_file=None,
             write_households_csv=None, write_persons_csv=None, write_append=False,
             start_hhid=1, start_persid=1):
    """Build the group-quarters household and person controls.

    Every argument is forwarded unchanged to ``SFCTAStarter.__init__``.
    Afterwards ``self.controls`` (presumably populated by that base
    initializer -- it is not assigned here before being read) is reduced to
    rows with a positive ``GQPOP`` and categorized into
    ``self.hh_controls`` and ``self.person_controls``, both indexed by
    ``SFTAZ``.
    """
    SFCTAStarter.__init__(self, key, controls_csv, tazset, puma_data_dir, fips_file,
                          write_households_csv, write_persons_csv, write_append,
                          start_hhid, start_persid)

    # Remove 0-group quarters controls
    has_group_quarters = self.controls['GQPOP'] > 0
    self.controls = self.controls[has_group_quarters]

    # GQPOP is the only household-level control, exposed as hhsize_cat == "1"
    gq_household_map = {("hhsize_cat", "1"): "GQPOP"}
    self.hh_controls = cat.categorize(self.controls, gq_household_map,
                                      index_cols=['SFTAZ'])
    # Sample of the resulting table:
    # cat_name   hhsize_cat
    # cat_value           1
    # SFTAZ
    # 1                   5
    # 2                   5
    # 3                   4
    # 4                   7
    # 5                   6
    # 6                  12
    # 7                   3
    # 8                   2
    # 9                   6
    # 10                 29

    # Person-level controls: worker status and age band
    gq_person_map = {
        ("gqworker_cat", "1"): "GQWKRS",
        ("gqworker_cat", "0"): "GQNONWKRS",
        ("gqage_cat", "0-64"): "GQAGE064",
        ("gqage_cat", "65+"): "GQAGE65P",
    }
    self.person_controls = cat.categorize(self.controls, gq_person_map,
                                          index_cols=['SFTAZ'])
def __init__(self, key, controls_csv, tazset=None, puma_data_dir=None, fips_file=None,
             write_households_csv=None, write_persons_csv=None, write_append=False,
             start_hhid=1, start_persid=1):
    """Build the household and person controls for regular households.

    Every argument is forwarded unchanged to ``SFCTAStarter.__init__``.
    Afterwards ``self.controls`` (presumably populated by that base
    initializer -- it is not assigned here before being read) is reduced to
    rows with a positive ``HHLDS`` and categorized into:

    * ``self.hh_controls`` -- income, household size, workers and household
      type categories;
    * ``self.person_controls`` -- age categories.

    Both tables are indexed by ``SFTAZ``.
    """
    SFCTAStarter.__init__(self, key, controls_csv, tazset, puma_data_dir, fips_file,
                          write_households_csv, write_persons_csv, write_append,
                          start_hhid, start_persid)

    # Remove 0-household controls
    self.controls = self.controls[self.controls['HHLDS'] > 0]
    # Debug aid: uncomment to run on the first two rows only
    # self.controls = self.controls.iloc[:2,]

    # Parenthesized so this also parses on Python 3; with a single argument
    # the Python 2 print statement emits exactly the same text.
    print("Household controls has length %d" % len(self.controls))

    self.hh_controls = cat.categorize(self.controls, {
        ("income_cat", "0-25k"): "HHINCQ1",
        ("income_cat", "25-45k"): "HHINCQ2",
        ("income_cat", "45-75k"): "HHINCQ3",
        ("income_cat", "75k+"): "HHINCQ4",
        ("hhsize_cat", "1"): "SZ1_HHLDS",
        ("hhsize_cat", "2"): "SZ2_HHLDS",
        ("hhsize_cat", "3"): "SZ3_HHLDS",
        ("hhsize_cat", "4"): "SZ4_HHLDS",
        ("hhsize_cat", "5+"): "SZ5_HHLDS",
        ("workers_cat", "0"): "WKR0_HHLDS",
        ("workers_cat", "1"): "WKR1_HHLDS",
        ("workers_cat", "2"): "WKR2_HHLDS",
        ("workers_cat", "3+"): "WKR3_HHLDS",
        ("htype_cat", "HAGE1K0"): "HAGE1KIDS0",
        ("htype_cat", "HAGE1K1"): "HAGE1KIDS1",
        ("htype_cat", "HAGE65KALL"): "HAGE65KIDSWHATEV"
    }, index_cols=['SFTAZ'])
    # print self.hh_controls.loc[1:10,:]
    #
    # NOTE(review): the sample below does not match the mapping above -- its
    # income bins are 0-30k/30-60k/60-100k/100k+ and it has no htype_cat
    # column -- so it appears to predate the current categories.
    #
    # cat_name  hhsize_cat                                  income_cat                            workers_cat
    # cat_value 1       2        3       4       5+       0-30k    100k+   30-60k   60-100k  0        1        2        3+
    # SFTAZ
    # 1         28.365  46.970   40.260  49.410  139.995  51.540   61.848  130.700  59.912   52.155   93.025   96.990   62.830
    # 2         36.663  54.540   45.147  51.207  115.443  54.976   73.970  83.514   90.540   48.783   81.507   93.627   79.083
    # 3         73.610  104.353  77.074  71.012  106.951  261.136  28.188  106.849  37.827   158.911  196.149  67.115   10.825
    # 4         45.133  67.140   55.577  63.037  142.113  67.002   90.663  102.678  111.657  60.053   100.337  115.257  97.353
    # 5         34.038  56.364   48.312  59.292  167.994  62.707   75.211  157.363  71.719   62.586   111.630  116.388  75.396
    # 6         65.403  94.302   78.078  85.176  184.041  144.312  50.742  138.288  173.658  84.669   148.551  159.705  114.075
    # 7         46.920  66.516   49.128  45.264  68.172   166.646  17.759  68.419   24.176   101.292  125.028  42.780   6.900
    # 8         54.282  74.883   56.898  53.301  87.636   67.002   74.937  95.038   90.023   61.476   126.222  104.967  34.335
    # 9         42.471  63.180   52.299  59.319  133.731  63.566   85.465  97.339   105.630  56.511   94.419   108.459  91.611
    # 10        55.480  77.140   60.800  62.700  123.880  90.195   64.092  101.710  124.003  68.780   134.520  122.740  53.960

    # todo: add HAGE1KIDS0, HAGE1KIDS1, HAGE1KIDSWHATEV
    # NOTE(review): htype_cat above already maps HAGE1KIDS0, HAGE1KIDS1 and
    # HAGE65KIDSWHATEV as household controls; confirm whether this todo is
    # still open.
    self.person_controls = cat.categorize(self.controls, {
        ("age_cat", "0-4"): "AGE0004",
        ("age_cat", "5-19"): "AGE0519",
        ("age_cat", "20-44"): "AGE2044",
        ("age_cat", "45-64"): "AGE4564",
        ("age_cat", "65+"): "AGE65P"
    }, index_cols=['SFTAZ'])
def __init__(self, key, controls_csv, tazset=None, puma_data_dir=None, fips_file=None,
             write_households_csv=None, write_persons_csv=None, write_append=False,
             start_hhid=1, start_persid=1):
    """Prepare group-quarters controls after the base starter has run.

    The arguments are handed straight through to ``SFCTAStarter.__init__``.
    This initializer then drops rows of ``self.controls`` whose ``GQPOP`` is
    not positive and derives two tables indexed by ``SFTAZ``:
    ``self.hh_controls`` and ``self.person_controls``.
    """
    SFCTAStarter.__init__(self, key, controls_csv, tazset, puma_data_dir, fips_file,
                          write_households_csv, write_persons_csv, write_append,
                          start_hhid, start_persid)

    # Remove 0-group quarters controls
    positive_gq_rows = self.controls['GQPOP'] > 0
    self.controls = self.controls[positive_gq_rows]

    # Household side: a single category fed by the GQPOP column
    household_categories = {("hhsize_cat", "1"): "GQPOP"}
    self.hh_controls = cat.categorize(self.controls, household_categories,
                                      index_cols=['SFTAZ'])
    # Example output:
    # cat_name   hhsize_cat
    # cat_value           1
    # SFTAZ
    # 1                   5
    # 2                   5
    # 3                   4
    # 4                   7
    # 5                   6
    # 6                  12
    # 7                   3
    # 8                   2
    # 9                   6
    # 10                 29

    # Person side: worker / non-worker and two age bands
    person_categories = {
        ("gqworker_cat", "1"): "GQWKRS",
        ("gqworker_cat", "0"): "GQNONWKRS",
        ("gqage_cat", "0-64"): "GQAGE064",
        ("gqage_cat", "65+"): "GQAGE65P",
    }
    self.person_controls = cat.categorize(self.controls, person_categories,
                                          index_cols=['SFTAZ'])
def __init__(self, key, controls_csv, tazset=None, puma_data_dir=None, fips_file=None,
             write_households_csv=None, write_persons_csv=None, write_append=False,
             start_hhid=1, start_persid=1):
    """Prepare household and person controls for regular households.

    The arguments are handed straight through to ``SFCTAStarter.__init__``.
    This initializer then drops rows of ``self.controls`` whose ``HHLDS`` is
    not positive, reports how many rows remain, and derives two tables
    indexed by ``SFTAZ``:

    * ``self.hh_controls`` -- income, household size, workers and household
      type categories;
    * ``self.person_controls`` -- age categories.
    """
    SFCTAStarter.__init__(self, key, controls_csv, tazset, puma_data_dir, fips_file,
                          write_households_csv, write_persons_csv, write_append,
                          start_hhid, start_persid)

    # Remove 0-household controls
    self.controls = self.controls[self.controls['HHLDS'] > 0]
    # Debug aid: uncomment to run on the first two rows only
    # self.controls = self.controls.iloc[:2,]

    # Written as a call so the module also parses on Python 3; a single
    # parenthesized argument prints identically under Python 2.
    print("Household controls has length %d" % len(self.controls))

    self.hh_controls = cat.categorize(
        self.controls,
        {
            ("income_cat", "0-25k"): "HHINCQ1",
            ("income_cat", "25-45k"): "HHINCQ2",
            ("income_cat", "45-75k"): "HHINCQ3",
            ("income_cat", "75k+"): "HHINCQ4",
            ("hhsize_cat", "1"): "SZ1_HHLDS",
            ("hhsize_cat", "2"): "SZ2_HHLDS",
            ("hhsize_cat", "3"): "SZ3_HHLDS",
            ("hhsize_cat", "4"): "SZ4_HHLDS",
            ("hhsize_cat", "5+"): "SZ5_HHLDS",
            ("workers_cat", "0"): "WKR0_HHLDS",
            ("workers_cat", "1"): "WKR1_HHLDS",
            ("workers_cat", "2"): "WKR2_HHLDS",
            ("workers_cat", "3+"): "WKR3_HHLDS",
            ("htype_cat", "HAGE1K0"): "HAGE1KIDS0",
            ("htype_cat", "HAGE1K1"): "HAGE1KIDS1",
            ("htype_cat", "HAGE65KALL"): "HAGE65KIDSWHATEV"
        },
        index_cols=['SFTAZ'])
    # print self.hh_controls.loc[1:10,:]
    #
    # NOTE(review): this sample output is out of date relative to the
    # mapping above (income bins 0-30k/30-60k/60-100k/100k+ and no
    # htype_cat column).
    #
    # cat_name  hhsize_cat                                  income_cat                            workers_cat
    # cat_value 1       2        3       4       5+       0-30k    100k+   30-60k   60-100k  0        1        2        3+
    # SFTAZ
    # 1         28.365  46.970   40.260  49.410  139.995  51.540   61.848  130.700  59.912   52.155   93.025   96.990   62.830
    # 2         36.663  54.540   45.147  51.207  115.443  54.976   73.970  83.514   90.540   48.783   81.507   93.627   79.083
    # 3         73.610  104.353  77.074  71.012  106.951  261.136  28.188  106.849  37.827   158.911  196.149  67.115   10.825
    # 4         45.133  67.140   55.577  63.037  142.113  67.002   90.663  102.678  111.657  60.053   100.337  115.257  97.353
    # 5         34.038  56.364   48.312  59.292  167.994  62.707   75.211  157.363  71.719   62.586   111.630  116.388  75.396
    # 6         65.403  94.302   78.078  85.176  184.041  144.312  50.742  138.288  173.658  84.669   148.551  159.705  114.075
    # 7         46.920  66.516   49.128  45.264  68.172   166.646  17.759  68.419   24.176   101.292  125.028  42.780   6.900
    # 8         54.282  74.883   56.898  53.301  87.636   67.002   74.937  95.038   90.023   61.476   126.222  104.967  34.335
    # 9         42.471  63.180   52.299  59.319  133.731  63.566   85.465  97.339   105.630  56.511   94.419   108.459  91.611
    # 10        55.480  77.140   60.800  62.700  123.880  90.195   64.092  101.710  124.003  68.780   134.520  122.740  53.960

    # todo: add HAGE1KIDS0, HAGE1KIDS1, HAGE1KIDSWHATEV
    # NOTE(review): htype_cat above already maps HAGE1KIDS0, HAGE1KIDS1 and
    # HAGE65KIDSWHATEV as household controls; confirm whether this todo is
    # still open.
    self.person_controls = cat.categorize(self.controls, {
        ("age_cat", "0-4"): "AGE0004",
        ("age_cat", "5-19"): "AGE0519",
        ("age_cat", "20-44"): "AGE2044",
        ("age_cat", "45-64"): "AGE4564",
        ("age_cat", "65+"): "AGE65P"
    }, index_cols=['SFTAZ'])
def test_categorize(acs_data, pums_data):
    """Categorize the ACS fixture, check its shape, then build the PUMS joint distribution."""

    def acs_sum(table, *cells):
        # Renders e.g. ("B01001", 3, 4) as "B01001_003E + B01001_004E"
        return " + ".join("%s_%03dE" % (table, cell) for cell in cells)

    person_expressions = {
        ("population", "total"): "B01001_001E",
        ("age", "19 and under"): acs_sum("B01001", 3, 4, 5, 6, 7,
                                         27, 28, 29, 30, 31),
        ("age", "20 to 35"): acs_sum("B01001", 8, 9, 10, 11, 12,
                                     32, 33, 34, 35, 36),
        ("age", "35 to 60"): acs_sum("B01001", 13, 14, 15, 16, 17,
                                     37, 38, 39, 40, 41),
        ("age", "above 60"): acs_sum("B01001", 18, 19, 20, 21, 22, 23, 24, 25,
                                     42, 43, 44, 45, 46, 47, 48, 49),
        ("race", "white"): "B02001_002E",
        ("race", "black"): "B02001_003E",
        ("race", "asian"): "B02001_005E",
        ("race", "other"): acs_sum("B02001", 4, 6, 7, 8),
        ("sex", "male"): "B01001_002E",
        ("sex", "female"): "B01001_026E",
    }
    p_acs_cat = cat.categorize(acs_data, person_expressions, index_cols=['NAME'])

    # Shape of the categorized table
    assert len(p_acs_cat) == 3
    assert len(p_acs_cat.columns) == 11
    assert len(p_acs_cat.columns.names) == 2
    assert p_acs_cat.columns[0][0] == "age"

    assert np.all(cat.sum_accross_category(p_acs_cat) < 2)

    # Row-level categorizers handed to joint_distribution; each returns the
    # category value for one record.
    def age_cat(r):
        age_bands = ((19, "19 and under"), (35, "20 to 35"), (60, "35 to 60"))
        for upper_bound, label in age_bands:
            if r.AGEP <= upper_bound:
                return label
        return "above 60"

    def race_cat(r):
        race_codes = ((1, "white"), (2, "black"), (6, "asian"))
        for code, label in race_codes:
            if r.RAC1P == code:
                return label
        return "other"

    def sex_cat(r):
        return "male" if r.SEX == 1 else "female"

    combinations = cat.category_combinations(p_acs_cat.columns)
    categorizers = {"age": age_cat, "race": race_cat, "sex": sex_cat}
    pums_data, jd_persons = cat.joint_distribution(pums_data, combinations, categorizers)
def __init__(self, key, state=None, county=None, tract=None):
    """Load household/person marginals from CSV and categorize them.

    Args:
        key: passed to ``Census(...)``; the resulting object is kept as
            ``self.c``.
        state: optional state used to subset the marginals. When None, no
            subsetting is done at all.
        county: optional county; applied only when ``state`` is given.
        tract: optional tract; applied only when ``state`` and ``county``
            are given. Converted with ``int()`` before comparison.

    Reads ``data/HHmarginals.csv`` and ``data/Personmarginals.csv`` relative
    to the current working directory and stores the categorized tables as
    ``self.h_acs_cat`` and ``self.p_acs_cat``, indexed by
    state / county / tract / block group.
    """
    self.c = c = Census(key)
    self.state = state
    self.county = county
    self.tract = tract

    # Previous Census API query, kept for reference:
    #income_columns = ['B19001_0%02dE' % i for i in range(1, 18)]
    #vehicle_columns = ['B08201_0%02dE' % i for i in range(1, 7)]
    #workers_columns = ['B08202_0%02dE' % i for i in range(1, 6)]
    #families_columns = ['B11001_001E', 'B11001_002E']
    #block_group_columns = income_columns + families_columns
    #tract_columns = vehicle_columns + workers_columns
    #h_acs = c.block_group_and_tract_query(block_group_columns,
    #                                      tract_columns, state, county,
    #                                      merge_columns=['tract', 'county',
    #                                                     'state'],
    #                                      block_group_size_attr="B11001_001E",
    #                                      tract_size_attr="B08201_001E",
    #                                      tract=tract)

    # HS
    # read the marginals
    h_acs = pd.read_csv('data/HHmarginals.csv', dtype={
        "state": "int32",
        "county": "int32",
        "tract": "int32",
        "block group": "object"
    })
    p_acs = pd.read_csv('data/Personmarginals.csv', dtype={
        "state": "int32",
        "county": "int32",
        "tract": "int32",
        "block group": "object"
    })

    # reduce the datasets if state (and optionally county and tract) are specified
    if state is not None:
        state_id, county_id = c.try_fips_lookup(state, county)
        h_ind = h_acs['state'] == int(state_id)
        p_ind = p_acs['state'] == int(state_id)
        if county is not None:
            h_ind = logical_and(h_ind, h_acs['county'] == int(county_id))
            p_ind = logical_and(p_ind, p_acs['county'] == int(county_id))
            # NOTE(review): the tract filter is nested under the county filter
            # here; the original layout was ambiguous -- confirm.
            if tract is not None:
                # Compare against the ``tract`` argument: the lookup above
                # yields only state and county ids, and no ``tract_id`` name
                # is ever bound, so using it raised NameError.
                h_ind = logical_and(h_ind, h_acs['tract'] == int(tract))
                p_ind = logical_and(p_ind, p_acs['tract'] == int(tract))
        h_acs = h_acs[h_ind]
        p_acs = p_acs[p_ind]

    # define the categories as
    # {(category_name, category_value): corresponding_column_in_marginals}
    self.h_acs_cat = cat.categorize(h_acs, {
        ("HHsize", "one"): "HH1p",
        ("HHsize", "two"): "HH2p",
        ("HHsize", "three"): "HH3p",
        ("HHsize", "four"): "HH4p",
        ("HHsize", "five"): "HH5p",
        ("HHsize", "six"): "HH6p",
        ("HHsize", "seven+"): "HH7p",
    }, index_cols=['state', 'county', 'tract', 'block group'])

    self.p_acs_cat = cat.categorize(p_acs, {
        ("age", "category 1"): "Age1",
        ("age", "category 2"): "Age2",
    }, index_cols=['state', 'county', 'tract', 'block group'])
h_acs=h_acs.loc[h_acs['block_group_id'].isin(all_block_group_ids)] p_acs=p_acs.loc[p_acs['block_group_id'].isin(all_block_group_ids)] # ============================================================================= # # create categorised versions # ============================================================================= # Households h_acs_cat = cat.categorize(h_acs, { # ("households", "total"): "B11001_001E", ("children", "yes"): "B11001_002E", ("children", "no"): "B11001_001E - B11001_002E", ("income", "lt35"): "B19001_002E + B19001_003E + B19001_004E + " "B19001_005E + B19001_006E + B19001_007E", ("income", "gt35-lt100"): "B19001_008E + B19001_009E + " "B19001_010E + B19001_011E + B19001_012E" "+ B19001_013E", ("income", "gt100"): "B19001_014E + B19001_015E + B19001_016E" "+ B19001_017E", ("cars", "none"): "B08201_002E", ("cars", "one"): "B08201_003E", ("cars", "two or more"): "B08201_004E + B08201_005E + B08201_006E", ("workers", "none"): "B08202_002E", ("workers", "one"): "B08202_003E", ("workers", "two or more"): "B08202_004E + B08202_005E", ("tenure", "owned"): "B25063_001E", ("tenure", "rented"): "B25075_001E" }, index_cols=['NAME']) h_acs_cat.head() # Persons p_acs_cat = cat.categorize(p_acs, {