def test_shiftGeocodesUp():
    """Shift a 16-character node up level by level, checking geocode and geolevel each time."""
    levels = {
        16: 'Block',
        12: 'Block_Group',
        11: 'Tract',
        5: 'County',
        2: 'State',
        0: 'US',
    }
    node = nodes.GeounitNode(geocode='123456789abcdefg', geocode_dict=levels)

    # Expected (geocode, geolevel) after each successive shiftGeocodesUp() call.
    expected_steps = (
        ('123456789abc', 'Block_Group'),
        ('123456789ab', 'Tract'),
        ('12345', 'County'),
        ('12', 'State'),
        ('', 'US'),
    )
    for expected_geocode, expected_geolevel in expected_steps:
        node.shiftGeocodesUp()
        assert node.geocode == expected_geocode
        assert node.geolevel == expected_geolevel
def n():
    """Return a GeounitNode for geocode '123456789abcdefg' with small raw and housing histograms."""
    levels = {16: 'Block', 12: 'Block_Group', 11: 'Tract', 5: 'County'}

    # The 2x2x2x2 raw histogram repeats the same 2x2 cell in every position.
    cell = [[5, 0], [0, 4]]
    raw_counts = np.array([[cell, cell], [cell, cell]])
    housing_counts = np.array([0, 1, 1, 0, 0, 0, 7, 2])

    return nodes.GeounitNode(
        geocode='123456789abcdefg',
        geocode_dict=levels,
        raw=sparse.multiSparse(raw_counts),
        raw_housing=sparse.multiSparse(housing_counts),
    )
def test_wconstraints():
    """Nodes built from the same histograms, constraints and invariants must compare equal."""
    levels = {16: 'Block', 12: 'Block_Group', 11: 'Tract', 5: 'County'}

    # columns: 'hhgq', 'votingage', 'hispanic', 'cenrace', 'unique unitid' (shape 8,2,2,63 + unitUID)
    # each row is a person
    person_rows = np.array([
        [0, 1, 1, 20, 0],
        [1, 0, 0, 1, 1],
        [3, 1, 0, 10, 2],
        [3, 0, 0, 15, 2],
        [1, 1, 0, 15, 3],
    ])
    histogram, housing_hist = table2hists(
        person_rows,
        SchemaMaker.fromName(CC.SCHEMA_PL94),
        CC.ATTR_HHGQ,
    )

    invariants = InvariantsMaker.make(
        schema=CC.SCHEMA_PL94,
        raw=histogram,
        raw_housing=housing_hist,
        invariant_names=("tot", "gqhh_vect", "gqhh_tot", "va"),
    )

    creator = cPL94.ConstraintsCreator(
        hist_shape=(histogram.shape, housing_hist.shape),
        invariants=invariants,
        constraint_names=("total",),
    )
    constraints = creator.calculateConstraints().constraints_dict

    # Both nodes are constructed from exactly the same arguments.
    node_kwargs = dict(
        geocode='123456789abcdefg',
        geocode_dict=levels,
        raw=histogram,
        raw_housing=housing_hist,
        cons=constraints,
        invar=invariants,
    )
    first = nodes.GeounitNode(**node_kwargs)
    second = nodes.GeounitNode(**node_kwargs)

    assert first == second
def test_node_init():
    """Check parentGeocode and geolevel right after construction, for a block and a state node."""
    # A 16-character geocode: geolevel is 'Block', parent is the 12-character prefix.
    block_levels = {16: 'Block', 12: 'Block_Group', 11: 'Tract', 5: 'County'}
    block_node = nodes.GeounitNode(geocode='123456789abcdefg', geocode_dict=block_levels)
    assert block_node.parentGeocode == '123456789abc'
    assert block_node.geolevel == 'Block'

    # A 2-character geocode with no shorter level in the dict: parent is the empty string.
    state_levels = {16: 'Block', 12: 'Block_Group', 11: 'Tract', 5: 'County', 2: 'State'}
    state_node = nodes.GeounitNode(geocode='12', geocode_dict=state_levels)
    assert state_node.parentGeocode == ''
    assert state_node.geolevel == 'State'
def makeBlockNode(self, person_unit_arrays):
    """
    Build the block-level GeounitNode for one geocode from its table arrays.

    Inputs:
        person_unit_arrays: a (geocode, arrays) pair (presumably one record of the
            RDD this is mapped over; the RDD itself is not passed in).
            `geocode` is indexable and only its [0] entry is used.
            `arrays` holds one array (or None) per name in self.data_names,
            in the same order.

    Output:
        block_node: a nodes.GeounitNode object for the given geocode
    """
    geocode, arrays = person_unit_arrays

    # One array is expected per configured table name.
    assert len(arrays) == len(self.data_names)

    # Assign arrays to table names in a dictionary {name: array}. A missing (None)
    # table becomes an integer zero array of that table's shape; allocating it with
    # dtype=int directly avoids creating a float array and then copying it to int.
    data_dict = {
        name: array if array is not None else np.zeros(self.shape_dict[name], dtype=int)
        for name, array in zip(self.data_names, arrays)
    }

    # geocode is a tuple where the [1] entry is empty. We only want the [0] entry.
    geocode = geocode[0]
    logging.info(f"creating geocode: {geocode}")

    raw = sparse.multiSparse(
        data_dict[self.privacy_table_name],
        shape=self.shape_dict[self.privacy_table_name])
    raw_housing = sparse.multiSparse(
        data_dict[self.constraint_table_name],
        shape=self.shape_dict[self.constraint_table_name])

    # Make Invariants
    invariants_dict = self.setup.makeInvariants(
        raw=raw,
        raw_housing=raw_housing,
        invariant_names=self.invar_names)

    # Make Constraints (they are derived from the invariants computed above)
    constraints_dict = self.setup.makeConstraints(
        hist_shape=(self.setup.hist_shape, self.setup.unit_hist_shape),
        invariants=invariants_dict,
        constraint_names=self.cons_names)

    block_node = nodes.GeounitNode(geocode=geocode,
                                   geocode_dict=self.modified_geocode_dict,
                                   raw=raw,
                                   raw_housing=raw_housing,
                                   cons=constraints_dict,
                                   invar=invariants_dict)
    return block_node