def makeNode(self, persons, geocode='0123456789abcdef'):
    """
    Build a GeounitNode from a table of person records.

    The records are converted to a person histogram and a unit histogram,
    the 'tot', 'gqhh_tot' and 'gqhh_vect' invariants are made from them, and
    the 'hhgq_total_lb', 'hhgq_total_ub' and 'nurse_nva_0' constraints are
    calculated from those invariants.

    :param persons: person records; wrapped in np.array before conversion
    :param geocode: geocode assigned to the node
    :return: the node, with its syn attribute set to its raw histogram
    """
    records = np.array(persons)
    raw_hist, housing_hist = table2hists(records, self.schema, housing_varname=CC.ATTR_HHGQ)

    invariants = InvariantsMaker.make(
        schema=CC.DAS_DHCP_HHGQ,
        raw=raw_hist,
        raw_housing=housing_hist,
        invariant_names=('tot', 'gqhh_tot', 'gqhh_vect'),
    )

    creator = PConstraintsCreator(
        hist_shape=(raw_hist.shape, housing_hist.shape),
        invariants=invariants,
        constraint_names=('hhgq_total_lb', 'hhgq_total_ub', 'nurse_nva_0'),
    )
    constraints = creator.calculateConstraints().constraints_dict

    levels = {
        16: 'Block',
        12: 'Block_Group',
        11: 'Tract',
        5: 'County',
        2: 'State',
        1: 'US',
    }
    result = GeounitNode(
        raw=raw_hist,
        raw_housing=housing_hist,
        invar=invariants,
        cons=constraints,
        geocode_dict=levels,
        geocode=geocode,
    )
    # The synthetic data is simply the raw data for this node
    result.syn = result.raw
    return result
def makeNode(d, geocode, geocode_dict, addsyn=False, dpq_make=False, querydict=None, consn=consn, invn=invn):
    """
    Build a GeounitNode from data `d` converted with convertPL94.

    :param d: input passed to convertPL94; element 0 of the result is used as
        the raw histogram and element 1 as the raw housing histogram
    :param geocode: geocode of the node
    :param geocode_dict: geocode_dict handed to the node
    :param addsyn: if true, syn is the dense raw histogram plus one in every
        cell; otherwise syn is None
    :param dpq_make: if true, add a DP query under the C.DETAILED key
    :param querydict: if truthy, passed to the PL94 schema's getQueries(); one
        DP query per returned (name, query) pair is added
    :param consn: constraint names passed to makeCons
    :param invn: invariant names passed to makeInvs
    :return: the constructed GeounitNode
    """
    hists = convertPL94(d)
    main_hist = hists[0]

    if addsyn:
        syn = main_hist.toDense() + np.ones(np.prod(main_hist.shape)).reshape(main_hist.shape)
    else:
        syn = None

    dp_queries = {}
    if dpq_make:
        dp_queries[C.DETAILED] = DPquery(
            QueryFactory.makeTabularGroupQuery(array_dims=main_hist.shape),
            GeometricMechanism(Fraction(1, 10), 2, main_hist.toDense()),
        )

    if querydict:
        named_queries = SchemaMaker.fromName(CC.SCHEMA_PL94).getQueries(querydict)
        for name, query in named_queries.items():
            # The mechanism is given the query's answer on the dense raw histogram
            dp_queries[name] = DPquery(
                query,
                GeometricMechanism(Fraction(1, 10), 2, query.answer(main_hist.toDense())),
            )

    invariants = makeInvs(hists, invn)
    constraints = makeCons(hists, consn, invariants)
    return GeounitNode(
        geocode,
        raw=main_hist,
        raw_housing=hists[1],
        syn=syn,
        cons=constraints,
        invar=invariants,
        geocode_dict=geocode_dict,
        dp_queries=dp_queries,
    )
def makeNode(self, hholds, units, geocode='0'):
    """
    Build a GeounitNode from household records and unit records.

    Each table is converted by its own table2hists call, the 'tot' and
    'gqhh_vect' invariants are made from the results, and the 'no_vacant',
    'living_alone' and 'size2' constraints are calculated from those
    invariants.

    :param hholds: household records; wrapped in np.array before conversion
    :param units: unit records; wrapped in np.array before conversion
    :param geocode: geocode assigned to the node
    :return: the node, with its syn attribute set to its raw histogram
    """
    raw_hist = table2hists(np.array(hholds), self.schema)
    housing_hist = table2hists(np.array(units), self.unit_schema, CC.ATTR_HHGQ, units=True)

    invariants = InvariantsMaker.make(
        schema=CC.SCHEMA_HOUSEHOLD2010,
        raw=raw_hist,
        raw_housing=housing_hist,
        invariant_names=('tot', 'gqhh_vect'),
    )

    creator = HHConstraintsCreator(
        hist_shape=(raw_hist.shape, housing_hist.shape),
        invariants=invariants,
        constraint_names=('no_vacant', 'living_alone', 'size2'),
    )
    constraints = creator.calculateConstraints().constraints_dict

    result = GeounitNode(
        raw=raw_hist,
        raw_housing=housing_hist,
        invar=invariants,
        cons=constraints,
        geocode_dict={1: 'Stub'},
        geocode=geocode,
    )
    # The synthetic data is simply the raw data for this node
    result.syn = result.raw
    return result
def getToyGeounitData_GeounitNode(schema,
                                  geocodes=[
                                      '000', '001', '002', '003', '010', '011', '012', '020', '022'
                                  ],
                                  geocode_dict={
                                      3: 'block',
                                      2: 'county'
                                  },
                                  raw_params={
                                      'low': 0,
                                      'high': 2
                                  },
                                  syn_params={
                                      'low': 0,
                                      'high': 5
                                  }):
    """
    Create toy GeounitNodes filled with random integer histograms.

    For every geocode a raw and a syn histogram of shape schema.shape are
    drawn with np.random.randint and wrapped in multiSparse.

    :param schema: object providing .size and .shape of the histogram
    :param geocodes: geocodes to create nodes for (passed through du.aslist)
    :param geocode_dict: geocode_dict handed to every node (the same object is
        shared by all nodes created in one call)
    :param raw_params: dict with 'low' and 'high' bounds for np.random.randint
        used for the raw histogram, or None to create nodes with raw=None
    :param syn_params: same as raw_params, for the syn histogram
    :return: list of GeounitNode, one per geocode
    """
    # NOTE: the mutable defaults above are only read, never modified, in this function.

    def _random_hist(params):
        """Draw one random sparse histogram, or return None when params is None."""
        if params is None:
            # Previously a None params left the histogram variable unassigned and
            # the node construction failed with UnboundLocalError.
            # NOTE(review): assumes GeounitNode accepts None for raw/syn -- confirm.
            return None
        dense = np.random.randint(low=params['low'], high=params['high'], size=schema.size)
        return multiSparse(dense.reshape(schema.shape))

    geounits = []
    for geocode in du.aslist(geocodes):
        # Draw raw before syn so the sequence of random draws is unchanged
        raw = _random_hist(raw_params)
        syn = _random_hist(syn_params)
        geounits.append(GeounitNode(geocode=geocode, geocode_dict=geocode_dict, raw=raw, syn=syn))
    return geounits
def Data(self):
    """
    Return a one-element list holding a single block node 'b1'.

    The node's histogram has shape (2,) (for, e.g., Male, Female) and is used
    both as raw and as raw_housing; invariants and constraints are empty.
    """
    hist = multiSparse(np.array([1, 2]))
    only_block = GeounitNode(
        'b1',
        raw=hist,
        raw_housing=hist,
        invar={},
        cons={},
        geocode_dict={2: "Block"},
    )
    return [only_block]
def dict2node(self, s_node: Union[GeounitNode, Dict[str, Any]]) -> GeounitNode:
    """
    Convert dictionary of node fields into geo node, substituting synthetic data as raw

    The saved geocode_dict, histogram shape, invariants and constraints (when present)
    are checked against the ones derived from the current config.

    :rtype: nodes.GeounitNode
    :param s_node: saved node, either a Geounit node object or a GeounitNode constructor
        arguments dict, read from file, has node field values
    :return: geoNode
    :raises ValueError: if geocode, synthetic data or raw housing is missing from the saved node
    :raises AssertionError: if a saved field disagrees with the value reconstructed from config
    """
    # Geocode is only needed for the error message. It may itself be the missing field,
    # so it is looked up only when present.
    # NOTE(review): assumes getNodeAttr may fail on an absent field -- confirm.
    geocode_for_msg = getNodeAttr(s_node, GEOCODE) if hasNodeAttr(s_node, GEOCODE) else "<unknown>"

    # These fields are needed for nodes to be reconstructed. Geocode is obvious, syn is to use
    # as raw data, raw_housing to recreate invariants
    for key, msg_pref in zip((GEOCODE, SYN, RAW_HOUSING), ("Geocodes", "Synthetic data", "Raw housing")):
        if not hasNodeAttr(s_node, key):
            raise ValueError(f"{msg_pref} should be present in the saved pickled nodes. Missing from geocode {geocode_for_msg}")

    # If geocodedict is saved, it should be the same. Otherwise, it's probably wrong config used.
    # The comparison has to use the saved geocode_dict (not the saved geocode, which can never
    # equal the config's geocode_dict).
    if hasNodeAttr(s_node, GEOCODEDICT):
        saved_geocode_dict = getNodeAttr(s_node, GEOCODEDICT)
        assert saved_geocode_dict == self.geocode_dict, f"Saved geocode_dict ({saved_geocode_dict}) is different from the one in config ({self.geocode_dict})!"

    # Check the shape of the saved syn histogram. If it is different, most likely it's the wrong config file
    saved_syn = getNodeAttr(s_node, SYN)
    assert saved_syn.shape == self.setup.hist_shape, f"Saved histogram shape {saved_syn.shape} does not correspond to schema in config {self.setup.schema}:{self.setup.hist_shape}!"

    # Create the node
    node = GeounitNode(geocode=getNodeAttr(s_node, GEOCODE),
                       raw=saved_syn,
                       raw_housing=getNodeAttr(s_node, RAW_HOUSING),
                       geocode_dict=self.geocode_dict)

    # Recreate invariants and constraints. Note, it is done from raw housing histogram (which is also
    # needed further on when in topdown procedure we add invariants pertaining to each level)

    # Invariants
    recr_inv = self.setup.makeInvariants(raw=node.raw, raw_housing=node.raw_housing, invariant_names=self.invar_names)
    if hasNodeAttr(s_node, INVAR):
        # invar is dictionary of numpy arrays, so each entry is compared with np.array_equal
        # to get a single boolean
        msg = "Saved invariants are different from reconstructed!"
        saved_inv = getNodeAttr(s_node, INVAR)
        assert saved_inv.keys() == recr_inv.keys(), msg
        for inv_name in saved_inv.keys():
            assert np.array_equal(saved_inv[inv_name], recr_inv[inv_name]), msg
    node.invar = recr_inv

    # Constraints
    recr_cons = self.setup.makeConstraints(hist_shape=(self.setup.hist_shape, self.setup.unit_hist_shape),
                                           invariants=recr_inv,
                                           constraint_names=self.cons_names)
    if hasNodeAttr(s_node, CONS):
        assert getNodeAttr(s_node, CONS) == recr_cons, "Saved constraints are different from reconstructed!"
    node.cons = recr_cons

    return node
def mfUrData(self, setup_instance):
    """
    Data in the shape of histograms for 3 Rural Blocks in 1 Rural county
    and 3 Urban blocks in 1 Urban county, all in 1 state.

    Histogram is shape (2,) for sex, i.e. each block provides number of male
    and number of female. This is the same test example as in JavaScript
    simulator.

    :param setup_instance: provides makeInvariants, makeConstraints and the
        inv_con_by_level dict whose 'Block' entry names the invariants and
        constraints to create
    :return: list of six block-level GeounitNodes
    """
    geocodes = ['1RB1', '1RB2', '1RB3', '1UB1', '1UB2', '1UB3']
    counts = ([1, 2], [3, 4], [5, 6], [101, 102], [103, 104], [105, 106])
    # All histograms are built up front, before any invariants are made
    histograms = [multiSparse(np.array(pair)) for pair in counts]

    nodes = []
    for hist, code in zip(histograms, geocodes):
        block_invar = setup_instance.makeInvariants(
            raw=hist,
            raw_housing=hist,
            invariant_names=setup_instance.inv_con_by_level['Block']['invar_names'],
        )
        block_cons = setup_instance.makeConstraints(
            hist_shape=(2, ),
            invariants=block_invar,
            constraint_names=setup_instance.inv_con_by_level['Block']['cons_names'],
        )
        node = GeounitNode(
            code,
            raw=hist,
            raw_housing=hist,
            invar=block_invar,
            cons=block_cons,
            # A fresh dict per node, so nodes do not share the mapping object
            geocode_dict={4: "Block", 3: "County", 1: "State"},
        )
        nodes.append(node)
    return nodes
def test_makeAdditionalInvariantsConstraints(self, block_cons, state_cons, county_cons):
    """
    Check that constraints hold on nodes aggregated from block nodes.

    Six block nodes are created with a stub setup; the '1RB*' blocks are summed
    into node rc and the '1UB*' blocks into node uc, and those two into the
    state node. makeAdditionalInvariantsConstraints is called on each aggregate
    and checkConstraints must be true for all three.

    :param block_cons: whether 'tot'/'total' are enabled at the 'Block' level
    :param state_cons: whether 'tot'/'total' are enabled at the 'State' level
    :param county_cons: whether 'tot'/'total' are enabled at the 'County' level
    """

    class TestSetup(DASDecennialSetup):
        """Stub setup with one 'tot' invariant and one 'total' constraint."""

        def __init__(self):
            self.hist_shape = (2, )
            self.hist_vars = ("sex", )
            self.validate_input_data_constraints = False
            switches = (('Block', block_cons), ('County', county_cons), ('State', state_cons))
            self.inv_con_by_level = {
                level: {
                    'invar_names': ('tot', ) if enabled else (),
                    'cons_names': ('total', ) if enabled else (),
                }
                for level, enabled in switches
            }

        @staticmethod
        def makeInvariants(raw, raw_housing, invariant_names):
            if 'tot' not in invariant_names:
                return {}
            return {'tot': np.sum(raw.toDense())}

        @staticmethod
        def makeConstraints(hist_shape, invariants, constraint_names):
            if 'total' not in constraint_names:
                return {}
            summed = QueryFactory.makeTabularGroupQuery((2, ), add_over_margins=(0, ))
            stub = StubQuery((2, 1), "stub")
            combined = MultiHistQuery((summed, stub), (1, 0))
            return {'total': Constraint(combined, np.array(invariants['tot']), "=", "total")}

    setup_instance = TestSetup()

    geocodes = ['1RB1', '1RB2', '1RB3', '1UB1', '1UB2', '1UB3']
    counts = ([1, 2], [3, 4], [5, 6], [101, 102], [103, 104], [105, 106])
    histograms = [sparse.multiSparse(np.array(pair)) for pair in counts]

    block_nodes = []
    for hist, code in zip(histograms, geocodes):
        node_invar = setup_instance.makeInvariants(
            raw=hist,
            raw_housing=hist,
            invariant_names=setup_instance.inv_con_by_level['Block']['invar_names'],
        )
        node_cons = setup_instance.makeConstraints(
            hist_shape=(2, ),
            invariants=node_invar,
            constraint_names=setup_instance.inv_con_by_level['Block']['cons_names'],
        )
        block_nodes.append(
            GeounitNode(code,
                        raw=hist,
                        raw_housing=hist,
                        invar=node_invar,
                        cons=node_cons,
                        geocode_dict={4: "Block", 3: "County", 1: "State"}))

    # Aggregate the first three blocks
    rc = block_nodes[0].addInReduce(block_nodes[1])
    rc = rc.addInReduce(block_nodes[2])
    rc = rc.shiftGeocodesUp()
    rc.makeAdditionalInvariantsConstraints(setup_instance)

    # Aggregate the last three blocks
    uc = block_nodes[3].addInReduce(block_nodes[4])
    uc = uc.addInReduce(block_nodes[5])
    uc = uc.shiftGeocodesUp()
    uc.makeAdditionalInvariantsConstraints(setup_instance)

    # Aggregate the two intermediate nodes
    state = rc.addInReduce(uc)
    state = state.shiftGeocodesUp()
    state.makeAdditionalInvariantsConstraints(setup_instance)

    assert state.checkConstraints()
    assert rc.checkConstraints()
    assert uc.checkConstraints()