Example #1
    def makeNode(self, persons, geocode='0123456789abcdef'):
        person_hist, unit_hist = table2hists(np.array(persons),
                                             self.schema,
                                             housing_varname=CC.ATTR_HHGQ)

        invar = InvariantsMaker.make(schema=CC.DAS_DHCP_HHGQ,
                                     raw=person_hist,
                                     raw_housing=unit_hist,
                                     invariant_names=('tot', 'gqhh_tot',
                                                      'gqhh_vect'))
        cons = PConstraintsCreator(
            hist_shape=(person_hist.shape, unit_hist.shape),
            invariants=invar,
            constraint_names=(
                'hhgq_total_lb', 'hhgq_total_ub',
                'nurse_nva_0')).calculateConstraints().constraints_dict
        node = GeounitNode(raw=person_hist,
                           raw_housing=unit_hist,
                           invar=invar,
                           cons=cons,
                           geocode_dict={
                               16: 'Block',
                               12: 'Block_Group',
                               11: 'Tract',
                               5: 'County',
                               2: 'State',
                               1: 'US'
                           },
                           geocode=geocode)
        node.syn = node.raw

        return node
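table2hists is internal to the DAS codebase and not shown here; as a rough, self-contained sketch of the idea (hypothetical helper name, plain numpy), a table of per-record category indices folds into a dense histogram like this:

import numpy as np

def records_to_hist(records: np.ndarray, shape: tuple) -> np.ndarray:
    """Fold a table of per-record category indices (one row per record,
    one column per histogram axis) into a dense histogram of `shape`."""
    hist = np.zeros(shape, dtype=int)
    # np.add.at accumulates correctly when several records hit the same cell
    np.add.at(hist, tuple(records.T), 1)
    return hist

# Two axes, e.g. hhgq (8 levels) x sex (2 levels); three example records
persons = np.array([[0, 1], [0, 1], [7, 0]])
hist = records_to_hist(persons, (8, 2))
assert hist[0, 1] == 2 and hist[7, 0] == 1 and hist.sum() == 3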
Example #2
    def makeNode(self, hholds, units, geocode='0'):
        hhold_hist = table2hists(np.array(hholds), self.schema)
        unit_hist = table2hists(np.array(units), self.unit_schema,
                                CC.ATTR_HHGQ, units=True)

        invar = InvariantsMaker.make(schema=CC.SCHEMA_HOUSEHOLD2010,
                                     raw=hhold_hist,
                                     raw_housing=unit_hist,
                                     invariant_names=('tot', 'gqhh_vect'))
        cons = HHConstraintsCreator(
            hist_shape=(hhold_hist.shape, unit_hist.shape),
            invariants=invar,
            constraint_names=('no_vacant', 'living_alone',
                              'size2')).calculateConstraints().constraints_dict
        node = GeounitNode(raw=hhold_hist,
                           raw_housing=unit_hist,
                           invar=invar,
                           cons=cons,
                           geocode_dict={1: 'Stub'},
                           geocode=geocode)
        node.syn = node.raw

        return node
Example #3
    def test_addInvariantsErrorsRaising(self, test_input, errmsg):
        n1 = makeEmptyNode('123456789017')
        n2 = makeEmptyNode('123456789013')
        n1.invar, n2.invar = test_input

        with pytest.raises(IncompatibleAddendsError) as err:
            GeounitNode.addInvariants(n1, n2)
        assert errmsg in str(err.value)
Example #4
    def makeDPNode(self, geounit_node: GeounitNode) -> GeounitNode:
        """
        This function takes a GeounitNode with "raw" data and generates
        noisy DP query answers depending on the specifications in the
        config object.

        NOTE: This function is called inside the mapper (see above),
        so it is executed for every GeounitNode, on the workers.

        This may be confusing, because the .map() function is called from
        self.noisyAnswers() (above), which is run on the Master node.

        Inputs:
            geounit_node: a Node object with "raw" data
        Outputs:
            dp_geounit_node: a Node object with selected DP measurements

        """
        logging.info(
            json.dumps({
                'geocode': geounit_node.geocode,
                'geolevel': geounit_node.geolevel
            }))

        geolevel_prop = self.getNodePLB(geounit_node)

        main_hist: np.ndarray = geounit_node.getDenseRaw()
        unit_hist: np.ndarray = geounit_node.getDenseRawHousing()

        dp_geounit_node = geounit_node
        dp_geounit_node.dp_queries, dp_geounit_node.unit_dp_queries = self.nodeDPQueries(
            geolevel_prop, main_hist, unit_hist, geounit_node.geolevel)

        if self.optimization_query_ordering.est_and_qadd_queries:
            print("multi_pass_try")
            # make estimation queries
            dp_geounit_node = self.optimization_query_ordering.makeOptQueries(
                dp_geounit_node)

        rounder_queries = {}
        if self.optimization_query_ordering.rounder_query_names:
            rounder_queries = self.setup.schema_obj.getQueries(
                list(self.optimization_query_ordering.rounder_query_names[
                    geounit_node.geolevel]))
        dp_geounit_node.rounder_queries = rounder_queries
        dp_geounit_node.query_ordering = self.optimization_query_ordering.query_ordering[
            geounit_node.geolevel]

        return dp_geounit_node
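getNodePLB is likewise not shown; judging from makeDPNode in Example #20 below, the per-geolevel proportioning presumably resembles this sketch (hypothetical level names and budget shares):

from fractions import Fraction

# Hypothetical privacy-loss budget shares per geolevel, top of the tree first
levels_reversed = ['US', 'State', 'County', 'Tract', 'Block_Group', 'Block']
geolevel_prop_budgets = [Fraction(1, 6)] * 6  # shares should sum to 1

def geolevel_prop(geolevel: str) -> Fraction:
    # Index relative to the top level, as in makeDPNode in Example #20
    return geolevel_prop_budgets[levels_reversed.index(geolevel)]

assert sum(geolevel_prop_budgets) == 1
assert geolevel_prop('County') == Fraction(1, 6)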
Example #5
def getToyGeounitData_GeounitNode(schema,
                                  geocodes=[
                                      '000', '001', '002', '003', '010', '011',
                                      '012', '020', '022'
                                  ],
                                  geocode_dict={
                                      3: 'block',
                                      2: 'county'
                                  },
                                  raw_params={
                                      'low': 0,
                                      'high': 2
                                  },
                                  syn_params={
                                      'low': 0,
                                      'high': 5
                                  }):
    geounits = []
    for geocode in du.aslist(geocodes):
        if raw_params is not None:
            raw = np.random.randint(low=raw_params['low'],
                                    high=raw_params['high'],
                                    size=schema.size).reshape(schema.shape)
        if syn_params is not None:
            syn = np.random.randint(low=syn_params['low'],
                                    high=syn_params['high'],
                                    size=schema.size).reshape(schema.shape)
        geounits.append(
            GeounitNode(geocode=geocode,
                        geocode_dict=geocode_dict,
                        raw=multiSparse(raw),
                        syn=multiSparse(syn)))
    return geounits
Example #6
def makeNode(d,
             geocode,
             geocode_dict,
             addsyn=False,
             dpq_make=False,
             querydict=None,
             consn=consn,
             invn=invn):
    ph = convertPL94(d)
    syn = ph[0].toDense() + np.ones(np.prod(ph[0].shape)).reshape(
        ph[0].shape) if addsyn else None
    dpqdict = {
        C.DETAILED:
        DPquery(QueryFactory.makeTabularGroupQuery(array_dims=ph[0].shape),
                GeometricMechanism(Fraction(1, 10), 2, ph[0].toDense()))
    } if dpq_make else {}
    if querydict:
        dpqdict.update({
            name: DPquery(
                query,
                GeometricMechanism(Fraction(1, 10), 2,
                                   query.answer(ph[0].toDense())))
            for name, query in SchemaMaker.fromName(CC.SCHEMA_PL94).getQueries(
                querydict).items()
        })
    inv_dict = makeInvs(ph, invn)
    return GeounitNode(geocode,
                       raw=ph[0],
                       raw_housing=ph[1],
                       syn=syn,
                       cons=makeCons(ph, consn, inv_dict),
                       invar=inv_dict,
                       geocode_dict=geocode_dict,
                       dp_queries=dpqdict)
Example #7
 def node2SparkRows(node: GeounitNode):
     nodedict = node.toDict((SYN, INVAR, GEOCODE))
     persons = makeHistRowsFromMultiSparse(
         nodedict,
         schema,
         row_recoder=self.row_recoder,
         geocode_dict=inverted_geodict)
     return persons
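makeHistRowsFromMultiSparse is a DAS-internal helper; a hedged, stand-alone sketch of the underlying expansion of histogram cells into one record per counted unit (hypothetical field names, no Spark):

import numpy as np

def hist_to_rows(hist: np.ndarray, varnames: tuple) -> list:
    """Expand a dense histogram into one dict per counted record."""
    rows = []
    for idx, count in np.ndenumerate(hist):
        rows.extend(dict(zip(varnames, idx)) for _ in range(int(count)))
    return rows

hist = np.array([[2, 0],
                 [0, 1]])
rows = hist_to_rows(hist, ('hhgq', 'sex'))
assert len(rows) == 3 and rows[0] == {'hhgq': 0, 'sex': 0}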
Example #8
    def dict2node(self, s_node: Union[GeounitNode, Dict[str, Any]]) -> GeounitNode:
        """
        Convert dictionary of node fields into geo node, substituting synthetic data as raw
        :rtype: nodes.GeounitNode
        :param s_node: saved node read from a file: either a GeounitNode object or a dict of GeounitNode constructor arguments (node field values)
        :return: geoNode
        """

        # These fields are needed for nodes to be reconstructed. Geocode is obvious; syn is used as the raw data, raw_housing to recreate invariants
        for key, msg_pref in zip((GEOCODE, SYN, RAW_HOUSING),
                                 ("Geocodes", "Synthetic data", "Raw housing")):
            if not hasNodeAttr(s_node, key):
                raise ValueError(f"{msg_pref} should be present in the saved pickled nodes. Missing from geocode {getNodeAttr(s_node, GEOCODE)}")

        # If geocode_dict is saved, it should be the same. Otherwise, the wrong config was probably used
        if hasNodeAttr(s_node, GEOCODEDICT):
            assert getNodeAttr(s_node, GEOCODEDICT) == self.geocode_dict, f"Saved geocode_dict ({getNodeAttr(s_node, GEOCODEDICT)}) is different from the one in config ({self.geocode_dict})!"

        # Check the shape of the saved syn histogram. If it is different, most likely it's the wrong config file
        assert getNodeAttr(s_node, SYN).shape == self.setup.hist_shape, f"Saved histogram shape {getNodeAttr(s_node, SYN).shape} does not correspond to schema in config {self.setup.schema}:{self.setup.hist_shape}!"

        # Create the node
        node = GeounitNode(geocode=getNodeAttr(s_node, GEOCODE), raw=getNodeAttr(s_node, SYN), raw_housing=getNodeAttr(s_node, RAW_HOUSING), geocode_dict=self.geocode_dict)

        # Recreate invariants and constraints. Note that this is done from the raw housing histogram (which is also needed
        # further on, when in the topdown procedure we add invariants pertaining to each level)

        # Invariants
        recr_inv = self.setup.makeInvariants(raw=node.raw, raw_housing=node.raw_housing, invariant_names=self.invar_names)
        if hasNodeAttr(s_node, INVAR):
            # invar is a dictionary of numpy arrays, so compare the key sets and then each array element-wise
            msg = "Saved invariants are different from reconstructed!"
            assert getNodeAttr(s_node, INVAR).keys() == recr_inv.keys(), msg
            for inv_name in getNodeAttr(s_node, INVAR).keys():
                assert np.array_equal(getNodeAttr(s_node, INVAR)[inv_name], recr_inv[inv_name]), msg
        node.invar = recr_inv

        # Constraints
        recr_cons = self.setup.makeConstraints(hist_shape=(self.setup.hist_shape, self.setup.unit_hist_shape), invariants=recr_inv, constraint_names=self.cons_names)
        if hasNodeAttr(s_node, CONS):
            assert getNodeAttr(s_node, CONS) == recr_cons, "Saved constraints are different from reconstructed!"

        node.cons = recr_cons

        return node
Example #9
def test_compare_with_saved(reader_instance, join_data):
    """
    WILL CHANGE IF TEST DATA in .txt FILES or CONFIG CHANGES!
    """
    bn = reader_instance.makeBlockNode(join_data)
    fname = os.path.join(os.path.dirname(__file__), 'geounitnode.pickle')
    # with(open(fname, 'wb')) as f:
    #     pickle.dump(bn.toDict(keep_attrs=bn.__slots__), f)
    sbn = GeounitNode.fromDict(pickle.load(open(fname, 'rb')))
    assert sbn == bn
Example #10
 def node2SparkRows(node: GeounitNode):
     nodedict = node.toDict((SYN, INVAR, GEOCODE))
     households = makeHistRowsFromMultiSparse(
         nodedict, schema, row_recoder=self.row_recoder)
     units = addEmptyAndGQ(
         nodedict,
         schema,
         households,
         row_recoder=self.row_recoder,
         gqtype_recoder=HHGQUnitDemoProductAttr.das2mdf,
         geocode_dict=inverted_geodict)
     return units
Example #11
    def test_addInvariants(self, invs1, invs2):
        n1 = makeEmptyNode('123456789017')
        n2 = makeEmptyNode('123456789013')
        n1.invar = invs1
        n2.invar = invs2

        sum_inv = GeounitNode.addInvariants(n1, n2)
        assert set(sum_inv.keys()) == set(invs1.keys())
        assert set(sum_inv.keys()) == set(invs2.keys())
        for invname in sum_inv.keys():
            assert np.array_equal(invs1[invname] + invs2[invname],
                                  sum_inv[invname])
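The property this test asserts can be sketched without the DAS classes. Assuming addInvariants is an element-wise sum over dicts with matching keys:

import numpy as np

def add_invariants(invs1: dict, invs2: dict) -> dict:
    """Element-wise sum of two invariant dicts; mismatched keys raise,
    mirroring the IncompatibleAddendsError case tested in Example #3."""
    if invs1.keys() != invs2.keys():
        raise ValueError("Invariants have different names")
    return {name: invs1[name] + invs2[name] for name in invs1}

invs1 = {'tot': np.array(5), 'gqhh_vect': np.array([1, 2, 3])}
invs2 = {'tot': np.array(7), 'gqhh_vect': np.array([0, 1, 1])}
summed = add_invariants(invs1, invs2)
assert summed['tot'] == 12
assert np.array_equal(summed['gqhh_vect'], np.array([1, 3, 4]))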
Example #12
def constraintsCheck(node: GeounitNode, parent_geocode=None):
    """
    This function checks that a set of constraints is met given a solution.
    It will raise an exception if any constraint is not met.
    Inputs:
        node: geounit node with "syn" field to be checked against the node constraints
        parent_geocode: geocode of the parent node, used in the status message (defaults to node.parentGeocode)
    """
    if parent_geocode is None:
        parent_geocode = node.parentGeocode
    msg = f"Constraints for parent geocode {parent_geocode}"
    if not node.checkConstraints(raw=False):
        raise RuntimeError(msg + " failed!")
    print(msg + " satisfied.")
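node.checkConstraints itself is not shown here; a minimal sketch of what checking a single constraint against the synthetic histogram amounts to (hypothetical helper, numpy only; sign tokens assumed):

import numpy as np

def check_constraint(syn: np.ndarray, query, rhs, sign: str) -> bool:
    """Compare the query's answer on the synthetic histogram with the
    constraint's right-hand side under the constraint's sign."""
    ops = {'=': np.equal, 'le': np.less_equal, 'ge': np.greater_equal}
    return bool(np.all(ops[sign](query(syn), rhs)))

def total_query(hist):
    return hist.sum()

syn = np.array([3, 4])  # e.g. Male/Female counts
assert check_constraint(syn, total_query, 7, '=')
assert check_constraint(syn, total_query, 10, 'le')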
Example #13
 def Data(self):
     """
          Data in the shape of histograms for 1 Block. Hist shape (2,) (e.g., Male, Female).
     """
     b1 = multiSparse(np.array([1, 2]))
     block_nodes = [
         GeounitNode('b1',
                     raw=b1,
                     raw_housing=b1,
                     invar={},
                     cons={},
                     geocode_dict={2: "Block"}),
     ]
     return block_nodes
Example #14
 def test_toDict(self, add_querydict, dpq_make):
     geocode_dict = {
         16: 'Block',
         12: 'Block_Group',
         11: 'Tract',
         5: 'County',
         2: 'State',
         1: 'US'
     }
     n1 = makeNode(d1,
                   '12',
                   geocode_dict=geocode_dict,
                   addsyn=False,
                   dpq_make=dpq_make,
                   querydict=add_querydict)
     ndict1 = n1.toDict(keep_attrs=n1.__slots__)
     n2 = GeounitNode.fromDict(ndict1)
     assert n1 == n2
Example #15
 def node2SparkRows(node: GeounitNode):
     nodedict = node.toDict((SYN, INVAR, GEOCODE))
     households = makeHistRowsFromMultiSparse(
         nodedict,
         schema,
         row_recoder=self.row_recoder,
         geocode_dict=inverted_geodict,
         microdata_field=None)
     units = addGroupQuarters(nodedict,
                              schema,
                              households,
                              row_recoder=self.row_recoder,
                              geocode_dict=inverted_geodict,
                              to_microdata=False)
     # 'priv' means "protected via the differential privacy routines in this code base"; the variable is to be renamed after P.L. 94-171 production
     ordered_cols = self.var_list + ['priv']
     return [
         Row(*ordered_cols)(*[unit[col] for col in ordered_cols])
         for unit in units
     ]
Example #16
 def mfUrData(self, setup_instance):
     """
     Data in the shape of histograms for 3 Rural Blocks in 1 Rural county and 3 Urban blocks in 1 Urban county, all in 1 state.
     Histogram is shape (2,) for sex, i.e. each block provides the number of males and the number of females.
     This is the same test example as in the JavaScript simulator.
     """
     rb1 = multiSparse(np.array([1, 2]))
     rb2 = multiSparse(np.array([3, 4]))
     rb3 = multiSparse(np.array([5, 6]))
     ub1 = multiSparse(np.array([101, 102]))
     ub2 = multiSparse(np.array([103, 104]))
     ub3 = multiSparse(np.array([105, 106]))
     block_nodes = []
     for block, geocode in zip(
         [rb1, rb2, rb3, ub1, ub2, ub3],
         ['1RB1', '1RB2', '1RB3', '1UB1', '1UB2', '1UB3']):
         invariants = setup_instance.makeInvariants(
             raw=block,
             raw_housing=block,
             invariant_names=setup_instance.inv_con_by_level['Block']
             ['invar_names'])
         constraints = setup_instance.makeConstraints(
             hist_shape=(2, ),
             invariants=invariants,
             constraint_names=setup_instance.inv_con_by_level['Block']
             ['cons_names'])
         block_nodes.append(
             GeounitNode(geocode,
                         raw=block,
                         raw_housing=block,
                         invar=invariants,
                         cons=constraints,
                         geocode_dict={
                             4: "Block",
                             3: "County",
                             1: "State"
                         }))
     return block_nodes
Example #17
def geoimp_wrapper_root(*,
                        config,
                        parent_shape,
                        root_node: GeounitNode,
                        optimizers,
                        min_schema=None,
                        keep_debug_info=False):
    """
    This function performs the Post-Processing Step of Root Geonode (e.g. US or a State) to Root Geonode level.
    It is called from engine_utils.py:topdown in a Spark map operation

    Inputs:
        config: configuration object
        root_node: a GeounitNode object referring to the top/root node of the geographical tree (e.g. US, US+PR or a single state for state-size runs)
        optimizers: the optimizers to be used, passed through to makeInputsAndRunOptimizer

    Output:
        root_node: a GeounitNode object referring to the top/root node of the geographical tree (e.g. US, US+PR or a single state for state-size runs)
    """

    # Make sure that the logger is set up on all of the nodes
    clogging.setup(level=logging.INFO)
    # t_start = time.time()
    parent_hist = [None] * len(parent_shape)

    children = [root_node.unzipNoisy()]

    children, backup_solve_status = makeInputsAndRunOptimizer(
        children,
        config,
        min_schema,
        parent_hist,
        parent_shape,
        "root_to_root",
        optimizers,
        keep_debug_info=keep_debug_info)

    return children[0]
Example #18
    def sample_histogram(node: GeounitNode, sample_target: int):
        """
        :param node: The input GeounitNode which will receive a new sampled histogram 
        :param sample_target: The size of the target sample population
        :return: The input node with its syn attribute set to the sampled histogram
        """
        assert all([
            node.raw is not None,
            isinstance(node.raw, multiSparse),
            node.raw.sparse_array is not None,
            node.raw.sparse_array.data is not None
        ])

        # Shape of the full (dense) histogram, needed to rebuild the multiSparse below
        data_shape = node.raw.shape

        # Get the shape and indices of populated values in the sparse matrix
        # to be able to recreate a new one
        csr_shape = node.raw.sparse_array.shape
        indices = node.raw.sparse_array.indices
        indptr = node.raw.sparse_array.indptr

        # Get the probability vector
        pval = BootstrapEngine.compute_pval(node)

        # Sample from a multinomial of the pval
        sampled_data = numpy.random.multinomial(sample_target, pval)

        # Produce the new CSR matrix and histogram
        new_matrix = ss.csr_matrix((sampled_data, indices, indptr),
                                   shape=csr_shape)
        new_histogram: __HistData__ = multiSparse(new_matrix, shape=data_shape)

        # Set the node's syn attribute
        node.syn = new_histogram
        return node
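A condensed, self-contained version of the resampling step above, using a bare scipy CSR matrix in place of multiSparse and assuming compute_pval simply normalizes the populated cells:

import numpy as np
import scipy.sparse as ss

rng = np.random.default_rng(0)

# A 1 x N sparse "histogram" with a few populated cells
raw = ss.csr_matrix(np.array([[0, 3, 0, 5, 2]]))

# Probability vector over the populated cells
pval = raw.data / raw.data.sum()

# Resample the target population over the same support
sample_target = 20
sampled = rng.multinomial(sample_target, pval)

# Rebuild a CSR matrix with the identical sparsity structure
new_matrix = ss.csr_matrix((sampled, raw.indices, raw.indptr), shape=raw.shape)
assert new_matrix.sum() == sample_target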
Example #19
def geoimp_wrapper_nat(*,
                       config,
                       parent_shape,
                       nat_node: GeounitNode,
                       min_schema=None):
    """
    This function performs the Post-Processing Step of National to National level.
    It is called from engine_utils.py:topdown in a Spark map operation

    Inputs:
        config: configuration object
        nat_node: a GeounitNode object referring to entire nation

    Output:
        nat_node: a GeounitNode object referring to entire nation
    """

    # Make sure that the logger is set up on all of the nodes
    clogging.setup(level=logging.INFO,
                   syslog=True,
                   syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP))
    # t_start = time.time()
    parent_hist = None

    noisy_child = np.expand_dims(
        asDense(nat_node.dp.DPanswer), axis=len(
            nat_node.dp.DPanswer.shape)) if nat_node.dp else None
    noisy_child_weight = 1. / nat_node.dp.Var if nat_node.dp else None
    parent_geocode = "nat_to_nat"

    # TODO: Maybe filtering out the detailed query from node.dp_queries can be done more neatly
    dp_queries_comb = stackNodeProperties([nat_node], lambda node: node.dp_queries,
                                          cons_dpq.StackedDPquery,
                                          lambda name: name != C.DETAILED)
    # We can get the actual variance for each query if we want
    query_weights = map(lambda sdpq: 1. / sdpq.Var, dp_queries_comb)
    constraints_comb = stackNodeProperties([nat_node], lambda node: node.cons,
                                           cons_dpq.StackedConstraint)

    # Create an L2PlusRounderWithBackup object
    seq_opt = sequential_optimizers.L2PlusRounderWithBackup(
        das=None,
        parent=parent_hist,
        parent_shape=parent_shape,
        NoisyChild=noisy_child,
        childGeoLen=1,
        config=config,
        DPqueries=dp_queries_comb,
        constraints=constraints_comb,
        NoisyChild_weight=noisy_child_weight,
        query_weights=query_weights,
        identifier="nat_to_nat",
        min_schema=min_schema,
        stat_node=nat_node)

    l2_answer, int_answer, backup_solve_status = seq_opt.run()

    # get rid of extra dimension
    int_answer = int_answer.squeeze()
    l2_answer = l2_answer.squeeze()

    nat_node.syn = int_answer
    constraintsCheck(nat_node, parent_geocode)

    nat_node.syn = sparse.multiSparse(int_answer)
    nat_node.syn_unrounded = sparse.multiSparse(l2_answer)
    return nat_node
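The 1./Var weights above are inverse-variance weights. As a minimal numeric illustration (not the DAS optimizer, which solves a constrained least-squares problem), weighting two unbiased noisy answers by 1/variance reduces the variance of their combination:

import numpy as np

rng = np.random.default_rng(1)
truth = 100.0
var1, var2 = 4.0, 16.0
a1 = truth + rng.normal(scale=np.sqrt(var1), size=100_000)
a2 = truth + rng.normal(scale=np.sqrt(var2), size=100_000)

w1, w2 = 1.0 / var1, 1.0 / var2
combined = (w1 * a1 + w2 * a2) / (w1 + w2)

# Variance of the combination approaches 1/(1/var1 + 1/var2) = 3.2
assert combined.var() < a1.var() < a2.var()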
Example #20
    def makeDPNode(self,
                   geounit_node: GeounitNode,
                   tot_budget=None,
                   dp_queries=True,
                   small_cell_query=False) -> GeounitNode:
        """
        This function takes a GeounitNode with "raw" data and generates
        noisy DP query answers depending on the specifications in the
        config object.

        NOTE: This function is called inside the mapper (see above),
        so it is executed for every GeounitNode, on the workers.

        This may be confusing, because the .map() function is called from
        self.noisyAnswers() (above), which is run on the Master node.

        Inputs:
            geounit_node: a Node object with "raw" data
            tot_budget: in minimal schema runs (there are two), the total budget differs, with only a part used
            dp_queries: boolean indicating whether dpqueries are present in measurement set, based on config
            small_cell_query: boolean indicating whether a public-historical-data smallCellQuery is present, based on config
        Outputs:
            dp_geounit_node: a Node object with selected DP measurements

        """
        logging.info(
            json.dumps({
                'geocode': geounit_node.geocode,
                'geolevel': geounit_node.geolevel
            }))

        # For minimal schema, a part of budget is used in phase 1 and
        # the rest in phase 2, so this function is called with those
        # as tot_budget
        if tot_budget is None:
            tot_budget = self.total_budget

        # For minimal schema phase 1, no preset DP queries are made, so the argument should be set to False.
        # Also, no budget is to be spent on queries other than the detailed one in phase 1
        if dp_queries:
            dp_queries = self.dp_queries
            detailed_prop: float = self.detailed_prop
        else:
            detailed_prop = 1.0

        # For bottom-up (for example), no proportioning over geolevels
        if self.geolevel_prop_budgets is None:
            geolevel_prop = 1.
        else:
            # index relative to the top level
            index = self.levels_reversed.index(geounit_node.geolevel)
            geolevel_prop = self.geolevel_prop_budgets[index]

        node_hist: np.ndarray = geounit_node.getDenseRaw()
        dp_budget: float = tot_budget * geolevel_prop

        dp_geounit_node = geounit_node
        dp_geounit_node.dp_queries = self.nodeDPQueries(
            dp_budget, node_hist) if dp_queries else {}
        dp_geounit_node.dp = self.makeDPQuery(
            hist=node_hist,
            query=querybase.QueryFactory.makeTabularGroupQuery(
                array_dims=node_hist.shape),
            epsilon=detailed_prop * dp_budget)
        if small_cell_query:
            smallCellName = C.SMALLCELLBASENAME + f"_geocode{geounit_node.geocode}"
            multiindices = das_utils.loadJSONFile(
                self.small_cell_basepath +
                f"geocode{geounit_node.geocode}.json")
            dp_geounit_node.smallCellQuery = querybase.QueryFactory.makeInefficientCountQuery(
                array_dims=node_hist.shape,
                multiindices=multiindices,
                name=smallCellName)
            print(
                f"In geocode {geounit_node.geocode}, smallCellQuery has answer: {dp_geounit_node.smallCellQuery.answer(node_hist)}"
            )
            assert True == False, "Thou shallt not pass"

        return dp_geounit_node
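GeometricMechanism is the DAS's two-sided geometric (discrete Laplace) noise. A hedged sketch of the standard construction of that distribution, with the (epsilon, sensitivity, answer) argument order inferred from Example #6:

import numpy as np
from fractions import Fraction

def two_sided_geometric(epsilon, sensitivity, answer: np.ndarray) -> np.ndarray:
    """Add double-geometric (discrete Laplace) noise to a histogram:
    the difference of two geometric variables, each with success
    probability p = 1 - exp(-epsilon / sensitivity)."""
    rng = np.random.default_rng()
    p = 1.0 - np.exp(-float(epsilon) / sensitivity)
    noise = rng.geometric(p, size=answer.shape) - rng.geometric(p, size=answer.shape)
    return answer + noise

noisy = two_sided_geometric(Fraction(1, 10), 2, np.array([10, 20, 30]))
assert noisy.shape == (3,)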
Example #21
    def test_makeAdditionalInvariantsConstraints(self, block_cons, state_cons,
                                                 county_cons):
        class TestSetup(DASDecennialSetup):
            def __init__(self):
                self.hist_shape = (2, )
                self.hist_vars = ("sex", )
                self.validate_input_data_constraints = False
                self.inv_con_by_level = {
                    'Block': {
                        'invar_names': ('tot', ) if block_cons else (),
                        'cons_names': ('total', ) if block_cons else (),
                    },
                    'County': {
                        'invar_names': ('tot', ) if county_cons else (),
                        'cons_names': ('total', ) if county_cons else (),
                    },
                    'State': {
                        'invar_names': ('tot', ) if state_cons else (),
                        'cons_names': ('total', ) if state_cons else ()
                    }
                }

            @staticmethod
            def makeInvariants(raw, raw_housing, invariant_names):
                inv_dict = {}
                if 'tot' in invariant_names:
                    inv_dict.update({'tot': np.sum(raw.toDense())})
                return inv_dict

            @staticmethod
            def makeConstraints(hist_shape, invariants, constraint_names):
                cons_dict = {}
                if 'total' in constraint_names:
                    cons_dict.update({
                        'total':
                        Constraint(
                            MultiHistQuery((QueryFactory.makeTabularGroupQuery(
                                (2, ), add_over_margins=(0, )),
                                            StubQuery(
                                                (2, 1), "stub")), (1, 0)),
                            np.array(invariants['tot']), "=", "total")
                    })
                return cons_dict

        setup_instance = TestSetup()
        rb1 = sparse.multiSparse(np.array([1, 2]))
        rb2 = sparse.multiSparse(np.array([3, 4]))
        rb3 = sparse.multiSparse(np.array([5, 6]))
        ub1 = sparse.multiSparse(np.array([101, 102]))
        ub2 = sparse.multiSparse(np.array([103, 104]))
        ub3 = sparse.multiSparse(np.array([105, 106]))

        block_nodes = []
        for block, geocode in zip(
            [rb1, rb2, rb3, ub1, ub2, ub3],
            ['1RB1', '1RB2', '1RB3', '1UB1', '1UB2', '1UB3']):
            invariants = setup_instance.makeInvariants(
                raw=block,
                raw_housing=block,
                invariant_names=setup_instance.inv_con_by_level['Block']
                ['invar_names'])
            constraints = setup_instance.makeConstraints(
                hist_shape=(2, ),
                invariants=invariants,
                constraint_names=setup_instance.inv_con_by_level['Block']
                ['cons_names'])
            block_nodes.append(
                GeounitNode(geocode,
                            raw=block,
                            raw_housing=block,
                            invar=invariants,
                            cons=constraints,
                            geocode_dict={
                                4: "Block",
                                3: "County",
                                1: "State"
                            }))

        rc = block_nodes[0].addInReduce(block_nodes[1]).addInReduce(
            block_nodes[2]).shiftGeocodesUp()
        rc.makeAdditionalInvariantsConstraints(setup_instance)
        uc = block_nodes[3].addInReduce(block_nodes[4]).addInReduce(
            block_nodes[5]).shiftGeocodesUp()
        uc.makeAdditionalInvariantsConstraints(setup_instance)
        state = rc.addInReduce(uc).shiftGeocodesUp()
        state.makeAdditionalInvariantsConstraints(setup_instance)

        assert state.checkConstraints()
        assert rc.checkConstraints()
        assert uc.checkConstraints()
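The addInReduce/shiftGeocodesUp chain aggregates bottom-up by geocode prefix. A stand-alone sketch of that reduce pattern, treating a node as just a (geocode, histogram) pair and reading parent lengths off the test's geocode_dict:

import numpy as np

def shift_geocode_up(geocode: str, geocode_dict: dict) -> str:
    """Truncate a geocode to its parent's length, read off the sorted
    lengths in geocode_dict (top level not handled in this sketch)."""
    lengths = sorted(geocode_dict)  # e.g. [1, 3, 4]
    parent_len = lengths[lengths.index(len(geocode)) - 1]
    return geocode[:parent_len]

geocode_dict = {4: "Block", 3: "County", 1: "State"}
blocks = {'1RB1': np.array([1, 2]), '1RB2': np.array([3, 4]), '1RB3': np.array([5, 6])}
county_hists = {}
for code, hist in blocks.items():
    parent = shift_geocode_up(code, geocode_dict)
    county_hists[parent] = county_hists.get(parent, 0) + hist

assert np.array_equal(county_hists['1RB'], np.array([9, 12]))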
Example #22
    def test_addGeounitNodes(self, addsyn, ic, dpq_make, add_dpqueries,
                             add_querydict):
        geocode_dict = {
            16: 'Block',
            12: 'Block_Group',
            11: 'Tract',
            5: 'County',
            2: 'State',
            1: 'US'
        }
        node1 = makeNode(d1,
                         '12',
                         geocode_dict=geocode_dict,
                         addsyn=addsyn,
                         dpq_make=dpq_make,
                         querydict=add_querydict)
        node2 = makeNode(d2,
                         '12',
                         geocode_dict=geocode_dict,
                         addsyn=addsyn,
                         dpq_make=dpq_make,
                         querydict=add_querydict)
        node_sum = node1.addInReduce(node2,
                                     inv_con=ic,
                                     add_dpqueries=add_dpqueries)
        assert node_sum.raw == node1.raw + node2.raw
        if addsyn:
            assert np.array_equal(node_sum.syn, node1.syn + node2.syn)
        else:
            assert node_sum.syn is None

        if ic:
            # Check if constraints are summed
            for cons_name in set(consn):
                assert node_sum.cons[cons_name] == (node1.cons +
                                                    node2.cons)[cons_name]

            # Check more in depth
            for cons_name in set(consn):
                sign = node_sum.cons[cons_name].sign
                assert sign == node1.cons[cons_name].sign
                assert sign == node2.cons[cons_name].sign
                query = node_sum.cons[cons_name].query
                assert query == node1.cons[cons_name].query
                assert query == node2.cons[cons_name].query

            # # Removed checking constraints rhs, that is done in constraint creator testing
            # for cons_name in set(consn) - {'nurse_nva_0', }:
            #     assert np.array_equal(node_sum.cons[cons_name].rhs, data1[cons_name] + data2[cons_name])

            assert np.array_equal(node_sum.cons['nurse_nva_0'].rhs,
                                  np.array([0]))

            # Check if invariants are summed
            for inv_name in set(invn):
                assert np.all(
                    node_sum.invar[inv_name] == GeounitNode.addInvariants(
                        node1, node2)[inv_name])
        else:
            assert not node_sum.cons and not node_sum.invar

        if add_dpqueries and dpq_make:
            assert node_sum.dp.Var == node1.dp.Var + node2.dp.Var
            assert np.array_equal(node_sum.unzipNoisy().dp.DPanswer,
                                  node1.dp.DPanswer + node2.dp.DPanswer)
        else:
            assert node_sum.dp is None

        if add_dpqueries and add_querydict:

            for name, query in node_sum.dp_queries.items():
                assert query.Var == node1.dp_queries[
                    name].Var + node2.dp_queries[name].Var
                assert np.array_equal(
                    query.unzipDPanswer().DPanswer,
                    node1.dp_queries[name].DPanswer +
                    node2.dp_queries[name].DPanswer)
Example #23
 def conform2PL94(node: GeounitNode):
     DP_counts = node.getDenseSyn()
     PL94_counts = node.invar['pl94counts']
     node.syn = multiSparse(
         np.where(DP_counts > PL94_counts, PL94_counts, DP_counts))
     return node
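The np.where expression is an element-wise minimum, capping each DP count at the corresponding PL94 invariant count; a tiny check of the equivalence:

import numpy as np

DP_counts = np.array([5, 0, 9])
PL94_counts = np.array([3, 2, 9])
capped = np.where(DP_counts > PL94_counts, PL94_counts, DP_counts)
assert np.array_equal(capped, np.minimum(DP_counts, PL94_counts))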