Code example #1
0
    def makeNode(self, persons, geocode='0123456789abcdef'):
        """Build a GeounitNode for a person-table test fixture.

        Converts the person records into person/unit histograms, derives the
        standard invariants and constraints for the DHCP-HHGQ schema, and
        returns a node whose synthetic histogram mirrors the raw one.
        """
        hists = table2hists(np.array(persons),
                            self.schema,
                            housing_varname=CC.ATTR_HHGQ)
        person_hist, unit_hist = hists

        # Invariants derived from the raw person and housing histograms.
        invariants = InvariantsMaker.make(schema=CC.DAS_DHCP_HHGQ,
                                          raw=person_hist,
                                          raw_housing=unit_hist,
                                          invariant_names=('tot', 'gqhh_tot',
                                                           'gqhh_vect'))

        # Constraints computed from those invariants over both histogram shapes.
        constraint_creator = PConstraintsCreator(
            hist_shape=(person_hist.shape, unit_hist.shape),
            invariants=invariants,
            constraint_names=(
                'hhgq_total_lb', 'hhgq_total_ub',
                'nurse_nva_0'))
        constraints = constraint_creator.calculateConstraints().constraints_dict

        # Geocode-length -> geolevel mapping for the full US hierarchy.
        geolevels = {
            16: 'Block',
            12: 'Block_Group',
            11: 'Tract',
            5: 'County',
            2: 'State',
            1: 'US'
        }

        node = GeounitNode(raw=person_hist,
                           raw_housing=unit_hist,
                           invar=invariants,
                           cons=constraints,
                           geocode_dict=geolevels,
                           geocode=geocode)
        # Test fixture: treat the raw histogram as the synthetic one.
        node.syn = node.raw
        return node
Code example #2
0
    def makeNode(self, hholds, units, geocode='0'):
        """Build a GeounitNode for a household-table test fixture.

        Converts household and unit records into their histograms, derives
        the household-schema invariants and constraints, and returns a node
        whose synthetic histogram mirrors the raw one.
        """
        # Households and units come from separate tables, so each gets its
        # own table2hists conversion (units with units=True).
        hhold_hist, _ = table2hists(np.array(hholds), self.schema)
        unit_hist = table2hists(np.array(units),
                                self.unit_schema,
                                CC.ATTR_HHGQ,
                                units=True)

        invariants = InvariantsMaker.make(schema=CC.SCHEMA_HOUSEHOLD2010,
                                          raw=hhold_hist,
                                          raw_housing=unit_hist,
                                          invariant_names=('tot', 'gqhh_vect'))

        constraint_creator = HHConstraintsCreator(
            hist_shape=(hhold_hist.shape, unit_hist.shape),
            invariants=invariants,
            constraint_names=('no_vacant', 'living_alone',
                              'size2'))
        constraints = constraint_creator.calculateConstraints().constraints_dict

        node = GeounitNode(raw=hhold_hist,
                           raw_housing=unit_hist,
                           invar=invariants,
                           cons=constraints,
                           geocode_dict={1: 'Stub'},
                           geocode=geocode)
        # Test fixture: treat the raw histogram as the synthetic one.
        node.syn = node.raw
        return node
Code example #3
0
    def sample_histogram(node: GeounitNode, sample_target: int):
        """Replace the node's synthetic histogram with a multinomial resample.

        :param node: The input GeounitNode which will receive a new sampled histogram
        :param sample_target: The size of the target sample population
        :return: The input node with its syn attribute set to the sampled histogram
        """
        # Precondition: a raw multiSparse histogram with populated sparse data.
        raw = node.raw
        assert raw is not None and isinstance(raw, multiSparse)
        assert raw.sparse_array is not None
        assert raw.sparse_array.data is not None

        # Shape of the dense histogram the multiSparse wrapper represents.
        dense_shape = raw.shape

        # CSR structure (shape + index arrays) is reused verbatim so the
        # sampled counts land in the same cells as the original data.
        sparse = raw.sparse_array
        sparse_shape = sparse.shape
        col_indices = sparse.indices
        row_pointers = sparse.indptr

        # Probability vector over the populated cells.
        pval = BootstrapEngine.compute_pval(node)

        # Draw sample_target observations from a multinomial over pval.
        sampled_counts = numpy.random.multinomial(sample_target, pval)

        # Rebuild a CSR matrix with the sampled counts and wrap it in a
        # multiSparse histogram of the original dense shape.
        resampled = ss.csr_matrix((sampled_counts, col_indices, row_pointers),
                                  shape=sparse_shape)
        node.syn = multiSparse(resampled, shape=dense_shape)
        return node
Code example #4
0
def geoimp_wrapper_nat(*,
                       config,
                       parent_shape,
                       nat_node: GeounitNode,
                       min_schema=None):
    """
    This function performs the Post-Processing Step of National to National level.
    It is called from engine_utils.py:topdown in a Spark map operation

    Inputs:
        config: configuration object
        parent_shape: shape of the parent histogram passed to the optimizer
        nat_node: a GeounitNode object referring to entire nation
        min_schema: optional minimal schema forwarded to the optimizer

    Output:
        nat_node: a GeounitNode object referring to entire nation, with its
        syn/syn_unrounded attributes set to the optimized histograms
    """

    # Make sure that the logger is set up on all of the nodes
    # (this runs inside a Spark executor, so logging must be re-initialized here)
    clogging.setup(level=logging.INFO,
                   syslog=True,
                   syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP))
    # t_start = time.time()
    # Nation has no parent geounit, hence no parent histogram.
    parent_hist = None

    # Add a trailing axis of length 1 so the noisy answer has an explicit
    # "child" dimension (there is exactly one child: the nation itself).
    noisy_child = np.expand_dims(
        asDense(nat_node.dp.DPanswer), axis=len(
            nat_node.dp.DPanswer.shape)) if nat_node.dp else None
    # Inverse-variance weight for the noisy child (None if no DP answer).
    noisy_child_weight = 1. / nat_node.dp.Var if nat_node.dp else None
    parent_geocode = "nat_to_nat"

    # TODO: Maybe filtering out the detailed query from node.dp_queries can be done neater
    dp_queries_comb = stackNodeProperties([
        nat_node,
    ], lambda node: node.dp_queries, cons_dpq.StackedDPquery,
                                          lambda name: name != C.DETAILED)
    # NOTE(review): map() is lazy in Python 3 — this iterator can only be
    # consumed once; confirm L2PlusRounderWithBackup materializes it.
    query_weights = map(
        lambda sdpq: 1. / sdpq.Var, dp_queries_comb
    )  # We can get actual variance for each query if we want
    constraints_comb = stackNodeProperties([
        nat_node,
    ], lambda node: node.cons, cons_dpq.StackedConstraint)

    # Create an L2PlusRounderWithBackup object
    seq_opt = sequential_optimizers.L2PlusRounderWithBackup(
        das=None,
        parent=parent_hist,
        parent_shape=parent_shape,
        NoisyChild=noisy_child,
        childGeoLen=1,
        config=config,
        DPqueries=dp_queries_comb,
        constraints=constraints_comb,
        NoisyChild_weight=noisy_child_weight,
        query_weights=query_weights,
        identifier="nat_to_nat",
        min_schema=min_schema,
        stat_node=nat_node)

    # L2 (continuous) answer, integer-rounded answer, and backup-solve status.
    l2_answer, int_answer, backup_solve_status = seq_opt.run()

    # get rid of extra dimension (the length-1 child axis added above)
    int_answer = int_answer.squeeze()
    l2_answer = l2_answer.squeeze()

    # Temporarily set syn to the dense integer answer so the constraint
    # check below runs against it, then re-wrap as sparse histograms.
    nat_node.syn = int_answer
    constraintsCheck(nat_node, parent_geocode)

    nat_node.syn = sparse.multiSparse(int_answer)
    nat_node.syn_unrounded = sparse.multiSparse(l2_answer)
    return nat_node
Code example #5
0
 def conform2PL94(node: GeounitNode):
     """Cap the node's synthetic counts elementwise at the PL94 invariant counts."""
     synthetic = node.getDenseSyn()
     pl94_cap = node.invar['pl94counts']
     # Wherever the DP synthetic count exceeds the PL94 count, use the PL94
     # count instead; otherwise keep the synthetic count.
     exceeds_cap = synthetic > pl94_cap
     node.syn = multiSparse(np.where(exceeds_cap, pl94_cap, synthetic))
     return node