Code example #1
 def hhgq_total_ub(self):
     """Build the upper-bound ("le") constraint on total people per hhgq category.

     The query sums the histogram over margins (1, 2, 3), leaving one total
     per hhgq category (8 of them). The resulting cenquery.Constraint is
     stored in self.constraints_dict under "hhgq_total_ub".

     If the "tot" invariant is known, each category with facilities is bounded
     by the total population minus the facility counts of the other
     categories; categories without facilities are bounded by 0. Otherwise
     the bound is gqhh_vect * hhgq_cap.
     """
     query = cenquery.Query(array_dims=self.hist_shape,
                            subset=None,
                            add_over_margins=(1, 2, 3))
     # Facility counts in every category other than each one's own.
     other_gqs = self.invariants["gqhh_tot"] - self.invariants["gqhh_vect"]
     if "tot" in self.invariants:  # membership test; `.keys()` was redundant
         # Broadcast the total straight to the 8 hhgq categories in one step
         # (the original broadcast to (1,) first, which was a no-op).
         total_ext = np.broadcast_to(np.asarray(self.invariants["tot"]), (8, ))
         # NOTE(review): total_ext - other_gqs can go negative if other
         # categories' facility counts exceed "tot" — presumably the
         # invariants rule that out; confirm upstream.
         rhs = np.where(self.invariants["gqhh_vect"] > 0,
                        total_ext - other_gqs, np.zeros(8))
     else:
         rhs = self.invariants["gqhh_vect"] * self.hhgq_cap
     rhs = rhs.astype(int)
     self.constraints_dict["hhgq_total_ub"] = cenquery.Constraint(
         query=query, rhs=rhs, sign="le", name="hhgq_total_ub")
Code example #2
    def hhgq_total_lb(self):
        """Build the lower-bound ("ge") constraint on total people per hhgq category.

        The query sums the histogram over margins (1, 2, 3), leaving one
        total per hhgq category; the stored constraint requires each total
        to be >= the computed rhs. Registered in self.constraints_dict
        under "hhgq_total_lb".
        """
        invariants = self.invariants
        query = cenquery.Query(array_dims=self.hist_shape,
                               subset=None,
                               add_over_margins=(1, 2, 3))

        # gq_hh facilities in every category except each one's own.
        gqs_elsewhere = invariants["gqhh_tot"] - invariants["gqhh_vect"]

        if "tot" in invariants:
            # Replicate the total population across the 8 hhgq categories.
            totals = np.broadcast_to(
                np.broadcast_to(np.array(invariants["tot"]), (1, )), (8, ))
            # Where other categories also hold facilities, the floor is
            # gqhh_vect (presumably one person per facility — confirm);
            # where they do not, this category must absorb the whole total.
            rhs = np.where(gqs_elsewhere > 0, invariants["gqhh_vect"], totals)
        else:
            rhs = invariants["gqhh_vect"]

        self.constraints_dict["hhgq_total_lb"] = cenquery.Constraint(
            query=query, rhs=rhs.astype(int), sign="ge",
            name="hhgq_total_lb")
Code example #3
 def union_hhgq_ub(self):
     """Build upper-bound ("le") union constraints over sets of hhgq categories.

     For every combination in self.all_cat_combs, constrains the total count
     inside that union of categories: if the combination covers all of
     self.cats, the bound is the total population; otherwise it is the total
     minus the minimum occupancy of facilities outside the union. Each
     constraint is stored under "union_hhgq_ub.<cat>.<cat>...".
     """
     cats = self.cats
     for cat_comb in self.all_cat_combs:
         # hhgq categories NOT in this combination.
         other_cats = list(set(range(8)).difference(set(cat_comb)))
         name = "union_hhgq_ub." + ".".join(str(x) for x in cat_comb)
         # BUG FIX(review): the last axis was range(6); union_hhgq_lb and
         # the rest of the schema subset all 63 levels of the final
         # dimension — "6" looks like a truncated "63".
         subset = (cat_comb, range(2), range(2), range(63))
         if set(cats).issubset(set(cat_comb)):
             rhs = self.invariants["tot"]
         else:
             # Minimum people required by facilities outside the union.
             gq_min_other = self.invariants["gqhh_vect"][other_cats].sum(0)
             rhs = self.invariants["tot"] - gq_min_other
         query = cenquery.Query(array_dims=self.hist_shape,
                                subset=subset,
                                add_over_margins=(0, 1, 2, 3))
         self.constraints_dict[name] = cenquery.Constraint(
             query=query, rhs=np.array(rhs).astype(int), sign="le",
             name=name, union=True, union_type="union_hhgq_ub")
Code example #4
def installPhase1Constraints(node_pair, config):
    """Attach the phase-1 "minSchema" equality constraint to a block node.

    Args:
        node_pair: sequence whose first element is the block-level node and
            whose second element is the corresponding minimal-schema node.
        config: configuration mapping carrying the delimiter-separated margin
            indices under ["minimal_schema"]["minSchema.add_over_margins"].

    Returns:
        The block node, with node.cons set to {"minSchema": constraint}.
    """
    block_node = node_pair[0]
    block_node_ms = node_pair[1]

    # Parse the delimiter-separated margin list out of the config.
    raw_margins = re.split(
        das_utils.DELIM,
        config["minimal_schema"]["minSchema.add_over_margins"])
    add_over_margins = tuple(int(m) for m in raw_margins)

    query = cenquery.Query(array_dims=block_node.raw.shape,
                           add_over_margins=add_over_margins,
                           name="minSchema")

    # The minimal-schema node's dense counts are the required equality values.
    rhs = block_node_ms.raw.toDense()
    block_node.cons = {
        "minSchema": cenquery.Constraint(query, rhs, sign="=",
                                         name="minSchema"),
    }

    return block_node
Code example #5
 def union_hhgq_lb(self):
     """Build lower-bound ("ge") union constraints over sets of hhgq categories.

     For every combination in self.all_cat_combs, requires the total count
     inside that union to be at least the sum of the per-category lower
     bounds already stored in constraints_dict["hhgq_total_lb"], or the
     full population when the combination covers self.cats. Each constraint
     is stored under "union_hhgq_lb.<cat>.<cat>...".
     """
     target = set(self.cats)
     for cat_comb in self.all_cat_combs:
         name = "union_hhgq_lb." + ".".join(str(c) for c in cat_comb)
         if target.issubset(set(cat_comb)):
             # The union covers every category of interest, so it must
             # contain the whole population.
             rhs = self.invariants["tot"]
         else:
             # Sum the already-computed per-category lower bounds over the
             # categories in this combination.
             rhs = self.constraints_dict["hhgq_total_lb"].rhs[cat_comb].sum()
         query = cenquery.Query(
             array_dims=self.hist_shape,
             subset=(cat_comb, range(2), range(2), range(63)),
             add_over_margins=(0, 1, 2, 3))
         self.constraints_dict[name] = cenquery.Constraint(
             query=query,
             rhs=np.array(rhs).astype(int),
             sign="ge",
             name=name,
             union=True,
             union_type="union_hhgq_lb")
Code example #6
def geoimp_wrapper(config, parent_child_node, accum):
    """
    Perform the Post-Processing Step for a generic parent to the Child geography.

    Inputs:
        config: configuration object, passed through to the L2 optimizer
        parent_child_node: a (parent_geocode, nodes) pair where nodes holds one
            parent geounitNode and its child geounitNodes
        accum: spark accumulator object, incremented when the backup solve fires

    Output:
        children: the child geounitNode objects, with .syn / .syn_unrounded
            filled in from the optimizer's integer / L2 answers
    """
    import programs.engine.geoimpgbopt as geoimpgbopt

    parent_child_node = list(parent_child_node)
    parent_geocode = parent_child_node[0]
    print("parent geocode is", parent_geocode)
    # The node objects themselves (parent mixed in with the children).
    nodes = list(parent_child_node[1])

    # The parent is the node with the shortest geocode; compute its index
    # once instead of re-running np.argmin three times.
    geocode_lens = [len(node.geocode) for node in nodes]
    parent_idx = np.argmin(geocode_lens)
    parent = nodes[parent_idx]

    # Everything except the parent, ordered by numeric geocode.
    children = nodes[:parent_idx] + nodes[parent_idx + 1:]
    children = sorted(children,
                      key=lambda geocode_data: int(geocode_data.geocode))
    child_geos = [child.geocode for child in children]
    n_children = len(child_geos)

    # Stack the children's DP answers along a trailing geography axis;
    # with a single child just append the axis.
    if n_children > 1:
        NoisyChild = np.stack([child.dp.DPanswer for child in children],
                              axis=-1)
    else:
        NoisyChild = np.expand_dims(children[0].dp.DPanswer,
                                    axis=children[0].dp.DPanswer.ndim)

    DPqueries_comb = _combine_dp_queries(children, NoisyChild)
    constraints_comb = _combine_constraints(children, NoisyChild)

    parent_hist = parent.syn.toDense()
    parent_geocode = parent.geocode
    parent_constraints = parent.cons  # for checking purposes

    # This is the actual post-processing optimization step.
    l2_answer, int_answer, backup_solve_status = geoimpgbopt.L2geoimp_wrapper(
        config=config,
        parent=parent_hist,
        NoisyChild=NoisyChild,
        DPqueries=DPqueries_comb,
        constraints=constraints_comb,
        identifier=parent_geocode,
        parent_constraints=parent_constraints)

    # Verify the integer answer against every combined constraint.
    if constraints_comb is not None:
        check = True
        for con in constraints_comb:
            check = bool(np.prod(con.check(int_answer)) * check)
        print("constraints are ", check, "for parent geocode ", parent_geocode)

    # Slice the stacked answers back out per child geography, for both the
    # rounded (integer) and unrounded (L2) solutions.
    temps = _split_answer_by_geo(int_answer, n_children)
    temps2 = _split_answer_by_geo(l2_answer, n_children)

    for i in range(n_children):
        children[i].syn = sparse.multiSparse(temps[i])
        children[i].syn_unrounded = sparse.multiSparse(temps2[i])

    if backup_solve_status:  # was `== True`; plain truthiness is equivalent
        accum += 1

    return children


def _combine_dp_queries(children, NoisyChild):
    """Combine the children's geography-free DPqueries into DPqueries with a
    trailing geography axis; returns [] when the children carry none.

    (The original `any(dict) == False` test raised TypeError on None; the
    emptiness test below treats None and {} the same way.)
    """
    if not children[0].dp_queries:
        return []
    DPqueries = [list(child.dp_queries.values()) for child in children]
    combined = []
    for i in range(len(DPqueries[0])):
        # Extend the geography-free subset with the full geography axis.
        subset_input = tuple(
            list(DPqueries[0][i].query.subset_input) +
            [range(NoisyChild.shape[-1])])
        query = cenquery.Query(
            array_dims=NoisyChild.shape,
            subset=subset_input,
            add_over_margins=DPqueries[0][i].query.add_over_margins)
        # Stack the i-th answer from every child along the geography axis.
        q_answer = np.stack([dpq[i].DPanswer for dpq in DPqueries], axis=-1)
        combined.append(cenquery.DPquery(query=query, DPanswer=q_answer))
    return combined


def _combine_constraints(children, NoisyChild):
    """Merge matching per-child cenquery.Constraint objects into combined,
    geography-aware constraints.

    Children may carry different constraint sets (e.g. redundant union
    constraints may have been deleted), so each named constraint is combined
    only across the children that actually have it. Returns None when the
    first child has no constraints (preserving the original contract).
    """
    from itertools import compress

    if not children[0].cons:
        return None
    all_keys = []
    for child in children:
        all_keys.extend(child.cons.keys())
    combined = []
    # Subset to unique constraint names.
    for key in set(all_keys):
        # Which children carry this constraint?
        ind = [key in child.cons for child in children]
        constraints = [child.cons[key] for child in compress(children, ind)]
        # Geography indices of exactly those children.
        subset_geos = list(compress(range(NoisyChild.shape[-1]), ind))
        subset_input = tuple(
            list(constraints[0].query.subset_input) + [subset_geos])
        query = cenquery.Query(
            array_dims=NoisyChild.shape,
            subset=subset_input,
            add_over_margins=constraints[0].query.add_over_margins)
        rhs = np.stack([con.rhs for con in constraints], axis=-1)
        combined.append(cenquery.Constraint(query=query,
                                            rhs=rhs,
                                            sign=constraints[0].sign,
                                            name=constraints[0].name))
    return combined


def _split_answer_by_geo(answer, n_geos):
    """Split a stacked optimizer answer into one squeezed array per geography.

    The last axis of `answer` indexes geography; `answer[..., i:i+1].squeeze()`
    is equivalent to the original full-slice tuple construction (all size-1
    dimensions, including leading ones, are squeezed out, matching the
    downstream sparse representation).
    """
    return [answer[..., i:i + 1].squeeze() for i in range(n_geos)]