def hhgq_total_ub(self):
    """Install the per-hhgq-category upper-bound constraint.

    Sums the histogram over every axis except hhgq (axis 0) and bounds each
    category total from above: when the total population invariant ("tot") is
    held, a category's count can be at most total minus the counts pinned in
    the other gq categories (and 0 where no facility exists); otherwise the
    cap is gqhh_vect * hhgq_cap. Stores the result in
    ``self.constraints_dict["hhgq_total_ub"]``.
    """
    inv = self.invariants
    # Query over the full histogram, collapsing all axes but hhgq (axis 0).
    query = cenquery.Query(array_dims=self.hist_shape,
                           subset=None,
                           add_over_margins=(1, 2, 3))
    # Minimum people accounted for by the OTHER gq categories.
    other_gqs = inv["gqhh_tot"] - inv["gqhh_vect"]
    if "tot" in inv:
        # Extend the scalar total to one entry per hhgq category (8 of them).
        total = np.broadcast_to(np.array(inv["tot"]), (1, ))
        total_ext = np.broadcast_to(total, (8, ))
        # Categories with at least one facility get total - other_gqs;
        # categories with none are capped at zero.
        rhs = np.where(inv["gqhh_vect"] > 0, total_ext - other_gqs, np.zeros(8))
    else:
        # No total invariant: cap each category at facilities * hhgq_cap.
        rhs = inv["gqhh_vect"] * self.hhgq_cap
    self.constraints_dict["hhgq_total_ub"] = cenquery.Constraint(
        query=query, rhs=rhs.astype(int), sign="le", name="hhgq_total_ub")
def hhgq_total_lb(self):
    """Install the per-hhgq-category lower-bound constraint.

    Sums the histogram over every axis except hhgq (axis 0) and bounds each
    category total from below: when the total invariant ("tot") is held, a
    category whose complementary categories have facilities must contain at
    least its pinned count (gqhh_vect), and otherwise the whole total;
    without the total invariant the floor is simply gqhh_vect. Stores the
    result in ``self.constraints_dict["hhgq_total_lb"]``.
    """
    inv = self.invariants
    query = cenquery.Query(array_dims=self.hist_shape,
                           subset=None,
                           add_over_margins=(1, 2, 3))
    # gq_hh in other cats
    other_gqs = inv["gqhh_tot"] - inv["gqhh_vect"]
    if "tot" in inv:
        # Extend the scalar total to one entry per hhgq category (8 of them).
        total = np.broadcast_to(np.array(inv["tot"]), (1, ))
        total_ext = np.broadcast_to(total, (8, ))
        # If anyone must live elsewhere, the floor is this category's own
        # pinned count; otherwise everyone must be here.
        rhs = np.where(other_gqs > 0, inv["gqhh_vect"], total_ext)
    else:
        rhs = inv["gqhh_vect"]
    self.constraints_dict["hhgq_total_lb"] = cenquery.Constraint(
        query=query, rhs=rhs.astype(int), sign="ge", name="hhgq_total_lb")
def union_hhgq_ub(self):
    """Install an upper-bound constraint for every union of hhgq categories.

    For each combination of categories in ``self.all_cat_combs``, the sum of
    the histogram cells in that union is bounded above by the total
    population invariant minus the minimum counts pinned in the
    complementary categories. One named Constraint per combination is added
    to ``self.constraints_dict``.
    """
    cats = self.cats
    all_cat_combs = self.all_cat_combs
    for cat_comb in all_cat_combs:
        # hhgq categories NOT in this union.
        other_cats = list(set(range(8)).difference(set(cat_comb)))
        name = "union_hhgq_ub." + ".".join([str(x) for x in cat_comb])
        # FIX: the last subset axis previously used range(6), while the
        # parallel lower-bound constraint (union_hhgq_lb) subsets range(63)
        # on the same axis — so the upper bound silently covered only a
        # slice of the histogram. Use the full axis to match.
        subset = (cat_comb, range(2), range(2), range(63))
        add_over_margins = (0, 1, 2, 3)
        if set(cats).issubset(set(cat_comb)):
            # The union covers every category: bound is the total invariant.
            rhs = self.invariants["tot"]
        else:
            # Subtract the minimum counts pinned in the complementary
            # categories from the total.
            gq_min_ext = self.invariants["gqhh_vect"]
            gq_min_other = gq_min_ext[other_cats].sum(0)
            rhs = self.invariants["tot"] - gq_min_other
        query = cenquery.Query(array_dims=self.hist_shape,
                               subset=subset,
                               add_over_margins=add_over_margins)
        self.constraints_dict[name] = cenquery.Constraint(
            query=query, rhs=np.array(rhs).astype(int), sign="le",
            name=name, union=True, union_type="union_hhgq_ub")
def installPhase1Constraints(node_pair, config):
    """Attach the minimal-schema equality constraint to a block node.

    Parameters:
        node_pair: pair where element 0 is the block geounit node and
            element 1 is the corresponding minimal-schema node.
        config: parsed configuration; the margins to sum over are read from
            ``config["minimal_schema"]["minSchema.add_over_margins"]``.

    Returns the block node with ``cons = {"minSchema": <Constraint>}`` set,
    pinning the summed block histogram to the minimal-schema dense counts.
    """
    block_node = node_pair[0]
    block_node_ms = node_pair[1]
    # Margins come in as a delimited string, e.g. "1,2,3".
    margins_raw = config["minimal_schema"]["minSchema.add_over_margins"]
    add_over_margins = tuple(
        int(piece) for piece in re.split(das_utils.DELIM, margins_raw))
    query = cenquery.Query(array_dims=block_node.raw.shape,
                           add_over_margins=add_over_margins,
                           name="minSchema")
    # The equality target is the minimal-schema node's dense histogram.
    rhs = block_node_ms.raw.toDense()
    block_node.cons = {
        "minSchema": cenquery.Constraint(query, rhs, sign="=",
                                         name="minSchema")
    }
    return block_node
def union_hhgq_lb(self):
    """Install a lower-bound constraint for every union of hhgq categories.

    For each combination in ``self.all_cat_combs``, the sum of histogram
    cells in that union must be at least the sum of the per-category lower
    bounds already computed by ``hhgq_total_lb`` (or the full total when the
    union covers every category). One named Constraint per combination is
    added to ``self.constraints_dict``.
    """
    full_cats = set(self.cats)
    for cat_comb in self.all_cat_combs:
        name = "union_hhgq_lb." + ".".join(str(c) for c in cat_comb)
        subset = (cat_comb, range(2), range(2), range(63))
        if full_cats.issubset(set(cat_comb)):
            # Union covers everything: floor is the total invariant.
            rhs = self.invariants["tot"]
        else:
            # Sum the already-installed per-category lower bounds over the
            # categories in this union.
            rhs = self.constraints_dict["hhgq_total_lb"].rhs[cat_comb].sum()
        query = cenquery.Query(array_dims=self.hist_shape,
                               subset=subset,
                               add_over_margins=(0, 1, 2, 3))
        self.constraints_dict[name] = cenquery.Constraint(
            query=query, rhs=np.array(rhs).astype(int), sign="ge",
            name=name, union=True, union_type="union_hhgq_lb")
def geoimp_wrapper(config, parent_child_node, accum):
    """
    This function performs the Post-Processing Step for a generic parent to the Child geography.
    Inputs:
        config: configuration object
        parent_child_node: a collection of geounitNode objects containing one parent and multiple child
                           (assumes a (key, iterable-of-nodes) pair, e.g. from a Spark groupBy --
                           inferred from the [0]/[1] indexing below; confirm against the caller)
        accum: spark accumulator object (incremented when the backup solve was used)
    Output:
        children: a collection of geounitNode objects for each of the children, after post-processing
    """
    import programs.engine.geoimpgbopt as geoimpgbopt
    from itertools import compress
    parent_child_node = list(parent_child_node)
    # First element of the pair is the grouping key (the parent's geocode).
    parent_geocode = parent_child_node[0]
    print("parent geocode is", parent_geocode)
    # a list of the node objects
    nodes = list(list(parent_child_node)[1])
    #calculate the length of each of the geocodes (to determine which is the parent)
    geocode_lens = [len(node.geocode) for node in nodes]
    #the parent is the shortest geocode
    parent = nodes[np.argmin(geocode_lens)]
    #subset the children nodes (everything except the parent's position)
    children = nodes[:np.argmin(geocode_lens
                                )] + nodes[np.argmin(geocode_lens) + 1:]
    # Sort children by numeric geocode so stacking order is deterministic.
    children = sorted(children,
                      key=lambda geocode_data: int(geocode_data.geocode))
    child_geos = [child.geocode for child in children]
    n_children = len(child_geos)
    #stack the dp arrays on top of one another, if only 1 child just expand the axis
    # (either way NoisyChild gains a trailing geography axis of size n_children)
    if n_children > 1:
        NoisyChild = np.stack([child.dp.DPanswer for child in children],
                              axis=-1)
    else:
        NoisyChild = np.expand_dims(children[0].dp.DPanswer,
                                    axis=len(children[0].dp.DPanswer.shape))
    #combine DPqueries without geography to combined DPqueries with geography
    #if no DPqueries, change this to an empty list
    # NOTE(review): any(...) == False assumes dp_queries is a (possibly empty)
    # dict; a None here would raise TypeError -- confirm upstream guarantees.
    if any(children[0].dp_queries) == False:
        DPqueries_comb = []
    else:
        # One inner list of DPquery objects per child; assumes every child has
        # the same queries in the same order (indexed by i below).
        DPqueries = list(list(child.dp_queries.values()) for child in children)
        n_q = len(DPqueries[0])
        DPqueries_comb = []
        for i in range(n_q):
            # Extend the geography-free subset with the full geography axis.
            subset_input = tuple(
                list(DPqueries[0][i].query.subset_input) +
                [range(NoisyChild.shape[-1])])
            query = cenquery.Query(
                array_dims=NoisyChild.shape,
                subset=subset_input,
                add_over_margins=DPqueries[0][i].query.add_over_margins)
            # Stack each child's answer for query i along the geography axis.
            q_answer = np.stack([DPquery[i].DPanswer for DPquery in DPqueries],
                                axis=-1)
            DP_query = cenquery.DPquery(query=query, DPanswer=q_answer)
            DPqueries_comb.append(DP_query)
    #delete redundant union constraints
    #which gq cat are non-zero
    #combine cenquery.Constraint objects without geography to build combined cenquery.Constraint
    constraints_comb = []
    #now children may have different constraints. only combine the ones that match.
    if any(children[0].cons) == False:
        constraints_comb = None
    else:
        all_keys = []
        for child in children:
            all_keys.extend(list(child.cons.keys()))
        #subset to unique names (order of set() is arbitrary)
        constraint_keys = tuple(list(set(all_keys)))
        #children is a list of nodes
        for key in constraint_keys:
            #make a list of individual constraints for all children who have them
            #find which children have the key
            ind = [key in child.cons.keys() for child in children]
            #children_sub is subset of children with that key
            children_sub = list(compress(children, ind))
            constraints = list(child.cons[key] for child in children_sub)
            #get the list of geos that have this constraint
            subset_geos = list(compress(range(NoisyChild.shape[-1]), ind))
            # Append the geography positions to the geography-free subset.
            subset_input = tuple(
                list(constraints[0].query.subset_input) + [
                    subset_geos,
                ])
            query = cenquery.Query(
                array_dims=NoisyChild.shape,
                subset=subset_input,
                add_over_margins=constraints[0].query.add_over_margins)
            # Stack each participating child's rhs along the geography axis;
            # sign/name are taken from the first child's constraint (assumes
            # all children agree on them for a given key).
            rhs = np.stack([con.rhs for con in constraints], axis=-1)
            constraint = cenquery.Constraint(query=query,
                                             rhs=rhs,
                                             sign=constraints[0].sign,
                                             name=constraints[0].name)
            constraints_comb.append(constraint)
    parent_hist = parent.syn.toDense()
    # Re-derive the parent geocode from the parent node itself (shadows the
    # grouping key read above).
    parent_geocode = parent.geocode
    parent_constraints = parent.cons  #for checking purposes
    #this is the actual post-processing optimization step
    l2_answer, int_answer, backup_solve_status = geoimpgbopt.L2geoimp_wrapper(
        config=config,
        parent=parent_hist,
        NoisyChild=NoisyChild,
        DPqueries=DPqueries_comb,
        constraints=constraints_comb,
        identifier=parent_geocode,
        parent_constraints=parent_constraints)
    #check constraints (prints a single aggregate pass/fail flag)
    if constraints_comb is not None:
        check = True
        for x in constraints_comb:
            check = bool(np.prod(x.check(int_answer)) * check)
        print("constraints are ", check, "for parent geocode ",
              parent_geocode)
    temps = []
    for i in range(len(child_geos)):
        temp = int_answer[tuple(
            [
                slice(0, int_answer.shape[x])
                for x in range(len(int_answer.shape) - 1)
            ] + [slice(i, i + 1)]
        )]  #this is really ugly - feel free to improve, trying to subset to each geography
        temp = temp.squeeze()  #gets rid of dimensions of size 1
        temps.append(temp)
    #do this for unrounded too
    temps2 = []
    for i in range(len(child_geos)):
        temp2 = l2_answer[tuple(
            [
                slice(0, l2_answer.shape[x])
                for x in range(len(l2_answer.shape) - 1)
            ] + [slice(i, i + 1)]
        )]  #this is really ugly - feel free to improve, trying to subset to each geography
        temp2 = temp2.squeeze()  #gets rid of dimensions of size 1
        temps2.append(temp2)
    # Write the per-child rounded and unrounded answers back onto the nodes.
    for i, geocode in enumerate(child_geos):
        children[i].syn = sparse.multiSparse(temps[i])
        children[i].syn_unrounded = sparse.multiSparse(temps2[i])
    if backup_solve_status == True:
        accum += 1
    return (children)