def test_answer1(data, tol=0.00000001):
    """Check Query.answer on a sliced subset when nothing is summed over.

    The query is built twice -- once with ``add_over_margins=None`` and once
    with an empty tuple -- and both answers must match plain numpy indexing
    of ``data`` to within ``tol`` (total absolute error).
    """
    region = (slice(1, 3), slice(0, 2), slice(2, 4))
    shape = data.shape
    query_no_sumover = cenquery.Query(shape, region, None)
    query_empty_sumover = cenquery.Query(shape, region, ())
    expected = data[region]
    for query in (query_no_sumover, query_empty_sumover):
        assert numpy.abs(query.answer(data) - expected).sum() <= tol
def test_kron_rep1(data, tol=0.00000001):
    """Check Query.answer on flattened data when nothing is summed over.

    Both the ``None`` and the empty-tuple forms of ``add_over_margins`` are
    answered against ``data.flatten()`` and compared with the flattened
    numpy slice of ``data`` (total absolute error at most ``tol``).
    """
    region = (slice(1, 3), slice(0, 2), slice(2, 4))
    shape = data.shape
    flat_data = data.flatten()
    query_no_sumover = cenquery.Query(shape, region, None)
    query_empty_sumover = cenquery.Query(shape, region, ())
    expected = data[region].flatten()
    for query in (query_no_sumover, query_empty_sumover):
        assert numpy.abs(query.answer(flat_data) - expected).sum() <= tol
def hhgq(self, name="hhgq", subset=None):
    """Register a query that sums the histogram over axes 1, 2 and 3.

    Args:
        name: key under which the query is stored in ``self.queries_dict``
            (also passed to the query as its name).
        subset: optional subset specification forwarded to ``cenquery.Query``.
    """
    self.queries_dict[name] = cenquery.Query(
        self.hist_shape,
        subset=subset,
        add_over_margins=[1, 2, 3],
        name=name,
    )
def minSchematize(node, array_dims, add_over_margins):
    """Collapse a node's histogram to the minimal schema and re-target its constraints.

    ``node.raw`` is replaced by the answer of a query that sums the dense
    histogram over ``add_over_margins``.  Every constraint in ``node.cons``
    then has its query rewritten in place to refer to the reduced array.

    Args:
        node: object exposing ``raw`` (with ``toDense()``) and ``cons``
            (a mapping of constraint name -> constraint).  Mutated in place.
        array_dims: shape of the histogram before the reduction.
        add_over_margins: axes of the original histogram to sum out.

    Returns:
        The same ``node`` object, after mutation.
    """
    minSchemaQuery = cenquery.Query(array_dims=array_dims,
                                    subset=None,
                                    add_over_margins=add_over_margins)
    node.raw = sparse.multiSparse(
        minSchemaQuery.answer_original(node.raw.toDense()))
    minSchema_shape = node.raw.shape

    # Set iteration order is unspecified, so sort explicitly: dims_keep
    # decides the order in which subset_input entries are kept below, and a
    # permuted order would silently permute the axes of the subset.
    dims_keep = sorted(set(range(len(array_dims))).difference(set(add_over_margins)))

    for key in node.cons.keys():
        constraint = node.cons[key]
        query = constraint.query
        query.array_dims = minSchema_shape
        # NOTE(review): the surviving margins keep their ORIGINAL axis numbers
        # even though array_dims now describes the reduced array -- confirm
        # whether they should be renumbered to the reduced axes.
        query.add_over_margins = tuple(
            sorted(set(dims_keep).intersection(set(query.add_over_margins))))
        query.subset_input = [query.subset_input[x] for x in dims_keep]
        query.subset = np.ix_(*tuple(query.subset_input))
        # axis_groupings = () ?? currently no axis groupings in constraints
        print(query)
        constraint.check_after_update()
    return node
def union_hhgq_ub(self):
    """Add one "le" union constraint per category combination.

    For each ``cat_comb`` in ``self.all_cat_combs`` the constraint sums the
    histogram over all four axes, restricted on axis 0 to ``cat_comb``.  The
    right-hand side is the ``tot`` invariant when ``cat_comb`` covers every
    entry of ``self.cats``; otherwise it is ``tot`` minus the summed
    ``gqhh_vect`` invariant of the categories (out of 0-7) not in
    ``cat_comb``.  Results are stored in ``self.constraints_dict`` under
    ``"union_hhgq_ub.<cats joined by '.'>"``.
    """
    cats = self.cats
    for cat_comb in self.all_cat_combs:
        label = "union_hhgq_ub." + ".".join(str(c) for c in cat_comb)
        chosen = set(cat_comb)
        if set(cats) <= chosen:
            bound = self.invariants["tot"]
        else:
            # Categories outside the combination still hold at least their
            # gqhh_vect counts, which lowers the ceiling for this union.
            complement = list(set(range(8)) - chosen)
            outside_min = self.invariants["gqhh_vect"][complement].sum(0)
            bound = self.invariants["tot"] - outside_min
        query = cenquery.Query(
            array_dims=self.hist_shape,
            subset=(cat_comb, range(2), range(2), range(63)),
            add_over_margins=(0, 1, 2, 3),
        )
        self.constraints_dict[label] = cenquery.Constraint(
            query=query,
            rhs=np.array(bound).astype(int),
            sign="le",
            name=label,
            union=True,
            union_type="union_hhgq_ub",
        )
def total(self):
    """Add the "total" equality constraint.

    The query sums the whole histogram (axes 0-3, no subset) and must equal
    the ``tot`` invariant cast to int.
    """
    whole_histogram = cenquery.Query(
        array_dims=self.hist_shape,
        subset=None,
        add_over_margins=(0, 1, 2, 3),
    )
    self.constraints_dict["total"] = cenquery.Constraint(
        query=whole_histogram,
        rhs=self.invariants["tot"].astype(int),
        sign="=",
        name="total",
    )
def test_makeTabularGroupQuery(data):
    """Compare QueryFactory.makeTabularGroupQuery with the old Query class.

    Three pairs are checked with ``compare_arrays``: margins only, margins
    plus a subset (expressed as groupings on the new side), and margins plus
    axis groupings.
    """
    import programs.engine.cenquery as cenquery_old

    dims = data.shape
    margins = (2,)
    old_subset = (range(3), [1, 2], range(5))
    subset_as_groupings = {1: [[1], [2]]}
    old_axis_groupings = [(1, ([0, 1], [2])), (2, ([1, 3], [0, 2]))]
    new_groupings = {1: [[0, 1], [2]], 2: [[1, 3], [0, 2]]}

    # Margins only.
    old_q = cenquery_old.Query(dims, add_over_margins=margins).convertToQuerybase()
    new_q = cenquery.QueryFactory.makeTabularGroupQuery(dims, add_over_margins=margins)
    assert compare_arrays(old_q.answer(data), new_q.answer(data))

    # Margins with a subset on the old side, singleton groupings on the new.
    old_q = cenquery_old.Query(
        dims, add_over_margins=margins, subset=old_subset).convertToQuerybase()
    new_q = cenquery.QueryFactory.makeTabularGroupQuery(
        dims, add_over_margins=margins, groupings=subset_as_groupings)
    assert compare_arrays(old_q.answer(data), new_q.answer(data))

    # Margins with axis groupings on both sides.
    old_q = cenquery_old.Query(
        dims, add_over_margins=(0,), axis_groupings=old_axis_groupings).convertToQuerybase()
    new_q = cenquery.QueryFactory.makeTabularGroupQuery(
        dims, add_over_margins=(0,), groupings=new_groupings)
    assert compare_arrays(old_q.answer(data), new_q.answer(data))
def tot(self):
    """Store the "tot" invariant: the sum of ``self.raw`` over axes 0-3, as int."""
    histogram = self.raw
    sum_everything = cenquery.Query(
        array_dims=histogram.shape,
        subset=None,
        add_over_margins=(0, 1, 2, 3),
    )
    answer = sum_everything.answer(histogram)
    self.invariants_dict["tot"] = np.array(answer).astype(int)
def gq_vect(self):
    """Store the "gq_vect" invariant from ``self.raw_housing``.

    The query keeps indices 1-7 of the first axis and sums over nothing; the
    answer is stored as an int array.
    """
    housing = self.raw_housing
    gq_only = cenquery.Query(
        array_dims=housing.shape,
        subset=(range(1, 8),),
        add_over_margins=None,
    )
    answer = gq_only.answer(housing)
    self.invariants_dict["gq_vect"] = np.array(answer).astype(int)
def number_of_races(self, name="number_of_races", subset=None):
    """Register a query that sums over axes 0-2 and groups axis 3 into five bins.

    Args:
        name: key under which the query is stored in ``self.queries_dict``
            (also passed to the query as its name).
        subset: optional subset specification forwarded to ``cenquery.Query``.
    """
    # NOTE(review): the bins hold 7, 15, 20, 15 and 6 indices.  If axis 3
    # enumerates the 63 non-empty combinations of six races ordered by the
    # number of races (6, 15, 20, 15, 6, 1), every boundary here is one too
    # high -- confirm these boundaries are intended.
    race_count_bins = (
        range(0, 7),
        range(7, 22),
        range(22, 42),
        range(42, 57),
        range(57, 63),
    )
    self.queries_dict[name] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 1, 2],
        subset=subset,
        axis_groupings=[(3, race_count_bins)],
        name=name,
    )
def nurse_nva_0(self):
    """Add the "nurse_nva_0" constraint: a structural zero.

    The query restricts axis 0 to index 3 and axis 1 to index 0, sums over
    all four axes, and must equal 0.
    """
    cell_block = ([3], [0], range(2), range(63))
    query = cenquery.Query(
        array_dims=self.hist_shape,
        subset=cell_block,
        add_over_margins=(0, 1, 2, 3),
    )
    self.constraints_dict["nurse_nva_0"] = cenquery.Constraint(
        query=query,
        rhs=np.array(0),
        sign="=",
        name="nurse_nva_0",
    )
def test_answer2(data, tol=0.00000001):
    """Check Query.answer on a sliced subset with several add_over_margins choices.

    For each axis tuple the query answer is compared with
    ``data[subset].sum(axis=...)``; the largest total absolute error must be
    strictly below ``tol``.
    """
    shape = data.shape
    region = (slice(1, 3), slice(0, 2), slice(2, 4))
    margin_choices = [(1,), (0, 2), (0, 1, 2)]

    expected = [data[region].sum(axis=margins) for margins in margin_choices]
    queries = [cenquery.Query(shape, region, margins) for margins in margin_choices]
    observed = [query.answer(data) for query in queries]

    errors = []
    for got, want in zip(observed, expected):
        errors.append(numpy.abs(got - want).sum())
    assert max(errors) < tol
def test_kron_consistency(data, tol=0.00000001):
    """Check that answering flattened data agrees with ``answer(..., flatten=True)``.

    For several add_over_margins choices, the answer computed on
    ``data.flatten()`` must match the flattened answer computed on ``data``
    (largest total absolute error strictly below ``tol``).
    """
    shape = data.shape
    region = (slice(1, 3), slice(0, 2), slice(2, 4))
    margin_choices = [(1,), (0, 2), (0, 1, 2)]
    flat = data.flatten()

    queries = [cenquery.Query(shape, region, margins) for margins in margin_choices]
    from_array = [query.answer(data, flatten=True) for query in queries]
    from_flat = [query.answer(flat) for query in queries]

    errors = []
    for array_answer, flat_answer in zip(from_array, from_flat):
        errors.append(numpy.abs(array_answer - flat_answer).sum())
    assert max(errors) < tol
def hhgq_total_ub(self):
    """Add the "hhgq_total_ub" constraint: per-category "le" bounds on axis 0.

    The query sums over axes 1-3.  When the ``tot`` invariant is present,
    the bound for a category is ``tot`` minus the ``gqhh`` counts of the
    other categories where ``gqhh_vect`` is positive, and 0 elsewhere.
    Without ``tot`` the bound is ``gqhh_vect * self.hhgq_cap``.
    """
    query = cenquery.Query(
        array_dims=self.hist_shape,
        subset=None,
        add_over_margins=(1, 2, 3),
    )
    other_gqs = self.invariants["gqhh_tot"] - self.invariants["gqhh_vect"]

    if "tot" not in self.invariants.keys():
        rhs = self.invariants["gqhh_vect"] * self.hhgq_cap
    else:
        # Promote the scalar total to length 1, then stretch it to the
        # 8 categories of axis 0.
        total = np.broadcast_to(np.array(self.invariants["tot"]), (1,))
        total_ext = np.broadcast_to(total, (8,))
        rhs = np.where(self.invariants["gqhh_vect"] > 0,
                       total_ext - other_gqs,
                       np.zeros(8))

    self.constraints_dict["hhgq_total_ub"] = cenquery.Constraint(
        query=query,
        rhs=rhs.astype(int),
        sign="le",
        name="hhgq_total_ub",
    )
def hhgq_total_lb(self):
    """Add the "hhgq_total_lb" constraint: per-category "ge" bounds on axis 0.

    The query sums over axes 1-3.  When the ``tot`` invariant is present,
    the bound for a category is ``gqhh_vect`` where the other categories
    have a positive ``gqhh`` count, and ``tot`` otherwise.  Without ``tot``
    the bound is simply ``gqhh_vect``.
    """
    query = cenquery.Query(
        array_dims=self.hist_shape,
        subset=None,
        add_over_margins=(1, 2, 3),
    )
    # gqhh counts that belong to the other categories
    other_gqs = self.invariants["gqhh_tot"] - self.invariants["gqhh_vect"]

    if "tot" not in self.invariants.keys():
        rhs = self.invariants["gqhh_vect"]
    else:
        # Promote the scalar total to length 1, then stretch it to the
        # 8 categories of axis 0.
        total = np.broadcast_to(np.array(self.invariants["tot"]), (1,))
        total_ext = np.broadcast_to(total, (8,))
        rhs = np.where(other_gqs > 0, self.invariants["gqhh_vect"], total_ext)

    self.constraints_dict["hhgq_total_lb"] = cenquery.Constraint(
        query=query,
        rhs=rhs.astype(int),
        sign="ge",
        name="hhgq_total_lb",
    )
def union_hhgq_lb(self):
    """Add one "ge" union constraint per category combination.

    For each ``cat_comb`` in ``self.all_cat_combs`` the constraint sums the
    histogram over all four axes, restricted on axis 0 to ``cat_comb``.  The
    right-hand side is the ``tot`` invariant when ``cat_comb`` covers every
    entry of ``self.cats``; otherwise it is the sum of the
    ``"hhgq_total_lb"`` constraint's rhs over ``cat_comb``.  Results are
    stored in ``self.constraints_dict`` under
    ``"union_hhgq_lb.<cats joined by '.'>"``.

    Requires ``self.constraints_dict["hhgq_total_lb"]`` to exist already
    whenever some combination does not cover ``self.cats``.
    """
    cats = self.cats
    all_cat_combs = self.all_cat_combs
    for cat_comb in all_cat_combs:
        name = "union_hhgq_lb." + ".".join([str(x) for x in cat_comb])
        # Axis 3 must span all 63 entries, exactly as in union_hhgq_ub;
        # restricting it to range(6) would make the bound apply to only a
        # slice of the population.
        subset = (cat_comb, range(2), range(2), range(63))
        add_over_margins = (0, 1, 2, 3)
        if set(cats).issubset(set(cat_comb)):
            rhs = self.invariants["tot"]
        else:
            rhs = self.constraints_dict["hhgq_total_lb"].rhs[cat_comb].sum()
        sign = "ge"
        query = cenquery.Query(array_dims=self.hist_shape,
                               subset=subset,
                               add_over_margins=add_over_margins)
        self.constraints_dict[name] = cenquery.Constraint(
            query=query,
            rhs=np.array(rhs).astype(int),
            sign=sign,
            name=name,
            union=True,
            union_type="union_hhgq_lb")
def installPhase1Constraints(node_pair, config):
    """Replace a block node's constraints with a single "minSchema" equality.

    Args:
        node_pair: indexable pair; element 0 is the block node to modify,
            element 1 is the node whose dense ``raw`` supplies the rhs.
        config: nested mapping; ``config["minimal_schema"]
            ["minSchema.add_over_margins"]`` is a string of axis numbers
            separated by ``das_utils.DELIM``.

    Returns:
        The block node (element 0), with ``cons`` set to
        ``{"minSchema": <constraint>}``.
    """
    block_node = node_pair[0]
    block_node_ms = node_pair[1]

    margin_tokens = re.split(
        das_utils.DELIM,
        config["minimal_schema"]["minSchema.add_over_margins"])
    add_over_margins = tuple(int(token) for token in margin_tokens)

    query = cenquery.Query(array_dims=block_node.raw.shape,
                           add_over_margins=add_over_margins,
                           name="minSchema")
    ms_constraint = cenquery.Constraint(query,
                                        block_node_ms.raw.toDense(),
                                        sign="=",
                                        name="minSchema")
    block_node.cons = {"minSchema": ms_constraint}
    return block_node
def black_in_combo(self):
    """Register the "black_in_combo" query.

    Sums over axes 0-3 and groups axis 3 into the index runs listed below
    (presumably the race categories that include Black -- verify against
    the race coding).
    """
    axis3_groups = (
        [1],
        [6],
        range(11, 15),
        range(21, 25),
        range(31, 37),
        range(41, 47),
        range(51, 55),
        range(56, 60),
        range(61, 63),
    )
    self.queries_dict["black_in_combo"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 1, 2, 3],
        axis_groupings=[(3, axis3_groups)],
        name="black_in_combo",
    )
def hhgq_va_hisp(self):
    """Register the "hhgq_va_hisp" query, which sums the histogram over axis 3."""
    self.queries_dict["hhgq_va_hisp"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[3],
        name="hhgq_va_hisp",
    )
def va_race(self):
    """Register the "va_race" query, which sums the histogram over axes 0 and 2."""
    self.queries_dict["va_race"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 2],
        name="va_race",
    )
def race_ethnicity(self):
    """Register the "race_ethnicity" query, which sums the histogram over axes 0 and 1."""
    self.queries_dict["race_ethnicity"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 1],
        name="race_ethnicity",
    )
def race(self, name="race", subset=None):
    """Register a query that sums the histogram over axes 0, 1 and 2.

    Args:
        name: key under which the query is stored in ``self.queries_dict``
            (also passed to the query as its name).
        subset: optional subset specification forwarded to ``cenquery.Query``.
    """
    self.queries_dict[name] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 1, 2],
        subset=subset,
        name=name,
    )
def hhgq_race(self):
    """Register the "hhgq_race" query, which sums the histogram over axes 1 and 2."""
    self.queries_dict["hhgq_race"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[1, 2],
        name="hhgq_race",
    )
def racecomb(self):
    """Register the "racecomb" query.

    Sums over axes 0-3 and splits axis 4 into two groups: indices 0-2 and
    indices 3-5.
    """
    axis4_groups = ([0, 1, 2], [3, 4, 5])
    self.queries_dict["racecomb"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 1, 2, 3],
        axis_groupings=[(4, axis4_groups)],
        name="racecomb",
    )
def voting_age(self):
    """Register the "voting_age" query, which sums the histogram over axes 0, 2 and 3."""
    self.queries_dict["voting_age"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 2, 3],
        name="voting_age",
    )
def race(self):
    """Register the "race" query, which sums the histogram over axes 0, 1, 2 and 3."""
    self.queries_dict["race"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 1, 2, 3],
        name="race",
    )
def weight_test(self):
    """Register the "weight_test" query.

    Sums over axes 0, 2 and 3 with a constant weight of 1.5 on every cell
    of the histogram.
    """
    uniform_weights = np.full(self.hist_shape, 1.5)
    self.queries_dict["weight_test"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 2, 3],
        weight_array=uniform_weights,
        name="weight_test",
    )
def nhpi_in_combo(self):
    """Register the "nhpi_in_combo" query.

    Sums over axes 0-3 and groups axis 3 into the index runs listed below
    (presumably the race categories that include NHPI -- verify against
    the race coding).
    """
    axis3_groups = (
        [4],
        [9],
        [13],
        [16],
        [18],
        [20],
        [23],
        [26],
        [28],
        [30],
        [32],
        [34],
        range(36, 38),
        range(39, 41),
        [42],
        [44],
        range(46, 48),
        range(49, 52),
        range(53, 57),
        range(58, 63),
    )
    self.queries_dict["nhpi_in_combo"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 1, 2, 3],
        axis_groupings=[(3, axis3_groups)],
        name="nhpi_in_combo",
    )
def asian_in_combo(self):
    """Register the "asian_in_combo" query.

    Sums over axes 0-3 and groups axis 3 into the index runs of the race
    categories that include Asian.

    The indices assume axis 3 enumerates the 63 non-empty combinations of
    six races ordered by number of races and then lexicographically
    (0-5 single races, 6-20 pairs, 21-40 triples, 41-55 quadruples,
    56-61 quintuples, 62 all six), with Asian as race 3.  The other
    ``*_in_combo`` queries are consistent with that ordering.
    NOTE(review): confirm against the race coding.
    """
    add_over_margins = [0, 1, 2, 3]
    axis_groupings = [(3, (
        [3],
        [8],
        [12],
        [15],   # pair (2, 3); 16 is the pair (2, 4), which has no Asian
        range(18, 20),
        [22],   # triple (0, 1, 3); 23 is the triple (0, 1, 4)
        [25],
        range(28, 30),
        [31],
        range(34, 36),
        range(37, 39),
        range(40, 42),
        range(44, 46),
        range(47, 49),
        range(50, 53),
        range(54, 58),
        range(59, 63),
    ))]
    self.queries_dict["asian_in_combo"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=add_over_margins,
        axis_groupings=axis_groupings,
        name="asian_in_combo")
def aian_in_combo(self):
    """Register the "aian_in_combo" query.

    Sums over axes 0-3 and groups axis 3 into the index runs listed below
    (presumably the race categories that include AIAN -- verify against
    the race coding).
    """
    axis3_groups = (
        [2],
        [7],
        [11],
        range(15, 18),
        [21],
        range(25, 28),
        range(31, 34),
        range(37, 40),
        range(41, 44),
        range(47, 50),
        range(51, 54),
        range(55, 59),
        range(60, 63),
    )
    self.queries_dict["aian_in_combo"] = cenquery.Query(
        self.hist_shape,
        add_over_margins=[0, 1, 2, 3],
        axis_groupings=[(3, axis3_groups)],
        name="aian_in_combo",
    )