def makeNode(self, persons, geocode='0123456789abcdef'):
    person_hist, unit_hist = table2hists(np.array(persons), self.schema, housing_varname=CC.ATTR_HHGQ)
    invar = InvariantsMaker.make(schema=CC.DAS_DHCP_HHGQ, raw=person_hist, raw_housing=unit_hist,
                                 invariant_names=('tot', 'gqhh_tot', 'gqhh_vect'))
    cons = PConstraintsCreator(
        hist_shape=(person_hist.shape, unit_hist.shape),
        invariants=invar,
        constraint_names=('hhgq_total_lb', 'hhgq_total_ub', 'nurse_nva_0')
    ).calculateConstraints().constraints_dict
    node = GeounitNode(raw=person_hist, raw_housing=unit_hist, invar=invar, cons=cons,
                       geocode_dict={16: 'Block', 12: 'Block_Group', 11: 'Tract',
                                     5: 'County', 2: 'State', 1: 'US'},
                       geocode=geocode)
    node.syn = node.raw
    return node
def makeNode(self, hholds, units, geocode='0'):
    hhold_hist, unit_hist = (table2hists(np.array(hholds), self.schema),
                             table2hists(np.array(units), self.unit_schema, CC.ATTR_HHGQ, units=True))
    invar = InvariantsMaker.make(schema=CC.SCHEMA_HOUSEHOLD2010, raw=hhold_hist, raw_housing=unit_hist,
                                 invariant_names=('tot', 'gqhh_vect'))
    cons = HHConstraintsCreator(
        hist_shape=(hhold_hist.shape, unit_hist.shape),
        invariants=invar,
        constraint_names=('no_vacant', 'living_alone', 'size2')
    ).calculateConstraints().constraints_dict
    node = GeounitNode(raw=hhold_hist, raw_housing=unit_hist, invar=invar, cons=cons,
                       geocode_dict={1: 'Stub'}, geocode=geocode)
    node.syn = node.raw
    return node
def test_addInvariantsErrorsRaising(self, test_input, errmsg):
    n1 = makeEmptyNode('123456789017')
    n2 = makeEmptyNode('123456789013')
    n1.invar, n2.invar = test_input
    with pytest.raises(IncompatibleAddendsError) as err:
        GeounitNode.addInvariants(n1, n2)
    assert errmsg in str(err.value)
def makeDPNode(self, geounit_node: GeounitNode) -> GeounitNode:
    """
    This function takes a GeounitNode with "raw" data and generates noisy DP
    query answers depending on the specifications in the config object.

    NOTE: This function is called inside the mapper (see above), so it is
    executed for every GeounitNode, on the workers. This may be confusing,
    because the .map() function is called from self.noisyAnswers() (above),
    which is run on the Master node.

    Inputs:
        geounit_node: a Node object with "raw" data
    Outputs:
        dp_geounit_node: a Node object with selected DP measurements
    """
    logging.info(json.dumps({'geocode': geounit_node.geocode, 'geolevel': geounit_node.geolevel}))
    geolevel_prop = self.getNodePLB(geounit_node)
    main_hist: np.ndarray = geounit_node.getDenseRaw()
    unit_hist: np.ndarray = geounit_node.getDenseRawHousing()
    dp_geounit_node = geounit_node
    dp_geounit_node.dp_queries, dp_geounit_node.unit_dp_queries = self.nodeDPQueries(
        geolevel_prop, main_hist, unit_hist, geounit_node.geolevel)
    if self.optimization_query_ordering.est_and_qadd_queries:
        print("multi_pass_try")
        # make estimation queries
        dp_geounit_node = self.optimization_query_ordering.makeOptQueries(dp_geounit_node)
    rounder_queries = {}
    if self.optimization_query_ordering.rounder_query_names:
        rounder_queries = self.setup.schema_obj.getQueries(
            list(self.optimization_query_ordering.rounder_query_names[geounit_node.geolevel]))
    dp_geounit_node.rounder_queries = rounder_queries
    dp_geounit_node.query_ordering = self.optimization_query_ordering.query_ordering[geounit_node.geolevel]
    return dp_geounit_node
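# A minimal usage sketch, not from the source: per the docstring above, makeDPNode is
# applied on the workers via a Spark map. The names `engine` and `nodes_rdd` are
# hypothetical stand-ins for the engine instance and the RDD of GeounitNodes:
#
#     noisy_rdd = nodes_rdd.map(engine.makeDPNode)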
def getToyGeounitData_GeounitNode(schema,
                                  geocodes=['000', '001', '002', '003', '010', '011', '012', '020', '022'],
                                  geocode_dict={3: 'block', 2: 'county'},
                                  raw_params={'low': 0, 'high': 2},
                                  syn_params={'low': 0, 'high': 5}):
    geounits = []
    for geocode in du.aslist(geocodes):
        # Initialize both histograms so that passing None params yields a None
        # histogram instead of referencing an unassigned local (a NameError in the original)
        raw = None
        syn = None
        if raw_params is not None:
            raw = np.random.randint(low=raw_params['low'], high=raw_params['high'],
                                    size=schema.size).reshape(schema.shape)
        if syn_params is not None:
            syn = np.random.randint(low=syn_params['low'], high=syn_params['high'],
                                    size=schema.size).reshape(schema.shape)
        geounits.append(
            GeounitNode(geocode=geocode, geocode_dict=geocode_dict,
                        raw=multiSparse(raw) if raw is not None else None,
                        syn=multiSparse(syn) if syn is not None else None))
    return geounits
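# Example call for the toy-data helper above (hedged: it only assumes a schema object
# exposing .size and .shape, e.g. one built by SchemaMaker as elsewhere in this code base):
#
#     schema = SchemaMaker.fromName(CC.SCHEMA_PL94)
#     geounits = getToyGeounitData_GeounitNode(schema, geocodes=['000', '001'])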
def makeNode(d, geocode, geocode_dict, addsyn=False, dpq_make=False, querydict=None, consn=consn, invn=invn):
    ph = convertPL94(d)
    syn = ph[0].toDense() + np.ones(np.prod(ph[0].shape)).reshape(ph[0].shape) if addsyn else None
    dpqdict = {
        C.DETAILED: DPquery(QueryFactory.makeTabularGroupQuery(array_dims=ph[0].shape),
                            GeometricMechanism(Fraction(1, 10), 2, ph[0].toDense()))
    } if dpq_make else {}
    if querydict:
        dpqdict.update({
            name: DPquery(query,
                          GeometricMechanism(Fraction(1, 10), 2, query.answer(ph[0].toDense())))
            for name, query in SchemaMaker.fromName(CC.SCHEMA_PL94).getQueries(querydict).items()
        })
    inv_dict = makeInvs(ph, invn)
    return GeounitNode(geocode, raw=ph[0], raw_housing=ph[1], syn=syn,
                       cons=makeCons(ph, consn, inv_dict), invar=inv_dict,
                       geocode_dict=geocode_dict, dp_queries=dpqdict)
def node2SparkRows(node: GeounitNode):
    nodedict = node.toDict((SYN, INVAR, GEOCODE))
    persons = makeHistRowsFromMultiSparse(nodedict, schema, row_recoder=self.row_recoder,
                                          geocode_dict=inverted_geodict)
    return persons
def dict2node(self, s_node: Union[GeounitNode, Dict[str, Any]]) -> GeounitNode:
    """
    Convert a dictionary of node fields into a geo node, substituting synthetic data as raw
    :rtype: nodes.GeounitNode
    :param s_node: saved node, either a GeounitNode object or a GeounitNode constructor
        arguments dict, read from file, has node field values
    :return: geoNode
    """
    # These fields are needed for nodes to be reconstructed. Geocode is obvious, syn is
    # used as raw data, raw_housing is needed to recreate invariants
    for key, msg_pref in zip((GEOCODE, SYN, RAW_HOUSING),
                             ("Geocodes", "Synthetic data", "Raw housing")):
        if not hasNodeAttr(s_node, key):
            raise ValueError(f"{msg_pref} should be present in the saved pickled nodes. "
                             f"Missing from geocode {getNodeAttr(s_node, GEOCODE)}")

    # If geocode_dict is saved, it should be the same. Otherwise, the wrong config was probably used
    if hasNodeAttr(s_node, GEOCODEDICT):
        assert getNodeAttr(s_node, GEOCODEDICT) == self.geocode_dict, \
            f"Saved geocode_dict ({getNodeAttr(s_node, GEOCODEDICT)}) is different from the one in config ({self.geocode_dict})!"

    # Check the shape of the saved syn histogram. If it is different, most likely it's the wrong config file
    assert getNodeAttr(s_node, SYN).shape == self.setup.hist_shape, \
        f"Saved histogram shape {getNodeAttr(s_node, SYN).shape} does not correspond to schema in config {self.setup.schema}:{self.setup.hist_shape}!"

    # Create the node
    node = GeounitNode(geocode=getNodeAttr(s_node, GEOCODE), raw=getNodeAttr(s_node, SYN),
                       raw_housing=getNodeAttr(s_node, RAW_HOUSING), geocode_dict=self.geocode_dict)

    # Recreate invariants and constraints. Note, this is done from the raw housing histogram
    # (which is also needed further on, when the topdown procedure adds invariants pertaining to each level)

    # Invariants
    recr_inv = self.setup.makeInvariants(raw=node.raw, raw_housing=node.raw_housing,
                                         invariant_names=self.invar_names)
    if hasNodeAttr(s_node, INVAR):
        # invar is a dictionary of numpy arrays, so compare entries with np.array_equal
        msg = "Saved invariants are different from reconstructed!"
        assert getNodeAttr(s_node, INVAR).keys() == recr_inv.keys(), msg
        for inv_name in getNodeAttr(s_node, INVAR).keys():
            assert np.array_equal(getNodeAttr(s_node, INVAR)[inv_name], recr_inv[inv_name]), msg
    node.invar = recr_inv

    # Constraints
    recr_cons = self.setup.makeConstraints(hist_shape=(self.setup.hist_shape, self.setup.unit_hist_shape),
                                           invariants=recr_inv, constraint_names=self.cons_names)
    if hasNodeAttr(s_node, CONS):
        assert getNodeAttr(s_node, CONS) == recr_cons, "Saved constraints are different from reconstructed!"
    node.cons = recr_cons

    return node
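# Hedged sketch of driving dict2node with a saved node (the `reader` variable and file
# name are illustrative; test_compare_with_saved below reads the same kind of pickle):
#
#     with open('geounitnode.pickle', 'rb') as f:
#         node = reader.dict2node(pickle.load(f))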
def test_compare_with_saved(reader_instance, join_data):
    """ WILL CHANGE IF TEST DATA in .txt FILES or CONFIG CHANGES! """
    bn = reader_instance.makeBlockNode(join_data)
    fname = os.path.join(os.path.dirname(__file__), 'geounitnode.pickle')
    # Uncomment to regenerate the saved reference node:
    # with open(fname, 'wb') as f:
    #     pickle.dump(bn.toDict(keep_attrs=bn.__slots__), f)
    sbn = GeounitNode.fromDict(pickle.load(open(fname, 'rb')))
    assert sbn == bn
def node2SparkRows(node: GeounitNode):
    nodedict = node.toDict((SYN, INVAR, GEOCODE))
    households = makeHistRowsFromMultiSparse(nodedict, schema, row_recoder=self.row_recoder)
    units = addEmptyAndGQ(nodedict, schema, households, row_recoder=self.row_recoder,
                          gqtype_recoder=HHGQUnitDemoProductAttr.das2mdf,
                          geocode_dict=inverted_geodict)
    return units
def test_addInvariants(self, invs1, invs2):
    n1 = makeEmptyNode('123456789017')
    n2 = makeEmptyNode('123456789013')
    n1.invar = invs1
    n2.invar = invs2
    sum_inv = GeounitNode.addInvariants(n1, n2)
    assert set(sum_inv.keys()) == set(invs1.keys())
    assert set(sum_inv.keys()) == set(invs2.keys())
    for invname in sum_inv.keys():
        assert np.array_equal(invs1[invname] + invs2[invname], sum_inv[invname])
def constraintsCheck(node: GeounitNode, parent_geocode=None):
    """
    This function checks that a set of constraints is met given a solution.
    It will raise an exception if any constraint is not met.
    Inputs:
        node: geounit node with a "syn" field to be checked against the node constraints
        parent_geocode: geocode of the parent node, used in the error/status message
            (defaults to node.parentGeocode)
    """
    if parent_geocode is None:
        parent_geocode = node.parentGeocode
    msg = f"Constraints are for parent geocode {parent_geocode}"
    if not node.checkConstraints(raw=False):
        raise RuntimeError(msg + " failed!")
    print(msg + " satisfied.")
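# constraintsCheck is typically called right after an optimizer fills in node.syn,
# as geoimp_wrapper_nat below does:
#
#     nat_node.syn = int_answer
#     constraintsCheck(nat_node, parent_geocode)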
def Data(self):
    """ Data in the shape of histograms for 1 Block. Hist shape (2,) (for, e.g., Male, Female). """
    b1 = multiSparse(np.array([1, 2]))
    block_nodes = [
        GeounitNode('b1', raw=b1, raw_housing=b1, invar={}, cons={}, geocode_dict={2: "Block"}),
    ]
    return block_nodes
def test_toDict(self, add_querydict, dpq_make):
    geocode_dict = {16: 'Block', 12: 'Block_Group', 11: 'Tract', 5: 'County', 2: 'State', 1: 'US'}
    n1 = makeNode(d1, '12', geocode_dict=geocode_dict, addsyn=False,
                  dpq_make=dpq_make, querydict=add_querydict)
    ndict1 = n1.toDict(keep_attrs=n1.__slots__)
    n2 = GeounitNode.fromDict(ndict1)
    assert n1 == n2
def node2SparkRows(node: GeounitNode):
    nodedict = node.toDict((SYN, INVAR, GEOCODE))
    households = makeHistRowsFromMultiSparse(nodedict, schema, row_recoder=self.row_recoder,
                                             geocode_dict=inverted_geodict, microdata_field=None)
    units = addGroupQuarters(nodedict, schema, households, row_recoder=self.row_recoder,
                             geocode_dict=inverted_geodict, to_microdata=False)
    # 'priv' means "protected via the differential privacy routines in this code base";
    # variable to be renamed after P.L. 94-171 production
    ordered_cols = self.var_list + ['priv']
    return [Row(*ordered_cols)(*[unit[col] for col in ordered_cols]) for unit in units]
def mfUrData(self, setup_instance):
    """
    Data in the shape of histograms for 3 rural blocks in 1 rural county and
    3 urban blocks in 1 urban county, all in 1 state.
    Histogram is shape (2,) for sex, i.e. each block provides the number of
    males and the number of females.
    This is the same test example as in the JavaScript simulator.
    """
    rb1 = multiSparse(np.array([1, 2]))
    rb2 = multiSparse(np.array([3, 4]))
    rb3 = multiSparse(np.array([5, 6]))
    ub1 = multiSparse(np.array([101, 102]))
    ub2 = multiSparse(np.array([103, 104]))
    ub3 = multiSparse(np.array([105, 106]))
    block_nodes = []
    for block, geocode in zip([rb1, rb2, rb3, ub1, ub2, ub3],
                              ['1RB1', '1RB2', '1RB3', '1UB1', '1UB2', '1UB3']):
        invariants = setup_instance.makeInvariants(
            raw=block, raw_housing=block,
            invariant_names=setup_instance.inv_con_by_level['Block']['invar_names'])
        constraints = setup_instance.makeConstraints(
            hist_shape=(2,), invariants=invariants,
            constraint_names=setup_instance.inv_con_by_level['Block']['cons_names'])
        block_nodes.append(
            GeounitNode(geocode, raw=block, raw_housing=block, invar=invariants, cons=constraints,
                        geocode_dict={4: "Block", 3: "County", 1: "State"}))
    return block_nodes
def geoimp_wrapper_root(*, config, parent_shape, root_node: GeounitNode, optimizers,
                        min_schema=None, keep_debug_info=False):
    """
    This function performs the Post-Processing Step from the root geonode (e.g. US or a state)
    to the root geonode level. It is called from engine_utils.py:topdown in a Spark map operation.

    Inputs:
        config: configuration object
        root_node: a GeounitNode object referring to the top/root node of the geographical tree
            (e.g. US, US+PR, or a single state for state-size runs)
        optimizers: the optimizers used in the post-processing step
    Output:
        root_node: a GeounitNode object referring to the top/root node of the geographical tree
            (e.g. US, US+PR, or a single state for state-size runs)
    """
    # Make sure that the logger is set up on all of the nodes
    clogging.setup(level=logging.INFO)
    # t_start = time.time()
    parent_hist = [None] * len(parent_shape)
    children = [root_node.unzipNoisy()]

    children, backup_solve_status = makeInputsAndRunOptimizer(
        children, config, min_schema, parent_hist, parent_shape,
        "root_to_root", optimizers, keep_debug_info=keep_debug_info)

    return children[0]
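# Minimal call sketch for the keyword-only wrapper above (argument values are
# illustrative placeholders, not from the source):
#
#     root_node = geoimp_wrapper_root(config=config, parent_shape=parent_shape,
#                                     root_node=root_node, optimizers=optimizers)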
def sample_histogram(node: GeounitNode, sample_target: int):
    """
    :param node: The input GeounitNode which will receive a new sampled histogram
    :param sample_target: The size of the target sample population
    :return: The input node with its syn attribute set to the sampled histogram
    """
    assert all([
        node.raw is not None,
        isinstance(node.raw, multiSparse),
        node.raw.sparse_array is not None,
        node.raw.sparse_array.data is not None
    ])

    # Grab the shape of the full histogram; the sparse data itself is a 1D array
    data_shape = node.raw.shape

    # Get the shape and indices of populated values in the sparse matrix,
    # to be able to recreate a new one
    csr_shape = node.raw.sparse_array.shape
    indices = node.raw.sparse_array.indices
    indptr = node.raw.sparse_array.indptr

    # Get the probability vector
    pval = BootstrapEngine.compute_pval(node)

    # Sample from a multinomial with the pval
    sampled_data = numpy.random.multinomial(sample_target, pval)

    # Produce the new CSR matrix and histogram
    new_matrix = ss.csr_matrix((sampled_data, indices, indptr), shape=csr_shape)
    new_histogram: __HistData__ = multiSparse(new_matrix, shape=data_shape)

    # Set the node's syn attribute
    node.syn = new_histogram
    return node
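# BootstrapEngine.compute_pval is not shown here. A plausible sketch of what it must
# return, given that the sampled vector replaces the CSR data array above, is the raw
# populated cell counts normalized into a probability vector (an assumption, not the
# verified source):
#
#     data = node.raw.sparse_array.data.astype(float)
#     pval = data / data.sum()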
def geoimp_wrapper_nat(*, config, parent_shape, nat_node: GeounitNode, min_schema=None):
    """
    This function performs the Post-Processing Step from the national to the national level.
    It is called from engine_utils.py:topdown in a Spark map operation.

    Inputs:
        config: configuration object
        nat_node: a GeounitNode object referring to the entire nation
    Output:
        nat_node: a GeounitNode object referring to the entire nation
    """
    # Make sure that the logger is set up on all of the nodes
    clogging.setup(level=logging.INFO, syslog=True,
                   syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP))
    # t_start = time.time()
    parent_hist = None

    noisy_child = np.expand_dims(asDense(nat_node.dp.DPanswer),
                                 axis=len(nat_node.dp.DPanswer.shape)) if nat_node.dp else None
    noisy_child_weight = 1. / nat_node.dp.Var if nat_node.dp else None
    parent_geocode = "nat_to_nat"

    # TODO: Maybe filtering out the detailed query from node.dp_queries can be done more neatly
    dp_queries_comb = stackNodeProperties([nat_node, ], lambda node: node.dp_queries,
                                          cons_dpq.StackedDPquery, lambda name: name != C.DETAILED)
    # We can get the actual variance for each query if we want
    query_weights = map(lambda sdpq: 1. / sdpq.Var, dp_queries_comb)
    constraints_comb = stackNodeProperties([nat_node, ], lambda node: node.cons,
                                           cons_dpq.StackedConstraint)

    # Create an L2PlusRounderWithBackup object
    seq_opt = sequential_optimizers.L2PlusRounderWithBackup(
        das=None, parent=parent_hist, parent_shape=parent_shape, NoisyChild=noisy_child,
        childGeoLen=1, config=config, DPqueries=dp_queries_comb, constraints=constraints_comb,
        NoisyChild_weight=noisy_child_weight, query_weights=query_weights,
        identifier="nat_to_nat", min_schema=min_schema, stat_node=nat_node)

    l2_answer, int_answer, backup_solve_status = seq_opt.run()

    # Get rid of the extra dimension
    int_answer = int_answer.squeeze()
    l2_answer = l2_answer.squeeze()

    nat_node.syn = int_answer
    constraintsCheck(nat_node, parent_geocode)

    nat_node.syn = sparse.multiSparse(int_answer)
    nat_node.syn_unrounded = sparse.multiSparse(l2_answer)
    return nat_node
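# Minimal call sketch (keyword-only signature as above; argument values are
# illustrative placeholders, not from the source):
#
#     nat_node = geoimp_wrapper_nat(config=config, parent_shape=parent_shape,
#                                   nat_node=nat_node, min_schema=min_schema)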
def makeDPNode(self, geounit_node: GeounitNode, tot_budget=None, dp_queries=True,
               small_cell_query=False) -> GeounitNode:
    """
    This function takes a GeounitNode with "raw" data and generates noisy DP
    query answers depending on the specifications in the config object.

    NOTE: This function is called inside the mapper (see above), so it is
    executed for every GeounitNode, on the workers. This may be confusing,
    because the .map() function is called from self.noisyAnswers() (above),
    which is run on the Master node.

    Inputs:
        geounit_node: a Node object with "raw" data
        tot_budget: in minimal schema runs (there are two) the total budget differs, using only a part
        dp_queries: boolean indicating whether dpqueries are present in measurement set, based on config
        small_cell_query: boolean indicating whether a public-historical-data smallCellQuery is present, based on config
    Outputs:
        dp_geounit_node: a Node object with selected DP measurements
    """
    logging.info(json.dumps({'geocode': geounit_node.geocode, 'geolevel': geounit_node.geolevel}))

    # For minimal schema, a part of the budget is used in phase 1 and the rest in
    # phase 2, so this function is called with those as tot_budget
    if tot_budget is None:
        tot_budget = self.total_budget

    # For minimal schema phase 1, no DP preset queries are made, so the argument should be set to False.
    # Also, no budget is to be spent on queries other than the detailed one in phase 1
    if dp_queries:
        dp_queries = self.dp_queries
        detailed_prop: float = self.detailed_prop
    else:
        detailed_prop = 1.0

    # For bottom-up (for example), no apportioning over geolevels
    if self.geolevel_prop_budgets is None:
        geolevel_prop = 1.
    else:
        # index relative to the top level
        index = self.levels_reversed.index(geounit_node.geolevel)
        geolevel_prop = self.geolevel_prop_budgets[index]

    node_hist: np.ndarray = geounit_node.getDenseRaw()
    dp_budget: float = tot_budget * geolevel_prop

    dp_geounit_node = geounit_node
    dp_geounit_node.dp_queries = self.nodeDPQueries(dp_budget, node_hist) if dp_queries else {}
    dp_geounit_node.dp = self.makeDPQuery(
        hist=node_hist,
        query=querybase.QueryFactory.makeTabularGroupQuery(array_dims=node_hist.shape),
        epsilon=detailed_prop * dp_budget)

    if small_cell_query:
        smallCellName = C.SMALLCELLBASENAME + f"_geocode{geounit_node.geocode}"
        multiindices = das_utils.loadJSONFile(self.small_cell_basepath + f"geocode{geounit_node.geocode}.json")
        dp_geounit_node.smallCellQuery = querybase.QueryFactory.makeInefficientCountQuery(
            array_dims=node_hist.shape, multiindices=multiindices, name=smallCellName)
        print(f"In geocode {geounit_node.geocode}, smallCellQuery has answer: "
              f"{dp_geounit_node.smallCellQuery.answer(node_hist)}")
        # Intentional hard stop: the smallCellQuery path is blocked from completing
        assert True == False, "Thou shallt not pass"

    return dp_geounit_node
def test_makeAdditionalInvariantsConstraints(self, block_cons, state_cons, county_cons):
    class TestSetup(DASDecennialSetup):
        def __init__(self):
            self.hist_shape = (2,)
            self.hist_vars = ("sex",)
            self.validate_input_data_constraints = False
            self.inv_con_by_level = {
                'Block': {
                    'invar_names': ('tot',) if block_cons else (),
                    'cons_names': ('total',) if block_cons else (),
                },
                'County': {
                    'invar_names': ('tot',) if county_cons else (),
                    'cons_names': ('total',) if county_cons else (),
                },
                'State': {
                    'invar_names': ('tot',) if state_cons else (),
                    'cons_names': ('total',) if state_cons else ()
                }
            }

        @staticmethod
        def makeInvariants(raw, raw_housing, invariant_names):
            inv_dict = {}
            if 'tot' in invariant_names:
                inv_dict.update({'tot': np.sum(raw.toDense())})
            return inv_dict

        @staticmethod
        def makeConstraints(hist_shape, invariants, constraint_names):
            cons_dict = {}
            if 'total' in constraint_names:
                cons_dict.update({
                    'total': Constraint(
                        MultiHistQuery(
                            (QueryFactory.makeTabularGroupQuery((2,), add_over_margins=(0,)),
                             StubQuery((2, 1), "stub")),
                            (1, 0)),
                        np.array(invariants['tot']), "=", "total")
                })
            return cons_dict

    setup_instance = TestSetup()

    rb1 = sparse.multiSparse(np.array([1, 2]))
    rb2 = sparse.multiSparse(np.array([3, 4]))
    rb3 = sparse.multiSparse(np.array([5, 6]))
    ub1 = sparse.multiSparse(np.array([101, 102]))
    ub2 = sparse.multiSparse(np.array([103, 104]))
    ub3 = sparse.multiSparse(np.array([105, 106]))

    block_nodes = []
    for block, geocode in zip([rb1, rb2, rb3, ub1, ub2, ub3],
                              ['1RB1', '1RB2', '1RB3', '1UB1', '1UB2', '1UB3']):
        invariants = setup_instance.makeInvariants(
            raw=block, raw_housing=block,
            invariant_names=setup_instance.inv_con_by_level['Block']['invar_names'])
        constraints = setup_instance.makeConstraints(
            hist_shape=(2,), invariants=invariants,
            constraint_names=setup_instance.inv_con_by_level['Block']['cons_names'])
        block_nodes.append(
            GeounitNode(geocode, raw=block, raw_housing=block, invar=invariants, cons=constraints,
                        geocode_dict={4: "Block", 3: "County", 1: "State"}))

    rc = block_nodes[0].addInReduce(block_nodes[1]).addInReduce(block_nodes[2]).shiftGeocodesUp()
    rc.makeAdditionalInvariantsConstraints(setup_instance)
    uc = block_nodes[3].addInReduce(block_nodes[4]).addInReduce(block_nodes[5]).shiftGeocodesUp()
    uc.makeAdditionalInvariantsConstraints(setup_instance)
    state = rc.addInReduce(uc).shiftGeocodesUp()
    state.makeAdditionalInvariantsConstraints(setup_instance)
    assert state.checkConstraints()
    assert rc.checkConstraints()
    assert uc.checkConstraints()
def test_addGeounitNodes(self, addsyn, ic, dpq_make, add_dpqueries, add_querydict):
    geocode_dict = {16: 'Block', 12: 'Block_Group', 11: 'Tract', 5: 'County', 2: 'State', 1: 'US'}
    node1 = makeNode(d1, '12', geocode_dict=geocode_dict, addsyn=addsyn,
                     dpq_make=dpq_make, querydict=add_querydict)
    node2 = makeNode(d2, '12', geocode_dict=geocode_dict, addsyn=addsyn,
                     dpq_make=dpq_make, querydict=add_querydict)

    node_sum = node1.addInReduce(node2, inv_con=ic, add_dpqueries=add_dpqueries)

    assert node_sum.raw == node1.raw + node2.raw

    if addsyn:
        assert np.array_equal(node_sum.syn, node1.syn + node2.syn)
    else:
        assert node_sum.syn is None

    if ic:
        # Check if constraints are summed
        for cons_name in set(consn):
            assert node_sum.cons[cons_name] == (node1.cons + node2.cons)[cons_name]

        # Check more in depth
        for cons_name in set(consn):
            sign = node_sum.cons[cons_name].sign
            assert sign == node1.cons[cons_name].sign
            assert sign == node2.cons[cons_name].sign
            query = node_sum.cons[cons_name].query
            assert query == node1.cons[cons_name].query
            assert query == node2.cons[cons_name].query

        # # Removed checking constraints rhs, that is done in constraint creator testing
        # for cons_name in set(consn) - {'nurse_nva_0', }:
        #     assert np.array_equal(node_sum.cons[cons_name].rhs, data1[cons_name] + data2[cons_name])

        assert np.array_equal(node_sum.cons['nurse_nva_0'].rhs, np.array([0]))

        # Check if invariants are summed
        for inv_name in set(invn):
            assert np.all(node_sum.invar[inv_name] == GeounitNode.addInvariants(node1, node2)[inv_name])
    else:
        assert not node_sum.cons and not node_sum.invar

    if add_dpqueries and dpq_make:
        assert node_sum.dp.Var == node1.dp.Var + node2.dp.Var
        assert np.array_equal(node_sum.unzipNoisy().dp.DPanswer,
                              node1.dp.DPanswer + node2.dp.DPanswer)
    else:
        assert node_sum.dp is None

    if add_dpqueries and add_querydict:
        for name, query in node_sum.dp_queries.items():
            assert query.Var == node1.dp_queries[name].Var + node2.dp_queries[name].Var
            assert np.array_equal(query.unzipDPanswer().DPanswer,
                                  node1.dp_queries[name].DPanswer + node2.dp_queries[name].DPanswer)
def conform2PL94(node: GeounitNode):
    DP_counts = node.getDenseSyn()
    PL94_counts = node.invar['pl94counts']
    # Cap each synthetic cell at the corresponding PL94 invariant count
    node.syn = multiSparse(np.where(DP_counts > PL94_counts, PL94_counts, DP_counts))
    return node
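# The np.where above is an elementwise minimum, so an equivalent formulation is:
#
#     node.syn = multiSparse(np.minimum(DP_counts, PL94_counts))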