def select(self):
    shape = self.domain_shape

    if shape == (1, 1):
        # skip the calculation of newgrid if the domain is of size 1
        matrix = sparse.csr_matrix(([1], ([0], [0])), shape=(1, 1))
        newgrid = 1
    else:
        eps = self.eps_par
        cur_noisy_x = self.x_hat
        noisycnt = cur_noisy_x.sum()

        # compute second-level grid size
        if noisycnt <= 0:
            m2 = 1
        else:
            m2 = int(math.sqrt(noisycnt * eps / self.c2) - 1) + 1
        M2 = m2**2
        nn, mm = shape
        newgrid = int(math.sqrt(nn * mm * 1.0 / M2) - 1) + 1
        if newgrid <= 0:
            newgrid = 1
        num1 = int(util.old_div((nn - 1), newgrid) + 1)
        num2 = int(util.old_div((mm - 1), newgrid) + 1)

        # generate cells and pending queries based on the new cells
        cells = GenerateCells(nn, mm, num1, num2, newgrid)
        matrix = cells_to_query(cells, (nn, mm))

    return matrix
def mapping(self):
    n, m = self.domain_shape
    N = self.data_sum
    eps = self.eps_par

    if self.ag_flag:
        m1 = int(math.sqrt((N * eps) / self.c) / 4 - 1) + 1
        if m1 < 10:
            m1 = 10
        M = m1**2
        grid = int(math.sqrt(n * m * 1.0 / M) - 1) + 1
        if grid <= 0:
            grid = 1
    else:
        M = util.old_div((N * eps), self.c)
        if self.gz == 0:
            grid = int(math.sqrt(n * m / M) - 1) + 1
        else:
            grid = int(self.gz)
        if grid < 1:
            grid = 1

    num1 = int(util.old_div((n - 1), grid) + 1)
    num2 = int(util.old_div((m - 1), grid) + 1)

    # TODO: potential optimization: if grid == 1, use an identity workload
    cells = UGridPartition.GenerateCells(n, m, num1, num2, grid)

    return cells_to_mapping(cells, (n, m))
def select(self):
    shape = self.domain_shape

    if shape == (1, 1):
        # skip the calculation of newgrid if the domain is of size 1
        return workload.RangeQueries((1, 1),
                                     lower=np.array([[0, 0]]),
                                     higher=np.array([[0, 0]]))
    else:
        eps = self.eps_par
        cur_noisy_x = self.x_hat
        noisycnt = cur_noisy_x.sum()

        # compute second-level grid size
        if noisycnt <= 0:
            m2 = 1
        else:
            m2 = int(math.sqrt(noisycnt * eps / self.c2) - 1) + 1
        M2 = m2**2
        nn, mm = shape
        newgrid = int(math.sqrt(nn * mm * 1.0 / M2) - 1) + 1
        if newgrid <= 0:
            newgrid = 1
        num1 = int(util.old_div((nn - 1), newgrid) + 1)
        num2 = int(util.old_div((mm - 1), newgrid) + 1)

        # generate cells and pending queries based on the new cells
        lower, higher = AdaptiveGrid.grid_split_range(
            (0, 0), (nn - 1, mm - 1), branching_list=[num1, num2])

        return workload.RangeQueries(self.domain_shape, np.array(lower),
                                     np.array(higher))
def rect_to_quads(x):
    '''
    Given an np array, split it at the midpoints into 4 quads.
    Handles arrays of arbitrary shape (1D as well).
    '''
    n_rows = x.shape[0]
    n_cols = x.shape[1]

    # if n_cols is odd, do the vertical splits in a balanced manner
    col_parity = 0
    if n_cols % 2:
        col_parity = 1
    col_midpoint = util.old_div(x.shape[1], 2)
    row_midpoint = util.old_div(x.shape[0], 2)

    if x.shape[0] == 1:
        # if x has only one row, do only the vertical split
        x1, x2 = np.split(x, [col_midpoint], axis=1)
        return [x1, x2]
    if x.shape[1] == 1:
        # if x has only one column, do only the horizontal split
        x1, x2 = np.split(x, [row_midpoint], axis=0)
        return [x1, x2]

    # otherwise do both splits
    x_h1, x_h2 = np.split(x, [row_midpoint], axis=0)
    x1, x2 = np.split(x_h1, [col_midpoint], axis=1)
    x3, x4 = np.split(x_h2, [col_midpoint + col_parity], axis=1)

    return [x1, x2, x3, x4]
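# A quick sanity check of rect_to_quads above (hypothetical values, assuming
# rect_to_quads and its util dependency are in scope): with an odd number of
# columns the parity correction balances the four quads.
q1, q2, q3, q4 = rect_to_quads(np.arange(20).reshape(4, 5))
assert [q.shape for q in (q1, q2, q3, q4)] == [(2, 2), (2, 3), (2, 3), (2, 2)]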
def hilbert(N):
    """
    Produce coordinates of an NxN Hilbert curve.

    @param N: the length of side, assumed to be a power of 2 ( >= 2)
    @returns: x and y, each as an array of integers representing coordinates
              of points along the Hilbert curve. Calling plot(x, y) will plot
              the Hilbert curve.

    From Wikipedia
    """
    assert 2**int(math.ceil(math.log(N, 2))) == N, \
        "N={0} is not a power of 2!".format(N)
    if N == 2:
        return np.array((0, 0, 1, 1)), np.array((0, 1, 1, 0))
    else:
        x, y = HilbertTransform.hilbert(util.old_div(N, 2))
        xl = np.r_[y, x, util.old_div(N, 2) + x, N - 1 - y]
        yl = np.r_[x, util.old_div(N, 2) + y,
                   util.old_div(N, 2) + y, util.old_div(N, 2) - 1 - x]
        return xl, yl
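# A quick sanity check of the Hilbert construction above (assuming hilbert is
# a method on HilbertTransform, as the recursive call suggests): for N = 4 the
# curve visits each cell of the 4x4 grid exactly once.
x, y = HilbertTransform.hilbert(4)
assert len(x) == len(y) == 16
assert len(set(zip(x, y))) == 16  # every cell visited exactly once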
def select(self):
    n, m = self.domain_shape
    N = self.data_sum
    eps = self.eps_par

    if self.ag_flag:
        m1 = int(math.sqrt((N * eps) / self.c) / 4 - 1) + 1
        if m1 < 10:
            m1 = 10
        M = m1**2
        grid = int(math.sqrt(n * m * 1.0 / M) - 1) + 1
        if grid <= 0:
            grid = 1
    else:
        M = util.old_div((N * eps), self.c)
        if self.gz == 0:
            grid = int(math.sqrt(n * m / M) - 1) + 1
        else:
            grid = int(self.gz)
        if grid < 1:
            grid = 1

    num1 = int(util.old_div((n - 1), grid) + 1)
    num2 = int(util.old_div((m - 1), grid) + 1)

    lower, upper = GenerateCells(n, m, num1, num2, grid)
    return workload.RangeQueries((n, m), np.array(lower), np.array(upper))
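# A worked numeric example of the non-adaptive branch above (hypothetical
# parameter values): a 100x100 domain with N = 10000 records, eps = 0.1,
# c = 10 and gz = 0 targets M = N*eps/c = 100 cells, so the grid size is 10
# and the domain is partitioned into 10x10 cells.
import math

n = m = 100
N, eps, c = 10000, 0.1, 10.0
M = (N * eps) / c                          # 100.0 target cells
grid = int(math.sqrt(n * m / M) - 1) + 1   # 10
num1 = int((n - 1) // grid + 1)            # 10 cells per dimension
assert (grid, num1) == (10, 10)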
def Run(self, QtQ, x, epsilon, seed):
    """
    QtQ - given the workload Q in matrix form, QtQ is the multiplication
    between the transpose of Q and Q.
    """
    assert seed is not None, 'seed must be set'

    prng = numpy.random.RandomState(seed)

    x = numpy.array(x)
    assert len(x.shape) == 1, \
        '%s is defined for 1D data only' % self.__class__.__name__

    n = len(x)
    err, inv, dist, query = self._GreedyHierByLv(QtQ, n, 0, withRoot=False)

    qmat = []
    y2 = []
    for c in range(len(dist)):
        if dist[c] > 0:
            lb, rb = query[c]
            currow = numpy.zeros(n)
            currow[lb:rb + 1] = dist[c]
            qmat.append(currow)
            y2.append(sum(x[lb:(rb + 1)]) * dist[c])
    qmat = numpy.array(qmat)
    y2 += prng.laplace(0.0, util.old_div(1.0, epsilon), len(y2))

    return numpy.dot(inv, numpy.dot(qmat.T, y2))
def setUp(self):
    n = 1024
    scale = 1E5
    self.hist = numpy.array(list(range(n)))
    self.d = dataset.Dataset(self.hist, None)
    self.dist = numpy.random.exponential(1, n)
    self.dist = util.old_div(self.dist, float(self.dist.sum()))
    self.ds = dataset.DatasetSampled(self.dist, scale, None, 1001)
def GenerateCells(n, m, num1, num2, grid):
    # generate all the cells in UGrid
    assert math.ceil(util.old_div(n, float(grid))) == num1 and \
        math.ceil(util.old_div(m, float(grid))) == num2, \
        "Unable to generate cells for UGrid: check grid number and grid size"

    cells = []
    for i in range(num1):
        for j in range(num2):
            lb = [int(i * grid), int(j * grid)]
            rb = [int((i + 1) * grid - 1), int((j + 1) * grid - 1)]
            if rb[0] >= n:
                rb[0] = int(n - 1)
            if rb[1] >= m:
                rb[1] = int(m - 1)
            cells = cells + [[lb, rb]]

    return cells
def _rebuild(partition, counts, n):
    """Rebuild an estimated data vector using uniform expansion."""
    estx = numpy.zeros(n)
    n2 = len(counts)
    for c in range(n2):
        lb, rb = partition[c]
        estx[lb:(rb + 1)] = util.old_div(counts[c], float(rb - lb + 1))
    return estx
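# A worked example of _rebuild above (hypothetical values): two buckets
# covering [0,1] and [2,4] with counts 10 and 9 expand uniformly, giving
# [5, 5, 3, 3, 3].
#
#   _rebuild(partition=[(0, 1), (2, 4)], counts=[10, 9], n=5)
#   # -> array([5., 5., 3., 3., 3.])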
def get_A(M, noise_scales):
    """ Calculate matrix 'A' of measurements, scaled appropriately for inference """
    sf = util.old_div(1.0, np.array(noise_scales))  # reciprocal of each noise scale
    D = sparse.spdiags(sf, 0, sf.size, sf.size)
    return D * M  # scale rows
def get_y(ans, noise_scales):
    """ Calculate 'y' of answers, scaled appropriately for inference """
    sf = util.old_div(1.0, np.array(noise_scales))  # reciprocal of each noise scale
    y = ans * sf  # element-wise multiplication
    y = y[:, np.newaxis]  # make column vector
    return y
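# A minimal sketch of how get_A and get_y prepare a weighted least-squares
# problem (hypothetical measurements, assuming get_A and get_y above are in
# scope): rows measured with a larger noise scale are down-weighted before
# solving, so ordinary least squares on (A, y) acts as weighted least squares.
import numpy as np
from scipy import sparse

M = sparse.csr_matrix(np.array([[1., 0.], [0., 1.], [1., 1.]]))
ans = np.array([2.0, 3.0, 5.5])     # noisy answers to the three queries
noise_scales = [1.0, 1.0, 2.0]      # third query is twice as noisy
A = get_A(M, noise_scales)          # third row scaled by 1/2
y = get_y(ans, noise_scales)        # third answer scaled by 1/2
x_est, *_ = np.linalg.lstsq(A.toarray(), y, rcond=None)  # weighted LS estimate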
def L1partition_approx(x, epsilon, ratio=0.5, gethist=False, seed=None):
    """Compute the noisy L1 histogram using interval buckets of size 2^k.

    Args:
        x - list of numeric values. The input data vector.
        epsilon - double. Total privacy budget.
        ratio - double in (0, 1). Use ratio*epsilon for the partition
                computation and (1-ratio)*epsilon for querying the count
                in each partition.
        gethist - boolean. If set to True, return the partition directly
                  (the privacy budget used is still ratio*epsilon).

    Return:
        if gethist == False, return an estimated data vector.
        Otherwise, return the partition.
    """
    assert seed is not None, "seed must be set"
    prng = numpy.random.RandomState(seed)

    n = len(x)
    # check that the input vector x is of appropriate type
    assert x.dtype == numpy.dtype(int) or x.dtype == numpy.dtype("int32"), \
        "Input vector must be int! %s given" % x.dtype
    y = x.astype('int32')  # numpy type int32 is not JSON serializable
    check = (x == y)
    assert check.sum() == len(check), "Casting error from int to int32"
    x = y

    hist = cutil.L1partition_approx(n + 1, x, epsilon, ratio,
                                    prng.randint(500000))
    hatx = numpy.zeros(n)
    rb = n
    if gethist:
        bucks = []
        for lb in hist[1:]:
            bucks.insert(0, [lb, rb - 1])
            rb = lb
            if lb == 0:
                break
        return bucks
    else:
        for lb in hist[1:]:
            hatx[lb:rb] = util.old_div(
                max(0, sum(x[lb:rb]) + prng.laplace(
                    0, util.old_div(1.0, (epsilon * (1 - ratio))), 1)),
                float(rb - lb))
            rb = lb
            if lb == 0:
                break
        return hatx
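# Usage sketch for L1partition_approx (hypothetical values; requires the
# compiled cutil extension, so shown here as comments only):
#
#   x = numpy.array([5, 5, 5, 0, 0, 0, 9, 9], dtype='int32')
#   hatx = L1partition_approx(x, epsilon=1.0, ratio=0.5, seed=0)
#   bucks = L1partition_approx(x, epsilon=1.0, ratio=0.5, gethist=True, seed=0)
#   # bucks is a list of [lb, rb] interval endpoints covering 0..len(x)-1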
def g(*idx):
    """
    This function will receive an index tuple from numpy.fromfunction.
    Its behavior depends on grid_shape: take (i,j) and divide by grid_shape
    (in each dimension). That becomes an identifier of the block; then assign
    a unique integer to it using pairing.
    """
    x = numpy.array(idx)
    y = numpy.array(grid_shape)
    return general_pairing(util.old_div(x, y))  # broadcasting integer division
def Run(self, W, x, eps, seed):
    domain_dimension = len(self.domain_shape)
    eps_share = util.old_div(float(eps), domain_dimension)

    x = x.flatten()
    prng = np.random.RandomState(seed)

    Ms = []
    ys = []
    scale_factors = []
    for i in range(domain_dimension):
        # reduce domain to get marginals
        marginal_mapping = mapper.MarginalPartition(
            domain_shape=self.domain_shape, proj_dim=i).mapping()
        reducer = transformation.ReduceByPartition(marginal_mapping)
        x_i = reducer.transform(x)

        if self.domain_shape[i] < 50:
            # run identity subplan
            M_i = selection.Identity(x_i.shape).select()
            y_i = measurement.Laplace(M_i, eps_share).measure(x_i, prng)
            noise_scale_factor = laplace_scale_factor(M_i, eps_share)
        else:
            # run dawa subplan
            W = get_matrix(W)
            W_i = W * support.expansion_matrix(marginal_mapping)
            dawa = pmapper.Dawa(eps_share, self.ratio, self.approx)
            mapping = dawa.mapping(x_i, prng)

            reducer = transformation.ReduceByPartition(mapping)
            x_bar = reducer.transform(x_i)
            W_bar = W_i * support.expansion_matrix(mapping)

            M_bar = selection.GreedyH(x_bar.shape, W_bar).select()
            y_i = measurement.Laplace(
                M_bar, eps_share * (1 - self.ratio)).measure(x_bar, prng)
            noise_scale_factor = laplace_scale_factor(
                M_bar, eps_share * (1 - self.ratio))

            # expand the dawa reduction
            M_i = M_bar * support.reduction_matrix(mapping)

        MM = M_i * support.reduction_matrix(marginal_mapping)
        Ms.append(MM)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = inference.LeastSquares(method='lsmr').infer(Ms, ys, scale_factors)

    return x_hat
def statistics(self):
    assert self.hist is not None
    assert self.edges is not None

    hist_data = {}
    hist_data['nz_perc'] = util.old_div(np.count_nonzero(self.hist),
                                        float(self.hist.size))
    hist_data['max_bin_val'] = self.hist.max()
    hist_data['total_records'] = self.hist.sum()

    return hist_data
def quad_split_range(cur_range_l, cur_range_u, **kwarg):
    '''
    Given a rectangular domain represented by its border coordinates
    (upper_left, lower_right), split it at the midpoints into 4 quads.
    '''
    ul, lr = cur_range_l, cur_range_u
    upper, left = ul
    lower, right = lr
    n_rows = lower - upper + 1
    n_cols = right - left + 1

    # if n_cols is odd, do the vertical splits in a balanced manner
    col_parity = 0
    if n_cols % 2:
        col_parity = 1
    col_midpoint = left + util.old_div(n_cols, 2)
    row_midpoint = upper + util.old_div(n_rows, 2)

    if n_rows == 1:
        # if the range has only one row, do only the vertical split
        row = lr[0]
        return [ul, (row, col_midpoint)], [(row, col_midpoint - 1), lr]
    if n_cols == 1:
        # if the range has only one column, do only the horizontal split
        col = lr[1]
        return [ul, (row_midpoint, col)], [(row_midpoint - 1, col), lr]

    # otherwise do both splits
    q1 = (ul, (row_midpoint - 1, col_midpoint - 1))
    q2 = ((upper, col_midpoint), (row_midpoint - 1, right))
    q3 = ((row_midpoint, left), (lower, col_midpoint - 1 + col_parity))
    q4 = ((row_midpoint, col_midpoint + col_parity), lr)

    lower = [coordinates[0] for coordinates in [q1, q2, q3, q4]]
    upper = [coordinates[1] for coordinates in [q1, q2, q3, q4]]

    return lower, upper
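# A quick sanity check of quad_split_range above (hypothetical values): the
# square with corners (0,0) and (3,3) splits into four 2x2 quads; note the
# first returned list holds upper-left corners, the second lower-right ones.
lo, hi = quad_split_range((0, 0), (3, 3))
assert lo == [(0, 0), (0, 2), (2, 0), (2, 2)]
assert hi == [(1, 1), (1, 3), (3, 1), (3, 3)]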
def GenerateCells(n, m, num1, num2, grid):
    '''
    Generate grid-shaped cells for UniformGrid and AdaptiveGrid.

    n, m: 2D domain shape
    num1, num2: number of cells along the two dimensions
    grid: grid size
    '''
    assert math.ceil(util.old_div(n, float(grid))) == num1 and \
        math.ceil(util.old_div(m, float(grid))) == num2, \
        "Unable to generate cells for UGrid: check grid number and grid size"

    lower, upper = [], []
    for i in range(num1):
        for j in range(num2):
            lb = [int(i * grid), int(j * grid)]
            rb = [int((i + 1) * grid - 1), int((j + 1) * grid - 1)]
            if rb[0] >= n:
                rb[0] = int(n - 1)
            if rb[1] >= m:
                rb[1] = int(m - 1)
            lower.append(lb)
            upper.append(rb)

    return lower, upper
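# A quick sanity check of GenerateCells above (hypothetical values): a 5x5
# domain with grid size 2 needs num1 = num2 = ceil(5/2) = 3 cells per
# dimension, and the last row/column of cells is clipped to the boundary.
lower, upper = GenerateCells(5, 5, 3, 3, 2)
assert lower[0] == [0, 0] and upper[0] == [1, 1]
assert lower[-1] == [4, 4] and upper[-1] == [4, 4]  # clipped from [5, 5]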
def Run(self, W, x, eps):
    domain_dimension = len(self.domain_shape)
    eps_share = util.old_div(float(eps), domain_dimension)

    Ms = []
    ys = []
    scale_factors = []
    for i in range(domain_dimension):
        # reduce domain to get marginals
        marginal_mapping = marginal_partition(self.domain_shape, i)
        x_i = x.reduce_by_partition(marginal_mapping)

        if self.domain_shape[i] < 50:
            # run identity subplan
            M_i = identity((self.domain_shape[i], ))
            y_i = x_i.laplace(M_i, eps_share)
            noise_scale_factor = laplace_scale_factor(M_i, eps_share)
        else:
            # run dawa subplan
            W_i = W * support.expansion_matrix(marginal_mapping)
            mapping = x_i.dawa(self.ratio, self.approx, eps_share)
            x_bar = x_i.reduce_by_partition(mapping)
            W_bar = W_i * support.expansion_matrix(mapping)

            M_bar = greedyH((len(set(mapping)), ), W_bar)
            y_i = x_bar.laplace(M_bar, eps_share * (1 - self.ratio))
            noise_scale_factor = laplace_scale_factor(
                M_bar, eps_share * (1 - self.ratio))

            # expand the dawa reduction
            M_i = M_bar * support.reduction_matrix(mapping)

        # TODO: Ideally this would be just
        # M_i * support.reduction_matrix(marginal_mapping), but currently
        # that returns an int-typed matrix because the type of P_i is int.
        MM = (support.reduction_matrix(marginal_mapping).T * M_i.T).T

        Ms.append(MM)
        ys.append(y_i)
        scale_factors.append(noise_scale_factor)

    x_hat = least_squares(Ms, ys, scale_factors)

    return x_hat
def get_boarder(dim_len, branching):
    if branching > dim_len:
        split_num = dim_len
        boarder = [(i, i) for i in range(split_num)]
    elif dim_len % branching != 0:
        new_hsize = np.divide(float(dim_len), branching)
        split_num = [
            np.ceil(new_hsize * (i + 1)).astype(int)
            for i in range(branching - 1)
        ]
        temp = [i - 1 for i in split_num]
        boarder = list(zip(([0] + split_num), (temp + [dim_len - 1])))
    else:
        cell_size_h = util.old_div(dim_len, branching)
        boarder = [(i * cell_size_h, (i + 1) * cell_size_h - 1)
                   for i in range(branching)]

    return boarder
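# A quick sanity check of get_boarder above (hypothetical values): length 10
# with branching 3 is not evenly divisible, so the balanced middle branch is
# taken; with branching 5 the even branch produces equal-sized intervals.
assert get_boarder(10, 3) == [(0, 3), (4, 6), (7, 9)]
assert get_boarder(10, 5) == [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]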
def test_old_div(self):
    self.assertEqual(util.old_div(1, 2), 0)
    self.assertEqual(util.old_div(2, 2), 1)
    self.assertEqual(util.old_div(3, 2), 1)
    self.assertEqual(util.old_div(1, 2.0), 0.5)

    x = np.array((1.0,))
    y = np.array((2.0,), dtype=np.int_)
    z = np.array((2.0,), dtype=np.float_)
    w = 1.0
    zero = np.zeros((1,))
    half = 0.5 * np.ones((1,))

    self.assertEqual(util.old_div(x, y), zero)
    self.assertEqual(util.old_div(x, z), half)
    self.assertEqual(util.old_div(w, z), np.array(half))
def Run(self, Q, x, epsilon, seed):
    """Run three engines in order with the given epsilons to estimate a
    dataset x to answer query set Q.

    Q - the query workload
    x - the underlying dataset
    epsilon - the total privacy budget
    """
    assert seed is not None, 'seed must be set'
    prng = numpy.random.RandomState(seed)

    n = len(x)
    pSeed = prng.randint(500000)
    eSeed = prng.randint(500000)

    if self._partition_engine is None:
        # ignore ratio when partition_engine is omitted
        return self._DirectRun(Q, x, epsilon, eSeed)
    else:
        if self._ratio < 0 or self._ratio >= 1:
            raise ValueError('ratio must be in range [0, 1)')

        partition = self.Compute_partition(x, epsilon, pSeed)

        # check that partition buckets span the domain
        assert min(itertools.chain(*partition)) == 0
        assert max(itertools.chain(*partition)) == (n - 1)

        # epsilon_2 used in the paper (the epsilon for estimation)
        eps2 = (1 - self._ratio) * epsilon
        devs = abs(numpy.array(x) - (util.old_div(sum(x), float(len(x)))))

        counts = self._estimate_engine.Run(
            self._workload_reform(Q, partition, n),
            self._dataset_reform(x, partition),
            epsilon * (1 - self._ratio), eSeed)

        return self._rebuild(partition, counts, n)
def __init__(self, uniformity, dom_shape, scale, seed=None):
    '''
    Generate synthetic data of varying uniformity.

    uniformity: parameter in [0,1] where 1 produces perfectly uniform data
                and 0 is maximally non-uniform

    All cells are set to zero except a fraction equal to the 'uniformity'
    value. All non-zero cells are set to the same value, then shuffled
    randomly.
    '''
    self.init_params = util.init_params_from_locals(locals())
    self.u = uniformity
    assert 0 <= uniformity and uniformity <= 1

    n = numpy.prod(dom_shape)  # total domain size
    hist = numpy.zeros(n)
    num_nonzero = max(1, int(uniformity * n))
    hist_value = util.old_div(scale, num_nonzero)
    hist[0:num_nonzero] = hist_value

    prng = numpy.random.RandomState(seed)
    prng.shuffle(hist)

    super(DatasetUniformityVaryingSynthetic,
          self).__init__(hist.reshape(dom_shape),
                         reduce_to_domain_shape=None,
                         dist=None)
def infer(self, Ms, ys, scale_factors=None):
    '''
    Either:
        1) Ms is a single M and ys is a single y (scale_factors ignored)
        2) Ms and ys are lists of M matrices and y vectors,
           and scale_factors is a list of the same length.
    '''
    A, y = self._apply_scales(Ms, ys, scale_factors)

    if self.known_total is not None:
        A, y = self.__known_total_problem(A, y)

    if self.method == 'standard':
        assert self.l2_reg == 0, 'l2 reg not supported with method=standard'
        assert isinstance(A, np.ndarray), \
            "method 'standard' only works with dense matrices"
        (x_est, _, rank, _) = linalg.lstsq(A, y, lapack_driver='gelsy')
    elif self.method == 'lsmr':
        res = lsmr(A, y, atol=0, btol=0, damp=self.l2_reg)
        x_est = res[0]
    elif self.method == 'lsqr':
        res = lsqr(A, y, atol=0, btol=0, damp=self.l2_reg)
        x_est = res[0]

    if self.known_total is not None:
        x_est = np.append(x_est, self.known_total - x_est.sum())

    x_est = x_est.reshape(A.shape[1])  # reshape to match shape of x

    # James-Stein estimation
    if self.stein and x_est.size >= 3:
        adjustment = 1.0 - util.old_div((x_est.size - 2), (x_est**2).sum())
        x_est *= adjustment

    return x_est
def variance(N, b):
    '''Computes the variance for a domain of size N and branching factor b.
    Equation 3 from the paper.'''
    h = math.ceil(math.log(N, b))
    return ((b - 1) * h**3) - (util.old_div((2 * (b + 1) * h**2), 3))
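# A worked example of variance above: N = 1024 with b = 2 gives tree height
# h = log2(1024) = 10, so (b-1)*h^3 - 2*(b+1)*h^2/3 = 1000 - 200 = 800.
assert variance(1024, 2) == 800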
def cantor_pairing(a, b):
    """
    A function returning a unique positive integer for every pair (a,b)
    of positive integers.
    """
    return util.old_div((a + b) * (a + b + 1), 2) + b
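# A worked example of cantor_pairing above: the pairing is injective and
# order-sensitive, so distinct pairs never collide.
assert cantor_pairing(2, 3) == (5 * 6) // 2 + 3 == 18
assert cantor_pairing(3, 2) == 17  # order matters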
def fractionZeros(self):
    zero_count = (self.payload == 0).sum()
    return util.old_div(float(zero_count), self.payload.size)
    h = numpy.vectorize(g)

    # numpy.fromfunction builds an array of domain_shape by calling a
    # function with each index tuple (e.g. (i,j))
    partition_array = numpy.fromfunction(h, domain_shape, dtype=int)
    # transform to canonical order
    partition_array = canonicalTransform(partition_array)
    return partition_array


if __name__ == '__main__':
    scale = 10000
    for u in [util.old_div(i, 10.0) for i in range(0, 11)]:
        print(u)
        d = DatasetUniformityVaryingSynthetic(uniformity=u,
                                              dom_shape=(10, ),
                                              scale=scale,
                                              seed=999)
        size = d.payload.size
        unif = numpy.empty_like(d.payload)
        unif.fill(util.old_div(scale, float(size)))
        print(sum(abs(unif - d.payload)))
def _GreedyHierByLv(self, fullQtQ, n, offset, depth=0, withRoot=False):
    """Compute the weight distribution of one node of the tree by
    minimizing error locally.

    fullQtQ - the same matrix as QtQ in the Run method
    n - the size of the submatrix that corresponds to the current node
    offset - the location of the submatrix in fullQtQ that corresponds
             to the current node
    depth - the depth of the current node in the tree
    withRoot - whether the accurate root count is given

    Returns: error, inv, weights, queries
        error - the variance of query on the current node with epsilon=1
        inv - for the query strategy (the actual weighted queries to be
              asked) matrix A, inv is the inverse matrix of A^TA
        weights - the weights of the queries to be asked
        queries - the list of queries to be asked (all with weight 1)
    """
    if n == 1:
        return numpy.linalg.norm(fullQtQ[:, offset], 2)**2, \
            numpy.array([[1.0]]), \
            numpy.array([1.0]), [[offset, offset]]

    QtQ = fullQtQ[:, offset:offset + n]
    if (numpy.min(QtQ, axis=1) == numpy.max(QtQ, axis=1)).all():
        mat = numpy.zeros([n, n])
        mat.fill(util.old_div(1.0, n**2))
        return numpy.linalg.norm(QtQ[:, 0], 2)**2, \
            mat, numpy.array([1.0]), [[offset, offset + n - 1]]

    if n <= self._branch:
        bound = list(zip(list(range(n)), list(range(1, n + 1))))
    else:
        rem = n % self._branch
        step = util.old_div((n - rem), self._branch)
        swi = (self._branch - rem) * step
        sep = list(range(0, swi, step)) + list(range(swi, n, step + 1)) + [n]
        bound = list(zip(sep[:-1], sep[1:]))

    serr, sinv, sdist, sq = list(
        zip(*[
            self._GreedyHierByLv(
                fullQtQ, c[1] - c[0], offset + c[0], depth=depth + 1)
            for c in bound
        ]))
    invAuList = [c.sum(axis=0) for c in sinv]
    invAu = numpy.hstack(invAuList)
    k = invAu.sum()
    m1 = sum(
        map(
            lambda rng, v: numpy.linalg.norm(
                numpy.dot(QtQ[:, rng[0]:rng[1]], v), 2)**2, bound,
            invAuList))
    m = numpy.linalg.norm(numpy.dot(QtQ, invAu), 2)**2
    sumerr = sum(serr)

    if withRoot:
        return sumerr, block_diag(*sinv), \
            numpy.hstack([[0], numpy.hstack(sdist)]), \
            [[offset, offset + n - 1]] + list(itertools.chain(*sq))

    decay = util.old_div(1.0, (self._branch**(util.old_div(depth, 2.0))))
    err1 = numpy.array(list(range(self._granu, 0, -1)))**2
    err2 = numpy.array(list(range(self._granu)))**2 * decay
    toterr = 1.0 / err1 * (sumerr - ((m - m1) * decay + m1) * err2 /
                           (err1 + err2 * k))

    err = toterr.min() * self._granu**2
    perc = 1 - util.old_div(numpy.argmin(toterr), float(self._granu))
    inv = (util.old_div(1.0, perc))**2 * (
        block_diag(*sinv) - (1 - perc)**2 /
        (perc**2 + k * (1 - perc)**2) *
        numpy.dot(invAu.reshape([n, 1]), invAu.reshape([1, n])))
    dist = numpy.hstack([[1 - perc], perc * numpy.hstack(sdist)])
    return err, inv, dist, \
        [[offset, offset + n - 1]] + list(itertools.chain(*sq))
def testDatasetReduce(self):
    div = 4
    new_shape = (util.old_div(self.hist.shape[0], div), )
    dr = dataset.Dataset(hist=self.hist, reduce_to_domain_shape=new_shape)
    self.assertEqual(dr.domain_shape, new_shape)