def __mul__(self, other):
    """Multiply this object by ``other``.

    When ``other`` is a MarginalsGram over the same domain, the product is
    computed on the weight vectors and returned as a new MarginalsGram.
    Every other operand is delegated to ``EkteloMatrix.__mul__``.
    """
    same_family = isinstance(other, MarginalsGram) and self.domain == other.domain
    if not same_family:
        return EkteloMatrix.__mul__(self, other)

    # _Xmatrix returns a pair; only its first element is used here.
    forward, _ = self._Xmatrix(self.weights)
    return MarginalsGram(self.domain, forward.dot(other.weights))
def process_workload(wd, eps):
    """Build the weighted Kronecker workload described by ``wd``.

    :param wd: mapping with a 'weight' entry and a 'data' entry; 'data' is an
        iterable of per-attribute mappings holding 'name', 'minimum',
        'maximum', 'bucketSize' and 'buildingBlock'
    :param eps: accepted for interface compatibility; not used here
    :return: a pair ``(workload, blockinfo)`` where ``workload`` is
        sqrt(weight) times the Kronecker product of the per-attribute
        building blocks and ``blockinfo`` holds the column names, the
        building blocks and the per-attribute ``p`` values
    """
    names, blocks, p_values = [], [], []
    blockinfo = {"columnNames": names, 'buildingBlock': blocks, 'p': p_values}

    for bb in wd['data']:
        names.append(bb['name'])

        # Number of buckets covering [minimum, maximum].
        span = float(bb['maximum']) - float(bb['minimum'])
        size = int(span / float(bb['bucketSize']) + 1)

        # Default p is ceil(size / 16), except that a value of 2 is lowered to 1.
        pv = math.ceil(size / 16.0)
        if pv == 2:
            pv -= 1

        kind = bb['buildingBlock']
        if kind == 'identity':
            block, pv = Identity(size), 1
        elif kind == 'allrange':
            block = AllRange(size)
        elif kind == 'prefix':
            block = Prefix(size)
        elif kind == 'customized':
            block, pv = EkteloMatrix(parse_customized(bb, size)), 1
        else:
            # Any unrecognised building block falls back to a Total query.
            block, pv = Total(size), 1

        blocks.append(block)
        p_values.append(pv)
        gc.collect()
    gc.collect()

    wgt = np.sqrt(float(wd['weight']))
    return wgt * Kronecker(blocks), blockinfo
def synthesize(self, file_path, eps, seed):
    """Measure the data in ``file_path`` with Laplace noise and return synthetic data.

    :param file_path: CSV file loaded into a Relation built from ``self.config``
    :param eps: total budget; each strategy component receives the share
        ``weight * eps / sum(weights)``
    :param seed: seed for the NumPy random state used by every measurement
    :return: the DataFrame of the fitted model's synthetic data, after
        ``_denumerize`` and ``_sample_numerical`` have been applied to it
    """
    # Random state shared by all noisy measurements.
    prng = np.random.RandomState(seed)

    # Load the data and numerize it.
    relation = Relation(self.config)
    relation.load_csv(file_path)
    self._numerize(relation._df)

    # Take one noisy measurement per strategy component.
    attributes = list(self.config.keys())
    total_weight = sum(part.weight for part in self.strategy.matrices)
    measurements = []
    for part in self.strategy.matrices:
        weight = part.weight

        # Keep only the attributes whose factor is not a 'Ones' matrix.
        proj, factors = [], []
        for position, factor in enumerate(part.base.matrices):
            if type(factor).__name__ != 'Ones':
                proj.append(attributes[position])
                factors.append(factor)

        if factors:
            matrix = Kronecker(factors)
        else:
            # No attribute kept: measure a single 1 x 1 query.
            matrix = EkteloMatrix(np.ones((1, 1)))

        proj_rel = copy.deepcopy(relation)
        proj_rel.project(proj)
        if proj_rel.df.shape[1] == 0:
            # No columns left: the data vector is just the row count.
            x = np.array([proj_rel.df.shape[0]])
        else:
            x = Vectorize('').transform(proj_rel).flatten()

        budget = weight * eps / total_weight
        y = Laplace(matrix, budget).measure(x, prng)
        measurements.append((matrix.sparse_matrix(), y, 1.0 / weight, proj))

    # Fit a model to the measurements and draw synthetic data from it.
    sizes = [field['bins'] for field in self.config.values()]
    dom = Domain(attributes, sizes)
    model = FactoredInference(dom).estimate(measurements)
    df = model.synthetic_data().df
    self._denumerize(df)
    self._sample_numerical(df)
    return df
def WidthKRange(n, widths):
    """Return the workload of all contiguous range queries of the given widths.

    :param n: the domain size
    :param widths: a single width (Python or NumPy integer) or an iterable
        of widths
    :return: an EkteloMatrix with one 0/1 row per (width, start) pair; rows
        are ordered by width (in the order given) and then by start position
    """
    # isinstance (rather than an exact type check) also accepts NumPy integer
    # scalars, e.g. a width taken from an integer array.
    if isinstance(widths, (int, np.integer)):
        widths = [widths]
    # Materialise once: the widths are traversed twice below, which would
    # silently exhaust a one-shot iterable such as a generator.
    widths = list(widths)

    m = sum(n - k + 1 for k in widths)
    W = np.zeros((m, n))
    row = 0
    for k in widths:
        count = n - k + 1  # number of width-k windows that fit in the domain
        for start in range(count):
            W[row + start, start:start + k] = 1.0
        row += count
    return EkteloMatrix(W)
def get_measurements(domain, workload):
    """Return the measurement queries for ``workload`` using the OPT+ parameterization.

    :param domain: iterable of attribute names that also provides ``size(attr)``
    :param workload: iterable of ``(proj, _)`` pairs; only ``proj`` (a
        sequence of attribute names) is used
    :return: a list of ``(proj, Q)`` pairs where ``Q`` is the sparse matrix of
        the Kronecker product of the per-attribute strategies
    """
    # The optimal strategy for Identity is Identity, so it is the default.
    lookup = {attr: Identity(domain.size(attr)) for attr in domain}

    # The optimal strategy for Prefix is precomputed and loaded from disk.
    precomputed = (
        ('age', 'prefix-85.npy'),
        ('fnlwgt', 'prefix-100.npy'),
        ('capital-gain', 'prefix-100.npy'),
        ('capital-loss', 'prefix-100.npy'),
        ('hours-per-week', 'prefix-99.npy'),
    )
    for attr, path in precomputed:
        lookup[attr] = EkteloMatrix(np.load(path))

    measurements = []
    for proj, _ in workload:
        strategy = Kronecker([lookup[a] for a in proj])
        measurements.append((proj, strategy.sparse_matrix()))
    return measurements
def select(self, x, prng):
    """Select measurements for the relation ``x`` via the PrivBayes model selection.

    :param x: relation whose ``df`` is passed (as contiguous int32) to the
        privBayes backend
    :param prng: random state used only to draw the backend's seed
    :return: an EkteloMatrix wrapping the measurements derived from the model
    """
    relation = x
    seed = prng.randint(1E4, 1E9)

    # Convert the config to the privBayes format.
    config_bytes = self.get_config_str(relation).encode('utf-8')
    records = np.ascontiguousarray(relation.df.astype(np.int32))

    model_str = privBayesSelect.py_get_model(
        records, config_bytes, self.eps, self.theta, seed)
    model = PrivBayesSelect.make_models(model_str.decode('utf-8'))
    return EkteloMatrix(PrivBayesSelect.get_measurements(model, self.domain_shape))
def __init__(self, domain, lower, higher, dtype=np.float64):
    """
    :param domain: the domain size, as an int for 1D or tuple for d-dimensional
        domains where each bound is a tuple with the same size as domain.
    :param lower: a q x d array of lower boundaries for the q queries
    :param higher: a q x d array of upper boundaries for the q queries

    The object is initialised as ``Product(T, P)`` where ``P`` is the
    Kronecker product of per-dimension Prefix matrices and ``T`` is a sparse
    matrix with +1/-1 entries, one per (query, corner) pair that is kept.
    """
    assert lower.shape == higher.shape, 'lower and higher must have same shape'
    # NOTE: lower <= higher is not checked here.
    if type(domain) is int:
        # 1D convenience form: promote to a 1-tuple domain and q x 1 bounds.
        domain = (domain, )
        lower = lower[:, None]
        higher = higher[:, None]

    num_queries = lower.shape[0]
    shape = (num_queries, np.prod(domain))

    self.domain = domain
    self.shape = shape
    self.dtype = dtype
    self._lower = lower
    self._higher = higher

    # idx[c] is the flat index of the domain cell with coordinates c.
    idx = np.arange(np.prod(domain), dtype=np.int32).reshape(domain)

    # Every way of choosing, per dimension, the upper bound (False) or the
    # cell just below the lower bound (True).
    corners = np.array(
        list(itertools.product((False, True), repeat=len(domain))))

    capacity = len(corners) * num_queries
    row_ind = np.zeros(capacity, dtype=np.int32)
    col_ind = np.zeros(capacity, dtype=np.int32)
    data = np.zeros(capacity, dtype=dtype)
    queries = np.arange(num_queries, dtype=np.int32)

    filled = 0
    for corner in corners:
        cells = np.where(corner, lower - 1, higher)
        # A coordinate of -1 (lower bound 0) has no cell; those entries are
        # dropped here (the lookup below wraps around for them, but the
        # result is discarded).
        keep = np.all(cells >= 0, axis=1)
        index = idx[tuple(cells.T)]
        # +1 when an even number of dimensions use the lower side, -1 otherwise.
        sign = 1 - np.sum(corner) % 2 * 2
        stop = filled + keep.sum()
        window = slice(filled, stop)
        row_ind[window] = queries[keep]
        col_ind[window] = index[keep]
        data[window] = sign
        filled = stop

    # Only the first `filled` slots hold real entries.
    self._transformer = sparse.csr_matrix(
        (data[:filled], (row_ind[:filled], col_ind[:filled])), shape, dtype)

    P = Kronecker([Prefix(n, dtype) for n in domain])
    T = EkteloMatrix(self._transformer)
    Product.__init__(self, T, P)
def reduction_matrix(mapping, canonical_order=False):
    """ Returns an m x n matrix R where n is the dimension of
        the original data and m is the dimension of the reduced data.

        Reduces data vector x with R x
        Expands workload matrix W with W' R

        :param mapping: 1-dimensional array assigning each of the n cells to a group
        :param canonical_order: if true, ``canonical_ordering`` is applied to
            the mapping after its statistics have been computed
    """
    assert mapping.ndim == 1, "Can only handle 1-dimesional mappings for now, domain should be flattened"

    unique, _, inverse, _ = mapping_statistics(mapping)

    # NOTE(review): the reordered mapping is only used for its size below;
    # the row indices come from statistics computed before the reordering.
    # Confirm whether canonical_order is meant to change the result.
    if canonical_order:
        mapping = canonical_ordering(mapping)

    n, m = mapping.size, unique.size

    # One unit entry per original cell, placed in the row of its group.
    entries = (np.ones(n), (inverse, np.arange(n)))
    return EkteloMatrix(sparse.csr_matrix(entries, shape=(m, n), dtype=int))
def projection_matrix(mapping, idx):
    """ Returns m x n matrix P where n is the dimension of the
        original data and m is the number of occurrences of idx in mapping.

        :param mapping: vector with indices representing groups
        :param idx: index of group from which to create projection

        Projects vector x with P x and matrix W with W P^T
        Unprojects vector x with P^T x and matrix W with W P
    """
    # A plain boolean comparison always has one entry per cell, so no special
    # case is needed when every entry (or no entry) belongs to the group.
    selected = np.asarray(mapping) == idx

    cols = np.where(selected)[0]
    rows = np.arange(cols.size)
    vals = np.ones_like(rows)
    P = sparse.csr_matrix((vals, (rows, cols)), (rows.size, selected.size))
    return EkteloMatrix(P)
def expansion_matrix(mapping, canonical_order=False):
    """ Returns an n x m matrix E where n is the dimension of
        the original data and m is the dimension of the reduced data.

        Expands data vector x with E x'
        Reduces workload matrix W with W E

        :param mapping: 1-dimensional array assigning each of the n cells to a group
        :param canonical_order: if true, ``canonical_ordering`` is applied to
            the mapping after its statistics have been computed
    """
    assert mapping.ndim == 1, "Can only handle 1-dimesional mappings for now, domain should be flattened"

    unique, _, inverse, counts = mapping_statistics(mapping)

    # NOTE(review): the reordered mapping is only used for its size below;
    # the group indices and counts come from statistics computed before the
    # reordering. Confirm whether canonical_order is meant to change the result.
    if canonical_order:
        mapping = canonical_ordering(mapping)

    n, m = mapping.size, unique.size

    # m x n membership matrix: one unit entry per original cell.
    entries = (np.ones(n), (inverse, np.arange(n)))
    R = sparse.csr_matrix(entries, shape=(m, n), dtype=int)

    # Scale each group's column by 1 / (number of cells in the group).
    scale = sparse.spdiags(1.0 / counts, 0, m, m)
    return EkteloMatrix(R.T * scale)
def setUp(self):
    """Create the fixtures: an 8 x 8 identity EkteloMatrix and a random vector."""
    size = 8
    self.n = size
    self.eps_share = 0.1
    self.prng = np.random.RandomState(10)
    self.A = EkteloMatrix(np.eye(size))
    # Drawn from NumPy's global random state, not from self.prng.
    self.X = np.random.rand(size)
def gram(self):
    """Return the Gram matrix as an explicit EkteloMatrix.

    For zero-based indices, entry (i, j) equals ``min(i, j) + 1``.
    """
    counts = np.arange(self.n).astype(self.dtype) + 1
    pairwise_min = np.minimum(counts[np.newaxis, :], counts[:, np.newaxis])
    return EkteloMatrix(pairwise_min)
def convert_implicit(A):
    """Return ``A`` unchanged if it is already an EkteloMatrix or a
    ``workload.ExplicitGram``; otherwise wrap it in an EkteloMatrix.
    """
    if isinstance(A, EkteloMatrix):
        return A
    if isinstance(A, workload.ExplicitGram):
        return A
    return EkteloMatrix(A)
def Moments(n, k=3):
    """Return the k x n workload whose row i (zero-based) holds ``j ** (i + 1)``
    for j = 0, ..., n - 1.

    :param n: the domain size
    :param k: the highest power included (default 3)
    """
    values = np.arange(n)
    exponents = np.arange(1, k + 1)
    powers = np.power(values[np.newaxis, :], exponents[:, np.newaxis])
    return EkteloMatrix(powers)
def gram(self):
    """Return ``self.matrix`` wrapped in an EkteloMatrix."""
    wrapped = EkteloMatrix(self.matrix)
    return wrapped
def convert_implicit(A):
    """Return ``A`` unchanged if it is already an EkteloMatrix; otherwise wrap it in one."""
    return A if isinstance(A, EkteloMatrix) else EkteloMatrix(A)
def gram(self):
    """Return the base Gram matrix restricted to the rows and columns in ``self.idx``."""
    dense = self.base.gram().dense_matrix()
    # Select rows first, then columns, so both axes are restricted to idx.
    selected_rows = dense[self.idx, :]
    return EkteloMatrix(selected_rows[:, self.idx])
def gram(self):
    """Return the Gram matrix as an explicit EkteloMatrix.

    With r = 1, ..., n, entry (i, j) is the smaller of
    ``r[i] * r[n - 1 - j]`` and ``r[j] * r[n - 1 - i]``.
    """
    ascending = np.arange(self.n) + 1
    descending = ascending[::-1]
    products = ascending[:, np.newaxis] * descending[np.newaxis, :]
    return EkteloMatrix(np.minimum(products, products.T))
def test_nnls(self):
    """Build a random n x n EkteloMatrix from NumPy's global random state."""
    dense = np.random.rand(self.n, self.n)
    A = EkteloMatrix(dense)
def setUp(self):
    """Create the fixtures: 1D and 2D domain shapes of side 16 and a 16 x 16 sparse identity workload."""
    side = 16
    self.domain_shape_1D = (side, )
    self.domain_shape_2D = (side, side)
    self.W = EkteloMatrix(sparse.eye(side))