コード例 #1
0
 def __mul__(self, other):
     """Multiply this matrix by ``other``.

     When ``other`` is a ``MarginalsGram`` whose domain equals this one's, the
     result is a ``MarginalsGram`` on the same domain with weights
     ``X.dot(other.weights)``, where ``X`` is the first value returned by
     ``self._Xmatrix(self.weights)``. Any other operand is delegated to
     ``EkteloMatrix.__mul__``.
     """
     compatible = isinstance(other, MarginalsGram) and self.domain == other.domain
     if not compatible:
         return EkteloMatrix.__mul__(self, other)

     # only the first matrix returned by _Xmatrix is needed for the product
     X, _ = self._Xmatrix(self.weights)
     return MarginalsGram(self.domain, X.dot(other.weights))
コード例 #2
0
ファイル: app.py プロジェクト: dpcomp-org/hdmm
def process_workload(wd, eps):
    """Translate a workload description into a weighted Kronecker workload.

    :param wd: mapping with a ``'data'`` list of per-attribute descriptions
        (each read for ``'name'``, ``'minimum'``, ``'maximum'``,
        ``'bucketSize'`` and ``'buildingBlock'``) and a ``'weight'`` entry.
    :param eps: not used by this function.
    :return: ``(workload, blockinfo)`` where ``workload`` is
        ``sqrt(weight) * Kronecker(blocks)`` and ``blockinfo`` holds the
        column names, building blocks and ``p`` values, one per attribute.
    """
    names, blocks, p_values = [], [], []
    for bb in wd['data']:
        names.append(bb['name'])

        # number of buckets covering [minimum, maximum] at the given width
        extent = float(bb['maximum']) - float(bb['minimum'])
        size = int(extent / float(bb['bucketSize']) + 1)

        # p defaults to ceil(size / 16), except that a value of 2 becomes 1
        chunks = math.ceil(size / 16.0)
        pv = chunks - 1 if chunks == 2 else chunks

        kind = bb['buildingBlock']
        if kind == 'allrange':
            blocks.append(AllRange(size))
        elif kind == 'prefix':
            blocks.append(Prefix(size))
        else:
            # identity, customized and the Total fallback all use p = 1
            if kind == 'identity':
                blocks.append(Identity(size))
            elif kind == 'customized':
                blocks.append(EkteloMatrix(parse_customized(bb, size)))
            else:
                blocks.append(Total(size))
            pv = 1

        p_values.append(pv)
        gc.collect()
    gc.collect()

    blockinfo = {"columnNames": names, 'buildingBlock': blocks, 'p': p_values}
    wgt = np.sqrt(float(wd['weight']))
    return wgt * Kronecker(blocks), blockinfo
コード例 #3
0
    def synthesize(self, file_path, eps, seed):
        """Produce a synthetic data frame from noisy strategy measurements.

        :param file_path: CSV file loaded into a ``Relation`` built from
            ``self.config``
        :param eps: budget split across the strategy terms in proportion to
            their weights (each term is measured with ``w * eps / w_sum``)
        :param seed: seed of the ``np.random.RandomState`` handed to every
            measurement
        :return: the synthetic data frame after ``_denumerize`` and
            ``_sample_numerical`` have been applied to it
        """
        # setup random state
        prng = np.random.RandomState(seed)

        # load data vector
        relation = Relation(self.config)
        relation.load_csv(file_path)
        self._numerize(relation._df)

        # perform measurement
        attributes = list(self.config.keys())
        w_sum = sum(term.weight for term in self.strategy.matrices)
        measurements = []
        for term in self.strategy.matrices:
            w = term.weight

            # keep the factors that are not 'Ones', paired with their attribute
            kept = [
                (attributes[i], factor)
                for i, factor in enumerate(term.base.matrices)
                if type(factor).__name__ != 'Ones'
            ]
            proj = [name for name, _ in kept]
            factors = [factor for _, factor in kept]
            if factors:
                matrix = Kronecker(factors)
            else:
                # nothing left to measure but a single 1 x 1 query
                matrix = EkteloMatrix(np.ones((1, 1)))

            proj_rel = copy.deepcopy(relation)
            proj_rel.project(proj)
            if proj_rel.df.shape[1] == 0:
                # no columns remain: the data vector is just the row count
                x = np.array([proj_rel.df.shape[0]])
            else:
                x = Vectorize('').transform(proj_rel).flatten()

            y = Laplace(matrix, w * eps / w_sum).measure(x, prng)
            measurements.append((matrix.sparse_matrix(), y, 1.0 / w, proj))

        # generate synthetic data
        sizes = [field['bins'] for field in self.config.values()]
        engine = FactoredInference(Domain(attributes, sizes))
        df = engine.estimate(measurements).synthetic_data().df
        self._denumerize(df)
        self._sample_numerical(df)

        return df
コード例 #4
0
def WidthKRange(n, widths):
    """Build the matrix of all contiguous range queries of the given widths.

    :param n: length of the data vector (number of columns)
    :param widths: a single int width, or an iterable of widths
    :return: an ``EkteloMatrix`` wrapping a dense 0/1 array; for each width
        ``k`` (in the order given) it has ``n - k + 1`` consecutive rows, row
        ``i`` of that group having ones in columns ``i`` to ``i + k - 1``
    """
    if type(widths) is int:
        widths = [widths]

    total_rows = sum(n - width + 1 for width in widths)
    W = np.zeros((total_rows, n))

    offset = 0  # first row of the group belonging to the current width
    for width in widths:
        n_windows = n - width + 1
        for begin in range(n_windows):
            W[offset + begin, begin:begin + width] = 1.0
        offset += n_windows

    return EkteloMatrix(W)
コード例 #5
0
def get_measurements(domain, workload):
    """Get measurements using the OPT+ parameterization.

    :param domain: iterable of attribute names that also provides
        ``size(attr)``
    :param workload: iterable of ``(proj, _)`` pairs; only ``proj`` (a
        sequence of attribute names) is used
    :return: list of ``(proj, Q)`` pairs where ``Q`` is the sparse matrix of
        the Kronecker product of the per-attribute strategies in ``proj``
    """
    # optimal strategy for Identity is Identity
    lookup = {attr: Identity(domain.size(attr)) for attr in domain}

    # optimal strategy for Prefix is precomputed and loaded
    precomputed = (
        ('age', 'prefix-85.npy'),
        ('fnlwgt', 'prefix-100.npy'),
        ('capital-gain', 'prefix-100.npy'),
        ('capital-loss', 'prefix-100.npy'),
        ('hours-per-week', 'prefix-99.npy'),
    )
    for attr, path in precomputed:
        lookup[attr] = EkteloMatrix(np.load(path))

    return [
        (proj, Kronecker([lookup[a] for a in proj]).sparse_matrix())
        for proj, _ in workload
    ]
コード例 #6
0
ファイル: pselection.py プロジェクト: sgtjayus/ektelo
    def select(self, x, prng):
        """Select measurements by fitting a PrivBayes model to relation ``x``.

        :param x: relation whose ``df`` is passed (as a contiguous int32
            array) to ``privBayesSelect.py_get_model``
        :param prng: random state used to draw the seed given to the model
            fitting routine
        :return: the measurements derived from the fitted model, wrapped in
            an ``EkteloMatrix``
        """
        # the seed is drawn before anything else touches the inputs
        seed = prng.randint(1E4, 1E9)

        # convert config to privBayes format
        config_str = self.get_config_str(x)
        records = np.ascontiguousarray(x.df.astype(np.int32))

        model_bytes = privBayesSelect.py_get_model(
            records, config_str.encode('utf-8'), self.eps, self.theta, seed)
        model = PrivBayesSelect.make_models(model_bytes.decode('utf-8'))

        return EkteloMatrix(
            PrivBayesSelect.get_measurements(model, self.domain_shape))
コード例 #7
0
    def __init__(self, domain, lower, higher, dtype=np.float64):
        """
        Build a set of range queries as the product of a sparse transformer
        matrix and a Kronecker product of ``Prefix`` matrices.

        :param domain: the domain size, as an int for 1D or tuple for d-dimensional
            domains where each bound is a tuple with the same size as domain.
        :param lower: a q x d array of lower boundaries for the q queries
        :param higher: a q x d array of upper boundaries for the q queries
        :param dtype: dtype of the transformer's values; also passed to each
            ``Prefix`` factor
        """
        assert lower.shape == higher.shape, 'lower and higher must have same shape'
        #assert np.all(lower <= higher), 'lower index must be <= than higher index'

        # 1D case: wrap the domain in a tuple and give the bounds a trailing
        # axis so the rest of the code can treat them as q x d arrays
        if type(domain) is int:
            domain = (domain, )
            lower = lower[:, None]
            higher = higher[:, None]
        self.domain = domain
        self.shape = (lower.shape[0], np.prod(domain))
        self.dtype = dtype
        self._lower = lower
        self._higher = higher

        # idx maps a multi-dimensional cell coordinate to its flattened index
        idx = np.arange(np.prod(domain), dtype=np.int32).reshape(domain)
        shape = (lower.shape[0], np.prod(domain))
        # every True/False combination over the d dimensions (2**d corners)
        corners = np.array(
            list(itertools.product(*[(False, True)] * len(domain))))
        # upper bound on the number of nonzeros: one per (corner, query) pair
        size = len(corners) * lower.shape[0]
        row_ind = np.zeros(size, dtype=np.int32)
        col_ind = np.zeros(size, dtype=np.int32)
        data = np.zeros(size, dtype=dtype)
        queries = np.arange(shape[0], dtype=np.int32)
        start = 0

        for corner in corners:
            # per dimension: `lower - 1` where the corner flag is True, else `higher`
            tmp = np.where(corner, lower - 1, higher)
            # queries with a negative coordinate contribute nothing for this corner
            keep = np.all(tmp >= 0, axis=1)
            # negative coordinates index from the end here, but those entries
            # are discarded by `keep` below
            index = idx[tuple(tmp.T)]
            # coef is +1 for an odd number of True flags and -1 for an even
            # number; the stored value is its negation
            coef = np.sum(corner) % 2 * 2 - 1
            end = start + keep.sum()
            row_ind[start:end] = queries[keep]
            col_ind[start:end] = index[keep]
            data[start:end] = -coef
            start = end

        # only the first `end` slots were filled; the rest of the buffers is unused
        self._transformer = sparse.csr_matrix(
            (data[:end], (row_ind[:end], col_ind[:end])), shape, dtype)

        # NOTE(review): presumably Prefix(n) yields prefix sums, so T combines
        # prefix sums at the corners of each range — confirm against Prefix
        P = Kronecker([Prefix(n, dtype) for n in domain])
        T = EkteloMatrix(self._transformer)
        Product.__init__(self, T, P)
コード例 #8
0
ファイル: support.py プロジェクト: sgtjayus/ektelo
def reduction_matrix(mapping, canonical_order=False):
    """ Build the m x n reduction matrix R for a flattened mapping, where n
        is the dimension of the original data and m is the dimension of the
        reduced data.

        Reduces data vector x with R x
        Expands workload matrix W with W' R

        :param mapping: 1-dimensional array of group indices
        :param canonical_order: when True, ``mapping`` is replaced by
            ``canonical_ordering(mapping)`` before its size is read
    """
    assert mapping.ndim == 1, "Can only handle 1-dimesional mappings for now, domain should be flattened"

    unique, _, inverse, _ = mapping_statistics(mapping)

    # NOTE(review): the statistics above are computed before the reordering,
    # so canonical_order only influences `mapping.size` below — confirm this
    # is intended.
    if canonical_order:
        mapping = canonical_ordering(mapping)

    n, m = mapping.size, unique.size
    # one unit entry per original cell, placed in the row given by `inverse`
    entries = (np.ones(n), (inverse, np.arange(n)))

    return EkteloMatrix(sparse.csr_matrix(entries, shape=(m, n), dtype=int))
コード例 #9
0
ファイル: support.py プロジェクト: sgtjayus/ektelo
def projection_matrix(mapping, idx):
    """ Build the m x n projection matrix P where n is the dimension of the
        original data and m is the number of occurrences of idx in mapping.

        :param mapping: vector with indices representing groups
        :param idx: index of group from which to create projection

        Projects vector x with P x and matrix W with W P^T
        Unprojects vector x with P^T x and matrix W with W P
    """
    # entries of mapping that differ from idx are masked (True)
    mask = np.ma.masked_where(mapping != idx, mapping).mask

    # when all entries are False, a single False will be returned, so the
    # mask is rebuilt at full length
    if np.all(~mask):
        mask = np.array([False] * len(mapping))

    selected = np.where(~mask)[0]
    row_ids = np.arange(selected.size)
    ones = np.ones_like(row_ids)
    # row k has a single one in the column of the k-th selected entry
    P = sparse.csr_matrix((ones, (row_ids, selected)),
                          (row_ids.size, mask.size))

    return EkteloMatrix(P)
コード例 #10
0
ファイル: support.py プロジェクト: sgtjayus/ektelo
def expansion_matrix(mapping, canonical_order=False):
    """ Build the n x m expansion matrix E for a flattened mapping, where n
        is the dimension of the original data and m is the dimension of the
        reduced data.

        Expands data vector x with E x'
        Reduces workload matrix W with W E

        :param mapping: 1-dimensional array of group indices
        :param canonical_order: when True, ``mapping`` is replaced by
            ``canonical_ordering(mapping)`` before its size is read
    """
    assert mapping.ndim == 1, "Can only handle 1-dimesional mappings for now, domain should be flattened"

    unique, _, inverse, counts = mapping_statistics(mapping)

    # NOTE(review): the statistics above are computed before the reordering,
    # so canonical_order only influences `mapping.size` below — confirm this
    # is intended.
    if canonical_order:
        mapping = canonical_ordering(mapping)

    n, m = mapping.size, unique.size
    # m x n membership matrix: one unit entry per original cell
    entries = (np.ones(n), (inverse, np.arange(n)))
    R = sparse.csr_matrix(entries, shape=(m, n), dtype=int)
    # diagonal of 1 / counts, applied to the columns of R^T
    scale = sparse.spdiags(1.0 / counts, 0, m, m)

    return EkteloMatrix(R.T * scale)
コード例 #11
0
 def setUp(self):
     """Create the fixtures used by the tests: size, budget share, seeded
     random state, an identity ``EkteloMatrix`` and a random data vector."""
     n = 8
     self.n = n
     self.eps_share = 0.1
     self.prng = np.random.RandomState(10)
     self.A = EkteloMatrix(np.eye(n))
     # drawn from NumPy's global generator, not from self.prng
     self.X = np.random.rand(n)
コード例 #12
0
 def gram(self):
     """Return the Gram matrix as an explicit ``EkteloMatrix``.

     Entry (i, j) equals ``min(i, j) + 1`` for 0-based indices, stored with
     dtype ``self.dtype``.
     """
     counts = np.arange(self.n).astype(self.dtype) + 1
     pairwise_min = np.minimum(counts[None, :], counts[:, None])
     return EkteloMatrix(pairwise_min)
コード例 #13
0
ファイル: error.py プロジェクト: dpcomp-org/hdmm
def convert_implicit(A):
    """Return ``A`` unchanged when it is already an ``EkteloMatrix`` or a
    ``workload.ExplicitGram``; otherwise wrap it in an ``EkteloMatrix``."""
    if isinstance(A, EkteloMatrix):
        return A
    if isinstance(A, workload.ExplicitGram):
        return A
    return EkteloMatrix(A)
コード例 #14
0
def Moments(n, k=3):
    """Build the k x n matrix whose row ``j`` (0-based) holds ``x ** (j + 1)``
    for ``x = 0, ..., n - 1``, wrapped in an ``EkteloMatrix``.

    :param n: number of columns (values 0 to n - 1)
    :param k: highest power; rows cover the powers 1 to k
    """
    values = np.arange(n)
    exponents = np.arange(1, k + 1)
    powers = values[np.newaxis] ** exponents[:, np.newaxis]
    return EkteloMatrix(powers)
コード例 #15
0
 def gram(self):
     """Return ``self.matrix`` wrapped in an ``EkteloMatrix``."""
     stored = self.matrix
     return EkteloMatrix(stored)
コード例 #16
0
ファイル: error.py プロジェクト: sgtjayus/ektelo
def convert_implicit(A):
    """Return ``A`` unchanged when it is already an ``EkteloMatrix``;
    otherwise wrap it in one."""
    return A if isinstance(A, EkteloMatrix) else EkteloMatrix(A)
コード例 #17
0
 def gram(self):
     """Return the Gram matrix of the base workload restricted to the rows
     and columns selected by ``self.idx``, as an ``EkteloMatrix``."""
     base_gram = self.base.gram().dense_matrix()
     rows_kept = base_gram[self.idx, :]
     return EkteloMatrix(rows_kept[:, self.idx])
コード例 #18
0
 def gram(self):
     """Return the Gram matrix as an explicit ``EkteloMatrix``.

     For 0-based indices, entry (i, j) equals
     ``min((i + 1) * (n - j), (j + 1) * (n - i))`` with ``n = self.n``.
     """
     ascending = np.arange(self.n) + 1
     products = np.outer(ascending, ascending[::-1])
     return EkteloMatrix(np.minimum(products, products.T))
コード例 #19
0
 def test_nnls(self):
     # NOTE(review): only this fixture line is visible in the excerpt; the
     # rest of the test body is presumably cut off — confirm against the
     # full file.
     # Dense n x n matrix with entries drawn from NumPy's global generator.
     A = EkteloMatrix(np.random.rand(self.n, self.n))
コード例 #20
0
ファイル: test_selection.py プロジェクト: sgtjayus/ektelo
    def setUp(self):
        """Set up the fixtures shared by the tests in this case."""

        # domain shapes for the 1-D and 2-D cases, 16 cells per axis
        self.domain_shape_1D = (16, )
        self.domain_shape_2D = (16, 16)
        # 16 x 16 sparse identity wrapped as an EkteloMatrix
        self.W = EkteloMatrix(sparse.eye(16))