def example1():
    """Optimize a PIdentity template for the AllRange workload and print errors.

    Builds ``workload.AllRange(256)``, optimizes ``templates.PIdentity(16, 256)``
    against it, and prints two ``error.rootmse`` values on a single line: first
    the value for the optimized template's strategy, then the value for the
    ``workload.Identity(256)`` baseline.
    """
    print('Example 1')
    n = 256  # the same size is passed to the workload, the template and the baseline
    W = workload.AllRange(n)
    pid = templates.PIdentity(16, n)
    # The value returned by optimize() was never read, so it is not bound to
    # a name; the strategy is retrieved from `pid` afterwards.
    pid.optimize(W)

    err = error.rootmse(W, pid.strategy())
    err2 = error.rootmse(W, workload.Identity(n))
    print(err, err2)
def example3():
    """Compare strategies on a union of two Kronecker-product workloads.

    The workload stacks a Kronecker product of four ``Prefix(64)`` factors on
    a Kronecker product of four ``AllRange(64)`` factors.  Three
    ``error.expected_error`` values are printed, one per line, in this order:
    the optimized ``KronPIdentity`` template, the optimized ``Marginals``
    template, and a Kronecker product of four ``Identity(64)`` factors.
    """
    print('Example 3')
    size, dims = 64, 4

    prefix_factors = [workload.Prefix(size) for _ in range(dims)]
    range_factors = [workload.AllRange(size) for _ in range(dims)]
    prefix_product = workload.Kronecker(prefix_factors)
    range_product = workload.Kronecker(range_factors)
    union = workload.VStack([prefix_product, range_product])

    # Kronecker parameterization
    kron_template = templates.KronPIdentity([4] * dims, [size] * dims)
    kron_template.optimize(union)
    print(error.expected_error(union, kron_template.strategy()))

    # Marginals parameterization
    marginals_template = templates.Marginals([size] * dims)
    marginals_template.optimize(union)
    print(error.expected_error(union, marginals_template.strategy()))

    # Un-optimized baseline built from Identity factors
    baseline = workload.Kronecker([workload.Identity(size) for _ in range(dims)])
    print(error.expected_error(union, baseline))
def setUp(self):
    """Create the seeded RNG, the domain tuple and the workload fixtures.

    Sets ``self.prng`` (``RandomState(0)``), ``self.domain`` (``(2, 3, 4)``),
    ``self.W`` (a ``VStack`` of three Kronecker products) and the matrices
    ``self.A1`` through ``self.A5``.
    """
    self.prng = np.random.RandomState(0)
    self.domain = (2, 3, 4)

    def identity(n):
        return workload.Identity(n)

    def total(n):
        return workload.Total(n)

    def prefix(n):
        return workload.Prefix(n)

    def random_square(n):
        # Draws from self.prng, so the order of calls determines the values.
        return matrix.EkteloMatrix(self.prng.rand(n, n))

    first = workload.Kronecker([identity(2), total(3), prefix(4)])
    second = workload.Kronecker([total(2), total(3), identity(4)])
    third = workload.Kronecker([identity(2), identity(3), total(4)])
    self.W = workload.VStack([first, second, third])

    # three representations of Identity matrix
    self.A1 = identity(2 * 3 * 4)
    self.A2 = workload.Kronecker([identity(2), identity(3), identity(4)])
    self.A3 = workload.Marginals.fromtuples(self.domain, {(0, 1, 2): 1.0})

    # Randomly generated fixtures: A4 takes its 8 random values from the RNG
    # before A5 draws its 2x2, 3x3 and 4x4 matrices (in that order).
    random_weights = self.prng.rand(8)
    self.A4 = workload.Marginals(self.domain, random_weights)
    self.A5 = workload.Kronecker(
        [random_square(2), random_square(3), random_square(4)]
    )
def adult_benchmark():
    """Load the adult dataset and build one Kronecker workload per projection.

    Returns:
        A ``(data, workloads)`` pair.  ``data`` is whatever ``Dataset.load``
        returns for the adult CSV/domain files; ``workloads`` is a list of
        ``(projection, workload)`` tuples, one per projection, in the order
        the projections are listed below.
    """
    data = Dataset.load('../data/adult.csv', '../data/adult-domain.json')

    projections = [
        ('occupation', 'race', 'capital-loss'),
        ('occupation', 'sex', 'native-country'),
        ('marital-status', 'relationship', 'income>50K'),
        ('age', 'education-num', 'sex'),
        ('workclass', 'education-num', 'occupation'),
        ('marital-status', 'occupation', 'income>50K'),
        ('race', 'native-country', 'income>50K'),
        ('occupation', 'capital-gain', 'income>50K'),
        ('marital-status', 'hours-per-week', 'income>50K'),
        ('workclass', 'race', 'capital-gain'),
        ('marital-status', 'relationship', 'capital-gain'),
        ('workclass', 'education-num', 'capital-gain'),
        ('education-num', 'relationship', 'race'),
        ('fnlwgt', 'hours-per-week', 'income>50K'),
        ('workclass', 'sex', 'native-country'),
    ]

    # Default: each attribute in the dataset's domain gets an Identity
    # workload sized by that attribute's domain size.
    per_attribute = {
        attr: workload.Identity(data.domain.size(attr))
        for attr in data.domain
    }
    # These attributes use Prefix workloads with fixed sizes instead.
    per_attribute.update({
        'age': workload.Prefix(85),
        'fnlwgt': workload.Prefix(100),
        'capital-gain': workload.Prefix(100),
        'capital-loss': workload.Prefix(100),
        'hours-per-week': workload.Prefix(99),
    })

    workloads = [
        (proj, workload.Kronecker([per_attribute[name] for name in proj]))
        for proj in projections
    ]
    return data, workloads
def Identity(n):
    """Return a ``Static`` template wrapping ``workload.Identity(n)``.

    The resulting template strategy is always Identity.
    """
    identity_strategy = workload.Identity(n)
    return Static(identity_strategy)