def update_params(self, epsilon=None, d=None, index_mapper=None, l=None, w=None, use_median=None):
    """
    Update the server parameters; arguments left as None keep their current value

    Args:
        epsilon (optional float): Privacy budget, forwarded to the parent class
        d (optional int): Domain size, forwarded to the parent class
        index_mapper (optional func): Index map function, forwarded to the parent class
        l (optional int): Number of hash functions for the sketch
        w (optional int): Size of sketch vector
        use_median (optional bool): If True, uses median in the count-sketch estimation
    """
    super().update_params(epsilon, d, index_mapper)

    if l is not None:
        self.l = l
    if w is not None:
        self.w = w
    if use_median is not None:
        self.use_median = use_median

    # Sketch dimensions untouched: keep the existing sketch and hash functions
    if l is None and w is None:
        return

    # A new l or w invalidates the sketch, so start from an empty matrix with fresh hash functions
    rows, cols = self.l, self.w
    self.sketch_matrix = np.zeros((rows, cols))
    self.h_funcs = generate_hash_funcs(rows, cols)
    self.g_funcs = generate_hash_funcs(rows, 2)
    self.hash_funcs = list(zip(self.h_funcs, self.g_funcs))
def __init__(self, epsilon, l, w, use_median=True, index_mapper=None):
    """
    Server for the Private Count Sketch (PCS) algorithm

    Args:
        epsilon (float): Privacy budget epsilon
        l (integer): Number of hash functions for the sketch
        w (integer): Size of sketch vector
        use_median (optional - boolean): If True, uses median in the count-sketch estimation
        index_mapper (optional function): Index mapper function
    """
    # The domain size is passed to the parent as None
    super().__init__(epsilon, None, index_mapper)

    self.l, self.w = l, w
    sketch_shape = (self.l, self.w)
    self.sketch_matrix = np.zeros(sketch_shape)
    self.use_median = use_median
    self.name = "PCSServer"

    # Two families of l hash functions: one with range w, one with range 2
    self.h_funcs = generate_hash_funcs(l, w)
    self.g_funcs = generate_hash_funcs(l, 2)
    # Pair them up so each sketch row has an (h, g) tuple
    self.hash_funcs = list(zip(self.h_funcs, self.g_funcs))
def update_params(self, k=None, m=None, epsilon=None, index_mapper=None, d=None):
    """
    Update internal parameters; arguments left as None keep their current value

    Args:
        k (optional int): Number of hash functions
        m (optional int): Size of hash domain
        epsilon (optional float): Privacy Budget
        index_mapper (optional func): Index map function
        d (optional int): Size of domain, forwarded unchanged to the parent class. Accepted last so
            existing positional callers are unaffected.
    """
    if k is not None:
        self.k = k
    if m is not None:
        self.m = m

    # Hash functions are regenerated on every call, even when k and m are unchanged
    self.hash_funcs = generate_hash_funcs(self.k, self.m)

    # This also calls reset() to reset sketch size
    super().update_params(epsilon=epsilon, d=d, index_mapper=index_mapper)

    if epsilon is not None:
        # The Hadamard variant uses the full budget in the constant c, the plain variant uses epsilon / 2
        budget = epsilon if self.is_hadamard else epsilon / 2
        exp_budget = math.exp(budget)
        self.c = (exp_budget + 1) / (exp_budget - 1)
def __init__(self, epsilon, k, m, is_hadamard=False, index_mapper=None):
    """
    Server frequency oracle for Apple's Count Mean Sketch (CMS)

    Args:
        epsilon (float): Privacy Budget
        k (int): Number of hash functions
        m (int): Size of the hash domain
        is_hadamard (optional bool): If True, uses Hadamard Count Mean Sketch (HCMS)
        index_mapper (optional func): Index map function
    """
    super().__init__(epsilon, None, index_mapper)

    # Must be assigned before update_params(): with a non-None epsilon it reads
    # self.is_hadamard to choose the constant c.
    self.is_hadamard = is_hadamard

    # Stores k and m, generates the hash functions and computes c. The index mapper was already
    # given to the parent constructor, so it is not passed again. update_params() takes no
    # domain size argument here, so none is supplied.
    self.update_params(k=k, m=m, epsilon=epsilon)

    self.sketch_matrix = np.zeros((self.k, self.m))
    self.transformed_matrix = np.zeros((self.k, self.m))
    self.last_estimated = self.n
    self.ones = np.ones(self.m)

    if self.is_hadamard:
        # Hadamard matrix of order m, only needed for HCMS
        self.had = hadamard(self.m)
def __init__(self, epsilon, k, m, index_mapper=None, fo_server=None, lh_k=100, estimator_norm=0, sketch_method=0,
             count_sketch=False):
    """
    Sketch-based server that keeps one frequency oracle estimator per hash function

    Args:
        epsilon (float): Privacy budget
        k (integer): The number of hash functions used in the sketch
        m (integer): Size of the sketch vector to privatise
        index_mapper (optional function): Index mapper function
        fo_server (FreqOracleServer): The FO server used for estimation. Needs to be the same as the FO client
            that is being used. Default is FastLH Server
        lh_k (Optional int): If no FO server is passed, this can be used to set the k parameter for the
            FastLH server that is used
        estimator_norm (Optional int): Normalisation performed when estimating sketch rows
            0 - No Norm
            1 - Additive Norm
            2 - Prob Simplex
            3 (or otherwise) - Threshold cut
        sketch_method (Optional int): The sketch method used in the estimation
            0 - Takes the minimum of sketch entries (Count-Min Sketch)
            1 - Takes the median of sketch entries (Count-Median Sketch)
            2 - Takes the mean (no debiasing)
            Anything else - Takes the Unbiased Mean (Count-Mean Sketch)
        count_sketch (optional - boolean): If True, will use count-sketch for estimation
            (instead of count-mean sketch)
    """
    self.sketch_based = True
    self.k = k
    self.lh_k = lh_k
    self.m = m
    self.hash_funcs = generate_hash_funcs(self.k, self.m)
    self.count_sketch = count_sketch
    self.sketch_method = sketch_method
    self.estimator_norm = estimator_norm
    self.set_name("Sketch Response")

    # Domain size given to the parent class and to every per-row estimator
    d = self.m
    self.cs_map = None

    if self.count_sketch:
        self.h_funcs = generate_hash_funcs(k, m)
        self.g_funcs = generate_hash_funcs(k, 2)
        # Count-sketch uses a domain of size 2m
        d = 2 * self.m

        def cs_map(x):
            # Positive x maps to x - 1; any other x maps to 2m - |x|
            return x - 1 if x > 0 else 2 * self.m - abs(x)

        self.cs_map = cs_map
        self.hash_funcs = list(zip(self.h_funcs, self.g_funcs))

    super().__init__(epsilon, d, index_mapper=index_mapper)
    self.aggregated_data = defaultdict(list)
    self.estimator_list = []

    uses_custom_fo = isinstance(fo_server, FreqOracleServer) and not isinstance(fo_server, FastLHServer)

    if uses_custom_fo:
        # Configure the supplied server once (this modifies the caller's object), then copy it per row
        fo_server.update_params(index_mapper=lambda x: x, d=d)
        self.estimator_list.extend(copy.deepcopy(fo_server) for _ in range(0, self.k))
    else:
        # Take k from the supplied server when it has one, otherwise fall back to lh_k
        lh_k = getattr(fo_server, "k", self.lh_k)

        # All FLH estimators will use the same hash funcs so we only need to generate the hash matrix
        # across the domain once to save time
        if self.k >= 1:
            self.estimator_list.append(FastLHServer(self.epsilon, d, lh_k, index_mapper=lambda x: x))
        for _ in range(1, self.k):
            shared_matrix = self.estimator_list[0].hash_matrix
            self.estimator_list.append(
                FastLHServer(self.epsilon, d, lh_k, hash_matrix=shared_matrix, index_mapper=lambda x: x))

    # Estimators without get_hash_funcs() leave this as None
    try:
        self.server_fo_hash_funcs = self.estimator_list[0].get_hash_funcs()
    except AttributeError:
        self.server_fo_hash_funcs = None

    self.reset()  # This should be reworked to just initialise the sketch_matrix here...