def update_params(self, epsilon=None, d=None, index_mapper=None, l=None, w=None, use_median=None):
        """
        Update the server parameters; any argument left as None keeps its current value
        Args:
            epsilon (optional float): Forwarded to the parent update_params
            d (optional int): Forwarded to the parent update_params
            index_mapper (optional func): Forwarded to the parent update_params
            l (optional int): New number of hash functions (rows of the sketch matrix)
            w (optional int): New size of the sketch vector (columns of the sketch matrix)
            use_median (optional bool): New value for the use_median flag
        """
        super().update_params(epsilon, d, index_mapper)
        self.l = self.l if l is None else l
        self.w = self.w if w is None else w
        self.use_median = self.use_median if use_median is None else use_median

        # The sketch only has to be rebuilt when one of its dimensions was supplied.
        if l is None and w is None:
            return

        # A supplied l or w invalidates the old sketch: zero it and draw new hash functions.
        rows, cols = self.l, self.w
        self.sketch_matrix = np.zeros((rows, cols))
        bucket_hashes = generate_hash_funcs(rows, cols)
        sign_hashes = generate_hash_funcs(rows, 2)
        self.h_funcs = bucket_hashes
        self.g_funcs = sign_hashes
        self.hash_funcs = list(zip(bucket_hashes, sign_hashes))
 def __init__(self, epsilon, l, w, use_median=True, index_mapper=None):
     """
     Server for the Private Count Sketch (PCS) algorithm
     Args:
         epsilon (float): Privacy budget epsilon
         l (integer): Number of hash functions for the sketch
         w (integer): Size of the sketch vector
         use_median (optional - boolean): If True, uses median in the count-sketch estimation
         index_mapper (optional function): Index mapper function
     """
     # The parent constructor is given None in place of a domain size.
     super().__init__(epsilon, None, index_mapper)
     self.l, self.w = l, w
     self.use_median = use_median
     self.name = "PCSServer"

     # An l x w sketch of zeros, with one (h, g) hash pair per row:
     # h is generated over w values, g over 2 values.
     self.sketch_matrix = np.zeros((self.l, self.w))
     bucket_hashes = generate_hash_funcs(l, w)
     sign_hashes = generate_hash_funcs(l, 2)
     self.h_funcs = bucket_hashes
     self.g_funcs = sign_hashes
     self.hash_funcs = list(zip(bucket_hashes, sign_hashes))
Exemple #3
0
 def update_params(self, k=None, m=None, epsilon=None, index_mapper=None, d=None):
     """
     Update internal parameters; arguments left as None keep their current value
     Args:
         k (optional int): Number of hash functions
         m (optional int): Size of hash domain
         epsilon (optional float): Privacy Budget
         index_mapper (optional func): Index map function
         d (optional int): Size of domain, forwarded to the parent update_params.
             Declared last so that existing positional callers are unaffected
     """
     self.k = k if k is not None else self.k
     self.m = m if m is not None else self.m
     # New hash functions are drawn on every call, even when k and m are unchanged.
     self.hash_funcs = generate_hash_funcs(self.k, self.m)
     super().update_params(epsilon=epsilon, d=d, index_mapper=index_mapper
                           )  # This also calls reset() to reset sketch size
     if epsilon is not None:
         # c = (e^x + 1) / (e^x - 1), with x = epsilon for the Hadamard variant
         # and x = epsilon / 2 otherwise.
         exponent = epsilon if self.is_hadamard else epsilon / 2
         e_pow = math.exp(exponent)
         self.c = (e_pow + 1) / (e_pow - 1)
Exemple #4
0
    def __init__(self, epsilon, k, m, is_hadamard=False, index_mapper=None):
        """
        Server frequency oracle for Apple's Count Mean Sketch (CMS)

        Args:
            epsilon (float): Privacy Budget
            k (int): Number of hash functions
            m (int): Size of the hash domain
            is_hadamard (optional bool): If True, uses Hadamard Count Mean Sketch (HCMS)
            index_mapper (optional func): Index map function
        """
        super().__init__(epsilon, None, index_mapper)
        # update_params() reads self.is_hadamard whenever epsilon is given, so the
        # flag has to exist before that call.
        self.is_hadamard = is_hadamard
        self.update_params(k, m, epsilon, index_mapper=None)
        self.hash_funcs = generate_hash_funcs(k, m)
        self.sketch_matrix = np.zeros((self.k, self.m))
        self.transformed_matrix = np.zeros((self.k, self.m))

        self.last_estimated = self.n
        self.ones = np.ones(self.m)

        # The Hadamard matrix of order m is only built for the HCMS variant.
        if self.is_hadamard:
            self.had = hadamard(self.m)
    def __init__(self, epsilon, k, m, index_mapper=None, fo_server=None, lh_k=100, estimator_norm=0, sketch_method=0,
                 count_sketch=False):
        """
        Set up a sketch-based server that holds k frequency-oracle estimators

        Args:
            epsilon (float): Privacy budget
            k (integer): The number of hash functions used in the sketch
            m (integer): Size of the sketch vector to privatise
            index_mapper (optional function): Index mapper function
            fo_server (FreqOracleServer): The FO server used for estimation. Needs to be the same as the FO client that is being used. Default is FastLH Server
            lh_k (Optional int): If no FO server is passed, this can be used to set the k parameter for the FastLH server that is used
            estimator_norm (Optional int): Normalisation performed when estimating sketch rows
                           0 - No Norm
                           1 - Additive Norm
                           2 - Prob Simplex
                           3 (or otherwise) - Threshold cut
            sketch_method (Optional int): The sketch method used in the estimation -
                            0 - Takes the minimum of sketch entries (Count-Min Sketch)
                            1 - Takes the median of sketch entries (Count-Median Sketch)
                            2 - Takes the mean (no debiasing)
                            Anything else - Takes the Unbiased Mean (Count-Mean Sketch)
            count_sketch (optional - boolean): If True, will use count-sketch for estimation (instead of count-mean sketch)
        """
        self.k, self.m, self.lh_k = k, m, lh_k
        self.sketch_based = True
        self.count_sketch = count_sketch
        self.sketch_method = sketch_method
        self.estimator_norm = estimator_norm
        self.hash_funcs = generate_hash_funcs(self.k, self.m)
        self.set_name("Sketch Response")

        # Domain size handed to the parent constructor and to every estimator:
        # m by default, 2m when count-sketch is enabled.
        self.cs_map = None
        domain_size = self.m

        if self.count_sketch:
            self.h_funcs = generate_hash_funcs(k, m)
            self.g_funcs = generate_hash_funcs(k, 2)
            domain_size = 2 * self.m

            def cs_map(x):
                # Positive x maps to x - 1; any other x maps to 2m - |x|.
                return x - 1 if x > 0 else 2 * self.m - abs(x)

            self.cs_map = cs_map
            self.hash_funcs = list(zip(self.h_funcs, self.g_funcs))

        super().__init__(epsilon, domain_size, index_mapper=index_mapper)
        self.aggregated_data = defaultdict(list)
        self.estimator_list = []

        custom_fo = isinstance(fo_server, FreqOracleServer) and not isinstance(fo_server, FastLHServer)
        if custom_fo:
            # The supplied server is reconfigured in place, then deep-copied k times.
            fo_server.update_params(index_mapper=lambda x: x, d=domain_size)
            self.estimator_list.extend(copy.deepcopy(fo_server) for _ in range(self.k))
        else:
            # Use the k of the supplied server when it has one, otherwise fall back to lh_k.
            flh_k = getattr(fo_server, "k", self.lh_k)

            # All FLH estimators will use the same hash funcs so we only need to generate the hash matrix across the domain once to save time
            for row in range(self.k):
                if row == 0:
                    estimator = FastLHServer(self.epsilon, domain_size, flh_k, index_mapper=lambda x: x)
                else:
                    estimator = FastLHServer(self.epsilon, domain_size, flh_k,
                                             hash_matrix=self.estimator_list[0].hash_matrix,
                                             index_mapper=lambda x: x)
                self.estimator_list.append(estimator)

        # Estimators without get_hash_funcs() leave this as None.
        try:
            fo_hash_funcs = self.estimator_list[0].get_hash_funcs()
        except AttributeError:
            fo_hash_funcs = None
        self.server_fo_hash_funcs = fo_hash_funcs

        self.reset() # This should be reworked to just initialise the sketch_matrix here...