def local_transform(self, transformation, k, force_unknown=None):
    '''
    Apply transformations one column at a time, only as far as needed to
    reach k-anonymity, and return a copy of self holding the result.

    Unlike a global transformation, the last transformation of the list is
    applied group by group, the groups being defined by the columns of all
    the previous transformations.

    - transformation: a list of 2-tuples (order matters, so no dict):
        - first element is the name of the column
        - second element is the transformation (a callable)
    - k: integer, the k-anonymity level that is sought
    - force_unknown: value forwarded as third argument to get_k;
        defaults to self.unknown when None

    Side effects: sets self.transformation and self.anonymized_df.
    Returns self.copy(), taken after self.anonymized_df has been set.

    Note: transformations are applied in the order of the list.
    '''
    if force_unknown is None:
        force_unknown = self.unknown
    self.transformation = transformation

    assert isinstance(transformation, list)
    assert all(len(x) == 2 for x in transformation)
    assert all(x[0] in self.df.columns for x in transformation)

    variables = [x[0] for x in transformation]
    last_variable, last_function = transformation[-1]

    anonymized_df = self.df.copy()

    # Nothing to do: the untouched data already reaches the requested k.
    if get_k(anonymized_df, variables, force_unknown) >= k:
        self.anonymized_df = anonymized_df
        return self.copy()

    # A single transformation has no preceding columns to group by, so it
    # is applied to the whole column at once.
    if len(transformation) == 1:
        colname, transfo = transformation[0]
        anonymized_df[colname] = transfo(anonymized_df[colname])
        self.anonymized_df = anonymized_df
        return self.copy()

    if get_k(anonymized_df, variables[:-1], force_unknown) < k:
        # Forward force_unknown so the recursion measures k the same way
        # as this call does.
        anonymized_df = self.local_transform(
            transformation[:-1], k, force_unknown
        ).anonymized_df
        # The recursive call recorded the truncated list on self; restore
        # the full list requested by the caller.
        self.transformation = transformation

    # At this point the table is k-anonymised when restricted to the
    # first len(variables) - 1 variables; the last variable is now
    # aggregated locally, group by group.
    grp = anonymized_df.groupby(variables[:-1])
    anonymized_df[last_variable] = grp[last_variable].apply(last_function)

    self.anonymized_df = anonymized_df
    return self.copy()
def test_get_k(self):
    # get_k on the iris CSV, restricted to the 'Name' column, must be 50.
    iris_df = pd.read_csv("data/iris.csv")
    columns = ['Name']
    self.assertEqual(get_k(iris_df, columns), 50)
def get_k(self):
    '''Return the module-level get_k of self.df for self.identifiant.'''
    frame = self.df
    identifiers = self.identifiant
    return get_k(frame, identifiers)
def get_final_k(self):
    '''
    Return the module-level get_k of self.anonymized_df for
    self.identifiant, with self.unknown passed as third argument.
    '''
    frame = self.anonymized_df
    identifiers = self.identifiant
    unknown = self.unknown
    return get_k(frame, identifiers, unknown)
def get_k(self):
    '''
    Return the module-level get_k of self.df for self.identifiant, with
    self.unknown passed as third argument.
    '''
    frame = self.df
    identifiers = self.identifiant
    unknown = self.unknown
    return get_k(frame, identifiers, unknown)