Ejemplo n.º 1
0
    def local_transform(self, transformation, k, force_unknown=None):
        '''
        Return a copy of self after locally transforming a copy of self.df.

        The main difference with a plain transformation is that here the
        transformations are applied only if needed (i.e. while get_k is
        still below k), and the last one is applied group by group, the
        groups being defined by all the preceding variables.

        - transformation: a list of 2-element tuples where
            - the first element is the name of a column of self.df
            - the second element is the transformation, a callable
              applied to that column
          It has to be a list (no dict) because order counts: the
          transformations are applied in the order of the list.

        - k: an integer, the k-anonymity level being sought.

        - force_unknown: forwarded to every get_k call; when None,
          self.unknown is used instead.

        Side effects: self.transformation is set to `transformation` and
        self.anonymized_df to the transformed frame before self.copy()
        is returned.
        '''

        if force_unknown is None:
            force_unknown = self.unknown
        self.transformation = transformation
        assert isinstance(transformation, list)
        assert all([len(x) == 2 for x in transformation])
        assert all([x[0] in self.df.columns for x in transformation])
        variables = [x[0] for x in transformation]
        last_transformation = transformation[-1]
        anonymized_df = self.df.copy()

        # Nothing to do when the requested level is already reached.
        if get_k(anonymized_df, variables, force_unknown) >= k:
            self.anonymized_df = anonymized_df
            return self.copy()

        # Base case of the recursion: a single transformation is applied
        # to the whole column, not group by group.
        if len(transformation) == 1:
            colname = transformation[0][0]
            transfo = transformation[0][1]
            anonymized_df[colname] = transfo(anonymized_df[colname])
            self.anonymized_df = anonymized_df
            return self.copy()

        if get_k(anonymized_df, variables[:-1], force_unknown) < k:
            # Forward force_unknown explicitly, otherwise the recursive
            # call would silently fall back to self.unknown.
            anonymized_df = self.local_transform(
                transformation[:-1], k, force_unknown).anonymized_df
            # The recursive call overwrote self.transformation with the
            # shortened list; restore the full one requested by the caller.
            self.transformation = transformation
        # At this point the table is expected to be k-anonymised when
        # restricted to the first len(variables) - 1 variables.

        # Apply the local aggregation of the last variable, group by group.
        grp = anonymized_df.groupby(variables[:-1])
        function = last_transformation[1]
        variable = last_transformation[0]
        anonymized_df[variable] = grp[variable].apply(function)
        # NOTE: the resulting get_k over all `variables` is not checked
        # against k here.

        self.anonymized_df = anonymized_df
        return self.copy()
Ejemplo n.º 2
0
    def local_transform(self, transformation, k, force_unknown=None):
        '''
        Return a copy of self after locally transforming a copy of self.df.

        Compared with a plain transformation, the transformations here are
        applied only if needed (while get_k is still below k), and the
        last one is applied per group, the groups being defined by all
        the variables that precede it.

        - transformation: a list of 2-element tuples where
            - the first element is the name of a column of self.df
            - the second element is the transformation, a callable
              applied to that column
          A dict is not accepted because order counts: transformations
          are applied in the order of the list.

        - k: an integer, the k-anonymity level being sought.

        - force_unknown: passed on to every get_k call; when None,
          self.unknown is used instead.

        Side effects: self.transformation is set to `transformation` and
        self.anonymized_df to the transformed frame before self.copy()
        is returned.
        '''

        if force_unknown is None:
            force_unknown = self.unknown
        self.transformation = transformation
        assert isinstance(transformation, list)
        assert all([len(x) == 2 for x in transformation])
        assert all([x[0] in self.df.columns for x in transformation])
        variables = [x[0] for x in transformation]
        last_transformation = transformation[-1]
        anonymized_df = self.df.copy()

        # Already at the requested level: keep the data untouched.
        if get_k(anonymized_df, variables, force_unknown) >= k:
            self.anonymized_df = anonymized_df
            return self.copy()

        # Recursion base case: one transformation, applied to the whole
        # column rather than per group.
        if len(transformation) == 1:
            colname = transformation[0][0]
            transfo = transformation[0][1]
            anonymized_df[colname] = transfo(anonymized_df[colname])
            self.anonymized_df = anonymized_df
            return self.copy()

        if get_k(anonymized_df, variables[:-1], force_unknown) < k:
            # Pass force_unknown down; without it the recursive call
            # would fall back to self.unknown and ignore the caller's value.
            anonymized_df = self.local_transform(
                transformation[:-1], k, force_unknown).anonymized_df
            # The recursion replaced self.transformation with the shorter
            # list; put back the full list the caller asked for.
            self.transformation = transformation
        # The table is now expected to be k-anonymised when restricted
        # to the first len(variables) - 1 variables.

        # Locally aggregate the last variable within each group.
        grp = anonymized_df.groupby(variables[:-1])
        function = last_transformation[1]
        variable = last_transformation[0]
        anonymized_df[variable] = grp[variable].apply(function)
        # NOTE: the final get_k over all `variables` is not verified
        # against k here.

        self.anonymized_df = anonymized_df
        return self.copy()
Ejemplo n.º 3
0
 def test_get_k(self):
     # get_k over the 'Name' column of the iris CSV is expected to be 50.
     expected_k = 50
     dataset = pd.read_csv("data/iris.csv")
     self.assertEqual(get_k(dataset, ['Name']), expected_k)
Ejemplo n.º 4
0
 def get_k(self):
     """Return get_k computed on self.df for the self.identifiant columns.

     The bare name ``get_k`` in the body is the free function of the same
     name, not this method.
     """
     frame, id_columns = self.df, self.identifiant
     return get_k(frame, id_columns)
Ejemplo n.º 5
0
 def get_final_k(self):
     """Return get_k computed on the anonymized data.

     self.anonymized_df, self.identifiant and self.unknown are passed, in
     that order, to the free function get_k.
     """
     arguments = (self.anonymized_df, self.identifiant, self.unknown)
     return get_k(*arguments)
Ejemplo n.º 6
0
 def get_k(self):
     """Return get_k computed on the original data held in self.df.

     self.df, self.identifiant and self.unknown are passed, in that order,
     to the free function get_k (the bare name does not refer to this
     method).
     """
     arguments = (self.df, self.identifiant, self.unknown)
     return get_k(*arguments)
Ejemplo n.º 7
0
 def test_get_k(self):
     # Load the iris CSV and check that get_k on its 'Name' column is 50.
     columns = ['Name']
     frame = pd.read_csv("data/iris.csv")
     self.assertEqual(get_k(frame, columns), 50)
Ejemplo n.º 8
0
 def get_final_k(self):
     """Return the free function get_k applied to the anonymized frame.

     The call receives self.anonymized_df, self.identifiant and
     self.unknown as positional arguments.
     """
     frame = self.anonymized_df
     id_columns = self.identifiant
     unknown = self.unknown
     return get_k(frame, id_columns, unknown)
Ejemplo n.º 9
0
 def get_k(self):
     """Return the free function get_k applied to self.df.

     The call receives self.df, self.identifiant and self.unknown as
     positional arguments; the bare name ``get_k`` is not this method.
     """
     frame = self.df
     id_columns = self.identifiant
     unknown = self.unknown
     return get_k(frame, id_columns, unknown)
Ejemplo n.º 10
0
 def get_k(self):
     """Return the free function get_k applied to self.df and self.identifiant.

     Inside the body the bare name ``get_k`` is the free function, not
     this method.
     """
     arguments = (self.df, self.identifiant)
     return get_k(*arguments)