コード例 #1
0
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 20 11:37:34 2016

@author: Alexis Eidelman
"""

#TODO: import unittest

from anonymizer.anonymDF import AnonymDataFrame
import anonymizer.transformations as transfo
from generate_tab import random_table_test_anonym

# First scenario: a generated table wrapped with a single column name
# ('identifiant') as second argument and 'sensible' as third argument.
# NOTE(review): the meaning of the (1000, 8, 5) arguments is defined by
# generate_tab.random_table_test_anonym, which is not visible here -- confirm.
tab = random_table_test_anonym(1000, 8, 5)
test = AnonymDataFrame(tab, ['identifiant'], 'sensible')

# The return values are discarded; these calls only exercise the methods.
test.get_k()
test.get_l()

# Second scenario: the first argument becomes the tuple (1000, nb_cols), the
# table is cast to str, and the generated names 'ident_0' .. 'ident_3' are
# passed as the second argument of AnonymDataFrame.
nb_cols = 4
nom_cols = ['ident_{}'.format(k) for k in range(nb_cols)]
tab = random_table_test_anonym((1000, nb_cols), 8, 5).astype(str)
test = AnonymDataFrame(tab, nom_cols, 'sensible')

test.get_k()
test.get_l()

コード例 #2
0
# Collect every column name of `avantages`, then drop two of them.
# Assuming tolist() returns a plain list, remove() raises ValueError when the
# name is absent, so both columns are expected to exist.
var = avantages.columns.tolist()
var.remove('ligne_type')
var.remove('avant_nature')

# ## II. Processing the raw data (without INSEE)

# The raw table is k-anonymised right away.
# k is set to 5 here.

ordre_aggregation = [
    'benef_dept',
    'benef_categorie_code',
    'qualite',
    'benef_pays_code',
    'benef_titre_code',
    'benef_identifiant_type_code',
]

# A copy of `avantages` is handed to the wrapper, not the table itself.
Avantages = AnonymDataFrame(
    avantages.copy(), ordre_aggregation, unknown='non renseigné'
)
k = 5


def aggregation_serie(x):
    """Return ``local_aggregation`` applied to ``x`` with 'regroup_with_smallest'.

    The module-level ``k`` is forwarded as the second argument and
    'non renseigné' as the fourth (presumably the label for unknown values,
    since the same string is passed as ``unknown=`` elsewhere -- confirm).
    """
    method = 'regroup_with_smallest'
    unknown_label = 'non renseigné'
    return local_aggregation(x, k, method, unknown_label)


def aggregation_year(x):
    """Return ``local_aggregation`` applied to ``x`` with 'with_closest'.

    The module-level ``k`` is forwarded as the second argument and
    'non renseigné' as the fourth (presumably the label for unknown values,
    since the same string is passed as ``unknown=`` elsewhere -- confirm).
    """
    method = 'with_closest'
    unknown_label = 'non renseigné'
    return local_aggregation(x, k, method, unknown_label)


# Pair every entry of ordre_aggregation except the last with
# aggregation_serie, then add 'date' handled by aggregation_year.
# NOTE(review): the last entry of ordre_aggregation gets no method here and
# 'date' is not part of that list -- confirm this is intended.
method_anonymisation = [
    (name, aggregation_serie) for name in ordre_aggregation[:-1]
]
method_anonymisation.append(('date', aggregation_year))
コード例 #3
0
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 20 11:37:34 2016

@author: Alexis Eidelman
"""

#TODO: import unittest

from anonymizer.anonymDF import AnonymDataFrame
import anonymizer.transformations as transfo
from generate_tab import random_table_test_anonym

# First scenario: a generated table wrapped with a single column name
# ('identifiant') as second argument and 'sensible' as third argument.
# NOTE(review): the meaning of the (1000, 8, 5) arguments is defined by
# generate_tab.random_table_test_anonym, which is not visible here -- confirm.
tab = random_table_test_anonym(1000, 8, 5)
test = AnonymDataFrame(tab, ['identifiant'], 'sensible')

# The return values are discarded; these calls only exercise the methods.
test.get_k()
test.get_l()


# Second scenario: the first argument becomes the tuple (1000, nb_cols), the
# table is cast to str, and the generated names 'ident_0' .. 'ident_3' are
# passed as the second argument of AnonymDataFrame.
nb_cols = 4
nom_cols = ['ident_{}'.format(k) for k in range(nb_cols)]
tab = random_table_test_anonym((1000, nb_cols), 8, 5).astype(str)
test = AnonymDataFrame(tab, nom_cols, 'sensible')

test.get_k()
test.get_l()
コード例 #4
0


# ## II. Processing the raw data (without INSEE)

# The raw table is k-anonymised right away.
# k is set to 5 here.

ordre_aggregation = [
    'benef_dept',
    'benef_categorie_code',
    'qualite',
    'benef_pays_code',
    'benef_titre_code',
    'benef_identifiant_type_code',
]

# A copy of `avantages` is handed to the wrapper, not the table itself.
Avantages = AnonymDataFrame(
    avantages.copy(),
    ordre_aggregation,
    unknown='non renseigné',
)
k = 5

def aggregation_serie(x):
    """Return ``local_aggregation`` applied to ``x`` with 'regroup_with_smallest'.

    The module-level ``k`` is forwarded as the second argument and
    'non renseigné' as the fourth (presumably the label for unknown values,
    since the same string is passed as ``unknown=`` elsewhere -- confirm).
    """
    method = 'regroup_with_smallest'
    unknown_label = 'non renseigné'
    return local_aggregation(x, k, method, unknown_label)

def aggregation_year(x):
    """Return ``local_aggregation`` applied to ``x`` with 'with_closest'.

    The module-level ``k`` is forwarded as the second argument and
    'non renseigné' as the fourth (presumably the label for unknown values,
    since the same string is passed as ``unknown=`` elsewhere -- confirm).
    """
    method = 'with_closest'
    unknown_label = 'non renseigné'
    return local_aggregation(x, k, method, unknown_label)
    
# Pair every entry of ordre_aggregation except the last with
# aggregation_serie, then add 'date' handled by aggregation_year.
# NOTE(review): the last entry of ordre_aggregation gets no method here and
# 'date' is not part of that list -- confirm this is intended.
method_anonymisation = [
    (name, aggregation_serie) for name in ordre_aggregation[:-1]
]
method_anonymisation.append(('date', aggregation_year))

# Run the per-column methods through the wrapper; k is passed as the second
# argument.
Avantages.local_transform(method_anonymisation, k)

# Element-wise comparison of the anonymised values with the original table:
# first the number of cells that differ, then the number that are equal.
# NOTE(review): assumes both .values arrays share the same shape and row
# order; cells that do not compare equal to themselves (e.g. NaN) would be
# counted as modified -- confirm.
modalites_modifiees = (Avantages.anonymized_df.values != avantages.values).sum()
modalites_intactes = (Avantages.anonymized_df.values == avantages.values).sum()
コード例 #5
0
# Bare expression (notebook-style); its value is not stored.
len(liste_races)


# ## II. Anonymisation

# Variables to anonymise.

ordre_aggregation = [
    'Race',
    'Sexe',
    'Robe',
    'Pays de naissance',
    'Destiné à la consommation humaine',
    'Date de naissance',
]


# Unlike a copy-based setup, `equides` itself is handed to the wrapper.
Equides = AnonymDataFrame(
    equides, ordre_aggregation, unknown='non renseigné'
)

def aggregation_serie(x):
    """Return ``local_aggregation`` applied to ``x`` with 'regroup_with_smallest'.

    The literal 5 is forwarded as the second argument (the same value is
    passed to ``local_transform`` in this script) and 'non renseigné' as the
    fourth (presumably the label for unknown values -- confirm).
    """
    method = 'regroup_with_smallest'
    unknown_label = 'non renseigné'
    return local_aggregation(x, 5, method, unknown_label)
# Pair every entry of ordre_aggregation except the last one with
# aggregation_serie; the last entry ('Date de naissance') is handled separately.
method_anonymisation = [(name, aggregation_serie) for name in ordre_aggregation[:-1]]

def aggregation_year(x):
    """Return ``local_aggregation`` applied to ``x`` with 'with_closest'.

    The literal 5 is forwarded as the second argument (the same value is
    passed to ``local_transform`` in this script) and 'non renseigné' as the
    fourth (presumably the label for unknown values -- confirm).
    """
    method = 'with_closest'
    unknown_label = 'non renseigné'
    return local_aggregation(x, 5, method, unknown_label)
# The birth-date column is paired with aggregation_year instead.
method_anonymisation.append(('Date de naissance', aggregation_year))

# Same threshold (5) as the one hard-coded in the aggregation helpers.
Equides.local_transform(method_anonymisation, 5)

# Replace the wrapped frame with its anonymised version before get_k() runs.
Equides.df = Equides.anonymized_df

# Return value is not stored.
Equides.get_k()
コード例 #6
0
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 20 11:37:34 2016

@author: Alexis Eidelman
"""

#TODO: import unittest

from anonymizer.anonymDF import AnonymDataFrame
from generate_tab import random_table_test_anonym

# Generated table wrapped with a single column name ('identifiant') as second
# argument and 'sensible' as third argument.
# NOTE(review): the meaning of the (1000, 8, 5) arguments is defined by
# generate_tab.random_table_test_anonym, which is not visible here -- confirm.
tab = random_table_test_anonym(1000, 8, 5)

test = AnonymDataFrame(tab, ['identifiant'], 'sensible')

# The return values are discarded; these calls only exercise the methods.
test.get_k()
test.get_l()