# -*- coding: utf-8 -*-
"""
Created on Wed Jan 20 11:37:34 2016

@author: Alexis Eidelman
"""
# TODO: import unittest
from anonymizer.anonymDF import AnonymDataFrame
import anonymizer.transformations as transfo
from generate_tab import random_table_test_anonym

# Scenario 1: table generated from a scalar first argument; the second
# AnonymDataFrame argument is a one-element list of column names
# (presumably the identifying columns -- confirm against AnonymDataFrame).
tab = random_table_test_anonym(1000, 8, 5)
test = AnonymDataFrame(tab, ['identifiant'], 'sensible')
test.get_k()
test.get_l()

# Scenario 2: table generated from a (1000, nb_cols) tuple, with one
# 'ident_<i>' name per column index and every column cast to str before
# wrapping.
nb_cols = 4
tab = random_table_test_anonym((1000, nb_cols), 8, 5)
nom_cols = ['ident_' + str(k) for k in range(nb_cols)]
tab = tab.astype(str)
test = AnonymDataFrame(tab, nom_cols, 'sensible')
test.get_k()
test.get_l()
# Column names of `avantages`, minus 'ligne_type' and 'avant_nature'.
var = avantages.columns.tolist()
var.remove('ligne_type')
var.remove('avant_nature')

# ## II. Processing the raw data (without INSEE)

# The raw table is k-anonymised right away.
# k is set to 5 here.

ordre_aggregation = [
    'benef_dept',
    'benef_categorie_code',
    'qualite',
    'benef_pays_code',
    'benef_titre_code',
    'benef_identifiant_type_code',
]

# Work on a copy so that `avantages` itself is not handed to AnonymDataFrame.
Avantages = AnonymDataFrame(avantages.copy(), ordre_aggregation,
                            unknown='non renseigné')

k = 5


def aggregation_serie(x):
    """Return local_aggregation(x, k, 'regroup_with_smallest', 'non renseigné').

    `k` is the module-level value, looked up when the function is called.
    """
    return local_aggregation(x, k, 'regroup_with_smallest', 'non renseigné')


def aggregation_year(x):
    """Return local_aggregation(x, k, 'with_closest', 'non renseigné').

    `k` is the module-level value, looked up when the function is called.
    """
    return local_aggregation(x, k, 'with_closest', 'non renseigné')


# One (column name, function) pair per column: every entry of
# ordre_aggregation except the last is paired with aggregation_serie, and
# 'date' is paired with aggregation_year.
method_anonymisation = [
    (name, aggregation_serie) for name in ordre_aggregation[:-1]
] + [('date', aggregation_year)]
# ## II. Processing the raw data (without INSEE)

# The raw table is k-anonymised right away.
# k is set to 5 here.

ordre_aggregation = [
    'benef_dept',
    'benef_categorie_code',
    'qualite',
    'benef_pays_code',
    'benef_titre_code',
    'benef_identifiant_type_code',
]

# Work on a copy so that `avantages` stays available, unmodified, for the
# before/after comparison at the end of this section.
Avantages = AnonymDataFrame(avantages.copy(), ordre_aggregation,
                            unknown='non renseigné')

k = 5


def aggregation_serie(x):
    """Return local_aggregation(x, k, 'regroup_with_smallest', 'non renseigné').

    `k` is the module-level value, looked up when the function is called.
    """
    return local_aggregation(x, k, 'regroup_with_smallest', 'non renseigné')


def aggregation_year(x):
    """Return local_aggregation(x, k, 'with_closest', 'non renseigné').

    `k` is the module-level value, looked up when the function is called.
    """
    return local_aggregation(x, k, 'with_closest', 'non renseigné')


# One (column name, function) pair per column: every entry of
# ordre_aggregation except the last is paired with aggregation_serie, and
# 'date' is paired with aggregation_year.
method_anonymisation = [
    (name, aggregation_serie) for name in ordre_aggregation[:-1]
] + [('date', aggregation_year)]

Avantages.local_transform(method_anonymisation, k)

# Element-wise comparison of the anonymised values with the original ones:
# number of cells that differ, and number of cells that are equal.
modalites_modifiees = (Avantages.anonymized_df.values != avantages.values).sum()
modalites_intactes = (Avantages.anonymized_df.values == avantages.values).sum()
# Bare expression kept from the original (notebook-style cell output).
len(liste_races)

# ## II. Anonymisation

# Define the variables to anonymise.
ordre_aggregation = [
    'Race',
    'Sexe',
    'Robe',
    'Pays de naissance',
    'Destiné à la consommation humaine',
    'Date de naissance',
]

Equides = AnonymDataFrame(equides, ordre_aggregation, unknown='non renseigné')


def aggregation_serie(x):
    """Return local_aggregation(x, 5, 'regroup_with_smallest', 'non renseigné')."""
    return local_aggregation(x, 5, 'regroup_with_smallest', 'non renseigné')


# Every column of ordre_aggregation except the last is paired with
# aggregation_serie.
method_anonymisation = [
    (name, aggregation_serie) for name in ordre_aggregation[:-1]
]


def aggregation_year(x):
    """Return local_aggregation(x, 5, 'with_closest', 'non renseigné')."""
    return local_aggregation(x, 5, 'with_closest', 'non renseigné')


# The last column, 'Date de naissance', is paired with aggregation_year.
method_anonymisation += [('Date de naissance', aggregation_year)]

Equides.local_transform(method_anonymisation, 5)

# Replace the working frame with the anonymised result before calling get_k().
Equides.df = Equides.anonymized_df

Equides.get_k()
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 20 11:37:34 2016

@author: Alexis Eidelman
"""
# TODO: import unittest
from anonymizer.anonymDF import AnonymDataFrame
from generate_tab import random_table_test_anonym

# Build a random table, wrap it in an AnonymDataFrame with the one-element
# column list ['identifiant'] and 'sensible' as third argument, then call
# get_k() and get_l() on it.
tab = random_table_test_anonym(1000, 8, 5)
test = AnonymDataFrame(tab, ['identifiant'], 'sensible')
test.get_k()
test.get_l()