def test_membership_inference_torfi_mismatch(self):
    """Run 'torfi' membership inference when synthetic data does not resemble the real data.

    Real training and test matrices are both drawn from a normal
    distribution centred at 0, while the synthetic matrix is centred at 10.
    The test passes when the mean reported probability of the records
    labelled 1 and of the records labelled 0 agree to within 0.05.

    NOTE(review): label 1 presumably marks training records and label 0
    test records -- confirm against Privacy.membership_inference.
    """
    n_rows, n_cols = 10000, 17
    missing_value = -999999
    privacy = Privacy()

    # Column names 'col0' .. 'col16', appended onto an empty list in one call
    # rather than one element at a time.
    header = np.append([], ['col' + str(j) for j in range(n_cols)])

    # Dummy data: both real splits share one distribution (loc=0) while the
    # synthetic set is shifted far away (loc=10).
    shape = (n_rows, n_cols)
    real_train = np.random.normal(loc=0, size=shape)
    real_test = np.random.normal(loc=0, size=shape)
    synthetic = np.random.normal(loc=10, size=shape)

    result = privacy.membership_inference(
        mat_f_r_trn=real_train,
        mat_f_r_tst=real_test,
        mat_f_s=synthetic,
        header=header,
        missing_value=missing_value,
        mi_type='torfi',
        n_cpu=1,
    )

    # Split the reported probabilities by label and average each group.
    probs = result['prob']
    labels = result['label']
    idx_label_1 = np.where(labels == 1)
    idx_label_0 = np.where(labels == 0)
    mean_prob_label_1 = np.mean(probs[idx_label_1])
    mean_prob_label_0 = np.mean(probs[idx_label_0])

    # The two group means must be nearly indistinguishable.
    assert np.allclose(mean_prob_label_1, mean_prob_label_0, atol=0.05)
if args.sample_privacy < len(r_tst['x']): idx = np.random.choice(range(len(r_tst['x'])), args.sample_privacy, replace=False) r_tst['x'] = r_tst['x'][idx, :] # analysis if args.analysis_privacy == 'nearest_neighbors': res = pri.assess_memorization(mat_f_r=r_trn['x'], mat_f_s=s['x'], missing_value=args.missing_value_privacy, header=r_trn['header']) elif args.analysis_privacy == 'membership_inference': res = pri.membership_inference( mat_f_r_trn=r_trn['x'], mat_f_r_tst=r_tst['x'], mat_f_s=s['x'], header=r_trn['header'], missing_value=args.missing_value_privacy) else: print('Error: do not recognize analysis_privacy option ' + args.analysis_privacy) sys.exit(0) # output if args.output_privacy == 'file': outfile = args.outprefix_privacy + '_' + args.analysis_privacy + '.pkl' res['analysis'] = args.analysis_privacy pri.save_obj(res, file_name=outfile) elif args.output_privacy == 'plot': # TODO: write plot_privacy function