def test_assess_memorization(self):
    """Smoke-test ``Privacy.assess_memorization`` on random matrices.

    Two independently drawn uniform-random matrices of shape ``(n, m)`` are
    passed as ``mat_f_r`` and ``mat_f_s`` together with one label per
    column, using the Euclidean metric. The test passes when the mean of
    ``res['real']`` is strictly smaller than the mean of ``res['rand']``.
    """
    n = 1000  # number of rows in each matrix
    m = 3  # number of feature columns
    missing_value = -999999

    pri = Privacy()

    # Build the column labels in one pass. Growing an array with np.append
    # inside a loop reallocates and copies on every iteration and relies on
    # promoting an empty float array to a string dtype.
    header = np.array(['col' + str(i) for i in range(m)])

    # NOTE(review): both matrices come from the unseeded global RNG, so each
    # run sees different data -- consider seeding if this ever proves flaky.
    x_real = np.random.random(size=(n, m))
    x_synth = np.random.random(size=(n, m))

    res = pri.assess_memorization(
        mat_f_r=x_real,
        mat_f_s=x_synth,
        missing_value=missing_value,
        header=header,
        metric='euclidean',
        debug=False,
    )

    assert np.mean(res['real']) < np.mean(res['rand'])
s['x'] = s['x'][idx, :] if args.sample_privacy < len(r_trn['x']): idx = np.random.choice(range(len(r_trn['x'])), args.sample_privacy, replace=False) r_trn['x'] = r_trn['x'][idx, :] if args.sample_privacy < len(r_tst['x']): idx = np.random.choice(range(len(r_tst['x'])), args.sample_privacy, replace=False) r_tst['x'] = r_tst['x'][idx, :] # analysis if args.analysis_privacy == 'nearest_neighbors': res = pri.assess_memorization(mat_f_r=r_trn['x'], mat_f_s=s['x'], missing_value=args.missing_value_privacy, header=r_trn['header']) elif args.analysis_privacy == 'membership_inference': res = pri.membership_inference( mat_f_r_trn=r_trn['x'], mat_f_r_tst=r_tst['x'], mat_f_s=s['x'], header=r_trn['header'], missing_value=args.missing_value_privacy) else: print('Error: do not recognize analysis_privacy option ' + args.analysis_privacy) sys.exit(0) # output if args.output_privacy == 'file':