def test_generate(self):
    """Train a small Corgan model and check the size of the generated sample.

    A random binary matrix with 1000 rows and 7 columns is used as training
    data. The test passes when ``Corgan.generate`` returns exactly
    ``n_gen`` records.

    The checkpoint file expected from training is removed in a ``finally``
    block so that a failure in training or generation does not leave
    artifacts in the working directory.
    """
    path_checkpoint = '.'
    prefix_checkpoint = 'test'
    n_epochs = 10
    file_ckpt = os.path.join(
        path_checkpoint,
        prefix_checkpoint + ".model_epoch_%d.pth" % n_epochs)

    cor = Corgan()

    # dummy dataset
    n_gen = 500
    n = 1000
    m = 7
    x = np.random.randint(low=0, high=2, size=(n, m))

    try:
        # n_epochs is shared with file_ckpt so the expected checkpoint name
        # always matches the number of epochs actually trained.
        model = cor.train(x=x, n_epochs_pretrain=10, n_epochs=n_epochs,
                          path_checkpoint=path_checkpoint,
                          prefix_checkpoint=prefix_checkpoint)
        x_synth = cor.generate(model=model, n_gen=n_gen)
    finally:
        # clean up; guarded so a missing file cannot mask the real failure
        if os.path.isfile(file_ckpt):
            os.remove(file_ckpt)

    assert len(x_synth) == n_gen
def test_train(self):
    """Check that training writes the final-epoch checkpoint file.

    A random binary matrix with 1000 rows and 7 columns is used as training
    data. The test passes when a file named
    ``<prefix_checkpoint>.model_epoch_<n_epochs>.pth`` exists in
    ``path_checkpoint`` after ``Corgan.train`` returns.

    The existence check is recorded before cleanup and the file is only
    removed when present, so a missing checkpoint is reported as an
    assertion failure rather than a ``FileNotFoundError`` from
    ``os.remove``.
    """
    path_checkpoint = '.'
    prefix_checkpoint = 'test'
    n_epochs = 10
    file_ckpt = os.path.join(
        path_checkpoint,
        prefix_checkpoint + ".model_epoch_%d.pth" % n_epochs)

    cor = Corgan()

    # dummy dataset
    n = 1000
    m = 7
    x = np.random.randint(low=0, high=2, size=(n, m))

    try:
        # n_epochs is shared with file_ckpt so the expected checkpoint name
        # always matches the number of epochs actually trained.
        cor.train(x=x, n_epochs_pretrain=10, n_epochs=n_epochs,
                  batch_size=512, path_checkpoint=path_checkpoint,
                  prefix_checkpoint=prefix_checkpoint)
        res = os.path.isfile(file_ckpt)
    finally:
        # clean up even if training raised part-way through
        if os.path.isfile(file_ckpt):
            os.remove(file_ckpt)

    assert res
def membership_inference_hayes(self, r_trn, r_tst, s_all, n_cpu):
    """Membership inference scenario as in Hayes et al. 2018.

    A shadow Corgan model is trained on ``s_all``; its discriminator is
    then used to score every record in ``r_tst`` and ``r_trn``, and the
    scores are evaluated against the membership labels.

    Parameters
    ----------
    r_trn : array_like
        Records labelled as members (label 1) in the evaluation set.
    r_tst : array_like
        Records labelled as non-members (label 0) in the evaluation set.
        Must be stackable row-wise with ``r_trn``.
    s_all : array_like
        Data passed as ``x`` to ``Corgan.train`` to fit the shadow GAN.
    n_cpu : int
        Forwarded unchanged to ``Corgan.train``.

    Returns
    -------
    dict
        ``'prob'``: discriminator output for the stacked evaluation set
        (``r_tst`` rows first, then ``r_trn`` rows);
        ``'label'``: matching array of 0/1 membership labels;
        ``'roc'``: value returned by ``metrics.roc_curve``;
        ``'auc'``: value returned by ``metrics.roc_auc_score``;
        ``'analysis'``: the constant string ``'membership_inference'``.
    """
    cor = Corgan()

    # evaluation set: r_tst rows first (label 0), then r_trn rows (label 1).
    # np.vstack is used because np.row_stack is a deprecated alias of it.
    x_all = np.vstack((r_tst, r_trn))
    y_all = np.append(np.zeros(len(r_tst)), np.ones(len(r_trn)))

    # train shadow GAN
    gan_shadow = cor.train(x=s_all, n_cpu=n_cpu, debug=True)

    # load shadow discriminator with the same architecture settings that
    # were stored alongside the trained weights
    parameter_dict = gan_shadow['parameter_dict']
    d_shadow = Discriminator(
        minibatch_averaging=parameter_dict['minibatch_averaging'],
        feature_size=parameter_dict['feature_size'])
    d_shadow.load_state_dict(gan_shadow['Discriminator_state_dict'])
    d_shadow.eval()

    # calculate probabilities from shadow discriminator
    # NOTE(review): x_all is a NumPy array here; if Discriminator expects a
    # tensor input (and returns a tensor that the metrics functions cannot
    # consume directly), a conversion is needed on both sides -- confirm.
    p_all = d_shadow(x_all)

    roc = metrics.roc_curve(y_true=y_all, y_score=p_all)
    auc = metrics.roc_auc_score(y_true=y_all, y_score=p_all)

    return {
        'prob': p_all,
        'label': y_all,
        'roc': roc,
        'auc': auc,
        'analysis': 'membership_inference',
    }
def test_save_and_load(self):
    """Check that a saved and reloaded model can still generate samples.

    A Corgan model is trained on a random binary matrix with 1000 rows and
    7 columns, written with ``save_obj``, read back with ``load_obj``, and
    used to generate ``n_gen`` records. The test passes when the generated
    sample has exactly ``n_gen`` records.

    Both files written during the test (the training checkpoint and the
    pickled model) are removed in a ``finally`` block so that a failure at
    any step does not leave artifacts in the working directory.
    """
    path_checkpoint = '.'
    prefix_checkpoint = 'test'
    n_epochs = 10
    file_ckpt = os.path.join(
        path_checkpoint,
        prefix_checkpoint + ".model_epoch_%d.pth" % n_epochs)
    file_model = 'test.pkl'

    cor = Corgan()

    # dummy dataset
    n_gen = 500
    n = 1000
    m = 7
    x = np.random.randint(low=0, high=2, size=(n, m))

    try:
        # n_epochs is shared with file_ckpt so the expected checkpoint name
        # always matches the number of epochs actually trained.
        model_saved = cor.train(x=x, n_epochs_pretrain=10,
                                n_epochs=n_epochs,
                                path_checkpoint=path_checkpoint,
                                prefix_checkpoint=prefix_checkpoint)

        # round-trip the trained model through disk
        cor.save_obj(obj=model_saved, file_name=file_model)
        model_loaded = cor.load_obj(file_model)

        x_synth = cor.generate(model=model_loaded, n_gen=n_gen)
    finally:
        # clean up; guarded so a missing file cannot mask the real failure
        for path in (file_ckpt, file_model):
            if os.path.isfile(path):
                os.remove(path)

    assert len(x_synth) == n_gen
# preprocess meta = pre.get_metadata(arr=arr, header=header) obj_d = pre.get_discretized_matrix(arr=arr, meta=meta, header=header) # split data r_all = obj_d['x'] n_subset_r = round(len(r_all) * args.frac_train) idx_trn = np.random.choice(len(r_all), n_subset_r, replace=False) idx_tst = np.setdiff1d(range(len(r_all)), idx_trn) r_trn = r_all[idx_trn, :] r_tst = r_all[idx_tst, :] # train and save model if args.train_type == 'corgan': syn = Corgan(debug=debug, n_cpu=args.n_cpu_train) elif args.train_type == 'ppgan': syn = Ppgan(debug=debug, n_cpu=args.n_cpu_train) model = syn.train(x=r_trn, n_epochs=args.n_epoch) model['m'] = meta model['header'] = obj_d['header'] syn.save_obj(model, outfile) elif args.task == 'generate': pre = Preprocessor(missing_value=args.missing_value_generate) outfile = args.outprefix_generate + '.csv' syn = Corgan() model = syn.load_obj(args.file_model) if model['parameter_dict']['model'] == 'corgan':