コード例 #1
0
 def test_generate(self):
     """Train a small Corgan on dummy binary data and verify that
     generate() returns exactly the requested number of samples."""

     path_checkpoint = '.'
     prefix_checkpoint = 'test'
     n_epochs = 10
     cor = Corgan()

     # dummy dataset: n samples of m binary features
     n_gen = 500
     n = 1000
     m = 7
     x = np.random.randint(low=0, high=2, size=(n, m))

     # pass n_epochs via the variable so the checkpoint filename used for
     # cleanup below cannot drift out of sync with the training run
     model = cor.train(x=x,
                       n_epochs_pretrain=10,
                       n_epochs=n_epochs,
                       path_checkpoint=path_checkpoint,
                       prefix_checkpoint=prefix_checkpoint)

     x_synth = cor.generate(model=model, n_gen=n_gen)

     # clean up the checkpoint written by train(); guard so a missing file
     # surfaces as an assertion failure rather than FileNotFoundError
     file_ckpt = os.path.join(path_checkpoint,
                              prefix_checkpoint + ".model_epoch_%d.pth" % n_epochs)
     if os.path.isfile(file_ckpt):
         os.remove(file_ckpt)

     assert len(x_synth) == n_gen
コード例 #2
0
 def test_train(self):
     """Train a small Corgan on dummy binary data and verify that a
     model checkpoint file is written for the final epoch."""

     path_checkpoint = '.'
     prefix_checkpoint = 'test'
     n_epochs = 10
     cor = Corgan()

     # dummy dataset: n samples of m binary features
     n = 1000
     m = 7
     x = np.random.randint(low=0, high=2, size=(n, m))

     # pass n_epochs via the variable so the expected checkpoint filename
     # below cannot drift out of sync with the training run
     cor.train(x=x,
               n_epochs_pretrain=10,
               n_epochs=n_epochs,
               batch_size=512,
               path_checkpoint=path_checkpoint,
               prefix_checkpoint=prefix_checkpoint)

     file_ckpt = os.path.join(path_checkpoint,
                              prefix_checkpoint + ".model_epoch_%d.pth" % n_epochs)
     res = os.path.isfile(file_ckpt)

     # only remove when present: an unconditional remove would raise
     # FileNotFoundError and mask the intended assertion failure
     if res:
         os.remove(file_ckpt)

     assert res
コード例 #3
0
    def membership_inference_hayes(self, r_trn, r_tst, s_all, n_cpu):
        """Membership inference attack following Hayes et al. 2018 (LOGAN).

        Trains a shadow GAN on synthetic data, then scores real records
        with the shadow discriminator: records the discriminator rates as
        more "real" are predicted to have been in the training set.

        Parameters
        ----------
        r_trn : array-like, 2-D
            Real records used in training (membership label 1).
        r_tst : array-like, 2-D
            Real held-out records (membership label 0).
        s_all : array-like, 2-D
            Synthetic records used to train the shadow GAN.
        n_cpu : int
            Number of CPUs passed through to shadow-GAN training.

        Returns
        -------
        dict
            'prob'     : discriminator scores for all evaluated records,
            'label'    : membership labels (0 = held out, 1 = trained on),
            'roc'      : (fpr, tpr, thresholds) from sklearn roc_curve,
            'auc'      : ROC AUC of the attack,
            'analysis' : the string 'membership_inference'.
        """

        cor = Corgan()

        # evaluation set: held-out records first (label 0), then
        # training records (label 1)
        # np.vstack replaces np.row_stack, which is removed in NumPy 2.0
        x_all = np.vstack((r_tst, r_trn))
        y_all = np.append(np.zeros(len(r_tst)), np.ones(len(r_trn)))

        # train shadow GAN on the synthetic data
        gan_shadow = cor.train(x=s_all, n_cpu=n_cpu, debug=True)

        # rebuild the shadow discriminator from the saved state dict
        minibatch_averaging = gan_shadow['parameter_dict'][
            'minibatch_averaging']
        feature_size = gan_shadow['parameter_dict']['feature_size']
        d_shadow = Discriminator(minibatch_averaging=minibatch_averaging,
                                 feature_size=feature_size)
        d_shadow.load_state_dict(gan_shadow['Discriminator_state_dict'])
        d_shadow.eval()

        # calculate probabilities from shadow discriminator
        # NOTE(review): x_all is a numpy array passed straight into the
        # discriminator — assumes Discriminator accepts ndarray input;
        # confirm no tensor conversion is required.
        p_all = d_shadow(x_all)

        roc = metrics.roc_curve(y_true=y_all, y_score=p_all)
        auc = metrics.roc_auc_score(y_true=y_all, y_score=p_all)

        return {
            'prob': p_all,
            'label': y_all,
            'roc': roc,
            'auc': auc,
            'analysis': 'membership_inference'
        }
コード例 #4
0
 def test_save_and_load(self):
     """Round-trip a trained Corgan model through save_obj/load_obj and
     verify the reloaded model still generates the requested samples."""

     path_checkpoint = '.'
     prefix_checkpoint = 'test'
     n_epochs = 10
     cor = Corgan()

     # dummy dataset: n samples of m binary features
     n_gen = 500
     n = 1000
     m = 7
     x = np.random.randint(low=0, high=2, size=(n, m))

     # pass n_epochs via the variable so the checkpoint filename used for
     # cleanup below cannot drift out of sync with the training run
     model_saved = cor.train(x=x,
                             n_epochs_pretrain=10,
                             n_epochs=n_epochs,
                             path_checkpoint=path_checkpoint,
                             prefix_checkpoint=prefix_checkpoint)

     # save, reload, and generate from the reloaded model
     file = 'test.pkl'
     cor.save_obj(obj=model_saved, file_name=file)
     model_loaded = cor.load_obj(file)
     x_synth = cor.generate(model=model_loaded, n_gen=n_gen)

     # clean up; guard so a missing file surfaces as the assertion below
     # rather than as FileNotFoundError
     file_ckpt = os.path.join(path_checkpoint,
                              prefix_checkpoint + ".model_epoch_%d.pth" % n_epochs)
     if os.path.isfile(file_ckpt):
         os.remove(file_ckpt)
     if os.path.isfile(file):
         os.remove(file)

     assert len(x_synth) == n_gen
コード例 #5
0
    # preprocess: derive per-column metadata, then discretize the raw
    # array into the binary matrix representation the GANs train on
    meta = pre.get_metadata(arr=arr, header=header)
    obj_d = pre.get_discretized_matrix(arr=arr, meta=meta, header=header)

    # split data into a random train subset and its held-out complement
    r_all = obj_d['x']
    n_subset_r = round(len(r_all) * args.frac_train)
    idx_trn = np.random.choice(len(r_all), n_subset_r, replace=False)
    idx_tst = np.setdiff1d(range(len(r_all)), idx_trn)
    r_trn = r_all[idx_trn, :]
    r_tst = r_all[idx_tst, :]

    # train and save model
    # NOTE(review): if args.train_type is neither 'corgan' nor 'ppgan',
    # `syn` is never bound and syn.train() raises NameError — consider an
    # explicit error for unknown train_type.
    if args.train_type == 'corgan':
        syn = Corgan(debug=debug, n_cpu=args.n_cpu_train)
    elif args.train_type == 'ppgan':
        syn = Ppgan(debug=debug, n_cpu=args.n_cpu_train)
    model = syn.train(x=r_trn, n_epochs=args.n_epoch)
    # stash preprocessing metadata alongside the model so generation can
    # invert the discretization later
    model['m'] = meta
    model['header'] = obj_d['header']
    syn.save_obj(model, outfile)

elif args.task == 'generate':

    pre = Preprocessor(missing_value=args.missing_value_generate)
    outfile = args.outprefix_generate + '.csv'

    syn = Corgan()
    model = syn.load_obj(args.file_model)
    if model['parameter_dict']['model'] == 'corgan':