Esempio n. 1
0
 def test_which_array(self):
     """Check that ``which`` locates an element taken from a NumPy array.

     An element is picked from a known position and the first entry of the
     value returned by ``Realism.which`` must equal that position.
     """
     realism = Realism()
     values = np.array(['a', 'b', 'c'])
     expected_pos = 1
     target = values[expected_pos]

     found = realism.which(values, target)
     assert expected_pos == found[0]
Esempio n. 2
0
 def test_which_list(self):
     """Check that ``which`` locates an element taken from a plain list.

     An element is picked from a known position and the first entry of the
     value returned by ``Realism.which`` must equal that position.
     """
     realism = Realism()
     values = ['a', 'b', 'c']
     expected_pos = 1
     target = values[expected_pos]

     found = realism.which(values, target)
     assert expected_pos == found[0]
Esempio n. 3
0
 def test_validate_univariate(self):
     """Validate a random binary matrix against itself.

     The same matrix is passed as both ``arr_r`` and ``arr_s``, so for every
     column the ``'frq_r'`` and ``'frq_s'`` entries of the result must be
     equal.
     """
     realism = Realism()
     n_rows = 1000
     n_cols = 17

     # Column names are the prefix followed by a zero-padded column index,
     # e.g. 'col00' ... 'col16'; the dtype width fits exactly that length.
     prefix = 'col'
     pad = len(str(n_cols - 1))
     header = np.array(
         [prefix + str(col).zfill(pad) for col in range(n_cols)],
         dtype='<U' + str(pad + len(prefix)),
     )

     data = np.random.randint(low=0, high=2, size=(n_rows, n_cols))
     res = realism.validate_univariate(arr_r=data, arr_s=data, header=header)

     # One flag per column: True when both frequencies agree.
     same = np.array(
         [bool(res['frq_r'][col] == res['frq_s'][col])
          for col in range(n_cols)]
     )

     assert same.all()
Esempio n. 4
0
 def test_gan_train_match(self):
     """Two identical ``gan_train`` runs must report nearly the same AUC.

     A random binary feature matrix is generated together with labels
     obtained by rounding a sigmoid of a noisy linear combination of the
     features. ``gan_train`` is then called twice with exactly the same
     arguments and the two ``'auc'`` values must differ by less than the
     threshold.
     """
     realism = Realism()

     n_rows = 1000
     half_cols = 3
     tolerance = 0.05
     beta_bound = 10
     n_epoch = 100

     # Negative coefficients first, then non-negative ones; np.append
     # without an axis flattens both pieces into one 1-D vector.
     neg_beta = np.random.randint(low=-beta_bound, high=0, size=(half_cols, 1))
     pos_beta = np.random.randint(low=0, high=beta_bound, size=(half_cols, 1))
     beta = np.append(neg_beta, pos_beta)

     x_real = np.random.randint(low=0, high=2, size=(n_rows, half_cols * 2))
     linear = np.reshape(np.matmul(x_real, beta), (n_rows, 1))
     noisy = linear + 0.5 * np.random.random(size=(n_rows, 1))
     y_real = np.reshape(np.round(1.0 / (1.0 + np.exp(-noisy))), (n_rows,))

     # The real data plays both the "synthetic" and the "real" role.
     call_args = dict(x_synth=x_real, y_synth=y_real,
                      x_real=x_real, y_real=y_real, n_epoch=n_epoch)
     first_run = realism.gan_train(**call_args)
     second_run = realism.gan_train(**call_args)

     auc_gap = abs(first_run['auc'] - second_run['auc'])
     assert auc_gap < tolerance
Esempio n. 5
0
 def test_gan_test_mismatch(self):
     """``gan_test`` on flipped labels must give a clearly different AUC.

     A random binary feature matrix is generated together with labels
     obtained by rounding a sigmoid of a noisy linear combination of the
     features. The baseline is ``gan_train`` on the real data alone; the
     comparison is ``gan_test`` where the synthetic labels are the real
     labels inverted. The two ``'auc'`` values must differ by more than the
     threshold.
     """
     realism = Realism()

     n_rows = 1000
     half_cols = 3
     tolerance = 0.05
     beta_bound = 10
     n_epoch = 100

     # Negative coefficients first, then non-negative ones; np.append
     # without an axis flattens both pieces into one 1-D vector.
     neg_beta = np.random.randint(low=-beta_bound, high=0, size=(half_cols, 1))
     pos_beta = np.random.randint(low=0, high=beta_bound, size=(half_cols, 1))
     beta = np.append(neg_beta, pos_beta)

     x_real = np.random.randint(low=0, high=2, size=(n_rows, half_cols * 2))
     linear = np.reshape(np.matmul(x_real, beta), (n_rows, 1))
     noisy = linear + 0.5 * np.random.random(size=(n_rows, 1))
     y_real = np.reshape(np.round(1.0 / (1.0 + np.exp(-noisy))), (n_rows,))

     # Flip the labels (features are reused unchanged) so that the AUCs
     # are very different.
     flipped_labels = 1 - y_real

     baseline = realism.gan_train(x_synth=x_real, y_synth=y_real,
                                  x_real=x_real, y_real=y_real,
                                  n_epoch=n_epoch)
     mismatched = realism.gan_test(x_real, flipped_labels, x_real, y_real,
                                   n_epoch=n_epoch)

     auc_gap = abs(baseline['auc'] - mismatched['auc'])
     assert auc_gap > tolerance
Esempio n. 6
0
    elif model['parameter_dict']['model'] == 'ppgan':
        syn = Ppgan(debug=False, n_cpu=1)

    s = syn.generate(model, n_gen=args.generate_size)

    f = pre.restore_matrix(arr=s, meta=model['m'], header=model['header'])
    np.savetxt(fname=outfile,
               fmt='%s',
               X=f['x'],
               delimiter=',',
               header=','.join(f['header']),
               comments='')

elif args.task == 'realism':

    rea = Realism()
    pre = Preprocessor(missing_value=args.missing_value_realism)
    r_trn = pre.read_file(args.file_realism_real_train)
    r_tst = pre.read_file(args.file_realism_real_test)
    s = pre.read_file(args.file_realism_synth)

    # analysis
    if args.analysis_realism == 'feature_frequency':
        res = rea.feature_frequency(mat_f_r_trn=r_trn['x'],
                                    mat_f_r_tst=r_tst['x'],
                                    mat_f_s=s['x'],
                                    header=r_trn['header'],
                                    missing_value=args.missing_value_realism)

    elif args.analysis_realism == 'feature_effect':
        res = rea.feature_effect(mat_f_r_trn=r_trn['x'],