def test_which_array(self):
    """Realism.which on a NumPy array reports the item's position first.

    Picks the element at a known index of a three-element string array and
    asserts that the first entry of the ``which`` result equals that index.
    """
    rea = Realism()
    letters = np.array(['a', 'b', 'c'])
    expected_pos = 1
    target = letters[expected_pos]

    found = rea.which(letters, target)
    assert expected_pos == found[0]
def test_which_list(self):
    """Realism.which on a plain list reports the item's position first.

    Picks the element at a known index of a three-element list and asserts
    that the first entry of the ``which`` result equals that index.
    """
    rea = Realism()
    letters = ['a', 'b', 'c']
    expected_pos = 1
    target = letters[expected_pos]

    found = rea.which(letters, target)
    assert expected_pos == found[0]
def test_validate_univariate(self):
    """validate_univariate must report equal frequencies for identical inputs.

    The same random binary matrix is passed as both the real and the
    synthetic array; every column's ``frq_r`` entry is then expected to
    equal its ``frq_s`` entry.
    """
    rea = Realism()
    n_rows = 1000
    n_cols = 17
    prefix = 'col'

    # Zero-pad the column number so every header label has the same width.
    digits = len(str(n_cols - 1))
    header = np.array(
        [prefix + str(col).zfill(digits) for col in range(n_cols)],
        dtype='<U' + str(digits + len(prefix)),
    )

    data = np.random.randint(low=0, high=2, size=(n_rows, n_cols))
    res = rea.validate_univariate(arr_r=data, arr_s=data, header=header)

    # One flag per column: True when real and synthetic frequencies agree.
    matches = np.array(
        [bool(res['frq_r'][col] == res['frq_s'][col]) for col in range(n_cols)]
    )
    assert matches.all()
def test_gan_train_match(self):
    """Two gan_train runs on the same data should give nearly equal AUCs.

    Builds a random binary feature matrix, derives labels by rounding a
    logistic function of a noisy linear combination of the features, and
    calls ``gan_train`` twice with that data as both the "real" and the
    "synthetic" set. The two reported ``'auc'`` values must differ by less
    than ``threshold``.
    """
    rea = Realism()
    n_rows = 1000
    half_cols = 3
    threshold = 0.05
    max_beta = 10
    n_epoch = 100

    # Coefficients: first half strictly negative, second half non-negative,
    # flattened into one vector of length 2 * half_cols.
    neg_coef = np.random.randint(low=-max_beta, high=0, size=(half_cols, 1))
    pos_coef = np.random.randint(low=0, high=max_beta, size=(half_cols, 1))
    beta = np.concatenate((neg_coef.ravel(), pos_coef.ravel()))

    x_real = np.random.randint(low=0, high=2, size=(n_rows, half_cols * 2))
    noise = 0.5 * np.random.random(size=(n_rows, 1))
    logits = (x_real @ beta).reshape(n_rows, 1) + noise
    y_real = np.round(1.0 / (1.0 + np.exp(-logits))).reshape(n_rows)

    first_run = rea.gan_train(x_synth=x_real, y_synth=y_real,
                              x_real=x_real, y_real=y_real, n_epoch=n_epoch)
    second_run = rea.gan_train(x_synth=x_real, y_synth=y_real,
                               x_real=x_real, y_real=y_real, n_epoch=n_epoch)

    auc_gap = abs(first_run['auc'] - second_run['auc'])
    assert auc_gap < threshold
def test_gan_test_mismatch(self):
    """gan_test with flipped synthetic labels should give a distinct AUC.

    Builds a random binary feature matrix, derives labels by rounding a
    logistic function of a noisy linear combination of the features, and
    obtains a baseline ``'auc'`` from ``gan_train`` on that data. It then
    calls ``gan_test`` with the same features but inverted labels as the
    synthetic set; the two AUCs must differ by more than ``threshold``.
    """
    rea = Realism()
    n_rows = 1000
    half_cols = 3
    threshold = 0.05
    max_beta = 10
    n_epoch = 100

    # Coefficients: first half strictly negative, second half non-negative,
    # flattened into one vector of length 2 * half_cols.
    neg_coef = np.random.randint(low=-max_beta, high=0, size=(half_cols, 1))
    pos_coef = np.random.randint(low=0, high=max_beta, size=(half_cols, 1))
    beta = np.concatenate((neg_coef.ravel(), pos_coef.ravel()))

    x_real = np.random.randint(low=0, high=2, size=(n_rows, half_cols * 2))
    noise = 0.5 * np.random.random(size=(n_rows, 1))
    logits = (x_real @ beta).reshape(n_rows, 1) + noise
    y_real = np.round(1.0 / (1.0 + np.exp(-logits))).reshape(n_rows)

    # flip label to ensure AUCs are very different
    x_synth = x_real
    y_synth = 1 - y_real

    baseline = rea.gan_train(x_synth=x_real, y_synth=y_real,
                             x_real=x_real, y_real=y_real, n_epoch=n_epoch)
    flipped = rea.gan_test(x_synth, y_synth, x_real, y_real, n_epoch=n_epoch)

    auc_gap = abs(baseline['auc'] - flipped['auc'])
    assert auc_gap > threshold
elif model['parameter_dict']['model'] == 'ppgan': syn = Ppgan(debug=False, n_cpu=1) s = syn.generate(model, n_gen=args.generate_size) f = pre.restore_matrix(arr=s, meta=model['m'], header=model['header']) np.savetxt(fname=outfile, fmt='%s', X=f['x'], delimiter=',', header=','.join(f['header']), comments='') elif args.task == 'realism': rea = Realism() pre = Preprocessor(missing_value=args.missing_value_realism) r_trn = pre.read_file(args.file_realism_real_train) r_tst = pre.read_file(args.file_realism_real_test) s = pre.read_file(args.file_realism_synth) # analysis if args.analysis_realism == 'feature_frequency': res = rea.feature_frequency(mat_f_r_trn=r_trn['x'], mat_f_r_tst=r_tst['x'], mat_f_s=s['x'], header=r_trn['header'], missing_value=args.missing_value_realism) elif args.analysis_realism == 'feature_effect': res = rea.feature_effect(mat_f_r_trn=r_trn['x'],