def impute_gain(x, odir):
    """Round-trip ``x`` through the external GAIN script and return its output.

    ``x`` is written to ``<odir>/xmissing.csv.gz``, ``gain/gain.py`` (looked
    up under ``utilmlab.get_proj_dir()``) is launched on that file through
    ``utilmlab.exe_cmd``, and ``<odir>/ximputed.csv`` is read back.

    Args:
        x: object exposing ``to_csv`` (presumably a pandas DataFrame that
            contains missing entries -- confirm against the callers).
        odir: directory that receives the intermediate CSV files; it is not
            created here.

    Returns:
        The ``pandas.DataFrame`` parsed from the script's output file.
    """
    gain_script = '{}/gain/gain.py'.format(utilmlab.get_proj_dir())
    missing_path = '{}/xmissing.csv.gz'.format(odir)
    imputed_path = '{}/ximputed.csv'.format(odir)

    x.to_csv(missing_path, compression='gzip', index=False)

    # Remove any output left by an earlier run before launching the script
    # (presumably so a failed run cannot be mistaken for a fresh result).
    if os.path.isfile(imputed_path):
        os.remove(imputed_path)

    command = '{} {} -i {} -o {} --testall 1'.format(
        'python3', gain_script, missing_path, imputed_path)
    utilmlab.exe_cmd(logger, command)

    return pd.read_csv(imputed_path)
# Driver for the ASAC synthetic experiment: resolve the project directory and
# interpreter, prepare the result directory and logger, then launch the
# experiment script.
numexp = args.numexp

# An explicit --projdir wins over the auto-detected project directory.
if args.projdir is None:
    proj_dir = utilmlab.get_proj_dir()
else:
    proj_dir = args.projdir

alg = 'asac'

# Use the interpreter given on the command line, otherwise pick the one that
# matches the major version of the interpreter running this script.
python_exe = args.exe
if python_exe is None:
    python_exe = 'python3' if sys.version_info[0] >= 3 else 'python'

niter = args.it
version = 1

# Results are grouped per algorithm, version and host name ('unknown' when
# the HOSTNAME environment variable is not set).
resdir = '{}/result/{}/v_{}/h_{}'.format(
    proj_dir, alg, version, os.environ.get('HOSTNAME', 'unknown'))
utilmlab.ensure_dir(resdir)

logger = utilmlab.init_logger(resdir, 'log_test_{}.txt'.format(alg))

# Not used by the statements visible here; kept for any code that follows.
result_lst = []

script = Path('{}/alg/asac/Main_Synthetic_Exp1.py'.format(proj_dir))

# Forward the iteration count (--it) and the value of args.numexp (-n).
utilmlab.exe_cmd(
    logger,
    '{} {} {}'.format(
        python_exe,
        script,
        '--it {} -n {}'.format(niter, numexp)))
if dataset == 'bc': # from sklearn: A copy of UCI ML Breast # Cancer Wisconsin (Diagnostic) dataset x, y = load_breast_cancer(return_X_y=True) elif dataset == 'cover': x, y = fetch_covtype(return_X_y=True) else: assert 0 lbl = 'target' df = pd.DataFrame(x) df[lbl] = y df.to_csv(fn_csv, index=False, compression='gzip', sep=sep) utilmlab.exe_cmd( logger, '{} {} -i {} --target {} -o {} --verbose {} --it {} -n {} --separator {}' .format(python_exe, script, fn_csv, lbl, resdir, verbose, niter, nsample, sep)) utilmlab.exe_cmd( logger, '{} {} -i {} -o {} --verbose {} --it {} --target {} --model {} --separator {}' ' --nstage {} -n {}'.format( python_exe, script, fn_csv, resdir, verbose, niter, lbl, fn_model,
# Exercise the GAIN scripts on the 'bc' dataset: create a copy with missing
# values, then run the imputation script on it.
dataset = 'bc'
odir = '{}/misc/dataset_{}'.format(resdir, dataset)
# set_filenames is defined elsewhere; it yields the reference, missing and
# imputed CSV file names for this output directory.
fn_csv, fn_missing_csv, fn_imputed_csv = set_filenames(odir)
script_create_missing = Path(
    '{}/alg/gain/create_missing.py'.format(proj_dir))
script = Path('{}/alg/gain/gain.py'.format(proj_dir))
# script_ana is not used by the statements visible in this span.
script_ana = Path('{}/alg/gain/gain_ana.py'.format(proj_dir))
# NOTE(review): the nesting below was reconstructed from a whitespace-mangled
# source; both commands are assumed to run once per (islabel, autocat)
# combination -- confirm against the original file.
for islabel in [0, 1]:
    for autocat in [0, 1, 2]:
        # Step 1: write the dataset with missing values to fn_missing_csv and
        # the reference copy to fn_csv (--oref).
        utilmlab.exe_cmd(
            logger,
            '{} {} --dataset {} -o {} '
            '--oref {} --istarget {}'.format(python_exe,
                                             script_create_missing,
                                             dataset,
                                             fn_missing_csv,
                                             fn_csv,
                                             islabel))
        # Step 2: run gain.py on the missing data; '--target target' is only
        # passed when islabel is truthy.
        utilmlab.exe_cmd(
            logger,
            '{} {} -i {} {} '
            '-o {} --it {} --testall 1 --autocategorical {}'.format(
                python_exe,
                script,
                fn_missing_csv,
                '--target target' if islabel else '',
                fn_imputed_csv,
                niter,
                autocat))
result_lst = []
# (dataset name, second field) pairs; the second field looks like a target
# column name or None -- the consumer is outside this span, TODO confirm.
dataset_prop = [('spambase', None),
                ('spambase', 'label'),
                ('bc', None),
                ('spam', None),
                ('letter-recognition', None),
                ('letter-recognition', 'lettr'),
                ('letter', None)]
# Driver for the GCIT ccle experiment. Without the mutation data file there is
# nothing to run, so print a warning and exit with status 0.
if not os.path.isfile(
        '{}/alg/gcit/ccle_experiments/data/mutation.txt.gz'.format(
            proj_dir)):
    print('warning: data files for ccle_experiments not found')
    sys.exit(0)

alg = 'gcit'
version = 1

# Use the interpreter given on the command line, otherwise pick the one that
# matches the major version of the interpreter running this script.
python_exe = args.exe
if python_exe is None:
    python_exe = 'python3' if sys.version_info[0] >= 3 else 'python'

# An explicit -o wins; otherwise results go to a directory keyed by
# algorithm, version and host name ('unknown' when HOSTNAME is not set).
resdir = args.o
if resdir is None:
    resdir = '{}/result/{}/v_{}/h_{}'.format(
        proj_dir, alg, version, os.environ.get('HOSTNAME', 'unknown'))
utilmlab.ensure_dir(resdir)

logger = utilmlab.init_logger(resdir, 'log_test_{}.txt'.format(alg))

script = Path(
    '{}/alg/gcit/ccle_experiments/'
    'ccle_experiment.py'.format(proj_dir))
odir = resdir

# The experiment script is launched without any extra arguments.
utilmlab.exe_cmd(logger, '{} {}'.format(python_exe, script))
# Run every test_*.py found under <proj_dir>/alg, forwarding --it and --exe
# only when they were supplied on the command line.
f_lst = utilmlab.find_file_dir('{}/alg'.format(proj_dir), 'test_*.py')
logger.info('Unit tests found:{}'.format(f_lst))
for fpy in f_lst:
    # Any path containing 'test_alg.py' is left out of the run.
    if 'test_alg.py' not in fpy:
        cmd = '{} {} {} {}'.format(
            python_exe,
            Path(fpy),
            '' if args.it is None else '--it {}'.format(args.it),
            '' if args.exe is None else '--exe {}'.format(args.exe))
        utilmlab.exe_cmd(logger, cmd)

# Collect all notebooks under the alg directory.
# NOTE(review): this lookup uses utilmlab.get_proj_dir() while the unit-test
# lookup above uses proj_dir -- confirm the two are meant to differ.
f_lst = utilmlab.find_file_dir(
    '{}/alg'.format(utilmlab.get_proj_dir()), '*.ipynb')
logger.info('notebooks found:{}'.format(f_lst))
cwd = os.getcwd()
if not is_only_notebook: # execute all unit tests f_lst = utilmlab.find_file_dir(test_dir, 'test_*.py') logger.info('Unit tests found:{}'.format(f_lst)) for fpy in f_lst: if 'test_alg.py' in fpy: continue time_start = time.time() utilmlab.exe_cmd( logger, '{} {} {} {}'.format( python_exe, Path(fpy), '--it {}'.format(args.it) if args.it is not None else '', '--exe {}'.format(args.exe) if args.exe is not None else '')) logger.info('time={}'.format(time.time() - time_start)) # execute all notebooks f_lst = utilmlab.find_file_dir(test_dir, '*.ipynb') logger.info('notebooks found:{}'.format(f_lst)) cwd = os.getcwd() fn_html_lst = [] for fnb in f_lst:
elif dataset == 'cover': x, y = fetch_covtype(return_X_y=True) else: assert 0 lbl = 'target' df = pd.DataFrame(x) df[lbl] = y df.to_csv(fn_csv, index=False, compression='gzip', sep=sep) try: utilmlab.exe_cmd( logger, 'Rscript {} -i {} --target {} --exe {} --it {} ' ' --replication {} --projdir {}'.format( script, fn_csv, lbl, python_exe, niter, nreplication, proj_dir), assert_on_error= not generate_error # assert if an error is not expected ) except: if generate_error: logger.info('expected error generated') pass assert 0 fn_data_csv = '{}/data.csv'.format(resdir) fn_json = '{}/generated_data_properties.json'.format(resdir) utilmlab.exe_cmd( logger, 'Rscript {}/alg/knockoffgan/gen_data.r -o {} --target {} '
for dataset in ['csv', 'bc', 'spambase']: odir = '{}/dataset_{}'.format(resdir, dataset) utilmlab.ensure_dir(odir) fn_feature_score = '{}/feature_score.csv.gz'.format(odir) fn_json = '{}/feature_score.csv.json'.format(odir) fn_plot_sample = '{}/sample.png'.format(odir) fn_plot_global = '{}/global.png'.format(odir) if dataset == 'csv': fn_csv = '{}/spambase.csv.gz'.format(utilmlab.get_data_dir()) utilmlab.exe_cmd( logger, '{} {} -i {} --target label --it {} -o {}'.format( python_exe, script, fn_csv, nepoch, fn_feature_score)) else: if not data_loader_mlab.is_available(dataset): continue utilmlab.exe_cmd( logger, '{} {} --dataset {} --it {} -o {}'.format( python_exe, script, dataset, nepoch, fn_feature_score)) utilmlab.exe_cmd( logger, '{} {} -i {} -o {}'.format(python_exe, script_ana, fn_feature_score, fn_json)) utilmlab.exe_cmd( logger, '{} {} -i {} -oglobal {} -osample {}'.format( python_exe, script_plot, fn_feature_score, fn_plot_global,
'--{} {}'.format(el, args_d[el]) for el in args_d.keys()]) dataset = args_d['dataset'] odir = '{}/dataset_{}'.format( odir, dataset) utilmlab.ensure_dir(odir) utilmlab.exe_cmd( logger, '{} {} {} {} {} {} {}'.format( python_exe, script, cmd_arg, '--it {}'.format(niter), '-o {}'.format(odir), '--itout {}'.format( args.itout) if args.itout is not None else '', '--itrs {}'.format( args.itrs) if args.itrs is not None else '')) utilmlab.exe_cmd( logger, '{} {} {} {} {}'.format( python_exe, script_ana, cmd_arg, '-o {}'.format(odir), '--itout {}'.format( args.itout) if args.itout is not None else ''))