def main():
    """Build the sampler performance tables and save them to disk.

    Expects exactly one command line argument: the path to the config file.
    The traces are located with ``io.find_traces``, turned into two tables by
    ``build_metrics_array`` and written to the configured output path with
    ``io.save_pd`` under the names ``perf`` and ``perf_sync``.

    Exits with a usage message (status 1) when the argument count is wrong.
    """
    # Explicit check instead of an assert: asserts disappear under
    # ``python -O`` and give the user a traceback instead of a hint.
    if len(sys.argv) != 2:
        sys.exit('usage: expected 1 argument (CONFIG_FILE), got %d'
                 % (len(sys.argv) - 1))

    config_file = io.abspath2(sys.argv[1])
    config = load_config(config_file)
    ext = config['csv_ext']

    samplers, examples, file_lookup = io.find_traces(
        config['input_path'], config['exact_name'], ext)
    print('found %d samplers and %d examples'
          % (len(samplers), len(examples)))
    print('%d files in lookup table'
          % sum(len(file_lookup[k]) for k in file_lookup))
    # examples = examples[:3]  # TODO remove

    # This could get big
    print(samplers)
    print(examples)

    # list() keeps the concatenation valid whether keys() returns a list or
    # a view.
    metrics = list(MOMENT_METRICS.keys()) + list(OTHER_METRICS.keys())
    perf_df, sync_perf = build_metrics_array(samplers, examples, metrics,
                                             file_lookup, config)

    # Save TS, make sure it has enough info to compute ess and eff
    io.save_pd(perf_df, config['output_path'], 'perf', ext)
    # Save diagnostics, make sure it has enough info to compute ess and eff
    io.save_pd(sync_perf, config['output_path'], 'perf_sync', ext,
               index=False)
    # Could also include option to dump everything in netCDF if we want
    print('done')
def load_config(config_file):
    """Read the settings this phase needs from an ini-style config file.

    Parameters
    ----------
    config_file : str
        Absolute path to the config file (enforced with an assert).

    Returns
    -------
    dict
        Keys ``input_path``, ``output_path``, ``n_grid``, ``n_chains``,
        ``csv_ext``, ``meta_ext`` and ``exact_name``. ``input_path`` is taken
        from the ``phase3`` section's ``output_path`` and ``output_path`` from
        the ``phase4`` section; both are passed through ``io.abspath2``.
    """
    assert os.path.isabs(config_file)

    parser = ConfigParser.RawConfigParser()
    parser.read(config_file)

    return {
        # The input of this phase is wherever phase3 wrote its output.
        'input_path': io.abspath2(parser.get('phase3', 'output_path')),
        'output_path': io.abspath2(parser.get('phase4', 'output_path')),
        'n_grid': parser.getint('phase3', 'n_grid'),
        'n_chains': parser.getint('phase3', 'n_chains'),
        'csv_ext': parser.get('common', 'csv_ext'),
        'meta_ext': parser.get('common', 'meta_ext'),
        'exact_name': parser.get('common', 'exact_name'),
    }
def main():
    """Run ``run_experiment`` for one parameter name and one sampler.

    Command line arguments, in order: CONFIG_FILE PARAM_NAME SAMPLER.

    Exits with a usage message (status 1) when the argument count is wrong
    and raises ValueError when PARAM_NAME is rejected by ``io.is_safe_name``.
    """
    # Explicit checks instead of asserts: asserts are stripped under
    # ``python -O``, which would let bad input through silently.
    if len(sys.argv) != 4:
        sys.exit('usage: expected 3 arguments '
                 '(CONFIG_FILE PARAM_NAME SAMPLER), got %d'
                 % (len(sys.argv) - 1))

    config_file = io.abspath2(sys.argv[1])
    param_name = sys.argv[2]
    sampler = sys.argv[3]
    if not io.is_safe_name(param_name):
        raise ValueError('unsafe parameter name: %r' % (param_name,))

    config = io.load_config(config_file)
    run_experiment(config, param_name, sampler)
    print('done')
def main():
    """Submit one slurm job per (model, chain index, sampler) combination.

    Takes at most one command line argument, the config file path, which
    defaults to ``../config.ini``. For every model the exact samples are
    produced in-process with ``run_experiment``; the sampler runs are then
    handed to ``sbatch`` via ``slurm_job_main.sh``.

    Raises
    ------
    Exception
        If more than one command line argument is given.
    """
    # Local import: only this launcher needs it.
    import subprocess

    num_args = len(sys.argv) - 1
    if num_args < 1:
        config_path = '../config.ini'
    elif num_args > 1:
        raise Exception('too many arguments: %d. %d expected' % (num_args, 1))
    else:
        config_path = sys.argv[1]
    config_file = io.abspath2(config_path)

    np.random.seed(3463)

    config = io.load_config(config_file)

    model_list = io.get_model_list(config['input_path'], config['pkl_ext'])
    # In case we don't finish at least random subset
    np.random.shuffle(model_list)
    # model_list = model_list[:5]  # TODO remove, test only
    assert (all(io.is_safe_name(ss) for ss in model_list))

    print('using models:')
    print(model_list)

    # Sort for reproducibility
    sampler_list = sorted(BUILD_STEP_PM.keys() + BUILD_STEP_MC.keys())
    print('using samplers:')
    print(sampler_list)

    # Loop order is model -> chain index -> sampler, so if the launcher is
    # killed every model handled so far has had whole rounds of samplers
    # submitted.
    scheduled_jobs = set(queued_or_running_jobs())
    for model_name in model_list:
        # Get the exact samples
        run_experiment(config, model_name, config['exact_name'])
        # Get the sampler samples
        for i in xrange(config['n_chains']):
            # TODO could put ADVI init here to keep it fixed across samplers
            for sampler in sampler_list:
                t = time()
                job_name = "slurm-%s-%s-%d" % (model_name, sampler, i)
                if job_name in scheduled_jobs:
                    print('%s already in scheduled jobs, but running anyway'
                          % job_name)

                # Build an argv list rather than a shell string so that
                # paths containing spaces or shell metacharacters reach
                # sbatch as single, uninterpreted arguments.
                sbatch_argv = [
                    'sbatch',
                    '-c', '1',
                    '--job-name=%s' % job_name,
                    '-t', '45:00',
                    '--mem=32gb',
                    '--output', '%s.out' % job_name,
                    'slurm_job_main.sh',
                    config_file, model_name, sampler,
                ]
                print('Executing: %s' % ' '.join(sbatch_argv))
                # Submission stays best-effort: report failures and carry on
                # with the remaining jobs.
                try:
                    status = subprocess.call(sbatch_argv)
                except OSError as err:
                    print('could not launch sbatch for %s: %s'
                          % (job_name, err))
                else:
                    if status != 0:
                        print('sbatch exited with status %d for %s'
                              % (status, job_name))
                print('wall time %fs' % (time() - t))
    print('done')
def main():
    """Compare each original chain with its exact samples, column by column.

    Expects exactly one command line argument: the path to the config file.
    For every example returned by ``io.find_traces`` the original chain and
    the exact sample file are loaded, and for each column the results of
    ``ss.ttest_ind`` (unequal variances), ``ss.levene`` (median centered,
    printed as BF) and ``ss.ks_2samp`` are printed.

    Exits with a usage message (status 1) when the argument count is wrong.
    """
    # Explicit check instead of an assert: asserts disappear under
    # ``python -O`` and give the user a traceback instead of a hint.
    if len(sys.argv) != 2:
        sys.exit('usage: expected 1 argument (CONFIG_FILE), got %d'
                 % (len(sys.argv) - 1))
    config_file = io.abspath2(sys.argv[1])

    config = ConfigParser.RawConfigParser()
    config.read(config_file)
    input_original = io.abspath2(config.get('phase1', 'output_path'))
    input_exact = io.abspath2(config.get('phase3', 'output_path'))
    exact_name = config.get('common', 'exact_name')
    csv_ext = config.get('common', 'csv_ext')

    sep = '_'

    _, examples, file_lookup = io.find_traces(input_exact, exact_name, csv_ext)

    for example in examples:
        # Everything before the last separator names the original chain.
        original_chain, _ = example.rsplit(sep, 1)
        X_original = io.load_np(input_original, original_chain, csv_ext)
        assert (X_original.ndim == 2)
        D = X_original.shape[1]

        # Single-element unpacking: fails unless exactly one file matches.
        fname_exact, = file_lookup[(example, exact_name)]
        # NOTE(review): empty extension, presumably because the looked-up
        # file name already carries it -- confirm against io.load_np.
        X_exact = io.load_np(input_exact, fname_exact, '')
        assert (X_exact.ndim == 2 and X_exact.shape[1] == D)

        print(example)
        for ii in xrange(D):
            col_original = X_original[:, ii]
            col_exact = X_exact[:, ii]

            stat, pval = ss.ttest_ind(col_original, col_exact,
                                      equal_var=False)
            print('dim %d t: %f p = %f' % (ii, stat, pval))
            stat, pval = ss.levene(col_original, col_exact, center='median')
            print('dim %d BF: %f p = %f' % (ii, stat, pval))
            stat, pval = ss.ks_2samp(col_original, col_exact)
            print('dim %d KS: %f p = %f' % (ii, stat, pval))
        print('-' * 20)
    print('done')
def main():
    '''This program can be run in parallel across different MC_chain files
    indep. This is a top level routine so I am not worried about needing a
    verbosity setting.

    Command line arguments, in order: CONFIG_FILE MC_CHAIN_NAME.

    Exits with a usage message (status 1) when the argument count is wrong
    and raises ValueError when MC_CHAIN_NAME is rejected by
    ``io.is_safe_name``.
    '''
    # Usage error rather than an assert: friendlier for the user, and it
    # still fires under ``python -O``.
    if len(sys.argv) != 3:
        sys.exit('usage: expected 2 arguments '
                 '(CONFIG_FILE MC_CHAIN_NAME), got %d'
                 % (len(sys.argv) - 1))

    config_file = io.abspath2(sys.argv[1])
    mc_chain_name = sys.argv[2]
    if not io.is_safe_name(mc_chain_name):
        raise ValueError('unsafe chain name: %r' % (mc_chain_name,))

    print('config %s' % config_file)
    config = io.load_config(config_file)

    run_experiment(config, mc_chain_name)
    print('done')
def main():
    """Print a moments report for every chain file in the phase1 output.

    Expects exactly one command line argument: the path to the config file.
    Files in the ``phase1`` output path whose name ends with the configured
    ``csv_ext`` are loaded one by one (in shuffled order) and passed to
    ``moments_report_w_burn``.

    Exits with a usage message (status 1) when the argument count is wrong.
    """
    # Usage error rather than an assert: friendlier for the user, and it
    # still fires under ``python -O``.
    if len(sys.argv) != 2:
        sys.exit('usage: expected 1 argument (CONFIG_FILE), got %d'
                 % (len(sys.argv) - 1))
    config_file = io.abspath2(sys.argv[1])

    config = ConfigParser.RawConfigParser()
    config.read(config_file)
    input_path = io.abspath2(config.get('phase1', 'output_path'))
    data_ext = config.get('common', 'csv_ext')

    print('searching for input data in')
    print(input_path)
    # Sort first so the shuffle starts from a listing-order independent state.
    chain_files = sorted(f for f in os.listdir(input_path)
                         if f.endswith(data_ext))
    np.random.shuffle(chain_files)
    print('found %d files' % len(chain_files))

    for chain in chain_files:
        print('-' * 20)
        print(chain)
        # The listed file name already ends with the extension, so none is
        # appended here.
        X = io.load_np(input_path, chain, '')
        assert (X.ndim == 2)
        moments_report_w_burn(X)
    print('done')
def main():
    """Run every model/sampler experiment sequentially in this process.

    Takes at most one command line argument, the config file path, which
    defaults to ``../config.ini``. The exact samples are generated for all
    models first; afterwards each sampler is run ``n_chains`` times per
    model. A failing sampler run is reported and skipped.

    Raises
    ------
    Exception
        If more than one command line argument is given.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) > 1:
        raise Exception('too many arguments: %d. %d expected'
                        % (len(cli_args), 1))
    config_path = cli_args[0] if cli_args else '../config.ini'
    config_file = io.abspath2(config_path)

    config = io.load_config(config_file)
    exact_name = config['exact_name']

    model_list = io.get_model_list(config['input_path'], config['pkl_ext'])
    # model_list = model_list[:5]  # TODO remove, test only
    assert all(io.is_safe_name(name) for name in model_list)

    print('using models:')
    print(model_list)

    # Sorted so that the run order is reproducible.
    sampler_list = sorted(BUILD_STEP_PM.keys() + BUILD_STEP_MC.keys())
    print('using samplers:')
    print(sampler_list)

    # First pass: exact samples for every model.
    for model_name in model_list:
        run_experiment(config, model_name, exact_name)

    # Second pass: the samplers. Each chain repetition cycles through all
    # samplers, so an interrupted run leaves an even spread over samplers.
    for model_name in model_list:
        for _ in xrange(config['n_chains']):
            # TODO could put ADVI init here to keep it fixed across samplers
            for sampler in sampler_list:
                started = time()
                try:
                    run_experiment(config, model_name, sampler)
                except Exception as err:
                    # Best effort: log the failure and move on.
                    print('%s/%s failed' % (model_name, sampler))
                    print(str(err))
                elapsed = time() - started
                print('wall time %fs' % elapsed)
    print('done')
def main():
    """Run ``try_run_experiment`` over all input chains in parallel.

    Takes at most one command line argument, the config file path, which
    defaults to ``../config.ini``. The chains come from ``io.get_chains`` and
    are dispatched through ``Parallel`` with ``config['njobs']`` workers.

    Raises
    ------
    Exception
        If more than one command line argument is given.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) > 1:
        raise Exception('too many arguments: %d. %d expected'
                        % (len(cli_args), 1))
    config_path = cli_args[0] if cli_args else '../config.ini'
    config_file = io.abspath2(config_path)

    print('config %s' % config_file)
    config = io.load_config(config_file)

    input_path = config['input_path']
    print(input_path)
    chains = io.get_chains(input_path, config['csv_ext'],
                           config['size_limit_bytes'])
    print('inputs chains:')
    print(chains)

    n_workers = config['njobs']
    print('Running njobs=%d in parallel' % n_workers)

    # Bind the config once so that each job only needs the chain.
    make_job = delayed(partial(try_run_experiment, config))
    jobs = [make_job(chain) for chain in chains]
    Parallel(n_jobs=n_workers)(jobs)
    print('done')