def main():
    """CLI entry point for the synthetic GPRF location-inference experiments.

    Parses command-line options describing the synthetic dataset, the
    blocking scheme, and the optimizer settings, then dispatches to
    do_run() using the experiment directory derived from those options.

    NOTE(review): this module defines main() twice; a later definition in
    this file shadows this one, so only the last binding is actually used.
    """
    mkdir_p(EXP_DIR)
    parser = argparse.ArgumentParser(description='gprf_opt')
    # required=True: ntrain is used unconditionally below
    # (args.ntrain + args.ntest), which would raise TypeError on None if
    # the flag were omitted; fail early with a clear argparse error.
    parser.add_argument('--ntrain', dest='ntrain', type=int, required=True,
                        help="number of points to locate")
    parser.add_argument('--ntest', dest='ntest', type=int, default=500,
                        help="sample additional test points to evaluate predictive accuracy (not in paper)")
    parser.add_argument('--nblocks', dest='nblocks', default=1, type=int,
                        help="divides the sampled points into a grid of this many blocks. May do strange things if number is not a perfect square. Mutually exclusive with rpc_blocksize. ")
    parser.add_argument('--rpc_blocksize', dest='rpc_blocksize', default=-1, type=int,
                        help="divides the sampled points into blocks using recursive projection clustering, aiming for this target blocksize. Mutually exclusive with nblocks. ")
    # NOTE(review): lscale and obs_std also have no defaults and are passed
    # straight through to do_run(); presumably they are required as well —
    # confirm against do_run() before marking them required.
    parser.add_argument('--lscale', dest='lscale', type=float,
                        help="SE kernel lengthscale for the sampled functions")
    parser.add_argument('--obs_std', dest='obs_std', type=float,
                        help="std of Gaussian noise corrupting the X locations")
    parser.add_argument('--local_dist', dest='local_dist', default=1.0, type=float,
                        help="when using RPC clustering, specifies minimum kernel value necessary to connect blocks in a GPRF (1.0 corresponds to local GPs). When using grids of blocks, any setting other than 1.0 yields a GPRF with neighbor connections. \n")
    parser.add_argument('--method', dest='method', default="l-bfgs-b", type=str,
                        help="any optimization method supported by scipy.optimize.minimize (l-bfgs-b)")
    parser.add_argument('--seed', dest='seed', default=0, type=int,
                        help="seed for generating synthetic data")
    parser.add_argument('--yd', dest='yd', default=50, type=int,
                        help="number of output dimensions to sample (50)")
    parser.add_argument('--maxsec', dest='maxsec', default=3600, type=int,
                        help="maximum number of seconds to run the optimization (3600)")
    parser.add_argument('--task', dest='task', default="x", type=str,
                        help="'x', 'cov', or 'xcov' to infer locations, kernel hyperparams, or both. (x)")
    parser.add_argument('--analyze', dest='analyze', default=False, action="store_true",
                        help="just analyze existing saved inference results, don't do any new inference")
    parser.add_argument('--analyze_full', dest='analyze_full', default=False, action="store_true",
                        help="do a fuller (slower) analysis that also computes predictive accuracy")
    parser.add_argument('--parallel', dest='parallel', default=False, action="store_true",
                        help="run multiple threads for local/gprf blocks. \nbe careful when combining this with multithreaded BLAS libraries.")
    parser.add_argument('--init_seed', dest='init_seed', default=-1, type=int,
                        help="if >=0, initialize optimization to locations generated from this random seed.")
    parser.add_argument('--init_true', dest='init_true', default=False, action="store_true",
                        help="initialize optimization at true X locations instead of observed locations (False)")
    parser.add_argument('--noise_var', dest='noise_var', default=0.01, type=float,
                        help="variance of iid noise in synthetic Y values")
    parser.add_argument('--gplvm_type', dest='gplvm_type', default="gprf", type=str,
                        help="use 'sparse' or 'bayesian' for GPy inducing point comparison (gprf)")
    parser.add_argument('--num_inducing', dest='num_inducing', default=0, type=int,
                        help="number of inducing points to use with sparse approximations")
    args = parser.parse_args()

    d = exp_dir(args)
    # Total sampled points = training points to locate + held-out test points.
    do_run(d=d, lscale=args.lscale, obs_std=args.obs_std,
           local_dist=args.local_dist, n=args.ntrain + args.ntest,
           ntrain=args.ntrain, nblocks=args.nblocks, yd=args.yd,
           method=args.method, rpc_blocksize=args.rpc_blocksize,
           seed=args.seed, maxsec=args.maxsec, analyze_only=args.analyze,
           analyze_full=args.analyze_full, task=args.task,
           init_seed=args.init_seed, noise_var=args.noise_var,
           parallel=args.parallel, gplvm_type=args.gplvm_type,
           num_inducing=args.num_inducing, init_true=args.init_true)
def seismic_exp_dir(args):
    """Create (if needed) and return the output directory for a seismic run.

    The directory name encodes the run configuration (point count, RPC block
    size, threshold, initialization, task, synthetic lengthscale, obs noise)
    so that repeated runs with the same settings share a directory.

    Args:
        args: parsed argparse namespace with npts, rpc_blocksize, threshold,
            init_cov, init_x, task, synth_lscale, obs_std attributes.

    Returns:
        Absolute path of the (created) experiment directory.
    """
    npts, block_size, thresh, init_cov, init_x, task, synth_lscale, obs_std = \
        args.npts, args.rpc_blocksize, args.threshold, args.init_cov, \
        args.init_x, args.task, args.synth_lscale, args.obs_std
    import hashlib
    base_dir = os.path.join(os.environ["HOME"], "seismic_experiments")
    init_str = "default"
    if init_cov or init_x:
        # Fix: md5() requires bytes on Python 3, so encode the combined
        # initialization spec; coalesce None to "" so that supplying only
        # one of init_cov/init_x no longer raises a TypeError.
        init_key = (init_cov or "") + (init_x or "")
        init_str = "_%s" % hashlib.md5(init_key.encode("utf-8")).hexdigest()[:8]
    run_name = "%d_%d_%.4f_%s_%s_%.0f_%.1f" % (npts, block_size, thresh,
                                               init_str, task, synth_lscale,
                                               obs_std)
    d = os.path.join(base_dir, run_name)
    mkdir_p(d)
    return d
def seismic_exp_dir(args):
    """Build, create, and return the per-run seismic experiment directory.

    Encodes the configuration (npts, rpc_blocksize, threshold, an md5 tag of
    the initialization files, task, synthetic lengthscale, obs noise) into
    the directory name under $HOME/seismic_experiments.

    NOTE(review): duplicate definition — an identical seismic_exp_dir also
    appears earlier in this file; this later binding is the one in effect.

    Returns:
        Path of the created directory.
    """
    npts, block_size, thresh, init_cov, init_x, task, synth_lscale, obs_std = \
        args.npts, args.rpc_blocksize, args.threshold, args.init_cov, \
        args.init_x, args.task, args.synth_lscale, args.obs_std
    import hashlib
    base_dir = os.path.join(os.environ["HOME"], "seismic_experiments")
    init_str = "default"
    if init_cov or init_x:
        # Fix: hashlib.md5 needs bytes on Python 3; also tolerate one of
        # init_cov/init_x being None (original raised TypeError there).
        tag_source = (init_cov or "") + (init_x or "")
        init_str = "_%s" % hashlib.md5(tag_source.encode("utf-8")).hexdigest()[:8]
    run_name = "%d_%d_%.4f_%s_%s_%.0f_%.1f" % (
        npts, block_size, thresh, init_str, task, synth_lscale, obs_std)
    d = os.path.join(base_dir, run_name)
    mkdir_p(d)
    return d
def sample_data(n, ntrain, lscale, obs_std, yd, seed, centers, noise_var,
                rpc_blocksize=-1):
    """Load (or sample and cache) a synthetic dataset, then assign blocks.

    Datasets are cached as pickles under
    $HOME/gprf_experiments/synthetic_datasets, keyed by the sampling
    parameters; a cache miss triggers a fresh SampledData draw which is
    then written back. Blocks are assigned either from explicit `centers`
    or, when `centers` is None, by seeded recursive projection clustering
    with target size `rpc_blocksize`.

    Returns:
        The SampledData instance with its blocking configured.
    """
    cache_dir = os.path.join(os.environ["HOME"], "gprf_experiments",
                             "synthetic_datasets")
    mkdir_p(cache_dir)
    # noise_var only appears in the cache key when it differs from the
    # historical default, keeping old cache filenames valid.
    noise_tag = "" if noise_var == 0.01 else "_%.4f" % noise_var
    cache_path = os.path.join(
        cache_dir,
        "%d_%d_%.6f_%.6f_%d_%d%s.pkl" % (n, ntrain, lscale, obs_std, yd,
                                         seed, noise_tag))
    try:
        with open(cache_path, 'rb') as fh:
            sdata = pickle.load(fh)
    except IOError:
        # Cache miss: draw a fresh dataset and persist it for next time.
        sdata = SampledData(n=n, ntrain=ntrain, lscale=lscale,
                            obs_std=obs_std, seed=seed, yd=yd,
                            noise_var=noise_var)
        with open(cache_path, 'wb') as fh:
            pickle.dump(sdata, fh)
    if centers is not None:
        sdata.set_centers(centers)
    else:
        # Seed so the RPC clustering is reproducible for a given dataset.
        np.random.seed(seed)
        sdata.cluster_rpc(rpc_blocksize)
    return sdata
def sample_data(n, ntrain, lscale, obs_std, yd, seed, centers, noise_var,
                rpc_blocksize=-1):
    """Return a (possibly cached) SampledData with block structure applied.

    Looks for a pickled dataset matching the sampling parameters under
    $HOME/gprf_experiments/synthetic_datasets; if none exists, samples one
    and writes it to the cache. Afterwards either installs the provided
    block `centers`, or runs seeded RPC clustering at `rpc_blocksize`.

    NOTE(review): duplicate definition — an equivalent sample_data appears
    earlier in this file; this later binding is the one in effect.
    """
    base = os.path.join(os.environ["HOME"], "gprf_experiments",
                        "synthetic_datasets")
    mkdir_p(base)
    suffix = "" if noise_var == 0.01 else "_%.4f" % noise_var
    name = "%d_%d_%.6f_%.6f_%d_%d%s.pkl" % (n, ntrain, lscale, obs_std,
                                            yd, seed, suffix)
    full_path = os.path.join(base, name)
    try:
        with open(full_path, 'rb') as infile:
            sdata = pickle.load(infile)
    except IOError:
        sdata = SampledData(n=n, ntrain=ntrain, lscale=lscale,
                            obs_std=obs_std, seed=seed, yd=yd,
                            noise_var=noise_var)
        with open(full_path, 'wb') as outfile:
            pickle.dump(sdata, outfile)
    if centers is None:
        # Reproducible clustering: reuse the dataset seed.
        np.random.seed(seed)
        sdata.cluster_rpc(rpc_blocksize)
    else:
        sdata.set_centers(centers)
    return sdata
def exp_dir(args):
    """Create (if needed) and return the experiment directory for `args`.

    The directory lives under EXP_DIR and is named by build_run_name(args).
    """
    # Renamed the local (originally also `exp_dir`) so it no longer shadows
    # this function's own name.
    d = os.path.join(EXP_DIR, build_run_name(args))
    mkdir_p(d)
    return d
def main():
    """CLI entry point for the synthetic GPRF location-inference experiments.

    Parses dataset/blocking/optimizer options and dispatches to do_run()
    in the experiment directory derived from those options.

    NOTE(review): this module defines main() twice; this later definition
    shadows the earlier one and is the binding actually used. The two copies
    differ only in the --parallel help text.
    """
    mkdir_p(EXP_DIR)
    parser = argparse.ArgumentParser(description='gprf_opt')
    # required=True: ntrain is consumed unconditionally below
    # (args.ntrain + args.ntest) and would raise TypeError on None;
    # surface a clean argparse error instead.
    parser.add_argument('--ntrain', dest='ntrain', type=int, required=True,
                        help="number of points to locate")
    parser.add_argument('--ntest', dest='ntest', type=int, default=500,
                        help="sample additional test points to evaluate predictive accuracy (not in paper)")
    parser.add_argument('--nblocks', dest='nblocks', default=1, type=int,
                        help="divides the sampled points into a grid of this many blocks. May do strange things if number is not a perfect square. Mutually exclusive with rpc_blocksize. ")
    parser.add_argument('--rpc_blocksize', dest='rpc_blocksize', default=-1, type=int,
                        help="divides the sampled points into blocks using recursive projection clustering, aiming for this target blocksize. Mutually exclusive with nblocks. ")
    # NOTE(review): lscale and obs_std have no defaults and are forwarded
    # directly to do_run(); likely required in practice — confirm against
    # do_run() before enforcing.
    parser.add_argument('--lscale', dest='lscale', type=float,
                        help="SE kernel lengthscale for the sampled functions")
    parser.add_argument('--obs_std', dest='obs_std', type=float,
                        help="std of Gaussian noise corrupting the X locations")
    parser.add_argument('--local_dist', dest='local_dist', default=1.0, type=float,
                        help="when using RPC clustering, specifies minimum kernel value necessary to connect blocks in a GPRF (1.0 corresponds to local GPs). When using grids of blocks, any setting other than 1.0 yields a GPRF with neighbor connections. \n")
    parser.add_argument('--method', dest='method', default="l-bfgs-b", type=str,
                        help="any optimization method supported by scipy.optimize.minimize (l-bfgs-b)")
    parser.add_argument('--seed', dest='seed', default=0, type=int,
                        help="seed for generating synthetic data")
    parser.add_argument('--yd', dest='yd', default=50, type=int,
                        help="number of output dimensions to sample (50)")
    parser.add_argument('--maxsec', dest='maxsec', default=3600, type=int,
                        help="maximum number of seconds to run the optimization (3600)")
    parser.add_argument('--task', dest='task', default="x", type=str,
                        help="'x', 'cov', or 'xcov' to infer locations, kernel hyperparams, or both. (x)")
    parser.add_argument('--analyze', dest='analyze', default=False, action="store_true",
                        help="just analyze existing saved inference results, don't do any new inference")
    parser.add_argument('--analyze_full', dest='analyze_full', default=False, action="store_true",
                        help="do a fuller (slower) analysis that also computes predictive accuracy")
    parser.add_argument('--parallel', dest='parallel', default=False, action="store_true",
                        help="run multiple threads for local/gprf blocks. be careful when combining this with multithreaded BLAS libraries.")
    parser.add_argument('--init_seed', dest='init_seed', default=-1, type=int,
                        help="if >=0, initialize optimization to locations generated from this random seed.")
    parser.add_argument('--init_true', dest='init_true', default=False, action="store_true",
                        help="initialize optimization at true X locations instead of observed locations (False)")
    parser.add_argument('--noise_var', dest='noise_var', default=0.01, type=float,
                        help="variance of iid noise in synthetic Y values")
    parser.add_argument('--gplvm_type', dest='gplvm_type', default="gprf", type=str,
                        help="use 'sparse' or 'bayesian' for GPy inducing point comparison (gprf)")
    parser.add_argument('--num_inducing', dest='num_inducing', default=0, type=int,
                        help="number of inducing points to use with sparse approximations")
    args = parser.parse_args()

    d = exp_dir(args)
    # Total sampled points = training points to locate + held-out test points.
    do_run(d=d, lscale=args.lscale, obs_std=args.obs_std,
           local_dist=args.local_dist, n=args.ntrain + args.ntest,
           ntrain=args.ntrain, nblocks=args.nblocks, yd=args.yd,
           method=args.method, rpc_blocksize=args.rpc_blocksize,
           seed=args.seed, maxsec=args.maxsec, analyze_only=args.analyze,
           analyze_full=args.analyze_full, task=args.task,
           init_seed=args.init_seed, noise_var=args.noise_var,
           parallel=args.parallel, gplvm_type=args.gplvm_type,
           num_inducing=args.num_inducing, init_true=args.init_true)