Example #1
0
def main():
    """Command-line entry point: configure and launch a GPRF experiment.

    Parses the experiment options, builds the run's output directory via
    exp_dir, and dispatches to do_run with the parsed settings.
    """
    mkdir_p(EXP_DIR)

    parser = argparse.ArgumentParser(description='gprf_opt')
    add = parser.add_argument
    add('--ntrain', dest='ntrain', type=int,
        help="number of points to locate")
    add('--ntest', dest='ntest', type=int, default=500,
        help="sample additional test points to evaluate predictive accuracy (not in paper)")
    add('--nblocks', dest='nblocks', default=1, type=int,
        help="divides the sampled points into a grid of this many blocks. May do strange things if number is not a perfect square. Mutually exclusive with rpc_blocksize. ")
    add('--rpc_blocksize', dest='rpc_blocksize', default=-1, type=int,
        help="divides the sampled points into blocks using recursive projection clustering, aiming for this target blocksize. Mutually exclusive with nblocks. ")
    add('--lscale', dest='lscale', type=float,
        help="SE kernel lengthscale for the sampled functions")
    add('--obs_std', dest='obs_std', type=float,
        help="std of Gaussian noise corrupting the X locations")
    add('--local_dist', dest='local_dist', default=1.0, type=float,
        help="when using RPC clustering, specifies minimum kernel value necessary to connect blocks in a GPRF (1.0 corresponds to local GPs). When using grids of blocks, any setting other than 1.0 yields a GPRF with neighbor connections. ")
    add('--method', dest='method', default="l-bfgs-b", type=str,
        help="any optimization method supported by scipy.optimize.minimize (l-bfgs-b)")
    add('--seed', dest='seed', default=0, type=int,
        help="seed for generating synthetic data")
    add('--yd', dest='yd', default=50, type=int,
        help="number of output dimensions to sample (50)")
    add('--maxsec', dest='maxsec', default=3600, type=int,
        help="maximum number of seconds to run the optimization (3600)")
    add('--task', dest='task', default="x", type=str,
        help="'x', 'cov', or 'xcov' to infer locations, kernel hyperparams, or both. (x)")
    add('--analyze', dest='analyze', default=False, action="store_true",
        help="just analyze existing saved inference results, don't do any new inference")
    add('--analyze_full', dest='analyze_full', default=False, action="store_true",
        help="do a fuller (slower) analysis that also computes predictive accuracy")
    add('--parallel', dest='parallel', default=False, action="store_true",
        help="run multiple threads for local/gprf blocks. be careful when combining this with multithreaded BLAS libraries.")
    add('--init_seed', dest='init_seed', default=-1, type=int,
        help="if >=0, initialize optimization to locations generated from this random seed.")
    add('--init_true', dest='init_true', default=False, action="store_true",
        help="initialize optimization at true X locations instead of observed locations (False)")
    add('--noise_var', dest='noise_var', default=0.01, type=float,
        help="variance of iid noise in synthetic Y values")
    add('--gplvm_type', dest='gplvm_type', default="gprf", type=str,
        help="use 'sparse' or 'bayesian' for GPy inducing point comparison (gprf)")
    add('--num_inducing', dest='num_inducing', default=0, type=int,
        help="number of inducing points to use with sparse approximations")

    args = parser.parse_args()

    run_dir = exp_dir(args)
    # n counts both training and held-out test points.
    do_run(d=run_dir,
           lscale=args.lscale,
           obs_std=args.obs_std,
           local_dist=args.local_dist,
           n=args.ntrain + args.ntest,
           ntrain=args.ntrain,
           nblocks=args.nblocks,
           yd=args.yd,
           method=args.method,
           rpc_blocksize=args.rpc_blocksize,
           seed=args.seed,
           maxsec=args.maxsec,
           analyze_only=args.analyze,
           analyze_full=args.analyze_full,
           task=args.task,
           init_seed=args.init_seed,
           noise_var=args.noise_var,
           parallel=args.parallel,
           gplvm_type=args.gplvm_type,
           num_inducing=args.num_inducing,
           init_true=args.init_true)
Example #2
0
def seismic_exp_dir(args):
    """Build (and create) the output directory for a seismic experiment run.

    The directory name encodes the run's key parameters; a custom
    initialization (init_cov and/or init_x) is summarized by a short md5
    digest so the name stays bounded in length.

    Args:
        args: parsed argparse namespace providing npts, rpc_blocksize,
            threshold, init_cov, init_x, task, synth_lscale, and obs_std.

    Returns:
        Path of the (now existing) experiment directory under
        $HOME/seismic_experiments.
    """
    npts, block_size, thresh, init_cov, init_x, task, synth_lscale, obs_std = args.npts, args.rpc_blocksize, args.threshold, args.init_cov, args.init_x, args.task, args.synth_lscale, args.obs_std
    import hashlib
    base_dir = os.path.join(os.environ["HOME"], "seismic_experiments")
    init_str = "default"
    if init_cov or init_x:
        # BUGFIX: the condition allows either spec alone, so treat a missing
        # one as "" instead of raising TypeError on str+None; also md5()
        # requires bytes on Python 3, so encode the concatenation.
        # (assumes init_cov/init_x are strings when set — TODO confirm)
        init_key = ((init_cov or "") + (init_x or "")).encode("utf-8")
        init_str = "_%s" % hashlib.md5(init_key).hexdigest()[:8]
    run_name = "%d_%d_%.4f_%s_%s_%.0f_%.1f" % (npts, block_size, thresh, init_str, task, synth_lscale, obs_std)
    d = os.path.join(base_dir, run_name)
    mkdir_p(d)
    return d
Example #3
0
def seismic_exp_dir(args):
    """Build (and create) the output directory for a seismic experiment run.

    The directory name encodes the run's key parameters; a custom
    initialization (init_cov and/or init_x) is summarized by a short md5
    digest so the name stays bounded in length.

    Args:
        args: parsed argparse namespace providing npts, rpc_blocksize,
            threshold, init_cov, init_x, task, synth_lscale, and obs_std.

    Returns:
        Path of the (now existing) experiment directory under
        $HOME/seismic_experiments.
    """
    npts, block_size, thresh, init_cov, init_x, task, synth_lscale, obs_std = args.npts, args.rpc_blocksize, args.threshold, args.init_cov, args.init_x, args.task, args.synth_lscale, args.obs_std
    import hashlib
    base_dir = os.path.join(os.environ["HOME"], "seismic_experiments")
    init_str = "default"
    if init_cov or init_x:
        # BUGFIX: the condition allows either spec alone, so treat a missing
        # one as "" instead of raising TypeError on str+None; also md5()
        # requires bytes on Python 3, so encode the concatenation.
        # (assumes init_cov/init_x are strings when set — TODO confirm)
        init_key = ((init_cov or "") + (init_x or "")).encode("utf-8")
        init_str = "_%s" % hashlib.md5(init_key).hexdigest()[:8]
    run_name = "%d_%d_%.4f_%s_%s_%.0f_%.1f" % (
        npts, block_size, thresh, init_str, task, synth_lscale, obs_std)
    d = os.path.join(base_dir, run_name)
    mkdir_p(d)
    return d
Example #4
0
def sample_data(n, ntrain, lscale, obs_std, yd, seed,
                centers, noise_var, rpc_blocksize=-1):
    """Load a cached synthetic dataset, sampling and caching it on a miss,
    then attach a blocking structure.

    The pickle cache lives under ~/gprf_experiments/synthetic_datasets with a
    filename encoding the sampling parameters. After loading, blocks are
    assigned either from explicit centers or by recursive projection
    clustering (RPC) with the given target block size.

    Args:
        n: total number of points to sample.
        ntrain: number of training points within n.
        lscale: SE kernel lengthscale for the sampled functions.
        obs_std: std of Gaussian noise on the observed locations.
        yd: number of output dimensions.
        seed: RNG seed for sampling (also reseeds numpy before RPC).
        centers: block centers, or None to cluster with RPC instead.
        noise_var: iid noise variance on Y (0.01 is omitted from the name).
        rpc_blocksize: target RPC block size (used only when centers is None).

    Returns:
        The SampledData instance with its blocking structure set.
    """
    cache_dir = os.path.join(os.environ["HOME"], "gprf_experiments", "synthetic_datasets")
    mkdir_p(cache_dir)
    noise_tag = "" if noise_var == 0.01 else "_%.4f" % noise_var
    cache_name = "%d_%d_%.6f_%.6f_%d_%d%s.pkl" % (n, ntrain, lscale, obs_std, yd, seed, noise_tag)
    cache_path = os.path.join(cache_dir, cache_name)

    try:
        with open(cache_path, 'rb') as fh:
            sdata = pickle.load(fh)
    except IOError:
        # Cache miss: draw a fresh dataset and persist it for later runs.
        sdata = SampledData(n=n, ntrain=ntrain, lscale=lscale, obs_std=obs_std,
                            seed=seed, yd=yd, noise_var=noise_var)
        with open(cache_path, 'wb') as fh:
            pickle.dump(sdata, fh)

    if centers is not None:
        sdata.set_centers(centers)
    else:
        # Reseed so the RPC clustering is deterministic for a given seed.
        np.random.seed(seed)
        sdata.cluster_rpc(rpc_blocksize)
    return sdata
Example #5
0
def sample_data(n,
                ntrain,
                lscale,
                obs_std,
                yd,
                seed,
                centers,
                noise_var,
                rpc_blocksize=-1):
    """Load (or sample and cache) a synthetic dataset, then attach blocks.

    The dataset is cached under ~/gprf_experiments/synthetic_datasets in a
    pickle whose filename encodes the sampling parameters; a cache miss
    triggers a fresh SampledData draw that is persisted for reuse.

    Args:
        n: total number of points to sample.
        ntrain: number of training points within n.
        lscale: SE kernel lengthscale used when sampling.
        obs_std: std of Gaussian noise on the observed locations.
        yd: number of output dimensions.
        seed: RNG seed for sampling (also reseeds numpy before RPC below).
        centers: block centers to assign points to, or None to cluster with
            recursive projection clustering (RPC) instead.
        noise_var: iid noise variance on Y values (the default 0.01 is
            omitted from the cache filename for backward compatibility).
        rpc_blocksize: target RPC block size (used only when centers is None).

    Returns:
        The SampledData instance with its blocking structure set.
    """
    sample_basedir = os.path.join(os.environ["HOME"], "gprf_experiments",
                                  "synthetic_datasets")
    mkdir_p(sample_basedir)
    # Filename encodes all sampling parameters; noise_var is appended only
    # when it differs from the historical default of 0.01.
    sample_fname = "%d_%d_%.6f_%.6f_%d_%d%s.pkl" % (
        n, ntrain, lscale, obs_std, yd, seed,
        "" if noise_var == 0.01 else "_%.4f" % noise_var)
    sample_fname_full = os.path.join(sample_basedir, sample_fname)

    try:
        with open(sample_fname_full, 'rb') as f:
            sdata = pickle.load(f)
    except IOError:
        # Cache miss: sample a fresh dataset and persist it for future runs.
        sdata = SampledData(n=n,
                            ntrain=ntrain,
                            lscale=lscale,
                            obs_std=obs_std,
                            seed=seed,
                            yd=yd,
                            noise_var=noise_var)

        with open(sample_fname_full, 'wb') as f:
            pickle.dump(sdata, f)

    if centers is not None:
        sdata.set_centers(centers)
    else:
        # Reseed so the RPC clustering is deterministic for a given seed.
        np.random.seed(seed)
        sdata.cluster_rpc(rpc_blocksize)
    return sdata
Example #6
0
def exp_dir(args):
    """Return (creating it if needed) the experiment directory for these args."""
    # Use a local name distinct from the function to avoid shadowing.
    path = os.path.join(EXP_DIR, build_run_name(args))
    mkdir_p(path)
    return path
Example #7
0
def main():
    """Command-line entry point: configure and launch a GPRF experiment.

    Parses experiment options, builds the run's output directory via
    exp_dir, and dispatches to do_run with the parsed settings.
    """

    mkdir_p(EXP_DIR)

    parser = argparse.ArgumentParser(description='gprf_opt')
    parser.add_argument('--ntrain',
                        dest='ntrain',
                        type=int,
                        help="number of points to locate")
    parser.add_argument(
        '--ntest',
        dest='ntest',
        type=int,
        default=500,
        help=
        "sample additional test points to evaluate predictive accuracy (not in paper)"
    )
    parser.add_argument(
        '--nblocks',
        dest='nblocks',
        default=1,
        type=int,
        help=
        "divides the sampled points into a grid of this many blocks. May do strange things if number is not a perfect square. Mutually exclusive with rpc_blocksize. "
    )
    parser.add_argument(
        '--rpc_blocksize',
        dest='rpc_blocksize',
        default=-1,
        type=int,
        help=
        "divides the sampled points into blocks using recursive projection clustering, aiming for this target blocksize. Mutually exclusive with nblocks. "
    )
    parser.add_argument('--lscale',
                        dest='lscale',
                        type=float,
                        help="SE kernel lengthscale for the sampled functions")
    parser.add_argument(
        '--obs_std',
        dest='obs_std',
        type=float,
        help="std of Gaussian noise corrupting the X locations")
    parser.add_argument(
        '--local_dist',
        dest='local_dist',
        default=1.0,
        type=float,
        help=
        "when using RPC clustering, specifies minimum kernel value necessary to connect blocks in a GPRF (1.0 corresponds to local GPs). When using grids of blocks, any setting other than 1.0 yields a GPRF with neighbor connections. "
    )
    parser.add_argument(
        '--method',
        dest='method',
        default="l-bfgs-b",
        type=str,
        help=
        "any optimization method supported by scipy.optimize.minimize (l-bfgs-b)"
    )
    parser.add_argument('--seed',
                        dest='seed',
                        default=0,
                        type=int,
                        help="seed for generating synthetic data")
    parser.add_argument('--yd',
                        dest='yd',
                        default=50,
                        type=int,
                        help="number of output dimensions to sample (50)")
    parser.add_argument(
        '--maxsec',
        dest='maxsec',
        default=3600,
        type=int,
        help="maximum number of seconds to run the optimization (3600)")
    parser.add_argument(
        '--task',
        dest='task',
        default="x",
        type=str,
        help=
        "'x', 'cov', or 'xcov' to infer locations, kernel hyperparams, or both. (x)"
    )
    parser.add_argument(
        '--analyze',
        dest='analyze',
        default=False,
        action="store_true",
        help=
        "just analyze existing saved inference results, don't do any new inference"
    )
    parser.add_argument(
        '--analyze_full',
        dest='analyze_full',
        default=False,
        action="store_true",
        help=
        "do a fuller (slower) analysis that also computes predictive accuracy")
    parser.add_argument(
        '--parallel',
        dest='parallel',
        default=False,
        action="store_true",
        help=
        "run multiple threads for local/gprf blocks. be careful when combining this with multithreaded BLAS libraries."
    )
    parser.add_argument(
        '--init_seed',
        dest='init_seed',
        default=-1,
        type=int,
        help=
        "if >=0, initialize optimization to locations generated from this random seed."
    )
    parser.add_argument(
        '--init_true',
        dest='init_true',
        default=False,
        action="store_true",
        help=
        "initialize optimization at true X locations instead of observed locations (False)"
    )
    parser.add_argument('--noise_var',
                        dest='noise_var',
                        default=0.01,
                        type=float,
                        help="variance of iid noise in synthetic Y values")
    parser.add_argument(
        '--gplvm_type',
        dest='gplvm_type',
        default="gprf",
        type=str,
        help=
        "use 'sparse' or 'bayesian' for GPy inducing point comparison (gprf)")
    parser.add_argument(
        '--num_inducing',
        dest='num_inducing',
        default=0,
        type=int,
        help="number of inducing points to use with sparse approximations")

    args = parser.parse_args()

    d = exp_dir(args)
    # n counts both training and held-out test points.
    do_run(d=d,
           lscale=args.lscale,
           obs_std=args.obs_std,
           local_dist=args.local_dist,
           n=args.ntrain + args.ntest,
           ntrain=args.ntrain,
           nblocks=args.nblocks,
           yd=args.yd,
           method=args.method,
           rpc_blocksize=args.rpc_blocksize,
           seed=args.seed,
           maxsec=args.maxsec,
           analyze_only=args.analyze,
           analyze_full=args.analyze_full,
           task=args.task,
           init_seed=args.init_seed,
           noise_var=args.noise_var,
           parallel=args.parallel,
           gplvm_type=args.gplvm_type,
           num_inducing=args.num_inducing,
           init_true=args.init_true)
Example #8
0
def exp_dir(args):
    """Create (if missing) and return the experiment directory for this run."""
    # Name the local differently so it does not shadow the function itself.
    target = os.path.join(EXP_DIR, build_run_name(args))
    mkdir_p(target)
    return target